diff options
Diffstat (limited to 'arch/arm')
217 files changed, 9488 insertions, 1749 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9f1f09a2bc9b..392e7ae69452 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1,8 +1,8 @@ config ARM bool default y - select ARCH_BINFMT_ELF_RANDOMIZE_PIE select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAS_GCOV_PROFILE_ALL @@ -21,6 +21,7 @@ config ARM select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW + select GENERIC_IRQ_SHOW_LEVEL select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD @@ -286,6 +287,11 @@ config GENERIC_BUG def_bool y depends on BUG +config PGTABLE_LEVELS + int + default 3 if ARM_LPAE + default 2 + source "init/Kconfig" source "kernel/Kconfig.freezer" @@ -619,6 +625,7 @@ config ARCH_PXA select GENERIC_CLOCKEVENTS select GPIO_PXA select HAVE_IDE + select IRQ_DOMAIN select MULTI_IRQ_HANDLER select PLAT_PXA select SPARSE_IRQ @@ -1057,7 +1064,7 @@ config ARM_ERRATA_430973 depends on CPU_V7 help This option enables the workaround for the 430973 Cortex-A8 - (r1p0..r1p2) erratum. If a code sequence containing an ARM/Thumb + r1p* erratum. If a code sequence containing an ARM/Thumb interworking branch is replaced with another code sequence at the same virtual address, whether due to self-modifying code or virtual to physical address re-mapping, Cortex-A8 does not recover from the @@ -1126,6 +1133,7 @@ config ARM_ERRATA_742231 config ARM_ERRATA_643719 bool "ARM errata: LoUIS bit field in CLIDR register is incorrect" depends on CPU_V7 && SMP + default y help This option enables the workaround for the 643719 Cortex-A9 (prior to r1p0) erratum. On affected cores the LoUIS bit field of the CLIDR @@ -1343,7 +1351,7 @@ config SMP If you don't know what to do here, say N. config SMP_ON_UP - bool "Allow booting SMP kernel on uniprocessor systems (EXPERIMENTAL)" + bool "Allow booting SMP kernel on uniprocessor systems" depends on SMP && !XIP_KERNEL && MMU default y help @@ -2125,16 +2133,6 @@ menu "Userspace binary formats" source "fs/Kconfig.binfmt" -config ARTHUR - tristate "RISC OS personality" - depends on !AEABI - help - Say Y here to include the kernel code necessary if you want to run - Acorn RISC OS/Arthur binaries under Linux. This code is still very - experimental; if this sounds frightening, say N and sleep in peace. - You can also say M here to compile this support as a module (which - will be called arthur). - endmenu menu "Power management options" @@ -2167,6 +2165,9 @@ source "arch/arm/Kconfig.debug" source "security/Kconfig" source "crypto/Kconfig" +if CRYPTO +source "arch/arm/crypto/Kconfig" +endif source "lib/Kconfig" diff --git a/arch/arm/Makefile b/arch/arm/Makefile index eb7bb511f853..5575d9fa8806 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -13,7 +13,7 @@ # Ensure linker flags are correct LDFLAGS := -LDFLAGS_vmlinux :=-p --no-undefined -X +LDFLAGS_vmlinux :=-p --no-undefined -X --pic-veneer ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 LDFLAGS_MODULE += --be8 @@ -264,6 +264,7 @@ core-$(CONFIG_FPE_FASTFPE) += $(FASTFPE_OBJ) core-$(CONFIG_VFP) += arch/arm/vfp/ core-$(CONFIG_XEN) += arch/arm/xen/ core-$(CONFIG_KVM_ARM_HOST) += arch/arm/kvm/ +core-$(CONFIG_VDSO) += arch/arm/vdso/ # If we have a machine-specific directory, then include it in the build. core-y += arch/arm/kernel/ arch/arm/mm/ arch/arm/common/ @@ -321,6 +322,12 @@ dtbs: prepare scripts dtbs_install: $(Q)$(MAKE) $(dtbinst)=$(boot)/dts +PHONY += vdso_install +vdso_install: +ifeq ($(CONFIG_VDSO),y) + $(Q)$(MAKE) $(build)=arch/arm/vdso $@ +endif + # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) @@ -345,4 +352,5 @@ define archhelp echo ' Install using (your) ~/bin/$(INSTALLKERNEL) or' echo ' (distribution) /sbin/$(INSTALLKERNEL) or' echo ' install to $$(INSTALL_PATH) and run lilo' + echo ' vdso_install - Install unstripped vdso.so to $$(INSTALL_MOD_PATH)/vdso' endef diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index ec2f8065f955..9eca7aee927f 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -12,7 +12,7 @@ # ifneq ($(MACHINE),) -include $(srctree)/$(MACHINE)/Makefile.boot +include $(MACHINE)/Makefile.boot endif # Note: the following conditions must always be true: diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index c41a793b519c..2c45b5709fa4 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -10,8 +10,11 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> +#include <asm/v7m.h> + + AR_CLASS( .arch armv7-a ) + M_CLASS( .arch armv7-m ) - .arch armv7-a /* * Debugging stuff * @@ -114,7 +117,12 @@ * sort out different calling conventions */ .align - .arm @ Always enter in ARM state + /* + * Always enter in ARM state for CPUs that support the ARM ISA. + * As of today (2014) that's exactly the members of the A and R + * classes. + */ + AR_CLASS( .arm ) start: .type start,#function .rept 7 @@ -132,14 +140,15 @@ start: THUMB( .thumb ) 1: - ARM_BE8( setend be ) @ go BE8 if compiled for BE8 - mrs r9, cpsr + ARM_BE8( setend be ) @ go BE8 if compiled for BE8 + AR_CLASS( mrs r9, cpsr ) #ifdef CONFIG_ARM_VIRT_EXT bl __hyp_stub_install @ get into SVC mode, reversibly #endif mov r7, r1 @ save architecture ID mov r8, r2 @ save atags pointer +#ifndef CONFIG_CPU_V7M /* * Booting from Angel - need to enter SVC mode and disable * FIQs/IRQs (numeric definitions from angel arm.h source). @@ -155,6 +164,7 @@ not_angel: safe_svcmode_maskall r0 msr spsr_cxsf, r9 @ Save the CPU boot mode in @ SPSR +#endif /* * Note that some cache flushing and other stuff may * be needed here - is there an Angel SWI call for this? @@ -168,9 +178,26 @@ not_angel: .text #ifdef CONFIG_AUTO_ZRELADDR - @ determine final kernel image address + /* + * Find the start of physical memory. As we are executing + * without the MMU on, we are in the physical address space. + * We just need to get rid of any offset by aligning the + * address. + * + * This alignment is a balance between the requirements of + * different platforms - we have chosen 128MB to allow + * platforms which align the start of their physical memory + * to 128MB to use this feature, while allowing the zImage + * to be placed within the first 128MB of memory on other + * platforms. Increasing the alignment means we place + * stricter alignment requirements on the start of physical + * memory, but relaxing it means that we break people who + * are already placing their zImage in (eg) the top 64MB + * of this range. + */ mov r4, pc and r4, r4, #0xf8000000 + /* Determine final kernel image address. */ add r4, r4, #TEXT_OFFSET #else ldr r4, =zreladdr @@ -810,6 +837,16 @@ __common_mmu_cache_on: call_cache_fn: adr r12, proc_types #ifdef CONFIG_CPU_CP15 mrc p15, 0, r9, c0, c0 @ get processor ID +#elif defined(CONFIG_CPU_V7M) + /* + * On v7-M the processor id is located in the V7M_SCB_CPUID + * register, but as cache handling is IMPLEMENTATION DEFINED on + * v7-M (if existant at all) we just return early here. + * If V7M_SCB_CPUID were used the cpu ID functions (i.e. + * __armv7_mmu_cache_{on,off,flush}) would be selected which + * use cp15 registers that are not implemented on v7-M. + */ + bx lr #else ldr r9, =CONFIG_PROCESSOR_ID #endif @@ -1310,8 +1347,9 @@ __hyp_reentry_vectors: __enter_kernel: mov r0, #0 @ must be 0 - ARM( mov pc, r4 ) @ call kernel - THUMB( bx r4 ) @ entry point is always ARM + ARM( mov pc, r4 ) @ call kernel + M_CLASS( add r4, r4, #1 ) @ enter in Thumb mode for M class + THUMB( bx r4 ) @ entry point is always ARM for A/R classes reloc_code_end: diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index 1943fc333e7c..8a099bc10c1e 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -15,7 +15,7 @@ / { compatible = "ti,am4372", "ti,am43"; - interrupt-parent = <&gic>; + interrupt-parent = <&wakeupgen>; aliases { @@ -48,6 +48,15 @@ #interrupt-cells = <3>; reg = <0x48241000 0x1000>, <0x48240100 0x0100>; + interrupt-parent = <&gic>; + }; + + wakeupgen: interrupt-controller@48281000 { + compatible = "ti,omap4-wugen-mpu"; + interrupt-controller; + #interrupt-cells = <3>; + reg = <0x48281000 0x1000>; + interrupt-parent = <&gic>; }; l2-cache-controller@48242000 { diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts index f84d9715a4a9..26956cb50835 100644 --- a/arch/arm/boot/dts/am437x-gp-evm.dts +++ b/arch/arm/boot/dts/am437x-gp-evm.dts @@ -352,7 +352,6 @@ reg = <0x24>; compatible = "ti,tps65218"; interrupts = <GIC_SPI 7 IRQ_TYPE_NONE>; /* NMIn */ - interrupt-parent = <&gic>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts index 832d24318f62..8ae29c955c11 100644 --- a/arch/arm/boot/dts/am437x-sk-evm.dts +++ b/arch/arm/boot/dts/am437x-sk-evm.dts @@ -392,7 +392,6 @@ tps@24 { compatible = "ti,tps65218"; reg = <0x24>; - interrupt-parent = <&gic>; interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index 257c099c347e..1d7109196872 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -369,7 +369,6 @@ reg = <0x24>; compatible = "ti,tps65218"; interrupts = <GIC_SPI 7 IRQ_TYPE_NONE>; /* NMIn */ - interrupt-parent = <&gic>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts index 6463f9ef2b54..bd48dba16748 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts @@ -454,7 +454,6 @@ mcp_rtc: rtc@6f { compatible = "microchip,mcp7941x"; reg = <0x6f>; - interrupt-parent = <&gic>; interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_LOW>; /* IRQ_SYS_1N */ pinctrl-names = "default"; @@ -477,7 +476,7 @@ &uart3 { status = "okay"; - interrupts-extended = <&gic GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&crossbar_mpu GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>, <&dra7_pmx_core 0x248>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi index e7f0a4ae271c..62d25b14deb8 100644 --- a/arch/arm/boot/dts/at91sam9260.dtsi +++ b/arch/arm/boot/dts/at91sam9260.dtsi @@ -842,7 +842,7 @@ }; macb0: ethernet@fffc4000 { - compatible = "cdns,at32ap7000-macb", "cdns,macb"; + compatible = "cdns,at91sam9260-macb", "cdns,macb"; reg = <0xfffc4000 0x100>; interrupts = <21 IRQ_TYPE_LEVEL_HIGH 3>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi index fce301c4e9d6..e4f61a979a57 100644 --- a/arch/arm/boot/dts/at91sam9263.dtsi +++ b/arch/arm/boot/dts/at91sam9263.dtsi @@ -845,7 +845,7 @@ }; macb0: ethernet@fffbc000 { - compatible = "cdns,at32ap7000-macb", "cdns,macb"; + compatible = "cdns,at91sam9260-macb", "cdns,macb"; reg = <0xfffbc000 0x100>; interrupts = <21 IRQ_TYPE_LEVEL_HIGH 3>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index 488af63d5174..8ec05b11298a 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -956,7 +956,7 @@ }; macb0: ethernet@fffbc000 { - compatible = "cdns,at32ap7000-macb", "cdns,macb"; + compatible = "cdns,at91sam9260-macb", "cdns,macb"; reg = <0xfffbc000 0x100>; interrupts = <25 IRQ_TYPE_LEVEL_HIGH 3>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/at91sam9x5_macb0.dtsi b/arch/arm/boot/dts/at91sam9x5_macb0.dtsi index 57e89d1d0325..73d7e30965ba 100644 --- a/arch/arm/boot/dts/at91sam9x5_macb0.dtsi +++ b/arch/arm/boot/dts/at91sam9x5_macb0.dtsi @@ -53,7 +53,7 @@ }; macb0: ethernet@f802c000 { - compatible = "cdns,at32ap7000-macb", "cdns,macb"; + compatible = "cdns,at91sam9260-macb", "cdns,macb"; reg = <0xf802c000 0x100>; interrupts = <24 IRQ_TYPE_LEVEL_HIGH 3>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/at91sam9x5_macb1.dtsi b/arch/arm/boot/dts/at91sam9x5_macb1.dtsi index 663676c02861..d81980c40c7d 100644 --- a/arch/arm/boot/dts/at91sam9x5_macb1.dtsi +++ b/arch/arm/boot/dts/at91sam9x5_macb1.dtsi @@ -41,7 +41,7 @@ }; macb1: ethernet@f8030000 { - compatible = "cdns,at32ap7000-macb", "cdns,macb"; + compatible = "cdns,at91sam9260-macb", "cdns,macb"; reg = <0xf8030000 0x100>; interrupts = <27 IRQ_TYPE_LEVEL_HIGH 3>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/dm8168-evm.dts b/arch/arm/boot/dts/dm8168-evm.dts index d3a29c1b8417..afe678f6d2e9 100644 --- a/arch/arm/boot/dts/dm8168-evm.dts +++ b/arch/arm/boot/dts/dm8168-evm.dts @@ -36,6 +36,20 @@ >; }; + mmc_pins: pinmux_mmc_pins { + pinctrl-single,pins = < + DM816X_IOPAD(0x0a70, MUX_MODE0) /* SD_POW */ + DM816X_IOPAD(0x0a74, MUX_MODE0) /* SD_CLK */ + DM816X_IOPAD(0x0a78, MUX_MODE0) /* SD_CMD */ + DM816X_IOPAD(0x0a7C, MUX_MODE0) /* SD_DAT0 */ + DM816X_IOPAD(0x0a80, MUX_MODE0) /* SD_DAT1 */ + DM816X_IOPAD(0x0a84, MUX_MODE0) /* SD_DAT2 */ + DM816X_IOPAD(0x0a88, MUX_MODE0) /* SD_DAT2 */ + DM816X_IOPAD(0x0a8c, MUX_MODE2) /* GP1[7] */ + DM816X_IOPAD(0x0a90, MUX_MODE2) /* GP1[8] */ + >; + }; + usb0_pins: pinmux_usb0_pins { pinctrl-single,pins = < DM816X_IOPAD(0x0d00, MUX_MODE0) /* USB0_DRVVBUS */ @@ -137,7 +151,12 @@ }; &mmc1 { + pinctrl-names = "default"; + pinctrl-0 = <&mmc_pins>; vmmc-supply = <&vmmcsd_fixed>; + bus-width = <4>; + cd-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 8 GPIO_ACTIVE_LOW>; }; /* At least dm8168-evm rev c won't support multipoint, later may */ diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi index 3c97b5f2addc..f35715bc6992 100644 --- a/arch/arm/boot/dts/dm816x.dtsi +++ b/arch/arm/boot/dts/dm816x.dtsi @@ -150,17 +150,27 @@ }; gpio1: gpio@48032000 { - compatible = "ti,omap3-gpio"; + compatible = "ti,omap4-gpio"; ti,hwmods = "gpio1"; + ti,gpio-always-on; reg = <0x48032000 0x1000>; - interrupts = <97>; + interrupts = <96>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; }; gpio2: gpio@4804c000 { - compatible = "ti,omap3-gpio"; + compatible = "ti,omap4-gpio"; ti,hwmods = "gpio2"; + ti,gpio-always-on; reg = <0x4804c000 0x1000>; - interrupts = <99>; + interrupts = <98>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; }; gpmc: gpmc@50000000 { diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts index 7563d7ce01bb..b1bd06c6c2a8 100644 --- a/arch/arm/boot/dts/dra7-evm.dts +++ b/arch/arm/boot/dts/dra7-evm.dts @@ -444,7 +444,7 @@ status = "okay"; pinctrl-names = "default"; pinctrl-0 = <&uart1_pins>; - interrupts-extended = <&gic GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&crossbar_mpu GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>, <&dra7_pmx_core 0x3e0>; }; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 127608d79033..a0afce7ad482 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -13,14 +13,13 @@ #include "skeleton.dtsi" #define MAX_SOURCES 400 -#define DIRECT_IRQ(irq) (MAX_SOURCES + irq) / { #address-cells = <1>; #size-cells = <1>; compatible = "ti,dra7xx"; - interrupt-parent = <&gic>; + interrupt-parent = <&crossbar_mpu>; aliases { i2c0 = &i2c1; @@ -50,18 +49,27 @@ <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>; + interrupt-parent = <&gic>; }; gic: interrupt-controller@48211000 { compatible = "arm,cortex-a15-gic"; interrupt-controller; #interrupt-cells = <3>; - arm,routable-irqs = <192>; reg = <0x48211000 0x1000>, <0x48212000 0x1000>, <0x48214000 0x2000>, <0x48216000 0x2000>; interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_HIGH)>; + interrupt-parent = <&gic>; + }; + + wakeupgen: interrupt-controller@48281000 { + compatible = "ti,omap5-wugen-mpu", "ti,omap4-wugen-mpu"; + interrupt-controller; + #interrupt-cells = <3>; + reg = <0x48281000 0x1000>; + interrupt-parent = <&gic>; }; /* @@ -91,8 +99,8 @@ ti,hwmods = "l3_main_1", "l3_main_2"; reg = <0x44000000 0x1000000>, <0x45000000 0x1000>; - interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI DIRECT_IRQ(10) IRQ_TYPE_LEVEL_HIGH>; + interrupts-extended = <&crossbar_mpu GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, + <&wakeupgen GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; prm: prm@4ae06000 { compatible = "ti,dra7-prm"; @@ -344,7 +352,7 @@ uart1: serial@4806a000 { compatible = "ti,omap4-uart"; reg = <0x4806a000 0x100>; - interrupts-extended = <&gic GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>; + interrupts-extended = <&crossbar_mpu GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart1"; clock-frequency = <48000000>; status = "disabled"; @@ -355,7 +363,7 @@ uart2: serial@4806c000 { compatible = "ti,omap4-uart"; reg = <0x4806c000 0x100>; - interrupts-extended = <&gic GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart2"; clock-frequency = <48000000>; status = "disabled"; @@ -366,7 +374,7 @@ uart3: serial@48020000 { compatible = "ti,omap4-uart"; reg = <0x48020000 0x100>; - interrupts-extended = <&gic GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart3"; clock-frequency = <48000000>; status = "disabled"; @@ -377,7 +385,7 @@ uart4: serial@4806e000 { compatible = "ti,omap4-uart"; reg = <0x4806e000 0x100>; - interrupts-extended = <&gic GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart4"; clock-frequency = <48000000>; status = "disabled"; @@ -388,7 +396,7 @@ uart5: serial@48066000 { compatible = "ti,omap4-uart"; reg = <0x48066000 0x100>; - interrupts-extended = <&gic GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart5"; clock-frequency = <48000000>; status = "disabled"; @@ -399,7 +407,7 @@ uart6: serial@48068000 { compatible = "ti,omap4-uart"; reg = <0x48068000 0x100>; - interrupts-extended = <&gic GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart6"; clock-frequency = <48000000>; status = "disabled"; @@ -410,7 +418,7 @@ uart7: serial@48420000 { compatible = "ti,omap4-uart"; reg = <0x48420000 0x100>; - interrupts-extended = <&gic GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart7"; clock-frequency = <48000000>; status = "disabled"; @@ -419,7 +427,7 @@ uart8: serial@48422000 { compatible = "ti,omap4-uart"; reg = <0x48422000 0x100>; - interrupts-extended = <&gic GIC_SPI 219 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 219 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart8"; clock-frequency = <48000000>; status = "disabled"; @@ -428,7 +436,7 @@ uart9: serial@48424000 { compatible = "ti,omap4-uart"; reg = <0x48424000 0x100>; - interrupts-extended = <&gic GIC_SPI 220 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 220 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart9"; clock-frequency = <48000000>; status = "disabled"; @@ -437,7 +445,7 @@ uart10: serial@4ae2b000 { compatible = "ti,omap4-uart"; reg = <0x4ae2b000 0x100>; - interrupts-extended = <&gic GIC_SPI 221 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 221 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart10"; clock-frequency = <48000000>; status = "disabled"; @@ -1111,7 +1119,6 @@ "wkupclk", "refclk", "div-clk", "phy-div"; #phy-cells = <0>; - ti,hwmods = "pcie1-phy"; }; pcie2_phy: pciephy@4a095000 { @@ -1130,7 +1137,6 @@ "wkupclk", "refclk", "div-clk", "phy-div"; #phy-cells = <0>; - ti,hwmods = "pcie2-phy"; status = "disabled"; }; }; @@ -1337,9 +1343,12 @@ status = "disabled"; }; - crossbar_mpu: crossbar@4a020000 { + crossbar_mpu: crossbar@4a002a48 { compatible = "ti,irq-crossbar"; reg = <0x4a002a48 0x130>; + interrupt-controller; + interrupt-parent = <&wakeupgen>; + #interrupt-cells = <3>; ti,max-irqs = <160>; ti,max-crossbar-sources = <MAX_SOURCES>; ti,reg-size = <2>; diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts index 40ed539ce474..daf28110d487 100644 --- a/arch/arm/boot/dts/dra72-evm.dts +++ b/arch/arm/boot/dts/dra72-evm.dts @@ -158,7 +158,6 @@ pinctrl-0 = <&tps65917_pins_default>; interrupts = <GIC_SPI 2 IRQ_TYPE_NONE>; /* IRQ_SYS_1N */ - interrupt-parent = <&gic>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arm/boot/dts/dra72x.dtsi b/arch/arm/boot/dts/dra72x.dtsi index e5a3d23a3df1..f7fb0d0ef25a 100644 --- a/arch/arm/boot/dts/dra72x.dtsi +++ b/arch/arm/boot/dts/dra72x.dtsi @@ -25,6 +25,7 @@ pmu { compatible = "arm,cortex-a15-pmu"; - interrupts = <GIC_SPI DIRECT_IRQ(131) IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&wakeupgen>; + interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>; }; }; diff --git a/arch/arm/boot/dts/dra74x.dtsi b/arch/arm/boot/dts/dra74x.dtsi index 10173fab1a15..00eeed789b4b 100644 --- a/arch/arm/boot/dts/dra74x.dtsi +++ b/arch/arm/boot/dts/dra74x.dtsi @@ -41,8 +41,9 @@ pmu { compatible = "arm,cortex-a15-pmu"; - interrupts = <GIC_SPI DIRECT_IRQ(131) IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI DIRECT_IRQ(132) IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&wakeupgen>; + interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>; }; ocp { diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi index ac6b0ae42caf..14ab515aa83c 100644 --- a/arch/arm/boot/dts/exynos3250.dtsi +++ b/arch/arm/boot/dts/exynos3250.dtsi @@ -131,6 +131,9 @@ pmu_system_controller: system-controller@10020000 { compatible = "samsung,exynos3250-pmu", "syscon"; reg = <0x10020000 0x4000>; + interrupt-controller; + #interrupt-cells = <3>; + interrupt-parent = <&gic>; }; mipi_phy: video-phy@10020710 { @@ -185,6 +188,7 @@ compatible = "samsung,exynos3250-rtc"; reg = <0x10070000 0x100>; interrupts = <0 73 0>, <0 74 0>; + interrupt-parent = <&pmu_system_controller>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi index 77ea547768f4..e20cdc24c3bb 100644 --- a/arch/arm/boot/dts/exynos4.dtsi +++ b/arch/arm/boot/dts/exynos4.dtsi @@ -154,6 +154,9 @@ pmu_system_controller: system-controller@10020000 { compatible = "samsung,exynos4210-pmu", "syscon"; reg = <0x10020000 0x4000>; + interrupt-controller; + #interrupt-cells = <3>; + interrupt-parent = <&gic>; }; dsi_0: dsi@11C80000 { @@ -266,6 +269,7 @@ rtc@10070000 { compatible = "samsung,s3c6410-rtc"; reg = <0x10070000 0x100>; + interrupt-parent = <&pmu_system_controller>; interrupts = <0 44 0>, <0 45 0>; clocks = <&clock CLK_RTC>; clock-names = "rtc"; diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index adbde1adad95..77f656eb8e6b 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -205,6 +205,9 @@ clock-names = "clkout16"; clocks = <&clock CLK_FIN_PLL>; #clock-cells = <1>; + interrupt-controller; + #interrupt-cells = <3>; + interrupt-parent = <&gic>; }; sysreg_system_controller: syscon@10050000 { @@ -241,6 +244,7 @@ rtc: rtc@101E0000 { clocks = <&clock CLK_RTC>; clock-names = "rtc"; + interrupt-parent = <&pmu_system_controller>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi index c0e98cf3514f..b3d2d53820e3 100644 --- a/arch/arm/boot/dts/exynos5420.dtsi +++ b/arch/arm/boot/dts/exynos5420.dtsi @@ -327,6 +327,7 @@ rtc: rtc@101E0000 { clocks = <&clock CLK_RTC>; clock-names = "rtc"; + interrupt-parent = <&pmu_system_controller>; status = "disabled"; }; @@ -770,6 +771,9 @@ clock-names = "clkout16"; clocks = <&clock CLK_FIN_PLL>; #clock-cells = <1>; + interrupt-controller; + #interrupt-cells = <3>; + interrupt-parent = <&gic>; }; sysreg_system_controller: syscon@10050000 { diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index f4f78c40b564..3fdc84fddb70 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -92,6 +92,8 @@ ti,hwmods = "aes"; reg = <0x480c5000 0x50>; interrupts = <0>; + dmas = <&sdma 65 &sdma 66>; + dma-names = "tx", "rx"; }; prm: prm@48306000 { @@ -550,6 +552,8 @@ ti,hwmods = "sham"; reg = <0x480c3000 0x64>; interrupts = <49>; + dmas = <&sdma 69>; + dma-names = "rx"; }; smartreflex_core: smartreflex@480cb000 { diff --git a/arch/arm/boot/dts/omap4-duovero.dtsi b/arch/arm/boot/dts/omap4-duovero.dtsi index e860ccd9d09c..f2a94fa62552 100644 --- a/arch/arm/boot/dts/omap4-duovero.dtsi +++ b/arch/arm/boot/dts/omap4-duovero.dtsi @@ -173,14 +173,12 @@ twl: twl@48 { reg = <0x48>; interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_1N cascaded to gic */ - interrupt-parent = <&gic>; }; twl6040: twl@4b { compatible = "ti,twl6040"; reg = <0x4b>; interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_2N cascaded to gic */ - interrupt-parent = <&gic>; ti,audpwron-gpio = <&gpio6 0 GPIO_ACTIVE_HIGH>; /* gpio_160 */ vio-supply = <&v1v8>; diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi index 150513506c19..7c15fb2e2fe4 100644 --- a/arch/arm/boot/dts/omap4-panda-common.dtsi +++ b/arch/arm/boot/dts/omap4-panda-common.dtsi @@ -372,7 +372,6 @@ reg = <0x48>; /* IRQ# = 7 */ interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_1N cascaded to gic */ - interrupt-parent = <&gic>; }; twl6040: twl@4b { @@ -384,7 +383,6 @@ /* IRQ# = 119 */ interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_2N cascaded to gic */ - interrupt-parent = <&gic>; ti,audpwron-gpio = <&gpio4 31 GPIO_ACTIVE_HIGH>; /* gpio line 127 */ vio-supply = <&v1v8>; @@ -479,17 +477,17 @@ }; &uart2 { - interrupts-extended = <&gic GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH + interrupts-extended = <&wakeupgen GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH &omap4_pmx_core OMAP4_UART2_RX>; }; &uart3 { - interrupts-extended = <&gic GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH + interrupts-extended = <&wakeupgen GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH &omap4_pmx_core OMAP4_UART3_RX>; }; &uart4 { - interrupts-extended = <&gic GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH + interrupts-extended = <&wakeupgen GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH &omap4_pmx_core OMAP4_UART4_RX>; }; diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts index 3e1da43068f6..8aca8dae968a 100644 --- a/arch/arm/boot/dts/omap4-sdp.dts +++ b/arch/arm/boot/dts/omap4-sdp.dts @@ -363,7 +363,6 @@ reg = <0x48>; /* SPI = 0, IRQ# = 7, 4 = active high level-sensitive */ interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_1N cascaded to gic */ - interrupt-parent = <&gic>; }; twl6040: twl@4b { @@ -375,7 +374,6 @@ /* SPI = 0, IRQ# = 119, 4 = active high level-sensitive */ interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_2N cascaded to gic */ - interrupt-parent = <&gic>; ti,audpwron-gpio = <&gpio4 31 0>; /* gpio line 127 */ vio-supply = <&v1v8>; @@ -570,21 +568,21 @@ }; &uart2 { - interrupts-extended = <&gic GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH + interrupts-extended = <&wakeupgen GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH &omap4_pmx_core OMAP4_UART2_RX>; pinctrl-names = "default"; pinctrl-0 = <&uart2_pins>; }; &uart3 { - interrupts-extended = <&gic GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH + interrupts-extended = <&wakeupgen GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH &omap4_pmx_core OMAP4_UART3_RX>; pinctrl-names = "default"; pinctrl-0 = <&uart3_pins>; }; &uart4 { - interrupts-extended = <&gic GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH + interrupts-extended = <&wakeupgen GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH &omap4_pmx_core OMAP4_UART4_RX>; pinctrl-names = "default"; pinctrl-0 = <&uart4_pins>; diff --git a/arch/arm/boot/dts/omap4-var-som-om44.dtsi b/arch/arm/boot/dts/omap4-var-som-om44.dtsi index 062701e1a898..a4f1ba2e1903 100644 --- a/arch/arm/boot/dts/omap4-var-som-om44.dtsi +++ b/arch/arm/boot/dts/omap4-var-som-om44.dtsi @@ -185,7 +185,6 @@ reg = <0x48>; /* SPI = 0, IRQ# = 7, 4 = active high level-sensitive */ interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_1N cascaded to gic */ - interrupt-parent = <&gic>; }; twl6040: twl@4b { @@ -197,7 +196,6 @@ /* SPI = 0, IRQ# = 119, 4 = active high level-sensitive */ interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>; /* IRQ_SYS_2N cascaded to gic */ - interrupt-parent = <&gic>; ti,audpwron-gpio = <&gpio6 22 0>; /* gpio 182 */ vio-supply = <&v1v8>; diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index 87401d9f4d8b..f2091d1c9c36 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -14,7 +14,7 @@ / { compatible = "ti,omap4430", "ti,omap4"; - interrupt-parent = <&gic>; + interrupt-parent = <&wakeupgen>; aliases { i2c0 = &i2c1; @@ -56,6 +56,7 @@ #interrupt-cells = <3>; reg = <0x48241000 0x1000>, <0x48240100 0x0100>; + interrupt-parent = <&gic>; }; L2: l2-cache-controller@48242000 { @@ -70,6 +71,15 @@ clocks = <&mpu_periphclk>; reg = <0x48240600 0x20>; interrupts = <GIC_PPI 13 (GIC_CPU_MASK_RAW(3) | IRQ_TYPE_LEVEL_HIGH)>; + interrupt-parent = <&gic>; + }; + + wakeupgen: interrupt-controller@48281000 { + compatible = "ti,omap4-wugen-mpu"; + interrupt-controller; + #interrupt-cells = <3>; + reg = <0x48281000 0x1000>; + interrupt-parent = <&gic>; }; /* @@ -319,7 +329,7 @@ uart2: serial@4806c000 { compatible = "ti,omap4-uart"; reg = <0x4806c000 0x100>; - interrupts-extended = <&gic GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart2"; clock-frequency = <48000000>; }; @@ -327,7 +337,7 @@ uart3: serial@48020000 { compatible = "ti,omap4-uart"; reg = <0x48020000 0x100>; - interrupts-extended = <&gic GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart3"; clock-frequency = <48000000>; }; @@ -335,7 +345,7 @@ uart4: serial@4806e000 { compatible = "ti,omap4-uart"; reg = <0x4806e000 0x100>; - interrupts-extended = <&gic GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart4"; clock-frequency = <48000000>; }; diff --git a/arch/arm/boot/dts/omap5-cm-t54.dts b/arch/arm/boot/dts/omap5-cm-t54.dts index b54b271e153b..61ad2ea34720 100644 --- a/arch/arm/boot/dts/omap5-cm-t54.dts +++ b/arch/arm/boot/dts/omap5-cm-t54.dts @@ -412,7 +412,6 @@ palmas: palmas@48 { compatible = "ti,palmas"; interrupts = <GIC_SPI 7 IRQ_TYPE_NONE>; /* IRQ_SYS_1N */ - interrupt-parent = <&gic>; reg = <0x48>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts index 159720d6c956..74777a6e200a 100644 --- a/arch/arm/boot/dts/omap5-uevm.dts +++ b/arch/arm/boot/dts/omap5-uevm.dts @@ -311,7 +311,6 @@ palmas: palmas@48 { compatible = "ti,palmas"; interrupts = <GIC_SPI 7 IRQ_TYPE_NONE>; /* IRQ_SYS_1N */ - interrupt-parent = <&gic>; reg = <0x48>; interrupt-controller; #interrupt-cells = <2>; @@ -521,7 +520,6 @@ pinctrl-0 = <&twl6040_pins>; interrupts = <GIC_SPI 119 IRQ_TYPE_NONE>; /* IRQ_SYS_2N cascaded to gic */ - interrupt-parent = <&gic>; ti,audpwron-gpio = <&gpio5 13 0>; /* gpio line 141 */ vio-supply = <&smps7_reg>; diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 4a485b63a141..77b5f70d0ebc 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -18,7 +18,7 @@ #size-cells = <1>; compatible = "ti,omap5"; - interrupt-parent = <&gic>; + interrupt-parent = <&wakeupgen>; aliases { i2c0 = &i2c1; @@ -79,6 +79,7 @@ <GIC_PPI 14 (GIC_CPU_MASK_RAW(3) | IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 11 (GIC_CPU_MASK_RAW(3) | IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 10 (GIC_CPU_MASK_RAW(3) | IRQ_TYPE_LEVEL_LOW)>; + interrupt-parent = <&gic>; }; pmu { @@ -95,6 +96,15 @@ <0x48212000 0x1000>, <0x48214000 0x2000>, <0x48216000 0x2000>; + interrupt-parent = <&gic>; + }; + + wakeupgen: interrupt-controller@48281000 { + compatible = "ti,omap5-wugen-mpu", "ti,omap4-wugen-mpu"; + interrupt-controller; + #interrupt-cells = <3>; + reg = <0x48281000 0x1000>; + interrupt-parent = <&gic>; }; /* @@ -458,7 +468,7 @@ uart1: serial@4806a000 { compatible = "ti,omap4-uart"; reg = <0x4806a000 0x100>; - interrupts-extended = <&gic GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart1"; clock-frequency = <48000000>; }; @@ -466,7 +476,7 @@ uart2: serial@4806c000 { compatible = "ti,omap4-uart"; reg = <0x4806c000 0x100>; - interrupts-extended = <&gic GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart2"; clock-frequency = <48000000>; }; @@ -474,7 +484,7 @@ uart3: serial@48020000 { compatible = "ti,omap4-uart"; reg = <0x48020000 0x100>; - interrupts-extended = <&gic GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart3"; clock-frequency = <48000000>; }; @@ -482,7 +492,7 @@ uart4: serial@4806e000 { compatible = "ti,omap4-uart"; reg = <0x4806e000 0x100>; - interrupts-extended = <&gic GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart4"; clock-frequency = <48000000>; }; @@ -490,7 +500,7 @@ uart5: serial@48066000 { compatible = "ti,omap4-uart"; reg = <0x48066000 0x100>; - interrupts-extended = <&gic GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart5"; clock-frequency = <48000000>; }; @@ -498,7 +508,7 @@ uart6: serial@48068000 { compatible = "ti,omap4-uart"; reg = <0x48068000 0x100>; - interrupts-extended = <&gic GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "uart6"; clock-frequency = <48000000>; }; @@ -883,14 +893,12 @@ usbhsohci: ohci@4a064800 { compatible = "ti,ohci-omap3"; reg = <0x4a064800 0x400>; - interrupt-parent = <&gic>; interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>; }; usbhsehci: ehci@4a064c00 { compatible = "ti,ehci-omap"; reg = <0x4a064c00 0x400>; - interrupt-parent = <&gic>; interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>; }; }; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index d771f687a13b..eccc78d3220b 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -411,6 +411,7 @@ "mac_clk_rx", "mac_clk_tx", "clk_mac_ref", "clk_mac_refout", "aclk_mac", "pclk_mac"; + status = "disabled"; }; usb_host0_ehci: usb@ff500000 { diff --git a/arch/arm/boot/dts/sama5d3_emac.dtsi b/arch/arm/boot/dts/sama5d3_emac.dtsi index fe2af9276312..b4544cf11bad 100644 --- a/arch/arm/boot/dts/sama5d3_emac.dtsi +++ b/arch/arm/boot/dts/sama5d3_emac.dtsi @@ -41,7 +41,7 @@ }; macb1: ethernet@f802c000 { - compatible = "cdns,at32ap7000-macb", "cdns,macb"; + compatible = "cdns,at91sam9260-macb", "cdns,macb"; reg = <0xf802c000 0x100>; interrupts = <35 IRQ_TYPE_LEVEL_HIGH 3>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index 9d8760956752..d9176e606173 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -660,7 +660,7 @@ #address-cells = <1>; #size-cells = <0>; reg = <0xfff01000 0x1000>; - interrupts = <0 156 4>; + interrupts = <0 155 4>; num-cs = <4>; clocks = <&spi_m_clk>; status = "disabled"; diff --git a/arch/arm/boot/dts/stih416.dtsi b/arch/arm/boot/dts/stih416.dtsi index ea28ebadab1a..eeb7afecbbe6 100644 --- a/arch/arm/boot/dts/stih416.dtsi +++ b/arch/arm/boot/dts/stih416.dtsi @@ -10,7 +10,7 @@ #include "stih416-clock.dtsi" #include "stih416-pinctrl.dtsi" -#include <dt-bindings/phy/phy-miphy365x.h> +#include <dt-bindings/phy/phy.h> #include <dt-bindings/interrupt-controller/arm-gic.h> #include <dt-bindings/reset-controller/stih416-resets.h> / { @@ -306,7 +306,7 @@ reg = <0xfe380000 0x1000>; interrupts = <GIC_SPI 157 IRQ_TYPE_NONE>; interrupt-names = "hostc"; - phys = <&phy_port0 MIPHY_TYPE_SATA>; + phys = <&phy_port0 PHY_TYPE_SATA>; phy-names = "sata-phy"; resets = <&powerdown STIH416_SATA0_POWERDOWN>, <&softreset STIH416_SATA0_SOFTRESET>; diff --git a/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts b/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts index ab7891c43231..75742f8f96f3 100644 --- a/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts +++ b/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts @@ -56,6 +56,22 @@ model = "Olimex A10-OLinuXino-LIME"; compatible = "olimex,a10-olinuxino-lime", "allwinner,sun4i-a10"; + cpus { + cpu0: cpu@0 { + /* + * The A10-Lime is known to be unstable + * when running at 1008 MHz + */ + operating-points = < + /* kHz uV */ + 912000 1350000 + 864000 1300000 + 624000 1250000 + >; + cooling-max-level = <2>; + }; + }; + soc@01c00000 { emac: ethernet@01c0b000 { pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 5c2925831f20..eebb7853e00b 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -75,7 +75,6 @@ clock-latency = <244144>; /* 8 32k periods */ operating-points = < /* kHz uV */ - 1056000 1500000 1008000 1400000 912000 1350000 864000 1300000 @@ -83,7 +82,7 @@ >; #cooling-cells = <2>; cooling-min-level = <0>; - cooling-max-level = <4>; + cooling-max-level = <3>; }; }; diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index f8818f1edbbe..883cb4873688 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -47,7 +47,6 @@ clock-latency = <244144>; /* 8 32k periods */ operating-points = < /* kHz uV */ - 1104000 1500000 1008000 1400000 912000 1350000 864000 1300000 @@ -57,7 +56,7 @@ >; #cooling-cells = <2>; cooling-min-level = <0>; - cooling-max-level = <6>; + cooling-max-level = <5>; }; }; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 3a8530b79f1c..fdd181792b4b 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -105,7 +105,6 @@ clock-latency = <244144>; /* 8 32k periods */ operating-points = < /* kHz uV */ - 1008000 1450000 960000 1400000 912000 1400000 864000 1300000 @@ -116,7 +115,7 @@ >; #cooling-cells = <2>; cooling-min-level = <0>; - cooling-max-level = <7>; + cooling-max-level = <6>; }; cpu@1 { diff --git a/arch/arm/boot/dts/tegra114.dtsi b/arch/arm/boot/dts/tegra114.dtsi index 4296b5398bf5..f58a3d9d5f13 100644 --- a/arch/arm/boot/dts/tegra114.dtsi +++ b/arch/arm/boot/dts/tegra114.dtsi @@ -8,7 +8,7 @@ / { compatible = "nvidia,tegra114"; - interrupt-parent = <&gic>; + interrupt-parent = <&lic>; host1x@50000000 { compatible = "nvidia,tegra114-host1x", "simple-bus"; @@ -134,6 +134,19 @@ <0x50046000 0x2000>; interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>; + interrupt-parent = <&gic>; + }; + + lic: interrupt-controller@60004000 { + compatible = "nvidia,tegra114-ictlr", "nvidia,tegra30-ictlr"; + reg = <0x60004000 0x100>, + <0x60004100 0x50>, + <0x60004200 0x50>, + <0x60004300 0x50>, + <0x60004400 0x50>; + interrupt-controller; + #interrupt-cells = <3>; + interrupt-parent = <&gic>; }; timer@60005000 { @@ -766,5 +779,6 @@ (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>; + interrupt-parent = <&gic>; }; }; diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index 4be06c6ea0c8..db85695aa7aa 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -10,7 +10,7 @@ / { compatible = "nvidia,tegra124"; - interrupt-parent = <&gic>; + interrupt-parent = <&lic>; #address-cells = <2>; #size-cells = <2>; @@ -173,6 +173,7 @@ <0x0 0x50046000 0x0 0x2000>; interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>; + interrupt-parent = <&gic>; }; gpu@0,57000000 { @@ -190,6 +191,18 @@ status = "disabled"; }; + lic: interrupt-controller@60004000 { + compatible = "nvidia,tegra124-ictlr", "nvidia,tegra30-ictlr"; + reg = <0x0 0x60004000 0x0 0x100>, + <0x0 0x60004100 0x0 0x100>, + <0x0 0x60004200 0x0 0x100>, + <0x0 0x60004300 0x0 0x100>, + <0x0 0x60004400 0x0 0x100>; + interrupt-controller; + #interrupt-cells = <3>; + interrupt-parent = <&gic>; + }; + timer@0,60005000 { compatible = "nvidia,tegra124-timer", "nvidia,tegra20-timer"; reg = <0x0 0x60005000 0x0 0x400>; @@ -955,5 +968,6 @@ (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>, <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>; + interrupt-parent = <&gic>; }; }; diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index e5527f742696..adf6b048d0bb 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -7,7 +7,7 @@ / { compatible = "nvidia,tegra20"; - interrupt-parent = <&intc>; + interrupt-parent = <&lic>; host1x@50000000 { compatible = "nvidia,tegra20-host1x", "simple-bus"; @@ -142,6 +142,7 @@ timer@50040600 { compatible = "arm,cortex-a9-twd-timer"; + interrupt-parent = <&intc>; reg = <0x50040600 0x20>; interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_HIGH)>; @@ -154,6 +155,7 @@ 0x50040100 0x0100>; interrupt-controller; #interrupt-cells = <3>; + interrupt-parent = <&intc>; }; cache-controller@50043000 { @@ -165,6 +167,17 @@ cache-level = <2>; }; + lic: interrupt-controller@60004000 { + compatible = "nvidia,tegra20-ictlr"; + reg = <0x60004000 0x100>, + <0x60004100 0x50>, + <0x60004200 0x50>, + <0x60004300 0x50>; + interrupt-controller; + #interrupt-cells = <3>; + interrupt-parent = <&intc>; + }; + timer@60005000 { compatible = "nvidia,tegra20-timer"; reg = <0x60005000 0x60>; diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi index db4810df142c..60e205a0f63d 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -8,7 +8,7 @@ / { compatible = "nvidia,tegra30"; - interrupt-parent = <&intc>; + interrupt-parent = <&lic>; pcie-controller@00003000 { compatible = "nvidia,tegra30-pcie"; @@ -228,6 +228,7 @@ timer@50040600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0x50040600 0x20>; + interrupt-parent = <&intc>; interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>; clocks = <&tegra_car TEGRA30_CLK_TWD>; @@ -239,6 +240,7 @@ 0x50040100 0x0100>; interrupt-controller; #interrupt-cells = <3>; + interrupt-parent = <&intc>; }; cache-controller@50043000 { @@ -250,6 +252,18 @@ cache-level = <2>; }; + lic: interrupt-controller@60004000 { + compatible = "nvidia,tegra30-ictlr"; + reg = <0x60004000 0x100>, + <0x60004100 0x50>, + <0x60004200 0x50>, + <0x60004300 0x50>, + <0x60004400 0x50>; + interrupt-controller; + #interrupt-cells = <3>; + interrupt-parent = <&intc>; + }; + timer@60005000 { compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer"; reg = <0x60005000 0x400>; diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 6eaddc47c43d..37dc0fe1093f 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -151,8 +151,6 @@ static int bL_switch_to(unsigned int new_cluster_id) unsigned int mpidr, this_cpu, that_cpu; unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster; struct completion inbound_alive; - struct tick_device *tdev; - enum clock_event_mode tdev_mode; long volatile *handshake_ptr; int ipi_nr, ret; @@ -219,13 +217,7 @@ static int bL_switch_to(unsigned int new_cluster_id) /* redirect GIC's SGIs to our counterpart */ gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]); - tdev = tick_get_device(this_cpu); - if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu))) - tdev = NULL; - if (tdev) { - tdev_mode = tdev->evtdev->mode; - clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN); - } + tick_suspend_local(); ret = cpu_pm_enter(); @@ -251,11 +243,7 @@ static int bL_switch_to(unsigned int new_cluster_id) ret = cpu_pm_exit(); - if (tdev) { - clockevents_set_mode(tdev->evtdev, tdev_mode); - clockevents_program_event(tdev->evtdev, - tdev->evtdev->next_event, 1); - } + tick_resume_local(); trace_cpu_migrate_finish(ktime_get_real_ns(), ib_mpidr); local_fiq_enable(); diff --git a/arch/arm/configs/badge4_defconfig b/arch/arm/configs/badge4_defconfig index 0494c8f229a2..d59009878312 100644 --- a/arch/arm/configs/badge4_defconfig +++ b/arch/arm/configs/badge4_defconfig @@ -12,7 +12,6 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y CONFIG_FPE_NWFPE=y CONFIG_BINFMT_AOUT=m CONFIG_BINFMT_MISC=m -CONFIG_ARTHUR=m CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig new file mode 100644 index 000000000000..8da2207b0072 --- /dev/null +++ b/arch/arm/crypto/Kconfig @@ -0,0 +1,130 @@ + +menuconfig ARM_CRYPTO + bool "ARM Accelerated Cryptographic Algorithms" + depends on ARM + help + Say Y here to choose from a selection of cryptographic algorithms + implemented using ARM specific CPU features or instructions. + +if ARM_CRYPTO + +config CRYPTO_SHA1_ARM + tristate "SHA1 digest algorithm (ARM-asm)" + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using optimized ARM assembler. + +config CRYPTO_SHA1_ARM_NEON + tristate "SHA1 digest algorithm (ARM NEON)" + depends on KERNEL_MODE_NEON + select CRYPTO_SHA1_ARM + select CRYPTO_SHA1 + select CRYPTO_HASH + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using optimized ARM NEON assembly, when NEON instructions are + available. + +config CRYPTO_SHA1_ARM_CE + tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)" + depends on KERNEL_MODE_NEON + select CRYPTO_SHA1_ARM + select CRYPTO_HASH + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using special ARMv8 Crypto Extensions. + +config CRYPTO_SHA2_ARM_CE + tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)" + depends on KERNEL_MODE_NEON + select CRYPTO_SHA256_ARM + select CRYPTO_HASH + help + SHA-256 secure hash standard (DFIPS 180-2) implemented + using special ARMv8 Crypto Extensions. + +config CRYPTO_SHA256_ARM + tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)" + select CRYPTO_HASH + depends on !CPU_V7M + help + SHA-256 secure hash standard (DFIPS 180-2) implemented + using optimized ARM assembler and NEON, when available. + +config CRYPTO_SHA512_ARM_NEON + tristate "SHA384 and SHA512 digest algorithm (ARM NEON)" + depends on KERNEL_MODE_NEON + select CRYPTO_SHA512 + select CRYPTO_HASH + help + SHA-512 secure hash standard (DFIPS 180-2) implemented + using ARM NEON instructions, when available. + + This version of SHA implements a 512 bit hash with 256 bits of + security against collision attacks. + + This code also includes SHA-384, a 384 bit hash with 192 bits + of security against collision attacks. + +config CRYPTO_AES_ARM + tristate "AES cipher algorithms (ARM-asm)" + depends on ARM + select CRYPTO_ALGAPI + select CRYPTO_AES + help + Use optimized AES assembler routines for ARM platforms. + + AES cipher algorithms (FIPS-197). AES uses the Rijndael + algorithm. + + Rijndael appears to be consistently a very good performer in + both hardware and software across a wide range of computing + environments regardless of its use in feedback or non-feedback + modes. Its key setup time is excellent, and its key agility is + good. Rijndael's very low memory requirements make it very well + suited for restricted-space environments, in which it also + demonstrates excellent performance. Rijndael's operations are + among the easiest to defend against power and timing attacks. + + The AES specifies three key sizes: 128, 192 and 256 bits + + See <http://csrc.nist.gov/encryption/aes/> for more information. + +config CRYPTO_AES_ARM_BS + tristate "Bit sliced AES using NEON instructions" + depends on KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AES_ARM + select CRYPTO_ABLK_HELPER + help + Use a faster and more secure NEON based implementation of AES in CBC, + CTR and XTS modes + + Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode + and for XTS mode encryption, CBC and XTS mode decryption speedup is + around 25%. (CBC encryption speed is not affected by this driver.) + This implementation does not rely on any lookup tables so it is + believed to be invulnerable to cache timing attacks. + +config CRYPTO_AES_ARM_CE + tristate "Accelerated AES using ARMv8 Crypto Extensions" + depends on KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_ABLK_HELPER + help + Use an implementation of AES in CBC, CTR and XTS modes that uses + ARMv8 Crypto Extensions + +config CRYPTO_GHASH_ARM_CE + tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions" + depends on KERNEL_MODE_NEON + select CRYPTO_HASH + select CRYPTO_CRYPTD + help + Use an implementation of GHASH (used by the GCM AEAD chaining mode) + that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64) + that is part of the ARMv8 Crypto Extensions + +endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index b48fa341648d..6ea828241fcb 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -6,13 +6,35 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o +obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o +ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o +ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o +ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o +ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o + +ifneq ($(ce-obj-y)$(ce-obj-m),) +ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y) +obj-y += $(ce-obj-y) +obj-m += $(ce-obj-m) +else +$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher) +$(warning $(ce-obj-y) $(ce-obj-m)) +endif +endif + aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o +sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o +sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o +sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o +sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o +aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o +ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) @@ -20,4 +42,7 @@ quiet_cmd_perl = PERL $@ $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl $(call cmd,perl) -.PRECIOUS: $(obj)/aesbs-core.S +$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl + $(call cmd,perl) + +.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S new file mode 100644 index 000000000000..8cfa468ee570 --- /dev/null +++ b/arch/arm/crypto/aes-ce-core.S @@ -0,0 +1,518 @@ +/* + * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions + * + * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .fpu crypto-neon-fp-armv8 + .align 3 + + .macro enc_round, state, key + aese.8 \state, \key + aesmc.8 \state, \state + .endm + + .macro dec_round, state, key + aesd.8 \state, \key + aesimc.8 \state, \state + .endm + + .macro enc_dround, key1, key2 + enc_round q0, \key1 + enc_round q0, \key2 + .endm + + .macro dec_dround, key1, key2 + dec_round q0, \key1 + dec_round q0, \key2 + .endm + + .macro enc_fround, key1, key2, key3 + enc_round q0, \key1 + aese.8 q0, \key2 + veor q0, q0, \key3 + .endm + + .macro dec_fround, key1, key2, key3 + dec_round q0, \key1 + aesd.8 q0, \key2 + veor q0, q0, \key3 + .endm + + .macro enc_dround_3x, key1, key2 + enc_round q0, \key1 + enc_round q1, \key1 + enc_round q2, \key1 + enc_round q0, \key2 + enc_round q1, \key2 + enc_round q2, \key2 + .endm + + .macro dec_dround_3x, key1, key2 + dec_round q0, \key1 + dec_round q1, \key1 + dec_round q2, \key1 + dec_round q0, \key2 + dec_round q1, \key2 + dec_round q2, \key2 + .endm + + .macro enc_fround_3x, key1, key2, key3 + enc_round q0, \key1 + enc_round q1, \key1 + enc_round q2, \key1 + aese.8 q0, \key2 + aese.8 q1, \key2 + aese.8 q2, \key2 + veor q0, q0, \key3 + veor q1, q1, \key3 + veor q2, q2, \key3 + .endm + + .macro dec_fround_3x, key1, key2, key3 + dec_round q0, \key1 + dec_round q1, \key1 + dec_round q2, \key1 + aesd.8 q0, \key2 + aesd.8 q1, \key2 + aesd.8 q2, \key2 + veor q0, q0, \key3 + veor q1, q1, \key3 + veor q2, q2, \key3 + .endm + + .macro do_block, dround, fround + cmp r3, #12 @ which key size? + vld1.8 {q10-q11}, [ip]! + \dround q8, q9 + vld1.8 {q12-q13}, [ip]! + \dround q10, q11 + vld1.8 {q10-q11}, [ip]! + \dround q12, q13 + vld1.8 {q12-q13}, [ip]! + \dround q10, q11 + blo 0f @ AES-128: 10 rounds + vld1.8 {q10-q11}, [ip]! + beq 1f @ AES-192: 12 rounds + \dround q12, q13 + vld1.8 {q12-q13}, [ip] + \dround q10, q11 +0: \fround q12, q13, q14 + bx lr + +1: \dround q12, q13 + \fround q10, q11, q14 + bx lr + .endm + + /* + * Internal, non-AAPCS compliant functions that implement the core AES + * transforms. These should preserve all registers except q0 - q2 and ip + * Arguments: + * q0 : first in/output block + * q1 : second in/output block (_3x version only) + * q2 : third in/output block (_3x version only) + * q8 : first round key + * q9 : secound round key + * ip : address of 3rd round key + * q14 : final round key + * r3 : number of rounds + */ + .align 6 +aes_encrypt: + add ip, r2, #32 @ 3rd round key +.Laes_encrypt_tweak: + do_block enc_dround, enc_fround +ENDPROC(aes_encrypt) + + .align 6 +aes_decrypt: + add ip, r2, #32 @ 3rd round key + do_block dec_dround, dec_fround +ENDPROC(aes_decrypt) + + .align 6 +aes_encrypt_3x: + add ip, r2, #32 @ 3rd round key + do_block enc_dround_3x, enc_fround_3x +ENDPROC(aes_encrypt_3x) + + .align 6 +aes_decrypt_3x: + add ip, r2, #32 @ 3rd round key + do_block dec_dround_3x, dec_fround_3x +ENDPROC(aes_decrypt_3x) + + .macro prepare_key, rk, rounds + add ip, \rk, \rounds, lsl #4 + vld1.8 {q8-q9}, [\rk] @ load first 2 round keys + vld1.8 {q14}, [ip] @ load last round key + .endm + + /* + * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + */ +ENTRY(ce_aes_ecb_encrypt) + push {r4, lr} + ldr r4, [sp, #8] + prepare_key r2, r3 +.Lecbencloop3x: + subs r4, r4, #3 + bmi .Lecbenc1x + vld1.8 {q0-q1}, [r1, :64]! + vld1.8 {q2}, [r1, :64]! + bl aes_encrypt_3x + vst1.8 {q0-q1}, [r0, :64]! + vst1.8 {q2}, [r0, :64]! + b .Lecbencloop3x +.Lecbenc1x: + adds r4, r4, #3 + beq .Lecbencout +.Lecbencloop: + vld1.8 {q0}, [r1, :64]! + bl aes_encrypt + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + bne .Lecbencloop +.Lecbencout: + pop {r4, pc} +ENDPROC(ce_aes_ecb_encrypt) + +ENTRY(ce_aes_ecb_decrypt) + push {r4, lr} + ldr r4, [sp, #8] + prepare_key r2, r3 +.Lecbdecloop3x: + subs r4, r4, #3 + bmi .Lecbdec1x + vld1.8 {q0-q1}, [r1, :64]! + vld1.8 {q2}, [r1, :64]! + bl aes_decrypt_3x + vst1.8 {q0-q1}, [r0, :64]! + vst1.8 {q2}, [r0, :64]! + b .Lecbdecloop3x +.Lecbdec1x: + adds r4, r4, #3 + beq .Lecbdecout +.Lecbdecloop: + vld1.8 {q0}, [r1, :64]! + bl aes_decrypt + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + bne .Lecbdecloop +.Lecbdecout: + pop {r4, pc} +ENDPROC(ce_aes_ecb_decrypt) + + /* + * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + */ +ENTRY(ce_aes_cbc_encrypt) + push {r4-r6, lr} + ldrd r4, r5, [sp, #16] + vld1.8 {q0}, [r5] + prepare_key r2, r3 +.Lcbcencloop: + vld1.8 {q1}, [r1, :64]! @ get next pt block + veor q0, q0, q1 @ ..and xor with iv + bl aes_encrypt + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + bne .Lcbcencloop + vst1.8 {q0}, [r5] + pop {r4-r6, pc} +ENDPROC(ce_aes_cbc_encrypt) + +ENTRY(ce_aes_cbc_decrypt) + push {r4-r6, lr} + ldrd r4, r5, [sp, #16] + vld1.8 {q6}, [r5] @ keep iv in q6 + prepare_key r2, r3 +.Lcbcdecloop3x: + subs r4, r4, #3 + bmi .Lcbcdec1x + vld1.8 {q0-q1}, [r1, :64]! + vld1.8 {q2}, [r1, :64]! + vmov q3, q0 + vmov q4, q1 + vmov q5, q2 + bl aes_decrypt_3x + veor q0, q0, q6 + veor q1, q1, q3 + veor q2, q2, q4 + vmov q6, q5 + vst1.8 {q0-q1}, [r0, :64]! + vst1.8 {q2}, [r0, :64]! + b .Lcbcdecloop3x +.Lcbcdec1x: + adds r4, r4, #3 + beq .Lcbcdecout + vmov q15, q14 @ preserve last round key +.Lcbcdecloop: + vld1.8 {q0}, [r1, :64]! @ get next ct block + veor q14, q15, q6 @ combine prev ct with last key + vmov q6, q0 + bl aes_decrypt + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + bne .Lcbcdecloop +.Lcbcdecout: + vst1.8 {q6}, [r5] @ keep iv in q6 + pop {r4-r6, pc} +ENDPROC(ce_aes_cbc_decrypt) + + /* + * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 ctr[]) + */ +ENTRY(ce_aes_ctr_encrypt) + push {r4-r6, lr} + ldrd r4, r5, [sp, #16] + vld1.8 {q6}, [r5] @ load ctr + prepare_key r2, r3 + vmov r6, s27 @ keep swabbed ctr in r6 + rev r6, r6 + cmn r6, r4 @ 32 bit overflow? + bcs .Lctrloop +.Lctrloop3x: + subs r4, r4, #3 + bmi .Lctr1x + add r6, r6, #1 + vmov q0, q6 + vmov q1, q6 + rev ip, r6 + add r6, r6, #1 + vmov q2, q6 + vmov s7, ip + rev ip, r6 + add r6, r6, #1 + vmov s11, ip + vld1.8 {q3-q4}, [r1, :64]! + vld1.8 {q5}, [r1, :64]! + bl aes_encrypt_3x + veor q0, q0, q3 + veor q1, q1, q4 + veor q2, q2, q5 + rev ip, r6 + vst1.8 {q0-q1}, [r0, :64]! + vst1.8 {q2}, [r0, :64]! + vmov s27, ip + b .Lctrloop3x +.Lctr1x: + adds r4, r4, #3 + beq .Lctrout +.Lctrloop: + vmov q0, q6 + bl aes_encrypt + subs r4, r4, #1 + bmi .Lctrhalfblock @ blocks < 0 means 1/2 block + vld1.8 {q3}, [r1, :64]! + veor q3, q0, q3 + vst1.8 {q3}, [r0, :64]! + + adds r6, r6, #1 @ increment BE ctr + rev ip, r6 + vmov s27, ip + bcs .Lctrcarry + teq r4, #0 + bne .Lctrloop +.Lctrout: + vst1.8 {q6}, [r5] + pop {r4-r6, pc} + +.Lctrhalfblock: + vld1.8 {d1}, [r1, :64] + veor d0, d0, d1 + vst1.8 {d0}, [r0, :64] + pop {r4-r6, pc} + +.Lctrcarry: + .irp sreg, s26, s25, s24 + vmov ip, \sreg @ load next word of ctr + rev ip, ip @ ... to handle the carry + adds ip, ip, #1 + rev ip, ip + vmov \sreg, ip + bcc 0f + .endr +0: teq r4, #0 + beq .Lctrout + b .Lctrloop +ENDPROC(ce_aes_ctr_encrypt) + + /* + * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int blocks, u8 iv[], u8 const rk2[], int first) + * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int blocks, u8 iv[], u8 const rk2[], int first) + */ + + .macro next_tweak, out, in, const, tmp + vshr.s64 \tmp, \in, #63 + vand \tmp, \tmp, \const + vadd.u64 \out, \in, \in + vext.8 \tmp, \tmp, \tmp, #8 + veor \out, \out, \tmp + .endm + + .align 3 +.Lxts_mul_x: + .quad 1, 0x87 + +ce_aes_xts_init: + vldr d14, .Lxts_mul_x + vldr d15, .Lxts_mul_x + 8 + + ldrd r4, r5, [sp, #16] @ load args + ldr r6, [sp, #28] + vld1.8 {q0}, [r5] @ load iv + teq r6, #1 @ start of a block? + bxne lr + + @ Encrypt the IV in q0 with the second AES key. This should only + @ be done at the start of a block. + ldr r6, [sp, #24] @ load AES key 2 + prepare_key r6, r3 + add ip, r6, #32 @ 3rd round key of key 2 + b .Laes_encrypt_tweak @ tail call +ENDPROC(ce_aes_xts_init) + +ENTRY(ce_aes_xts_encrypt) + push {r4-r6, lr} + + bl ce_aes_xts_init @ run shared prologue + prepare_key r2, r3 + vmov q3, q0 + + teq r6, #0 @ start of a block? + bne .Lxtsenc3x + +.Lxtsencloop3x: + next_tweak q3, q3, q7, q6 +.Lxtsenc3x: + subs r4, r4, #3 + bmi .Lxtsenc1x + vld1.8 {q0-q1}, [r1, :64]! @ get 3 pt blocks + vld1.8 {q2}, [r1, :64]! + next_tweak q4, q3, q7, q6 + veor q0, q0, q3 + next_tweak q5, q4, q7, q6 + veor q1, q1, q4 + veor q2, q2, q5 + bl aes_encrypt_3x + veor q0, q0, q3 + veor q1, q1, q4 + veor q2, q2, q5 + vst1.8 {q0-q1}, [r0, :64]! @ write 3 ct blocks + vst1.8 {q2}, [r0, :64]! + vmov q3, q5 + teq r4, #0 + beq .Lxtsencout + b .Lxtsencloop3x +.Lxtsenc1x: + adds r4, r4, #3 + beq .Lxtsencout +.Lxtsencloop: + vld1.8 {q0}, [r1, :64]! + veor q0, q0, q3 + bl aes_encrypt + veor q0, q0, q3 + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + beq .Lxtsencout + next_tweak q3, q3, q7, q6 + b .Lxtsencloop +.Lxtsencout: + vst1.8 {q3}, [r5] + pop {r4-r6, pc} +ENDPROC(ce_aes_xts_encrypt) + + +ENTRY(ce_aes_xts_decrypt) + push {r4-r6, lr} + + bl ce_aes_xts_init @ run shared prologue + prepare_key r2, r3 + vmov q3, q0 + + teq r6, #0 @ start of a block? + bne .Lxtsdec3x + +.Lxtsdecloop3x: + next_tweak q3, q3, q7, q6 +.Lxtsdec3x: + subs r4, r4, #3 + bmi .Lxtsdec1x + vld1.8 {q0-q1}, [r1, :64]! @ get 3 ct blocks + vld1.8 {q2}, [r1, :64]! + next_tweak q4, q3, q7, q6 + veor q0, q0, q3 + next_tweak q5, q4, q7, q6 + veor q1, q1, q4 + veor q2, q2, q5 + bl aes_decrypt_3x + veor q0, q0, q3 + veor q1, q1, q4 + veor q2, q2, q5 + vst1.8 {q0-q1}, [r0, :64]! @ write 3 pt blocks + vst1.8 {q2}, [r0, :64]! + vmov q3, q5 + teq r4, #0 + beq .Lxtsdecout + b .Lxtsdecloop3x +.Lxtsdec1x: + adds r4, r4, #3 + beq .Lxtsdecout +.Lxtsdecloop: + vld1.8 {q0}, [r1, :64]! + veor q0, q0, q3 + add ip, r2, #32 @ 3rd round key + bl aes_decrypt + veor q0, q0, q3 + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + beq .Lxtsdecout + next_tweak q3, q3, q7, q6 + b .Lxtsdecloop +.Lxtsdecout: + vst1.8 {q3}, [r5] + pop {r4-r6, pc} +ENDPROC(ce_aes_xts_decrypt) + + /* + * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the + * AES sbox substitution on each byte in + * 'input' + */ +ENTRY(ce_aes_sub) + vdup.32 q1, r0 + veor q0, q0, q0 + aese.8 q0, q1 + vmov r0, s0 + bx lr +ENDPROC(ce_aes_sub) + + /* + * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns + * operation on round key *src + */ +ENTRY(ce_aes_invert) + vld1.8 {q0}, [r1] + aesimc.8 q0, q0 + vst1.8 {q0}, [r0] + bx lr +ENDPROC(ce_aes_invert) diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c new file mode 100644 index 000000000000..b445a5d56f43 --- /dev/null +++ b/arch/arm/crypto/aes-ce-glue.c @@ -0,0 +1,524 @@ +/* + * aes-ce-glue.c - wrapper code for ARMv8 AES + * + * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/hwcap.h> +#include <crypto/aes.h> +#include <crypto/ablk_helper.h> +#include <crypto/algapi.h> +#include <linux/module.h> + +MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +/* defined in aes-ce-core.S */ +asmlinkage u32 ce_aes_sub(u32 input); +asmlinkage void ce_aes_invert(void *dst, void *src); + +asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); +asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); + +asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); +asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); + +asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 ctr[]); + +asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], + int rounds, int blocks, u8 iv[], + u8 const rk2[], int first); +asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], + int rounds, int blocks, u8 iv[], + u8 const rk2[], int first); + +struct aes_block { + u8 b[AES_BLOCK_SIZE]; +}; + +static int num_rounds(struct crypto_aes_ctx *ctx) +{ + /* + * # of rounds specified by AES: + * 128 bit key 10 rounds + * 192 bit key 12 rounds + * 256 bit key 14 rounds + * => n byte key => 6 + (n/4) rounds + */ + return 6 + ctx->key_length / 4; +} + +static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, + unsigned int key_len) +{ + /* + * The AES key schedule round constants + */ + static u8 const rcon[] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, + }; + + u32 kwords = key_len / sizeof(u32); + struct aes_block *key_enc, *key_dec; + int i, j; + + if (key_len != AES_KEYSIZE_128 && + key_len != AES_KEYSIZE_192 && + key_len != AES_KEYSIZE_256) + return -EINVAL; + + memcpy(ctx->key_enc, in_key, key_len); + ctx->key_length = key_len; + + kernel_neon_begin(); + for (i = 0; i < sizeof(rcon); i++) { + u32 *rki = ctx->key_enc + (i * kwords); + u32 *rko = rki + kwords; + + rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8); + rko[0] = rko[0] ^ rki[0] ^ rcon[i]; + rko[1] = rko[0] ^ rki[1]; + rko[2] = rko[1] ^ rki[2]; + rko[3] = rko[2] ^ rki[3]; + + if (key_len == AES_KEYSIZE_192) { + if (i >= 7) + break; + rko[4] = rko[3] ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + } else if (key_len == AES_KEYSIZE_256) { + if (i >= 6) + break; + rko[4] = ce_aes_sub(rko[3]) ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + rko[6] = rko[5] ^ rki[6]; + rko[7] = rko[6] ^ rki[7]; + } + } + + /* + * Generate the decryption keys for the Equivalent Inverse Cipher. + * This involves reversing the order of the round keys, and applying + * the Inverse Mix Columns transformation on all but the first and + * the last one. + */ + key_enc = (struct aes_block *)ctx->key_enc; + key_dec = (struct aes_block *)ctx->key_dec; + j = num_rounds(ctx); + + key_dec[0] = key_enc[j]; + for (i = 1, j--; j > 0; i++, j--) + ce_aes_invert(key_dec + i, key_enc + j); + key_dec[i] = key_enc[0]; + + kernel_neon_end(); + return 0; +} + +static int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int ret; + + ret = ce_aes_expandkey(ctx, in_key, key_len); + if (!ret) + return 0; + + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; +} + +struct crypto_aes_xts_ctx { + struct crypto_aes_ctx key1; + struct crypto_aes_ctx __aligned(8) key2; +}; + +static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + int ret; + + ret = ce_aes_expandkey(&ctx->key1, in_key, key_len / 2); + if (!ret) + ret = ce_aes_expandkey(&ctx->key2, &in_key[key_len / 2], + key_len / 2); + if (!ret) + return 0; + + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + unsigned int blocks; + int err; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, num_rounds(ctx), blocks); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + return err; +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + unsigned int blocks; + int err; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_dec, num_rounds(ctx), blocks); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + return err; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + unsigned int blocks; + int err; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, num_rounds(ctx), blocks, + walk.iv); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + unsigned int blocks; + int err; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_dec, num_rounds(ctx), blocks, + walk.iv); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + return err; +} + +static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err, blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, num_rounds(ctx), blocks, + walk.iv); + nbytes -= blocks * AES_BLOCK_SIZE; + if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE) + break; + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + if (nbytes) { + u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; + u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + u8 __aligned(8) tail[AES_BLOCK_SIZE]; + + /* + * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need + * to tell aes_ctr_encrypt() to only read half a block. + */ + blocks = (nbytes <= 8) ? -1 : 1; + + ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, + num_rounds(ctx), blocks, walk.iv); + memcpy(tdst, tail, nbytes); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + + return err; +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = num_rounds(&ctx->key1); + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key1.key_enc, rounds, blocks, + walk.iv, (u8 *)ctx->key2.key_enc, first); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = num_rounds(&ctx->key1); + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key1.key_dec, rounds, blocks, + walk.iv, (u8 *)ctx->key2.key_enc, first); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static struct crypto_alg aes_algs[] = { { + .cra_name = "__ecb-aes-ce", + .cra_driver_name = "__driver-ecb-aes-ce", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ce_aes_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, +}, { + .cra_name = "__cbc-aes-ce", + .cra_driver_name = "__driver-cbc-aes-ce", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ce_aes_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, +}, { + .cra_name = "__ctr-aes-ce", + .cra_driver_name = "__driver-ctr-aes-ce", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ce_aes_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, + }, +}, { + .cra_name = "__xts-aes-ce", + .cra_driver_name = "__driver-xts-aes-ce", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_set_key, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, +}, { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +} }; + +static int __init aes_init(void) +{ + if (!(elf_hwcap2 & HWCAP2_AES)) + return -ENODEV; + return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); +} + +static void __exit aes_exit(void) +{ + crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); +} + +module_init(aes_init); +module_exit(aes_exit); diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped index 71e5fc7cfb18..1d1800f71c5b 100644 --- a/arch/arm/crypto/aesbs-core.S_shipped +++ b/arch/arm/crypto/aesbs-core.S_shipped @@ -58,14 +58,18 @@ # define VFP_ABI_FRAME 0 # define BSAES_ASM_EXTENDED_KEY # define XTS_CHAIN_TWEAK -# define __ARM_ARCH__ 7 +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 #endif #ifdef __thumb__ # define adrl adr #endif -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + .text .syntax unified @ ARMv7-capable assembler is expected to handle this #ifdef __thumb2__ @@ -74,8 +78,6 @@ .code 32 #endif -.fpu neon - .type _bsaes_decrypt8,%function .align 4 _bsaes_decrypt8: @@ -2095,9 +2097,11 @@ bsaes_xts_decrypt: vld1.8 {q8}, [r0] @ initial tweak adr r2, .Lxts_magic +#ifndef XTS_CHAIN_TWEAK tst r9, #0xf @ if not multiple of 16 it ne @ Thumb2 thing, sanity check in ARM subne r9, #0x10 @ subtract another 16 bytes +#endif subs r9, #0x80 blo .Lxts_dec_short diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c index 15468fbbdea3..6d685298690e 100644 --- a/arch/arm/crypto/aesbs-glue.c +++ b/arch/arm/crypto/aesbs-glue.c @@ -301,7 +301,8 @@ static struct crypto_alg aesbs_algs[] = { { .cra_name = "__cbc-aes-neonbs", .cra_driver_name = "__driver-cbc-aes-neonbs", .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aesbs_cbc_ctx), .cra_alignmask = 7, @@ -319,7 +320,8 @@ static struct crypto_alg aesbs_algs[] = { { .cra_name = "__ctr-aes-neonbs", .cra_driver_name = "__driver-ctr-aes-neonbs", .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_INTERNAL, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct aesbs_ctr_ctx), .cra_alignmask = 7, @@ -337,7 +339,8 @@ static struct crypto_alg aesbs_algs[] = { { .cra_name = "__xts-aes-neonbs", .cra_driver_name = "__driver-xts-aes-neonbs", .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aesbs_xts_ctx), .cra_alignmask = 7, diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl index be068db960ee..a4d3856e7d24 100644 --- a/arch/arm/crypto/bsaes-armv7.pl +++ b/arch/arm/crypto/bsaes-armv7.pl @@ -701,14 +701,18 @@ $code.=<<___; # define VFP_ABI_FRAME 0 # define BSAES_ASM_EXTENDED_KEY # define XTS_CHAIN_TWEAK -# define __ARM_ARCH__ 7 +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 #endif #ifdef __thumb__ # define adrl adr #endif -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + .text .syntax unified @ ARMv7-capable assembler is expected to handle this #ifdef __thumb2__ @@ -717,8 +721,6 @@ $code.=<<___; .code 32 #endif -.fpu neon - .type _bsaes_decrypt8,%function .align 4 _bsaes_decrypt8: @@ -2076,9 +2078,11 @@ bsaes_xts_decrypt: vld1.8 {@XMM[8]}, [r0] @ initial tweak adr $magic, .Lxts_magic +#ifndef XTS_CHAIN_TWEAK tst $len, #0xf @ if not multiple of 16 it ne @ Thumb2 thing, sanity check in ARM subne $len, #0x10 @ subtract another 16 bytes +#endif subs $len, #0x80 blo .Lxts_dec_short diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S new file mode 100644 index 000000000000..f6ab8bcc9efe --- /dev/null +++ b/arch/arm/crypto/ghash-ce-core.S @@ -0,0 +1,94 @@ +/* + * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions. + * + * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + SHASH .req q0 + SHASH2 .req q1 + T1 .req q2 + T2 .req q3 + MASK .req q4 + XL .req q5 + XM .req q6 + XH .req q7 + IN1 .req q7 + + SHASH_L .req d0 + SHASH_H .req d1 + SHASH2_L .req d2 + T1_L .req d4 + MASK_L .req d8 + XL_L .req d10 + XL_H .req d11 + XM_L .req d12 + XM_H .req d13 + XH_L .req d14 + + .text + .fpu crypto-neon-fp-armv8 + + /* + * void pmull_ghash_update(int blocks, u64 dg[], const char *src, + * struct ghash_key const *k, const char *head) + */ +ENTRY(pmull_ghash_update) + vld1.64 {SHASH}, [r3] + vld1.64 {XL}, [r1] + vmov.i8 MASK, #0xe1 + vext.8 SHASH2, SHASH, SHASH, #8 + vshl.u64 MASK, MASK, #57 + veor SHASH2, SHASH2, SHASH + + /* do the head block first, if supplied */ + ldr ip, [sp] + teq ip, #0 + beq 0f + vld1.64 {T1}, [ip] + teq r0, #0 + b 1f + +0: vld1.64 {T1}, [r2]! + subs r0, r0, #1 + +1: /* multiply XL by SHASH in GF(2^128) */ +#ifndef CONFIG_CPU_BIG_ENDIAN + vrev64.8 T1, T1 +#endif + vext.8 T2, XL, XL, #8 + vext.8 IN1, T1, T1, #8 + veor T1, T1, T2 + veor XL, XL, IN1 + + vmull.p64 XH, SHASH_H, XL_H @ a1 * b1 + veor T1, T1, XL + vmull.p64 XL, SHASH_L, XL_L @ a0 * b0 + vmull.p64 XM, SHASH2_L, T1_L @ (a1 + a0)(b1 + b0) + + vext.8 T1, XL, XH, #8 + veor T2, XL, XH + veor XM, XM, T1 + veor XM, XM, T2 + vmull.p64 T2, XL_L, MASK_L + + vmov XH_L, XM_H + vmov XM_H, XL_L + + veor XL, XM, T2 + vext.8 T2, XL, XL, #8 + vmull.p64 XL, XL_L, MASK_L + veor T2, T2, XH + veor XL, XL, T2 + + bne 0b + + vst1.64 {XL}, [r1] + bx lr +ENDPROC(pmull_ghash_update) diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c new file mode 100644 index 000000000000..03a39fe29246 --- /dev/null +++ b/arch/arm/crypto/ghash-ce-glue.c @@ -0,0 +1,320 @@ +/* + * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions. + * + * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> +#include <asm/unaligned.h> +#include <crypto/cryptd.h> +#include <crypto/internal/hash.h> +#include <crypto/gf128mul.h> +#include <linux/crypto.h> +#include <linux/module.h> + +MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +struct ghash_key { + u64 a; + u64 b; +}; + +struct ghash_desc_ctx { + u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)]; + u8 buf[GHASH_BLOCK_SIZE]; + u32 count; +}; + +struct ghash_async_ctx { + struct cryptd_ahash *cryptd_tfm; +}; + +asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src, + struct ghash_key const *k, const char *head); + +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + + *ctx = (struct ghash_desc_ctx){}; + return 0; +} + +static int ghash_update(struct shash_desc *desc, const u8 *src, + unsigned int len) +{ + struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; + + ctx->count += len; + + if ((partial + len) >= GHASH_BLOCK_SIZE) { + struct ghash_key *key = crypto_shash_ctx(desc->tfm); + int blocks; + + if (partial) { + int p = GHASH_BLOCK_SIZE - partial; + + memcpy(ctx->buf + partial, src, p); + src += p; + len -= p; + } + + blocks = len / GHASH_BLOCK_SIZE; + len %= GHASH_BLOCK_SIZE; + + kernel_neon_begin(); + pmull_ghash_update(blocks, ctx->digest, src, key, + partial ? ctx->buf : NULL); + kernel_neon_end(); + src += blocks * GHASH_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(ctx->buf + partial, src, len); + return 0; +} + +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; + + if (partial) { + struct ghash_key *key = crypto_shash_ctx(desc->tfm); + + memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial); + kernel_neon_begin(); + pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL); + kernel_neon_end(); + } + put_unaligned_be64(ctx->digest[1], dst); + put_unaligned_be64(ctx->digest[0], dst + 8); + + *ctx = (struct ghash_desc_ctx){}; + return 0; +} + +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *inkey, unsigned int keylen) +{ + struct ghash_key *key = crypto_shash_ctx(tfm); + u64 a, b; + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + /* perform multiplication by 'x' in GF(2^128) */ + b = get_unaligned_be64(inkey); + a = get_unaligned_be64(inkey + 8); + + key->a = (a << 1) | (b >> 63); + key->b = (b << 1) | (a >> 63); + + if (b >> 63) + key->b ^= 0xc200000000000000UL; + + return 0; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "ghash", + .cra_driver_name = "__driver-ghash-ce", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_key), + .cra_module = THIS_MODULE, + }, +}; + +static int ghash_async_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + if (!may_use_simd()) { + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_init(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); + + desc->tfm = child; + desc->flags = req->base.flags; + return crypto_shash_init(desc); + } +} + +static int ghash_async_update(struct ahash_request *req) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + + if (!may_use_simd()) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_update(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + return shash_ahash_update(req, desc); + } +} + +static int ghash_async_final(struct ahash_request *req) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + + if (!may_use_simd()) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_final(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + return crypto_shash_final(desc, req->result); + } +} + +static int ghash_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + if (!may_use_simd()) { + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_digest(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); + + desc->tfm = child; + desc->flags = req->base.flags; + return shash_ahash_digest(req, desc); + } +} + +static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct crypto_ahash *child = &ctx->cryptd_tfm->base; + int err; + + crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err = crypto_ahash_setkey(child, key, keylen); + crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child) + & CRYPTO_TFM_RES_MASK); + + return err; +} + +static int ghash_async_init_tfm(struct crypto_tfm *tfm) +{ + struct cryptd_ahash *cryptd_tfm; + struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_tfm = cryptd_alloc_ahash("__driver-ghash-ce", + CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ctx->cryptd_tfm = cryptd_tfm; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(&cryptd_tfm->base)); + + return 0; +} + +static void ghash_async_exit_tfm(struct crypto_tfm *tfm) +{ + struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ahash(ctx->cryptd_tfm); +} + +static struct ahash_alg ghash_async_alg = { + .init = ghash_async_init, + .update = ghash_async_update, + .final = ghash_async_final, + .setkey = ghash_async_setkey, + .digest = ghash_async_digest, + .halg.digestsize = GHASH_DIGEST_SIZE, + .halg.base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_type = &crypto_ahash_type, + .cra_ctxsize = sizeof(struct ghash_async_ctx), + .cra_module = THIS_MODULE, + .cra_init = ghash_async_init_tfm, + .cra_exit = ghash_async_exit_tfm, + }, +}; + +static int __init ghash_ce_mod_init(void) +{ + int err; + + if (!(elf_hwcap2 & HWCAP2_PMULL)) + return -ENODEV; + + err = crypto_register_shash(&ghash_alg); + if (err) + return err; + err = crypto_register_ahash(&ghash_async_alg); + if (err) + goto err_shash; + + return 0; + +err_shash: + crypto_unregister_shash(&ghash_alg); + return err; +} + +static void __exit ghash_ce_mod_exit(void) +{ + crypto_unregister_ahash(&ghash_async_alg); + crypto_unregister_shash(&ghash_alg); +} + +module_init(ghash_ce_mod_init); +module_exit(ghash_ce_mod_exit); diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S new file mode 100644 index 000000000000..b623f51ccbcf --- /dev/null +++ b/arch/arm/crypto/sha1-ce-core.S @@ -0,0 +1,125 @@ +/* + * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions + * + * Copyright (C) 2015 Linaro Ltd. + * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .fpu crypto-neon-fp-armv8 + + k0 .req q0 + k1 .req q1 + k2 .req q2 + k3 .req q3 + + ta0 .req q4 + ta1 .req q5 + tb0 .req q5 + tb1 .req q4 + + dga .req q6 + dgb .req q7 + dgbs .req s28 + + dg0 .req q12 + dg1a0 .req q13 + dg1a1 .req q14 + dg1b0 .req q14 + dg1b1 .req q13 + + .macro add_only, op, ev, rc, s0, dg1 + .ifnb \s0 + vadd.u32 tb\ev, q\s0, \rc + .endif + sha1h.32 dg1b\ev, dg0 + .ifb \dg1 + sha1\op\().32 dg0, dg1a\ev, ta\ev + .else + sha1\op\().32 dg0, \dg1, ta\ev + .endif + .endm + + .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 + sha1su0.32 q\s0, q\s1, q\s2 + add_only \op, \ev, \rc, \s1, \dg1 + sha1su1.32 q\s0, q\s3 + .endm + + .align 6 +.Lsha1_rcon: + .word 0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999 + .word 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1 + .word 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc + .word 0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6 + + /* + * void sha1_ce_transform(struct sha1_state *sst, u8 const *src, + * int blocks); + */ +ENTRY(sha1_ce_transform) + /* load round constants */ + adr ip, .Lsha1_rcon + vld1.32 {k0-k1}, [ip, :128]! + vld1.32 {k2-k3}, [ip, :128] + + /* load state */ + vld1.32 {dga}, [r0] + vldr dgbs, [r0, #16] + + /* load input */ +0: vld1.32 {q8-q9}, [r1]! + vld1.32 {q10-q11}, [r1]! + subs r2, r2, #1 + +#ifndef CONFIG_CPU_BIG_ENDIAN + vrev32.8 q8, q8 + vrev32.8 q9, q9 + vrev32.8 q10, q10 + vrev32.8 q11, q11 +#endif + + vadd.u32 ta0, q8, k0 + vmov dg0, dga + + add_update c, 0, k0, 8, 9, 10, 11, dgb + add_update c, 1, k0, 9, 10, 11, 8 + add_update c, 0, k0, 10, 11, 8, 9 + add_update c, 1, k0, 11, 8, 9, 10 + add_update c, 0, k1, 8, 9, 10, 11 + + add_update p, 1, k1, 9, 10, 11, 8 + add_update p, 0, k1, 10, 11, 8, 9 + add_update p, 1, k1, 11, 8, 9, 10 + add_update p, 0, k1, 8, 9, 10, 11 + add_update p, 1, k2, 9, 10, 11, 8 + + add_update m, 0, k2, 10, 11, 8, 9 + add_update m, 1, k2, 11, 8, 9, 10 + add_update m, 0, k2, 8, 9, 10, 11 + add_update m, 1, k2, 9, 10, 11, 8 + add_update m, 0, k3, 10, 11, 8, 9 + + add_update p, 1, k3, 11, 8, 9, 10 + add_only p, 0, k3, 9 + add_only p, 1, k3, 10 + add_only p, 0, k3, 11 + add_only p, 1 + + /* update state */ + vadd.u32 dga, dga, dg0 + vadd.u32 dgb, dgb, dg1a0 + bne 0b + + /* store new state */ + vst1.32 {dga}, [r0] + vstr dgbs, [r0, #16] + bx lr +ENDPROC(sha1_ce_transform) diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c new file mode 100644 index 000000000000..80bc2fcd241a --- /dev/null +++ b/arch/arm/crypto/sha1-ce-glue.c @@ -0,0 +1,96 @@ +/* + * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions + * + * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <crypto/internal/hash.h> +#include <crypto/sha.h> +#include <crypto/sha1_base.h> +#include <linux/crypto.h> +#include <linux/module.h> + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> + +#include "sha1.h" + +MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +asmlinkage void sha1_ce_transform(struct sha1_state *sst, u8 const *src, + int blocks); + +static int sha1_ce_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + if (!may_use_simd() || + (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) + return sha1_update_arm(desc, data, len); + + kernel_neon_begin(); + sha1_base_do_update(desc, data, len, sha1_ce_transform); + kernel_neon_end(); + + return 0; +} + +static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + if (!may_use_simd()) + return sha1_finup_arm(desc, data, len, out); + + kernel_neon_begin(); + if (len) + sha1_base_do_update(desc, data, len, sha1_ce_transform); + sha1_base_do_finalize(desc, sha1_ce_transform); + kernel_neon_end(); + + return sha1_base_finish(desc, out); +} + +static int sha1_ce_final(struct shash_desc *desc, u8 *out) +{ + return sha1_ce_finup(desc, NULL, 0, out); +} + +static struct shash_alg alg = { + .init = sha1_base_init, + .update = sha1_ce_update, + .final = sha1_ce_final, + .finup = sha1_ce_finup, + .descsize = sizeof(struct sha1_state), + .digestsize = SHA1_DIGEST_SIZE, + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init sha1_ce_mod_init(void) +{ + if (!(elf_hwcap2 & HWCAP2_SHA1)) + return -ENODEV; + return crypto_register_shash(&alg); +} + +static void __exit sha1_ce_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(sha1_ce_mod_init); +module_exit(sha1_ce_mod_fini); diff --git a/arch/arm/include/asm/crypto/sha1.h b/arch/arm/crypto/sha1.h index 75e6a417416b..ffd8bd08b1a7 100644 --- a/arch/arm/include/asm/crypto/sha1.h +++ b/arch/arm/crypto/sha1.h @@ -7,4 +7,7 @@ extern int sha1_update_arm(struct shash_desc *desc, const u8 *data, unsigned int len); +extern int sha1_finup_arm(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out); + #endif diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c index e31b0440c613..6fc73bf8766d 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c @@ -22,127 +22,47 @@ #include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/sha.h> +#include <crypto/sha1_base.h> #include <asm/byteorder.h> -#include <asm/crypto/sha1.h> +#include "sha1.h" asmlinkage void sha1_block_data_order(u32 *digest, const unsigned char *data, unsigned int rounds); - -static int sha1_init(struct shash_desc *desc) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha1_state){ - .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, - }; - - return 0; -} - - -static int __sha1_update(struct sha1_state *sctx, const u8 *data, - unsigned int len, unsigned int partial) -{ - unsigned int done = 0; - - sctx->count += len; - - if (partial) { - done = SHA1_BLOCK_SIZE - partial; - memcpy(sctx->buffer + partial, data, done); - sha1_block_data_order(sctx->state, sctx->buffer, 1); - } - - if (len - done >= SHA1_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; - sha1_block_data_order(sctx->state, data + done, rounds); - done += rounds * SHA1_BLOCK_SIZE; - } - - memcpy(sctx->buffer, data + done, len - done); - return 0; -} - - int sha1_update_arm(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; - int res; + /* make sure casting to sha1_block_fn() is safe */ + BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0); - /* Handle the fast case right here */ - if (partial + len < SHA1_BLOCK_SIZE) { - sctx->count += len; - memcpy(sctx->buffer + partial, data, len); - return 0; - } - res = __sha1_update(sctx, data, len, partial); - return res; + return sha1_base_do_update(desc, data, len, + (sha1_block_fn *)sha1_block_data_order); } EXPORT_SYMBOL_GPL(sha1_update_arm); - -/* Add padding and return the message digest. */ static int sha1_final(struct shash_desc *desc, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be32 *dst = (__be32 *)out; - __be64 bits; - static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; - - bits = cpu_to_be64(sctx->count << 3); - - /* Pad out to 56 mod 64 and append length */ - index = sctx->count % SHA1_BLOCK_SIZE; - padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); - /* We need to fill a whole block for __sha1_update() */ - if (padlen <= 56) { - sctx->count += padlen; - memcpy(sctx->buffer + index, padding, padlen); - } else { - __sha1_update(sctx, padding, padlen, index); - } - __sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56); - - /* Store state in digest */ - for (i = 0; i < 5; i++) - dst[i] = cpu_to_be32(sctx->state[i]); - - /* Wipe context */ - memset(sctx, 0, sizeof(*sctx)); - return 0; + sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_block_data_order); + return sha1_base_finish(desc, out); } - -static int sha1_export(struct shash_desc *desc, void *out) +int sha1_finup_arm(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); - memcpy(out, sctx, sizeof(*sctx)); - return 0; + sha1_base_do_update(desc, data, len, + (sha1_block_fn *)sha1_block_data_order); + return sha1_final(desc, out); } - - -static int sha1_import(struct shash_desc *desc, const void *in) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - memcpy(sctx, in, sizeof(*sctx)); - return 0; -} - +EXPORT_SYMBOL_GPL(sha1_finup_arm); static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, - .init = sha1_init, + .init = sha1_base_init, .update = sha1_update_arm, .final = sha1_final, - .export = sha1_export, - .import = sha1_import, + .finup = sha1_finup_arm, .descsize = sizeof(struct sha1_state), - .statesize = sizeof(struct sha1_state), .base = { .cra_name = "sha1", .cra_driver_name= "sha1-asm", diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c index 0b0083757d47..4e22f122f966 100644 --- a/arch/arm/crypto/sha1_neon_glue.c +++ b/arch/arm/crypto/sha1_neon_glue.c @@ -25,147 +25,60 @@ #include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/sha.h> -#include <asm/byteorder.h> +#include <crypto/sha1_base.h> #include <asm/neon.h> #include <asm/simd.h> -#include <asm/crypto/sha1.h> +#include "sha1.h" asmlinkage void sha1_transform_neon(void *state_h, const char *data, unsigned int rounds); - -static int sha1_neon_init(struct shash_desc *desc) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha1_state){ - .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, - }; - - return 0; -} - -static int __sha1_neon_update(struct shash_desc *desc, const u8 *data, - unsigned int len, unsigned int partial) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int done = 0; - - sctx->count += len; - - if (partial) { - done = SHA1_BLOCK_SIZE - partial; - memcpy(sctx->buffer + partial, data, done); - sha1_transform_neon(sctx->state, sctx->buffer, 1); - } - - if (len - done >= SHA1_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; - - sha1_transform_neon(sctx->state, data + done, rounds); - done += rounds * SHA1_BLOCK_SIZE; - } - - memcpy(sctx->buffer, data + done, len - done); - - return 0; -} - static int sha1_neon_update(struct shash_desc *desc, const u8 *data, - unsigned int len) + unsigned int len) { struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; - int res; - /* Handle the fast case right here */ - if (partial + len < SHA1_BLOCK_SIZE) { - sctx->count += len; - memcpy(sctx->buffer + partial, data, len); + if (!may_use_simd() || + (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) + return sha1_update_arm(desc, data, len); - return 0; - } - - if (!may_use_simd()) { - res = sha1_update_arm(desc, data, len); - } else { - kernel_neon_begin(); - res = __sha1_neon_update(desc, data, len, partial); - kernel_neon_end(); - } - - return res; -} - - -/* Add padding and return the message digest. */ -static int sha1_neon_final(struct shash_desc *desc, u8 *out) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be32 *dst = (__be32 *)out; - __be64 bits; - static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; - - bits = cpu_to_be64(sctx->count << 3); - - /* Pad out to 56 mod 64 and append length */ - index = sctx->count % SHA1_BLOCK_SIZE; - padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); - if (!may_use_simd()) { - sha1_update_arm(desc, padding, padlen); - sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits)); - } else { - kernel_neon_begin(); - /* We need to fill a whole block for __sha1_neon_update() */ - if (padlen <= 56) { - sctx->count += padlen; - memcpy(sctx->buffer + index, padding, padlen); - } else { - __sha1_neon_update(desc, padding, padlen, index); - } - __sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56); - kernel_neon_end(); - } - - /* Store state in digest */ - for (i = 0; i < 5; i++) - dst[i] = cpu_to_be32(sctx->state[i]); - - /* Wipe context */ - memset(sctx, 0, sizeof(*sctx)); + kernel_neon_begin(); + sha1_base_do_update(desc, data, len, + (sha1_block_fn *)sha1_transform_neon); + kernel_neon_end(); return 0; } -static int sha1_neon_export(struct shash_desc *desc, void *out) +static int sha1_neon_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); + if (!may_use_simd()) + return sha1_finup_arm(desc, data, len, out); - memcpy(out, sctx, sizeof(*sctx)); + kernel_neon_begin(); + if (len) + sha1_base_do_update(desc, data, len, + (sha1_block_fn *)sha1_transform_neon); + sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_neon); + kernel_neon_end(); - return 0; + return sha1_base_finish(desc, out); } -static int sha1_neon_import(struct shash_desc *desc, const void *in) +static int sha1_neon_final(struct shash_desc *desc, u8 *out) { - struct sha1_state *sctx = shash_desc_ctx(desc); - - memcpy(sctx, in, sizeof(*sctx)); - - return 0; + return sha1_neon_finup(desc, NULL, 0, out); } static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, - .init = sha1_neon_init, + .init = sha1_base_init, .update = sha1_neon_update, .final = sha1_neon_final, - .export = sha1_neon_export, - .import = sha1_neon_import, + .finup = sha1_neon_finup, .descsize = sizeof(struct sha1_state), - .statesize = sizeof(struct sha1_state), .base = { .cra_name = "sha1", .cra_driver_name = "sha1-neon", diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S new file mode 100644 index 000000000000..87ec11a5f405 --- /dev/null +++ b/arch/arm/crypto/sha2-ce-core.S @@ -0,0 +1,125 @@ +/* + * sha2-ce-core.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions + * + * Copyright (C) 2015 Linaro Ltd. + * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .fpu crypto-neon-fp-armv8 + + k0 .req q7 + k1 .req q8 + rk .req r3 + + ta0 .req q9 + ta1 .req q10 + tb0 .req q10 + tb1 .req q9 + + dga .req q11 + dgb .req q12 + + dg0 .req q13 + dg1 .req q14 + dg2 .req q15 + + .macro add_only, ev, s0 + vmov dg2, dg0 + .ifnb \s0 + vld1.32 {k\ev}, [rk, :128]! + .endif + sha256h.32 dg0, dg1, tb\ev + sha256h2.32 dg1, dg2, tb\ev + .ifnb \s0 + vadd.u32 ta\ev, q\s0, k\ev + .endif + .endm + + .macro add_update, ev, s0, s1, s2, s3 + sha256su0.32 q\s0, q\s1 + add_only \ev, \s1 + sha256su1.32 q\s0, q\s2, q\s3 + .endm + + .align 6 +.Lsha256_rcon: + .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 + .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 + .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 + .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc + .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 + .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 + .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 + .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 + .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 + .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + + /* + * void sha2_ce_transform(struct sha256_state *sst, u8 const *src, + int blocks); + */ +ENTRY(sha2_ce_transform) + /* load state */ + vld1.32 {dga-dgb}, [r0] + + /* load input */ +0: vld1.32 {q0-q1}, [r1]! + vld1.32 {q2-q3}, [r1]! + subs r2, r2, #1 + +#ifndef CONFIG_CPU_BIG_ENDIAN + vrev32.8 q0, q0 + vrev32.8 q1, q1 + vrev32.8 q2, q2 + vrev32.8 q3, q3 +#endif + + /* load first round constant */ + adr rk, .Lsha256_rcon + vld1.32 {k0}, [rk, :128]! + + vadd.u32 ta0, q0, k0 + vmov dg0, dga + vmov dg1, dgb + + add_update 1, 0, 1, 2, 3 + add_update 0, 1, 2, 3, 0 + add_update 1, 2, 3, 0, 1 + add_update 0, 3, 0, 1, 2 + add_update 1, 0, 1, 2, 3 + add_update 0, 1, 2, 3, 0 + add_update 1, 2, 3, 0, 1 + add_update 0, 3, 0, 1, 2 + add_update 1, 0, 1, 2, 3 + add_update 0, 1, 2, 3, 0 + add_update 1, 2, 3, 0, 1 + add_update 0, 3, 0, 1, 2 + + add_only 1, 1 + add_only 0, 2 + add_only 1, 3 + add_only 0 + + /* update state */ + vadd.u32 dga, dga, dg0 + vadd.u32 dgb, dgb, dg1 + bne 0b + + /* store new state */ + vst1.32 {dga-dgb}, [r0] + bx lr +ENDPROC(sha2_ce_transform) diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c new file mode 100644 index 000000000000..0755b2d657f3 --- /dev/null +++ b/arch/arm/crypto/sha2-ce-glue.c @@ -0,0 +1,114 @@ +/* + * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions + * + * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <crypto/internal/hash.h> +#include <crypto/sha.h> +#include <crypto/sha256_base.h> +#include <linux/crypto.h> +#include <linux/module.h> + +#include <asm/hwcap.h> +#include <asm/simd.h> +#include <asm/neon.h> +#include <asm/unaligned.h> + +#include "sha256_glue.h" + +MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +asmlinkage void sha2_ce_transform(struct sha256_state *sst, u8 const *src, + int blocks); + +static int sha2_ce_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + if (!may_use_simd() || + (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) + return crypto_sha256_arm_update(desc, data, len); + + kernel_neon_begin(); + sha256_base_do_update(desc, data, len, + (sha256_block_fn *)sha2_ce_transform); + kernel_neon_end(); + + return 0; +} + +static int sha2_ce_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + if (!may_use_simd()) + return crypto_sha256_arm_finup(desc, data, len, out); + + kernel_neon_begin(); + if (len) + sha256_base_do_update(desc, data, len, + (sha256_block_fn *)sha2_ce_transform); + sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform); + kernel_neon_end(); + + return sha256_base_finish(desc, out); +} + +static int sha2_ce_final(struct shash_desc *desc, u8 *out) +{ + return sha2_ce_finup(desc, NULL, 0, out); +} + +static struct shash_alg algs[] = { { + .init = sha224_base_init, + .update = sha2_ce_update, + .final = sha2_ce_final, + .finup = sha2_ce_finup, + .descsize = sizeof(struct sha256_state), + .digestsize = SHA224_DIGEST_SIZE, + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .init = sha256_base_init, + .update = sha2_ce_update, + .final = sha2_ce_final, + .finup = sha2_ce_finup, + .descsize = sizeof(struct sha256_state), + .digestsize = SHA256_DIGEST_SIZE, + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int __init sha2_ce_mod_init(void) +{ + if (!(elf_hwcap2 & HWCAP2_SHA2)) + return -ENODEV; + return crypto_register_shashes(algs, ARRAY_SIZE(algs)); +} + +static void __exit sha2_ce_mod_fini(void) +{ + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); +} + +module_init(sha2_ce_mod_init); +module_exit(sha2_ce_mod_fini); diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl new file mode 100644 index 000000000000..fac0533ea633 --- /dev/null +++ b/arch/arm/crypto/sha256-armv4.pl @@ -0,0 +1,716 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# +# Permission to use under GPL terms is granted. +# ==================================================================== + +# SHA256 block procedure for ARMv4. May 2007. + +# Performance is ~2x better than gcc 3.4 generated code and in "abso- +# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per +# byte [on single-issue Xscale PXA250 core]. + +# July 2010. +# +# Rescheduling for dual-issue pipeline resulted in 22% improvement on +# Cortex A8 core and ~20 cycles per processed byte. + +# February 2011. +# +# Profiler-assisted and platform-specific optimization resulted in 16% +# improvement on Cortex A8 core and ~15.4 cycles per processed byte. + +# September 2013. +# +# Add NEON implementation. On Cortex A8 it was measured to process one +# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon +# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only +# code (meaning that latter performs sub-optimally, nothing was done +# about it). + +# May 2014. +# +# Add ARMv8 code path performing at 2.0 cpb on Apple A7. + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +$ctx="r0"; $t0="r0"; +$inp="r1"; $t4="r1"; +$len="r2"; $t1="r2"; +$T1="r3"; $t3="r3"; +$A="r4"; +$B="r5"; +$C="r6"; +$D="r7"; +$E="r8"; +$F="r9"; +$G="r10"; +$H="r11"; +@V=($A,$B,$C,$D,$E,$F,$G,$H); +$t2="r12"; +$Ktbl="r14"; + +@Sigma0=( 2,13,22); +@Sigma1=( 6,11,25); +@sigma0=( 7,18, 3); +@sigma1=(17,19,10); + +sub BODY_00_15 { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; + +$code.=<<___ if ($i<16); +#if __ARM_ARCH__>=7 + @ ldr $t1,[$inp],#4 @ $i +# if $i==15 + str $inp,[sp,#17*4] @ make room for $t4 +# endif + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) +# ifndef __ARMEB__ + rev $t1,$t1 +# endif +#else + @ ldrb $t1,[$inp,#3] @ $i + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + ldrb $t2,[$inp,#2] + ldrb $t0,[$inp,#1] + orr $t1,$t1,$t2,lsl#8 + ldrb $t2,[$inp],#4 + orr $t1,$t1,$t0,lsl#16 +# if $i==15 + str $inp,[sp,#17*4] @ make room for $t4 +# endif + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` + orr $t1,$t1,$t2,lsl#24 + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) +#endif +___ +$code.=<<___; + ldr $t2,[$Ktbl],#4 @ *K256++ + add $h,$h,$t1 @ h+=X[i] + str $t1,[sp,#`$i%16`*4] + eor $t1,$f,$g + add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e) + and $t1,$t1,$e + add $h,$h,$t2 @ h+=K256[i] + eor $t1,$t1,$g @ Ch(e,f,g) + eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]` + add $h,$h,$t1 @ h+=Ch(e,f,g) +#if $i==31 + and $t2,$t2,#0xff + cmp $t2,#0xf2 @ done? +#endif +#if $i<15 +# if __ARM_ARCH__>=7 + ldr $t1,[$inp],#4 @ prefetch +# else + ldrb $t1,[$inp,#3] +# endif + eor $t2,$a,$b @ a^b, b^c in next round +#else + ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx + eor $t2,$a,$b @ a^b, b^c in next round + ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx +#endif + eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a) + and $t3,$t3,$t2 @ (b^c)&=(a^b) + add $d,$d,$h @ d+=h + eor $t3,$t3,$b @ Maj(a,b,c) + add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a) + @ add $h,$h,$t3 @ h+=Maj(a,b,c) +___ + ($t2,$t3)=($t3,$t2); +} + +sub BODY_16_XX { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; + +$code.=<<___; + @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i + @ ldr $t4,[sp,#`($i+14)%16`*4] + mov $t0,$t1,ror#$sigma0[0] + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + mov $t2,$t4,ror#$sigma1[0] + eor $t0,$t0,$t1,ror#$sigma0[1] + eor $t2,$t2,$t4,ror#$sigma1[1] + eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) + ldr $t1,[sp,#`($i+0)%16`*4] + eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14]) + ldr $t4,[sp,#`($i+9)%16`*4] + + add $t2,$t2,$t0 + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15 + add $t1,$t1,$t2 + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) + add $t1,$t1,$t4 @ X[i] +___ + &BODY_00_15(@_); +} + +$code=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +#endif + +.text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +# define adrl adr +.thumb +# else +.code 32 +# endif +#endif + +.type K256,%object +.align 5 +K256: +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 +.word 0 @ terminator +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha256_block_data_order +#endif +.align 5 + +.global sha256_block_data_order +.type sha256_block_data_order,%function +sha256_block_data_order: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ sha256_block_data_order +#else + adr r3,sha256_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA256 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif + add $len,$inp,$len,lsl#6 @ len to point at the end of inp + stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} + ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} + sub $Ktbl,r3,#256+32 @ K256 + sub sp,sp,#16*4 @ alloca(X[16]) +.Loop: +# if __ARM_ARCH__>=7 + ldr $t1,[$inp],#4 +# else + ldrb $t1,[$inp,#3] +# endif + eor $t3,$B,$C @ magic + eor $t2,$t2,$t2 +___ +for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } +$code.=".Lrounds_16_xx:\n"; +for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; +#if __ARM_ARCH__>=7 + ite eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq $t3,[sp,#16*4] @ pull ctx + bne .Lrounds_16_xx + + add $A,$A,$t2 @ h+=Maj(a,b,c) from the past + ldr $t0,[$t3,#0] + ldr $t1,[$t3,#4] + ldr $t2,[$t3,#8] + add $A,$A,$t0 + ldr $t0,[$t3,#12] + add $B,$B,$t1 + ldr $t1,[$t3,#16] + add $C,$C,$t2 + ldr $t2,[$t3,#20] + add $D,$D,$t0 + ldr $t0,[$t3,#24] + add $E,$E,$t1 + ldr $t1,[$t3,#28] + add $F,$F,$t2 + ldr $inp,[sp,#17*4] @ pull inp + ldr $t2,[sp,#18*4] @ pull inp+len + add $G,$G,$t0 + add $H,$H,$t1 + stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H} + cmp $inp,$t2 + sub $Ktbl,$Ktbl,#256 @ rewind Ktbl + bne .Loop + + add sp,sp,#`16+3`*4 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size sha256_block_data_order,.-sha256_block_data_order +___ +###################################################################### +# NEON stuff +# +{{{ +my @X=map("q$_",(0..3)); +my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25"); +my $Xfer=$t4; +my $j=0; + +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } + +sub AUTOLOAD() # thunk [simplified] x86-style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; + my $arg = pop; + $arg = "#$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; +} + +sub Xupdate() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e,$f,$g,$h); + + &vext_8 ($T0,@X[0],@X[1],4); # X[1..4] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vext_8 ($T1,@X[2],@X[3],4); # X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T2,$T0,$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T1,$T0,$sigma0[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T2,$T0,32-$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T3,$T0,$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T1,$T1,$T2); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T3,$T0,32-$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T1,$T1,$T3); # sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); # sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_32 ("{$T0}","[$Ktbl,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); # sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 ($T0,$T0,@X[0]); + while($#insns>=2) { eval(shift(@insns)); } + &vst1_32 ("{$T0}","[$Xfer,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + + push(@X,shift(@X)); # "rotate" X[] +} + +sub Xpreload() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e,$f,$g,$h); + + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_32 ("{$T0}","[$Ktbl,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vrev32_8 (@X[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 ($T0,$T0,@X[0]); + foreach (@insns) { eval; } # remaining instructions + &vst1_32 ("{$T0}","[$Xfer,:128]!"); + + push(@X,shift(@X)); # "rotate" X[] +} + +sub body_00_15 () { + ( + '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'. + '&add ($h,$h,$t1)', # h+=X[i]+K[i] + '&eor ($t1,$f,$g)', + '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))', + '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past + '&and ($t1,$t1,$e)', + '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e) + '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))', + '&eor ($t1,$t1,$g)', # Ch(e,f,g) + '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e) + '&eor ($t2,$a,$b)', # a^b, b^c in next round + '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a) + '&add ($h,$h,$t1)', # h+=Ch(e,f,g) + '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'. + '&ldr ($t1,"[$Ktbl]") if ($j==15);'. + '&ldr ($t1,"[sp,#64]") if ($j==31)', + '&and ($t3,$t3,$t2)', # (b^c)&=(a^b) + '&add ($d,$d,$h)', # d+=h + '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a) + '&eor ($t3,$t3,$b)', # Maj(a,b,c) + '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' + ) +} + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.global sha256_block_data_order_neon +.type sha256_block_data_order_neon,%function +.align 4 +sha256_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + + sub $H,sp,#16*4+16 + adrl $Ktbl,K256 + bic $H,$H,#15 @ align for 128-bit stores + mov $t2,sp + mov sp,$H @ alloca + add $len,$inp,$len,lsl#6 @ len to point at the end of inp + + vld1.8 {@X[0]},[$inp]! + vld1.8 {@X[1]},[$inp]! + vld1.8 {@X[2]},[$inp]! + vld1.8 {@X[3]},[$inp]! + vld1.32 {$T0},[$Ktbl,:128]! + vld1.32 {$T1},[$Ktbl,:128]! + vld1.32 {$T2},[$Ktbl,:128]! + vld1.32 {$T3},[$Ktbl,:128]! + vrev32.8 @X[0],@X[0] @ yes, even on + str $ctx,[sp,#64] + vrev32.8 @X[1],@X[1] @ big-endian + str $inp,[sp,#68] + mov $Xfer,sp + vrev32.8 @X[2],@X[2] + str $len,[sp,#72] + vrev32.8 @X[3],@X[3] + str $t2,[sp,#76] @ save original sp + vadd.i32 $T0,$T0,@X[0] + vadd.i32 $T1,$T1,@X[1] + vst1.32 {$T0},[$Xfer,:128]! + vadd.i32 $T2,$T2,@X[2] + vst1.32 {$T1},[$Xfer,:128]! + vadd.i32 $T3,$T3,@X[3] + vst1.32 {$T2},[$Xfer,:128]! + vst1.32 {$T3},[$Xfer,:128]! + + ldmia $ctx,{$A-$H} + sub $Xfer,$Xfer,#64 + ldr $t1,[sp,#0] + eor $t2,$t2,$t2 + eor $t3,$B,$C + b .L_00_48 + +.align 4 +.L_00_48: +___ + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); +$code.=<<___; + teq $t1,#0 @ check for K256 terminator + ldr $t1,[sp,#0] + sub $Xfer,$Xfer,#64 + bne .L_00_48 + + ldr $inp,[sp,#68] + ldr $t0,[sp,#72] + sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl + teq $inp,$t0 + it eq + subeq $inp,$inp,#64 @ avoid SEGV + vld1.8 {@X[0]},[$inp]! @ load next input block + vld1.8 {@X[1]},[$inp]! + vld1.8 {@X[2]},[$inp]! + vld1.8 {@X[3]},[$inp]! + it ne + strne $inp,[sp,#68] + mov $Xfer,sp +___ + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); +$code.=<<___; + ldr $t0,[$t1,#0] + add $A,$A,$t2 @ h+=Maj(a,b,c) from the past + ldr $t2,[$t1,#4] + ldr $t3,[$t1,#8] + ldr $t4,[$t1,#12] + add $A,$A,$t0 @ accumulate + ldr $t0,[$t1,#16] + add $B,$B,$t2 + ldr $t2,[$t1,#20] + add $C,$C,$t3 + ldr $t3,[$t1,#24] + add $D,$D,$t4 + ldr $t4,[$t1,#28] + add $E,$E,$t0 + str $A,[$t1],#4 + add $F,$F,$t2 + str $B,[$t1],#4 + add $G,$G,$t3 + str $C,[$t1],#4 + add $H,$H,$t4 + str $D,[$t1],#4 + stmia $t1,{$E-$H} + + ittte ne + movne $Xfer,sp + ldrne $t1,[sp,#0] + eorne $t2,$t2,$t2 + ldreq sp,[sp,#76] @ restore original sp + itt ne + eorne $t3,$B,$C + bne .L_00_48 + + ldmia sp!,{r4-r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +___ +}}} +###################################################################### +# ARMv8 stuff +# +{{{ +my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2)); +my @MSG=map("q$_",(8..11)); +my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15)); +my $Ktbl="r3"; + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + +# ifdef __thumb2__ +# define INST(a,b,c,d) .byte c,d|0xc,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d +# endif + +.type sha256_block_data_order_armv8,%function +.align 5 +sha256_block_data_order_armv8: +.LARMv8: + vld1.32 {$ABCD,$EFGH},[$ctx] +# ifdef __thumb2__ + adr $Ktbl,.LARMv8 + sub $Ktbl,$Ktbl,#.LARMv8-K256 +# else + adrl $Ktbl,K256 +# endif + add $len,$inp,$len,lsl#6 @ len to point at the end of inp + +.Loop_v8: + vld1.8 {@MSG[0]-@MSG[1]},[$inp]! + vld1.8 {@MSG[2]-@MSG[3]},[$inp]! + vld1.32 {$W0},[$Ktbl]! + vrev32.8 @MSG[0],@MSG[0] + vrev32.8 @MSG[1],@MSG[1] + vrev32.8 @MSG[2],@MSG[2] + vrev32.8 @MSG[3],@MSG[3] + vmov $ABCD_SAVE,$ABCD @ offload + vmov $EFGH_SAVE,$EFGH + teq $inp,$len +___ +for($i=0;$i<12;$i++) { +$code.=<<___; + vld1.32 {$W1},[$Ktbl]! + vadd.i32 $W0,$W0,@MSG[0] + sha256su0 @MSG[0],@MSG[1] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + sha256su1 @MSG[0],@MSG[2],@MSG[3] +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); +} +$code.=<<___; + vld1.32 {$W1},[$Ktbl]! + vadd.i32 $W0,$W0,@MSG[0] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + vld1.32 {$W0},[$Ktbl]! + vadd.i32 $W1,$W1,@MSG[1] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + vld1.32 {$W1},[$Ktbl] + vadd.i32 $W0,$W0,@MSG[2] + sub $Ktbl,$Ktbl,#256-16 @ rewind + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + vadd.i32 $W1,$W1,@MSG[3] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + vadd.i32 $ABCD,$ABCD,$ABCD_SAVE + vadd.i32 $EFGH,$EFGH,$EFGH_SAVE + it ne + bne .Loop_v8 + + vst1.32 {$ABCD,$EFGH},[$ctx] + + ret @ bx lr +.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +#endif +___ +}}} +$code.=<<___; +.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" +.align 2 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.comm OPENSSL_armcap_P,4,4 +#endif +___ + +open SELF,$0; +while(<SELF>) { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + +{ my %opcode = ( + "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40, + "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 ); + + sub unsha256 { + my ($mnemonic,$arg)=@_; + + if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) { + my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) + |(($2&7)<<17)|(($2&8)<<4) + |(($3&7)<<1) |(($3&8)<<2); + # since ARMv7 instructions are always encoded little-endian. + # correct solution is to use .inst directive, but older + # assemblers don't implement it:-( + sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s", + $word&0xff,($word>>8)&0xff, + ($word>>16)&0xff,($word>>24)&0xff, + $mnemonic,$arg; + } + } +} + +foreach (split($/,$code)) { + + s/\`([^\`]*)\`/eval $1/geo; + + s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo; + + s/\bret\b/bx lr/go or + s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 + + print $_,"\n"; +} + +close STDOUT; # enforce flush diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped new file mode 100644 index 000000000000..555a1a8eec90 --- /dev/null +++ b/arch/arm/crypto/sha256-core.S_shipped @@ -0,0 +1,2808 @@ + +@ ==================================================================== +@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Permission to use under GPL terms is granted. +@ ==================================================================== + +@ SHA256 block procedure for ARMv4. May 2007. + +@ Performance is ~2x better than gcc 3.4 generated code and in "abso- +@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per +@ byte [on single-issue Xscale PXA250 core]. + +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 22% improvement on +@ Cortex A8 core and ~20 cycles per processed byte. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 16% +@ improvement on Cortex A8 core and ~15.4 cycles per processed byte. + +@ September 2013. +@ +@ Add NEON implementation. On Cortex A8 it was measured to process one +@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon +@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only +@ code (meaning that latter performs sub-optimally, nothing was done +@ about it). + +@ May 2014. +@ +@ Add ARMv8 code path performing at 2.0 cpb on Apple A7. + +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +#endif + +.text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +# define adrl adr +.thumb +# else +.code 32 +# endif +#endif + +.type K256,%object +.align 5 +K256: +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 +.word 0 @ terminator +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha256_block_data_order +#endif +.align 5 + +.global sha256_block_data_order +.type sha256_block_data_order,%function +sha256_block_data_order: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ sha256_block_data_order +#else + adr r3,sha256_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA256 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif + add r2,r1,r2,lsl#6 @ len to point at the end of inp + stmdb sp!,{r0,r1,r2,r4-r11,lr} + ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} + sub r14,r3,#256+32 @ K256 + sub sp,sp,#16*4 @ alloca(X[16]) +.Loop: +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ magic + eor r12,r12,r12 +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 0 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 0 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 0==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 0<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 1 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 1 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 1==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 1<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 2 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 2 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 2==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 2<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 3 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 3 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 3==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 3<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 4 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 4 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 4==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 4<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 5 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 5==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 5<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 6 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 6 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 6==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 6<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 7 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 7==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 7<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 8 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 8 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 8==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 8<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 9 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 9 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 9==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 9<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 10 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 10 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 10==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 10<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 11 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 11 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 11==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 11<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 12 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 12 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 12==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 12<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 13 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 13 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 13==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 13<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 14 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 14 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 14==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 14<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 15 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 15 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 15==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 15<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +.Lrounds_16_xx: + @ ldr r2,[sp,#1*4] @ 16 + @ ldr r1,[sp,#14*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#0*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#9*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 16==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 16<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#2*4] @ 17 + @ ldr r1,[sp,#15*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#1*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#10*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 17==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 17<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#3*4] @ 18 + @ ldr r1,[sp,#0*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#2*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#11*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 18==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 18<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#4*4] @ 19 + @ ldr r1,[sp,#1*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#3*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#12*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 19==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 19<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#5*4] @ 20 + @ ldr r1,[sp,#2*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#4*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#13*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 20==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 20<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#6*4] @ 21 + @ ldr r1,[sp,#3*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#5*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#14*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 21==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 21<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#7*4] @ 22 + @ ldr r1,[sp,#4*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#6*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#15*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 22==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 22<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#8*4] @ 23 + @ ldr r1,[sp,#5*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#7*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#0*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 23==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 23<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#9*4] @ 24 + @ ldr r1,[sp,#6*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#8*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#1*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 24==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 24<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#10*4] @ 25 + @ ldr r1,[sp,#7*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#9*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#2*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 25==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 25<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#11*4] @ 26 + @ ldr r1,[sp,#8*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#10*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#3*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 26==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 26<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#12*4] @ 27 + @ ldr r1,[sp,#9*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#11*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#4*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 27==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 27<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#13*4] @ 28 + @ ldr r1,[sp,#10*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#12*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#5*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 28==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 28<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#14*4] @ 29 + @ ldr r1,[sp,#11*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#13*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#6*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 29==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 29<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#15*4] @ 30 + @ ldr r1,[sp,#12*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#14*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#7*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 30==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 30<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#0*4] @ 31 + @ ldr r1,[sp,#13*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#15*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#8*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 31==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 31<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + ite eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq r3,[sp,#16*4] @ pull ctx + bne .Lrounds_16_xx + + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r0,[r3,#0] + ldr r2,[r3,#4] + ldr r12,[r3,#8] + add r4,r4,r0 + ldr r0,[r3,#12] + add r5,r5,r2 + ldr r2,[r3,#16] + add r6,r6,r12 + ldr r12,[r3,#20] + add r7,r7,r0 + ldr r0,[r3,#24] + add r8,r8,r2 + ldr r2,[r3,#28] + add r9,r9,r12 + ldr r1,[sp,#17*4] @ pull inp + ldr r12,[sp,#18*4] @ pull inp+len + add r10,r10,r0 + add r11,r11,r2 + stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} + cmp r1,r12 + sub r14,r14,#256 @ rewind Ktbl + bne .Loop + + add sp,sp,#19*4 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha256_block_data_order,.-sha256_block_data_order +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.global sha256_block_data_order_neon +.type sha256_block_data_order_neon,%function +.align 4 +sha256_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + + sub r11,sp,#16*4+16 + adrl r14,K256 + bic r11,r11,#15 @ align for 128-bit stores + mov r12,sp + mov sp,r11 @ alloca + add r2,r1,r2,lsl#6 @ len to point at the end of inp + + vld1.8 {q0},[r1]! + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + vld1.32 {q8},[r14,:128]! + vld1.32 {q9},[r14,:128]! + vld1.32 {q10},[r14,:128]! + vld1.32 {q11},[r14,:128]! + vrev32.8 q0,q0 @ yes, even on + str r0,[sp,#64] + vrev32.8 q1,q1 @ big-endian + str r1,[sp,#68] + mov r1,sp + vrev32.8 q2,q2 + str r2,[sp,#72] + vrev32.8 q3,q3 + str r12,[sp,#76] @ save original sp + vadd.i32 q8,q8,q0 + vadd.i32 q9,q9,q1 + vst1.32 {q8},[r1,:128]! + vadd.i32 q10,q10,q2 + vst1.32 {q9},[r1,:128]! + vadd.i32 q11,q11,q3 + vst1.32 {q10},[r1,:128]! + vst1.32 {q11},[r1,:128]! + + ldmia r0,{r4-r11} + sub r1,r1,#64 + ldr r2,[sp,#0] + eor r12,r12,r12 + eor r3,r5,r6 + b .L_00_48 + +.align 4 +.L_00_48: + vext.8 q8,q0,q1,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q2,q3,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q0,q0,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#4] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d7,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d7,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d7,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q0,q0,q9 + add r10,r10,r2 + ldr r2,[sp,#8] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d7,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d7,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d0,d0,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d0,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d0,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d0,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#12] + and r3,r3,r12 + vshr.u32 d24,d0,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d0,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d1,d1,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q0 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q1,q2,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q3,q0,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q1,q1,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#20] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d1,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d1,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d1,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q1,q1,q9 + add r6,r6,r2 + ldr r2,[sp,#24] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d1,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d1,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d2,d2,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d2,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d2,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d2,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#28] + and r3,r3,r12 + vshr.u32 d24,d2,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d2,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d3,d3,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q1 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vext.8 q8,q2,q3,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q0,q1,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q2,q2,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#36] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d3,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d3,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d3,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q2,q2,q9 + add r10,r10,r2 + ldr r2,[sp,#40] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d3,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d3,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d4,d4,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d4,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d4,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d4,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#44] + and r3,r3,r12 + vshr.u32 d24,d4,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d4,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d5,d5,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q2 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q3,q0,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q1,q2,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q3,q3,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#52] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d5,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d5,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d5,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q3,q3,q9 + add r6,r6,r2 + ldr r2,[sp,#56] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d5,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d5,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d6,d6,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d6,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d6,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d6,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#60] + and r3,r3,r12 + vshr.u32 d24,d6,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d6,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d7,d7,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q3 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[r14] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + teq r2,#0 @ check for K256 terminator + ldr r2,[sp,#0] + sub r1,r1,#64 + bne .L_00_48 + + ldr r1,[sp,#68] + ldr r0,[sp,#72] + sub r14,r14,#256 @ rewind r14 + teq r1,r0 + it eq + subeq r1,r1,#64 @ avoid SEGV + vld1.8 {q0},[r1]! @ load next input block + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + it ne + strne r1,[sp,#68] + mov r1,sp + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q0,q0 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q0 + ldr r2,[sp,#4] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#8] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#12] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q1,q1 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q1 + ldr r2,[sp,#20] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#24] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#28] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q2,q2 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q2 + ldr r2,[sp,#36] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#40] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#44] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q3,q3 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q3 + ldr r2,[sp,#52] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#56] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#60] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#64] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + ldr r0,[r2,#0] + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r12,[r2,#4] + ldr r3,[r2,#8] + ldr r1,[r2,#12] + add r4,r4,r0 @ accumulate + ldr r0,[r2,#16] + add r5,r5,r12 + ldr r12,[r2,#20] + add r6,r6,r3 + ldr r3,[r2,#24] + add r7,r7,r1 + ldr r1,[r2,#28] + add r8,r8,r0 + str r4,[r2],#4 + add r9,r9,r12 + str r5,[r2],#4 + add r10,r10,r3 + str r6,[r2],#4 + add r11,r11,r1 + str r7,[r2],#4 + stmia r2,{r8-r11} + + ittte ne + movne r1,sp + ldrne r2,[sp,#0] + eorne r12,r12,r12 + ldreq sp,[sp,#76] @ restore original sp + itt ne + eorne r3,r5,r6 + bne .L_00_48 + + ldmia sp!,{r4-r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + +# ifdef __thumb2__ +# define INST(a,b,c,d) .byte c,d|0xc,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d +# endif + +.type sha256_block_data_order_armv8,%function +.align 5 +sha256_block_data_order_armv8: +.LARMv8: + vld1.32 {q0,q1},[r0] +# ifdef __thumb2__ + adr r3,.LARMv8 + sub r3,r3,#.LARMv8-K256 +# else + adrl r3,K256 +# endif + add r2,r1,r2,lsl#6 @ len to point at the end of inp + +.Loop_v8: + vld1.8 {q8-q9},[r1]! + vld1.8 {q10-q11},[r1]! + vld1.32 {q12},[r3]! + vrev32.8 q8,q8 + vrev32.8 q9,q9 + vrev32.8 q10,q10 + vrev32.8 q11,q11 + vmov q14,q0 @ offload + vmov q15,q1 + teq r1,r2 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + + vld1.32 {q13},[r3] + vadd.i32 q12,q12,q10 + sub r3,r3,#256-16 @ rewind + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + + vadd.i32 q13,q13,q11 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + + vadd.i32 q0,q0,q14 + vadd.i32 q1,q1,q15 + it ne + bne .Loop_v8 + + vst1.32 {q0,q1},[r0] + + bx lr @ bx lr +.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +#endif +.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>" +.align 2 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c new file mode 100644 index 000000000000..a84e869ef900 --- /dev/null +++ b/arch/arm/crypto/sha256_glue.c @@ -0,0 +1,128 @@ +/* + * Glue code for the SHA256 Secure Hash Algorithm assembly implementation + * using optimized ARM assembler and NEON instructions. + * + * Copyright © 2015 Google Inc. + * + * This file is based on sha256_ssse3_glue.c: + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen <tim.c.chen@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/internal/hash.h> +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <linux/string.h> +#include <crypto/sha.h> +#include <crypto/sha256_base.h> +#include <asm/simd.h> +#include <asm/neon.h> + +#include "sha256_glue.h" + +asmlinkage void sha256_block_data_order(u32 *digest, const void *data, + unsigned int num_blks); + +int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + /* make sure casting to sha256_block_fn() is safe */ + BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); + + return sha256_base_do_update(desc, data, len, + (sha256_block_fn *)sha256_block_data_order); +} +EXPORT_SYMBOL(crypto_sha256_arm_update); + +static int sha256_final(struct shash_desc *desc, u8 *out) +{ + sha256_base_do_finalize(desc, + (sha256_block_fn *)sha256_block_data_order); + return sha256_base_finish(desc, out); +} + +int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + sha256_base_do_update(desc, data, len, + (sha256_block_fn *)sha256_block_data_order); + return sha256_final(desc, out); +} +EXPORT_SYMBOL(crypto_sha256_arm_finup); + +static struct shash_alg algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_base_init, + .update = crypto_sha256_arm_update, + .final = sha256_final, + .finup = crypto_sha256_arm_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-asm", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_base_init, + .update = crypto_sha256_arm_update, + .final = sha256_final, + .finup = crypto_sha256_arm_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-asm", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int __init sha256_mod_init(void) +{ + int res = crypto_register_shashes(algs, ARRAY_SIZE(algs)); + + if (res < 0) + return res; + + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) { + res = crypto_register_shashes(sha256_neon_algs, + ARRAY_SIZE(sha256_neon_algs)); + + if (res < 0) + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); + } + + return res; +} + +static void __exit sha256_mod_fini(void) +{ + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); + + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) + crypto_unregister_shashes(sha256_neon_algs, + ARRAY_SIZE(sha256_neon_algs)); +} + +module_init(sha256_mod_init); +module_exit(sha256_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON"); + +MODULE_ALIAS_CRYPTO("sha256"); diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h new file mode 100644 index 000000000000..7cf0bf786ada --- /dev/null +++ b/arch/arm/crypto/sha256_glue.h @@ -0,0 +1,14 @@ +#ifndef _CRYPTO_SHA256_GLUE_H +#define _CRYPTO_SHA256_GLUE_H + +#include <linux/crypto.h> + +extern struct shash_alg sha256_neon_algs[2]; + +int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, + unsigned int len); + +int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *hash); + +#endif /* _CRYPTO_SHA256_GLUE_H */ diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c new file mode 100644 index 000000000000..39ccd658817e --- /dev/null +++ b/arch/arm/crypto/sha256_neon_glue.c @@ -0,0 +1,101 @@ +/* + * Glue code for the SHA256 Secure Hash Algorithm assembly implementation + * using NEON instructions. + * + * Copyright © 2015 Google Inc. + * + * This file is based on sha512_neon_glue.c: + * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/internal/hash.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <linux/string.h> +#include <crypto/sha.h> +#include <crypto/sha256_base.h> +#include <asm/byteorder.h> +#include <asm/simd.h> +#include <asm/neon.h> + +#include "sha256_glue.h" + +asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data, + unsigned int num_blks); + +static int sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + if (!may_use_simd() || + (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) + return crypto_sha256_arm_update(desc, data, len); + + kernel_neon_begin(); + sha256_base_do_update(desc, data, len, + (sha256_block_fn *)sha256_block_data_order_neon); + kernel_neon_end(); + + return 0; +} + +static int sha256_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + if (!may_use_simd()) + return crypto_sha256_arm_finup(desc, data, len, out); + + kernel_neon_begin(); + if (len) + sha256_base_do_update(desc, data, len, + (sha256_block_fn *)sha256_block_data_order_neon); + sha256_base_do_finalize(desc, + (sha256_block_fn *)sha256_block_data_order_neon); + kernel_neon_end(); + + return sha256_base_finish(desc, out); +} + +static int sha256_final(struct shash_desc *desc, u8 *out) +{ + return sha256_finup(desc, NULL, 0, out); +} + +struct shash_alg sha256_neon_algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_base_init, + .update = sha256_update, + .final = sha256_final, + .finup = sha256_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-neon", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_base_init, + .update = sha256_update, + .final = sha256_final, + .finup = sha256_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-neon", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index fe74c0d1e485..eb0f43f3e3f1 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -1,6 +1,5 @@ -generic-y += auxvec.h generic-y += bitsperlong.h generic-y += cputime.h generic-y += current.h diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index f67fd3afebdf..186270b3e194 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -237,6 +237,9 @@ .pushsection ".alt.smp.init", "a" ;\ .long 9998b ;\ 9997: instr ;\ + .if . - 9997b == 2 ;\ + nop ;\ + .endif ;\ .if . - 9997b != 4 ;\ .error "ALT_UP() content must assemble to exactly 4 bytes";\ .endif ;\ diff --git a/arch/arm/include/asm/auxvec.h b/arch/arm/include/asm/auxvec.h new file mode 100644 index 000000000000..fbd388c46299 --- /dev/null +++ b/arch/arm/include/asm/auxvec.h @@ -0,0 +1 @@ +#include <uapi/asm/auxvec.h> diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h index af319ac4960c..0f8424924902 100644 --- a/arch/arm/include/asm/cpuidle.h +++ b/arch/arm/include/asm/cpuidle.h @@ -1,6 +1,8 @@ #ifndef __ASM_ARM_CPUIDLE_H #define __ASM_ARM_CPUIDLE_H +#include <asm/proc-fns.h> + #ifdef CONFIG_CPU_IDLE extern int arm_cpuidle_simple_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index); @@ -25,4 +27,25 @@ static inline int arm_cpuidle_simple_enter(struct cpuidle_device *dev, */ #define ARM_CPUIDLE_WFI_STATE ARM_CPUIDLE_WFI_STATE_PWR(UINT_MAX) +struct device_node; + +struct cpuidle_ops { + int (*suspend)(int cpu, unsigned long arg); + int (*init)(struct device_node *, int cpu); +}; + +struct of_cpuidle_method { + const char *method; + struct cpuidle_ops *ops; +}; + +#define CPUIDLE_METHOD_OF_DECLARE(name, _method, _ops) \ + static const struct of_cpuidle_method __cpuidle_method_of_table_##name \ + __used __section(__cpuidle_method_of_table) \ + = { .method = _method, .ops = _ops } + +extern int arm_cpuidle_suspend(int index); + +extern int arm_cpuidle_init(int cpu); + #endif diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 819777d0e91f..85e374f873ac 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -253,4 +253,20 @@ static inline int cpu_is_pj4(void) #else #define cpu_is_pj4() 0 #endif + +static inline int __attribute_const__ cpuid_feature_extract_field(u32 features, + int field) +{ + int feature = (features >> field) & 15; + + /* feature registers are signed values */ + if (feature > 8) + feature -= 16; + + return feature; +} + +#define cpuid_feature_extract(reg, field) \ + cpuid_feature_extract_field(read_cpuid_ext(reg), field) + #endif diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index afb9cafd3786..d2315ffd8f12 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -1,7 +1,9 @@ #ifndef __ASMARM_ELF_H #define __ASMARM_ELF_H +#include <asm/auxvec.h> #include <asm/hwcap.h> +#include <asm/vdso_datapage.h> /* * ELF register definitions.. @@ -115,7 +117,7 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) +#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) /* When the program starts, a1 contains a pointer to a function to be registered with atexit, as per the SVR4 ABI. A value of 0 means we @@ -125,11 +127,14 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); extern void elf_set_personality(const struct elf32_hdr *); #define SET_PERSONALITY(ex) elf_set_personality(&(ex)) -struct mm_struct; -extern unsigned long arch_randomize_brk(struct mm_struct *mm); -#define arch_randomize_brk arch_randomize_brk - #ifdef CONFIG_MMU +#ifdef CONFIG_VDSO +#define ARCH_DLINFO \ +do { \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (elf_addr_t)current->mm->context.vdso); \ +} while (0) +#endif #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 struct linux_binprm; int arch_setup_additional_pages(struct linux_binprm *, int); diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 53e69dae796f..4e78065a16aa 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -13,7 +13,7 @@ " .align 3\n" \ " .long 1b, 4f, 2b, 4f\n" \ " .popsection\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "4: mov %0, " err_reg "\n" \ " b 3b\n" \ diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index 70f9b9bfb1f9..5f337dc5c108 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -1,7 +1,7 @@ #ifndef _ASM_ARM_JUMP_LABEL_H #define _ASM_ARM_JUMP_LABEL_H -#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ #include <linux/types.h> @@ -27,8 +27,6 @@ l_yes: return true; } -#endif /* __KERNEL__ */ - typedef u32 jump_label_t; struct jump_entry { @@ -37,4 +35,5 @@ struct jump_entry { jump_label_t key; }; +#endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 816db0bf2dd8..d995821f1698 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -185,6 +185,7 @@ #define HSR_COND (0xfU << HSR_COND_SHIFT) #define FSC_FAULT (0x04) +#define FSC_ACCESS (0x08) #define FSC_PERM (0x0c) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 41008cd7c53f..d71607c16601 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -27,6 +27,8 @@ #include <asm/fpstate.h> #include <kvm/arm_arch_timer.h> +#define __KVM_HAVE_ARCH_INTC_INITIALIZED + #if defined(CONFIG_KVM_ARM_MAX_VCPUS) #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS #else @@ -165,19 +167,10 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, - unsigned long end) -{ - return 0; -} - -static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) -{ - return 0; -} - static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, unsigned long address) { diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h index 3f83db2f6cf0..d8e90c8cb5fa 100644 --- a/arch/arm/include/asm/kvm_mmio.h +++ b/arch/arm/include/asm/kvm_mmio.h @@ -28,28 +28,6 @@ struct kvm_decode { bool sign_extend; }; -/* - * The in-kernel MMIO emulation code wants to use a copy of run->mmio, - * which is an anonymous type. Use our own type instead. - */ -struct kvm_exit_mmio { - phys_addr_t phys_addr; - u8 data[8]; - u32 len; - bool is_write; - void *private; -}; - -static inline void kvm_prepare_mmio(struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - run->mmio.phys_addr = mmio->phys_addr; - run->mmio.len = mmio->len; - run->mmio.is_write = mmio->is_write; - memcpy(run->mmio.data, mmio->data, mmio->len); - run->exit_reason = KVM_EXIT_MMIO; -} - int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa); diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 018760675b56..405aa1883307 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -149,29 +149,28 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) (__boundary - 1 < (end) - 1)? __boundary: (end); \ }) +#define kvm_pgd_index(addr) pgd_index(addr) + static inline bool kvm_page_empty(void *ptr) { struct page *ptr_page = virt_to_page(ptr); return page_count(ptr_page) == 1; } - #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp) #define kvm_pud_table_empty(kvm, pudp) (0) #define KVM_PREALLOC_LEVEL 0 -static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd) +static inline void *kvm_get_hwpgd(struct kvm *kvm) { - return 0; + return kvm->arch.pgd; } -static inline void kvm_free_hwpgd(struct kvm *kvm) { } - -static inline void *kvm_get_hwpgd(struct kvm *kvm) +static inline unsigned int kvm_get_hwpgd_size(void) { - return kvm->arch.pgd; + return PTRS_PER_S2_PGD * sizeof(pgd_t); } struct kvm; diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h index 90c12e1e695c..0f79e4dec7f9 100644 --- a/arch/arm/include/asm/mach/time.h +++ b/arch/arm/include/asm/mach/time.h @@ -12,8 +12,7 @@ extern void timer_tick(void); -struct timespec; -typedef void (*clock_access_fn)(struct timespec *); +typedef void (*clock_access_fn)(struct timespec64 *); extern int register_persistent_clock(clock_access_fn read_boot, clock_access_fn read_persistent); diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 64fd15159b7d..a5b47421059d 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -11,6 +11,9 @@ typedef struct { #endif unsigned int vmalloc_seq; unsigned long sigpage; +#ifdef CONFIG_VDSO + unsigned long vdso; +#endif } mm_context_t; #ifdef CONFIG_CPU_HAS_ASID diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index b1596bd59129..675e4ab79f68 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -92,6 +92,7 @@ struct pmu_hw_events { struct arm_pmu { struct pmu pmu; cpumask_t active_irqs; + int *irq_affinity; char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct perf_event *event); diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index 0ad7d490ee6f..993e5224d8f7 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -104,6 +104,7 @@ static inline u32 mpidr_hash_size(void) return 1 << mpidr_hash.bits; } +extern int platform_can_secondary_boot(void); extern int platform_can_cpu_hotplug(void); #endif diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 72812a1f3d1c..bd32eded3e50 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -23,7 +23,6 @@ #ifndef __ASSEMBLY__ struct task_struct; -struct exec_domain; #include <asm/types.h> #include <asm/domain.h> @@ -53,7 +52,6 @@ struct thread_info { int preempt_count; /* 0 => preemptable, <0 => bug */ mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ - struct exec_domain *exec_domain; /* execution domain */ __u32 cpu; /* cpu */ __u32 cpu_domain; /* cpu domain */ struct cpu_context_save cpu_context; /* cpu context */ @@ -73,7 +71,6 @@ struct thread_info { #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ - .exec_domain = &default_exec_domain, \ .flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index ce0786efd26c..74b17d09ef7a 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -315,7 +315,7 @@ do { \ __asm__ __volatile__( \ "1: " TUSER(ldrb) " %1,[%2],#0\n" \ "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, %3\n" \ " mov %1, #0\n" \ @@ -351,7 +351,7 @@ do { \ __asm__ __volatile__( \ "1: " TUSER(ldr) " %1,[%2],#0\n" \ "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, %3\n" \ " mov %1, #0\n" \ @@ -397,7 +397,7 @@ do { \ __asm__ __volatile__( \ "1: " TUSER(strb) " %1,[%2],#0\n" \ "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, %3\n" \ " b 2b\n" \ @@ -430,7 +430,7 @@ do { \ __asm__ __volatile__( \ "1: " TUSER(str) " %1,[%2],#0\n" \ "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, %3\n" \ " b 2b\n" \ @@ -458,7 +458,7 @@ do { \ THUMB( "1: " TUSER(str) " " __reg_oper1 ", [%1]\n" ) \ THUMB( "2: " TUSER(str) " " __reg_oper0 ", [%1, #4]\n" ) \ "3:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "4: mov %0, %3\n" \ " b 3b\n" \ diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h index b88beaba6b4a..200f9a7cd623 100644 --- a/arch/arm/include/asm/unified.h +++ b/arch/arm/include/asm/unified.h @@ -24,6 +24,14 @@ .syntax unified #endif +#ifdef CONFIG_CPU_V7M +#define AR_CLASS(x...) +#define M_CLASS(x...) x +#else +#define AR_CLASS(x...) x +#define M_CLASS(x...) +#endif + #ifdef CONFIG_THUMB2_KERNEL #if __GNUC__ < 4 diff --git a/arch/arm/include/asm/vdso.h b/arch/arm/include/asm/vdso.h new file mode 100644 index 000000000000..d0295f1dd1a3 --- /dev/null +++ b/arch/arm/include/asm/vdso.h @@ -0,0 +1,32 @@ +#ifndef __ASM_VDSO_H +#define __ASM_VDSO_H + +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ + +struct mm_struct; + +#ifdef CONFIG_VDSO + +void arm_install_vdso(struct mm_struct *mm, unsigned long addr); + +extern char vdso_start, vdso_end; + +extern unsigned int vdso_total_pages; + +#else /* CONFIG_VDSO */ + +static inline void arm_install_vdso(struct mm_struct *mm, unsigned long addr) +{ +} + +#define vdso_total_pages 0 + +#endif /* CONFIG_VDSO */ + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* __ASM_VDSO_H */ diff --git a/arch/arm/include/asm/vdso_datapage.h b/arch/arm/include/asm/vdso_datapage.h new file mode 100644 index 000000000000..9be259442fca --- /dev/null +++ b/arch/arm/include/asm/vdso_datapage.h @@ -0,0 +1,60 @@ +/* + * Adapted from arm64 version. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_VDSO_DATAPAGE_H +#define __ASM_VDSO_DATAPAGE_H + +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ + +#include <asm/page.h> + +/* Try to be cache-friendly on systems that don't implement the + * generic timer: fit the unconditionally updated fields in the first + * 32 bytes. + */ +struct vdso_data { + u32 seq_count; /* sequence count - odd during updates */ + u16 tk_is_cntvct; /* fall back to syscall if false */ + u16 cs_shift; /* clocksource shift */ + u32 xtime_coarse_sec; /* coarse time */ + u32 xtime_coarse_nsec; + + u32 wtm_clock_sec; /* wall to monotonic offset */ + u32 wtm_clock_nsec; + u32 xtime_clock_sec; /* CLOCK_REALTIME - seconds */ + u32 cs_mult; /* clocksource multiplier */ + + u64 cs_cycle_last; /* last cycle value */ + u64 cs_mask; /* clocksource mask */ + + u64 xtime_clock_snsec; /* CLOCK_REALTIME sub-ns base */ + u32 tz_minuteswest; /* timezone info for gettimeofday(2) */ + u32 tz_dsttime; +}; + +union vdso_data_store { + struct vdso_data data; + u8 page[PAGE_SIZE]; +}; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* __ASM_VDSO_DATAPAGE_H */ diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h index a6d0a29861e7..5831dce4b51c 100644 --- a/arch/arm/include/asm/word-at-a-time.h +++ b/arch/arm/include/asm/word-at-a-time.h @@ -71,7 +71,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) asm( "1: ldr %0, [%2]\n" "2:\n" - " .pushsection .fixup,\"ax\"\n" + " .pushsection .text.fixup,\"ax\"\n" " .align 2\n" "3: and %1, %2, #0x3\n" " bic %2, %2, #0x3\n" diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild index 70a1c9da30ca..a1c05f93d920 100644 --- a/arch/arm/include/uapi/asm/Kbuild +++ b/arch/arm/include/uapi/asm/Kbuild @@ -1,6 +1,7 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +header-y += auxvec.h header-y += byteorder.h header-y += fcntl.h header-y += hwcap.h diff --git a/arch/arm/include/uapi/asm/auxvec.h b/arch/arm/include/uapi/asm/auxvec.h new file mode 100644 index 000000000000..cb02a767a500 --- /dev/null +++ b/arch/arm/include/uapi/asm/auxvec.h @@ -0,0 +1,7 @@ +#ifndef __ASM_AUXVEC_H +#define __ASM_AUXVEC_H + +/* VDSO location */ +#define AT_SYSINFO_EHDR 33 + +#endif diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 0db25bc32864..2499867dd0d8 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -198,6 +198,9 @@ struct kvm_arch_memory_slot { /* Highest supported SPI, from VGIC_NR_IRQS */ #define KVM_ARM_IRQ_GIC_MAX 127 +/* One single KVM irqchip, ie. the VGIC */ +#define KVM_NR_IRQCHIPS 1 + /* PSCI interface */ #define KVM_PSCI_FN_BASE 0x95c1ba5e #define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 902397dd1000..752725dcbf42 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -16,7 +16,7 @@ CFLAGS_REMOVE_return_address.o = -pg # Object file lists. obj-y := elf.o entry-common.o irq.o opcodes.o \ - process.o ptrace.o return_address.o \ + process.o ptrace.o reboot.o return_address.o \ setup.o signal.o sigreturn_codes.o \ stacktrace.o sys_arm.o time.o traps.o @@ -34,7 +34,6 @@ obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_ISA_DMA_API) += dma.o obj-$(CONFIG_FIQ) += fiq.o fiqasm.o obj-$(CONFIG_MODULES) += armksyms.o module.o -obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o @@ -75,6 +74,7 @@ obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o CFLAGS_pj4-cp0.o := -marm AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o +obj-$(CONFIG_VDSO) += vdso.o ifneq ($(CONFIG_ARCH_EBSA110),y) obj-y += io.o @@ -86,7 +86,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o ifeq ($(CONFIG_ARM_PSCI),y) -obj-y += psci.o +obj-y += psci.o psci-call.o obj-$(CONFIG_SMP) += psci_smp.o endif diff --git a/arch/arm/kernel/arthur.c b/arch/arm/kernel/arthur.c deleted file mode 100644 index 321c5291d05f..000000000000 --- a/arch/arm/kernel/arthur.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * linux/arch/arm/kernel/arthur.c - * - * Copyright (C) 1998, 1999, 2000, 2001 Philip Blundell - * - * Arthur personality - */ - -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/module.h> -#include <linux/personality.h> -#include <linux/stddef.h> -#include <linux/signal.h> -#include <linux/init.h> -#include <linux/sched.h> - -#include <asm/ptrace.h> - -/* Arthur doesn't have many signals, and a lot of those that it does - have don't map easily to any Linux equivalent. Never mind. */ - -#define ARTHUR_SIGABRT 1 -#define ARTHUR_SIGFPE 2 -#define ARTHUR_SIGILL 3 -#define ARTHUR_SIGINT 4 -#define ARTHUR_SIGSEGV 5 -#define ARTHUR_SIGTERM 6 -#define ARTHUR_SIGSTAK 7 -#define ARTHUR_SIGUSR1 8 -#define ARTHUR_SIGUSR2 9 -#define ARTHUR_SIGOSERROR 10 - -static unsigned long arthur_to_linux_signals[32] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31 -}; - -static unsigned long linux_to_arthur_signals[32] = { - 0, -1, ARTHUR_SIGINT, -1, - ARTHUR_SIGILL, 5, ARTHUR_SIGABRT, 7, - ARTHUR_SIGFPE, 9, ARTHUR_SIGUSR1, ARTHUR_SIGSEGV, - ARTHUR_SIGUSR2, 13, 14, ARTHUR_SIGTERM, - 16, 17, 18, 19, - 20, 21, 22, 23, - 24, 25, 26, 27, - 28, 29, 30, 31 -}; - -static void arthur_lcall7(int nr, struct pt_regs *regs) -{ - struct siginfo info; - info.si_signo = SIGSWI; - info.si_errno = nr; - /* Bounce it to the emulator */ - send_sig_info(SIGSWI, &info, current); -} - -static struct exec_domain arthur_exec_domain = { - .name = "Arthur", - .handler = arthur_lcall7, - .pers_low = PER_RISCOS, - .pers_high = PER_RISCOS, - .signal_map = arthur_to_linux_signals, - .signal_invmap = linux_to_arthur_signals, - .module = THIS_MODULE, -}; - -/* - * We could do with some locking to stop Arthur being removed while - * processes are using it. - */ - -static int __init arthur_init(void) -{ - return register_exec_domain(&arthur_exec_domain); -} - -static void __exit arthur_exit(void) -{ - unregister_exec_domain(&arthur_exec_domain); -} - -module_init(arthur_init); -module_exit(arthur_exit); - -MODULE_LICENSE("GPL"); diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 2d2d6087b9b1..871b8267d211 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -25,6 +25,7 @@ #include <asm/memory.h> #include <asm/procinfo.h> #include <asm/suspend.h> +#include <asm/vdso_datapage.h> #include <asm/hardware/cache-l2x0.h> #include <linux/kbuild.h> @@ -66,7 +67,6 @@ int main(void) DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); - DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); @@ -190,7 +190,6 @@ int main(void) DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar)); DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.fault.hpfar)); DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc)); -#ifdef CONFIG_KVM_ARM_VGIC DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); @@ -200,15 +199,16 @@ int main(void) DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); -#ifdef CONFIG_KVM_ARM_TIMER DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); -#endif DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); -#endif DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); #endif + BLANK(); +#ifdef CONFIG_VDSO + DEFINE(VDSO_DATA_SIZE, sizeof(union vdso_data_store)); +#endif return 0; } diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c index ab19b7c03423..fcbbbb1b9e95 100644 --- a/arch/arm/kernel/bios32.c +++ b/arch/arm/kernel/bios32.c @@ -618,21 +618,15 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine) { - struct pci_sys_data *root = dev->sysdata; - unsigned long phys; - - if (mmap_state == pci_mmap_io) { + if (mmap_state == pci_mmap_io) return -EINVAL; - } else { - phys = vma->vm_pgoff + (root->mem_offset >> PAGE_SHIFT); - } /* * Mark this as IO */ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (remap_pfn_range(vma, vma->vm_start, phys, + if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, vma->vm_end - vma->vm_start, vma->vm_page_prot)) return -EAGAIN; diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c index 89545f6c8403..318da33465f4 100644 --- a/arch/arm/kernel/cpuidle.c +++ b/arch/arm/kernel/cpuidle.c @@ -10,8 +10,28 @@ */ #include <linux/cpuidle.h> -#include <asm/proc-fns.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <asm/cpuidle.h> +extern struct of_cpuidle_method __cpuidle_method_of_table[]; + +static const struct of_cpuidle_method __cpuidle_method_of_table_sentinel + __used __section(__cpuidle_method_of_table_end); + +static struct cpuidle_ops cpuidle_ops[NR_CPUS]; + +/** + * arm_cpuidle_simple_enter() - a wrapper to cpu_do_idle() + * @dev: not used + * @drv: not used + * @index: not used + * + * A trivial wrapper to allow the cpu_do_idle function to be assigned as a + * cpuidle callback by matching the function signature. + * + * Returns the index passed as parameter + */ int arm_cpuidle_simple_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { @@ -19,3 +39,114 @@ int arm_cpuidle_simple_enter(struct cpuidle_device *dev, return index; } + +/** + * arm_cpuidle_suspend() - function to enter low power idle states + * @index: an integer used as an identifier for the low level PM callbacks + * + * This function calls the underlying arch specific low level PM code as + * registered at the init time. + * + * Returns -EOPNOTSUPP if no suspend callback is defined, the result of the + * callback otherwise. + */ +int arm_cpuidle_suspend(int index) +{ + int ret = -EOPNOTSUPP; + int cpu = smp_processor_id(); + + if (cpuidle_ops[cpu].suspend) + ret = cpuidle_ops[cpu].suspend(cpu, index); + + return ret; +} + +/** + * arm_cpuidle_get_ops() - find a registered cpuidle_ops by name + * @method: the method name + * + * Search in the __cpuidle_method_of_table array the cpuidle ops matching the + * method name. + * + * Returns a struct cpuidle_ops pointer, NULL if not found. + */ +static struct cpuidle_ops *__init arm_cpuidle_get_ops(const char *method) +{ + struct of_cpuidle_method *m = __cpuidle_method_of_table; + + for (; m->method; m++) + if (!strcmp(m->method, method)) + return m->ops; + + return NULL; +} + +/** + * arm_cpuidle_read_ops() - Initialize the cpuidle ops with the device tree + * @dn: a pointer to a struct device node corresponding to a cpu node + * @cpu: the cpu identifier + * + * Get the method name defined in the 'enable-method' property, retrieve the + * associated cpuidle_ops and do a struct copy. This copy is needed because all + * cpuidle_ops are tagged __initdata and will be unloaded after the init + * process. + * + * Return 0 on sucess, -ENOENT if no 'enable-method' is defined, -EOPNOTSUPP if + * no cpuidle_ops is registered for the 'enable-method'. + */ +static int __init arm_cpuidle_read_ops(struct device_node *dn, int cpu) +{ + const char *enable_method; + struct cpuidle_ops *ops; + + enable_method = of_get_property(dn, "enable-method", NULL); + if (!enable_method) + return -ENOENT; + + ops = arm_cpuidle_get_ops(enable_method); + if (!ops) { + pr_warn("%s: unsupported enable-method property: %s\n", + dn->full_name, enable_method); + return -EOPNOTSUPP; + } + + cpuidle_ops[cpu] = *ops; /* structure copy */ + + pr_notice("cpuidle: enable-method property '%s'" + " found operations\n", enable_method); + + return 0; +} + +/** + * arm_cpuidle_init() - Initialize cpuidle_ops for a specific cpu + * @cpu: the cpu to be initialized + * + * Initialize the cpuidle ops with the device for the cpu and then call + * the cpu's idle initialization callback. This may fail if the underlying HW + * is not operational. + * + * Returns: + * 0 on success, + * -ENODEV if it fails to find the cpu node in the device tree, + * -EOPNOTSUPP if it does not find a registered cpuidle_ops for this cpu, + * -ENOENT if it fails to find an 'enable-method' property, + * -ENXIO if the HW reports a failure or a misconfiguration, + * -ENOMEM if the HW report an memory allocation failure + */ +int __init arm_cpuidle_init(int cpu) +{ + struct device_node *cpu_node = of_cpu_device_node_get(cpu); + int ret; + + if (!cpu_node) + return -ENODEV; + + ret = arm_cpuidle_read_ops(cpu_node, cpu); + if (!ret && cpuidle_ops[cpu].init) + ret = cpuidle_ops[cpu].init(cpu_node, cpu); + + of_node_put(cpu_node); + + return ret; +} diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 672b21942fff..570306c49406 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -545,7 +545,7 @@ ENDPROC(__und_usr) /* * The out of line fixup for the ldrt instructions above. */ - .pushsection .fixup, "ax" + .pushsection .text.fixup, "ax" .align 2 4: str r4, [sp, #S_PC] @ retry current instruction ret r9 diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 01963273c07a..3637973a9708 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -138,9 +138,9 @@ ENTRY(stext) @ mmu has been enabled adr lr, BSYM(1f) @ return (PIC) address mov r8, r4 @ set TTBR1 to swapper_pg_dir - ARM( add pc, r10, #PROCINFO_INITFUNC ) - THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( ret r12 ) + ldr r12, [r10, #PROCINFO_INITFUNC] + add r12, r12, r10 + ret r12 1: b __enable_mmu ENDPROC(stext) .ltorg @@ -386,10 +386,10 @@ ENTRY(secondary_startup) ldr r8, [r7, lr] @ get secondary_data.swapper_pg_dir adr lr, BSYM(__enable_mmu) @ return address mov r13, r12 @ __secondary_switched address - ARM( add pc, r10, #PROCINFO_INITFUNC ) @ initialise processor - @ (return control reg) - THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( ret r12 ) + ldr r12, [r10, #PROCINFO_INITFUNC] + add r12, r12, r10 @ initialise processor + @ (return control reg) + ret r12 ENDPROC(secondary_startup) ENDPROC(secondary_startup_arm) diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c index c4cc50e58c13..a71501ff6f18 100644 --- a/arch/arm/kernel/hibernate.c +++ b/arch/arm/kernel/hibernate.c @@ -22,6 +22,7 @@ #include <asm/suspend.h> #include <asm/memory.h> #include <asm/sections.h> +#include "reboot.h" int pfn_is_nosave(unsigned long pfn) { @@ -61,7 +62,7 @@ static int notrace arch_save_image(unsigned long unused) ret = swsusp_save(); if (ret == 0) - soft_restart(virt_to_phys(cpu_resume)); + _soft_restart(virt_to_phys(cpu_resume), false); return ret; } @@ -86,7 +87,7 @@ static void notrace arch_restore_image(void *unused) for (pbe = restore_pblist; pbe; pbe = pbe->next) copy_page(pbe->orig_address, pbe->address); - soft_restart(virt_to_phys(cpu_resume)); + _soft_restart(virt_to_phys(cpu_resume), false); } static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata; @@ -99,7 +100,6 @@ static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata; */ int swsusp_arch_resume(void) { - extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); call_with_stack(arch_restore_image, 0, resume_stack + ARRAY_SIZE(resume_stack)); return 0; diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 7fc70ae21185..dc7d0a95bd36 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -648,7 +648,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * Per-cpu breakpoints are not supported by our stepping * mechanism. */ - if (!bp->hw.bp_target) + if (!bp->hw.target) return -EINVAL; /* diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index de2b085ad753..8bf3b7c09888 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -46,7 +46,8 @@ int machine_kexec_prepare(struct kimage *image) * and implements CPU hotplug for the current HW. If not, we won't be * able to kexec reliably, so fail the prepare operation. */ - if (num_possible_cpus() > 1 && !platform_can_cpu_hotplug()) + if (num_possible_cpus() > 1 && platform_can_secondary_boot() && + !platform_can_cpu_hotplug()) return -EINVAL; /* diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 2e11961f65ae..af791f4a6205 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -98,14 +98,19 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_PC24: case R_ARM_CALL: case R_ARM_JUMP24: + if (sym->st_value & 3) { + pr_err("%s: section %u reloc %u sym '%s': unsupported interworking call (ARM -> Thumb)\n", + module->name, relindex, i, symname); + return -ENOEXEC; + } + offset = __mem_to_opcode_arm(*(u32 *)loc); offset = (offset & 0x00ffffff) << 2; if (offset & 0x02000000) offset -= 0x04000000; offset += sym->st_value - loc; - if (offset & 3 || - offset <= (s32)0xfe000000 || + if (offset <= (s32)0xfe000000 || offset >= (s32)0x02000000) { pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", module->name, relindex, i, symname, @@ -155,6 +160,22 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, #ifdef CONFIG_THUMB2_KERNEL case R_ARM_THM_CALL: case R_ARM_THM_JUMP24: + /* + * For function symbols, only Thumb addresses are + * allowed (no interworking). + * + * For non-function symbols, the destination + * has no specific ARM/Thumb disposition, so + * the branch is resolved under the assumption + * that interworking is not required. + */ + if (ELF32_ST_TYPE(sym->st_info) == STT_FUNC && + !(sym->st_value & 1)) { + pr_err("%s: section %u reloc %u sym '%s': unsupported interworking call (Thumb -> ARM)\n", + module->name, relindex, i, symname); + return -ENOEXEC; + } + upper = __mem_to_opcode_thumb16(*(u16 *)loc); lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); @@ -182,18 +203,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset -= 0x02000000; offset += sym->st_value - loc; - /* - * For function symbols, only Thumb addresses are - * allowed (no interworking). - * - * For non-function symbols, the destination - * has no specific ARM/Thumb disposition, so - * the branch is resolved under the assumption - * that interworking is not required. - */ - if ((ELF32_ST_TYPE(sym->st_info) == STT_FUNC && - !(offset & 1)) || - offset <= (s32)0xff000000 || + if (offset <= (s32)0xff000000 || offset >= (s32)0x01000000) { pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", module->name, relindex, i, symname, diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 557e128e4df0..4a86a0133ac3 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -259,20 +259,29 @@ out: } static int -validate_event(struct pmu_hw_events *hw_events, - struct perf_event *event) +validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events, + struct perf_event *event) { - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct arm_pmu *armpmu; if (is_software_event(event)) return 1; + /* + * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The + * core perf code won't check that the pmu->ctx == leader->ctx + * until after pmu->event_init(event). + */ + if (event->pmu != pmu) + return 0; + if (event->state < PERF_EVENT_STATE_OFF) return 1; if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) return 1; + armpmu = to_arm_pmu(event->pmu); return armpmu->get_event_idx(hw_events, event) >= 0; } @@ -288,15 +297,15 @@ validate_group(struct perf_event *event) */ memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask)); - if (!validate_event(&fake_pmu, leader)) + if (!validate_event(event->pmu, &fake_pmu, leader)) return -EINVAL; list_for_each_entry(sibling, &leader->sibling_list, group_entry) { - if (!validate_event(&fake_pmu, sibling)) + if (!validate_event(event->pmu, &fake_pmu, sibling)) return -EINVAL; } - if (!validate_event(&fake_pmu, event)) + if (!validate_event(event->pmu, &fake_pmu, event)) return -EINVAL; return 0; diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 61b53c46edfa..91c7ba182dcd 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -92,11 +92,16 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) free_percpu_irq(irq, &hw_events->percpu_pmu); } else { for (i = 0; i < irqs; ++i) { - if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs)) + int cpu = i; + + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + + if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) continue; irq = platform_get_irq(pmu_device, i); if (irq >= 0) - free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, i)); + free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); } } } @@ -128,32 +133,37 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); } else { for (i = 0; i < irqs; ++i) { + int cpu = i; + err = 0; irq = platform_get_irq(pmu_device, i); if (irq < 0) continue; + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + /* * If we have a single PMU interrupt that we can't shift, * assume that we're running on a uniprocessor machine and * continue. Otherwise, continue without this interrupt. */ - if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { + if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, i); + irq, cpu); continue; } err = request_irq(irq, handler, IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", - per_cpu_ptr(&hw_events->percpu_pmu, i)); + per_cpu_ptr(&hw_events->percpu_pmu, cpu)); if (err) { pr_err("unable to request IRQ%d for ARM PMU counters\n", irq); return err; } - cpumask_set_cpu(i, &cpu_pmu->active_irqs); + cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); } } @@ -243,6 +253,8 @@ static const struct of_device_id cpu_pmu_of_device_ids[] = { {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, + {.compatible = "qcom,scorpion-pmu", .data = scorpion_pmu_init}, + {.compatible = "qcom,scorpion-mp-pmu", .data = scorpion_mp_pmu_init}, {}, }; @@ -289,6 +301,48 @@ static int probe_current_pmu(struct arm_pmu *pmu) return ret; } +static int of_pmu_irq_cfg(struct platform_device *pdev) +{ + int i; + int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + + if (!irqs) + return -ENOMEM; + + for (i = 0; i < pdev->num_resources; ++i) { + struct device_node *dn; + int cpu; + + dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", + i); + if (!dn) { + pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", + of_node_full_name(dn), i); + break; + } + + for_each_possible_cpu(cpu) + if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) + break; + + of_node_put(dn); + if (cpu >= nr_cpu_ids) { + pr_warn("Failed to find logical CPU for %s\n", + dn->name); + break; + } + + irqs[i] = cpu; + } + + if (i == pdev->num_resources) + cpu_pmu->irq_affinity = irqs; + else + kfree(irqs); + + return 0; +} + static int cpu_pmu_device_probe(struct platform_device *pdev) { const struct of_device_id *of_id; @@ -313,7 +367,10 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { init_fn = of_id->data; - ret = init_fn(pmu); + + ret = of_pmu_irq_cfg(pdev); + if (!ret) + ret = init_fn(pmu); } else { ret = probe_current_pmu(pmu); } diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 8993770c47de..f4207a4dcb01 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -140,6 +140,23 @@ enum krait_perf_types { KRAIT_PERFCTR_L1_DTLB_ACCESS = 0x12210, }; +/* ARMv7 Scorpion specific event types */ +enum scorpion_perf_types { + SCORPION_LPM0_GROUP0 = 0x4c, + SCORPION_LPM1_GROUP0 = 0x50, + SCORPION_LPM2_GROUP0 = 0x54, + SCORPION_L2LPM_GROUP0 = 0x58, + SCORPION_VLPM_GROUP0 = 0x5c, + + SCORPION_ICACHE_ACCESS = 0x10053, + SCORPION_ICACHE_MISS = 0x10052, + + SCORPION_DTLB_ACCESS = 0x12013, + SCORPION_DTLB_MISS = 0x12012, + + SCORPION_ITLB_MISS = 0x12021, +}; + /* * Cortex-A8 HW events mapping * @@ -482,6 +499,49 @@ static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] }; /* + * Scorpion HW events mapping + */ +static const unsigned scorpion_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + /* + * The performance counters don't differentiate between read and write + * accesses/misses so this isn't strictly correct, but it's the best we + * can do. Writes and reads get combined. + */ + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ICACHE_MISS, + /* + * Only ITLB misses and DTLB refills are supported. If users want the + * DTLB refills misses a raw counter must be used. + */ + [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = SCORPION_DTLB_ACCESS, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_DTLB_MISS, + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, + [C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = SCORPION_ITLB_MISS, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +/* * Perf Events' indices */ #define ARMV7_IDX_CYCLE_COUNTER 0 @@ -976,6 +1036,12 @@ static int krait_map_event_no_branch(struct perf_event *event) &krait_perf_cache_map, 0xFFFFF); } +static int scorpion_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &scorpion_perf_map, + &scorpion_perf_cache_map, 0xFFFFF); +} + static void armv7pmu_init(struct arm_pmu *cpu_pmu) { cpu_pmu->handle_irq = armv7pmu_handle_irq; @@ -1103,6 +1169,12 @@ static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) #define KRAIT_EVENT_MASK (KRAIT_EVENT | VENUM_EVENT) #define PMRESRn_EN BIT(31) +#define EVENT_REGION(event) (((event) >> 12) & 0xf) /* R */ +#define EVENT_GROUP(event) ((event) & 0xf) /* G */ +#define EVENT_CODE(event) (((event) >> 4) & 0xff) /* CC */ +#define EVENT_VENUM(event) (!!(event & VENUM_EVENT)) /* N=2 */ +#define EVENT_CPU(event) (!!(event & KRAIT_EVENT)) /* N=1 */ + static u32 krait_read_pmresrn(int n) { u32 val; @@ -1141,19 +1213,19 @@ static void krait_write_pmresrn(int n, u32 val) } } -static u32 krait_read_vpmresr0(void) +static u32 venum_read_pmresr(void) { u32 val; asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val)); return val; } -static void krait_write_vpmresr0(u32 val) +static void venum_write_pmresr(u32 val) { asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val)); } -static void krait_pre_vpmresr0(u32 *venum_orig_val, u32 *fp_orig_val) +static void venum_pre_pmresr(u32 *venum_orig_val, u32 *fp_orig_val) { u32 venum_new_val; u32 fp_new_val; @@ -1170,7 +1242,7 @@ static void krait_pre_vpmresr0(u32 *venum_orig_val, u32 *fp_orig_val) fmxr(FPEXC, fp_new_val); } -static void krait_post_vpmresr0(u32 venum_orig_val, u32 fp_orig_val) +static void venum_post_pmresr(u32 venum_orig_val, u32 fp_orig_val) { BUG_ON(preemptible()); /* Restore FPEXC */ @@ -1193,16 +1265,11 @@ static void krait_evt_setup(int idx, u32 config_base) u32 val; u32 mask; u32 vval, fval; - unsigned int region; - unsigned int group; - unsigned int code; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + unsigned int code = EVENT_CODE(config_base); unsigned int group_shift; - bool venum_event; - - venum_event = !!(config_base & VENUM_EVENT); - region = (config_base >> 12) & 0xf; - code = (config_base >> 4) & 0xff; - group = (config_base >> 0) & 0xf; + bool venum_event = EVENT_VENUM(config_base); group_shift = group * 8; mask = 0xff << group_shift; @@ -1217,16 +1284,14 @@ static void krait_evt_setup(int idx, u32 config_base) val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); armv7_pmnc_write_evtsel(idx, val); - asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); - if (venum_event) { - krait_pre_vpmresr0(&vval, &fval); - val = krait_read_vpmresr0(); + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); val &= ~mask; val |= code << group_shift; val |= PMRESRn_EN; - krait_write_vpmresr0(val); - krait_post_vpmresr0(vval, fval); + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); } else { val = krait_read_pmresrn(region); val &= ~mask; @@ -1236,7 +1301,7 @@ static void krait_evt_setup(int idx, u32 config_base) } } -static u32 krait_clear_pmresrn_group(u32 val, int group) +static u32 clear_pmresrn_group(u32 val, int group) { u32 mask; int group_shift; @@ -1256,23 +1321,19 @@ static void krait_clearpmu(u32 config_base) { u32 val; u32 vval, fval; - unsigned int region; - unsigned int group; - bool venum_event; - - venum_event = !!(config_base & VENUM_EVENT); - region = (config_base >> 12) & 0xf; - group = (config_base >> 0) & 0xf; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + bool venum_event = EVENT_VENUM(config_base); if (venum_event) { - krait_pre_vpmresr0(&vval, &fval); - val = krait_read_vpmresr0(); - val = krait_clear_pmresrn_group(val, group); - krait_write_vpmresr0(val); - krait_post_vpmresr0(vval, fval); + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val = clear_pmresrn_group(val, group); + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); } else { val = krait_read_pmresrn(region); - val = krait_clear_pmresrn_group(val, group); + val = clear_pmresrn_group(val, group); krait_write_pmresrn(region, val); } } @@ -1342,6 +1403,8 @@ static void krait_pmu_enable_event(struct perf_event *event) static void krait_pmu_reset(void *info) { u32 vval, fval; + struct arm_pmu *cpu_pmu = info; + u32 idx, nb_cnt = cpu_pmu->num_events; armv7pmu_reset(info); @@ -1350,9 +1413,16 @@ static void krait_pmu_reset(void *info) krait_write_pmresrn(1, 0); krait_write_pmresrn(2, 0); - krait_pre_vpmresr0(&vval, &fval); - krait_write_vpmresr0(0); - krait_post_vpmresr0(vval, fval); + venum_pre_pmresr(&vval, &fval); + venum_write_pmresr(0); + venum_post_pmresr(vval, fval); + + /* Reset PMxEVNCTCR to sane default */ + for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv7_pmnc_select_counter(idx); + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + } + } static int krait_event_to_bit(struct perf_event *event, unsigned int region, @@ -1386,26 +1456,18 @@ static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc, { int idx; int bit = -1; - unsigned int prefix; - unsigned int region; - unsigned int code; - unsigned int group; - bool krait_event; struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int code = EVENT_CODE(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool krait_event = EVENT_CPU(hwc->config_base); - region = (hwc->config_base >> 12) & 0xf; - code = (hwc->config_base >> 4) & 0xff; - group = (hwc->config_base >> 0) & 0xf; - krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK); - - if (krait_event) { + if (venum_event || krait_event) { /* Ignore invalid events */ if (group > 3 || region > 2) return -EINVAL; - prefix = hwc->config_base & KRAIT_EVENT_MASK; - if (prefix != KRAIT_EVENT && prefix != VENUM_EVENT) - return -EINVAL; - if (prefix == VENUM_EVENT && (code & 0xe0)) + if (venum_event && (code & 0xe0)) return -EINVAL; bit = krait_event_to_bit(event, region, group); @@ -1425,15 +1487,12 @@ static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc, { int bit; struct hw_perf_event *hwc = &event->hw; - unsigned int region; - unsigned int group; - bool krait_event; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool krait_event = EVENT_CPU(hwc->config_base); - region = (hwc->config_base >> 12) & 0xf; - group = (hwc->config_base >> 0) & 0xf; - krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK); - - if (krait_event) { + if (venum_event || krait_event) { bit = krait_event_to_bit(event, region, group); clear_bit(bit, cpuc->used_mask); } @@ -1458,6 +1517,344 @@ static int krait_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx; return 0; } + +/* + * Scorpion Local Performance Monitor Register (LPMn) + * + * 31 30 24 16 8 0 + * +--------------------------------+ + * LPM0 | EN | CC | CC | CC | CC | N = 1, R = 0 + * +--------------------------------+ + * LPM1 | EN | CC | CC | CC | CC | N = 1, R = 1 + * +--------------------------------+ + * LPM2 | EN | CC | CC | CC | CC | N = 1, R = 2 + * +--------------------------------+ + * L2LPM | EN | CC | CC | CC | CC | N = 1, R = 3 + * +--------------------------------+ + * VLPM | EN | CC | CC | CC | CC | N = 2, R = ? + * +--------------------------------+ + * EN | G=3 | G=2 | G=1 | G=0 + * + * + * Event Encoding: + * + * hwc->config_base = 0xNRCCG + * + * N = prefix, 1 for Scorpion CPU (LPMn/L2LPM), 2 for Venum VFP (VLPM) + * R = region register + * CC = class of events the group G is choosing from + * G = group or particular event + * + * Example: 0x12021 is a Scorpion CPU event in LPM2's group 1 with code 2 + * + * A region (R) corresponds to a piece of the CPU (execution unit, instruction + * unit, etc.) while the event code (CC) corresponds to a particular class of + * events (interrupts for example). An event code is broken down into + * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for + * example). + */ + +static u32 scorpion_read_pmresrn(int n) +{ + u32 val; + + switch (n) { + case 0: + asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val)); + break; + case 1: + asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val)); + break; + case 2: + asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val)); + break; + case 3: + asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val)); + break; + default: + BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */ + } + + return val; +} + +static void scorpion_write_pmresrn(int n, u32 val) +{ + switch (n) { + case 0: + asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val)); + break; + case 1: + asm volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val)); + break; + case 2: + asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val)); + break; + case 3: + asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val)); + break; + default: + BUG(); /* Should be validated in scorpion_pmu_get_event_idx() */ + } +} + +static u32 scorpion_get_pmresrn_event(unsigned int region) +{ + static const u32 pmresrn_table[] = { SCORPION_LPM0_GROUP0, + SCORPION_LPM1_GROUP0, + SCORPION_LPM2_GROUP0, + SCORPION_L2LPM_GROUP0 }; + return pmresrn_table[region]; +} + +static void scorpion_evt_setup(int idx, u32 config_base) +{ + u32 val; + u32 mask; + u32 vval, fval; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + unsigned int code = EVENT_CODE(config_base); + unsigned int group_shift; + bool venum_event = EVENT_VENUM(config_base); + + group_shift = group * 8; + mask = 0xff << group_shift; + + /* Configure evtsel for the region and group */ + if (venum_event) + val = SCORPION_VLPM_GROUP0; + else + val = scorpion_get_pmresrn_event(region); + val += group; + /* Mix in mode-exclusion bits */ + val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); + armv7_pmnc_write_evtsel(idx, val); + + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + + if (venum_event) { + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); + } else { + val = scorpion_read_pmresrn(region); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + scorpion_write_pmresrn(region, val); + } +} + +static void scorpion_clearpmu(u32 config_base) +{ + u32 val; + u32 vval, fval; + unsigned int region = EVENT_REGION(config_base); + unsigned int group = EVENT_GROUP(config_base); + bool venum_event = EVENT_VENUM(config_base); + + if (venum_event) { + venum_pre_pmresr(&vval, &fval); + val = venum_read_pmresr(); + val = clear_pmresrn_group(val, group); + venum_write_pmresr(val); + venum_post_pmresr(vval, fval); + } else { + val = scorpion_read_pmresrn(region); + val = clear_pmresrn_group(val, group); + scorpion_write_pmresrn(region, val); + } +} + +static void scorpion_pmu_disable_event(struct perf_event *event) +{ + unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + + /* Disable counter and interrupt */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Clear pmresr code (if destined for PMNx counters) + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + scorpion_clearpmu(hwc->config_base); + + /* Disable interrupt for this counter */ + armv7_pmnc_disable_intens(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void scorpion_pmu_enable_event(struct perf_event *event) +{ + unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We don't set the event for the cycle counter because we + * don't have the ability to perform event filtering. + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + scorpion_evt_setup(idx, hwc->config_base); + else if (idx != ARMV7_IDX_CYCLE_COUNTER) + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* Enable interrupt for this counter */ + armv7_pmnc_enable_intens(idx); + + /* Enable counter */ + armv7_pmnc_enable_counter(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void scorpion_pmu_reset(void *info) +{ + u32 vval, fval; + struct arm_pmu *cpu_pmu = info; + u32 idx, nb_cnt = cpu_pmu->num_events; + + armv7pmu_reset(info); + + /* Clear all pmresrs */ + scorpion_write_pmresrn(0, 0); + scorpion_write_pmresrn(1, 0); + scorpion_write_pmresrn(2, 0); + scorpion_write_pmresrn(3, 0); + + venum_pre_pmresr(&vval, &fval); + venum_write_pmresr(0); + venum_post_pmresr(vval, fval); + + /* Reset PMxEVNCTCR to sane default */ + for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv7_pmnc_select_counter(idx); + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + } +} + +static int scorpion_event_to_bit(struct perf_event *event, unsigned int region, + unsigned int group) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + + if (hwc->config_base & VENUM_EVENT) + bit = SCORPION_VLPM_GROUP0; + else + bit = scorpion_get_pmresrn_event(region); + bit -= scorpion_get_pmresrn_event(0); + bit += group; + /* + * Lower bits are reserved for use by the counters (see + * armv7pmu_get_event_idx() for more info) + */ + bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1; + + return bit; +} + +/* + * We check for column exclusion constraints here. + * Two events cant use the same group within a pmresr register. + */ +static int scorpion_pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx; + int bit = -1; + struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool scorpion_event = EVENT_CPU(hwc->config_base); + + if (venum_event || scorpion_event) { + /* Ignore invalid events */ + if (group > 3 || region > 3) + return -EINVAL; + + bit = scorpion_event_to_bit(event, region, group); + if (test_and_set_bit(bit, cpuc->used_mask)) + return -EAGAIN; + } + + idx = armv7pmu_get_event_idx(cpuc, event); + if (idx < 0 && bit >= 0) + clear_bit(bit, cpuc->used_mask); + + return idx; +} + +static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + unsigned int region = EVENT_REGION(hwc->config_base); + unsigned int group = EVENT_GROUP(hwc->config_base); + bool venum_event = EVENT_VENUM(hwc->config_base); + bool scorpion_event = EVENT_CPU(hwc->config_base); + + if (venum_event || scorpion_event) { + bit = scorpion_event_to_bit(event, region, group); + clear_bit(bit, cpuc->used_mask); + } +} + +static int scorpion_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_scorpion"; + cpu_pmu->map_event = scorpion_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + cpu_pmu->reset = scorpion_pmu_reset; + cpu_pmu->enable = scorpion_pmu_enable_event; + cpu_pmu->disable = scorpion_pmu_disable_event; + cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; + cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; + return 0; +} + +static int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "armv7_scorpion_mp"; + cpu_pmu->map_event = scorpion_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + cpu_pmu->reset = scorpion_pmu_reset; + cpu_pmu->enable = scorpion_pmu_enable_event; + cpu_pmu->disable = scorpion_pmu_disable_event; + cpu_pmu->get_event_idx = scorpion_pmu_get_event_idx; + cpu_pmu->clear_event_idx = scorpion_pmu_clear_event_idx; + return 0; +} #else static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) { @@ -1498,4 +1895,14 @@ static inline int krait_pmu_init(struct arm_pmu *cpu_pmu) { return -ENODEV; } + +static inline int scorpion_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} + +static inline int scorpion_mp_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} #endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index fdfa3a78ec8c..f192a2a41719 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -17,12 +17,9 @@ #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/user.h> -#include <linux/delay.h> -#include <linux/reboot.h> #include <linux/interrupt.h> #include <linux/kallsyms.h> #include <linux/init.h> -#include <linux/cpu.h> #include <linux/elfcore.h> #include <linux/pm.h> #include <linux/tick.h> @@ -31,16 +28,14 @@ #include <linux/random.h> #include <linux/hw_breakpoint.h> #include <linux/leds.h> -#include <linux/reboot.h> -#include <asm/cacheflush.h> -#include <asm/idmap.h> #include <asm/processor.h> #include <asm/thread_notify.h> #include <asm/stacktrace.h> #include <asm/system_misc.h> #include <asm/mach/time.h> #include <asm/tls.h> +#include <asm/vdso.h> #ifdef CONFIG_CC_STACKPROTECTOR #include <linux/stackprotector.h> @@ -59,69 +54,6 @@ static const char *isa_modes[] __maybe_unused = { "ARM" , "Thumb" , "Jazelle", "ThumbEE" }; -extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); -typedef void (*phys_reset_t)(unsigned long); - -/* - * A temporary stack to use for CPU reset. This is static so that we - * don't clobber it with the identity mapping. When running with this - * stack, any references to the current task *will not work* so you - * should really do as little as possible before jumping to your reset - * code. - */ -static u64 soft_restart_stack[16]; - -static void __soft_restart(void *addr) -{ - phys_reset_t phys_reset; - - /* Take out a flat memory mapping. */ - setup_mm_for_reboot(); - - /* Clean and invalidate caches */ - flush_cache_all(); - - /* Turn off caching */ - cpu_proc_fin(); - - /* Push out any further dirty data, and ensure cache is empty */ - flush_cache_all(); - - /* Switch to the identity mapping. */ - phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); - phys_reset((unsigned long)addr); - - /* Should never get here. */ - BUG(); -} - -void soft_restart(unsigned long addr) -{ - u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); - - /* Disable interrupts first */ - raw_local_irq_disable(); - local_fiq_disable(); - - /* Disable the L2 if we're the last man standing. */ - if (num_online_cpus() == 1) - outer_disable(); - - /* Change to the new stack and continue with the reset. */ - call_with_stack(__soft_restart, (void *)addr, (void *)stack); - - /* Should never get here. */ - BUG(); -} - -/* - * Function pointers to optional machine specific functions - */ -void (*pm_power_off)(void); -EXPORT_SYMBOL(pm_power_off); - -void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); - /* * This is our default idle handler. */ @@ -166,79 +98,6 @@ void arch_cpu_idle_dead(void) } #endif -/* - * Called by kexec, immediately prior to machine_kexec(). - * - * This must completely disable all secondary CPUs; simply causing those CPUs - * to execute e.g. a RAM-based pin loop is not sufficient. This allows the - * kexec'd kernel to use any and all RAM as it sees fit, without having to - * avoid any code or data used by any SW CPU pin loop. The CPU hotplug - * functionality embodied in disable_nonboot_cpus() to achieve this. - */ -void machine_shutdown(void) -{ - disable_nonboot_cpus(); -} - -/* - * Halting simply requires that the secondary CPUs stop performing any - * activity (executing tasks, handling interrupts). smp_send_stop() - * achieves this. - */ -void machine_halt(void) -{ - local_irq_disable(); - smp_send_stop(); - - local_irq_disable(); - while (1); -} - -/* - * Power-off simply requires that the secondary CPUs stop performing any - * activity (executing tasks, handling interrupts). smp_send_stop() - * achieves this. When the system power is turned off, it will take all CPUs - * with it. - */ -void machine_power_off(void) -{ - local_irq_disable(); - smp_send_stop(); - - if (pm_power_off) - pm_power_off(); -} - -/* - * Restart requires that the secondary CPUs stop performing any activity - * while the primary CPU resets the system. Systems with a single CPU can - * use soft_restart() as their machine descriptor's .restart hook, since that - * will cause the only available CPU to reset. Systems with multiple CPUs must - * provide a HW restart implementation, to ensure that all CPUs reset at once. - * This is required so that any code running after reset on the primary CPU - * doesn't have to co-ordinate with other CPUs to ensure they aren't still - * executing pre-reset code, and using RAM that the primary CPU's code wishes - * to use. Implementing such co-ordination would be essentially impossible. - */ -void machine_restart(char *cmd) -{ - local_irq_disable(); - smp_send_stop(); - - if (arm_pm_restart) - arm_pm_restart(reboot_mode, cmd); - else - do_kernel_restart(cmd); - - /* Give a grace period for failure to restart of 1s */ - mdelay(1000); - - /* Whoops - the platform was unable to reboot. Tell the user! */ - printk("Reboot failed -- System halted\n"); - local_irq_disable(); - while (1); -} - void __show_regs(struct pt_regs *regs) { unsigned long flags; @@ -475,7 +334,7 @@ const char *arch_vma_name(struct vm_area_struct *vma) } /* If possible, provide a placement hint at a random offset from the - * stack for the signal page. + * stack for the sigpage and vdso pages. */ static unsigned long sigpage_addr(const struct mm_struct *mm, unsigned int npages) @@ -519,6 +378,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; + unsigned long npages; unsigned long addr; unsigned long hint; int ret = 0; @@ -528,9 +388,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!signal_page) return -ENOMEM; + npages = 1; /* for sigpage */ + npages += vdso_total_pages; + down_write(&mm->mmap_sem); - hint = sigpage_addr(mm, 1); - addr = get_unmapped_area(NULL, hint, PAGE_SIZE, 0, 0); + hint = sigpage_addr(mm, npages); + addr = get_unmapped_area(NULL, hint, npages << PAGE_SHIFT, 0, 0); if (IS_ERR_VALUE(addr)) { ret = addr; goto up_fail; @@ -547,6 +410,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) mm->context.sigpage = addr; + /* Unlike the sigpage, failure to install the vdso is unlikely + * to be fatal to the process, so no error check needed + * here. + */ + arm_install_vdso(mm, addr + PAGE_SIZE); + up_fail: up_write(&mm->mmap_sem); return ret; diff --git a/arch/arm/kernel/psci-call.S b/arch/arm/kernel/psci-call.S new file mode 100644 index 000000000000..a78e9e1e206d --- /dev/null +++ b/arch/arm/kernel/psci-call.S @@ -0,0 +1,31 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2015 ARM Limited + * + * Author: Mark Rutland <mark.rutland@arm.com> + */ + +#include <linux/linkage.h> + +#include <asm/opcodes-sec.h> +#include <asm/opcodes-virt.h> + +/* int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */ +ENTRY(__invoke_psci_fn_hvc) + __HVC(0) + bx lr +ENDPROC(__invoke_psci_fn_hvc) + +/* int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */ +ENTRY(__invoke_psci_fn_smc) + __SMC(0) + bx lr +ENDPROC(__invoke_psci_fn_smc) diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c index f73891b6b730..f90fdf4ce7c7 100644 --- a/arch/arm/kernel/psci.c +++ b/arch/arm/kernel/psci.c @@ -23,8 +23,6 @@ #include <asm/compiler.h> #include <asm/errno.h> -#include <asm/opcodes-sec.h> -#include <asm/opcodes-virt.h> #include <asm/psci.h> #include <asm/system_misc.h> @@ -33,6 +31,9 @@ struct psci_operations psci_ops; static int (*invoke_psci_fn)(u32, u32, u32, u32); typedef int (*psci_initcall_t)(const struct device_node *); +asmlinkage int __invoke_psci_fn_hvc(u32, u32, u32, u32); +asmlinkage int __invoke_psci_fn_smc(u32, u32, u32, u32); + enum psci_function { PSCI_FN_CPU_SUSPEND, PSCI_FN_CPU_ON, @@ -71,40 +72,6 @@ static u32 psci_power_state_pack(struct psci_power_state state) & PSCI_0_2_POWER_STATE_AFFL_MASK); } -/* - * The following two functions are invoked via the invoke_psci_fn pointer - * and will not be inlined, allowing us to piggyback on the AAPCS. - */ -static noinline int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, - u32 arg2) -{ - asm volatile( - __asmeq("%0", "r0") - __asmeq("%1", "r1") - __asmeq("%2", "r2") - __asmeq("%3", "r3") - __HVC(0) - : "+r" (function_id) - : "r" (arg0), "r" (arg1), "r" (arg2)); - - return function_id; -} - -static noinline int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, - u32 arg2) -{ - asm volatile( - __asmeq("%0", "r0") - __asmeq("%1", "r1") - __asmeq("%2", "r2") - __asmeq("%3", "r3") - __SMC(0) - : "+r" (function_id) - : "r" (arg0), "r" (arg1), "r" (arg2)); - - return function_id; -} - static int psci_get_version(void) { int err; diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c new file mode 100644 index 000000000000..1a4d232796be --- /dev/null +++ b/arch/arm/kernel/reboot.c @@ -0,0 +1,155 @@ +/* + * Copyright (C) 1996-2000 Russell King - Converted to ARM. + * Original Copyright (C) 1995 Linus Torvalds + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/cpu.h> +#include <linux/delay.h> +#include <linux/reboot.h> + +#include <asm/cacheflush.h> +#include <asm/idmap.h> + +#include "reboot.h" + +typedef void (*phys_reset_t)(unsigned long); + +/* + * Function pointers to optional machine specific functions + */ +void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); +void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); + +/* + * A temporary stack to use for CPU reset. This is static so that we + * don't clobber it with the identity mapping. When running with this + * stack, any references to the current task *will not work* so you + * should really do as little as possible before jumping to your reset + * code. + */ +static u64 soft_restart_stack[16]; + +static void __soft_restart(void *addr) +{ + phys_reset_t phys_reset; + + /* Take out a flat memory mapping. */ + setup_mm_for_reboot(); + + /* Clean and invalidate caches */ + flush_cache_all(); + + /* Turn off caching */ + cpu_proc_fin(); + + /* Push out any further dirty data, and ensure cache is empty */ + flush_cache_all(); + + /* Switch to the identity mapping. */ + phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); + phys_reset((unsigned long)addr); + + /* Should never get here. */ + BUG(); +} + +void _soft_restart(unsigned long addr, bool disable_l2) +{ + u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); + + /* Disable interrupts first */ + raw_local_irq_disable(); + local_fiq_disable(); + + /* Disable the L2 if we're the last man standing. */ + if (disable_l2) + outer_disable(); + + /* Change to the new stack and continue with the reset. */ + call_with_stack(__soft_restart, (void *)addr, (void *)stack); + + /* Should never get here. */ + BUG(); +} + +void soft_restart(unsigned long addr) +{ + _soft_restart(addr, num_online_cpus() == 1); +} + +/* + * Called by kexec, immediately prior to machine_kexec(). + * + * This must completely disable all secondary CPUs; simply causing those CPUs + * to execute e.g. a RAM-based pin loop is not sufficient. This allows the + * kexec'd kernel to use any and all RAM as it sees fit, without having to + * avoid any code or data used by any SW CPU pin loop. The CPU hotplug + * functionality embodied in disable_nonboot_cpus() to achieve this. + */ +void machine_shutdown(void) +{ + disable_nonboot_cpus(); +} + +/* + * Halting simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. + */ +void machine_halt(void) +{ + local_irq_disable(); + smp_send_stop(); + + local_irq_disable(); + while (1); +} + +/* + * Power-off simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. When the system power is turned off, it will take all CPUs + * with it. + */ +void machine_power_off(void) +{ + local_irq_disable(); + smp_send_stop(); + + if (pm_power_off) + pm_power_off(); +} + +/* + * Restart requires that the secondary CPUs stop performing any activity + * while the primary CPU resets the system. Systems with a single CPU can + * use soft_restart() as their machine descriptor's .restart hook, since that + * will cause the only available CPU to reset. Systems with multiple CPUs must + * provide a HW restart implementation, to ensure that all CPUs reset at once. + * This is required so that any code running after reset on the primary CPU + * doesn't have to co-ordinate with other CPUs to ensure they aren't still + * executing pre-reset code, and using RAM that the primary CPU's code wishes + * to use. Implementing such co-ordination would be essentially impossible. + */ +void machine_restart(char *cmd) +{ + local_irq_disable(); + smp_send_stop(); + + if (arm_pm_restart) + arm_pm_restart(reboot_mode, cmd); + else + do_kernel_restart(cmd); + + /* Give a grace period for failure to restart of 1s */ + mdelay(1000); + + /* Whoops - the platform was unable to reboot. Tell the user! */ + printk("Reboot failed -- System halted\n"); + local_irq_disable(); + while (1); +} diff --git a/arch/arm/kernel/reboot.h b/arch/arm/kernel/reboot.h new file mode 100644 index 000000000000..bf7a0b1f076e --- /dev/null +++ b/arch/arm/kernel/reboot.h @@ -0,0 +1,7 @@ +#ifndef REBOOT_H +#define REBOOT_H + +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); +extern void _soft_restart(unsigned long addr, bool disable_l2); + +#endif diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c index 24b4a04846eb..36ed35073289 100644 --- a/arch/arm/kernel/return_address.c +++ b/arch/arm/kernel/return_address.c @@ -56,8 +56,6 @@ void *return_address(unsigned int level) return NULL; } -#else /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */ - -#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) / else */ +#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */ EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index e55408e96559..6c777e908a24 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -246,12 +246,9 @@ static int __get_cpu_architecture(void) if (cpu_arch) cpu_arch += CPU_ARCH_ARMv3; } else if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) { - unsigned int mmfr0; - /* Revised CPUID format. Read the Memory Model Feature * Register 0 and check for VMSAv7 or PMSAv7 */ - asm("mrc p15, 0, %0, c0, c1, 4" - : "=r" (mmfr0)); + unsigned int mmfr0 = read_cpuid_ext(CPUID_EXT_MMFR0); if ((mmfr0 & 0x0000000f) >= 0x00000003 || (mmfr0 & 0x000000f0) >= 0x00000030) cpu_arch = CPU_ARCH_ARMv7; @@ -375,30 +372,48 @@ void __init early_print(const char *str, ...) static void __init cpuid_init_hwcaps(void) { - unsigned int divide_instrs, vmsa; + int block; + u32 isar5; if (cpu_architecture() < CPU_ARCH_ARMv7) return; - divide_instrs = (read_cpuid_ext(CPUID_EXT_ISAR0) & 0x0f000000) >> 24; - - switch (divide_instrs) { - case 2: + block = cpuid_feature_extract(CPUID_EXT_ISAR0, 24); + if (block >= 2) elf_hwcap |= HWCAP_IDIVA; - case 1: + if (block >= 1) elf_hwcap |= HWCAP_IDIVT; - } /* LPAE implies atomic ldrd/strd instructions */ - vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0; - if (vmsa >= 5) + block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0); + if (block >= 5) elf_hwcap |= HWCAP_LPAE; + + /* check for supported v8 Crypto instructions */ + isar5 = read_cpuid_ext(CPUID_EXT_ISAR5); + + block = cpuid_feature_extract_field(isar5, 4); + if (block >= 2) + elf_hwcap2 |= HWCAP2_PMULL; + if (block >= 1) + elf_hwcap2 |= HWCAP2_AES; + + block = cpuid_feature_extract_field(isar5, 8); + if (block >= 1) + elf_hwcap2 |= HWCAP2_SHA1; + + block = cpuid_feature_extract_field(isar5, 12); + if (block >= 1) + elf_hwcap2 |= HWCAP2_SHA2; + + block = cpuid_feature_extract_field(isar5, 16); + if (block >= 1) + elf_hwcap2 |= HWCAP2_CRC32; } static void __init elf_hwcap_fixup(void) { unsigned id = read_cpuid_id(); - unsigned sync_prim; /* * HWCAP_TLS is available only on 1136 r1p0 and later, @@ -419,9 +434,9 @@ static void __init elf_hwcap_fixup(void) * avoid advertising SWP; it may not be atomic with * multiprocessing cores. */ - sync_prim = ((read_cpuid_ext(CPUID_EXT_ISAR3) >> 8) & 0xf0) | - ((read_cpuid_ext(CPUID_EXT_ISAR4) >> 20) & 0x0f); - if (sync_prim >= 0x13) + if (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) > 1 || + (cpuid_feature_extract(CPUID_EXT_ISAR3, 12) == 1 && + cpuid_feature_extract(CPUID_EXT_ISAR3, 20) >= 3)) elf_hwcap &= ~HWCAP_SWP; } diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 023ac905e4c3..423663e23791 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -318,17 +318,6 @@ get_sigframe(struct ksignal *ksig, struct pt_regs *regs, int framesize) return frame; } -/* - * translate the signal - */ -static inline int map_sig(int sig) -{ - struct thread_info *thread = current_thread_info(); - if (sig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap) - sig = thread->exec_domain->signal_invmap[sig]; - return sig; -} - static int setup_return(struct pt_regs *regs, struct ksignal *ksig, unsigned long __user *rc, void __user *frame) @@ -412,7 +401,7 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, } } - regs->ARM_r0 = map_sig(ksig->sig); + regs->ARM_r0 = ksig->sig; regs->ARM_sp = (unsigned long)frame; regs->ARM_lr = retcode; regs->ARM_pc = handler; diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index e1e60e5a7a27..7d37bfc50830 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -116,14 +116,7 @@ cpu_resume_after_mmu: ldmfd sp!, {r4 - r11, pc} ENDPROC(cpu_resume_after_mmu) -/* - * Note: Yes, part of the following code is located into the .data section. - * This is to allow sleep_save_sp to be accessed with a relative load - * while we can't rely on any MMU translation. We could have put - * sleep_save_sp in the .text section as well, but some setups might - * insist on it to be truly read-only. - */ - .data + .text .align ENTRY(cpu_resume) ARM_BE8(setend be) @ ensure we are in BE mode @@ -145,6 +138,8 @@ ARM_BE8(setend be) @ ensure we are in BE mode compute_mpidr_hash r1, r4, r5, r6, r0, r3 1: adr r0, _sleep_save_sp + ldr r2, [r0] + add r0, r0, r2 ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] ldr r0, [r0, r1, lsl #2] @@ -156,10 +151,12 @@ THUMB( bx r3 ) ENDPROC(cpu_resume) .align 2 +_sleep_save_sp: + .long sleep_save_sp - . mpidr_hash_ptr: .long mpidr_hash - . @ mpidr_hash struct offset + .data .type sleep_save_sp, #object ENTRY(sleep_save_sp) -_sleep_save_sp: .space SLEEP_SAVE_SP_SZ @ struct sleep_save_sp diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 86ef244c5a24..cca5b8758185 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -145,6 +145,11 @@ void __init smp_init_cpus(void) smp_ops.smp_init_cpus(); } +int platform_can_secondary_boot(void) +{ + return !!smp_ops.smp_boot_secondary; +} + int platform_can_cpu_hotplug(void) { #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index afdd51e30bec..1361756782c7 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -42,7 +42,7 @@ " cmp %0, #0\n" \ " movne %0, %4\n" \ "2:\n" \ - " .section .fixup,\"ax\"\n" \ + " .section .text.fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, %5\n" \ " b 2b\n" \ diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 0cc7e58c47cc..a66e37e211a9 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -76,7 +76,7 @@ void timer_tick(void) } #endif -static void dummy_clock_access(struct timespec *ts) +static void dummy_clock_access(struct timespec64 *ts) { ts->tv_sec = 0; ts->tv_nsec = 0; @@ -85,12 +85,12 @@ static void dummy_clock_access(struct timespec *ts) static clock_access_fn __read_persistent_clock = dummy_clock_access; static clock_access_fn __read_boot_clock = dummy_clock_access;; -void read_persistent_clock(struct timespec *ts) +void read_persistent_clock64(struct timespec64 *ts) { __read_persistent_clock(ts); } -void read_boot_clock(struct timespec *ts) +void read_boot_clock64(struct timespec64 *ts) { __read_boot_clock(ts); } diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 788e23fe64d8..3dce1a342030 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -505,12 +505,10 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason) static int bad_syscall(int n, struct pt_regs *regs) { - struct thread_info *thread = current_thread_info(); siginfo_t info; - if ((current->personality & PER_MASK) != PER_LINUX && - thread->exec_domain->handler) { - thread->exec_domain->handler(n, regs); + if ((current->personality & PER_MASK) != PER_LINUX) { + send_sig(SIGSEGV, current, 1); return regs->ARM_r0; } diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c new file mode 100644 index 000000000000..efe17dd9b921 --- /dev/null +++ b/arch/arm/kernel/vdso.c @@ -0,0 +1,337 @@ +/* + * Adapted from arm64 version. + * + * Copyright (C) 2012 ARM Limited + * Copyright (C) 2015 Mentor Graphics Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/elf.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/of.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/timekeeper_internal.h> +#include <linux/vmalloc.h> +#include <asm/arch_timer.h> +#include <asm/barrier.h> +#include <asm/cacheflush.h> +#include <asm/page.h> +#include <asm/vdso.h> +#include <asm/vdso_datapage.h> +#include <clocksource/arm_arch_timer.h> + +#define MAX_SYMNAME 64 + +static struct page **vdso_text_pagelist; + +/* Total number of pages needed for the data and text portions of the VDSO. */ +unsigned int vdso_total_pages __read_mostly; + +/* + * The VDSO data page. + */ +static union vdso_data_store vdso_data_store __page_aligned_data; +static struct vdso_data *vdso_data = &vdso_data_store.data; + +static struct page *vdso_data_page; +static struct vm_special_mapping vdso_data_mapping = { + .name = "[vvar]", + .pages = &vdso_data_page, +}; + +static struct vm_special_mapping vdso_text_mapping = { + .name = "[vdso]", +}; + +struct elfinfo { + Elf32_Ehdr *hdr; /* ptr to ELF */ + Elf32_Sym *dynsym; /* ptr to .dynsym section */ + unsigned long dynsymsize; /* size of .dynsym section */ + char *dynstr; /* ptr to .dynstr section */ +}; + +/* Cached result of boot-time check for whether the arch timer exists, + * and if so, whether the virtual counter is useable. + */ +static bool cntvct_ok __read_mostly; + +static bool __init cntvct_functional(void) +{ + struct device_node *np; + bool ret = false; + + if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) + goto out; + + /* The arm_arch_timer core should export + * arch_timer_use_virtual or similar so we don't have to do + * this. + */ + np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer"); + if (!np) + goto out_put; + + if (of_property_read_bool(np, "arm,cpu-registers-not-fw-configured")) + goto out_put; + + ret = true; + +out_put: + of_node_put(np); +out: + return ret; +} + +static void * __init find_section(Elf32_Ehdr *ehdr, const char *name, + unsigned long *size) +{ + Elf32_Shdr *sechdrs; + unsigned int i; + char *secnames; + + /* Grab section headers and strings so we can tell who is who */ + sechdrs = (void *)ehdr + ehdr->e_shoff; + secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; + + /* Find the section they want */ + for (i = 1; i < ehdr->e_shnum; i++) { + if (strcmp(secnames + sechdrs[i].sh_name, name) == 0) { + if (size) + *size = sechdrs[i].sh_size; + return (void *)ehdr + sechdrs[i].sh_offset; + } + } + + if (size) + *size = 0; + return NULL; +} + +static Elf32_Sym * __init find_symbol(struct elfinfo *lib, const char *symname) +{ + unsigned int i; + + for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) { + char name[MAX_SYMNAME], *c; + + if (lib->dynsym[i].st_name == 0) + continue; + strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, + MAX_SYMNAME); + c = strchr(name, '@'); + if (c) + *c = 0; + if (strcmp(symname, name) == 0) + return &lib->dynsym[i]; + } + return NULL; +} + +static void __init vdso_nullpatch_one(struct elfinfo *lib, const char *symname) +{ + Elf32_Sym *sym; + + sym = find_symbol(lib, symname); + if (!sym) + return; + + sym->st_name = 0; +} + +static void __init patch_vdso(void *ehdr) +{ + struct elfinfo einfo; + + einfo = (struct elfinfo) { + .hdr = ehdr, + }; + + einfo.dynsym = find_section(einfo.hdr, ".dynsym", &einfo.dynsymsize); + einfo.dynstr = find_section(einfo.hdr, ".dynstr", NULL); + + /* If the virtual counter is absent or non-functional we don't + * want programs to incur the slight additional overhead of + * dispatching through the VDSO only to fall back to syscalls. + */ + if (!cntvct_ok) { + vdso_nullpatch_one(&einfo, "__vdso_gettimeofday"); + vdso_nullpatch_one(&einfo, "__vdso_clock_gettime"); + } +} + +static int __init vdso_init(void) +{ + unsigned int text_pages; + int i; + + if (memcmp(&vdso_start, "\177ELF", 4)) { + pr_err("VDSO is not a valid ELF object!\n"); + return -ENOEXEC; + } + + text_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; + pr_debug("vdso: %i text pages at base %p\n", text_pages, &vdso_start); + + /* Allocate the VDSO text pagelist */ + vdso_text_pagelist = kcalloc(text_pages, sizeof(struct page *), + GFP_KERNEL); + if (vdso_text_pagelist == NULL) + return -ENOMEM; + + /* Grab the VDSO data page. */ + vdso_data_page = virt_to_page(vdso_data); + + /* Grab the VDSO text pages. */ + for (i = 0; i < text_pages; i++) { + struct page *page; + + page = virt_to_page(&vdso_start + i * PAGE_SIZE); + vdso_text_pagelist[i] = page; + } + + vdso_text_mapping.pages = vdso_text_pagelist; + + vdso_total_pages = 1; /* for the data/vvar page */ + vdso_total_pages += text_pages; + + cntvct_ok = cntvct_functional(); + + patch_vdso(&vdso_start); + + return 0; +} +arch_initcall(vdso_init); + +static int install_vvar(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma; + + vma = _install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ | VM_MAYREAD, + &vdso_data_mapping); + + return IS_ERR(vma) ? PTR_ERR(vma) : 0; +} + +/* assumes mmap_sem is write-locked */ +void arm_install_vdso(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma; + unsigned long len; + + mm->context.vdso = 0; + + if (vdso_text_pagelist == NULL) + return; + + if (install_vvar(mm, addr)) + return; + + /* Account for vvar page. */ + addr += PAGE_SIZE; + len = (vdso_total_pages - 1) << PAGE_SHIFT; + + vma = _install_special_mapping(mm, addr, len, + VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, + &vdso_text_mapping); + + if (!IS_ERR(vma)) + mm->context.vdso = addr; +} + +static void vdso_write_begin(struct vdso_data *vdata) +{ + ++vdso_data->seq_count; + smp_wmb(); /* Pairs with smp_rmb in vdso_read_retry */ +} + +static void vdso_write_end(struct vdso_data *vdata) +{ + smp_wmb(); /* Pairs with smp_rmb in vdso_read_begin */ + ++vdso_data->seq_count; +} + +static bool tk_is_cntvct(const struct timekeeper *tk) +{ + if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) + return false; + + if (strcmp(tk->tkr_mono.clock->name, "arch_sys_counter") != 0) + return false; + + return true; +} + +/** + * update_vsyscall - update the vdso data page + * + * Increment the sequence counter, making it odd, indicating to + * userspace that an update is in progress. Update the fields used + * for coarse clocks and, if the architected system timer is in use, + * the fields used for high precision clocks. Increment the sequence + * counter again, making it even, indicating to userspace that the + * update is finished. + * + * Userspace is expected to sample seq_count before reading any other + * fields from the data page. If seq_count is odd, userspace is + * expected to wait until it becomes even. After copying data from + * the page, userspace must sample seq_count again; if it has changed + * from its previous value, userspace must retry the whole sequence. + * + * Calls to update_vsyscall are serialized by the timekeeping core. + */ +void update_vsyscall(struct timekeeper *tk) +{ + struct timespec xtime_coarse; + struct timespec64 *wtm = &tk->wall_to_monotonic; + + if (!cntvct_ok) { + /* The entry points have been zeroed, so there is no + * point in updating the data page. + */ + return; + } + + vdso_write_begin(vdso_data); + + xtime_coarse = __current_kernel_time(); + vdso_data->tk_is_cntvct = tk_is_cntvct(tk); + vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec; + vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec; + vdso_data->wtm_clock_sec = wtm->tv_sec; + vdso_data->wtm_clock_nsec = wtm->tv_nsec; + + if (vdso_data->tk_is_cntvct) { + vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; + vdso_data->xtime_clock_sec = tk->xtime_sec; + vdso_data->xtime_clock_snsec = tk->tkr_mono.xtime_nsec; + vdso_data->cs_mult = tk->tkr_mono.mult; + vdso_data->cs_shift = tk->tkr_mono.shift; + vdso_data->cs_mask = tk->tkr_mono.mask; + } + + vdso_write_end(vdso_data); + + flush_dcache_page(virt_to_page(vdso_data)); +} + +void update_vsyscall_tz(void) +{ + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; + flush_dcache_page(virt_to_page(vdso_data)); +} diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index f2db429ea75d..8b60fde5ce48 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -74,7 +74,7 @@ SECTIONS ARM_EXIT_DISCARD(EXIT_DATA) EXIT_CALL #ifndef CONFIG_MMU - *(.fixup) + *(.text.fixup) *(__ex_table) #endif #ifndef CONFIG_SMP_ON_UP @@ -100,6 +100,7 @@ SECTIONS .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ + IDMAP_TEXT __exception_text_start = .; *(.exception.text) __exception_text_end = .; @@ -108,10 +109,6 @@ SECTIONS SCHED_TEXT LOCK_TEXT KPROBES_TEXT - IDMAP_TEXT -#ifdef CONFIG_MMU - *(.fixup) -#endif *(.gnu.warning) *(.glue_7) *(.glue_7t) diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 338ace78ed18..f1f79d104309 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -18,6 +18,7 @@ if VIRTUALIZATION config KVM bool "Kernel-based Virtual Machine (KVM) support" + depends on MMU && OF select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT @@ -26,10 +27,12 @@ config KVM select KVM_ARM_HOST select KVM_GENERIC_DIRTYLOG_READ_PROTECT select SRCU - depends on ARM_VIRT_EXT && ARM_LPAE + select MMU_NOTIFIER + select HAVE_KVM_EVENTFD + select HAVE_KVM_IRQFD + depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER ---help--- - Support hosting virtualized guest machines. You will also - need to select one or more of the processor modules below. + Support hosting virtualized guest machines. This module provides access to the hardware capabilities through a character device node named /dev/kvm. @@ -37,10 +40,7 @@ config KVM If unsure, say N. config KVM_ARM_HOST - bool "KVM host support for ARM cpus." - depends on KVM - depends on MMU - select MMU_NOTIFIER + bool ---help--- Provides host support for ARM processors. @@ -55,20 +55,4 @@ config KVM_ARM_MAX_VCPUS large, so only choose a reasonable number that you expect to actually use. -config KVM_ARM_VGIC - bool "KVM support for Virtual GIC" - depends on KVM_ARM_HOST && OF - select HAVE_KVM_IRQCHIP - default y - ---help--- - Adds support for a hardware assisted, in-kernel GIC emulation. - -config KVM_ARM_TIMER - bool "KVM support for Architected Timers" - depends on KVM_ARM_VGIC && ARM_ARCH_TIMER - select HAVE_KVM_IRQCHIP - default y - ---help--- - Adds support for the Architected Timers in virtual machines - endif # VIRTUALIZATION diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 443b8bea43e9..139e46c08b6e 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -7,7 +7,7 @@ ifeq ($(plus_virt),+virt) plus_virt_def := -DREQUIRES_VIRT=1 endif -ccflags-y += -Ivirt/kvm -Iarch/arm/kvm +ccflags-y += -Iarch/arm/kvm CFLAGS_arm.o := -I. $(plus_virt_def) CFLAGS_mmu.o := -I. @@ -15,12 +15,12 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) KVM := ../../../virt/kvm -kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o +kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o -obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o -obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o -obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o -obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o +obj-y += $(KVM)/arm/vgic.o +obj-y += $(KVM)/arm/vgic-v2.o +obj-y += $(KVM)/arm/vgic-v2-emul.o +obj-y += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 5560f74f9eee..6f536451ab78 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -61,8 +61,6 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u8 kvm_next_vmid; static DEFINE_SPINLOCK(kvm_vmid_lock); -static bool vgic_present; - static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) { BUG_ON(preemptible()); @@ -173,8 +171,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int r; switch (ext) { case KVM_CAP_IRQCHIP: - r = vgic_present; - break; + case KVM_CAP_IRQFD: + case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: @@ -183,6 +181,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_PSCI: case KVM_CAP_ARM_PSCI_0_2: case KVM_CAP_READONLY_MEM: + case KVM_CAP_MP_STATE: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -268,7 +267,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return 0; + return kvm_timer_should_fire(vcpu); } int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) @@ -313,13 +312,29 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; + if (vcpu->arch.pause) + mp_state->mp_state = KVM_MP_STATE_STOPPED; + else + mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + + return 0; } int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; + switch (mp_state->mp_state) { + case KVM_MP_STATE_RUNNABLE: + vcpu->arch.pause = false; + break; + case KVM_MP_STATE_STOPPED: + vcpu->arch.pause = true; + break; + default: + return -EINVAL; + } + + return 0; } /** @@ -452,6 +467,11 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) return 0; } +bool kvm_arch_intc_initialized(struct kvm *kvm) +{ + return vgic_initialized(kvm); +} + static void vcpu_pause(struct kvm_vcpu *vcpu) { wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); @@ -831,8 +851,6 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, switch (dev_id) { case KVM_ARM_DEVICE_VGIC_V2: - if (!vgic_present) - return -ENXIO; return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); default: return -ENODEV; @@ -847,10 +865,7 @@ long kvm_arch_vm_ioctl(struct file *filp, switch (ioctl) { case KVM_CREATE_IRQCHIP: { - if (vgic_present) - return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); - else - return -ENXIO; + return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); } case KVM_ARM_SET_DEVICE_ADDR: { struct kvm_arm_device_addr dev_addr; @@ -1035,10 +1050,6 @@ static int init_hyp_mode(void) if (err) goto out_free_context; -#ifdef CONFIG_KVM_ARM_VGIC - vgic_present = true; -#endif - /* * Init HYP architected timer support */ diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 384bab67c462..d503fbb787d3 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -109,22 +109,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return -EINVAL; } -#ifndef CONFIG_KVM_ARM_TIMER - -#define NUM_TIMER_REGS 0 - -static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - return 0; -} - -static bool is_timer_reg(u64 index) -{ - return false; -} - -#else - #define NUM_TIMER_REGS 3 static bool is_timer_reg(u64 index) @@ -152,8 +136,6 @@ static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return 0; } -#endif - static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { void __user *uaddr = (void __user *)(long)reg->addr; diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 14d488388480..35e4a3a0c476 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -402,7 +402,6 @@ vcpu .req r0 @ vcpu pointer always in r0 * Assumes vcpu pointer in vcpu reg */ .macro save_vgic_state -#ifdef CONFIG_KVM_ARM_VGIC /* Get VGIC VCTRL base into r2 */ ldr r2, [vcpu, #VCPU_KVM] ldr r2, [r2, #KVM_VGIC_VCTRL] @@ -460,7 +459,6 @@ ARM_BE8(rev r6, r6 ) subs r4, r4, #1 bne 1b 2: -#endif .endm /* @@ -469,7 +467,6 @@ ARM_BE8(rev r6, r6 ) * Assumes vcpu pointer in vcpu reg */ .macro restore_vgic_state -#ifdef CONFIG_KVM_ARM_VGIC /* Get VGIC VCTRL base into r2 */ ldr r2, [vcpu, #VCPU_KVM] ldr r2, [r2, #KVM_VGIC_VCTRL] @@ -501,7 +498,6 @@ ARM_BE8(rev r6, r6 ) subs r4, r4, #1 bne 1b 2: -#endif .endm #define CNTHCTL_PL1PCTEN (1 << 0) @@ -515,7 +511,6 @@ ARM_BE8(rev r6, r6 ) * Clobbers r2-r5 */ .macro save_timer_state -#ifdef CONFIG_KVM_ARM_TIMER ldr r4, [vcpu, #VCPU_KVM] ldr r2, [r4, #KVM_TIMER_ENABLED] cmp r2, #0 @@ -537,7 +532,6 @@ ARM_BE8(rev r6, r6 ) mcrr p15, 4, r2, r2, c14 @ CNTVOFF 1: -#endif @ Allow physical timer/counter access for the host mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN) @@ -559,7 +553,6 @@ ARM_BE8(rev r6, r6 ) bic r2, r2, #CNTHCTL_PL1PCEN mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL -#ifdef CONFIG_KVM_ARM_TIMER ldr r4, [vcpu, #VCPU_KVM] ldr r2, [r4, #KVM_TIMER_ENABLED] cmp r2, #0 @@ -579,7 +572,6 @@ ARM_BE8(rev r6, r6 ) and r2, r2, #3 mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL 1: -#endif .endm .equ vmentry, 0 diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 5d3bfc0eb3f0..974b1c606d04 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -121,12 +121,11 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) return 0; } -static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - struct kvm_exit_mmio *mmio) +static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) { unsigned long rt; - int len; - bool is_write, sign_extend; + int access_size; + bool sign_extend; if (kvm_vcpu_dabt_isextabt(vcpu)) { /* cache operation on I/O addr, tell guest unsupported */ @@ -140,17 +139,15 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return 1; } - len = kvm_vcpu_dabt_get_as(vcpu); - if (unlikely(len < 0)) - return len; + access_size = kvm_vcpu_dabt_get_as(vcpu); + if (unlikely(access_size < 0)) + return access_size; - is_write = kvm_vcpu_dabt_iswrite(vcpu); + *is_write = kvm_vcpu_dabt_iswrite(vcpu); sign_extend = kvm_vcpu_dabt_issext(vcpu); rt = kvm_vcpu_dabt_get_rd(vcpu); - mmio->is_write = is_write; - mmio->phys_addr = fault_ipa; - mmio->len = len; + *len = access_size; vcpu->arch.mmio_decode.sign_extend = sign_extend; vcpu->arch.mmio_decode.rt = rt; @@ -165,20 +162,20 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa) { - struct kvm_exit_mmio mmio; unsigned long data; unsigned long rt; int ret; + bool is_write; + int len; + u8 data_buf[8]; /* - * Prepare MMIO operation. First stash it in a private - * structure that we can use for in-kernel emulation. If the - * kernel can't handle it, copy it into run->mmio and let user - * space do its magic. + * Prepare MMIO operation. First decode the syndrome data we get + * from the CPU. Then try if some in-kernel emulation feels + * responsible, otherwise let user space do its magic. */ - if (kvm_vcpu_dabt_isvalid(vcpu)) { - ret = decode_hsr(vcpu, fault_ipa, &mmio); + ret = decode_hsr(vcpu, &is_write, &len); if (ret) return ret; } else { @@ -188,21 +185,34 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, rt = vcpu->arch.mmio_decode.rt; - if (mmio.is_write) { - data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), - mmio.len); + if (is_write) { + data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len); + + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); + mmio_write_buf(data_buf, len, data); - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len, - fault_ipa, data); - mmio_write_buf(mmio.data, mmio.len, data); + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, + data_buf); } else { - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len, + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, fault_ipa, 0); + + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len, + data_buf); } - if (vgic_handle_mmio(vcpu, run, &mmio)) + /* Now prepare kvm_run for the potential return to userland. */ + run->mmio.is_write = is_write; + run->mmio.phys_addr = fault_ipa; + run->mmio.len = len; + memcpy(run->mmio.data, data_buf, len); + + if (!ret) { + /* We handled the access successfully in the kernel. */ + kvm_handle_mmio_return(vcpu, run); return 1; + } - kvm_prepare_mmio(run, &mmio); + run->exit_reason = KVM_EXIT_MMIO; return 0; } diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 69c2b4ce6160..1d5accbd3dcf 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, phys_addr_t addr = start, end = start + size; phys_addr_t next; - pgd = pgdp + pgd_index(addr); + pgd = pgdp + kvm_pgd_index(addr); do { next = kvm_pgd_addr_end(addr, end); if (!pgd_none(*pgd)) @@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm, phys_addr_t next; pgd_t *pgd; - pgd = kvm->arch.pgd + pgd_index(addr); + pgd = kvm->arch.pgd + kvm_pgd_index(addr); do { next = kvm_pgd_addr_end(addr, end); stage2_flush_puds(kvm, pgd, addr, next); @@ -634,6 +634,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); } +/* Free the HW pgd, one page at a time */ +static void kvm_free_hwpgd(void *hwpgd) +{ + free_pages_exact(hwpgd, kvm_get_hwpgd_size()); +} + +/* Allocate the HW PGD, making sure that each page gets its own refcount */ +static void *kvm_alloc_hwpgd(void) +{ + unsigned int size = kvm_get_hwpgd_size(); + + return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); +} + /** * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. * @kvm: The KVM struct pointer for the VM. @@ -647,15 +661,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) */ int kvm_alloc_stage2_pgd(struct kvm *kvm) { - int ret; pgd_t *pgd; + void *hwpgd; if (kvm->arch.pgd != NULL) { kvm_err("kvm_arch already initialized?\n"); return -EINVAL; } + hwpgd = kvm_alloc_hwpgd(); + if (!hwpgd) + return -ENOMEM; + + /* When the kernel uses more levels of page tables than the + * guest, we allocate a fake PGD and pre-populate it to point + * to the next-level page table, which will be the real + * initial page table pointed to by the VTTBR. + * + * When KVM_PREALLOC_LEVEL==2, we allocate a single page for + * the PMD and the kernel will use folded pud. + * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD + * pages. + */ if (KVM_PREALLOC_LEVEL > 0) { + int i; + /* * Allocate fake pgd for the page table manipulation macros to * work. This is not used by the hardware and we have no @@ -663,30 +693,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) */ pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t), GFP_KERNEL | __GFP_ZERO); + + if (!pgd) { + kvm_free_hwpgd(hwpgd); + return -ENOMEM; + } + + /* Plug the HW PGD into the fake one. */ + for (i = 0; i < PTRS_PER_S2_PGD; i++) { + if (KVM_PREALLOC_LEVEL == 1) + pgd_populate(NULL, pgd + i, + (pud_t *)hwpgd + i * PTRS_PER_PUD); + else if (KVM_PREALLOC_LEVEL == 2) + pud_populate(NULL, pud_offset(pgd, 0) + i, + (pmd_t *)hwpgd + i * PTRS_PER_PMD); + } } else { /* * Allocate actual first-level Stage-2 page table used by the * hardware for Stage-2 page table walks. */ - pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER); + pgd = (pgd_t *)hwpgd; } - if (!pgd) - return -ENOMEM; - - ret = kvm_prealloc_hwpgd(kvm, pgd); - if (ret) - goto out_err; - kvm_clean_pgd(pgd); kvm->arch.pgd = pgd; return 0; -out_err: - if (KVM_PREALLOC_LEVEL > 0) - kfree(pgd); - else - free_pages((unsigned long)pgd, S2_PGD_ORDER); - return ret; } /** @@ -787,11 +819,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm) return; unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); - kvm_free_hwpgd(kvm); + kvm_free_hwpgd(kvm_get_hwpgd(kvm)); if (KVM_PREALLOC_LEVEL > 0) kfree(kvm->arch.pgd); - else - free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER); + kvm->arch.pgd = NULL; } @@ -801,7 +832,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache pgd_t *pgd; pud_t *pud; - pgd = kvm->arch.pgd + pgd_index(addr); + pgd = kvm->arch.pgd + kvm_pgd_index(addr); if (WARN_ON(pgd_none(*pgd))) { if (!cache) return NULL; @@ -1091,7 +1122,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) pgd_t *pgd; phys_addr_t next; - pgd = kvm->arch.pgd + pgd_index(addr); + pgd = kvm->arch.pgd + kvm_pgd_index(addr); do { /* * Release kvm_mmu_lock periodically if the memory region is @@ -1301,10 +1332,51 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, out_unlock: spin_unlock(&kvm->mmu_lock); + kvm_set_pfn_accessed(pfn); kvm_release_pfn_clean(pfn); return ret; } +/* + * Resolve the access fault by making the page young again. + * Note that because the faulting entry is guaranteed not to be + * cached in the TLB, we don't need to invalidate anything. + */ +static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) +{ + pmd_t *pmd; + pte_t *pte; + pfn_t pfn; + bool pfn_valid = false; + + trace_kvm_access_fault(fault_ipa); + + spin_lock(&vcpu->kvm->mmu_lock); + + pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa); + if (!pmd || pmd_none(*pmd)) /* Nothing there */ + goto out; + + if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ + *pmd = pmd_mkyoung(*pmd); + pfn = pmd_pfn(*pmd); + pfn_valid = true; + goto out; + } + + pte = pte_offset_kernel(pmd, fault_ipa); + if (pte_none(*pte)) /* Nothing there either */ + goto out; + + *pte = pte_mkyoung(*pte); /* Just a page... */ + pfn = pte_pfn(*pte); + pfn_valid = true; +out: + spin_unlock(&vcpu->kvm->mmu_lock); + if (pfn_valid) + kvm_set_pfn_accessed(pfn); +} + /** * kvm_handle_guest_abort - handles all 2nd stage aborts * @vcpu: the VCPU pointer @@ -1335,7 +1407,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) /* Check the stage-2 fault is trans. fault or write fault */ fault_status = kvm_vcpu_trap_get_fault_type(vcpu); - if (fault_status != FSC_FAULT && fault_status != FSC_PERM) { + if (fault_status != FSC_FAULT && fault_status != FSC_PERM && + fault_status != FSC_ACCESS) { kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", kvm_vcpu_trap_get_class(vcpu), (unsigned long)kvm_vcpu_trap_get_fault(vcpu), @@ -1371,6 +1444,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) /* Userspace should not be able to register out-of-bounds IPAs */ VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE); + if (fault_status == FSC_ACCESS) { + handle_access_fault(vcpu, fault_ipa); + ret = 1; + goto out_unlock; + } + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); if (ret == 0) ret = 1; @@ -1379,15 +1458,16 @@ out_unlock: return ret; } -static void handle_hva_to_gpa(struct kvm *kvm, - unsigned long start, - unsigned long end, - void (*handler)(struct kvm *kvm, - gpa_t gpa, void *data), - void *data) +static int handle_hva_to_gpa(struct kvm *kvm, + unsigned long start, + unsigned long end, + int (*handler)(struct kvm *kvm, + gpa_t gpa, void *data), + void *data) { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; + int ret = 0; slots = kvm_memslots(kvm); @@ -1411,14 +1491,17 @@ static void handle_hva_to_gpa(struct kvm *kvm, for (; gfn < gfn_end; ++gfn) { gpa_t gpa = gfn << PAGE_SHIFT; - handler(kvm, gpa, data); + ret |= handler(kvm, gpa, data); } } + + return ret; } -static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) +static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) { unmap_stage2_range(kvm, gpa, PAGE_SIZE); + return 0; } int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) @@ -1444,7 +1527,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, return 0; } -static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) +static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) { pte_t *pte = (pte_t *)data; @@ -1456,6 +1539,7 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) * through this calling path. */ stage2_set_pte(kvm, NULL, gpa, pte, 0); + return 0; } @@ -1472,6 +1556,67 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte); } +static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) +{ + pmd_t *pmd; + pte_t *pte; + + pmd = stage2_get_pmd(kvm, NULL, gpa); + if (!pmd || pmd_none(*pmd)) /* Nothing there */ + return 0; + + if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ + if (pmd_young(*pmd)) { + *pmd = pmd_mkold(*pmd); + return 1; + } + + return 0; + } + + pte = pte_offset_kernel(pmd, gpa); + if (pte_none(*pte)) + return 0; + + if (pte_young(*pte)) { + *pte = pte_mkold(*pte); /* Just a page... */ + return 1; + } + + return 0; +} + +static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) +{ + pmd_t *pmd; + pte_t *pte; + + pmd = stage2_get_pmd(kvm, NULL, gpa); + if (!pmd || pmd_none(*pmd)) /* Nothing there */ + return 0; + + if (kvm_pmd_huge(*pmd)) /* THP, HugeTLB */ + return pmd_young(*pmd); + + pte = pte_offset_kernel(pmd, gpa); + if (!pte_none(*pte)) /* Just a page... */ + return pte_young(*pte); + + return 0; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) +{ + trace_kvm_age_hva(start, end); + return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + trace_kvm_test_age_hva(hva); + return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); +} + void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) { mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index 6817664b46b8..0ec35392d208 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -68,6 +68,21 @@ TRACE_EVENT(kvm_guest_fault, __entry->hxfar, __entry->vcpu_pc) ); +TRACE_EVENT(kvm_access_fault, + TP_PROTO(unsigned long ipa), + TP_ARGS(ipa), + + TP_STRUCT__entry( + __field( unsigned long, ipa ) + ), + + TP_fast_assign( + __entry->ipa = ipa; + ), + + TP_printk("IPA: %lx", __entry->ipa) +); + TRACE_EVENT(kvm_irq_line, TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level), TP_ARGS(type, vcpu_idx, irq_num, level), @@ -210,6 +225,39 @@ TRACE_EVENT(kvm_set_spte_hva, TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva) ); +TRACE_EVENT(kvm_age_hva, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field( unsigned long, start ) + __field( unsigned long, end ) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("mmu notifier age hva: %#08lx -- %#08lx", + __entry->start, __entry->end) +); + +TRACE_EVENT(kvm_test_age_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("mmu notifier test age hva: %#08lx", __entry->hva) +); + TRACE_EVENT(kvm_hvc, TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm), TP_ARGS(vcpu_pc, r0, imm), diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S index 14a0d988c82c..1710fd7db2d5 100644 --- a/arch/arm/lib/clear_user.S +++ b/arch/arm/lib/clear_user.S @@ -47,7 +47,7 @@ USER( strnebt r2, [r0]) ENDPROC(__clear_user) ENDPROC(__clear_user_std) - .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 0 9001: ldmfd sp!, {r0, pc} .popsection diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index a9d3db16ecb5..9648b0675a3e 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S @@ -100,7 +100,7 @@ WEAK(__copy_to_user) ENDPROC(__copy_to_user) ENDPROC(__copy_to_user_std) - .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 0 copy_abort_preamble ldmfd sp!, {r1, r2, r3} diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S index 7d08b43d2c0e..1d0957e61f89 100644 --- a/arch/arm/lib/csumpartialcopyuser.S +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -68,7 +68,7 @@ * so properly, we would have to add in whatever registers were loaded before * the fault, which, with the current asm above is not predictable. */ - .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 4 9001: mov r4, #-EFAULT ldr r5, [sp, #8*4] @ *err_ptr diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c index 312d43eb686a..8044591dca72 100644 --- a/arch/arm/lib/delay.c +++ b/arch/arm/lib/delay.c @@ -83,6 +83,12 @@ void __init register_current_timer_delay(const struct delay_timer *timer) NSEC_PER_SEC, 3600); res = cyc_to_ns(1ULL, new_mult, new_shift); + if (res > 1000) { + pr_err("Ignoring delay timer %ps, which has insufficient resolution of %lluns\n", + timer, res); + return; + } + if (!delay_calibrated && (!delay_res || (res < delay_res))) { pr_info("Switching to timer-based delay loop, resolution %lluns\n", res); delay_timer = timer; diff --git a/arch/arm/mach-davinci/cpuidle.c b/arch/arm/mach-davinci/cpuidle.c index e365c1bb1265..306ebc51599a 100644 --- a/arch/arm/mach-davinci/cpuidle.c +++ b/arch/arm/mach-davinci/cpuidle.c @@ -17,7 +17,6 @@ #include <linux/cpuidle.h> #include <linux/io.h> #include <linux/export.h> -#include <asm/proc-fns.h> #include <asm/cpuidle.h> #include <mach/cpuidle.h> diff --git a/arch/arm/mach-dove/pcie.c b/arch/arm/mach-dove/pcie.c index 8a275f297522..91fe97144570 100644 --- a/arch/arm/mach-dove/pcie.c +++ b/arch/arm/mach-dove/pcie.c @@ -155,17 +155,13 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL, PCI_ANY_ID, rc_pci_fixup); static struct pci_bus __init * dove_pcie_scan_bus(int nr, struct pci_sys_data *sys) { - struct pci_bus *bus; - - if (nr < num_pcie_ports) { - bus = pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys, - &sys->resources); - } else { - bus = NULL; + if (nr >= num_pcie_ports) { BUG(); + return NULL; } - return bus; + return pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys, + &sys->resources); } static int __init dove_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index 9e9dfdfad9d7..f44c2e05c82e 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -166,16 +166,14 @@ static void __init exynos_init_io(void) exynos_map_io(); } +/* + * Apparently, these SoCs are not able to wake-up from suspend using + * the PMU. Too bad. Should they suddenly become capable of such a + * feat, the matches below should be moved to suspend.c. + */ static const struct of_device_id exynos_dt_pmu_match[] = { - { .compatible = "samsung,exynos3250-pmu" }, - { .compatible = "samsung,exynos4210-pmu" }, - { .compatible = "samsung,exynos4212-pmu" }, - { .compatible = "samsung,exynos4412-pmu" }, - { .compatible = "samsung,exynos4415-pmu" }, - { .compatible = "samsung,exynos5250-pmu" }, { .compatible = "samsung,exynos5260-pmu" }, { .compatible = "samsung,exynos5410-pmu" }, - { .compatible = "samsung,exynos5420-pmu" }, { /*sentinel*/ }, }; @@ -186,9 +184,6 @@ static void exynos_map_pmu(void) np = of_find_matching_node(NULL, exynos_dt_pmu_match); if (np) pmu_base_addr = of_iomap(np, 0); - - if (!pmu_base_addr) - panic("failed to find exynos pmu register\n"); } static void __init exynos_init_irq(void) diff --git a/arch/arm/mach-exynos/sleep.S b/arch/arm/mach-exynos/sleep.S index 31d25834b9c4..cf950790fbdc 100644 --- a/arch/arm/mach-exynos/sleep.S +++ b/arch/arm/mach-exynos/sleep.S @@ -23,14 +23,7 @@ #define CPU_MASK 0xff0ffff0 #define CPU_CORTEX_A9 0x410fc090 - /* - * The following code is located into the .data section. This is to - * allow l2x0_regs_phys to be accessed with a relative load while we - * can't rely on any MMU translation. We could have put l2x0_regs_phys - * in the .text section as well, but some setups might insist on it to - * be truly read-only. (Reference from: arch/arm/kernel/sleep.S) - */ - .data + .text .align /* @@ -69,10 +62,12 @@ ENTRY(exynos_cpu_resume_ns) cmp r0, r1 bne skip_cp15 - adr r0, cp15_save_power + adr r0, _cp15_save_power ldr r1, [r0] - adr r0, cp15_save_diag + ldr r1, [r0, r1] + adr r0, _cp15_save_diag ldr r2, [r0] + ldr r2, [r0, r2] mov r0, #SMC_CMD_C15RESUME dsb smc #0 @@ -118,14 +113,20 @@ skip_l2x0: skip_cp15: b cpu_resume ENDPROC(exynos_cpu_resume_ns) + + .align +_cp15_save_power: + .long cp15_save_power - . +_cp15_save_diag: + .long cp15_save_diag - . +#ifdef CONFIG_CACHE_L2X0 +1: .long l2x0_saved_regs - . +#endif /* CONFIG_CACHE_L2X0 */ + + .data .globl cp15_save_diag cp15_save_diag: .long 0 @ cp15 diagnostic .globl cp15_save_power cp15_save_power: .long 0 @ cp15 power control - -#ifdef CONFIG_CACHE_L2X0 - .align -1: .long l2x0_saved_regs - . -#endif /* CONFIG_CACHE_L2X0 */ diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index 318d127df147..2146d918aedd 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -18,7 +18,9 @@ #include <linux/syscore_ops.h> #include <linux/cpu_pm.h> #include <linux/io.h> -#include <linux/irqchip/arm-gic.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/of_address.h> #include <linux/err.h> #include <linux/regulator/machine.h> @@ -43,8 +45,8 @@ #define EXYNOS5420_CPU_STATE 0x28 /** - * struct exynos_wkup_irq - Exynos GIC to PMU IRQ mapping - * @hwirq: Hardware IRQ signal of the GIC + * struct exynos_wkup_irq - PMU IRQ to mask mapping + * @hwirq: Hardware IRQ signal of the PMU * @mask: Mask in PMU wake-up mask register */ struct exynos_wkup_irq { @@ -93,14 +95,14 @@ static const struct exynos_wkup_irq exynos3250_wkup_irq[] = { }; static const struct exynos_wkup_irq exynos4_wkup_irq[] = { - { 76, BIT(1) }, /* RTC alarm */ - { 77, BIT(2) }, /* RTC tick */ + { 44, BIT(1) }, /* RTC alarm */ + { 45, BIT(2) }, /* RTC tick */ { /* sentinel */ }, }; static const struct exynos_wkup_irq exynos5250_wkup_irq[] = { - { 75, BIT(1) }, /* RTC alarm */ - { 76, BIT(2) }, /* RTC tick */ + { 43, BIT(1) }, /* RTC alarm */ + { 44, BIT(2) }, /* RTC tick */ { /* sentinel */ }, }; @@ -167,6 +169,113 @@ static int exynos_irq_set_wake(struct irq_data *data, unsigned int state) return -ENOENT; } +static struct irq_chip exynos_pmu_chip = { + .name = "PMU", + .irq_eoi = irq_chip_eoi_parent, + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_wake = exynos_irq_set_wake, +#ifdef CONFIG_SMP + .irq_set_affinity = irq_chip_set_affinity_parent, +#endif +}; + +static int exynos_pmu_domain_xlate(struct irq_domain *domain, + struct device_node *controller, + const u32 *intspec, + unsigned int intsize, + unsigned long *out_hwirq, + unsigned int *out_type) +{ + if (domain->of_node != controller) + return -EINVAL; /* Shouldn't happen, really... */ + if (intsize != 3) + return -EINVAL; /* Not GIC compliant */ + if (intspec[0] != 0) + return -EINVAL; /* No PPI should point to this domain */ + + *out_hwirq = intspec[1]; + *out_type = intspec[2]; + return 0; +} + +static int exynos_pmu_domain_alloc(struct irq_domain *domain, + unsigned int virq, + unsigned int nr_irqs, void *data) +{ + struct of_phandle_args *args = data; + struct of_phandle_args parent_args; + irq_hw_number_t hwirq; + int i; + + if (args->args_count != 3) + return -EINVAL; /* Not GIC compliant */ + if (args->args[0] != 0) + return -EINVAL; /* No PPI should point to this domain */ + + hwirq = args->args[1]; + + for (i = 0; i < nr_irqs; i++) + irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, + &exynos_pmu_chip, NULL); + + parent_args = *args; + parent_args.np = domain->parent->of_node; + return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &parent_args); +} + +static struct irq_domain_ops exynos_pmu_domain_ops = { + .xlate = exynos_pmu_domain_xlate, + .alloc = exynos_pmu_domain_alloc, + .free = irq_domain_free_irqs_common, +}; + +static int __init exynos_pmu_irq_init(struct device_node *node, + struct device_node *parent) +{ + struct irq_domain *parent_domain, *domain; + + if (!parent) { + pr_err("%s: no parent, giving up\n", node->full_name); + return -ENODEV; + } + + parent_domain = irq_find_host(parent); + if (!parent_domain) { + pr_err("%s: unable to obtain parent domain\n", node->full_name); + return -ENXIO; + } + + pmu_base_addr = of_iomap(node, 0); + + if (!pmu_base_addr) { + pr_err("%s: failed to find exynos pmu register\n", + node->full_name); + return -ENOMEM; + } + + domain = irq_domain_add_hierarchy(parent_domain, 0, 0, + node, &exynos_pmu_domain_ops, + NULL); + if (!domain) { + iounmap(pmu_base_addr); + return -ENOMEM; + } + + return 0; +} + +#define EXYNOS_PMU_IRQ(symbol, name) OF_DECLARE_2(irqchip, symbol, name, exynos_pmu_irq_init) + +EXYNOS_PMU_IRQ(exynos3250_pmu_irq, "samsung,exynos3250-pmu"); +EXYNOS_PMU_IRQ(exynos4210_pmu_irq, "samsung,exynos4210-pmu"); +EXYNOS_PMU_IRQ(exynos4212_pmu_irq, "samsung,exynos4212-pmu"); +EXYNOS_PMU_IRQ(exynos4412_pmu_irq, "samsung,exynos4412-pmu"); +EXYNOS_PMU_IRQ(exynos4415_pmu_irq, "samsung,exynos4415-pmu"); +EXYNOS_PMU_IRQ(exynos5250_pmu_irq, "samsung,exynos5250-pmu"); +EXYNOS_PMU_IRQ(exynos5420_pmu_irq, "samsung,exynos5420-pmu"); + static int exynos_cpu_do_idle(void) { /* issue the standby signal into the pm unit. */ @@ -615,17 +724,19 @@ static struct syscore_ops exynos_pm_syscore_ops; void __init exynos_pm_init(void) { const struct of_device_id *match; + struct device_node *np; u32 tmp; - of_find_matching_node_and_match(NULL, exynos_pmu_of_device_ids, &match); - if (!match) { + np = of_find_matching_node_and_match(NULL, exynos_pmu_of_device_ids, &match); + if (!np) { pr_err("Failed to find PMU node\n"); return; } - pm_data = (struct exynos_pm_data *) match->data; - /* Platform-specific GIC callback */ - gic_arch_extn.irq_set_wake = exynos_irq_set_wake; + if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) + pr_warn("Outdated DT detected, suspend/resume will NOT work\n"); + + pm_data = (struct exynos_pm_data *) match->data; /* All wakeup disable */ tmp = pmu_raw_readl(S5P_WAKEUP_MASK); diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index e8627e04e1e6..c8dffcee9736 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -631,6 +631,7 @@ config SOC_IMX6SX config SOC_VF610 bool "Vybrid Family VF610 support" + select IRQ_DOMAIN_HIERARCHY select ARM_GIC select PINCTRL_VF610 select PL310_ERRATA_769419 if CACHE_L2X0 diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c index d76d08623f9f..8e21ccc1eda2 100644 --- a/arch/arm/mach-imx/cpuidle-imx6q.c +++ b/arch/arm/mach-imx/cpuidle-imx6q.c @@ -9,7 +9,6 @@ #include <linux/cpuidle.h> #include <linux/module.h> #include <asm/cpuidle.h> -#include <asm/proc-fns.h> #include "common.h" #include "cpuidle.h" diff --git a/arch/arm/mach-imx/cpuidle-imx6sl.c b/arch/arm/mach-imx/cpuidle-imx6sl.c index 7d92e6584551..5742a9fd1ef2 100644 --- a/arch/arm/mach-imx/cpuidle-imx6sl.c +++ b/arch/arm/mach-imx/cpuidle-imx6sl.c @@ -9,7 +9,6 @@ #include <linux/cpuidle.h> #include <linux/module.h> #include <asm/cpuidle.h> -#include <asm/proc-fns.h> #include "common.h" #include "cpuidle.h" diff --git a/arch/arm/mach-imx/cpuidle-imx6sx.c b/arch/arm/mach-imx/cpuidle-imx6sx.c index 5a36722b089d..2c9f1a8bf245 100644 --- a/arch/arm/mach-imx/cpuidle-imx6sx.c +++ b/arch/arm/mach-imx/cpuidle-imx6sx.c @@ -10,7 +10,6 @@ #include <linux/cpu_pm.h> #include <linux/module.h> #include <asm/cpuidle.h> -#include <asm/proc-fns.h> #include <asm/suspend.h> #include "common.h" diff --git a/arch/arm/mach-mv78xx0/pcie.c b/arch/arm/mach-mv78xx0/pcie.c index 445e553f4a28..097ea4cb1136 100644 --- a/arch/arm/mach-mv78xx0/pcie.c +++ b/arch/arm/mach-mv78xx0/pcie.c @@ -197,17 +197,13 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL, PCI_ANY_ID, rc_pci_fixup); static struct pci_bus __init * mv78xx0_pcie_scan_bus(int nr, struct pci_sys_data *sys) { - struct pci_bus *bus; - - if (nr < num_pcie_ports) { - bus = pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys, - &sys->resources); - } else { - bus = NULL; + if (nr >= num_pcie_ports) { BUG(); + return NULL; } - return bus; + return pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys, + &sys->resources); } static int __init mv78xx0_pcie_map_irq(const struct pci_dev *dev, u8 slot, diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index 01e398a868bc..4b8e9f4d59ea 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -14,10 +14,9 @@ #include <linux/cpuidle.h> #include <linux/cpu_pm.h> #include <linux/export.h> -#include <linux/clockchips.h> +#include <linux/tick.h> #include <asm/cpuidle.h> -#include <asm/proc-fns.h> #include "common.h" #include "pm.h" @@ -84,7 +83,6 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, { struct idle_statedata *cx = state_ptr + index; u32 mpuss_can_lose_context = 0; - int cpu_id = smp_processor_id(); /* * CPU0 has to wait and stay ON until CPU1 is OFF state. @@ -112,7 +110,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, mpuss_can_lose_context = (cx->mpu_state == PWRDM_POWER_RET) && (cx->mpu_logic_state == PWRDM_POWER_OFF); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu_id); + tick_broadcast_enter(); /* * Call idle CPU PM enter notifier chain so that @@ -169,7 +167,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, if (dev->cpu == 0 && mpuss_can_lose_context) cpu_cluster_pm_exit(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu_id); + tick_broadcast_exit(); fail: cpuidle_coupled_parallel_barrier(dev, &abort_barrier); @@ -184,8 +182,7 @@ fail: */ static void omap_setup_broadcast_timer(void *arg) { - int cpu = smp_processor_id(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu); + tick_broadcast_enable(); } static struct cpuidle_driver omap4_idle_driver = { diff --git a/arch/arm/mach-omap2/hsmmc.c b/arch/arm/mach-omap2/hsmmc.c index dc6e79c4484a..9a8611ab5dfa 100644 --- a/arch/arm/mach-omap2/hsmmc.c +++ b/arch/arm/mach-omap2/hsmmc.c @@ -150,9 +150,13 @@ static int nop_mmc_set_power(struct device *dev, int power_on, int vdd) static inline void omap_hsmmc_mux(struct omap_hsmmc_platform_data *mmc_controller, int controller_nr) { - if (gpio_is_valid(mmc_controller->switch_pin) && - (mmc_controller->switch_pin < OMAP_MAX_GPIO_LINES)) - omap_mux_init_gpio(mmc_controller->switch_pin, + if (gpio_is_valid(mmc_controller->gpio_cd) && + (mmc_controller->gpio_cd < OMAP_MAX_GPIO_LINES)) + omap_mux_init_gpio(mmc_controller->gpio_cd, + OMAP_PIN_INPUT_PULLUP); + if (gpio_is_valid(mmc_controller->gpio_cod) && + (mmc_controller->gpio_cod < OMAP_MAX_GPIO_LINES)) + omap_mux_init_gpio(mmc_controller->gpio_cod, OMAP_PIN_INPUT_PULLUP); if (gpio_is_valid(mmc_controller->gpio_wp) && (mmc_controller->gpio_wp < OMAP_MAX_GPIO_LINES)) @@ -250,15 +254,20 @@ static int __init omap_hsmmc_pdata_init(struct omap2_hsmmc_info *c, mmc->internal_clock = !c->ext_clock; mmc->reg_offset = 0; - mmc->switch_pin = c->gpio_cd; + if (c->cover_only) { + /* detect if mobile phone cover removed */ + mmc->gpio_cd = -EINVAL; + mmc->gpio_cod = c->gpio_cd; + } else { + /* card detect pin on the mmc socket itself */ + mmc->gpio_cd = c->gpio_cd; + mmc->gpio_cod = -EINVAL; + } mmc->gpio_wp = c->gpio_wp; mmc->remux = c->remux; mmc->init_card = c->init_card; - if (c->cover_only) - mmc->cover = 1; - if (c->nonremovable) mmc->nonremovable = 1; @@ -358,7 +367,15 @@ void omap_hsmmc_late_init(struct omap2_hsmmc_info *c) if (!mmc_pdata) continue; - mmc_pdata->switch_pin = c->gpio_cd; + if (c->cover_only) { + /* detect if mobile phone cover removed */ + mmc_pdata->gpio_cd = -EINVAL; + mmc_pdata->gpio_cod = c->gpio_cd; + } else { + /* card detect pin on the mmc socket itself */ + mmc_pdata->gpio_cd = c->gpio_cd; + mmc_pdata->gpio_cod = -EINVAL; + } mmc_pdata->gpio_wp = c->gpio_wp; res = omap_device_register(pdev); diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 2a2f4d56e4c8..25f1beea453e 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -720,6 +720,8 @@ static const char * __init omap_get_family(void) return kasprintf(GFP_KERNEL, "OMAP4"); else if (soc_is_omap54xx()) return kasprintf(GFP_KERNEL, "OMAP5"); + else if (soc_is_am33xx() || soc_is_am335x()) + return kasprintf(GFP_KERNEL, "AM33xx"); else if (soc_is_am43xx()) return kasprintf(GFP_KERNEL, "AM43xx"); else if (soc_is_dra7xx()) diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index f961c46453b9..3b56722dfd8a 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -20,11 +20,12 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/of_address.h> #include <linux/platform_device.h> #include <linux/cpu.h> #include <linux/notifier.h> #include <linux/cpu_pm.h> -#include <linux/irqchip/arm-gic.h> #include "omap-wakeupgen.h" #include "omap-secure.h" @@ -78,29 +79,12 @@ static inline void sar_writel(u32 val, u32 offset, u8 idx) static inline int _wakeupgen_get_irq_info(u32 irq, u32 *bit_posn, u8 *reg_index) { - unsigned int spi_irq; - - /* - * PPIs and SGIs are not supported. - */ - if (irq < OMAP44XX_IRQ_GIC_START) - return -EINVAL; - - /* - * Subtract the GIC offset. - */ - spi_irq = irq - OMAP44XX_IRQ_GIC_START; - if (spi_irq > MAX_IRQS) { - pr_err("omap wakeupGen: Invalid IRQ%d\n", irq); - return -EINVAL; - } - /* * Each WakeupGen register controls 32 interrupt. * i.e. 1 bit per SPI IRQ */ - *reg_index = spi_irq >> 5; - *bit_posn = spi_irq %= 32; + *reg_index = irq >> 5; + *bit_posn = irq %= 32; return 0; } @@ -141,6 +125,7 @@ static void wakeupgen_mask(struct irq_data *d) raw_spin_lock_irqsave(&wakeupgen_lock, flags); _wakeupgen_clear(d->hwirq, irq_target_cpu[d->hwirq]); raw_spin_unlock_irqrestore(&wakeupgen_lock, flags); + irq_chip_mask_parent(d); } /* @@ -153,6 +138,7 @@ static void wakeupgen_unmask(struct irq_data *d) raw_spin_lock_irqsave(&wakeupgen_lock, flags); _wakeupgen_set(d->hwirq, irq_target_cpu[d->hwirq]); raw_spin_unlock_irqrestore(&wakeupgen_lock, flags); + irq_chip_unmask_parent(d); } #ifdef CONFIG_HOTPLUG_CPU @@ -400,15 +386,91 @@ int omap_secure_apis_support(void) return omap_secure_apis; } +static struct irq_chip wakeupgen_chip = { + .name = "WUGEN", + .irq_eoi = irq_chip_eoi_parent, + .irq_mask = wakeupgen_mask, + .irq_unmask = wakeupgen_unmask, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND, +#ifdef CONFIG_SMP + .irq_set_affinity = irq_chip_set_affinity_parent, +#endif +}; + +static int wakeupgen_domain_xlate(struct irq_domain *domain, + struct device_node *controller, + const u32 *intspec, + unsigned int intsize, + unsigned long *out_hwirq, + unsigned int *out_type) +{ + if (domain->of_node != controller) + return -EINVAL; /* Shouldn't happen, really... */ + if (intsize != 3) + return -EINVAL; /* Not GIC compliant */ + if (intspec[0] != 0) + return -EINVAL; /* No PPI should point to this domain */ + + *out_hwirq = intspec[1]; + *out_type = intspec[2]; + return 0; +} + +static int wakeupgen_domain_alloc(struct irq_domain *domain, + unsigned int virq, + unsigned int nr_irqs, void *data) +{ + struct of_phandle_args *args = data; + struct of_phandle_args parent_args; + irq_hw_number_t hwirq; + int i; + + if (args->args_count != 3) + return -EINVAL; /* Not GIC compliant */ + if (args->args[0] != 0) + return -EINVAL; /* No PPI should point to this domain */ + + hwirq = args->args[1]; + if (hwirq >= MAX_IRQS) + return -EINVAL; /* Can't deal with this */ + + for (i = 0; i < nr_irqs; i++) + irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, + &wakeupgen_chip, NULL); + + parent_args = *args; + parent_args.np = domain->parent->of_node; + return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &parent_args); +} + +static struct irq_domain_ops wakeupgen_domain_ops = { + .xlate = wakeupgen_domain_xlate, + .alloc = wakeupgen_domain_alloc, + .free = irq_domain_free_irqs_common, +}; + /* * Initialise the wakeupgen module. */ -int __init omap_wakeupgen_init(void) +static int __init wakeupgen_init(struct device_node *node, + struct device_node *parent) { + struct irq_domain *parent_domain, *domain; int i; unsigned int boot_cpu = smp_processor_id(); u32 val; + if (!parent) { + pr_err("%s: no parent, giving up\n", node->full_name); + return -ENODEV; + } + + parent_domain = irq_find_host(parent); + if (!parent_domain) { + pr_err("%s: unable to obtain parent domain\n", node->full_name); + return -ENXIO; + } /* Not supported on OMAP4 ES1.0 silicon */ if (omap_rev() == OMAP4430_REV_ES1_0) { WARN(1, "WakeupGen: Not supported on OMAP4430 ES1.0\n"); @@ -416,7 +478,7 @@ int __init omap_wakeupgen_init(void) } /* Static mapping, never released */ - wakeupgen_base = ioremap(OMAP_WKUPGEN_BASE, SZ_4K); + wakeupgen_base = of_iomap(node, 0); if (WARN_ON(!wakeupgen_base)) return -ENOMEM; @@ -429,6 +491,14 @@ int __init omap_wakeupgen_init(void) max_irqs = AM43XX_IRQS; } + domain = irq_domain_add_hierarchy(parent_domain, 0, max_irqs, + node, &wakeupgen_domain_ops, + NULL); + if (!domain) { + iounmap(wakeupgen_base); + return -ENOMEM; + } + /* Clear all IRQ bitmasks at wakeupGen level */ for (i = 0; i < irq_banks; i++) { wakeupgen_writel(0, i, CPU0_ID); @@ -437,14 +507,6 @@ int __init omap_wakeupgen_init(void) } /* - * Override GIC architecture specific functions to add - * OMAP WakeupGen interrupt controller along with GIC - */ - gic_arch_extn.irq_mask = wakeupgen_mask; - gic_arch_extn.irq_unmask = wakeupgen_unmask; - gic_arch_extn.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE; - - /* * FIXME: Add support to set_smp_affinity() once the core * GIC code has necessary hooks in place. */ @@ -474,3 +536,9 @@ int __init omap_wakeupgen_init(void) return 0; } + +/* + * We cannot use the IRQCHIP_DECLARE macro that lives in + * drivers/irqchip, so we're forced to roll our own. Not very nice. + */ +OF_DECLARE_2(irqchip, ti_wakeupgen, "ti,omap4-wugen-mpu", wakeupgen_init); diff --git a/arch/arm/mach-omap2/omap-wakeupgen.h b/arch/arm/mach-omap2/omap-wakeupgen.h index b3c8eccfae79..a3491ad12368 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.h +++ b/arch/arm/mach-omap2/omap-wakeupgen.h @@ -33,7 +33,6 @@ #define OMAP_TIMESTAMPCYCLELO 0xc08 #define OMAP_TIMESTAMPCYCLEHI 0xc0c -extern int __init omap_wakeupgen_init(void); extern void __iomem *omap_get_wakeupgen_base(void); extern int omap_secure_apis_support(void); #endif diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index cee0fe1ee6ff..7bb116a6f86f 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -22,7 +22,6 @@ #include <linux/of_platform.h> #include <linux/export.h> #include <linux/irqchip/arm-gic.h> -#include <linux/irqchip/irq-crossbar.h> #include <linux/of_address.h> #include <linux/reboot.h> #include <linux/genalloc.h> @@ -242,26 +241,26 @@ static int __init omap4_sar_ram_init(void) } omap_early_initcall(omap4_sar_ram_init); -static const struct of_device_id gic_match[] = { - { .compatible = "arm,cortex-a9-gic", }, - { .compatible = "arm,cortex-a15-gic", }, +static const struct of_device_id intc_match[] = { + { .compatible = "ti,omap4-wugen-mpu", }, + { .compatible = "ti,omap5-wugen-mpu", }, { }, }; -static struct device_node *gic_node; +static struct device_node *intc_node; unsigned int omap4_xlate_irq(unsigned int hwirq) { struct of_phandle_args irq_data; unsigned int irq; - if (!gic_node) - gic_node = of_find_matching_node(NULL, gic_match); + if (!intc_node) + intc_node = of_find_matching_node(NULL, intc_match); - if (WARN_ON(!gic_node)) + if (WARN_ON(!intc_node)) return hwirq; - irq_data.np = gic_node; + irq_data.np = intc_node; irq_data.args_count = 3; irq_data.args[0] = 0; irq_data.args[1] = hwirq - OMAP44XX_IRQ_GIC_START; @@ -278,6 +277,12 @@ void __init omap_gic_of_init(void) { struct device_node *np; + intc_node = of_find_matching_node(NULL, intc_match); + if (WARN_ON(!intc_node)) { + pr_err("No WUGEN found in DT, system will misbehave.\n"); + pr_err("UPDATE YOUR DEVICE TREE!\n"); + } + /* Extract GIC distributor and TWD bases for OMAP4460 ROM Errata WA */ if (!cpu_is_omap446x()) goto skip_errata_init; @@ -291,9 +296,5 @@ void __init omap_gic_of_init(void) WARN_ON(!twd_base); skip_errata_init: - omap_wakeupgen_init(); -#ifdef CONFIG_IRQ_CROSSBAR - irqcrossbar_init(); -#endif irqchip_init(); } diff --git a/arch/arm/mach-orion5x/pci.c b/arch/arm/mach-orion5x/pci.c index 87a12d6930ff..b02f3947be51 100644 --- a/arch/arm/mach-orion5x/pci.c +++ b/arch/arm/mach-orion5x/pci.c @@ -540,37 +540,33 @@ void __init orion5x_pci_set_cardbus_mode(void) int __init orion5x_pci_sys_setup(int nr, struct pci_sys_data *sys) { - int ret = 0; - vga_base = ORION5X_PCIE_MEM_PHYS_BASE; if (nr == 0) { orion_pcie_set_local_bus_nr(PCIE_BASE, sys->busnr); - ret = pcie_setup(sys); - } else if (nr == 1 && !orion5x_pci_disabled) { + return pcie_setup(sys); + } + + if (nr == 1 && !orion5x_pci_disabled) { orion5x_pci_set_bus_nr(sys->busnr); - ret = pci_setup(sys); + return pci_setup(sys); } - return ret; + return 0; } struct pci_bus __init *orion5x_pci_sys_scan_bus(int nr, struct pci_sys_data *sys) { - struct pci_bus *bus; + if (nr == 0) + return pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys, + &sys->resources); - if (nr == 0) { - bus = pci_scan_root_bus(NULL, sys->busnr, &pcie_ops, sys, - &sys->resources); - } else if (nr == 1 && !orion5x_pci_disabled) { - bus = pci_scan_root_bus(NULL, sys->busnr, &pci_ops, sys, - &sys->resources); - } else { - bus = NULL; - BUG(); - } + if (nr == 1 && !orion5x_pci_disabled) + return pci_scan_root_bus(NULL, sys->busnr, &pci_ops, sys, + &sys->resources); - return bus; + BUG(); + return NULL; } int __init orion5x_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c index 0eecd83c624e..89a7c06570d3 100644 --- a/arch/arm/mach-pxa/irq.c +++ b/arch/arm/mach-pxa/irq.c @@ -11,6 +11,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <linux/bitops.h> #include <linux/init.h> #include <linux/module.h> #include <linux/interrupt.h> @@ -40,7 +41,6 @@ #define ICHP_VAL_IRQ (1 << 31) #define ICHP_IRQ(i) (((i) >> 16) & 0x7fff) #define IPR_VALID (1 << 31) -#define IRQ_BIT(n) (((n) - PXA_IRQ(0)) & 0x1f) #define MAX_INTERNAL_IRQS 128 @@ -51,6 +51,7 @@ static void __iomem *pxa_irq_base; static int pxa_internal_irq_nr; static bool cpu_has_ipr; +static struct irq_domain *pxa_irq_domain; static inline void __iomem *irq_base(int i) { @@ -66,18 +67,20 @@ static inline void __iomem *irq_base(int i) void pxa_mask_irq(struct irq_data *d) { void __iomem *base = irq_data_get_irq_chip_data(d); + irq_hw_number_t irq = irqd_to_hwirq(d); uint32_t icmr = __raw_readl(base + ICMR); - icmr &= ~(1 << IRQ_BIT(d->irq)); + icmr &= ~BIT(irq & 0x1f); __raw_writel(icmr, base + ICMR); } void pxa_unmask_irq(struct irq_data *d) { void __iomem *base = irq_data_get_irq_chip_data(d); + irq_hw_number_t irq = irqd_to_hwirq(d); uint32_t icmr = __raw_readl(base + ICMR); - icmr |= 1 << IRQ_BIT(d->irq); + icmr |= BIT(irq & 0x1f); __raw_writel(icmr, base + ICMR); } @@ -118,40 +121,63 @@ asmlinkage void __exception_irq_entry ichp_handle_irq(struct pt_regs *regs) } while (1); } -void __init pxa_init_irq(int irq_nr, int (*fn)(struct irq_data *, unsigned int)) +static int pxa_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) { - int irq, i, n; + void __iomem *base = irq_base(hw / 32); - BUG_ON(irq_nr > MAX_INTERNAL_IRQS); + /* initialize interrupt priority */ + if (cpu_has_ipr) + __raw_writel(hw | IPR_VALID, pxa_irq_base + IPR(hw)); + + irq_set_chip_and_handler(virq, &pxa_internal_irq_chip, + handle_level_irq); + irq_set_chip_data(virq, base); + set_irq_flags(virq, IRQF_VALID); + + return 0; +} + +static struct irq_domain_ops pxa_irq_ops = { + .map = pxa_irq_map, + .xlate = irq_domain_xlate_onecell, +}; + +static __init void +pxa_init_irq_common(struct device_node *node, int irq_nr, + int (*fn)(struct irq_data *, unsigned int)) +{ + int n; pxa_internal_irq_nr = irq_nr; - cpu_has_ipr = !cpu_is_pxa25x(); - pxa_irq_base = io_p2v(0x40d00000); + pxa_irq_domain = irq_domain_add_legacy(node, irq_nr, + PXA_IRQ(0), 0, + &pxa_irq_ops, NULL); + if (!pxa_irq_domain) + panic("Unable to add PXA IRQ domain\n"); + irq_set_default_host(pxa_irq_domain); for (n = 0; n < irq_nr; n += 32) { void __iomem *base = irq_base(n >> 5); __raw_writel(0, base + ICMR); /* disable all IRQs */ __raw_writel(0, base + ICLR); /* all IRQs are IRQ, not FIQ */ - for (i = n; (i < (n + 32)) && (i < irq_nr); i++) { - /* initialize interrupt priority */ - if (cpu_has_ipr) - __raw_writel(i | IPR_VALID, pxa_irq_base + IPR(i)); - - irq = PXA_IRQ(i); - irq_set_chip_and_handler(irq, &pxa_internal_irq_chip, - handle_level_irq); - irq_set_chip_data(irq, base); - set_irq_flags(irq, IRQF_VALID); - } } - /* only unmasked interrupts kick us out of idle */ __raw_writel(1, irq_base(0) + ICCR); pxa_internal_irq_chip.irq_set_wake = fn; } +void __init pxa_init_irq(int irq_nr, int (*fn)(struct irq_data *, unsigned int)) +{ + BUG_ON(irq_nr > MAX_INTERNAL_IRQS); + + pxa_irq_base = io_p2v(0x40d00000); + cpu_has_ipr = !cpu_is_pxa25x(); + pxa_init_irq_common(NULL, irq_nr, fn); +} + #ifdef CONFIG_PM static unsigned long saved_icmr[MAX_INTERNAL_IRQS/32]; static unsigned long saved_ipr[MAX_INTERNAL_IRQS]; @@ -203,30 +229,6 @@ struct syscore_ops pxa_irq_syscore_ops = { }; #ifdef CONFIG_OF -static struct irq_domain *pxa_irq_domain; - -static int pxa_irq_map(struct irq_domain *h, unsigned int virq, - irq_hw_number_t hw) -{ - void __iomem *base = irq_base(hw / 32); - - /* initialize interrupt priority */ - if (cpu_has_ipr) - __raw_writel(hw | IPR_VALID, pxa_irq_base + IPR(hw)); - - irq_set_chip_and_handler(hw, &pxa_internal_irq_chip, - handle_level_irq); - irq_set_chip_data(hw, base); - set_irq_flags(hw, IRQF_VALID); - - return 0; -} - -static struct irq_domain_ops pxa_irq_ops = { - .map = pxa_irq_map, - .xlate = irq_domain_xlate_onecell, -}; - static const struct of_device_id intc_ids[] __initconst = { { .compatible = "marvell,pxa-intc", }, {} @@ -236,7 +238,7 @@ void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int)) { struct device_node *node; struct resource res; - int n, ret; + int ret; node = of_find_matching_node(NULL, intc_ids); if (!node) { @@ -267,23 +269,6 @@ void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int)) return; } - pxa_irq_domain = irq_domain_add_legacy(node, pxa_internal_irq_nr, 0, 0, - &pxa_irq_ops, NULL); - if (!pxa_irq_domain) - panic("Unable to add PXA IRQ domain\n"); - - irq_set_default_host(pxa_irq_domain); - - for (n = 0; n < pxa_internal_irq_nr; n += 32) { - void __iomem *base = irq_base(n >> 5); - - __raw_writel(0, base + ICMR); /* disable all IRQs */ - __raw_writel(0, base + ICLR); /* all IRQs are IRQ, not FIQ */ - } - - /* only unmasked interrupts kick us out of idle */ - __raw_writel(1, irq_base(0) + ICCR); - - pxa_internal_irq_chip.irq_set_wake = fn; + pxa_init_irq_common(node, pxa_internal_irq_nr, fn); } #endif /* CONFIG_OF */ diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c index a762b23ac830..6dc4f025e674 100644 --- a/arch/arm/mach-pxa/raumfeld.c +++ b/arch/arm/mach-pxa/raumfeld.c @@ -758,8 +758,10 @@ static void raumfeld_power_signal_charged(void) struct power_supply *psy = power_supply_get_by_name(raumfeld_power_supplicants[0]); - if (psy) + if (psy) { power_supply_set_battery_charged(psy); + power_supply_put(psy); + } } static int raumfeld_power_resume(void) diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index 205f9bf3821e..ac2ae5c71ab4 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -412,7 +412,7 @@ static struct fixed_voltage_config can_regulator_pdata = { }; static struct platform_device can_regulator_device = { - .name = "reg-fixed-volage", + .name = "reg-fixed-voltage", .id = 0, .dev = { .platform_data = &can_regulator_pdata, diff --git a/arch/arm/mach-s3c64xx/cpuidle.c b/arch/arm/mach-s3c64xx/cpuidle.c index 2eb072440dfa..93aa8cb70195 100644 --- a/arch/arm/mach-s3c64xx/cpuidle.c +++ b/arch/arm/mach-s3c64xx/cpuidle.c @@ -16,7 +16,7 @@ #include <linux/export.h> #include <linux/time.h> -#include <asm/proc-fns.h> +#include <asm/cpuidle.h> #include <mach/map.h> diff --git a/arch/arm/mach-s5pv210/sleep.S b/arch/arm/mach-s5pv210/sleep.S index 7c43ddd33ba8..dfbfc0f7f8b8 100644 --- a/arch/arm/mach-s5pv210/sleep.S +++ b/arch/arm/mach-s5pv210/sleep.S @@ -14,7 +14,7 @@ #include <linux/linkage.h> - .data + .text .align /* diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c index 6d949f1c850b..36aaeb12e1a5 100644 --- a/arch/arm/mach-shmobile/board-armadillo800eva.c +++ b/arch/arm/mach-shmobile/board-armadillo800eva.c @@ -1015,7 +1015,6 @@ static struct asoc_simple_card_info fsi_wm8978_info = { .platform = "sh_fsi2", .daifmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_CBM_CFM, .cpu_dai = { - .fmt = SND_SOC_DAIFMT_IB_NF, .name = "fsia-dai", }, .codec_dai = { @@ -1040,9 +1039,9 @@ static struct asoc_simple_card_info fsi2_hdmi_info = { .card = "FSI2B-HDMI", .codec = "sh-mobile-hdmi", .platform = "sh_fsi2", + .daifmt = SND_SOC_DAIFMT_CBS_CFS, .cpu_dai = { .name = "fsib-dai", - .fmt = SND_SOC_DAIFMT_CBS_CFS, }, .codec_dai = { .name = "sh_mobile_hdmi-hifi", diff --git a/arch/arm/mach-shmobile/intc-sh73a0.c b/arch/arm/mach-shmobile/intc-sh73a0.c index 9e3618028acc..fd63ae6532fc 100644 --- a/arch/arm/mach-shmobile/intc-sh73a0.c +++ b/arch/arm/mach-shmobile/intc-sh73a0.c @@ -252,11 +252,6 @@ static irqreturn_t sh73a0_intcs_demux(int irq, void *dev_id) return IRQ_HANDLED; } -static int sh73a0_set_wake(struct irq_data *data, unsigned int on) -{ - return 0; /* always allow wakeup */ -} - #define PINTER0_PHYS 0xe69000a0 #define PINTER1_PHYS 0xe69000a4 #define PINTER0_VIRT IOMEM(0xe69000a0) @@ -318,8 +313,8 @@ void __init sh73a0_init_irq(void) void __iomem *gic_cpu_base = IOMEM(0xf0000100); void __iomem *intevtsa = ioremap_nocache(0xffd20100, PAGE_SIZE); + gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE); gic_init(0, 29, gic_dist_base, gic_cpu_base); - gic_arch_extn.irq_set_wake = sh73a0_set_wake; register_intc_controller(&intcs_desc); register_intc_controller(&intc_pint0_desc); diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c index 27dceaf9e688..c03e562be12b 100644 --- a/arch/arm/mach-shmobile/setup-r8a7779.c +++ b/arch/arm/mach-shmobile/setup-r8a7779.c @@ -713,18 +713,13 @@ void __init r8a7779_init_late(void) } #ifdef CONFIG_USE_OF -static int r8a7779_set_wake(struct irq_data *data, unsigned int on) -{ - return 0; /* always allow wakeup */ -} - void __init r8a7779_init_irq_dt(void) { #ifdef CONFIG_ARCH_SHMOBILE_LEGACY void __iomem *gic_dist_base = ioremap_nocache(0xf0001000, 0x1000); void __iomem *gic_cpu_base = ioremap_nocache(0xf0000100, 0x1000); #endif - gic_arch_extn.irq_set_wake = r8a7779_set_wake; + gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE); #ifdef CONFIG_ARCH_SHMOBILE_LEGACY gic_init(0, 29, gic_dist_base, gic_cpu_base); diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig index a77604fbaf25..81502b90dd91 100644 --- a/arch/arm/mach-sunxi/Kconfig +++ b/arch/arm/mach-sunxi/Kconfig @@ -1,10 +1,12 @@ menuconfig ARCH_SUNXI bool "Allwinner SoCs" if ARCH_MULTI_V7 select ARCH_REQUIRE_GPIOLIB + select ARCH_HAS_RESET_CONTROLLER select CLKSRC_MMIO select GENERIC_IRQ_CHIP select PINCTRL select SUN4I_TIMER + select RESET_CONTROLLER if ARCH_SUNXI @@ -20,10 +22,8 @@ config MACH_SUN5I config MACH_SUN6I bool "Allwinner A31 (sun6i) SoCs support" default ARCH_SUNXI - select ARCH_HAS_RESET_CONTROLLER select ARM_GIC select MFD_SUN6I_PRCM - select RESET_CONTROLLER select SUN5I_HSTIMER config MACH_SUN7I @@ -37,16 +37,12 @@ config MACH_SUN7I config MACH_SUN8I bool "Allwinner A23 (sun8i) SoCs support" default ARCH_SUNXI - select ARCH_HAS_RESET_CONTROLLER select ARM_GIC select MFD_SUN6I_PRCM - select RESET_CONTROLLER config MACH_SUN9I bool "Allwinner (sun9i) SoCs support" default ARCH_SUNXI - select ARCH_HAS_RESET_CONTROLLER select ARM_GIC - select RESET_CONTROLLER endif diff --git a/arch/arm/mach-tegra/cpuidle-tegra114.c b/arch/arm/mach-tegra/cpuidle-tegra114.c index f2b586d7b15d..155807fa6fdd 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra114.c +++ b/arch/arm/mach-tegra/cpuidle-tegra114.c @@ -15,7 +15,7 @@ */ #include <asm/firmware.h> -#include <linux/clockchips.h> +#include <linux/tick.h> #include <linux/cpuidle.h> #include <linux/cpu_pm.h> #include <linux/kernel.h> @@ -44,7 +44,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev, tegra_set_cpu_in_lp2(); cpu_pm_enter(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + tick_broadcast_enter(); call_firmware_op(prepare_idle); @@ -52,7 +52,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev, if (call_firmware_op(do_idle, 0) == -ENOSYS) cpu_suspend(0, tegra30_sleep_cpu_secondary_finish); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); cpu_pm_exit(); tegra_clear_cpu_in_lp2(); diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c index 4f25a7c7ca0f..88de2dce2e87 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra20.c +++ b/arch/arm/mach-tegra/cpuidle-tegra20.c @@ -20,14 +20,13 @@ */ #include <linux/clk/tegra.h> -#include <linux/clockchips.h> +#include <linux/tick.h> #include <linux/cpuidle.h> #include <linux/cpu_pm.h> #include <linux/kernel.h> #include <linux/module.h> #include <asm/cpuidle.h> -#include <asm/proc-fns.h> #include <asm/smp_plat.h> #include <asm/suspend.h> @@ -136,11 +135,11 @@ static bool tegra20_cpu_cluster_power_down(struct cpuidle_device *dev, if (tegra20_reset_cpu_1() || !tegra_cpu_rail_off_ready()) return false; - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + tick_broadcast_enter(); tegra_idle_lp2_last(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); if (cpu_online(1)) tegra20_wake_cpu1_from_reset(); @@ -153,13 +152,13 @@ static bool tegra20_idle_enter_lp2_cpu_1(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + tick_broadcast_enter(); cpu_suspend(0, tegra20_sleep_cpu_secondary_finish); tegra20_cpu_clear_resettable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); return true; } diff --git a/arch/arm/mach-tegra/cpuidle-tegra30.c b/arch/arm/mach-tegra/cpuidle-tegra30.c index f8815ed65d9d..4dbe1dae937c 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra30.c +++ b/arch/arm/mach-tegra/cpuidle-tegra30.c @@ -20,14 +20,13 @@ */ #include <linux/clk/tegra.h> -#include <linux/clockchips.h> +#include <linux/tick.h> #include <linux/cpuidle.h> #include <linux/cpu_pm.h> #include <linux/kernel.h> #include <linux/module.h> #include <asm/cpuidle.h> -#include <asm/proc-fns.h> #include <asm/smp_plat.h> #include <asm/suspend.h> @@ -76,11 +75,11 @@ static bool tegra30_cpu_cluster_power_down(struct cpuidle_device *dev, return false; } - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + tick_broadcast_enter(); tegra_idle_lp2_last(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); return true; } @@ -90,13 +89,13 @@ static bool tegra30_cpu_core_power_down(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + tick_broadcast_enter(); smp_wmb(); cpu_suspend(0, tegra30_sleep_cpu_secondary_finish); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); return true; } diff --git a/arch/arm/mach-tegra/iomap.h b/arch/arm/mach-tegra/iomap.h index ee79808e93a3..81dc950b4881 100644 --- a/arch/arm/mach-tegra/iomap.h +++ b/arch/arm/mach-tegra/iomap.h @@ -31,21 +31,6 @@ #define TEGRA_ARM_INT_DIST_BASE 0x50041000 #define TEGRA_ARM_INT_DIST_SIZE SZ_4K -#define TEGRA_PRIMARY_ICTLR_BASE 0x60004000 -#define TEGRA_PRIMARY_ICTLR_SIZE SZ_64 - -#define TEGRA_SECONDARY_ICTLR_BASE 0x60004100 -#define TEGRA_SECONDARY_ICTLR_SIZE SZ_64 - -#define TEGRA_TERTIARY_ICTLR_BASE 0x60004200 -#define TEGRA_TERTIARY_ICTLR_SIZE SZ_64 - -#define TEGRA_QUATERNARY_ICTLR_BASE 0x60004300 -#define TEGRA_QUATERNARY_ICTLR_SIZE SZ_64 - -#define TEGRA_QUINARY_ICTLR_BASE 0x60004400 -#define TEGRA_QUINARY_ICTLR_SIZE SZ_64 - #define TEGRA_TMR1_BASE 0x60005000 #define TEGRA_TMR1_SIZE SZ_8 diff --git a/arch/arm/mach-tegra/irq.c b/arch/arm/mach-tegra/irq.c index ab95f5391a2b..3b9098d27ea5 100644 --- a/arch/arm/mach-tegra/irq.c +++ b/arch/arm/mach-tegra/irq.c @@ -30,43 +30,9 @@ #include "board.h" #include "iomap.h" -#define ICTLR_CPU_IEP_VFIQ 0x08 -#define ICTLR_CPU_IEP_FIR 0x14 -#define ICTLR_CPU_IEP_FIR_SET 0x18 -#define ICTLR_CPU_IEP_FIR_CLR 0x1c - -#define ICTLR_CPU_IER 0x20 -#define ICTLR_CPU_IER_SET 0x24 -#define ICTLR_CPU_IER_CLR 0x28 -#define ICTLR_CPU_IEP_CLASS 0x2C - -#define ICTLR_COP_IER 0x30 -#define ICTLR_COP_IER_SET 0x34 -#define ICTLR_COP_IER_CLR 0x38 -#define ICTLR_COP_IEP_CLASS 0x3c - -#define FIRST_LEGACY_IRQ 32 -#define TEGRA_MAX_NUM_ICTLRS 5 - #define SGI_MASK 0xFFFF -static int num_ictlrs; - -static void __iomem *ictlr_reg_base[] = { - IO_ADDRESS(TEGRA_PRIMARY_ICTLR_BASE), - IO_ADDRESS(TEGRA_SECONDARY_ICTLR_BASE), - IO_ADDRESS(TEGRA_TERTIARY_ICTLR_BASE), - IO_ADDRESS(TEGRA_QUATERNARY_ICTLR_BASE), - IO_ADDRESS(TEGRA_QUINARY_ICTLR_BASE), -}; - #ifdef CONFIG_PM_SLEEP -static u32 cop_ier[TEGRA_MAX_NUM_ICTLRS]; -static u32 cop_iep[TEGRA_MAX_NUM_ICTLRS]; -static u32 cpu_ier[TEGRA_MAX_NUM_ICTLRS]; -static u32 cpu_iep[TEGRA_MAX_NUM_ICTLRS]; - -static u32 ictlr_wake_mask[TEGRA_MAX_NUM_ICTLRS]; static void __iomem *tegra_gic_cpu_base; #endif @@ -83,140 +49,7 @@ bool tegra_pending_sgi(void) return false; } -static inline void tegra_irq_write_mask(unsigned int irq, unsigned long reg) -{ - void __iomem *base; - u32 mask; - - BUG_ON(irq < FIRST_LEGACY_IRQ || - irq >= FIRST_LEGACY_IRQ + num_ictlrs * 32); - - base = ictlr_reg_base[(irq - FIRST_LEGACY_IRQ) / 32]; - mask = BIT((irq - FIRST_LEGACY_IRQ) % 32); - - __raw_writel(mask, base + reg); -} - -static void tegra_mask(struct irq_data *d) -{ - if (d->hwirq < FIRST_LEGACY_IRQ) - return; - - tegra_irq_write_mask(d->hwirq, ICTLR_CPU_IER_CLR); -} - -static void tegra_unmask(struct irq_data *d) -{ - if (d->hwirq < FIRST_LEGACY_IRQ) - return; - - tegra_irq_write_mask(d->hwirq, ICTLR_CPU_IER_SET); -} - -static void tegra_ack(struct irq_data *d) -{ - if (d->hwirq < FIRST_LEGACY_IRQ) - return; - - tegra_irq_write_mask(d->hwirq, ICTLR_CPU_IEP_FIR_CLR); -} - -static void tegra_eoi(struct irq_data *d) -{ - if (d->hwirq < FIRST_LEGACY_IRQ) - return; - - tegra_irq_write_mask(d->hwirq, ICTLR_CPU_IEP_FIR_CLR); -} - -static int tegra_retrigger(struct irq_data *d) -{ - if (d->hwirq < FIRST_LEGACY_IRQ) - return 0; - - tegra_irq_write_mask(d->hwirq, ICTLR_CPU_IEP_FIR_SET); - - return 1; -} - #ifdef CONFIG_PM_SLEEP -static int tegra_set_wake(struct irq_data *d, unsigned int enable) -{ - u32 irq = d->hwirq; - u32 index, mask; - - if (irq < FIRST_LEGACY_IRQ || - irq >= FIRST_LEGACY_IRQ + num_ictlrs * 32) - return -EINVAL; - - index = ((irq - FIRST_LEGACY_IRQ) / 32); - mask = BIT((irq - FIRST_LEGACY_IRQ) % 32); - if (enable) - ictlr_wake_mask[index] |= mask; - else - ictlr_wake_mask[index] &= ~mask; - - return 0; -} - -static int tegra_legacy_irq_suspend(void) -{ - unsigned long flags; - int i; - - local_irq_save(flags); - for (i = 0; i < num_ictlrs; i++) { - void __iomem *ictlr = ictlr_reg_base[i]; - /* Save interrupt state */ - cpu_ier[i] = readl_relaxed(ictlr + ICTLR_CPU_IER); - cpu_iep[i] = readl_relaxed(ictlr + ICTLR_CPU_IEP_CLASS); - cop_ier[i] = readl_relaxed(ictlr + ICTLR_COP_IER); - cop_iep[i] = readl_relaxed(ictlr + ICTLR_COP_IEP_CLASS); - - /* Disable COP interrupts */ - writel_relaxed(~0ul, ictlr + ICTLR_COP_IER_CLR); - - /* Disable CPU interrupts */ - writel_relaxed(~0ul, ictlr + ICTLR_CPU_IER_CLR); - - /* Enable the wakeup sources of ictlr */ - writel_relaxed(ictlr_wake_mask[i], ictlr + ICTLR_CPU_IER_SET); - } - local_irq_restore(flags); - - return 0; -} - -static void tegra_legacy_irq_resume(void) -{ - unsigned long flags; - int i; - - local_irq_save(flags); - for (i = 0; i < num_ictlrs; i++) { - void __iomem *ictlr = ictlr_reg_base[i]; - writel_relaxed(cpu_iep[i], ictlr + ICTLR_CPU_IEP_CLASS); - writel_relaxed(~0ul, ictlr + ICTLR_CPU_IER_CLR); - writel_relaxed(cpu_ier[i], ictlr + ICTLR_CPU_IER_SET); - writel_relaxed(cop_iep[i], ictlr + ICTLR_COP_IEP_CLASS); - writel_relaxed(~0ul, ictlr + ICTLR_COP_IER_CLR); - writel_relaxed(cop_ier[i], ictlr + ICTLR_COP_IER_SET); - } - local_irq_restore(flags); -} - -static struct syscore_ops tegra_legacy_irq_syscore_ops = { - .suspend = tegra_legacy_irq_suspend, - .resume = tegra_legacy_irq_resume, -}; - -int tegra_legacy_irq_syscore_init(void) -{ - register_syscore_ops(&tegra_legacy_irq_syscore_ops); - - return 0; -} - static int tegra_gic_notifier(struct notifier_block *self, unsigned long cmd, void *v) { @@ -251,45 +84,19 @@ static void tegra114_gic_cpu_pm_registration(void) cpu_pm_register_notifier(&tegra_gic_notifier_block); } #else -#define tegra_set_wake NULL static void tegra114_gic_cpu_pm_registration(void) { } #endif +static const struct of_device_id tegra_ictlr_match[] __initconst = { + { .compatible = "nvidia,tegra20-ictlr" }, + { .compatible = "nvidia,tegra30-ictlr" }, + { } +}; + void __init tegra_init_irq(void) { - int i; - void __iomem *distbase; - - distbase = IO_ADDRESS(TEGRA_ARM_INT_DIST_BASE); - num_ictlrs = readl_relaxed(distbase + GIC_DIST_CTR) & 0x1f; - - if (num_ictlrs > ARRAY_SIZE(ictlr_reg_base)) { - WARN(1, "Too many (%d) interrupt controllers found. Maximum is %d.", - num_ictlrs, ARRAY_SIZE(ictlr_reg_base)); - num_ictlrs = ARRAY_SIZE(ictlr_reg_base); - } - - for (i = 0; i < num_ictlrs; i++) { - void __iomem *ictlr = ictlr_reg_base[i]; - writel(~0, ictlr + ICTLR_CPU_IER_CLR); - writel(0, ictlr + ICTLR_CPU_IEP_CLASS); - } - - gic_arch_extn.irq_ack = tegra_ack; - gic_arch_extn.irq_eoi = tegra_eoi; - gic_arch_extn.irq_mask = tegra_mask; - gic_arch_extn.irq_unmask = tegra_unmask; - gic_arch_extn.irq_retrigger = tegra_retrigger; - gic_arch_extn.irq_set_wake = tegra_set_wake; - gic_arch_extn.flags = IRQCHIP_MASK_ON_SUSPEND; - - /* - * Check if there is a devicetree present, since the GIC will be - * initialized elsewhere under DT. - */ - if (!of_have_populated_dt()) - gic_init(0, 29, distbase, - IO_ADDRESS(TEGRA_ARM_PERIF_BASE + 0x100)); + if (WARN_ON(!of_find_matching_node(NULL, tegra_ictlr_match))) + pr_warn("Outdated DT detected, suspend/resume will NOT work\n"); tegra114_gic_cpu_pm_registration(); } diff --git a/arch/arm/mach-tegra/irq.h b/arch/arm/mach-tegra/irq.h index bc05ce5613fb..5142649bba05 100644 --- a/arch/arm/mach-tegra/irq.h +++ b/arch/arm/mach-tegra/irq.h @@ -19,10 +19,4 @@ bool tegra_pending_sgi(void); -#ifdef CONFIG_PM_SLEEP -int tegra_legacy_irq_syscore_init(void); -#else -static inline int tegra_legacy_irq_syscore_init(void) { return 0; } -#endif - #endif diff --git a/arch/arm/mach-tegra/tegra.c b/arch/arm/mach-tegra/tegra.c index 914341bcef25..861d88486dbe 100644 --- a/arch/arm/mach-tegra/tegra.c +++ b/arch/arm/mach-tegra/tegra.c @@ -82,7 +82,6 @@ static void __init tegra_dt_init_irq(void) { tegra_init_irq(); irqchip_init(); - tegra_legacy_irq_syscore_init(); } static void __init tegra_dt_init(void) diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index dbb2970ee7da..6ced0f680262 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c @@ -52,7 +52,7 @@ void ux500_restart(enum reboot_mode mode, const char *cmd) */ void __init ux500_init_irq(void) { - gic_arch_extn.flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND; + gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND); irqchip_init(); /* diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig index 3c2509b4b694..4be537977040 100644 --- a/arch/arm/mach-vexpress/Kconfig +++ b/arch/arm/mach-vexpress/Kconfig @@ -42,6 +42,7 @@ if ARCH_VEXPRESS config ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA bool "Enable A5 and A9 only errata work-arounds" default y + select ARM_ERRATA_643719 if SMP select ARM_ERRATA_720789 select PL310_ERRATA_753970 if CACHE_L2X0 help diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index c887196cfdbe..58ef2a700414 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -186,7 +186,7 @@ static void __init zynq_map_io(void) static void __init zynq_irq_init(void) { - gic_arch_extn.flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND; + gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND); irqchip_init(); } diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 9b4f29e595a4..b7644310236b 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -738,7 +738,7 @@ config CPU_ICACHE_DISABLE config CPU_DCACHE_DISABLE bool "Disable D-Cache (C-bit)" - depends on CPU_CP15 + depends on CPU_CP15 && !SMP help Say Y here to disable the processor data cache. Unless you have a reason not to or are unsure, say N. @@ -825,6 +825,20 @@ config KUSER_HELPERS Say N here only if you are absolutely certain that you do not need these helpers; otherwise, the safe option is to say Y. +config VDSO + bool "Enable VDSO for acceleration of some system calls" + depends on AEABI && MMU + default y if ARM_ARCH_TIMER + select GENERIC_TIME_VSYSCALL + help + Place in the process address space an ELF shared object + providing fast implementations of gettimeofday and + clock_gettime. Systems that implement the ARM architected + timer will receive maximum benefit. + + You must have glibc 2.22 or later for programs to seamlessly + take advantage of this. + config DMA_CACHE_RWFO bool "Enable read/write for ownership DMA cache maintenance" depends on CPU_V6K && SMP diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 2c0c541c60ca..9769f1eefe3b 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -201,7 +201,7 @@ union offset_union { THUMB( "1: "ins" %1, [%2]\n" ) \ THUMB( " add %2, %2, #1\n" ) \ "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, #1\n" \ " b 2b\n" \ @@ -261,7 +261,7 @@ union offset_union { " mov %1, %1, "NEXT_BYTE"\n" \ "2: "ins" %1, [%2]\n" \ "3:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "4: mov %0, #1\n" \ " b 3b\n" \ @@ -301,7 +301,7 @@ union offset_union { " mov %1, %1, "NEXT_BYTE"\n" \ "4: "ins" %1, [%2]\n" \ "5:\n" \ - " .pushsection .fixup,\"ax\"\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ "6: mov %0, #1\n" \ " b 5b\n" \ diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index c6c7696b8db9..e309c8f35af5 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -1131,23 +1131,22 @@ static void __init l2c310_of_parse(const struct device_node *np, } ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K); - if (ret) - return; - - switch (assoc) { - case 16: - *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK; - *aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16; - *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK; - break; - case 8: - *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK; - *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK; - break; - default: - pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n", - assoc); - break; + if (!ret) { + switch (assoc) { + case 16: + *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK; + *aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16; + *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK; + break; + case 8: + *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK; + *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK; + break; + default: + pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n", + assoc); + break; + } } prefetch = l2x0_saved_regs.prefetch_ctrl; @@ -1648,6 +1647,7 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) struct device_node *np; struct resource res; u32 cache_id, old_aux; + u32 cache_level = 2; np = of_find_matching_node(NULL, l2x0_ids); if (!np) @@ -1680,6 +1680,12 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) if (!of_property_read_bool(np, "cache-unified")) pr_err("L2C: device tree omits to specify unified cache\n"); + if (of_property_read_u32(np, "cache-level", &cache_level)) + pr_err("L2C: device tree omits to specify cache-level\n"); + + if (cache_level != 2) + pr_err("L2C: device tree specifies invalid cache level\n"); + /* Read back current (default) hardware configuration */ if (data->save) data->save(l2x0_base); diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index b966656d2c2d..a134d8a13d00 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -36,10 +36,10 @@ ENTRY(v7_invalidate_l1) mcr p15, 2, r0, c0, c0, 0 mrc p15, 1, r0, c0, c0, 0 - ldr r1, =0x7fff + movw r1, #0x7fff and r2, r1, r0, lsr #13 - ldr r1, =0x3ff + movw r1, #0x3ff and r3, r1, r0, lsr #3 @ NumWays - 1 add r2, r2, #1 @ NumSets @@ -90,21 +90,20 @@ ENDPROC(v7_flush_icache_all) ENTRY(v7_flush_dcache_louis) dmb @ ensure ordering with previous memory accesses mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr - ALT_SMP(ands r3, r0, #(7 << 21)) @ extract LoUIS from clidr - ALT_UP(ands r3, r0, #(7 << 27)) @ extract LoUU from clidr +ALT_SMP(mov r3, r0, lsr #20) @ move LoUIS into position +ALT_UP( mov r3, r0, lsr #26) @ move LoUU into position + ands r3, r3, #7 << 1 @ extract LoU*2 field from clidr + bne start_flush_levels @ LoU != 0, start flushing #ifdef CONFIG_ARM_ERRATA_643719 - ALT_SMP(mrceq p15, 0, r2, c0, c0, 0) @ read main ID register - ALT_UP(reteq lr) @ LoUU is zero, so nothing to do - ldreq r1, =0x410fc090 @ ID of ARM Cortex A9 r0p? - biceq r2, r2, #0x0000000f @ clear minor revision number - teqeq r2, r1 @ test for errata affected core and if so... - orreqs r3, #(1 << 21) @ fix LoUIS value (and set flags state to 'ne') +ALT_SMP(mrc p15, 0, r2, c0, c0, 0) @ read main ID register +ALT_UP( ret lr) @ LoUU is zero, so nothing to do + movw r1, #:lower16:(0x410fc090 >> 4) @ ID of ARM Cortex A9 r0p? + movt r1, #:upper16:(0x410fc090 >> 4) + teq r1, r2, lsr #4 @ test for errata affected core and if so... + moveq r3, #1 << 1 @ fix LoUIS value + beq start_flush_levels @ start flushing cache levels #endif - ALT_SMP(mov r3, r3, lsr #20) @ r3 = LoUIS * 2 - ALT_UP(mov r3, r3, lsr #26) @ r3 = LoUU * 2 - reteq lr @ return if level == 0 - mov r10, #0 @ r10 (starting level) = 0 - b flush_levels @ start flushing cache levels + ret lr ENDPROC(v7_flush_dcache_louis) /* @@ -119,9 +118,10 @@ ENDPROC(v7_flush_dcache_louis) ENTRY(v7_flush_dcache_all) dmb @ ensure ordering with previous memory accesses mrc p15, 1, r0, c0, c0, 1 @ read clidr - ands r3, r0, #0x7000000 @ extract loc from clidr - mov r3, r3, lsr #23 @ left align loc bit field + mov r3, r0, lsr #23 @ move LoC into position + ands r3, r3, #7 << 1 @ extract LoC*2 from clidr beq finished @ if loc is 0, then no need to clean +start_flush_levels: mov r10, #0 @ start clean at cache level 0 flush_levels: add r2, r10, r10, lsr #1 @ work out 3x current cache level @@ -140,10 +140,10 @@ flush_levels: #endif and r2, r1, #7 @ extract the length of the cache lines add r2, r2, #4 @ add 4 (line length offset) - ldr r4, =0x3ff + movw r4, #0x3ff ands r4, r4, r1, lsr #3 @ find maximum number on the way size clz r5, r4 @ find bit position of way size increment - ldr r7, =0x7fff + movw r7, #0x7fff ands r7, r7, r1, lsr #13 @ extract max number of the index size loop1: mov r9, r7 @ create working copy of max index diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 170a116d1b29..09c5fe3d30c2 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -171,7 +171,7 @@ static int __dma_supported(struct device *dev, u64 mask, bool warn) */ if (sizeof(mask) != sizeof(dma_addr_t) && mask > (dma_addr_t)~0 && - dma_to_pfn(dev, ~0) < max_pfn) { + dma_to_pfn(dev, ~0) < max_pfn - 1) { if (warn) { dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n", mask); @@ -289,11 +289,11 @@ static void __dma_free_buffer(struct page *page, size_t size) static void *__alloc_from_contiguous(struct device *dev, size_t size, pgprot_t prot, struct page **ret_page, - const void *caller); + const void *caller, bool want_vaddr); static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, pgprot_t prot, struct page **ret_page, - const void *caller); + const void *caller, bool want_vaddr); static void * __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, @@ -357,10 +357,10 @@ static int __init atomic_pool_init(void) if (dev_get_cma_area(NULL)) ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot, - &page, atomic_pool_init); + &page, atomic_pool_init, true); else ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot, - &page, atomic_pool_init); + &page, atomic_pool_init, true); if (ptr) { int ret; @@ -467,13 +467,15 @@ static void __dma_remap(struct page *page, size_t size, pgprot_t prot) static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, pgprot_t prot, struct page **ret_page, - const void *caller) + const void *caller, bool want_vaddr) { struct page *page; - void *ptr; + void *ptr = NULL; page = __dma_alloc_buffer(dev, size, gfp); if (!page) return NULL; + if (!want_vaddr) + goto out; ptr = __dma_alloc_remap(page, size, gfp, prot, caller); if (!ptr) { @@ -481,6 +483,7 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, return NULL; } + out: *ret_page = page; return ptr; } @@ -523,12 +526,12 @@ static int __free_from_pool(void *start, size_t size) static void *__alloc_from_contiguous(struct device *dev, size_t size, pgprot_t prot, struct page **ret_page, - const void *caller) + const void *caller, bool want_vaddr) { unsigned long order = get_order(size); size_t count = size >> PAGE_SHIFT; struct page *page; - void *ptr; + void *ptr = NULL; page = dma_alloc_from_contiguous(dev, count, order); if (!page) @@ -536,6 +539,9 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size, __dma_clear_buffer(page, size); + if (!want_vaddr) + goto out; + if (PageHighMem(page)) { ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller); if (!ptr) { @@ -546,17 +552,21 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size, __dma_remap(page, size, prot); ptr = page_address(page); } + + out: *ret_page = page; return ptr; } static void __free_from_contiguous(struct device *dev, struct page *page, - void *cpu_addr, size_t size) + void *cpu_addr, size_t size, bool want_vaddr) { - if (PageHighMem(page)) - __dma_free_remap(cpu_addr, size); - else - __dma_remap(page, size, PAGE_KERNEL); + if (want_vaddr) { + if (PageHighMem(page)) + __dma_free_remap(cpu_addr, size); + else + __dma_remap(page, size, PAGE_KERNEL); + } dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); } @@ -574,12 +584,12 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) #define nommu() 1 -#define __get_dma_pgprot(attrs, prot) __pgprot(0) -#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL +#define __get_dma_pgprot(attrs, prot) __pgprot(0) +#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv) NULL #define __alloc_from_pool(size, ret_page) NULL -#define __alloc_from_contiguous(dev, size, prot, ret, c) NULL +#define __alloc_from_contiguous(dev, size, prot, ret, c, wv) NULL #define __free_from_pool(cpu_addr, size) 0 -#define __free_from_contiguous(dev, page, cpu_addr, size) do { } while (0) +#define __free_from_contiguous(dev, page, cpu_addr, size, wv) do { } while (0) #define __dma_free_remap(cpu_addr, size) do { } while (0) #endif /* CONFIG_MMU */ @@ -599,11 +609,13 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller) + gfp_t gfp, pgprot_t prot, bool is_coherent, + struct dma_attrs *attrs, const void *caller) { u64 mask = get_coherent_dma_mask(dev); struct page *page = NULL; void *addr; + bool want_vaddr; #ifdef CONFIG_DMA_API_DEBUG u64 limit = (mask + 1) & ~mask; @@ -631,20 +643,21 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, *handle = DMA_ERROR_CODE; size = PAGE_ALIGN(size); + want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); if (is_coherent || nommu()) addr = __alloc_simple_buffer(dev, size, gfp, &page); else if (!(gfp & __GFP_WAIT)) addr = __alloc_from_pool(size, &page); else if (!dev_get_cma_area(dev)) - addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); + addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller, want_vaddr); else - addr = __alloc_from_contiguous(dev, size, prot, &page, caller); + addr = __alloc_from_contiguous(dev, size, prot, &page, caller, want_vaddr); - if (addr) + if (page) *handle = pfn_to_dma(dev, page_to_pfn(page)); - return addr; + return want_vaddr ? addr : page; } /* @@ -661,7 +674,7 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, return memory; return __dma_alloc(dev, size, handle, gfp, prot, false, - __builtin_return_address(0)); + attrs, __builtin_return_address(0)); } static void *arm_coherent_dma_alloc(struct device *dev, size_t size, @@ -674,7 +687,7 @@ static void *arm_coherent_dma_alloc(struct device *dev, size_t size, return memory; return __dma_alloc(dev, size, handle, gfp, prot, true, - __builtin_return_address(0)); + attrs, __builtin_return_address(0)); } /* @@ -715,6 +728,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, bool is_coherent) { struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); + bool want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) return; @@ -726,14 +740,15 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, } else if (__free_from_pool(cpu_addr, size)) { return; } else if (!dev_get_cma_area(dev)) { - __dma_free_remap(cpu_addr, size); + if (want_vaddr) + __dma_free_remap(cpu_addr, size); __dma_free_buffer(page, size); } else { /* * Non-atomic allocations cannot be freed with IRQs disabled */ WARN_ON(irqs_disabled()); - __free_from_contiguous(dev, page, cpu_addr, size); + __free_from_contiguous(dev, page, cpu_addr, size, want_vaddr); } } @@ -1135,13 +1150,28 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp |= __GFP_NOWARN | __GFP_HIGHMEM; while (count) { - int j, order = __fls(count); + int j, order; + + for (order = __fls(count); order > 0; --order) { + /* + * We do not want OOM killer to be invoked as long + * as we can fall back to single pages, so we force + * __GFP_NORETRY for orders higher than zero. + */ + pages[i] = alloc_pages(gfp | __GFP_NORETRY, order); + if (pages[i]) + break; + } - pages[i] = alloc_pages(gfp, order); - while (!pages[i] && order) - pages[i] = alloc_pages(gfp, --order); - if (!pages[i]) - goto error; + if (!pages[i]) { + /* + * Fall back to single page allocation. + * Might invoke OOM killer as last resort. + */ + pages[i] = alloc_pages(gfp, 0); + if (!pages[i]) + goto error; + } if (order) { split_page(pages[i], order); @@ -1206,7 +1236,7 @@ __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot, static dma_addr_t __iommu_create_mapping(struct device *dev, struct page **pages, size_t size) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; dma_addr_t dma_addr, iova; int i, ret = DMA_ERROR_CODE; @@ -1242,7 +1272,7 @@ fail: static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); /* * add optional in-page offset from iova to size and align @@ -1457,7 +1487,7 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, enum dma_data_direction dir, struct dma_attrs *attrs, bool is_coherent) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); dma_addr_t iova, iova_base; int ret = 0; unsigned int count; @@ -1678,7 +1708,7 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); dma_addr_t dma_addr; int ret, prot, len = PAGE_ALIGN(size + offset); @@ -1731,7 +1761,7 @@ static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); dma_addr_t iova = handle & PAGE_MASK; int offset = handle & ~PAGE_MASK; int len = PAGE_ALIGN(size + offset); @@ -1756,7 +1786,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); dma_addr_t iova = handle & PAGE_MASK; struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); int offset = handle & ~PAGE_MASK; @@ -1775,7 +1805,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, static void arm_iommu_sync_single_for_cpu(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); dma_addr_t iova = handle & PAGE_MASK; struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); unsigned int offset = handle & ~PAGE_MASK; @@ -1789,7 +1819,7 @@ static void arm_iommu_sync_single_for_cpu(struct device *dev, static void arm_iommu_sync_single_for_device(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); dma_addr_t iova = handle & PAGE_MASK; struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); unsigned int offset = handle & ~PAGE_MASK; @@ -1950,7 +1980,7 @@ static int __arm_iommu_attach_device(struct device *dev, return err; kref_get(&mapping->kref); - dev->archdata.mapping = mapping; + to_dma_iommu_mapping(dev) = mapping; pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev)); return 0; @@ -1995,7 +2025,7 @@ static void __arm_iommu_detach_device(struct device *dev) iommu_detach_device(mapping->domain, dev); kref_put(&mapping->kref, release_iommu_mapping); - dev->archdata.mapping = NULL; + to_dma_iommu_mapping(dev) = NULL; pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); } @@ -2027,6 +2057,13 @@ static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, if (!iommu) return false; + /* + * currently arm_iommu_create_mapping() takes a max of size_t + * for size param. So check this limit for now. + */ + if (size > SIZE_MAX) + return false; + mapping = arm_iommu_create_mapping(dev->bus, dma_base, size); if (IS_ERR(mapping)) { pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n", @@ -2046,7 +2083,7 @@ static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, static void arm_teardown_iommu_dma_ops(struct device *dev) { - struct dma_iommu_mapping *mapping = dev->archdata.mapping; + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); if (!mapping) return; diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index a982dc3190df..6333d9c17875 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -552,6 +552,7 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n", inf->name, fsr, addr); + show_pte(current->mm, addr); info.si_signo = inf->sig; info.si_errno = 0; diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 1609b022a72f..be92fa0f2f35 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -86,55 +86,6 @@ static int __init parse_tag_initrd2(const struct tag *tag) __tagtable(ATAG_INITRD2, parse_tag_initrd2); -/* - * This keeps memory configuration data used by a couple memory - * initialization functions, as well as show_mem() for the skipping - * of holes in the memory map. It is populated by arm_add_memory(). - */ -void show_mem(unsigned int filter) -{ - int free = 0, total = 0, reserved = 0; - int shared = 0, cached = 0, slab = 0; - struct memblock_region *reg; - - printk("Mem-info:\n"); - show_free_areas(filter); - - for_each_memblock (memory, reg) { - unsigned int pfn1, pfn2; - struct page *page, *end; - - pfn1 = memblock_region_memory_base_pfn(reg); - pfn2 = memblock_region_memory_end_pfn(reg); - - page = pfn_to_page(pfn1); - end = pfn_to_page(pfn2 - 1) + 1; - - do { - total++; - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (PageSlab(page)) - slab++; - else if (!page_count(page)) - free++; - else - shared += page_count(page) - 1; - pfn1++; - page = pfn_to_page(pfn1); - } while (pfn1 < pfn2); - } - - printk("%d pages of RAM\n", total); - printk("%d free pages\n", free); - printk("%d reserved pages\n", reserved); - printk("%d slab pages\n", slab); - printk("%d pages shared\n", shared); - printk("%d pages swap cached\n", cached); -} - static void __init find_limits(unsigned long *min, unsigned long *max_low, unsigned long *max_high) { @@ -335,6 +286,9 @@ void __init bootmem_init(void) find_limits(&min, &max_low, &max_high); + early_memtest((phys_addr_t)min << PAGE_SHIFT, + (phys_addr_t)max_low << PAGE_SHIFT); + /* * Sparsemem tries to allocate bootmem in memory_present(), * so must be done after the fixed reservations diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 5e85ed371364..407dc786583a 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -169,14 +169,22 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, return addr; } +unsigned long arch_mmap_rnd(void) +{ + unsigned long rnd; + + /* 8 bits of randomness in 20 address space bits */ + rnd = (unsigned long)get_random_int() % (1 << 8); + + return rnd << PAGE_SHIFT; +} + void arch_pick_mmap_layout(struct mm_struct *mm) { unsigned long random_factor = 0UL; - /* 8 bits of randomness in 20 address space bits */ - if ((current->flags & PF_RANDOMIZE) && - !(current->personality & ADDR_NO_RANDOMIZE)) - random_factor = (get_random_int() % (1 << 8)) << PAGE_SHIFT; + if (current->flags & PF_RANDOMIZE) + random_factor = arch_mmap_rnd(); if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c index 004e35cdcfff..cf30daff8932 100644 --- a/arch/arm/mm/pageattr.c +++ b/arch/arm/mm/pageattr.c @@ -49,7 +49,10 @@ static int change_memory_common(unsigned long addr, int numpages, WARN_ON_ONCE(1); } - if (!is_module_address(start) || !is_module_address(end - 1)) + if (start < MODULES_VADDR || start >= MODULES_END) + return -EINVAL; + + if (end < MODULES_VADDR || start >= MODULES_END) return -EINVAL; data.set_mask = set_mask; diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 86ee5d47ce3c..aa0519eed698 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -507,7 +507,7 @@ cpu_arm1020_name: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm1020_proc_info,#object __arm1020_proc_info: @@ -519,7 +519,7 @@ __arm1020_proc_info: .long PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm1020_setup + initfn __arm1020_setup, __arm1020_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index a6331d78601f..bff4c7f70fd6 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -465,7 +465,7 @@ arm1020e_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm1020e_proc_info,#object __arm1020e_proc_info: @@ -479,7 +479,7 @@ __arm1020e_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm1020e_setup + initfn __arm1020e_setup, __arm1020e_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index a126b7a59928..dbb2413fe04d 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -448,7 +448,7 @@ arm1022_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm1022_proc_info,#object __arm1022_proc_info: @@ -462,7 +462,7 @@ __arm1022_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm1022_setup + initfn __arm1022_setup, __arm1022_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index fc294067e977..0b37b2cef9d3 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -442,7 +442,7 @@ arm1026_crval: string cpu_arm1026_name, "ARM1026EJ-S" .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm1026_proc_info,#object __arm1026_proc_info: @@ -456,7 +456,7 @@ __arm1026_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm1026_setup + initfn __arm1026_setup, __arm1026_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S index 2baa66b3ac9b..3651cd70e418 100644 --- a/arch/arm/mm/proc-arm720.S +++ b/arch/arm/mm/proc-arm720.S @@ -186,7 +186,7 @@ arm720_crval: * See <asm/procinfo.h> for a definition of this structure. */ - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req .type __\name\()_proc_info,#object @@ -203,7 +203,7 @@ __\name\()_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b \cpu_flush @ cpu_flush + initfn \cpu_flush, __\name\()_proc_info @ cpu_flush .long cpu_arch_name @ arch_name .long cpu_elf_name @ elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB @ elf_hwcap diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index ac1ea6b3bce4..024fb7732407 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -132,14 +132,14 @@ __arm740_setup: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm740_proc_info,#object __arm740_proc_info: .long 0x41807400 .long 0xfffffff0 .long 0 .long 0 - b __arm740_setup + initfn __arm740_setup, __arm740_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_26BIT diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S index bf6ba4bc30ff..25472d94426d 100644 --- a/arch/arm/mm/proc-arm7tdmi.S +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -76,7 +76,7 @@ __arm7tdmi_setup: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \ extra_hwcaps=0 @@ -86,7 +86,7 @@ __\name\()_proc_info: .long \cpu_mask .long 0 .long 0 - b __arm7tdmi_setup + initfn __arm7tdmi_setup, __\name\()_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_26BIT | ( \extra_hwcaps ) diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 22bf8dde4f84..7a14bd4414c9 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -448,7 +448,7 @@ arm920_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm920_proc_info,#object __arm920_proc_info: @@ -464,7 +464,7 @@ __arm920_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm920_setup + initfn __arm920_setup, __arm920_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 0c6d5ac5a6d4..edccfcdcd551 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -426,7 +426,7 @@ arm922_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm922_proc_info,#object __arm922_proc_info: @@ -442,7 +442,7 @@ __arm922_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm922_setup + initfn __arm922_setup, __arm922_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index c32d073282ea..ede8c54ab4aa 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -494,7 +494,7 @@ arm925_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache .type __\name\()_proc_info,#object @@ -510,7 +510,7 @@ __\name\()_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm925_setup + initfn __arm925_setup, __\name\()_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 252b2503038d..fb827c633693 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -474,7 +474,7 @@ arm926_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm926_proc_info,#object __arm926_proc_info: @@ -490,7 +490,7 @@ __arm926_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm926_setup + initfn __arm926_setup, __arm926_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index e5212d489377..ee5b66f847c4 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -297,26 +297,16 @@ __arm940_setup: mcr p15, 0, r0, c6, c0, 1 ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM - ldr r1, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 - bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the area register value - orr r0, r0, #1 @ set enable bit - mcr p15, 0, r0, c6, c1, 0 @ set area 1, RAM - mcr p15, 0, r0, c6, c1, 1 + ldr r7, =CONFIG_DRAM_SIZE >> 12 @ size of RAM (must be >= 4KB) + pr_val r3, r0, r7, #1 + mcr p15, 0, r3, c6, c1, 0 @ set area 1, RAM + mcr p15, 0, r3, c6, c1, 1 ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH - ldr r1, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 - bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the area register value - orr r0, r0, #1 @ set enable bit - mcr p15, 0, r0, c6, c2, 0 @ set area 2, ROM/FLASH - mcr p15, 0, r0, c6, c2, 1 + ldr r7, =CONFIG_FLASH_SIZE @ size of FLASH (must be >= 4KB) + pr_val r3, r0, r6, #1 + mcr p15, 0, r3, c6, c2, 0 @ set area 2, ROM/FLASH + mcr p15, 0, r3, c6, c2, 1 mov r0, #0x06 mcr p15, 0, r0, c2, c0, 0 @ Region 1&2 cacheable @@ -354,14 +344,14 @@ __arm940_setup: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm940_proc_info,#object __arm940_proc_info: .long 0x41009400 .long 0xff00fff0 .long 0 - b __arm940_setup + initfn __arm940_setup, __arm940_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index b3dd9b2d0b8e..7361837edc31 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -343,24 +343,14 @@ __arm946_setup: mcr p15, 0, r0, c6, c0, 0 @ set region 0, default ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM - ldr r1, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 - bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the region register value - orr r0, r0, #1 @ set enable bit - mcr p15, 0, r0, c6, c1, 0 @ set region 1, RAM + ldr r7, =CONFIG_DRAM_SIZE @ size of RAM (must be >= 4KB) + pr_val r3, r0, r7, #1 + mcr p15, 0, r3, c6, c1, 0 ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH - ldr r1, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 - bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the region register value - orr r0, r0, #1 @ set enable bit - mcr p15, 0, r0, c6, c2, 0 @ set region 2, ROM/FLASH + ldr r7, =CONFIG_FLASH_SIZE @ size of FLASH (must be >= 4KB) + pr_val r3, r0, r7, #1 + mcr p15, 0, r3, c6, c2, 0 mov r0, #0x06 mcr p15, 0, r0, c2, c0, 0 @ region 1,2 d-cacheable @@ -409,14 +399,14 @@ __arm946_setup: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __arm946_proc_info,#object __arm946_proc_info: .long 0x41009460 .long 0xff00fff0 .long 0 .long 0 - b __arm946_setup + initfn __arm946_setup, __arm946_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S index 8227322bbb8f..7fac8c612134 100644 --- a/arch/arm/mm/proc-arm9tdmi.S +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -70,7 +70,7 @@ __arm9tdmi_setup: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req .type __\name\()_proc_info, #object @@ -79,7 +79,7 @@ __\name\()_proc_info: .long \cpu_mask .long 0 .long 0 - b __arm9tdmi_setup + initfn __arm9tdmi_setup, __\name\()_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S index c494886892ba..4001b73af4ee 100644 --- a/arch/arm/mm/proc-fa526.S +++ b/arch/arm/mm/proc-fa526.S @@ -190,7 +190,7 @@ fa526_cr1_set: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __fa526_proc_info,#object __fa526_proc_info: @@ -206,7 +206,7 @@ __fa526_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __fa526_setup + initfn __fa526_setup, __fa526_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index 03a1b75f2e16..e494d6d6acbe 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -584,7 +584,7 @@ feroceon_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req .type __\name\()_proc_info,#object @@ -601,7 +601,8 @@ __\name\()_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __feroceon_setup + initfn __feroceon_setup, __\name\()_proc_info + .long __feroceon_setup .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 082b9f2f7e90..c671f345266a 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -331,3 +331,31 @@ ENTRY(\name\()_tlb_fns) .globl \x .equ \x, \y .endm + +.macro initfn, func, base + .long \func - \base +.endm + + /* + * Macro to calculate the log2 size for the protection region + * registers. This calculates rd = log2(size) - 1. tmp must + * not be the same register as rd. + */ +.macro pr_sz, rd, size, tmp + mov \tmp, \size, lsr #12 + mov \rd, #11 +1: movs \tmp, \tmp, lsr #1 + addne \rd, \rd, #1 + bne 1b +.endm + + /* + * Macro to generate a protection region register value + * given a pre-masked address, size, and enable bit. + * Corrupts size. + */ +.macro pr_val, dest, addr, size, enable + pr_sz \dest, \size, \size @ calculate log2(size) - 1 + orr \dest, \addr, \dest, lsl #1 @ mask in the region size + orr \dest, \dest, \enable +.endm diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index 53d393455f13..d65edf717bf7 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -427,7 +427,7 @@ mohawk_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __88sv331x_proc_info,#object __88sv331x_proc_info: @@ -443,7 +443,7 @@ __88sv331x_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __mohawk_setup + initfn __mohawk_setup, __88sv331x_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index 8008a0461cf5..ee2ce496239f 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -199,7 +199,7 @@ sa110_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .type __sa110_proc_info,#object __sa110_proc_info: @@ -213,7 +213,7 @@ __sa110_proc_info: .long PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __sa110_setup + initfn __sa110_setup, __sa110_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index 89f97ac648a9..222d5836f666 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -242,7 +242,7 @@ sa1100_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req .type __\name\()_proc_info,#object @@ -257,7 +257,7 @@ __\name\()_proc_info: .long PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __sa1100_setup + initfn __sa1100_setup, __\name\()_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index d0390f4b3f18..06d890a2342b 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -264,7 +264,7 @@ v6_crval: string cpu_elf_name, "v6" .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc /* * Match any ARMv6 processor core. @@ -287,7 +287,7 @@ __v6_proc_info: PMD_SECT_XN | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __v6_setup + initfn __v6_setup, __v6_proc_info .long cpu_arch_name .long cpu_elf_name /* See also feat_v6_fixup() for HWCAP_TLS */ diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index ed448d8a596b..10405b8d31af 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -37,15 +37,18 @@ * It is assumed that: * - we are not using split page tables */ -ENTRY(cpu_v7_switch_mm) +ENTRY(cpu_ca8_switch_mm) #ifdef CONFIG_MMU mov r2, #0 - mmid r1, r1 @ get mm->context.id - ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) - ALT_UP(orr r0, r0, #TTB_FLAGS_UP) #ifdef CONFIG_ARM_ERRATA_430973 mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB #endif +#endif +ENTRY(cpu_v7_switch_mm) +#ifdef CONFIG_MMU + mmid r1, r1 @ get mm->context.id + ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) + ALT_UP(orr r0, r0, #TTB_FLAGS_UP) #ifdef CONFIG_PID_IN_CONTEXTIDR mrc p15, 0, r2, c13, c0, 1 @ read current context ID lsr r2, r2, #8 @ extract the PID @@ -61,6 +64,7 @@ ENTRY(cpu_v7_switch_mm) #endif bx lr ENDPROC(cpu_v7_switch_mm) +ENDPROC(cpu_ca8_switch_mm) /* * cpu_v7_set_pte_ext(ptep, pte) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 8b4ee5e81c14..3d1054f11a8a 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -153,6 +153,21 @@ ENDPROC(cpu_v7_do_resume) #endif /* + * Cortex-A8 + */ + globl_equ cpu_ca8_proc_init, cpu_v7_proc_init + globl_equ cpu_ca8_proc_fin, cpu_v7_proc_fin + globl_equ cpu_ca8_reset, cpu_v7_reset + globl_equ cpu_ca8_do_idle, cpu_v7_do_idle + globl_equ cpu_ca8_dcache_clean_area, cpu_v7_dcache_clean_area + globl_equ cpu_ca8_set_pte_ext, cpu_v7_set_pte_ext + globl_equ cpu_ca8_suspend_size, cpu_v7_suspend_size +#ifdef CONFIG_ARM_CPU_SUSPEND + globl_equ cpu_ca8_do_suspend, cpu_v7_do_suspend + globl_equ cpu_ca8_do_resume, cpu_v7_do_resume +#endif + +/* * Cortex-A9 processor functions */ globl_equ cpu_ca9mp_proc_init, cpu_v7_proc_init @@ -451,7 +466,10 @@ __v7_setup_stack: @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 +#ifndef CONFIG_ARM_LPAE + define_processor_functions ca8, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 +#endif #ifdef CONFIG_CPU_PJ4B define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 #endif @@ -462,19 +480,19 @@ __v7_setup_stack: string cpu_elf_name, "v7" .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc /* * Standard v7 proc info content */ -.macro __v7_proc initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions +.macro __v7_proc name, initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags) ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags) .long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags - W(b) \initfunc + initfn \initfunc, \name .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \ @@ -494,7 +512,7 @@ __v7_setup_stack: __v7_ca5mp_proc_info: .long 0x410fc050 .long 0xff0ffff0 - __v7_proc __v7_ca5mp_setup + __v7_proc __v7_ca5mp_proc_info, __v7_ca5mp_setup .size __v7_ca5mp_proc_info, . - __v7_ca5mp_proc_info /* @@ -504,9 +522,19 @@ __v7_ca5mp_proc_info: __v7_ca9mp_proc_info: .long 0x410fc090 .long 0xff0ffff0 - __v7_proc __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions + __v7_proc __v7_ca9mp_proc_info, __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions .size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info + /* + * ARM Ltd. Cortex A8 processor. + */ + .type __v7_ca8_proc_info, #object +__v7_ca8_proc_info: + .long 0x410fc080 + .long 0xff0ffff0 + __v7_proc __v7_ca8_proc_info, __v7_setup, proc_fns = ca8_processor_functions + .size __v7_ca8_proc_info, . - __v7_ca8_proc_info + #endif /* CONFIG_ARM_LPAE */ /* @@ -517,7 +545,7 @@ __v7_ca9mp_proc_info: __v7_pj4b_proc_info: .long 0x560f5800 .long 0xff0fff00 - __v7_proc __v7_pj4b_setup, proc_fns = pj4b_processor_functions + __v7_proc __v7_pj4b_proc_info, __v7_pj4b_setup, proc_fns = pj4b_processor_functions .size __v7_pj4b_proc_info, . - __v7_pj4b_proc_info #endif @@ -528,7 +556,7 @@ __v7_pj4b_proc_info: __v7_cr7mp_proc_info: .long 0x410fc170 .long 0xff0ffff0 - __v7_proc __v7_cr7mp_setup + __v7_proc __v7_cr7mp_proc_info, __v7_cr7mp_setup .size __v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info /* @@ -538,7 +566,7 @@ __v7_cr7mp_proc_info: __v7_ca7mp_proc_info: .long 0x410fc070 .long 0xff0ffff0 - __v7_proc __v7_ca7mp_setup + __v7_proc __v7_ca7mp_proc_info, __v7_ca7mp_setup .size __v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info /* @@ -548,7 +576,7 @@ __v7_ca7mp_proc_info: __v7_ca12mp_proc_info: .long 0x410fc0d0 .long 0xff0ffff0 - __v7_proc __v7_ca12mp_setup + __v7_proc __v7_ca12mp_proc_info, __v7_ca12mp_setup .size __v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info /* @@ -558,7 +586,7 @@ __v7_ca12mp_proc_info: __v7_ca15mp_proc_info: .long 0x410fc0f0 .long 0xff0ffff0 - __v7_proc __v7_ca15mp_setup + __v7_proc __v7_ca15mp_proc_info, __v7_ca15mp_setup .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info /* @@ -568,7 +596,7 @@ __v7_ca15mp_proc_info: __v7_b15mp_proc_info: .long 0x420f00f0 .long 0xff0ffff0 - __v7_proc __v7_b15mp_setup + __v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup .size __v7_b15mp_proc_info, . - __v7_b15mp_proc_info /* @@ -578,7 +606,7 @@ __v7_b15mp_proc_info: __v7_ca17mp_proc_info: .long 0x410fc0e0 .long 0xff0ffff0 - __v7_proc __v7_ca17mp_setup + __v7_proc __v7_ca17mp_proc_info, __v7_ca17mp_setup .size __v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info /* @@ -594,7 +622,7 @@ __krait_proc_info: * do support them. They also don't indicate support for fused multiply * instructions even though they actually do support them. */ - __v7_proc __v7_setup, hwcaps = HWCAP_IDIV | HWCAP_VFPv4 + __v7_proc __krait_proc_info, __v7_setup, hwcaps = HWCAP_IDIV | HWCAP_VFPv4 .size __krait_proc_info, . - __krait_proc_info /* @@ -604,5 +632,5 @@ __krait_proc_info: __v7_proc_info: .long 0x000f0000 @ Required ID value .long 0x000f0000 @ Mask for ID - __v7_proc __v7_setup + __v7_proc __v7_proc_info, __v7_setup .size __v7_proc_info, . - __v7_proc_info diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index d1e68b553d3b..e08e1f2bab76 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -135,7 +135,7 @@ __v7m_setup_stack_top: string cpu_elf_name "v7m" string cpu_v7m_name "ARMv7-M" - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc /* * Match any ARMv7-M processor core. @@ -146,7 +146,7 @@ __v7m_proc_info: .long 0x000f0000 @ Mask for ID .long 0 @ proc_info_list.__cpu_mm_mmu_flags .long 0 @ proc_info_list.__cpu_io_mmu_flags - b __v7m_setup @ proc_info_list.__cpu_flush + initfn __v7m_setup, __v7m_proc_info @ proc_info_list.__cpu_flush .long cpu_arch_name .long cpu_elf_name .long HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index f8acdfece036..293dcc2c441f 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -499,7 +499,7 @@ xsc3_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req .type __\name\()_proc_info,#object @@ -514,7 +514,7 @@ __\name\()_proc_info: .long PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __xsc3_setup + initfn __xsc3_setup, __\name\()_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index afa2b3c4df4a..b6bbfdb6dfdc 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -612,7 +612,7 @@ xscale_crval: .align - .section ".proc.info.init", #alloc, #execinstr + .section ".proc.info.init", #alloc .macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache .type __\name\()_proc_info,#object @@ -627,7 +627,7 @@ __\name\()_proc_info: .long PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __xscale_setup + initfn __xscale_setup, __\name\()_proc_info .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S index 5d65be1f1e8a..71df43547659 100644 --- a/arch/arm/nwfpe/entry.S +++ b/arch/arm/nwfpe/entry.S @@ -113,7 +113,7 @@ next: @ to fault. Emit the appropriate exception gunk to fix things up. @ ??? For some reason, faults can happen at .Lx2 even with a @ plain LDR instruction. Weird, but it seems harmless. - .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 2 .Lfix: ret r9 @ let the user eat segfaults .popsection diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c index 61b4d705c267..2438b96004c1 100644 --- a/arch/arm/plat-omap/counter_32k.c +++ b/arch/arm/plat-omap/counter_32k.c @@ -44,24 +44,20 @@ static u64 notrace omap_32k_read_sched_clock(void) } /** - * omap_read_persistent_clock - Return time from a persistent clock. + * omap_read_persistent_clock64 - Return time from a persistent clock. * * Reads the time from a source which isn't disabled during PM, the * 32k sync timer. Convert the cycles elapsed since last read into - * nsecs and adds to a monotonically increasing timespec. + * nsecs and adds to a monotonically increasing timespec64. */ -static struct timespec persistent_ts; +static struct timespec64 persistent_ts; static cycles_t cycles; static unsigned int persistent_mult, persistent_shift; -static DEFINE_SPINLOCK(read_persistent_clock_lock); -static void omap_read_persistent_clock(struct timespec *ts) +static void omap_read_persistent_clock64(struct timespec64 *ts) { unsigned long long nsecs; cycles_t last_cycles; - unsigned long flags; - - spin_lock_irqsave(&read_persistent_clock_lock, flags); last_cycles = cycles; cycles = sync32k_cnt_reg ? readl_relaxed(sync32k_cnt_reg) : 0; @@ -69,11 +65,9 @@ static void omap_read_persistent_clock(struct timespec *ts) nsecs = clocksource_cyc2ns(cycles - last_cycles, persistent_mult, persistent_shift); - timespec_add_ns(&persistent_ts, nsecs); + timespec64_add_ns(&persistent_ts, nsecs); *ts = persistent_ts; - - spin_unlock_irqrestore(&read_persistent_clock_lock, flags); } /** @@ -103,7 +97,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase) /* * 120000 rough estimate from the calculations in - * __clocksource_updatefreq_scale. + * __clocksource_update_freq_scale. */ clocks_calc_mult_shift(&persistent_mult, &persistent_shift, 32768, NSEC_PER_SEC, 120000); @@ -116,7 +110,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase) } sched_clock_register(omap_32k_read_sched_clock, 32, 32768); - register_persistent_clock(NULL, omap_read_persistent_clock); + register_persistent_clock(NULL, omap_read_persistent_clock64); pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n"); return 0; diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c index db10169a08de..8ca94d379bc3 100644 --- a/arch/arm/plat-omap/dmtimer.c +++ b/arch/arm/plat-omap/dmtimer.c @@ -799,6 +799,7 @@ static int omap_dm_timer_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; const struct of_device_id *match; const struct dmtimer_platform_data *pdata; + int ret; match = of_match_device(of_match_ptr(omap_timer_match), dev); pdata = match ? match->data : dev->platform_data; @@ -860,7 +861,12 @@ static int omap_dm_timer_probe(struct platform_device *pdev) } if (!timer->reserved) { - pm_runtime_get_sync(dev); + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + dev_err(dev, "%s: pm_runtime_get_sync failed!\n", + __func__); + goto err_get_sync; + } __omap_dm_timer_init_regs(timer); pm_runtime_put(dev); } @@ -873,6 +879,11 @@ static int omap_dm_timer_probe(struct platform_device *pdev) dev_dbg(dev, "Device Probed.\n"); return 0; + +err_get_sync: + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); + return ret; } /** @@ -899,6 +910,8 @@ static int omap_dm_timer_remove(struct platform_device *pdev) } spin_unlock_irqrestore(&dm_timer_lock, flags); + pm_runtime_disable(&pdev->dev); + return ret; } diff --git a/arch/arm/plat-pxa/dma.c b/arch/arm/plat-pxa/dma.c index 054fc5a1a11c..d92f07f6ecfb 100644 --- a/arch/arm/plat-pxa/dma.c +++ b/arch/arm/plat-pxa/dma.c @@ -51,19 +51,19 @@ static struct dentry *dbgfs_root, *dbgfs_state, **dbgfs_chan; static int dbg_show_requester_chan(struct seq_file *s, void *p) { - int pos = 0; int chan = (int)s->private; int i; u32 drcmr; - pos += seq_printf(s, "DMA channel %d requesters list :\n", chan); + seq_printf(s, "DMA channel %d requesters list :\n", chan); for (i = 0; i < DMA_MAX_REQUESTERS; i++) { drcmr = DRCMR(i); if ((drcmr & DRCMR_CHLNUM) == chan) - pos += seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i, - !!(drcmr & DRCMR_MAPVLD)); + seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", + i, !!(drcmr & DRCMR_MAPVLD)); } - return pos; + + return 0; } static inline int dbg_burst_from_dcmd(u32 dcmd) @@ -83,7 +83,6 @@ static int is_phys_valid(unsigned long addr) static int dbg_show_descriptors(struct seq_file *s, void *p) { - int pos = 0; int chan = (int)s->private; int i, max_show = 20, burst, width; u32 dcmd; @@ -94,44 +93,43 @@ static int dbg_show_descriptors(struct seq_file *s, void *p) spin_lock_irqsave(&dma_channels[chan].lock, flags); phys_desc = DDADR(chan); - pos += seq_printf(s, "DMA channel %d descriptors :\n", chan); - pos += seq_printf(s, "[%03d] First descriptor unknown\n", 0); + seq_printf(s, "DMA channel %d descriptors :\n", chan); + seq_printf(s, "[%03d] First descriptor unknown\n", 0); for (i = 1; i < max_show && is_phys_valid(phys_desc); i++) { desc = phys_to_virt(phys_desc); dcmd = desc->dcmd; burst = dbg_burst_from_dcmd(dcmd); width = (1 << ((dcmd >> 14) & 0x3)) >> 1; - pos += seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n", - i, phys_desc, desc); - pos += seq_printf(s, "\tDDADR = %08x\n", desc->ddadr); - pos += seq_printf(s, "\tDSADR = %08x\n", desc->dsadr); - pos += seq_printf(s, "\tDTADR = %08x\n", desc->dtadr); - pos += seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d" - " width=%d len=%d)\n", - dcmd, - DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR), - DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG), - DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN), - DCMD_STR(ENDIAN), burst, width, - dcmd & DCMD_LENGTH); + seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n", + i, phys_desc, desc); + seq_printf(s, "\tDDADR = %08x\n", desc->ddadr); + seq_printf(s, "\tDSADR = %08x\n", desc->dsadr); + seq_printf(s, "\tDTADR = %08x\n", desc->dtadr); + seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n", + dcmd, + DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR), + DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG), + DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN), + DCMD_STR(ENDIAN), burst, width, + dcmd & DCMD_LENGTH); phys_desc = desc->ddadr; } if (i == max_show) - pos += seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n", - i, phys_desc); + seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n", + i, phys_desc); else - pos += seq_printf(s, "[%03d] Desc at %08lx is %s\n", - i, phys_desc, phys_desc == DDADR_STOP ? - "DDADR_STOP" : "invalid"); + seq_printf(s, "[%03d] Desc at %08lx is %s\n", + i, phys_desc, phys_desc == DDADR_STOP ? + "DDADR_STOP" : "invalid"); spin_unlock_irqrestore(&dma_channels[chan].lock, flags); - return pos; + + return 0; } static int dbg_show_chan_state(struct seq_file *s, void *p) { - int pos = 0; int chan = (int)s->private; u32 dcsr, dcmd; int burst, width; @@ -142,42 +140,39 @@ static int dbg_show_chan_state(struct seq_file *s, void *p) burst = dbg_burst_from_dcmd(dcmd); width = (1 << ((dcmd >> 14) & 0x3)) >> 1; - pos += seq_printf(s, "DMA channel %d\n", chan); - pos += seq_printf(s, "\tPriority : %s\n", - str_prio[dma_channels[chan].prio]); - pos += seq_printf(s, "\tUnaligned transfer bit: %s\n", - DALGN & (1 << chan) ? "yes" : "no"); - pos += seq_printf(s, "\tDCSR = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", - dcsr, DCSR_STR(RUN), DCSR_STR(NODESC), - DCSR_STR(STOPIRQEN), DCSR_STR(EORIRQEN), - DCSR_STR(EORJMPEN), DCSR_STR(EORSTOPEN), - DCSR_STR(SETCMPST), DCSR_STR(CLRCMPST), - DCSR_STR(CMPST), DCSR_STR(EORINTR), DCSR_STR(REQPEND), - DCSR_STR(STOPSTATE), DCSR_STR(ENDINTR), - DCSR_STR(STARTINTR), DCSR_STR(BUSERR)); - - pos += seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d" - " len=%d)\n", - dcmd, - DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR), - DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG), - DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN), - DCMD_STR(ENDIAN), burst, width, dcmd & DCMD_LENGTH); - pos += seq_printf(s, "\tDSADR = %08x\n", DSADR(chan)); - pos += seq_printf(s, "\tDTADR = %08x\n", DTADR(chan)); - pos += seq_printf(s, "\tDDADR = %08x\n", DDADR(chan)); - return pos; + seq_printf(s, "DMA channel %d\n", chan); + seq_printf(s, "\tPriority : %s\n", str_prio[dma_channels[chan].prio]); + seq_printf(s, "\tUnaligned transfer bit: %s\n", + DALGN & (1 << chan) ? "yes" : "no"); + seq_printf(s, "\tDCSR = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", + dcsr, DCSR_STR(RUN), DCSR_STR(NODESC), + DCSR_STR(STOPIRQEN), DCSR_STR(EORIRQEN), + DCSR_STR(EORJMPEN), DCSR_STR(EORSTOPEN), + DCSR_STR(SETCMPST), DCSR_STR(CLRCMPST), + DCSR_STR(CMPST), DCSR_STR(EORINTR), DCSR_STR(REQPEND), + DCSR_STR(STOPSTATE), DCSR_STR(ENDINTR), + DCSR_STR(STARTINTR), DCSR_STR(BUSERR)); + + seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n", + dcmd, + DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR), + DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG), + DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN), + DCMD_STR(ENDIAN), burst, width, dcmd & DCMD_LENGTH); + seq_printf(s, "\tDSADR = %08x\n", DSADR(chan)); + seq_printf(s, "\tDTADR = %08x\n", DTADR(chan)); + seq_printf(s, "\tDDADR = %08x\n", DDADR(chan)); + + return 0; } static int dbg_show_state(struct seq_file *s, void *p) { - int pos = 0; - /* basic device status */ - pos += seq_printf(s, "DMA engine status\n"); - pos += seq_printf(s, "\tChannel number: %d\n", num_dma_channels); + seq_puts(s, "DMA engine status\n"); + seq_printf(s, "\tChannel number: %d\n", num_dma_channels); - return pos; + return 0; } #define DBGFS_FUNC_DECL(name) \ diff --git a/arch/arm/vdso/.gitignore b/arch/arm/vdso/.gitignore new file mode 100644 index 000000000000..f8b69d84238e --- /dev/null +++ b/arch/arm/vdso/.gitignore @@ -0,0 +1 @@ +vdso.lds diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile new file mode 100644 index 000000000000..bab0a8be7924 --- /dev/null +++ b/arch/arm/vdso/Makefile @@ -0,0 +1,74 @@ +hostprogs-y := vdsomunge + +obj-vdso := vgettimeofday.o datapage.o + +# Build rules +targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds +obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) + +ccflags-y := -shared -fPIC -fno-common -fno-builtin -fno-stack-protector +ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 -DDISABLE_BRANCH_PROFILING +ccflags-y += -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) + +obj-y += vdso.o +extra-y += vdso.lds +CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +CFLAGS_REMOVE_vdso.o = -pg + +# Force -O2 to avoid libgcc dependencies +CFLAGS_REMOVE_vgettimeofday.o = -pg -Os +CFLAGS_vgettimeofday.o = -O2 + +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + +# Force dependency +$(obj)/vdso.o : $(obj)/vdso.so + +# Link rule for the .so file +$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE + $(call if_changed,vdsold) + +$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/vdsomunge FORCE + $(call if_changed,vdsomunge) + +# Strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# Actual build commands +quiet_cmd_vdsold = VDSO $@ + cmd_vdsold = $(CC) $(c_flags) -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) \ + $(call cc-ldoption, -Wl$(comma)--build-id) \ + -Wl,-Bsymbolic -Wl,-z,max-page-size=4096 \ + -Wl,-z,common-page-size=4096 -o $@ + +quiet_cmd_vdsomunge = MUNGE $@ + cmd_vdsomunge = $(objtree)/$(obj)/vdsomunge $< $@ + +# +# Install the unstripped copy of vdso.so.dbg. If our toolchain +# supports build-id, install .build-id links as well. +# +# Cribbed from arch/x86/vdso/Makefile. +# +quiet_cmd_vdso_install = INSTALL $< +define cmd_vdso_install + cp $< "$(MODLIB)/vdso/vdso.so"; \ + if readelf -n $< | grep -q 'Build ID'; then \ + buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \ + first=`echo $$buildid | cut -b-2`; \ + last=`echo $$buildid | cut -b3-`; \ + mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \ + ln -sf "../../vdso.so" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \ + fi +endef + +$(MODLIB)/vdso: FORCE + @mkdir -p $(MODLIB)/vdso + +PHONY += vdso_install +vdso_install: $(obj)/vdso.so.dbg $(MODLIB)/vdso FORCE + $(call cmd,vdso_install) diff --git a/arch/arm/vdso/datapage.S b/arch/arm/vdso/datapage.S new file mode 100644 index 000000000000..a2e60367931b --- /dev/null +++ b/arch/arm/vdso/datapage.S @@ -0,0 +1,15 @@ +#include <linux/linkage.h> +#include <asm/asm-offsets.h> + + .align 2 +.L_vdso_data_ptr: + .long _start - . - VDSO_DATA_SIZE + +ENTRY(__get_datapage) + .fnstart + adr r0, .L_vdso_data_ptr + ldr r1, [r0] + add r0, r0, r1 + bx lr + .fnend +ENDPROC(__get_datapage) diff --git a/arch/arm/vdso/vdso.S b/arch/arm/vdso/vdso.S new file mode 100644 index 000000000000..b2b97e3e7bab --- /dev/null +++ b/arch/arm/vdso/vdso.S @@ -0,0 +1,35 @@ +/* + * Adapted from arm64 version. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/const.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso_start, vdso_end + .balign PAGE_SIZE +vdso_start: + .incbin "arch/arm/vdso/vdso.so" + .balign PAGE_SIZE +vdso_end: + + .previous diff --git a/arch/arm/vdso/vdso.lds.S b/arch/arm/vdso/vdso.lds.S new file mode 100644 index 000000000000..89ca89f12d23 --- /dev/null +++ b/arch/arm/vdso/vdso.lds.S @@ -0,0 +1,87 @@ +/* + * Adapted from arm64 version. + * + * GNU linker script for the VDSO library. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + * Heavily based on the vDSO linker scripts for other archs. + */ + +#include <linux/const.h> +#include <asm/page.h> +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm") +OUTPUT_ARCH(arm) + +SECTIONS +{ + PROVIDE(_start = .); + + . = SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + + .text : { *(.text*) } :text =0xe7f001f2 + + .got : { *(.got) } + .rel.plt : { *(.rel.plt) } + + /DISCARD/ : { + *(.note.GNU-stack) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +VERSION +{ + LINUX_2.6 { + global: + __vdso_clock_gettime; + __vdso_gettimeofday; + local: *; + }; +} diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c new file mode 100644 index 000000000000..9005b07296c8 --- /dev/null +++ b/arch/arm/vdso/vdsomunge.c @@ -0,0 +1,201 @@ +/* + * Copyright 2015 Mentor Graphics Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * vdsomunge - Host program which produces a shared object + * architecturally specified to be usable by both soft- and hard-float + * programs. + * + * The Procedure Call Standard for the ARM Architecture (ARM IHI + * 0042E) says: + * + * 6.4.1 VFP and Base Standard Compatibility + * + * Code compiled for the VFP calling standard is compatible with + * the base standard (and vice-versa) if no floating-point or + * containerized vector arguments or results are used. + * + * And ELF for the ARM Architecture (ARM IHI 0044E) (Table 4-2) says: + * + * If both EF_ARM_ABI_FLOAT_XXXX bits are clear, conformance to the + * base procedure-call standard is implied. + * + * The VDSO is built with -msoft-float, as with the rest of the ARM + * kernel, and uses no floating point arguments or results. The build + * process will produce a shared object that may or may not have the + * EF_ARM_ABI_FLOAT_SOFT flag set (it seems to depend on the binutils + * version; binutils starting with 2.24 appears to set it). The + * EF_ARM_ABI_FLOAT_HARD flag should definitely not be set, and this + * program will error out if it is. + * + * If the soft-float flag is set, this program clears it. That's all + * it does. + */ + +#define _GNU_SOURCE + +#include <byteswap.h> +#include <elf.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define HOST_ORDER ELFDATA2LSB +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define HOST_ORDER ELFDATA2MSB +#endif + +/* Some of the ELF constants we'd like to use were added to <elf.h> + * relatively recently. + */ +#ifndef EF_ARM_EABI_VER5 +#define EF_ARM_EABI_VER5 0x05000000 +#endif + +#ifndef EF_ARM_ABI_FLOAT_SOFT +#define EF_ARM_ABI_FLOAT_SOFT 0x200 +#endif + +#ifndef EF_ARM_ABI_FLOAT_HARD +#define EF_ARM_ABI_FLOAT_HARD 0x400 +#endif + +static const char *outfile; + +static void cleanup(void) +{ + if (error_message_count > 0 && outfile != NULL) + unlink(outfile); +} + +static Elf32_Word read_elf_word(Elf32_Word word, bool swap) +{ + return swap ? bswap_32(word) : word; +} + +static Elf32_Half read_elf_half(Elf32_Half half, bool swap) +{ + return swap ? bswap_16(half) : half; +} + +static void write_elf_word(Elf32_Word val, Elf32_Word *dst, bool swap) +{ + *dst = swap ? bswap_32(val) : val; +} + +int main(int argc, char **argv) +{ + const Elf32_Ehdr *inhdr; + bool clear_soft_float; + const char *infile; + Elf32_Word e_flags; + const void *inbuf; + struct stat stat; + void *outbuf; + bool swap; + int outfd; + int infd; + + atexit(cleanup); + + if (argc != 3) + error(EXIT_FAILURE, 0, "Usage: %s [infile] [outfile]", argv[0]); + + infile = argv[1]; + outfile = argv[2]; + + infd = open(infile, O_RDONLY); + if (infd < 0) + error(EXIT_FAILURE, errno, "Cannot open %s", infile); + + if (fstat(infd, &stat) != 0) + error(EXIT_FAILURE, errno, "Failed stat for %s", infile); + + inbuf = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, infd, 0); + if (inbuf == MAP_FAILED) + error(EXIT_FAILURE, errno, "Failed to map %s", infile); + + close(infd); + + inhdr = inbuf; + + if (memcmp(&inhdr->e_ident, ELFMAG, SELFMAG) != 0) + error(EXIT_FAILURE, 0, "Not an ELF file"); + + if (inhdr->e_ident[EI_CLASS] != ELFCLASS32) + error(EXIT_FAILURE, 0, "Unsupported ELF class"); + + swap = inhdr->e_ident[EI_DATA] != HOST_ORDER; + + if (read_elf_half(inhdr->e_type, swap) != ET_DYN) + error(EXIT_FAILURE, 0, "Not a shared object"); + + if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) { + error(EXIT_FAILURE, 0, "Unsupported architecture %#x", + inhdr->e_machine); + } + + e_flags = read_elf_word(inhdr->e_flags, swap); + + if (EF_ARM_EABI_VERSION(e_flags) != EF_ARM_EABI_VER5) { + error(EXIT_FAILURE, 0, "Unsupported EABI version %#x", + EF_ARM_EABI_VERSION(e_flags)); + } + + if (e_flags & EF_ARM_ABI_FLOAT_HARD) + error(EXIT_FAILURE, 0, + "Unexpected hard-float flag set in e_flags"); + + clear_soft_float = !!(e_flags & EF_ARM_ABI_FLOAT_SOFT); + + outfd = open(outfile, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); + if (outfd < 0) + error(EXIT_FAILURE, errno, "Cannot open %s", outfile); + + if (ftruncate(outfd, stat.st_size) != 0) + error(EXIT_FAILURE, errno, "Cannot truncate %s", outfile); + + outbuf = mmap(NULL, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, + outfd, 0); + if (outbuf == MAP_FAILED) + error(EXIT_FAILURE, errno, "Failed to map %s", outfile); + + close(outfd); + + memcpy(outbuf, inbuf, stat.st_size); + + if (clear_soft_float) { + Elf32_Ehdr *outhdr; + + outhdr = outbuf; + e_flags &= ~EF_ARM_ABI_FLOAT_SOFT; + write_elf_word(e_flags, &outhdr->e_flags, swap); + } + + if (msync(outbuf, stat.st_size, MS_SYNC) != 0) + error(EXIT_FAILURE, errno, "Failed to sync %s", outfile); + + return EXIT_SUCCESS; +} diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c new file mode 100644 index 000000000000..79214d5ff097 --- /dev/null +++ b/arch/arm/vdso/vgettimeofday.c @@ -0,0 +1,282 @@ +/* + * Copyright 2015 Mentor Graphics Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/hrtimer.h> +#include <linux/time.h> +#include <asm/arch_timer.h> +#include <asm/barrier.h> +#include <asm/bug.h> +#include <asm/page.h> +#include <asm/unistd.h> +#include <asm/vdso_datapage.h> + +#ifndef CONFIG_AEABI +#error This code depends on AEABI system call conventions +#endif + +extern struct vdso_data *__get_datapage(void); + +static notrace u32 __vdso_read_begin(const struct vdso_data *vdata) +{ + u32 seq; +repeat: + seq = ACCESS_ONCE(vdata->seq_count); + if (seq & 1) { + cpu_relax(); + goto repeat; + } + return seq; +} + +static notrace u32 vdso_read_begin(const struct vdso_data *vdata) +{ + u32 seq; + + seq = __vdso_read_begin(vdata); + + smp_rmb(); /* Pairs with smp_wmb in vdso_write_end */ + return seq; +} + +static notrace int vdso_read_retry(const struct vdso_data *vdata, u32 start) +{ + smp_rmb(); /* Pairs with smp_wmb in vdso_write_begin */ + return vdata->seq_count != start; +} + +static notrace long clock_gettime_fallback(clockid_t _clkid, + struct timespec *_ts) +{ + register struct timespec *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_clock_gettime; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static notrace int do_realtime_coarse(struct timespec *ts, + struct vdso_data *vdata) +{ + u32 seq; + + do { + seq = vdso_read_begin(vdata); + + ts->tv_sec = vdata->xtime_coarse_sec; + ts->tv_nsec = vdata->xtime_coarse_nsec; + + } while (vdso_read_retry(vdata, seq)); + + return 0; +} + +static notrace int do_monotonic_coarse(struct timespec *ts, + struct vdso_data *vdata) +{ + struct timespec tomono; + u32 seq; + + do { + seq = vdso_read_begin(vdata); + + ts->tv_sec = vdata->xtime_coarse_sec; + ts->tv_nsec = vdata->xtime_coarse_nsec; + + tomono.tv_sec = vdata->wtm_clock_sec; + tomono.tv_nsec = vdata->wtm_clock_nsec; + + } while (vdso_read_retry(vdata, seq)); + + ts->tv_sec += tomono.tv_sec; + timespec_add_ns(ts, tomono.tv_nsec); + + return 0; +} + +#ifdef CONFIG_ARM_ARCH_TIMER + +static notrace u64 get_ns(struct vdso_data *vdata) +{ + u64 cycle_delta; + u64 cycle_now; + u64 nsec; + + cycle_now = arch_counter_get_cntvct(); + + cycle_delta = (cycle_now - vdata->cs_cycle_last) & vdata->cs_mask; + + nsec = (cycle_delta * vdata->cs_mult) + vdata->xtime_clock_snsec; + nsec >>= vdata->cs_shift; + + return nsec; +} + +static notrace int do_realtime(struct timespec *ts, struct vdso_data *vdata) +{ + u64 nsecs; + u32 seq; + + do { + seq = vdso_read_begin(vdata); + + if (!vdata->tk_is_cntvct) + return -1; + + ts->tv_sec = vdata->xtime_clock_sec; + nsecs = get_ns(vdata); + + } while (vdso_read_retry(vdata, seq)); + + ts->tv_nsec = 0; + timespec_add_ns(ts, nsecs); + + return 0; +} + +static notrace int do_monotonic(struct timespec *ts, struct vdso_data *vdata) +{ + struct timespec tomono; + u64 nsecs; + u32 seq; + + do { + seq = vdso_read_begin(vdata); + + if (!vdata->tk_is_cntvct) + return -1; + + ts->tv_sec = vdata->xtime_clock_sec; + nsecs = get_ns(vdata); + + tomono.tv_sec = vdata->wtm_clock_sec; + tomono.tv_nsec = vdata->wtm_clock_nsec; + + } while (vdso_read_retry(vdata, seq)); + + ts->tv_sec += tomono.tv_sec; + ts->tv_nsec = 0; + timespec_add_ns(ts, nsecs + tomono.tv_nsec); + + return 0; +} + +#else /* CONFIG_ARM_ARCH_TIMER */ + +static notrace int do_realtime(struct timespec *ts, struct vdso_data *vdata) +{ + return -1; +} + +static notrace int do_monotonic(struct timespec *ts, struct vdso_data *vdata) +{ + return -1; +} + +#endif /* CONFIG_ARM_ARCH_TIMER */ + +notrace int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts) +{ + struct vdso_data *vdata; + int ret = -1; + + vdata = __get_datapage(); + + switch (clkid) { + case CLOCK_REALTIME_COARSE: + ret = do_realtime_coarse(ts, vdata); + break; + case CLOCK_MONOTONIC_COARSE: + ret = do_monotonic_coarse(ts, vdata); + break; + case CLOCK_REALTIME: + ret = do_realtime(ts, vdata); + break; + case CLOCK_MONOTONIC: + ret = do_monotonic(ts, vdata); + break; + default: + break; + } + + if (ret) + ret = clock_gettime_fallback(clkid, ts); + + return ret; +} + +static notrace long gettimeofday_fallback(struct timeval *_tv, + struct timezone *_tz) +{ + register struct timezone *tz asm("r1") = _tz; + register struct timeval *tv asm("r0") = _tv; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_gettimeofday; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (tv), "r" (tz), "r" (nr) + : "memory"); + + return ret; +} + +notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + struct timespec ts; + struct vdso_data *vdata; + int ret; + + vdata = __get_datapage(); + + ret = do_realtime(&ts, vdata); + if (ret) + return gettimeofday_fallback(tv, tz); + + if (tv) { + tv->tv_sec = ts.tv_sec; + tv->tv_usec = ts.tv_nsec / 1000; + } + if (tz) { + tz->tz_minuteswest = vdata->tz_minuteswest; + tz->tz_dsttime = vdata->tz_dsttime; + } + + return ret; +} + +/* Avoid unresolved references emitted by GCC */ + +void __aeabi_unwind_cpp_pr0(void) +{ +} + +void __aeabi_unwind_cpp_pr1(void) +{ +} + +void __aeabi_unwind_cpp_pr2(void) +{ +} |