summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig31
-rw-r--r--arch/alpha/defconfig2
-rw-r--r--arch/alpha/include/asm/Kbuild1
-rw-r--r--arch/alpha/include/asm/asm-prototypes.h18
-rw-r--r--arch/alpha/include/asm/core_marvel.h2
-rw-r--r--arch/alpha/include/asm/fb.h13
-rw-r--r--arch/alpha/include/asm/futex.h26
-rw-r--r--arch/alpha/include/asm/io.h1
-rw-r--r--arch/alpha/include/asm/spinlock.h5
-rw-r--r--arch/alpha/include/asm/string.h15
-rw-r--r--arch/alpha/include/asm/types.h2
-rw-r--r--arch/alpha/include/asm/unistd.h2
-rw-r--r--arch/alpha/include/asm/vga.h2
-rw-r--r--arch/alpha/include/uapi/asm/mman.h14
-rw-r--r--arch/alpha/include/uapi/asm/socket.h2
-rw-r--r--arch/alpha/include/uapi/asm/types.h12
-rw-r--r--arch/alpha/include/uapi/asm/unistd.h19
-rw-r--r--arch/alpha/kernel/core_marvel.c8
-rw-r--r--arch/alpha/kernel/core_titan.c2
-rw-r--r--arch/alpha/kernel/module.c3
-rw-r--r--arch/alpha/kernel/pci-noop.c6
-rw-r--r--arch/alpha/kernel/pci-sysfs.c7
-rw-r--r--arch/alpha/kernel/pci.c33
-rw-r--r--arch/alpha/kernel/setup.c5
-rw-r--r--arch/alpha/kernel/smc37c669.c7
-rw-r--r--arch/alpha/kernel/smp.c2
-rw-r--r--arch/alpha/kernel/sys_marvel.c12
-rw-r--r--arch/alpha/kernel/sys_nautilus.c31
-rw-r--r--arch/alpha/kernel/systbls.S9
-rw-r--r--arch/alpha/kernel/traps.c2
-rw-r--r--arch/alpha/lib/Makefile22
-rw-r--r--arch/alpha/lib/copy_user.S2
-rw-r--r--arch/alpha/lib/ev6-copy_user.S7
-rw-r--r--arch/alpha/lib/memset.S10
-rw-r--r--arch/alpha/math-emu/math.c1
-rw-r--r--arch/arc/Kconfig12
-rw-r--r--arch/arc/Makefile3
-rw-r--r--arch/arc/boot/dts/axc001.dtsi20
-rw-r--r--arch/arc/boot/dts/axc003.dtsi32
-rw-r--r--arch/arc/boot/dts/axc003_idu.dtsi32
-rw-r--r--arch/arc/boot/dts/axs10x_mb.dtsi3
-rw-r--r--arch/arc/boot/dts/hsdk.dts189
-rw-r--r--arch/arc/boot/dts/nsim_hs.dts2
-rw-r--r--arch/arc/boot/dts/vdk_axs10x_mb.dtsi1
-rw-r--r--arch/arc/configs/haps_hs_defconfig1
-rw-r--r--arch/arc/configs/haps_hs_smp_defconfig1
-rw-r--r--arch/arc/configs/hsdk_defconfig80
-rw-r--r--arch/arc/configs/nps_defconfig1
-rw-r--r--arch/arc/configs/nsim_700_defconfig1
-rw-r--r--arch/arc/configs/nsim_hs_defconfig1
-rw-r--r--arch/arc/configs/nsim_hs_smp_defconfig1
-rw-r--r--arch/arc/configs/nsimosci_defconfig1
-rw-r--r--arch/arc/configs/nsimosci_hs_defconfig1
-rw-r--r--arch/arc/configs/nsimosci_hs_smp_defconfig1
-rw-r--r--arch/arc/configs/tb10x_defconfig1
-rw-r--r--arch/arc/include/asm/atomic.h2
-rw-r--r--arch/arc/include/asm/cache.h7
-rw-r--r--arch/arc/include/asm/entry-compact.h24
-rw-r--r--arch/arc/include/asm/futex.h40
-rw-r--r--arch/arc/include/asm/irqflags-arcv2.h3
-rw-r--r--arch/arc/include/asm/irqflags-compact.h2
-rw-r--r--arch/arc/include/asm/mmu.h2
-rw-r--r--arch/arc/include/asm/page.h2
-rw-r--r--arch/arc/include/asm/processor.h10
-rw-r--r--arch/arc/include/asm/ptrace.h5
-rw-r--r--arch/arc/include/asm/spinlock.h17
-rw-r--r--arch/arc/include/asm/switch_to.h9
-rw-r--r--arch/arc/kernel/Makefile1
-rw-r--r--arch/arc/kernel/devtree.c5
-rw-r--r--arch/arc/kernel/entry-compact.S22
-rw-r--r--arch/arc/kernel/entry.S6
-rw-r--r--arch/arc/kernel/intc-arcv2.c10
-rw-r--r--arch/arc/kernel/intc-compact.c14
-rw-r--r--arch/arc/kernel/pcibios.c22
-rw-r--r--arch/arc/kernel/process.c33
-rw-r--r--arch/arc/kernel/setup.c6
-rw-r--r--arch/arc/kernel/traps.c4
-rw-r--r--arch/arc/kernel/troubleshoot.c5
-rw-r--r--arch/arc/mm/cache.c133
-rw-r--r--arch/arc/mm/dma.c45
-rw-r--r--arch/arc/mm/fault.c2
-rw-r--r--arch/arc/mm/init.c6
-rw-r--r--arch/arc/mm/tlb.c17
-rw-r--r--arch/arc/mm/tlbex.S9
-rw-r--r--arch/arc/plat-axs10x/axs10x.c152
-rw-r--r--arch/arc/plat-eznps/Kconfig26
-rw-r--r--arch/arc/plat-eznps/Makefile2
-rw-r--r--arch/arc/plat-eznps/ctop.c32
-rw-r--r--arch/arc/plat-eznps/entry.S2
-rw-r--r--arch/arc/plat-eznps/include/plat/ctop.h2
-rw-r--r--arch/arc/plat-eznps/mtm.c50
-rw-r--r--arch/arc/plat-hsdk/Kconfig9
-rw-r--r--arch/arc/plat-hsdk/Makefile9
-rw-r--r--arch/arc/plat-hsdk/platform.c66
-rw-r--r--arch/arc/plat-sim/Kconfig13
-rw-r--r--arch/arc/plat-sim/platform.c5
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm/boot/compressed/efi-header.S160
-rw-r--r--arch/arm/boot/compressed/vmlinux.lds.S30
-rw-r--r--arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi1
-rw-r--r--arch/arm/boot/dts/imx25.dtsi1
-rw-r--r--arch/arm/boot/dts/imx6q-evi.dts16
-rw-r--r--arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi4
-rw-r--r--arch/arm/boot/dts/imx7d-sdb.dts16
-rw-r--r--arch/arm/boot/dts/imx7ulp-pinfunc.h468
-rw-r--r--arch/arm/boot/dts/ls1021a.dtsi8
-rw-r--r--arch/arm/boot/dts/omap2420-n8x0-common.dtsi4
-rw-r--r--arch/arm/boot/dts/omap3-n950-n9.dtsi14
-rw-r--r--arch/arm/boot/dts/rk3228-evb.dts34
-rw-r--r--arch/arm/boot/dts/sama5d2.dtsi12
-rw-r--r--arch/arm/boot/dts/ste-hrefprev60.dtsi2
-rw-r--r--arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts9
-rw-r--r--arch/arm/boot/dts/sun8i-h3-bananapi-m2-plus.dts19
-rw-r--r--arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts7
-rw-r--r--arch/arm/boot/dts/sun8i-h3-orangepi-2.dts8
-rw-r--r--arch/arm/boot/dts/sun8i-h3-orangepi-one.dts8
-rw-r--r--arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts5
-rw-r--r--arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts8
-rw-r--r--arch/arm/boot/dts/sun8i-h3-orangepi-plus.dts22
-rw-r--r--arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts16
-rw-r--r--arch/arm/boot/dts/sunxi-h3-h5.dtsi26
-rw-r--r--arch/arm/boot/dts/tango4-smp8758.dtsi1
-rw-r--r--arch/arm/configs/imx_v6_v7_defconfig2
-rw-r--r--arch/arm/configs/multi_v7_defconfig1
-rw-r--r--arch/arm/configs/omap2plus_defconfig2
-rw-r--r--arch/arm/configs/sunxi_defconfig2
-rw-r--r--arch/arm/crypto/Kconfig5
-rw-r--r--arch/arm/crypto/aes-ce-glue.c4
-rw-r--r--arch/arm/crypto/aes-cipher-core.S88
-rw-r--r--arch/arm/crypto/aes-neonbs-glue.c5
-rw-r--r--arch/arm/crypto/ghash-ce-core.S234
-rw-r--r--arch/arm/crypto/ghash-ce-glue.c24
-rw-r--r--arch/arm/include/asm/arch_gicv3.h34
-rw-r--r--arch/arm/include/asm/futex.h26
-rw-r--r--arch/arm/include/asm/kvm_arm.h1
-rw-r--r--arch/arm/include/asm/kvm_emulate.h24
-rw-r--r--arch/arm/include/asm/kvm_host.h6
-rw-r--r--arch/arm/include/asm/spinlock.h16
-rw-r--r--arch/arm/include/asm/string.h14
-rw-r--r--arch/arm/include/asm/thread_info.h15
-rw-r--r--arch/arm/include/asm/tlb.h11
-rw-r--r--arch/arm/include/asm/traps.h7
-rw-r--r--arch/arm/include/asm/uaccess.h2
-rw-r--r--arch/arm/kernel/armksyms.c2
-rw-r--r--arch/arm/kernel/entry-common.S9
-rw-r--r--arch/arm/kernel/signal.c5
-rw-r--r--arch/arm/kvm/handle_exit.c2
-rw-r--r--arch/arm/lib/memset.S24
-rw-r--r--arch/arm/mach-at91/Kconfig2
-rw-r--r--arch/arm/mach-at91/pm.c12
-rw-r--r--arch/arm/mach-hisi/Kconfig1
-rw-r--r--arch/arm/mach-omap1/board-h2-mmc.c2
-rw-r--r--arch/arm/mach-omap1/board-h2.c2
-rw-r--r--arch/arm/mach-omap1/board-h3-mmc.c2
-rw-r--r--arch/arm/mach-omap1/board-h3.c2
-rw-r--r--arch/arm/mach-omap1/board-nokia770.c4
-rw-r--r--arch/arm/mach-omap1/board-osk.c2
-rw-r--r--arch/arm/mach-omap2/Makefile2
-rw-r--r--arch/arm/mach-omap2/board-generic.c1
-rw-r--r--arch/arm/mach-omap2/common.h2
-rw-r--r--arch/arm/mach-omap2/display.c119
-rw-r--r--arch/arm/mach-omap2/display.h1
-rw-r--r--arch/arm/mach-omap2/drm.c53
-rw-r--r--arch/arm/mach-omap2/io.c1
-rw-r--r--arch/arm/mach-omap2/omap_twl.c2
-rw-r--r--arch/arm/mach-pxa/raumfeld.c2
-rw-r--r--arch/arm/mach-s3c24xx/mach-osiris-dvs.c2
-rw-r--r--arch/arm/mach-s3c24xx/mach-osiris.c2
-rw-r--r--arch/arm/mach-tegra/cpuidle-tegra114.c4
-rw-r--r--arch/arm/net/bpf_jit_32.c2448
-rw-r--r--arch/arm/net/bpf_jit_32.h108
-rw-r--r--arch/arm64/Kconfig13
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts15
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts15
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts16
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts15
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi20
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts17
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts17
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts17
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi3
-rw-r--r--arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi6
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi12
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi31
-rw-r--r--arch/arm64/boot/dts/marvell/armada-ap806.dtsi4
-rw-r--r--arch/arm64/boot/dts/renesas/salvator-common.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328-evb.dts17
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328.dtsi39
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399.dtsi8
-rw-r--r--arch/arm64/configs/defconfig1
-rw-r--r--arch/arm64/crypto/Kconfig22
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-core.S30
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-glue.c174
-rw-r--r--arch/arm64/crypto/aes-ce-cipher.c55
-rw-r--r--arch/arm64/crypto/aes-ce.S12
-rw-r--r--arch/arm64/crypto/aes-cipher-core.S152
-rw-r--r--arch/arm64/crypto/aes-ctr-fallback.h53
-rw-r--r--arch/arm64/crypto/aes-glue.c63
-rw-r--r--arch/arm64/crypto/aes-neonbs-glue.c53
-rw-r--r--arch/arm64/crypto/chacha20-neon-glue.c5
-rw-r--r--arch/arm64/crypto/crc32-ce-glue.c11
-rw-r--r--arch/arm64/crypto/crct10dif-ce-glue.c13
-rw-r--r--arch/arm64/crypto/ghash-ce-core.S401
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c517
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c18
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c30
-rw-r--r--arch/arm64/crypto/sha256-glue.c1
-rw-r--r--arch/arm64/include/asm/Kbuild1
-rw-r--r--arch/arm64/include/asm/arch_gicv3.h7
-rw-r--r--arch/arm64/include/asm/arch_timer.h4
-rw-r--r--arch/arm64/include/asm/asm-bug.h54
-rw-r--r--arch/arm64/include/asm/assembler.h25
-rw-r--r--arch/arm64/include/asm/bug.h35
-rw-r--r--arch/arm64/include/asm/cacheflush.h4
-rw-r--r--arch/arm64/include/asm/cpucaps.h3
-rw-r--r--arch/arm64/include/asm/efi.h16
-rw-r--r--arch/arm64/include/asm/elf.h6
-rw-r--r--arch/arm64/include/asm/esr.h42
-rw-r--r--arch/arm64/include/asm/fpsimd.h16
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h56
-rw-r--r--arch/arm64/include/asm/futex.h26
-rw-r--r--arch/arm64/include/asm/hugetlb.h9
-rw-r--r--arch/arm64/include/asm/irq.h42
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h24
-rw-r--r--arch/arm64/include/asm/kvm_host.h6
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h21
-rw-r--r--arch/arm64/include/asm/memory.h53
-rw-r--r--arch/arm64/include/asm/mmu.h2
-rw-r--r--arch/arm64/include/asm/neon.h16
-rw-r--r--arch/arm64/include/asm/numa.h3
-rw-r--r--arch/arm64/include/asm/page-def.h34
-rw-r--r--arch/arm64/include/asm/page.h12
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h18
-rw-r--r--arch/arm64/include/asm/pgtable.h103
-rw-r--r--arch/arm64/include/asm/processor.h4
-rw-r--r--arch/arm64/include/asm/ptrace.h31
-rw-r--r--arch/arm64/include/asm/signal32.h2
-rw-r--r--arch/arm64/include/asm/simd.h56
-rw-r--r--arch/arm64/include/asm/smp.h2
-rw-r--r--arch/arm64/include/asm/spinlock.h69
-rw-r--r--arch/arm64/include/asm/stacktrace.h61
-rw-r--r--arch/arm64/include/asm/string.h4
-rw-r--r--arch/arm64/include/asm/sysreg.h1
-rw-r--r--arch/arm64/include/asm/thread_info.h17
-rw-r--r--arch/arm64/include/asm/traps.h12
-rw-r--r--arch/arm64/include/asm/uaccess.h15
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h1
-rw-r--r--arch/arm64/kernel/acpi.c4
-rw-r--r--arch/arm64/kernel/asm-offsets.c1
-rw-r--r--arch/arm64/kernel/cpufeature.c13
-rw-r--r--arch/arm64/kernel/cpuinfo.c1
-rw-r--r--arch/arm64/kernel/entry-fpsimd.S24
-rw-r--r--arch/arm64/kernel/entry.S283
-rw-r--r--arch/arm64/kernel/fpsimd.c170
-rw-r--r--arch/arm64/kernel/head.S23
-rw-r--r--arch/arm64/kernel/hibernate.c4
-rw-r--r--arch/arm64/kernel/irq.c40
-rw-r--r--arch/arm64/kernel/kaslr.c20
-rw-r--r--arch/arm64/kernel/machine_kexec.c2
-rw-r--r--arch/arm64/kernel/pci.c17
-rw-r--r--arch/arm64/kernel/perf_callchain.c1
-rw-r--r--arch/arm64/kernel/perf_event.c208
-rw-r--r--arch/arm64/kernel/probes/uprobes.c2
-rw-r--r--arch/arm64/kernel/process.c15
-rw-r--r--arch/arm64/kernel/ptrace.c5
-rw-r--r--arch/arm64/kernel/return_address.c1
-rw-r--r--arch/arm64/kernel/signal.c15
-rw-r--r--arch/arm64/kernel/signal32.c2
-rw-r--r--arch/arm64/kernel/smp.c16
-rw-r--r--arch/arm64/kernel/stacktrace.c60
-rw-r--r--arch/arm64/kernel/time.c1
-rw-r--r--arch/arm64/kernel/traps.c79
-rw-r--r--arch/arm64/kernel/vdso.c15
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S18
-rw-r--r--arch/arm64/kvm/handle_exit.c2
-rw-r--r--arch/arm64/kvm/hyp/s2-setup.c2
-rw-r--r--arch/arm64/kvm/vgic-sys-reg-v3.c23
-rw-r--r--arch/arm64/lib/Makefile2
-rw-r--r--arch/arm64/lib/uaccess_flushcache.c47
-rw-r--r--arch/arm64/mm/cache.S37
-rw-r--r--arch/arm64/mm/dma-mapping.c4
-rw-r--r--arch/arm64/mm/fault.c82
-rw-r--r--arch/arm64/mm/flush.c16
-rw-r--r--arch/arm64/mm/hugetlbpage.c314
-rw-r--r--arch/arm64/net/bpf_jit.h4
-rw-r--r--arch/arm64/net/bpf_jit_comp.c20
-rw-r--r--arch/blackfin/include/asm/spinlock.h5
-rw-r--r--arch/blackfin/kernel/module.c39
-rw-r--r--arch/c6x/configs/dsk6455_defconfig2
-rw-r--r--arch/c6x/configs/evmc6457_defconfig2
-rw-r--r--arch/c6x/configs/evmc6472_defconfig2
-rw-r--r--arch/c6x/configs/evmc6474_defconfig2
-rw-r--r--arch/c6x/configs/evmc6678_defconfig2
-rw-r--r--arch/c6x/platforms/megamod-pic.c22
-rw-r--r--arch/c6x/platforms/plldata.c4
-rw-r--r--arch/c6x/platforms/timer64.c8
-rw-r--r--arch/cris/arch-v32/drivers/pci/bios.c4
-rw-r--r--arch/cris/arch-v32/mach-a3/arbiter.c4
-rw-r--r--arch/cris/arch-v32/mach-fs/arbiter.c4
-rw-r--r--arch/cris/kernel/traps.c6
-rw-r--r--arch/frv/Kconfig3
-rw-r--r--arch/frv/include/asm/futex.h3
-rw-r--r--arch/frv/include/uapi/asm/socket.h2
-rw-r--r--arch/frv/kernel/futex.c27
-rw-r--r--arch/h8300/Kconfig3
-rw-r--r--arch/h8300/include/asm/traps.h6
-rw-r--r--arch/hexagon/include/asm/atomic.h2
-rw-r--r--arch/hexagon/include/asm/futex.h38
-rw-r--r--arch/hexagon/include/asm/spinlock.h5
-rw-r--r--arch/ia64/include/asm/acpi.h2
-rw-r--r--arch/ia64/include/asm/futex.h25
-rw-r--r--arch/ia64/include/asm/spinlock.h21
-rw-r--r--arch/ia64/include/asm/tlb.h8
-rw-r--r--arch/ia64/include/uapi/asm/socket.h2
-rw-r--r--arch/ia64/kernel/acpi.c6
-rw-r--r--arch/ia64/kernel/efi.c4
-rw-r--r--arch/ia64/pci/pci.c7
-rw-r--r--arch/m32r/configs/m32104ut_defconfig4
-rw-r--r--arch/m32r/configs/m32700ut.smp_defconfig3
-rw-r--r--arch/m32r/configs/m32700ut.up_defconfig3
-rw-r--r--arch/m32r/configs/mappi.nommu_defconfig2
-rw-r--r--arch/m32r/configs/mappi.smp_defconfig4
-rw-r--r--arch/m32r/configs/mappi.up_defconfig4
-rw-r--r--arch/m32r/configs/mappi2.opsp_defconfig2
-rw-r--r--arch/m32r/configs/mappi2.vdec2_defconfig2
-rw-r--r--arch/m32r/configs/mappi3.smp_defconfig4
-rw-r--r--arch/m32r/configs/oaks32r_defconfig2
-rw-r--r--arch/m32r/configs/opsput_defconfig2
-rw-r--r--arch/m32r/configs/usrv_defconfig5
-rw-r--r--arch/m32r/include/asm/flat.h3
-rw-r--r--arch/m32r/include/asm/spinlock.h5
-rw-r--r--arch/m32r/include/uapi/asm/socket.h2
-rw-r--r--arch/m68k/Kconfig3
-rw-r--r--arch/m68k/coldfire/pci.c36
-rw-r--r--arch/m68k/configs/amiga_defconfig7
-rw-r--r--arch/m68k/configs/apollo_defconfig7
-rw-r--r--arch/m68k/configs/atari_defconfig8
-rw-r--r--arch/m68k/configs/bvme6000_defconfig7
-rw-r--r--arch/m68k/configs/hp300_defconfig7
-rw-r--r--arch/m68k/configs/mac_defconfig7
-rw-r--r--arch/m68k/configs/multi_defconfig7
-rw-r--r--arch/m68k/configs/mvme147_defconfig7
-rw-r--r--arch/m68k/configs/mvme16x_defconfig7
-rw-r--r--arch/m68k/configs/q40_defconfig7
-rw-r--r--arch/m68k/configs/sun3_defconfig7
-rw-r--r--arch/m68k/configs/sun3x_defconfig7
-rw-r--r--arch/m68k/include/asm/asm-prototypes.h5
-rw-r--r--arch/m68k/mac/misc.c16
-rw-r--r--arch/metag/Kconfig1
-rw-r--r--arch/metag/include/asm/atomic_lock1.h2
-rw-r--r--arch/metag/include/asm/spinlock.h5
-rw-r--r--arch/metag/include/asm/topology.h1
-rw-r--r--arch/microblaze/Kconfig16
-rw-r--r--arch/microblaze/Makefile2
-rw-r--r--arch/microblaze/include/asm/flat.h2
-rw-r--r--arch/microblaze/include/asm/futex.h38
-rw-r--r--arch/microblaze/include/asm/pci.h3
-rw-r--r--arch/microblaze/kernel/timer.c2
-rw-r--r--arch/microblaze/pci/pci-common.c159
-rw-r--r--arch/mips/Kconfig2
-rw-r--r--arch/mips/Makefile15
-rw-r--r--arch/mips/boot/compressed/.gitignore2
-rw-r--r--arch/mips/cavium-octeon/octeon-usb.c2
-rw-r--r--arch/mips/configs/pistachio_defconfig2
-rw-r--r--arch/mips/dec/int-handler.S34
-rw-r--r--arch/mips/include/asm/cache.h2
-rw-r--r--arch/mips/include/asm/cpu-features.h3
-rw-r--r--arch/mips/include/asm/futex.h25
-rw-r--r--arch/mips/include/asm/kvm_host.h5
-rw-r--r--arch/mips/include/asm/octeon/cvmx-l2c-defs.h37
-rw-r--r--arch/mips/include/asm/octeon/cvmx-l2d-defs.h60
-rw-r--r--arch/mips/include/asm/octeon/cvmx.h1
-rw-r--r--arch/mips/include/asm/vga.h7
-rw-r--r--arch/mips/include/uapi/asm/mman.h14
-rw-r--r--arch/mips/include/uapi/asm/socket.h2
-rw-r--r--arch/mips/kernel/ptrace.c10
-rw-r--r--arch/mips/kernel/scall32-o32.S11
-rw-r--r--arch/mips/kernel/scall64-o32.S6
-rw-r--r--arch/mips/kernel/smp.c12
-rw-r--r--arch/mips/kvm/mips.c5
-rw-r--r--arch/mips/mm/uasm-mips.c2
-rw-r--r--arch/mips/net/ebpf_jit.c1995
-rw-r--r--arch/mips/pci/pci-legacy.c30
-rw-r--r--arch/mips/pci/pci.c7
-rw-r--r--arch/mips/vdso/gettimeofday.c6
-rw-r--r--arch/mn10300/configs/asb2303_defconfig6
-rw-r--r--arch/mn10300/configs/asb2364_defconfig8
-rw-r--r--arch/mn10300/include/asm/spinlock.h5
-rw-r--r--arch/mn10300/include/uapi/asm/socket.h2
-rw-r--r--arch/openrisc/Kconfig3
-rw-r--r--arch/openrisc/include/asm/futex.h39
-rw-r--r--arch/parisc/Kconfig12
-rw-r--r--arch/parisc/Makefile16
-rw-r--r--arch/parisc/boot/.gitignore2
-rw-r--r--arch/parisc/boot/Makefile26
-rw-r--r--arch/parisc/boot/compressed/.gitignore3
-rw-r--r--arch/parisc/boot/compressed/Makefile86
-rw-r--r--arch/parisc/boot/compressed/head.S85
-rw-r--r--arch/parisc/boot/compressed/misc.c301
-rw-r--r--arch/parisc/boot/compressed/vmlinux.lds.S101
-rw-r--r--arch/parisc/boot/compressed/vmlinux.scr10
-rw-r--r--arch/parisc/boot/install.sh65
-rw-r--r--arch/parisc/configs/c3000_defconfig1
-rw-r--r--arch/parisc/include/asm/atomic.h2
-rw-r--r--arch/parisc/include/asm/futex.h26
-rw-r--r--arch/parisc/include/asm/mmu_context.h3
-rw-r--r--arch/parisc/include/asm/page.h4
-rw-r--r--arch/parisc/include/asm/pdc.h2
-rw-r--r--arch/parisc/include/asm/pdcpat.h14
-rw-r--r--arch/parisc/include/asm/spinlock.h7
-rw-r--r--arch/parisc/include/uapi/asm/mman.h20
-rw-r--r--arch/parisc/include/uapi/asm/socket.h2
-rw-r--r--arch/parisc/kernel/firmware.c62
-rw-r--r--arch/parisc/kernel/pci-dma.c3
-rw-r--r--arch/parisc/kernel/pdt.c280
-rw-r--r--arch/parisc/kernel/perf.c4
-rw-r--r--arch/parisc/kernel/processor.c30
-rw-r--r--arch/parisc/kernel/real2.S4
-rw-r--r--arch/parisc/kernel/unwind.c4
-rw-r--r--arch/parisc/lib/memcpy.c2
-rw-r--r--arch/powerpc/Kconfig40
-rw-r--r--arch/powerpc/Makefile6
-rw-r--r--arch/powerpc/boot/4xx.c2
-rw-r--r--arch/powerpc/boot/Makefile11
-rw-r--r--arch/powerpc/boot/crt0.S20
-rw-r--r--arch/powerpc/boot/dts/fsp2.dts33
-rw-r--r--arch/powerpc/boot/ppc_asm.h8
-rw-r--r--arch/powerpc/boot/serial.c4
-rw-r--r--arch/powerpc/boot/util.S24
-rw-r--r--arch/powerpc/configs/40x/acadia_defconfig1
-rw-r--r--arch/powerpc/configs/40x/ep405_defconfig1
-rw-r--r--arch/powerpc/configs/40x/kilauea_defconfig1
-rw-r--r--arch/powerpc/configs/40x/klondike_defconfig1
-rw-r--r--arch/powerpc/configs/40x/makalu_defconfig1
-rw-r--r--arch/powerpc/configs/40x/obs600_defconfig1
-rw-r--r--arch/powerpc/configs/40x/virtex_defconfig3
-rw-r--r--arch/powerpc/configs/40x/walnut_defconfig1
-rw-r--r--arch/powerpc/configs/44x/akebono_defconfig2
-rw-r--r--arch/powerpc/configs/44x/bamboo_defconfig1
-rw-r--r--arch/powerpc/configs/44x/currituck_defconfig2
-rw-r--r--arch/powerpc/configs/44x/ebony_defconfig1
-rw-r--r--arch/powerpc/configs/44x/eiger_defconfig4
-rw-r--r--arch/powerpc/configs/44x/fsp2_defconfig3
-rw-r--r--arch/powerpc/configs/44x/icon_defconfig3
-rw-r--r--arch/powerpc/configs/44x/iss476-smp_defconfig2
-rw-r--r--arch/powerpc/configs/44x/katmai_defconfig1
-rw-r--r--arch/powerpc/configs/44x/rainier_defconfig1
-rw-r--r--arch/powerpc/configs/44x/redwood_defconfig4
-rw-r--r--arch/powerpc/configs/44x/sequoia_defconfig1
-rw-r--r--arch/powerpc/configs/44x/taishan_defconfig1
-rw-r--r--arch/powerpc/configs/44x/virtex5_defconfig3
-rw-r--r--arch/powerpc/configs/44x/warp_defconfig1
-rw-r--r--arch/powerpc/configs/52xx/cm5200_defconfig1
-rw-r--r--arch/powerpc/configs/52xx/lite5200b_defconfig3
-rw-r--r--arch/powerpc/configs/52xx/motionpro_defconfig15
-rw-r--r--arch/powerpc/configs/52xx/tqm5200_defconfig2
-rw-r--r--arch/powerpc/configs/83xx/asp8347_defconfig2
-rw-r--r--arch/powerpc/configs/83xx/kmeter1_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc8313_rdb_defconfig3
-rw-r--r--arch/powerpc/configs/83xx/mpc8315_rdb_defconfig2
-rw-r--r--arch/powerpc/configs/83xx/mpc832x_mds_defconfig4
-rw-r--r--arch/powerpc/configs/83xx/mpc832x_rdb_defconfig6
-rw-r--r--arch/powerpc/configs/83xx/mpc834x_itx_defconfig2
-rw-r--r--arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig1
-rw-r--r--arch/powerpc/configs/83xx/mpc834x_mds_defconfig2
-rw-r--r--arch/powerpc/configs/83xx/mpc836x_mds_defconfig4
-rw-r--r--arch/powerpc/configs/83xx/mpc836x_rdk_defconfig5
-rw-r--r--arch/powerpc/configs/83xx/mpc837x_mds_defconfig4
-rw-r--r--arch/powerpc/configs/83xx/mpc837x_rdb_defconfig5
-rw-r--r--arch/powerpc/configs/83xx/sbc834x_defconfig4
-rw-r--r--arch/powerpc/configs/85xx/ge_imp3a_defconfig3
-rw-r--r--arch/powerpc/configs/85xx/ksi8560_defconfig4
-rw-r--r--arch/powerpc/configs/85xx/mpc8540_ads_defconfig4
-rw-r--r--arch/powerpc/configs/85xx/mpc8560_ads_defconfig6
-rw-r--r--arch/powerpc/configs/85xx/mpc85xx_cds_defconfig4
-rw-r--r--arch/powerpc/configs/85xx/sbc8548_defconfig4
-rw-r--r--arch/powerpc/configs/85xx/socrates_defconfig3
-rw-r--r--arch/powerpc/configs/85xx/stx_gp3_defconfig3
-rw-r--r--arch/powerpc/configs/85xx/tqm8540_defconfig4
-rw-r--r--arch/powerpc/configs/85xx/tqm8541_defconfig4
-rw-r--r--arch/powerpc/configs/85xx/tqm8548_defconfig2
-rw-r--r--arch/powerpc/configs/85xx/tqm8555_defconfig4
-rw-r--r--arch/powerpc/configs/85xx/tqm8560_defconfig4
-rw-r--r--arch/powerpc/configs/85xx/xes_mpc85xx_defconfig5
-rw-r--r--arch/powerpc/configs/adder875_defconfig2
-rw-r--r--arch/powerpc/configs/amigaone_defconfig2
-rw-r--r--arch/powerpc/configs/be.config1
-rw-r--r--arch/powerpc/configs/c2k_defconfig5
-rw-r--r--arch/powerpc/configs/cell_defconfig9
-rw-r--r--arch/powerpc/configs/chrp32_defconfig3
-rw-r--r--arch/powerpc/configs/ep8248e_defconfig1
-rw-r--r--arch/powerpc/configs/ep88xc_defconfig2
-rw-r--r--arch/powerpc/configs/g5_defconfig11
-rw-r--r--arch/powerpc/configs/gamecube_defconfig2
-rw-r--r--arch/powerpc/configs/holly_defconfig3
-rw-r--r--arch/powerpc/configs/linkstation_defconfig3
-rw-r--r--arch/powerpc/configs/maple_defconfig9
-rw-r--r--arch/powerpc/configs/mgcoge_defconfig2
-rw-r--r--arch/powerpc/configs/mpc512x_defconfig28
-rw-r--r--arch/powerpc/configs/mpc5200_defconfig4
-rw-r--r--arch/powerpc/configs/mpc7448_hpc2_defconfig4
-rw-r--r--arch/powerpc/configs/mpc8272_ads_defconfig1
-rw-r--r--arch/powerpc/configs/mpc83xx_defconfig10
-rw-r--r--arch/powerpc/configs/mpc866_ads_defconfig4
-rw-r--r--arch/powerpc/configs/mpc86xx_basic_defconfig12
-rw-r--r--arch/powerpc/configs/mpc885_ads_defconfig2
-rw-r--r--arch/powerpc/configs/mvme5100_defconfig3
-rw-r--r--arch/powerpc/configs/pasemi_defconfig5
-rw-r--r--arch/powerpc/configs/pmac32_defconfig11
-rw-r--r--arch/powerpc/configs/powernv_defconfig21
-rw-r--r--arch/powerpc/configs/ppc40x_defconfig3
-rw-r--r--arch/powerpc/configs/ppc44x_defconfig4
-rw-r--r--arch/powerpc/configs/ppc64_defconfig41
-rw-r--r--arch/powerpc/configs/ppc64e_defconfig10
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig46
-rw-r--r--arch/powerpc/configs/pq2fads_defconfig1
-rw-r--r--arch/powerpc/configs/ps3_defconfig3
-rw-r--r--arch/powerpc/configs/pseries_defconfig45
-rw-r--r--arch/powerpc/configs/tqm8xx_defconfig3
-rw-r--r--arch/powerpc/configs/wii_defconfig5
-rw-r--r--arch/powerpc/include/asm/Kbuild1
-rw-r--r--arch/powerpc/include/asm/asm-compat.h2
-rw-r--r--arch/powerpc/include/asm/barrier.h7
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h4
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h6
-rw-r--r--arch/powerpc/include/asm/book3s/64/hugetlb.h4
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h3
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h8
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgalloc.h2
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h9
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix.h2
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h5
-rw-r--r--arch/powerpc/include/asm/bug.h1
-rw-r--r--arch/powerpc/include/asm/cache.h2
-rw-r--r--arch/powerpc/include/asm/cpuidle.h27
-rw-r--r--arch/powerpc/include/asm/cputable.h4
-rw-r--r--arch/powerpc/include/asm/eeh.h5
-rw-r--r--arch/powerpc/include/asm/fadump.h2
-rw-r--r--arch/powerpc/include/asm/feature-fixups.h6
-rw-r--r--arch/powerpc/include/asm/fixmap.h10
-rw-r--r--arch/powerpc/include/asm/fs_pd.h2
-rw-r--r--arch/powerpc/include/asm/futex.h26
-rw-r--r--arch/powerpc/include/asm/hardirq.h4
-rw-r--r--arch/powerpc/include/asm/hugetlb.h14
-rw-r--r--arch/powerpc/include/asm/hvcall.h13
-rw-r--r--arch/powerpc/include/asm/icswx.h3
-rw-r--r--arch/powerpc/include/asm/imc-pmu.h128
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_asm.h4
-rw-r--r--arch/powerpc/include/asm/kvm_host.h5
-rw-r--r--arch/powerpc/include/asm/machdep.h1
-rw-r--r--arch/powerpc/include/asm/mmu_context.h58
-rw-r--r--arch/powerpc/include/asm/nmi.h3
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h5
-rw-r--r--arch/powerpc/include/asm/nohash/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/opal-api.h24
-rw-r--r--arch/powerpc/include/asm/opal.h19
-rw-r--r--arch/powerpc/include/asm/paca.h7
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h1
-rw-r--r--arch/powerpc/include/asm/pgalloc.h2
-rw-r--r--arch/powerpc/include/asm/pgtable-be-types.h1
-rw-r--r--arch/powerpc/include/asm/pgtable-types.h1
-rw-r--r--arch/powerpc/include/asm/pgtable.h12
-rw-r--r--arch/powerpc/include/asm/pnv-pci.h2
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h16
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h16
-rw-r--r--arch/powerpc/include/asm/prom.h5
-rw-r--r--arch/powerpc/include/asm/pte-walk.h35
-rw-r--r--arch/powerpc/include/asm/reg.h99
-rw-r--r--arch/powerpc/include/asm/reg_booke.h3
-rw-r--r--arch/powerpc/include/asm/setup.h1
-rw-r--r--arch/powerpc/include/asm/smp.h6
-rw-r--r--arch/powerpc/include/asm/spinlock.h36
-rw-r--r--arch/powerpc/include/asm/sstep.h95
-rw-r--r--arch/powerpc/include/asm/string.h26
-rw-r--r--arch/powerpc/include/asm/timex.h6
-rw-r--r--arch/powerpc/include/asm/tlb.h11
-rw-r--r--arch/powerpc/include/asm/topology.h2
-rw-r--r--arch/powerpc/include/asm/vas.h159
-rw-r--r--arch/powerpc/include/asm/vga.h8
-rw-r--r--arch/powerpc/include/asm/xive.h5
-rw-r--r--arch/powerpc/include/uapi/asm/mman.h16
-rw-r--r--arch/powerpc/kernel/Makefile4
-rw-r--r--arch/powerpc/kernel/align.c774
-rw-r--r--arch/powerpc/kernel/asm-offsets.c8
-rw-r--r--arch/powerpc/kernel/btext.c2
-rw-r--r--arch/powerpc/kernel/cacheinfo.c34
-rw-r--r--arch/powerpc/kernel/cputable.c8
-rw-r--r--arch/powerpc/kernel/eeh.c20
-rw-r--r--arch/powerpc/kernel/eeh_dev.c7
-rw-r--r--arch/powerpc/kernel/eeh_driver.c2
-rw-r--r--arch/powerpc/kernel/eeh_pe.c90
-rw-r--r--arch/powerpc/kernel/eeh_sysfs.c3
-rw-r--r--arch/powerpc/kernel/entry_32.S22
-rw-r--r--arch/powerpc/kernel/entry_64.S69
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S37
-rw-r--r--arch/powerpc/kernel/fadump.c31
-rw-r--r--arch/powerpc/kernel/head_32.S6
-rw-r--r--arch/powerpc/kernel/head_64.S8
-rw-r--r--arch/powerpc/kernel/head_8xx.S109
-rw-r--r--arch/powerpc/kernel/idle_book3s.S130
-rw-r--r--arch/powerpc/kernel/io-workarounds.c9
-rw-r--r--arch/powerpc/kernel/iommu.c7
-rw-r--r--arch/powerpc/kernel/irq.c65
-rw-r--r--arch/powerpc/kernel/isa-bridge.c32
-rw-r--r--arch/powerpc/kernel/kgdb.c4
-rw-r--r--arch/powerpc/kernel/kvm.c7
-rw-r--r--arch/powerpc/kernel/l2cr_6xx.S4
-rw-r--r--arch/powerpc/kernel/legacy_serial.c12
-rw-r--r--arch/powerpc/kernel/mce.c33
-rw-r--r--arch/powerpc/kernel/of_platform.c2
-rw-r--r--arch/powerpc/kernel/optprobes_head.S8
-rw-r--r--arch/powerpc/kernel/paca.c15
-rw-r--r--arch/powerpc/kernel/pci-common.c15
-rw-r--r--arch/powerpc/kernel/pci_32.c4
-rw-r--r--arch/powerpc/kernel/pci_64.c4
-rw-r--r--arch/powerpc/kernel/pci_dn.c22
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c24
-rw-r--r--arch/powerpc/kernel/process.c104
-rw-r--r--arch/powerpc/kernel/prom_init.c34
-rw-r--r--arch/powerpc/kernel/ptrace.c42
-rw-r--r--arch/powerpc/kernel/reloc_64.S6
-rw-r--r--arch/powerpc/kernel/rtas_pci.c33
-rw-r--r--arch/powerpc/kernel/setup-common.c38
-rw-r--r--arch/powerpc/kernel/setup_32.c7
-rw-r--r--arch/powerpc/kernel/setup_64.c39
-rw-r--r--arch/powerpc/kernel/smp.c234
-rw-r--r--arch/powerpc/kernel/swsusp_asm64.S2
-rw-r--r--arch/powerpc/kernel/systbl.S14
-rw-r--r--arch/powerpc/kernel/traps.c305
-rw-r--r--arch/powerpc/kernel/uprobes.c9
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S12
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S2
-rw-r--r--arch/powerpc/kernel/watchdog.c52
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c6
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c32
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c57
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c12
-rw-r--r--arch/powerpc/kvm/book3s_hv.c29
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c20
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S16
-rw-r--r--arch/powerpc/kvm/book3s_xive_template.c68
-rw-r--r--arch/powerpc/kvm/e500.c8
-rw-r--r--arch/powerpc/kvm/e500_mmu_host.c3
-rw-r--r--arch/powerpc/kvm/e500mc.c4
-rw-r--r--arch/powerpc/kvm/powerpc.c5
-rw-r--r--arch/powerpc/lib/Makefile3
-rw-r--r--arch/powerpc/lib/copy_32.S44
-rw-r--r--arch/powerpc/lib/copypage_power7.S14
-rw-r--r--arch/powerpc/lib/copyuser_power7.S66
-rw-r--r--arch/powerpc/lib/ldstfp.S299
-rw-r--r--arch/powerpc/lib/mem_64.S19
-rw-r--r--arch/powerpc/lib/memcpy_power7.S66
-rw-r--r--arch/powerpc/lib/quad.S62
-rw-r--r--arch/powerpc/lib/sstep.c2232
-rw-r--r--arch/powerpc/lib/string_64.S2
-rw-r--r--arch/powerpc/mm/8xx_mmu.c29
-rw-r--r--arch/powerpc/mm/Makefile4
-rw-r--r--arch/powerpc/mm/dump_hashpagetable.c2
-rw-r--r--arch/powerpc/mm/dump_linuxpagetables.c2
-rw-r--r--arch/powerpc/mm/fault.c562
-rw-r--r--arch/powerpc/mm/hash_low_32.S2
-rw-r--r--arch/powerpc/mm/hash_utils_64.c16
-rw-r--r--arch/powerpc/mm/hugetlbpage.c219
-rw-r--r--arch/powerpc/mm/icswx.c292
-rw-r--r--arch/powerpc/mm/icswx.h68
-rw-r--r--arch/powerpc/mm/icswx_pid.c87
-rw-r--r--arch/powerpc/mm/init_32.c8
-rw-r--r--arch/powerpc/mm/init_64.c6
-rw-r--r--arch/powerpc/mm/mem.c2
-rw-r--r--arch/powerpc/mm/mmu_context.c99
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c20
-rw-r--r--arch/powerpc/mm/mmu_decl.h10
-rw-r--r--arch/powerpc/mm/pgtable-book3s64.c24
-rw-r--r--arch/powerpc/mm/pgtable-hash64.c12
-rw-r--r--arch/powerpc/mm/pgtable-radix.c33
-rw-r--r--arch/powerpc/mm/pgtable_32.c66
-rw-r--r--arch/powerpc/mm/pgtable_64.c2
-rw-r--r--arch/powerpc/mm/slb_low.S23
-rw-r--r--arch/powerpc/mm/tlb-radix.c108
-rw-r--r--arch/powerpc/mm/tlb_hash64.c13
-rw-r--r--arch/powerpc/mm/tlb_nohash_low.S2
-rw-r--r--arch/powerpc/net/bpf_jit.h1
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c26
-rw-r--r--arch/powerpc/perf/Makefile1
-rw-r--r--arch/powerpc/perf/callchain.c3
-rw-r--r--arch/powerpc/perf/core-book3s.c3
-rw-r--r--arch/powerpc/perf/imc-pmu.c1306
-rw-r--r--arch/powerpc/perf/isa207-common.c31
-rw-r--r--arch/powerpc/perf/isa207-common.h5
-rw-r--r--arch/powerpc/perf/power8-pmu.c33
-rw-r--r--arch/powerpc/perf/power9-events-list.h9
-rw-r--r--arch/powerpc/perf/power9-pmu.c23
-rw-r--r--arch/powerpc/platforms/44x/Makefile4
-rw-r--r--arch/powerpc/platforms/44x/machine_check.c89
-rw-r--r--arch/powerpc/platforms/4xx/Makefile8
-rw-r--r--arch/powerpc/platforms/4xx/cpm.c (renamed from arch/powerpc/sysdev/ppc4xx_cpm.c)10
-rw-r--r--arch/powerpc/platforms/4xx/gpio.c (renamed from arch/powerpc/sysdev/ppc4xx_gpio.c)3
-rw-r--r--arch/powerpc/platforms/4xx/hsta_msi.c (renamed from arch/powerpc/sysdev/ppc4xx_hsta_msi.c)0
-rw-r--r--arch/powerpc/platforms/4xx/machine_check.c26
-rw-r--r--arch/powerpc/platforms/4xx/msi.c (renamed from arch/powerpc/sysdev/ppc4xx_msi.c)3
-rw-r--r--arch/powerpc/platforms/4xx/ocm.c (renamed from arch/powerpc/sysdev/ppc4xx_ocm.c)0
-rw-r--r--arch/powerpc/platforms/4xx/pci.c (renamed from arch/powerpc/sysdev/ppc4xx_pci.c)118
-rw-r--r--arch/powerpc/platforms/4xx/pci.h (renamed from arch/powerpc/sysdev/ppc4xx_pci.h)0
-rw-r--r--arch/powerpc/platforms/4xx/soc.c (renamed from arch/powerpc/sysdev/ppc4xx_soc.c)5
-rw-r--r--arch/powerpc/platforms/4xx/uic.c (renamed from arch/powerpc/sysdev/uic.c)14
-rw-r--r--arch/powerpc/platforms/512x/clock-commonclk.c4
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_shared.c12
-rw-r--r--arch/powerpc/platforms/52xx/efika.c8
-rw-r--r--arch/powerpc/platforms/52xx/media5200.c2
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_gpt.c4
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_pci.c8
-rw-r--r--arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c3
-rw-r--r--arch/powerpc/platforms/83xx/mpc832x_rdb.c2
-rw-r--r--arch/powerpc/platforms/83xx/suspend.c4
-rw-r--r--arch/powerpc/platforms/85xx/p1022_ds.c8
-rw-r--r--arch/powerpc/platforms/85xx/xes_mpc85xx.c4
-rw-r--r--arch/powerpc/platforms/8xx/Kconfig3
-rw-r--r--arch/powerpc/platforms/8xx/Makefile2
-rw-r--r--arch/powerpc/platforms/8xx/m8xx_setup.c2
-rw-r--r--arch/powerpc/platforms/8xx/machine_check.c37
-rw-r--r--arch/powerpc/platforms/8xx/pic.c (renamed from arch/powerpc/sysdev/mpc8xx_pic.c)2
-rw-r--r--arch/powerpc/platforms/8xx/pic.h (renamed from arch/powerpc/sysdev/mpc8xx_pic.h)0
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype45
-rw-r--r--arch/powerpc/platforms/Makefile1
-rw-r--r--arch/powerpc/platforms/amigaone/setup.c6
-rw-r--r--arch/powerpc/platforms/cell/axon_msi.c36
-rw-r--r--arch/powerpc/platforms/cell/interrupt.c4
-rw-r--r--arch/powerpc/platforms/cell/iommu.c24
-rw-r--r--arch/powerpc/platforms/cell/ras.c4
-rw-r--r--arch/powerpc/platforms/cell/spider-pci.c4
-rw-r--r--arch/powerpc/platforms/cell/spider-pic.c4
-rw-r--r--arch/powerpc/platforms/cell/spu_manage.c26
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c2
-rw-r--r--arch/powerpc/platforms/chrp/pci.c18
-rw-r--r--arch/powerpc/platforms/chrp/pegasos_eth.c2
-rw-r--r--arch/powerpc/platforms/embedded6xx/linkstation.c6
-rw-r--r--arch/powerpc/platforms/embedded6xx/mvme5100.c2
-rw-r--r--arch/powerpc/platforms/embedded6xx/storcenter.c2
-rw-r--r--arch/powerpc/platforms/maple/pci.c10
-rw-r--r--arch/powerpc/platforms/pasemi/pci.c2
-rw-r--r--arch/powerpc/platforms/powermac/feature.c14
-rw-r--r--arch/powerpc/platforms/powermac/low_i2c.c50
-rw-r--r--arch/powerpc/platforms/powermac/pci.c6
-rw-r--r--arch/powerpc/platforms/powermac/pfunc_base.c24
-rw-r--r--arch/powerpc/platforms/powermac/pfunc_core.c6
-rw-r--r--arch/powerpc/platforms/powermac/pic.c8
-rw-r--r--arch/powerpc/platforms/powermac/setup.c2
-rw-r--r--arch/powerpc/platforms/powernv/Kconfig22
-rw-r--r--arch/powerpc/platforms/powernv/Makefile5
-rw-r--r--arch/powerpc/platforms/powernv/copy-paste.h46
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c29
-rw-r--r--arch/powerpc/platforms/powernv/idle.c75
-rw-r--r--arch/powerpc/platforms/powernv/memtrace.c282
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c22
-rw-r--r--arch/powerpc/platforms/powernv/opal-async.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-flash.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-hmi.c22
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c226
-rw-r--r--arch/powerpc/platforms/powernv/opal-powercap.c244
-rw-r--r--arch/powerpc/platforms/powernv/opal-prd.c13
-rw-r--r--arch/powerpc/platforms/powernv/opal-psr.c175
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor-groups.c212
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S11
-rw-r--r--arch/powerpc/platforms/powernv/opal-xscom.c8
-rw-r--r--arch/powerpc/platforms/powernv/opal.c152
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c18
-rw-r--r--arch/powerpc/platforms/powernv/pci.c75
-rw-r--r--arch/powerpc/platforms/powernv/pci.h5
-rw-r--r--arch/powerpc/platforms/powernv/powernv.h2
-rw-r--r--arch/powerpc/platforms/powernv/rng.c45
-rw-r--r--arch/powerpc/platforms/powernv/smp.c11
-rw-r--r--arch/powerpc/platforms/powernv/vas-window.c1134
-rw-r--r--arch/powerpc/platforms/powernv/vas.c151
-rw-r--r--arch/powerpc/platforms/powernv/vas.h467
-rw-r--r--arch/powerpc/platforms/ps3/repository.c22
-rw-r--r--arch/powerpc/platforms/ps3/setup.c15
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig1
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c9
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c4
-rw-r--r--arch/powerpc/platforms/pseries/event_sources.c6
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c17
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c57
-rw-r--r--arch/powerpc/platforms/pseries/hvCall.S2
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c5
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c58
-rw-r--r--arch/powerpc/platforms/pseries/kexec.c6
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c2
-rw-r--r--arch/powerpc/platforms/pseries/msi.c23
-rw-r--r--arch/powerpc/platforms/pseries/pci_dlpar.c2
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h2
-rw-r--r--arch/powerpc/platforms/pseries/pseries_energy.c7
-rw-r--r--arch/powerpc/platforms/pseries/ras.c15
-rw-r--r--arch/powerpc/platforms/pseries/reconfig.c15
-rw-r--r--arch/powerpc/platforms/pseries/setup.c9
-rw-r--r--arch/powerpc/platforms/pseries/smp.c27
-rw-r--r--arch/powerpc/platforms/pseries/vio.c10
-rw-r--r--arch/powerpc/purgatory/trampoline.S6
-rw-r--r--arch/powerpc/sysdev/Makefile12
-rw-r--r--arch/powerpc/sysdev/axonram.c48
-rw-r--r--arch/powerpc/sysdev/dcr.c4
-rw-r--r--arch/powerpc/sysdev/fsl_85xx_cache_sram.c12
-rw-r--r--arch/powerpc/sysdev/fsl_gtm.c14
-rw-r--r--arch/powerpc/sysdev/fsl_msi.c16
-rw-r--r--arch/powerpc/sysdev/fsl_pci.c47
-rw-r--r--arch/powerpc/sysdev/fsl_rio.c36
-rw-r--r--arch/powerpc/sysdev/fsl_rmu.c12
-rw-r--r--arch/powerpc/sysdev/fsl_soc.c2
-rw-r--r--arch/powerpc/sysdev/fsl_soc.h2
-rw-r--r--arch/powerpc/sysdev/ipic.c1
-rw-r--r--arch/powerpc/sysdev/mpic.c4
-rw-r--r--arch/powerpc/sysdev/mpic_msgr.c2
-rw-r--r--arch/powerpc/sysdev/mpic_msi.c2
-rw-r--r--arch/powerpc/sysdev/mpic_timer.c19
-rw-r--r--arch/powerpc/sysdev/msi_bitmap.c4
-rw-r--r--arch/powerpc/sysdev/mv64x60_dev.c20
-rw-r--r--arch/powerpc/sysdev/mv64x60_pci.c6
-rw-r--r--arch/powerpc/sysdev/of_rtc.c12
-rw-r--r--arch/powerpc/sysdev/scom.c5
-rw-r--r--arch/powerpc/sysdev/simple_gpio.c3
-rw-r--r--arch/powerpc/sysdev/tsi108_pci.c4
-rw-r--r--arch/powerpc/sysdev/xive/Kconfig5
-rw-r--r--arch/powerpc/sysdev/xive/Makefile1
-rw-r--r--arch/powerpc/sysdev/xive/common.c90
-rw-r--r--arch/powerpc/sysdev/xive/native.c26
-rw-r--r--arch/powerpc/sysdev/xive/spapr.c662
-rw-r--r--arch/powerpc/sysdev/xive/xive-internal.h7
-rw-r--r--arch/powerpc/xmon/Makefile4
-rw-r--r--arch/powerpc/xmon/xmon.c86
-rw-r--r--arch/s390/Kconfig18
-rw-r--r--arch/s390/Makefile6
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/s390/include/asm/compat.h5
-rw-r--r--arch/s390/include/asm/cpcmd.h7
-rw-r--r--arch/s390/include/asm/ebcdic.h4
-rw-r--r--arch/s390/include/asm/elf.h2
-rw-r--r--arch/s390/include/asm/futex.h23
-rw-r--r--arch/s390/include/asm/ipl.h2
-rw-r--r--arch/s390/include/asm/kvm_host.h6
-rw-r--r--arch/s390/include/asm/lowcore.h48
-rw-r--r--arch/s390/include/asm/mman.h11
-rw-r--r--arch/s390/include/asm/mmu_context.h38
-rw-r--r--arch/s390/include/asm/nmi.h2
-rw-r--r--arch/s390/include/asm/page-states.h3
-rw-r--r--arch/s390/include/asm/page.h37
-rw-r--r--arch/s390/include/asm/pgalloc.h18
-rw-r--r--arch/s390/include/asm/pgtable.h197
-rw-r--r--arch/s390/include/asm/qdio.h2
-rw-r--r--arch/s390/include/asm/setup.h17
-rw-r--r--arch/s390/include/asm/spinlock.h16
-rw-r--r--arch/s390/include/asm/timex.h40
-rw-r--r--arch/s390/include/asm/tlb.h23
-rw-r--r--arch/s390/include/asm/tlbflush.h7
-rw-r--r--arch/s390/include/asm/topology.h6
-rw-r--r--arch/s390/include/asm/types.h11
-rw-r--r--arch/s390/include/asm/unaligned.h13
-rw-r--r--arch/s390/include/uapi/asm/Kbuild1
-rw-r--r--arch/s390/include/uapi/asm/dasd.h6
-rw-r--r--arch/s390/include/uapi/asm/kvm.h6
-rw-r--r--arch/s390/include/uapi/asm/socket.h2
-rw-r--r--arch/s390/include/uapi/asm/swab.h89
-rw-r--r--arch/s390/include/uapi/asm/vmcp.h24
-rw-r--r--arch/s390/kernel/asm-offsets.c1
-rw-r--r--arch/s390/kernel/cpcmd.c13
-rw-r--r--arch/s390/kernel/debug.c9
-rw-r--r--arch/s390/kernel/dumpstack.c2
-rw-r--r--arch/s390/kernel/early.c17
-rw-r--r--arch/s390/kernel/head.S3
-rw-r--r--arch/s390/kernel/head64.S4
-rw-r--r--arch/s390/kernel/irq.c3
-rw-r--r--arch/s390/kernel/relocate_kernel.S5
-rw-r--r--arch/s390/kernel/setup.c14
-rw-r--r--arch/s390/kernel/smp.c1
-rw-r--r--arch/s390/kernel/suspend.c24
-rw-r--r--arch/s390/kernel/time.c67
-rw-r--r--arch/s390/kernel/vdso.c2
-rw-r--r--arch/s390/kernel/vdso32/vdso32.lds.S4
-rw-r--r--arch/s390/kernel/vdso64/vdso64.lds.S4
-rw-r--r--arch/s390/kvm/diag.c10
-rw-r--r--arch/s390/kvm/gaccess.c35
-rw-r--r--arch/s390/kvm/guestdbg.c2
-rw-r--r--arch/s390/kvm/interrupt.c6
-rw-r--r--arch/s390/kvm/kvm-s390.c118
-rw-r--r--arch/s390/kvm/kvm-s390.h2
-rw-r--r--arch/s390/kvm/priv.c14
-rw-r--r--arch/s390/kvm/sigp.c36
-rw-r--r--arch/s390/kvm/sthyi.c13
-rw-r--r--arch/s390/kvm/vsie.c18
-rw-r--r--arch/s390/lib/delay.c2
-rw-r--r--arch/s390/lib/spinlock.c87
-rw-r--r--arch/s390/lib/uaccess.c38
-rw-r--r--arch/s390/mm/fault.c10
-rw-r--r--arch/s390/mm/gmap.c163
-rw-r--r--arch/s390/mm/init.c60
-rw-r--r--arch/s390/mm/mmap.c6
-rw-r--r--arch/s390/mm/page-states.c192
-rw-r--r--arch/s390/mm/pageattr.c5
-rw-r--r--arch/s390/mm/pgalloc.c12
-rw-r--r--arch/s390/mm/pgtable.c160
-rw-r--r--arch/s390/mm/vmem.c47
-rw-r--r--arch/s390/net/bpf_jit_comp.c27
-rw-r--r--arch/s390/pci/pci.c4
-rw-r--r--arch/s390/pci/pci_clp.c10
-rw-r--r--arch/s390/tools/gen_facilities.c6
-rw-r--r--arch/sh/configs/ap325rxa_defconfig10
-rw-r--r--arch/sh/configs/apsh4a3a_defconfig9
-rw-r--r--arch/sh/configs/apsh4ad0a_defconfig6
-rw-r--r--arch/sh/configs/cayman_defconfig4
-rw-r--r--arch/sh/configs/dreamcast_defconfig6
-rw-r--r--arch/sh/configs/ecovec24-romimage_defconfig7
-rw-r--r--arch/sh/configs/ecovec24_defconfig9
-rw-r--r--arch/sh/configs/edosk7705_defconfig2
-rw-r--r--arch/sh/configs/edosk7760_defconfig11
-rw-r--r--arch/sh/configs/espt_defconfig10
-rw-r--r--arch/sh/configs/hp6xx_defconfig4
-rw-r--r--arch/sh/configs/kfr2r09-romimage_defconfig5
-rw-r--r--arch/sh/configs/kfr2r09_defconfig7
-rw-r--r--arch/sh/configs/landisk_defconfig4
-rw-r--r--arch/sh/configs/lboxre2_defconfig3
-rw-r--r--arch/sh/configs/magicpanelr2_defconfig9
-rw-r--r--arch/sh/configs/microdev_defconfig3
-rw-r--r--arch/sh/configs/migor_defconfig8
-rw-r--r--arch/sh/configs/polaris_defconfig9
-rw-r--r--arch/sh/configs/r7780mp_defconfig4
-rw-r--r--arch/sh/configs/r7785rp_defconfig3
-rw-r--r--arch/sh/configs/rsk7201_defconfig8
-rw-r--r--arch/sh/configs/rsk7203_defconfig10
-rw-r--r--arch/sh/configs/rsk7264_defconfig3
-rw-r--r--arch/sh/configs/rsk7269_defconfig4
-rw-r--r--arch/sh/configs/rts7751r2d1_defconfig5
-rw-r--r--arch/sh/configs/rts7751r2dplus_defconfig8
-rw-r--r--arch/sh/configs/sdk7780_defconfig9
-rw-r--r--arch/sh/configs/sdk7786_defconfig9
-rw-r--r--arch/sh/configs/se7206_defconfig8
-rw-r--r--arch/sh/configs/se7343_defconfig9
-rw-r--r--arch/sh/configs/se7619_defconfig5
-rw-r--r--arch/sh/configs/se7705_defconfig5
-rw-r--r--arch/sh/configs/se7712_defconfig7
-rw-r--r--arch/sh/configs/se7721_defconfig6
-rw-r--r--arch/sh/configs/se7722_defconfig3
-rw-r--r--arch/sh/configs/se7724_defconfig9
-rw-r--r--arch/sh/configs/se7750_defconfig5
-rw-r--r--arch/sh/configs/se7751_defconfig6
-rw-r--r--arch/sh/configs/se7780_defconfig7
-rw-r--r--arch/sh/configs/secureedge5410_defconfig9
-rw-r--r--arch/sh/configs/sh03_defconfig4
-rw-r--r--arch/sh/configs/sh2007_defconfig8
-rw-r--r--arch/sh/configs/sh7710voipgw_defconfig5
-rw-r--r--arch/sh/configs/sh7724_generic_defconfig3
-rw-r--r--arch/sh/configs/sh7757lcr_defconfig4
-rw-r--r--arch/sh/configs/sh7763rdp_defconfig9
-rw-r--r--arch/sh/configs/sh7770_generic_defconfig3
-rw-r--r--arch/sh/configs/sh7785lcr_32bit_defconfig10
-rw-r--r--arch/sh/configs/sh7785lcr_defconfig9
-rw-r--r--arch/sh/configs/shmin_defconfig4
-rw-r--r--arch/sh/configs/shx3_defconfig6
-rw-r--r--arch/sh/configs/titan_defconfig8
-rw-r--r--arch/sh/configs/ul2_defconfig9
-rw-r--r--arch/sh/configs/urquell_defconfig9
-rw-r--r--arch/sh/drivers/pci/fixups-cayman.c2
-rw-r--r--arch/sh/drivers/pci/fixups-dreamcast.c2
-rw-r--r--arch/sh/drivers/pci/fixups-r7780rp.c2
-rw-r--r--arch/sh/drivers/pci/fixups-rts7751r2d.c6
-rw-r--r--arch/sh/drivers/pci/fixups-sdk7780.c4
-rw-r--r--arch/sh/drivers/pci/fixups-se7751.c2
-rw-r--r--arch/sh/drivers/pci/fixups-sh03.c2
-rw-r--r--arch/sh/drivers/pci/fixups-snapgear.c2
-rw-r--r--arch/sh/drivers/pci/fixups-titan.c4
-rw-r--r--arch/sh/drivers/pci/pci.c49
-rw-r--r--arch/sh/drivers/pci/pcie-sh7786.c2
-rw-r--r--arch/sh/include/asm/futex.h26
-rw-r--r--arch/sh/include/asm/spinlock-cas.h5
-rw-r--r--arch/sh/include/asm/spinlock-llsc.h5
-rw-r--r--arch/sh/include/asm/tlb.h8
-rw-r--r--arch/sparc/Kconfig3
-rw-r--r--arch/sparc/crypto/aes_glue.c3
-rw-r--r--arch/sparc/include/asm/atomic_32.h2
-rw-r--r--arch/sparc/include/asm/futex_64.h26
-rw-r--r--arch/sparc/include/asm/page_32.h2
-rw-r--r--arch/sparc/include/asm/spinlock_32.h5
-rw-r--r--arch/sparc/include/asm/spitfire.h16
-rw-r--r--arch/sparc/include/asm/vga.h25
-rw-r--r--arch/sparc/include/uapi/asm/socket.h2
-rw-r--r--arch/sparc/kernel/cpu.c6
-rw-r--r--arch/sparc/kernel/cpumap.c1
-rw-r--r--arch/sparc/kernel/head_64.S22
-rw-r--r--arch/sparc/kernel/leon_pci.c30
-rw-r--r--arch/sparc/kernel/pci.c10
-rw-r--r--arch/sparc/kernel/pci_sun4v.c2
-rw-r--r--arch/sparc/kernel/pcic.c8
-rw-r--r--arch/sparc/kernel/setup_64.c15
-rw-r--r--arch/sparc/lib/multi3.S24
-rw-r--r--arch/sparc/mm/init_64.c14
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c34
-rw-r--r--arch/tile/include/asm/atomic_32.h2
-rw-r--r--arch/tile/include/asm/futex.h40
-rw-r--r--arch/tile/include/asm/spinlock_32.h2
-rw-r--r--arch/tile/include/asm/spinlock_64.h2
-rw-r--r--arch/tile/kernel/pci.c39
-rw-r--r--arch/tile/kernel/pci_gx.c35
-rw-r--r--arch/tile/kernel/setup.c2
-rw-r--r--arch/tile/lib/spinlock_32.c23
-rw-r--r--arch/tile/lib/spinlock_64.c22
-rw-r--r--arch/um/include/asm/tlb.h13
-rw-r--r--arch/um/include/asm/unwind.h8
-rw-r--r--arch/unicore32/kernel/pci.c35
-rw-r--r--arch/x86/Kbuild5
-rw-r--r--arch/x86/Kconfig84
-rw-r--r--arch/x86/Kconfig.debug59
-rw-r--r--arch/x86/Makefile17
-rw-r--r--arch/x86/boot/compressed/eboot.c13
-rw-r--r--arch/x86/boot/compressed/head_32.S129
-rw-r--r--arch/x86/boot/compressed/head_64.S112
-rw-r--r--arch/x86/boot/compressed/kaslr.c147
-rw-r--r--arch/x86/boot/compressed/misc.c3
-rw-r--r--arch/x86/boot/compressed/pagetable.c7
-rw-r--r--arch/x86/boot/header.S8
-rw-r--r--arch/x86/configs/tiny.config2
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c4
-rw-r--r--arch/x86/crypto/blowfish_glue.c3
-rw-r--r--arch/x86/crypto/cast5_avx_glue.c3
-rw-r--r--arch/x86/crypto/des3_ede_glue.c3
-rw-r--r--arch/x86/crypto/sha1_avx2_x86_64_asm.S67
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c2
-rw-r--r--arch/x86/entry/Makefile1
-rw-r--r--arch/x86/entry/calling.h5
-rw-r--r--arch/x86/entry/common.c3
-rw-r--r--arch/x86/entry/entry_32.S20
-rw-r--r--arch/x86/entry/entry_64.S239
-rw-r--r--arch/x86/entry/entry_64_compat.S12
-rw-r--r--arch/x86/entry/vdso/vma.c2
-rw-r--r--arch/x86/events/amd/uncore.c21
-rw-r--r--arch/x86/events/core.c80
-rw-r--r--arch/x86/events/intel/Makefile2
-rw-r--r--arch/x86/events/intel/bts.c4
-rw-r--r--arch/x86/events/intel/core.c107
-rw-r--r--arch/x86/events/intel/cqm.c1766
-rw-r--r--arch/x86/events/intel/ds.c58
-rw-r--r--arch/x86/events/intel/lbr.c52
-rw-r--r--arch/x86/events/intel/p4.c2
-rw-r--r--arch/x86/events/intel/pt.c5
-rw-r--r--arch/x86/events/intel/rapl.c2
-rw-r--r--arch/x86/events/intel/uncore.c2
-rw-r--r--arch/x86/events/intel/uncore_nhmex.c12
-rw-r--r--arch/x86/events/intel/uncore_snb.c6
-rw-r--r--arch/x86/events/intel/uncore_snbep.c42
-rw-r--r--arch/x86/events/perf_event.h10
-rw-r--r--arch/x86/hyperv/Makefile2
-rw-r--r--arch/x86/hyperv/hv_init.c90
-rw-r--r--arch/x86/hyperv/mmu.c272
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/include/asm/acpi.h13
-rw-r--r--arch/x86/include/asm/asm.h6
-rw-r--r--arch/x86/include/asm/atomic.h69
-rw-r--r--arch/x86/include/asm/atomic64_32.h81
-rw-r--r--arch/x86/include/asm/atomic64_64.h73
-rw-r--r--arch/x86/include/asm/cmdline.h2
-rw-r--r--arch/x86/include/asm/cmpxchg.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h6
-rw-r--r--arch/x86/include/asm/desc.h248
-rw-r--r--arch/x86/include/asm/desc_defs.h122
-rw-r--r--arch/x86/include/asm/disabled-features.h4
-rw-r--r--arch/x86/include/asm/dma-mapping.h5
-rw-r--r--arch/x86/include/asm/dmi.h8
-rw-r--r--arch/x86/include/asm/e820/api.h2
-rw-r--r--arch/x86/include/asm/elf.h23
-rw-r--r--arch/x86/include/asm/entry_arch.h17
-rw-r--r--arch/x86/include/asm/fixmap.h20
-rw-r--r--arch/x86/include/asm/fpu/internal.h6
-rw-r--r--arch/x86/include/asm/futex.h40
-rw-r--r--arch/x86/include/asm/hw_irq.h20
-rw-r--r--arch/x86/include/asm/hypervisor.h10
-rw-r--r--arch/x86/include/asm/init.h1
-rw-r--r--arch/x86/include/asm/intel_rdt.h286
-rw-r--r--arch/x86/include/asm/intel_rdt_common.h27
-rw-r--r--arch/x86/include/asm/intel_rdt_sched.h92
-rw-r--r--arch/x86/include/asm/io.h106
-rw-r--r--arch/x86/include/asm/irq.h4
-rw-r--r--arch/x86/include/asm/irq_work.h8
-rw-r--r--arch/x86/include/asm/kexec.h11
-rw-r--r--arch/x86/include/asm/kvm_emulate.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h45
-rw-r--r--arch/x86/include/asm/lguest.h91
-rw-r--r--arch/x86/include/asm/lguest_hcall.h74
-rw-r--r--arch/x86/include/asm/mem_encrypt.h80
-rw-r--r--arch/x86/include/asm/mmu.h25
-rw-r--r--arch/x86/include/asm/mmu_context.h19
-rw-r--r--arch/x86/include/asm/module.h9
-rw-r--r--arch/x86/include/asm/mpx.h9
-rw-r--r--arch/x86/include/asm/mshyperv.h149
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/orc_lookup.h46
-rw-r--r--arch/x86/include/asm/orc_types.h107
-rw-r--r--arch/x86/include/asm/page_64.h4
-rw-r--r--arch/x86/include/asm/page_types.h3
-rw-r--r--arch/x86/include/asm/paravirt.h5
-rw-r--r--arch/x86/include/asm/paravirt_types.h3
-rw-r--r--arch/x86/include/asm/pgtable.h45
-rw-r--r--arch/x86/include/asm/pgtable_64.h14
-rw-r--r--arch/x86/include/asm/pgtable_types.h68
-rw-r--r--arch/x86/include/asm/processor-flags.h13
-rw-r--r--arch/x86/include/asm/processor.h25
-rw-r--r--arch/x86/include/asm/proto.h3
-rw-r--r--arch/x86/include/asm/ptrace.h43
-rw-r--r--arch/x86/include/asm/realmode.h12
-rw-r--r--arch/x86/include/asm/refcount.h109
-rw-r--r--arch/x86/include/asm/rmwcc.h37
-rw-r--r--arch/x86/include/asm/segment.h4
-rw-r--r--arch/x86/include/asm/set_memory.h3
-rw-r--r--arch/x86/include/asm/setup.h1
-rw-r--r--arch/x86/include/asm/string_32.h24
-rw-r--r--arch/x86/include/asm/string_64.h36
-rw-r--r--arch/x86/include/asm/svm.h6
-rw-r--r--arch/x86/include/asm/thread_info.h5
-rw-r--r--arch/x86/include/asm/tlb.h14
-rw-r--r--arch/x86/include/asm/tlbflush.h89
-rw-r--r--arch/x86/include/asm/topology.h6
-rw-r--r--arch/x86/include/asm/trace/common.h16
-rw-r--r--arch/x86/include/asm/trace/exceptions.h8
-rw-r--r--arch/x86/include/asm/trace/hyperv.h40
-rw-r--r--arch/x86/include/asm/trace/irq_vectors.h51
-rw-r--r--arch/x86/include/asm/traps.h50
-rw-r--r--arch/x86/include/asm/uaccess.h7
-rw-r--r--arch/x86/include/asm/unwind.h76
-rw-r--r--arch/x86/include/asm/unwind_hints.h105
-rw-r--r--arch/x86/include/asm/vga.h14
-rw-r--r--arch/x86/include/asm/vmx.h22
-rw-r--r--arch/x86/include/asm/xen/hypercall.h6
-rw-r--r--arch/x86/include/asm/xen/page.h5
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h2
-rw-r--r--arch/x86/include/uapi/asm/hyperv.h17
-rw-r--r--arch/x86/include/uapi/asm/mman.h3
-rw-r--r--arch/x86/kernel/Makefile11
-rw-r--r--arch/x86/kernel/acpi/boot.c17
-rw-r--r--arch/x86/kernel/alternative.c22
-rw-r--r--arch/x86/kernel/apic/apic.c78
-rw-r--r--arch/x86/kernel/apic/io_apic.c2
-rw-r--r--arch/x86/kernel/apic/vector.c2
-rw-r--r--arch/x86/kernel/asm-offsets_32.c20
-rw-r--r--arch/x86/kernel/asm-offsets_64.c1
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/amd.c55
-rw-r--r--arch/x86/kernel/cpu/aperfmperf.c3
-rw-r--r--arch/x86/kernel/cpu/bugs.c8
-rw-r--r--arch/x86/kernel/cpu/common.c64
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c32
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.c375
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.h440
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c (renamed from arch/x86/kernel/cpu/intel_rdt_schemata.c)67
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_monitor.c499
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c1117
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c43
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c25
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c22
-rw-r--r--arch/x86/kernel/cpu/mcheck/threshold.c16
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c5
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c4
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c27
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c22
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c18
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/dumpstack.c14
-rw-r--r--arch/x86/kernel/dumpstack_32.c4
-rw-r--r--arch/x86/kernel/dumpstack_64.c4
-rw-r--r--arch/x86/kernel/e820.c26
-rw-r--r--arch/x86/kernel/early-quirks.c5
-rw-r--r--arch/x86/kernel/eisa.c19
-rw-r--r--arch/x86/kernel/espfix_64.c2
-rw-r--r--arch/x86/kernel/head32.c4
-rw-r--r--arch/x86/kernel/head64.c106
-rw-r--r--arch/x86/kernel/head_32.S66
-rw-r--r--arch/x86/kernel/head_64.S40
-rw-r--r--arch/x86/kernel/idt.c371
-rw-r--r--arch/x86/kernel/irq.c40
-rw-r--r--arch/x86/kernel/irq_work.c20
-rw-r--r--arch/x86/kernel/irqinit.c102
-rw-r--r--arch/x86/kernel/kdebugfs.c34
-rw-r--r--arch/x86/kernel/kprobes/opt.c9
-rw-r--r--arch/x86/kernel/ksysfs.c32
-rw-r--r--arch/x86/kernel/kvm.c4
-rw-r--r--arch/x86/kernel/ldt.c21
-rw-r--r--arch/x86/kernel/machine_kexec_32.c14
-rw-r--r--arch/x86/kernel/machine_kexec_64.c25
-rw-r--r--arch/x86/kernel/module.c11
-rw-r--r--arch/x86/kernel/mpparse.c108
-rw-r--r--arch/x86/kernel/nmi.c18
-rw-r--r--arch/x86/kernel/paravirt.c3
-rw-r--r--arch/x86/kernel/pci-dma.c11
-rw-r--r--arch/x86/kernel/pci-nommu.c2
-rw-r--r--arch/x86/kernel/pci-swiotlb.c15
-rw-r--r--arch/x86/kernel/platform-quirks.c1
-rw-r--r--arch/x86/kernel/process.c17
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c244
-rw-r--r--arch/x86/kernel/quirks.c10
-rw-r--r--arch/x86/kernel/reboot.c4
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S14
-rw-r--r--arch/x86/kernel/setup.c18
-rw-r--r--arch/x86/kernel/setup_percpu.c11
-rw-r--r--arch/x86/kernel/signal.c2
-rw-r--r--arch/x86/kernel/smp.c81
-rw-r--r--arch/x86/kernel/smpboot.c32
-rw-r--r--arch/x86/kernel/step.c2
-rw-r--r--arch/x86/kernel/sys_x86_64.c30
-rw-r--r--arch/x86/kernel/tls.c2
-rw-r--r--arch/x86/kernel/tracepoint.c57
-rw-r--r--arch/x86/kernel/traps.c107
-rw-r--r--arch/x86/kernel/unwind_frame.c41
-rw-r--r--arch/x86/kernel/unwind_guess.c5
-rw-r--r--arch/x86/kernel/unwind_orc.c582
-rw-r--r--arch/x86/kernel/vmlinux.lds.S3
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/cpuid.c36
-rw-r--r--arch/x86/kvm/cpuid.h186
-rw-r--r--arch/x86/kvm/emulate.c42
-rw-r--r--arch/x86/kvm/hyperv.c8
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h7
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/lapic.h3
-rw-r--r--arch/x86/kvm/mmu.c308
-rw-r--r--arch/x86/kvm/mmu.h25
-rw-r--r--arch/x86/kvm/mmu_audit.c4
-rw-r--r--arch/x86/kvm/mtrr.c2
-rw-r--r--arch/x86/kvm/paging_tmpl.h6
-rw-r--r--arch/x86/kvm/svm.c183
-rw-r--r--arch/x86/kvm/trace.h11
-rw-r--r--arch/x86/kvm/vmx.c649
-rw-r--r--arch/x86/kvm/x86.c244
-rw-r--r--arch/x86/kvm/x86.h54
-rw-r--r--arch/x86/lguest/Kconfig14
-rw-r--r--arch/x86/lguest/Makefile2
-rw-r--r--arch/x86/lguest/boot.c1558
-rw-r--r--arch/x86/lguest/head_32.S192
-rw-r--r--arch/x86/lib/cmdline.c105
-rw-r--r--arch/x86/math-emu/div_Xsig.S1
-rw-r--r--arch/x86/math-emu/div_small.S2
-rw-r--r--arch/x86/math-emu/fpu_entry.c11
-rw-r--r--arch/x86/math-emu/fpu_system.h48
-rw-r--r--arch/x86/math-emu/get_address.c17
-rw-r--r--arch/x86/math-emu/mul_Xsig.S4
-rw-r--r--arch/x86/math-emu/polynom_Xsig.S1
-rw-r--r--arch/x86/math-emu/reg_norm.S2
-rw-r--r--arch/x86/math-emu/reg_round.S2
-rw-r--r--arch/x86/math-emu/reg_u_add.S1
-rw-r--r--arch/x86/math-emu/reg_u_div.S2
-rw-r--r--arch/x86/math-emu/reg_u_mul.S1
-rw-r--r--arch/x86/math-emu/reg_u_sub.S1
-rw-r--r--arch/x86/math-emu/round_Xsig.S4
-rw-r--r--arch/x86/math-emu/shr_Xsig.S1
-rw-r--r--arch/x86/math-emu/wm_shrx.S2
-rw-r--r--arch/x86/math-emu/wm_sqrt.S1
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/dump_pagetables.c93
-rw-r--r--arch/x86/mm/extable.c44
-rw-r--r--arch/x86/mm/fault.c75
-rw-r--r--arch/x86/mm/hugetlbpage.c27
-rw-r--r--arch/x86/mm/ident_map.c12
-rw-r--r--arch/x86/mm/init.c5
-rw-r--r--arch/x86/mm/init_64.c22
-rw-r--r--arch/x86/mm/ioremap.c287
-rw-r--r--arch/x86/mm/kasan_init_64.c6
-rw-r--r--arch/x86/mm/mem_encrypt.c593
-rw-r--r--arch/x86/mm/mem_encrypt_boot.S149
-rw-r--r--arch/x86/mm/mmap.c19
-rw-r--r--arch/x86/mm/mpx.c33
-rw-r--r--arch/x86/mm/numa_emulation.c55
-rw-r--r--arch/x86/mm/pageattr.c67
-rw-r--r--arch/x86/mm/pat.c9
-rw-r--r--arch/x86/mm/pgtable.c8
-rw-r--r--arch/x86/mm/tlb.c365
-rw-r--r--arch/x86/net/bpf_jit_comp.c35
-rw-r--r--arch/x86/pci/common.c4
-rw-r--r--arch/x86/pci/fixup.c17
-rw-r--r--arch/x86/pci/intel_mid_pci.c12
-rw-r--r--arch/x86/platform/efi/efi.c19
-rw-r--r--arch/x86/platform/efi/efi_64.c15
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_bt.c2
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c6
-rw-r--r--arch/x86/platform/intel-mid/intel-mid.c2
-rw-r--r--arch/x86/platform/intel-mid/pwr.c4
-rw-r--r--arch/x86/platform/uv/tlb_uv.c4
-rw-r--r--arch/x86/power/cpu.c1
-rw-r--r--arch/x86/realmode/init.c12
-rw-r--r--arch/x86/realmode/rm/trampoline_64.S24
-rw-r--r--arch/x86/um/user-offsets.c2
-rw-r--r--arch/x86/xen/Kconfig5
-rw-r--r--arch/x86/xen/enlighten_hvm.c59
-rw-r--r--arch/x86/xen/enlighten_pv.c176
-rw-r--r--arch/x86/xen/irq.c3
-rw-r--r--arch/x86/xen/mmu.c2
-rw-r--r--arch/x86/xen/mmu_pv.c25
-rw-r--r--arch/x86/xen/p2m.c25
-rw-r--r--arch/x86/xen/setup.c5
-rw-r--r--arch/x86/xen/xen-asm.S26
-rw-r--r--arch/x86/xen/xen-asm.h12
-rw-r--r--arch/x86/xen/xen-asm_32.S27
-rw-r--r--arch/x86/xen/xen-asm_64.S102
-rw-r--r--arch/x86/xen/xen-head.S2
-rw-r--r--arch/x86/xen/xen-ops.h16
-rw-r--r--arch/xtensa/include/asm/Kbuild2
-rw-r--r--arch/xtensa/include/asm/device.h15
-rw-r--r--arch/xtensa/include/asm/futex.h27
-rw-r--r--arch/xtensa/include/asm/param.h18
-rw-r--r--arch/xtensa/include/asm/spinlock.h5
-rw-r--r--arch/xtensa/include/uapi/asm/mman.h14
-rw-r--r--arch/xtensa/include/uapi/asm/socket.h2
-rw-r--r--arch/xtensa/kernel/setup.c6
-rw-r--r--arch/xtensa/kernel/xtensa_ksyms.c2
-rw-r--r--arch/xtensa/mm/cache.c16
1308 files changed, 33523 insertions, 18039 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 21d0089117fe..1aafb4efbb51 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -458,6 +458,13 @@ config GCC_PLUGIN_STRUCTLEAK
* https://grsecurity.net/
* https://pax.grsecurity.net/
+config GCC_PLUGIN_STRUCTLEAK_BYREF_ALL
+ bool "Force initialize all struct type variables passed by reference"
+ depends on GCC_PLUGIN_STRUCTLEAK
+ help
+ Zero initialize any struct type local variable that may be passed by
+ reference without having been initialized.
+
config GCC_PLUGIN_STRUCTLEAK_VERBOSE
bool "Report forcefully initialized variables"
depends on GCC_PLUGIN_STRUCTLEAK
@@ -473,11 +480,13 @@ config GCC_PLUGIN_RANDSTRUCT
depends on GCC_PLUGINS
select MODVERSIONS if MODULES
help
- If you say Y here, the layouts of structures explicitly
- marked by __randomize_layout will be randomized at
- compile-time. This can introduce the requirement of an
- additional information exposure vulnerability for exploits
- targeting these structure types.
+ If you say Y here, the layouts of structures that are entirely
+ function pointers (and have not been manually annotated with
+ __no_randomize_layout), or structures that have been explicitly
+ marked with __randomize_layout, will be randomized at compile-time.
+ This can introduce the requirement of an additional information
+ exposure vulnerability for exploits targeting these structure
+ types.
Enabling this feature will introduce some performance impact,
slightly increase memory usage, and prevent the use of forensic
@@ -931,6 +940,18 @@ config STRICT_MODULE_RWX
config ARCH_WANT_RELAX_ORDER
bool
+config ARCH_HAS_REFCOUNT
+ bool
+ help
+ An architecture selects this when it has implemented refcount_t
+ using open coded assembly primitives that provide an optimized
+ refcount_t implementation, possibly at the expense of some full
+ refcount state checks of CONFIG_REFCOUNT_FULL=y.
+
+ The refcount overflow check behavior, however, must be retained.
+ Catching overflows is the primary security concern for protecting
+ against bugs in reference counts.
+
config REFCOUNT_FULL
bool "Perform full reference count validation at the expense of speed"
help
diff --git a/arch/alpha/defconfig b/arch/alpha/defconfig
index 539e8b5a6cbd..f4ec420d7f2d 100644
--- a/arch/alpha/defconfig
+++ b/arch/alpha/defconfig
@@ -19,7 +19,6 @@ CONFIG_INET_AH=m
CONFIG_INET_ESP=m
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
-CONFIG_IP_NF_QUEUE=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_FILTER=m
CONFIG_VLAN_8021Q=m
@@ -57,7 +56,6 @@ CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_RTC=y
CONFIG_EXT2_FS=y
CONFIG_REISERFS_FS=m
-CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index d103db5af5ff..5b974ab8425c 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -3,6 +3,7 @@
generic-y += clkdev.h
generic-y += exec.h
generic-y += export.h
+generic-y += fb.h
generic-y += irq_work.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
diff --git a/arch/alpha/include/asm/asm-prototypes.h b/arch/alpha/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..d12c68ea340b
--- /dev/null
+++ b/arch/alpha/include/asm/asm-prototypes.h
@@ -0,0 +1,18 @@
+#include <linux/spinlock.h>
+
+#include <asm/checksum.h>
+#include <asm/console.h>
+#include <asm/page.h>
+#include <asm/string.h>
+#include <asm/uaccess.h>
+
+#include <asm-generic/asm-prototypes.h>
+
+extern void __divl(void);
+extern void __reml(void);
+extern void __divq(void);
+extern void __remq(void);
+extern void __divlu(void);
+extern void __remlu(void);
+extern void __divqu(void);
+extern void __remqu(void);
diff --git a/arch/alpha/include/asm/core_marvel.h b/arch/alpha/include/asm/core_marvel.h
index dad300fa14ce..8dcf9dbda618 100644
--- a/arch/alpha/include/asm/core_marvel.h
+++ b/arch/alpha/include/asm/core_marvel.h
@@ -312,7 +312,7 @@ struct io7 {
io7_port7_csrs *csrs;
struct io7_port ports[IO7_NUM_PORTS];
- spinlock_t irq_lock;
+ raw_spinlock_t irq_lock;
};
#ifndef __EXTERN_INLINE
diff --git a/arch/alpha/include/asm/fb.h b/arch/alpha/include/asm/fb.h
deleted file mode 100644
index fa9bbb96b2b3..000000000000
--- a/arch/alpha/include/asm/fb.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef _ASM_FB_H_
-#define _ASM_FB_H_
-#include <linux/device.h>
-
-/* Caching is off in the I/O space quadrant by design. */
-#define fb_pgprotect(...) do {} while (0)
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
- return 0;
-}
-
-#endif /* _ASM_FB_H_ */
diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h
index fb01dfb760c2..05a70edd57b6 100644
--- a/arch/alpha/include/asm/futex.h
+++ b/arch/alpha/include/asm/futex.h
@@ -25,18 +25,10 @@
: "r" (uaddr), "r"(oparg) \
: "memory")
-static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+ u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
- return -EFAULT;
pagefault_disable();
@@ -62,17 +54,9 @@ static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
pagefault_enable();
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = oldval;
+
return ret;
}
diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h
index ff4049155c84..4d61d2a50c52 100644
--- a/arch/alpha/include/asm/io.h
+++ b/arch/alpha/include/asm/io.h
@@ -299,6 +299,7 @@ static inline void __iomem * ioremap_nocache(unsigned long offset,
return ioremap(offset, size);
}
+#define ioremap_wc ioremap_nocache
#define ioremap_uc ioremap_nocache
static inline void iounmap(volatile void __iomem *addr)
diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index a40b9fc0c6c3..718ac0b64adf 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -16,11 +16,6 @@
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
#define arch_spin_is_locked(x) ((x)->lock != 0)
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- smp_cond_load_acquire(&lock->lock, !VAL);
-}
-
static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
return lock.lock == 0;
diff --git a/arch/alpha/include/asm/string.h b/arch/alpha/include/asm/string.h
index c2911f591704..9eb9933d845f 100644
--- a/arch/alpha/include/asm/string.h
+++ b/arch/alpha/include/asm/string.h
@@ -65,13 +65,14 @@ extern void * memchr(const void *, int, size_t);
aligned values. The DEST and COUNT parameters must be even for
correct operation. */
-#define __HAVE_ARCH_MEMSETW
-extern void * __memsetw(void *dest, unsigned short, size_t count);
-
-#define memsetw(s, c, n) \
-(__builtin_constant_p(c) \
- ? __constant_c_memset((s),0x0001000100010001UL*(unsigned short)(c),(n)) \
- : __memsetw((s),(c),(n)))
+#define __HAVE_ARCH_MEMSET16
+extern void * __memset16(void *dest, unsigned short, size_t count);
+static inline void *memset16(uint16_t *p, uint16_t v, size_t n)
+{
+ if (__builtin_constant_p(v))
+ return __constant_c_memset(p, 0x0001000100010001UL * v, n * 2);
+ return __memset16(p, v, n * 2);
+}
#endif /* __KERNEL__ */
diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h
index 4cb4b6d3452c..0bc66e1d3a7e 100644
--- a/arch/alpha/include/asm/types.h
+++ b/arch/alpha/include/asm/types.h
@@ -1,6 +1,6 @@
#ifndef _ALPHA_TYPES_H
#define _ALPHA_TYPES_H
-#include <asm-generic/int-ll64.h>
+#include <uapi/asm/types.h>
#endif /* _ALPHA_TYPES_H */
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index b37153ecf2ac..db7fc0f511e2 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -3,7 +3,7 @@
#include <uapi/asm/unistd.h>
-#define NR_SYSCALLS 514
+#define NR_SYSCALLS 523
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_STAT64
diff --git a/arch/alpha/include/asm/vga.h b/arch/alpha/include/asm/vga.h
index c00106bac521..3c1c2b6128e7 100644
--- a/arch/alpha/include/asm/vga.h
+++ b/arch/alpha/include/asm/vga.h
@@ -34,7 +34,7 @@ static inline void scr_memsetw(u16 *s, u16 c, unsigned int count)
if (__is_ioaddr(s))
memsetw_io((u16 __iomem *) s, c, count);
else
- memsetw(s, c, count);
+ memset16(s, c, count / 2);
}
/* Do not trust that the usage will be correct; analyze the arguments. */
diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index 02760f6e6ca4..3b26cc62dadb 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -64,20 +64,12 @@
overrides the coredump filter bits */
#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */
+#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
+
/* compatibility flags */
#define MAP_FILE 0
-/*
- * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
- * This gives us 6 bits, which is enough until someone invents 128 bit address
- * spaces.
- *
- * Assume these are all power of twos.
- * When 0 use the default page size.
- */
-#define MAP_HUGE_SHIFT 26
-#define MAP_HUGE_MASK 0x3f
-
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 7b285dd4fe05..c6133a045352 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -109,4 +109,6 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/alpha/include/uapi/asm/types.h b/arch/alpha/include/uapi/asm/types.h
index 9fd3cd459777..8d1024d7be05 100644
--- a/arch/alpha/include/uapi/asm/types.h
+++ b/arch/alpha/include/uapi/asm/types.h
@@ -9,8 +9,18 @@
* need to be careful to avoid a name clashes.
*/
-#ifndef __KERNEL__
+/*
+ * This is here because we used to use l64 for alpha
+ * and we don't want to impact user mode with our change to ll64
+ * in the kernel.
+ *
+ * However, some user programs are fine with this. They can
+ * flag __SANE_USERSPACE_TYPES__ to get int-ll64.h here.
+ */
+#if !defined(__SANE_USERSPACE_TYPES__) && !defined(__KERNEL__)
#include <asm-generic/int-l64.h>
+#else
+#include <asm-generic/int-ll64.h>
#endif
#endif /* _UAPI_ALPHA_TYPES_H */
diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h
index aa33bf5aacb6..53de540e39a7 100644
--- a/arch/alpha/include/uapi/asm/unistd.h
+++ b/arch/alpha/include/uapi/asm/unistd.h
@@ -366,11 +366,6 @@
#define __NR_epoll_create 407
#define __NR_epoll_ctl 408
#define __NR_epoll_wait 409
-/* Feb 2007: These three sys_epoll defines shouldn't be here but culling
- * them would break userspace apps ... we'll kill them off in 2010 :) */
-#define __NR_sys_epoll_create __NR_epoll_create
-#define __NR_sys_epoll_ctl __NR_epoll_ctl
-#define __NR_sys_epoll_wait __NR_epoll_wait
#define __NR_remap_file_pages 410
#define __NR_set_tid_address 411
#define __NR_restart_syscall 412
@@ -475,5 +470,19 @@
#define __NR_getrandom 511
#define __NR_memfd_create 512
#define __NR_execveat 513
+#define __NR_seccomp 514
+#define __NR_bpf 515
+#define __NR_userfaultfd 516
+#define __NR_membarrier 517
+#define __NR_mlock2 518
+#define __NR_copy_file_range 519
+#define __NR_preadv2 520
+#define __NR_pwritev2 521
+#define __NR_statx 522
+
+/* Alpha doesn't have protection keys. */
+#define __IGNORE_pkey_mprotect
+#define __IGNORE_pkey_alloc
+#define __IGNORE_pkey_free
#endif /* _UAPI_ALPHA_UNISTD_H */
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index d5f0580746a5..b10c316475dd 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -118,7 +118,7 @@ alloc_io7(unsigned int pe)
io7 = alloc_bootmem(sizeof(*io7));
io7->pe = pe;
- spin_lock_init(&io7->irq_lock);
+ raw_spin_lock_init(&io7->irq_lock);
for (h = 0; h < 4; h++) {
io7->ports[h].io7 = io7;
@@ -351,7 +351,7 @@ marvel_init_io7(struct io7 *io7)
}
}
-void
+void __init
marvel_io7_present(gct6_node *node)
{
int pe;
@@ -369,6 +369,7 @@ marvel_io7_present(gct6_node *node)
static void __init
marvel_find_console_vga_hose(void)
{
+#ifdef CONFIG_VGA_HOSE
u64 *pu64 = (u64 *)((u64)hwrpb + hwrpb->ctbt_offset);
if (pu64[7] == 3) { /* TERM_TYPE == graphics */
@@ -402,9 +403,10 @@ marvel_find_console_vga_hose(void)
pci_vga_hose = hose;
}
}
+#endif
}
-gct6_search_struct gct_wanted_node_list[] = {
+gct6_search_struct gct_wanted_node_list[] __initdata = {
{ GCT_TYPE_HOSE, GCT_SUBTYPE_IO_PORT_MODULE, marvel_io7_present },
{ 0, 0, NULL }
};
diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c
index 219bf271c0ba..b532d925443d 100644
--- a/arch/alpha/kernel/core_titan.c
+++ b/arch/alpha/kernel/core_titan.c
@@ -461,6 +461,7 @@ titan_ioremap(unsigned long addr, unsigned long size)
unsigned long *ptes;
unsigned long pfn;
+#ifdef CONFIG_VGA_HOSE
/*
* Adjust the address and hose, if necessary.
*/
@@ -468,6 +469,7 @@ titan_ioremap(unsigned long addr, unsigned long size)
h = pci_vga_hose->index;
addr += pci_vga_hose->mem_space->start;
}
+#endif
/*
* Find the hose.
diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c
index 936bc8f89a67..47632fa8c24e 100644
--- a/arch/alpha/kernel/module.c
+++ b/arch/alpha/kernel/module.c
@@ -181,6 +181,9 @@ apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab,
switch (r_type) {
case R_ALPHA_NONE:
break;
+ case R_ALPHA_REFLONG:
+ *(u32 *)location = value;
+ break;
case R_ALPHA_REFQUAD:
/* BUG() can produce misaligned relocations. */
((u32 *)location)[0] = value;
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index ffbdb3fb672f..676bab6e3123 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -42,11 +42,7 @@ alloc_pci_controller(void)
struct resource * __init
alloc_resource(void)
{
- struct resource *res;
-
- res = alloc_bootmem(sizeof(*res));
-
- return res;
+ return alloc_bootmem(sizeof(struct resource));
}
asmlinkage long
diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c
index 92c0d460815b..cbecd527c696 100644
--- a/arch/alpha/kernel/pci-sysfs.c
+++ b/arch/alpha/kernel/pci-sysfs.c
@@ -38,7 +38,7 @@ static int __pci_mmap_fits(struct pci_dev *pdev, int num,
unsigned long nr, start, size;
int shift = sparse ? 5 : 0;
- nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ nr = vma_pages(vma);
start = vma->vm_pgoff;
size = ((pci_resource_len(pdev, num) - 1) >> (PAGE_SHIFT - shift)) + 1;
@@ -64,8 +64,7 @@ static int pci_mmap_resource(struct kobject *kobj,
struct bin_attribute *attr,
struct vm_area_struct *vma, int sparse)
{
- struct pci_dev *pdev = to_pci_dev(container_of(kobj,
- struct device, kobj));
+ struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
struct resource *res = attr->private;
enum pci_mmap_state mmap_type;
struct pci_bus_region bar;
@@ -255,7 +254,7 @@ static int __legacy_mmap_fits(struct pci_controller *hose,
{
unsigned long nr, start, size;
- nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ nr = vma_pages(vma);
start = vma->vm_pgoff;
size = ((res_size - 1) >> PAGE_SHIFT) + 1;
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 5f387ee5b5c5..564114eb85e1 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -312,8 +312,9 @@ common_init_pci(void)
{
struct pci_controller *hose;
struct list_head resources;
+ struct pci_host_bridge *bridge;
struct pci_bus *bus;
- int next_busno;
+ int ret, next_busno;
int need_domain_info = 0;
u32 pci_mem_end;
u32 sg_base;
@@ -336,11 +337,25 @@ common_init_pci(void)
pci_add_resource_offset(&resources, hose->mem_space,
hose->mem_space->start);
- bus = pci_scan_root_bus(NULL, next_busno, alpha_mv.pci_ops,
- hose, &resources);
- if (!bus)
+ bridge = pci_alloc_host_bridge(0);
+ if (!bridge)
continue;
- hose->bus = bus;
+
+ list_splice_init(&resources, &bridge->windows);
+ bridge->dev.parent = NULL;
+ bridge->sysdata = hose;
+ bridge->busnr = next_busno;
+ bridge->ops = alpha_mv.pci_ops;
+ bridge->swizzle_irq = alpha_mv.pci_swizzle;
+ bridge->map_irq = alpha_mv.pci_map_irq;
+
+ ret = pci_scan_root_bus_bridge(bridge);
+ if (ret) {
+ pci_free_host_bridge(bridge);
+ continue;
+ }
+
+ bus = hose->bus = bridge->bus;
hose->need_domain_info = need_domain_info;
next_busno = bus->busn_res.end + 1;
/* Don't allow 8-bit bus number overflow inside the hose -
@@ -354,7 +369,6 @@ common_init_pci(void)
pcibios_claim_console_setup();
pci_assign_unassigned_resources();
- pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq);
for (hose = hose_head; hose; hose = hose->next) {
bus = hose->bus;
if (bus)
@@ -362,7 +376,6 @@ common_init_pci(void)
}
}
-
struct pci_controller * __init
alloc_pci_controller(void)
{
@@ -379,11 +392,7 @@ alloc_pci_controller(void)
struct resource * __init
alloc_resource(void)
{
- struct resource *res;
-
- res = alloc_bootmem(sizeof(*res));
-
- return res;
+ return alloc_bootmem(sizeof(struct resource));
}
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 491e6a604e82..249229ab4942 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -1094,8 +1094,9 @@ get_sysnames(unsigned long type, unsigned long variation, unsigned long cpu,
default: /* default to variation "0" for now */
break;
case ST_DEC_EB164:
- if (member < ARRAY_SIZE(eb164_indices))
- *variation_name = eb164_names[eb164_indices[member]];
+ if (member >= ARRAY_SIZE(eb164_indices))
+ break;
+ *variation_name = eb164_names[eb164_indices[member]];
/* PC164 may show as EB164 variation, but with EV56 CPU,
so, since no true EB164 had anything but EV5... */
if (eb164_indices[member] == 0 && cpu == EV56_CPU)
diff --git a/arch/alpha/kernel/smc37c669.c b/arch/alpha/kernel/smc37c669.c
index c803fc76ae4f..4dbd4e415041 100644
--- a/arch/alpha/kernel/smc37c669.c
+++ b/arch/alpha/kernel/smc37c669.c
@@ -2007,11 +2007,8 @@ static void __init SMC37c669_config_mode(
static unsigned char __init SMC37c669_read_config(
unsigned char index )
{
- unsigned char data;
-
- wb( &SMC37c669->index_port, index );
- data = rb( &SMC37c669->data_port );
- return data;
+ wb(&SMC37c669->index_port, index);
+ return rb(&SMC37c669->data_port);
}
/*
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 9fc560459ebd..f6726a746427 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -115,7 +115,7 @@ wait_boot_cpu_to_stop(int cpuid)
/*
* Where secondaries begin a life of C.
*/
-void
+void __init
smp_callin(void)
{
int cpuid = hard_smp_processor_id();
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index 24e41bd7d3c9..3e533920371f 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -115,11 +115,11 @@ io7_enable_irq(struct irq_data *d)
return;
}
- spin_lock(&io7->irq_lock);
+ raw_spin_lock(&io7->irq_lock);
*ctl |= 1UL << 24;
mb();
*ctl;
- spin_unlock(&io7->irq_lock);
+ raw_spin_unlock(&io7->irq_lock);
}
static void
@@ -136,11 +136,11 @@ io7_disable_irq(struct irq_data *d)
return;
}
- spin_lock(&io7->irq_lock);
+ raw_spin_lock(&io7->irq_lock);
*ctl &= ~(1UL << 24);
mb();
*ctl;
- spin_unlock(&io7->irq_lock);
+ raw_spin_unlock(&io7->irq_lock);
}
static void
@@ -263,7 +263,7 @@ init_io7_irqs(struct io7 *io7,
*/
printk(" Interrupts reported to CPU at PE %u\n", boot_cpuid);
- spin_lock(&io7->irq_lock);
+ raw_spin_lock(&io7->irq_lock);
/* set up the error irqs */
io7_redirect_irq(io7, &io7->csrs->HLT_CTL.csr, boot_cpuid);
@@ -295,7 +295,7 @@ init_io7_irqs(struct io7 *io7,
for (i = 0; i < 16; ++i)
init_one_io7_msi(io7, i, boot_cpuid);
- spin_unlock(&io7->irq_lock);
+ raw_spin_unlock(&io7->irq_lock);
}
static void __init
diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c
index 2cfaa0e5c577..8ae04a121186 100644
--- a/arch/alpha/kernel/sys_nautilus.c
+++ b/arch/alpha/kernel/sys_nautilus.c
@@ -194,22 +194,46 @@ static struct resource irongate_mem = {
.name = "Irongate PCI MEM",
.flags = IORESOURCE_MEM,
};
+static struct resource busn_resource = {
+ .name = "PCI busn",
+ .start = 0,
+ .end = 255,
+ .flags = IORESOURCE_BUS,
+};
void __init
nautilus_init_pci(void)
{
struct pci_controller *hose = hose_head;
+ struct pci_host_bridge *bridge;
struct pci_bus *bus;
struct pci_dev *irongate;
unsigned long bus_align, bus_size, pci_mem;
unsigned long memtop = max_low_pfn << PAGE_SHIFT;
+ int ret;
+
+ bridge = pci_alloc_host_bridge(0);
+ if (!bridge)
+ return;
+
+ pci_add_resource(&bridge->windows, &ioport_resource);
+ pci_add_resource(&bridge->windows, &iomem_resource);
+ pci_add_resource(&bridge->windows, &busn_resource);
+ bridge->dev.parent = NULL;
+ bridge->sysdata = hose;
+ bridge->busnr = 0;
+ bridge->ops = alpha_mv.pci_ops;
+ bridge->swizzle_irq = alpha_mv.pci_swizzle;
+ bridge->map_irq = alpha_mv.pci_map_irq;
/* Scan our single hose. */
- bus = pci_scan_bus(0, alpha_mv.pci_ops, hose);
- if (!bus)
+ ret = pci_scan_root_bus_bridge(bridge);
+ if (ret) {
+ pci_free_host_bridge(bridge);
return;
+ }
- hose->bus = bus;
+ bus = hose->bus = bridge->bus;
pcibios_claim_one_bus(bus);
irongate = pci_get_bus_and_slot(0, 0);
@@ -254,7 +278,6 @@ nautilus_init_pci(void)
/* pci_common_swizzle() relies on bus->self being NULL
for the root bus, so just clear it. */
bus->self = NULL;
- pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq);
pci_bus_add_devices(bus);
}
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 9b62e3fd4f03..5b4514abb234 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -532,6 +532,15 @@ sys_call_table:
.quad sys_getrandom
.quad sys_memfd_create
.quad sys_execveat
+ .quad sys_seccomp
+ .quad sys_bpf /* 515 */
+ .quad sys_userfaultfd
+ .quad sys_membarrier
+ .quad sys_mlock2
+ .quad sys_copy_file_range
+ .quad sys_preadv2 /* 520 */
+ .quad sys_pwritev2
+ .quad sys_statx
.size sys_call_table, . - sys_call_table
.type sys_call_table, @object
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 65bb102d985b..ddb89a18cf26 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -193,8 +193,10 @@ die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15)
static long dummy_emul(void) { return 0; }
long (*alpha_fp_emul_imprecise)(struct pt_regs *regs, unsigned long writemask)
= (void *)dummy_emul;
+EXPORT_SYMBOL_GPL(alpha_fp_emul_imprecise);
long (*alpha_fp_emul) (unsigned long pc)
= (void *)dummy_emul;
+EXPORT_SYMBOL_GPL(alpha_fp_emul);
#else
long alpha_fp_emul_imprecise(struct pt_regs *regs, unsigned long writemask);
long alpha_fp_emul (unsigned long pc);
diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile
index 7083434dd241..a80815960364 100644
--- a/arch/alpha/lib/Makefile
+++ b/arch/alpha/lib/Makefile
@@ -20,12 +20,8 @@ lib-y = __divqu.o __remqu.o __divlu.o __remlu.o \
checksum.o \
csum_partial_copy.o \
$(ev67-y)strlen.o \
- $(ev67-y)strcat.o \
- strcpy.o \
- $(ev67-y)strncat.o \
- strncpy.o \
- $(ev6-y)stxcpy.o \
- $(ev6-y)stxncpy.o \
+ stycpy.o \
+ styncpy.o \
$(ev67-y)strchr.o \
$(ev67-y)strrchr.o \
$(ev6-y)memchr.o \
@@ -49,3 +45,17 @@ AFLAGS___remlu.o = -DREM -DINTSIZE
$(addprefix $(obj)/,__divqu.o __remqu.o __divlu.o __remlu.o): \
$(src)/$(ev6-y)divide.S FORCE
$(call if_changed_rule,as_o_S)
+
+# There are direct branches between {str*cpy,str*cat} and stx*cpy.
+# Ensure the branches are within range by merging these objects.
+
+LDFLAGS_stycpy.o := -r
+LDFLAGS_styncpy.o := -r
+
+$(obj)/stycpy.o: $(obj)/strcpy.o $(obj)/$(ev67-y)strcat.o \
+ $(obj)/$(ev6-y)stxcpy.o FORCE
+ $(call if_changed,ld)
+
+$(obj)/styncpy.o: $(obj)/strncpy.o $(obj)/$(ev67-y)strncat.o \
+ $(obj)/$(ev6-y)stxncpy.o FORCE
+ $(call if_changed,ld)
diff --git a/arch/alpha/lib/copy_user.S b/arch/alpha/lib/copy_user.S
index 159f1b7e6e49..c277a1a4383e 100644
--- a/arch/alpha/lib/copy_user.S
+++ b/arch/alpha/lib/copy_user.S
@@ -34,7 +34,7 @@
.ent __copy_user
__copy_user:
.prologue 0
- and $18,$18,$0
+ mov $18,$0
and $16,7,$3
beq $0,$35
beq $3,$36
diff --git a/arch/alpha/lib/ev6-copy_user.S b/arch/alpha/lib/ev6-copy_user.S
index 35e6710d0700..954ca03ebebe 100644
--- a/arch/alpha/lib/ev6-copy_user.S
+++ b/arch/alpha/lib/ev6-copy_user.S
@@ -45,9 +45,10 @@
# Pipeline info: Slotting & Comments
__copy_user:
.prologue 0
- andq $18, $18, $0
- subq $18, 32, $1 # .. E .. .. : Is this going to be a small copy?
- beq $0, $zerolength # U .. .. .. : U L U L
+ mov $18, $0 # .. .. .. E
+ subq $18, 32, $1 # .. .. E. .. : Is this going to be a small copy?
+ nop # .. E .. ..
+ beq $18, $zerolength # U .. .. .. : U L U L
and $16,7,$3 # .. .. .. E : is leading dest misalignment
ble $1, $onebyteloop # .. .. U .. : 1st branch : small amount of data
diff --git a/arch/alpha/lib/memset.S b/arch/alpha/lib/memset.S
index 89a26f5e89de..f824969e9e77 100644
--- a/arch/alpha/lib/memset.S
+++ b/arch/alpha/lib/memset.S
@@ -20,7 +20,7 @@
.globl memset
.globl __memset
.globl ___memset
- .globl __memsetw
+ .globl __memset16
.globl __constant_c_memset
.ent ___memset
@@ -110,8 +110,8 @@ EXPORT_SYMBOL(___memset)
EXPORT_SYMBOL(__constant_c_memset)
.align 5
- .ent __memsetw
-__memsetw:
+ .ent __memset16
+__memset16:
.prologue 0
inswl $17,0,$1 /* E0 */
@@ -123,8 +123,8 @@ __memsetw:
or $1,$4,$17 /* E0 */
br __constant_c_memset /* .. E1 */
- .end __memsetw
-EXPORT_SYMBOL(__memsetw)
+ .end __memset16
+EXPORT_SYMBOL(__memset16)
memset = ___memset
__memset = ___memset
diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c
index d17d705f6545..1c2d456da7f2 100644
--- a/arch/alpha/math-emu/math.c
+++ b/arch/alpha/math-emu/math.c
@@ -53,6 +53,7 @@ extern void alpha_write_fp_reg_s (unsigned long reg, unsigned long val);
#ifdef MODULE
MODULE_DESCRIPTION("FP Software completion module");
+MODULE_LICENSE("GPL v2");
extern long (*alpha_fp_emul_imprecise)(struct pt_regs *, unsigned long);
extern long (*alpha_fp_emul) (unsigned long pc);
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index a5459698f0ee..a598641eed98 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -96,11 +96,11 @@ menu "ARC Architecture Configuration"
menu "ARC Platform/SoC/Board"
-source "arch/arc/plat-sim/Kconfig"
source "arch/arc/plat-tb10x/Kconfig"
source "arch/arc/plat-axs10x/Kconfig"
#New platform adds here
source "arch/arc/plat-eznps/Kconfig"
+source "arch/arc/plat-hsdk/Kconfig"
endmenu
@@ -419,7 +419,7 @@ endif # ISA_ARCV2
endmenu # "ARC CPU Configuration"
config LINUX_LINK_BASE
- hex "Linux Link Address"
+ hex "Kernel link address"
default "0x80000000"
help
ARC700 divides the 32 bit phy address space into two equal halves
@@ -432,6 +432,14 @@ config LINUX_LINK_BASE
If you don't know what the above means, leave this setting alone.
This needs to match memory start address specified in Device Tree
+config LINUX_RAM_BASE
+ hex "RAM base address"
+ default LINUX_LINK_BASE
+ help
+ By default Linux is linked at base of RAM. However in some special
+ cases (such as HSDK), Linux can't be linked at start of DDR, hence
+ this option.
+
config HIGHMEM
bool "High Memory Support"
select ARCH_DISCONTIGMEM_ENABLE
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 44ef35d33956..3a4b52b7e09d 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -107,10 +107,11 @@ core-y += arch/arc/
# w/o this dtb won't embed into kernel binary
core-y += arch/arc/boot/dts/
-core-$(CONFIG_ARC_PLAT_SIM) += arch/arc/plat-sim/
+core-y += arch/arc/plat-sim/
core-$(CONFIG_ARC_PLAT_TB10X) += arch/arc/plat-tb10x/
core-$(CONFIG_ARC_PLAT_AXS10X) += arch/arc/plat-axs10x/
core-$(CONFIG_ARC_PLAT_EZNPS) += arch/arc/plat-eznps/
+core-$(CONFIG_ARC_SOC_HSDK) += arch/arc/plat-hsdk/
ifdef CONFIG_ARC_PLAT_EZNPS
KBUILD_CPPFLAGS += -I$(srctree)/arch/arc/plat-eznps/include
diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi
index 53ce226f77a5..fdc266504ada 100644
--- a/arch/arc/boot/dts/axc001.dtsi
+++ b/arch/arc/boot/dts/axc001.dtsi
@@ -15,15 +15,15 @@
/ {
compatible = "snps,arc";
- #address-cells = <1>;
- #size-cells = <1>;
+ #address-cells = <2>;
+ #size-cells = <2>;
cpu_card {
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
- ranges = <0x00000000 0xf0000000 0x10000000>;
+ ranges = <0x00000000 0x0 0xf0000000 0x10000000>;
core_clk: core_clk {
#clock-cells = <0>;
@@ -91,23 +91,21 @@
mb_intc: dw-apb-ictl@0xe0012000 {
#interrupt-cells = <1>;
compatible = "snps,dw-apb-ictl";
- reg = < 0xe0012000 0x200 >;
+ reg = < 0x0 0xe0012000 0x0 0x200 >;
interrupt-controller;
interrupt-parent = <&core_intc>;
interrupts = < 7 >;
};
memory {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0x00000000 0x80000000 0x20000000>;
device_type = "memory";
- reg = <0x80000000 0x1b000000>; /* (512 - 32) MiB */
+ /* CONFIG_LINUX_RAM_BASE needs to match low mem start */
+ reg = <0x0 0x80000000 0x0 0x1b000000>; /* (512 - 32) MiB */
};
reserved-memory {
- #address-cells = <1>;
- #size-cells = <1>;
+ #address-cells = <2>;
+ #size-cells = <2>;
ranges;
/*
* We just move frame buffer area to the very end of
@@ -118,7 +116,7 @@
*/
frame_buffer: frame_buffer@9e000000 {
compatible = "shared-dma-pool";
- reg = <0x9e000000 0x2000000>;
+ reg = <0x0 0x9e000000 0x0 0x2000000>;
no-map;
};
};
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
index 14df46f141bf..4e6e9f57e790 100644
--- a/arch/arc/boot/dts/axc003.dtsi
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -14,20 +14,27 @@
/ {
compatible = "snps,arc";
- #address-cells = <1>;
- #size-cells = <1>;
+ #address-cells = <2>;
+ #size-cells = <2>;
cpu_card {
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
- ranges = <0x00000000 0xf0000000 0x10000000>;
+ ranges = <0x00000000 0x0 0xf0000000 0x10000000>;
- core_clk: core_clk {
+ input_clk: input-clk {
#clock-cells = <0>;
compatible = "fixed-clock";
- clock-frequency = <90000000>;
+ clock-frequency = <33333333>;
+ };
+
+ core_clk: core-clk@80 {
+ compatible = "snps,axs10x-arc-pll-clock";
+ reg = <0x80 0x10>, <0x100 0x10>;
+ #clock-cells = <0>;
+ clocks = <&input_clk>;
};
core_intc: archs-intc@cpu {
@@ -94,30 +101,29 @@
mb_intc: dw-apb-ictl@0xe0012000 {
#interrupt-cells = <1>;
compatible = "snps,dw-apb-ictl";
- reg = < 0xe0012000 0x200 >;
+ reg = < 0x0 0xe0012000 0x0 0x200 >;
interrupt-controller;
interrupt-parent = <&core_intc>;
interrupts = < 24 >;
};
memory {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0x00000000 0x80000000 0x40000000>;
device_type = "memory";
- reg = <0x80000000 0x20000000>; /* 512MiB */
+ /* CONFIG_LINUX_RAM_BASE needs to match low mem start */
+ reg = <0x0 0x80000000 0x0 0x20000000 /* 512 MiB low mem */
+ 0x1 0xc0000000 0x0 0x40000000>; /* 1 GiB highmem */
};
reserved-memory {
- #address-cells = <1>;
- #size-cells = <1>;
+ #address-cells = <2>;
+ #size-cells = <2>;
ranges;
/*
* Move frame buffer out of IOC aperture (0x8z-0xAz).
*/
frame_buffer: frame_buffer@be000000 {
compatible = "shared-dma-pool";
- reg = <0xbe000000 0x2000000>;
+ reg = <0x0 0xbe000000 0x0 0x2000000>;
no-map;
};
};
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
index 695f9fa1996b..63954a8b0100 100644
--- a/arch/arc/boot/dts/axc003_idu.dtsi
+++ b/arch/arc/boot/dts/axc003_idu.dtsi
@@ -14,20 +14,27 @@
/ {
compatible = "snps,arc";
- #address-cells = <1>;
- #size-cells = <1>;
+ #address-cells = <2>;
+ #size-cells = <2>;
cpu_card {
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
- ranges = <0x00000000 0xf0000000 0x10000000>;
+ ranges = <0x00000000 0x0 0xf0000000 0x10000000>;
- core_clk: core_clk {
+ input_clk: input-clk {
#clock-cells = <0>;
compatible = "fixed-clock";
- clock-frequency = <100000000>;
+ clock-frequency = <33333333>;
+ };
+
+ core_clk: core-clk@80 {
+ compatible = "snps,axs10x-arc-pll-clock";
+ reg = <0x80 0x10>, <0x100 0x10>;
+ #clock-cells = <0>;
+ clocks = <&input_clk>;
};
core_intc: archs-intc@cpu {
@@ -100,30 +107,29 @@
mb_intc: dw-apb-ictl@0xe0012000 {
#interrupt-cells = <1>;
compatible = "snps,dw-apb-ictl";
- reg = < 0xe0012000 0x200 >;
+ reg = < 0x0 0xe0012000 0x0 0x200 >;
interrupt-controller;
interrupt-parent = <&idu_intc>;
interrupts = <0>;
};
memory {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0x00000000 0x80000000 0x40000000>;
device_type = "memory";
- reg = <0x80000000 0x20000000>; /* 512MiB */
+ /* CONFIG_LINUX_RAM_BASE needs to match low mem start */
+ reg = <0x0 0x80000000 0x0 0x20000000 /* 512 MiB low mem */
+ 0x1 0xc0000000 0x0 0x40000000>; /* 1 GiB highmem */
};
reserved-memory {
- #address-cells = <1>;
- #size-cells = <1>;
+ #address-cells = <2>;
+ #size-cells = <2>;
ranges;
/*
* Move frame buffer out of IOC aperture (0x8z-0xAz).
*/
frame_buffer: frame_buffer@be000000 {
compatible = "shared-dma-pool";
- reg = <0xbe000000 0x2000000>;
+ reg = <0x0 0xbe000000 0x0 0x2000000>;
no-map;
};
};
diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index 41cfb29b62c1..2367a67c5f10 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -13,7 +13,7 @@
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
- ranges = <0x00000000 0xe0000000 0x10000000>;
+ ranges = <0x00000000 0x0 0xe0000000 0x10000000>;
interrupt-parent = <&mb_intc>;
i2sclk: i2sclk@100a0 {
@@ -101,7 +101,6 @@
mmc@0x15000 {
compatible = "altr,socfpga-dw-mshc";
reg = < 0x15000 0x400 >;
- num-slots = < 1 >;
fifo-depth = < 16 >;
card-detect-delay = < 200 >;
clocks = <&apbclk>, <&mmcclk>;
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
new file mode 100644
index 000000000000..229d13adbce4
--- /dev/null
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2017 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Device Tree for ARC HS Development Kit
+ */
+/dts-v1/;
+
+#include <dt-bindings/net/ti-dp83867.h>
+
+/ {
+ model = "snps,hsdk";
+ compatible = "snps,hsdk";
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ chosen {
+ bootargs = "earlycon=uart8250,mmio32,0xf0005000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1";
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "snps,archs38";
+ reg = <0>;
+ clocks = <&core_clk>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "snps,archs38";
+ reg = <1>;
+ clocks = <&core_clk>;
+ };
+
+ cpu@2 {
+ device_type = "cpu";
+ compatible = "snps,archs38";
+ reg = <2>;
+ clocks = <&core_clk>;
+ };
+
+ cpu@3 {
+ device_type = "cpu";
+ compatible = "snps,archs38";
+ reg = <3>;
+ clocks = <&core_clk>;
+ };
+ };
+
+ core_clk: core-clk {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <500000000>;
+ };
+
+ cpu_intc: cpu-interrupt-controller {
+ compatible = "snps,archs-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ idu_intc: idu-interrupt-controller {
+ compatible = "snps,archs-idu-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ interrupt-parent = <&cpu_intc>;
+ };
+
+ arcpct: pct {
+ compatible = "snps,archs-pct";
+ };
+
+ /* TIMER0 with interrupt for clockevent */
+ timer {
+ compatible = "snps,arc-timer";
+ interrupts = <16>;
+ interrupt-parent = <&cpu_intc>;
+ clocks = <&core_clk>;
+ };
+
+ /* 64-bit Global Free Running Counter */
+ gfrc {
+ compatible = "snps,archs-timer-gfrc";
+ clocks = <&core_clk>;
+ };
+
+ soc {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&idu_intc>;
+
+ ranges = <0x00000000 0xf0000000 0x10000000>;
+
+ serial: serial@5000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x5000 0x100>;
+ clock-frequency = <33330000>;
+ interrupts = <6>;
+ baud = <115200>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+ gmacclk: gmacclk {
+ compatible = "fixed-clock";
+ clock-frequency = <400000000>;
+ #clock-cells = <0>;
+ };
+
+ mmcclk_ciu: mmcclk-ciu {
+ compatible = "fixed-clock";
+ clock-frequency = <100000000>;
+ #clock-cells = <0>;
+ };
+
+ mmcclk_biu: mmcclk-biu {
+ compatible = "fixed-clock";
+ clock-frequency = <400000000>;
+ #clock-cells = <0>;
+ };
+
+ ethernet@8000 {
+ #interrupt-cells = <1>;
+ compatible = "snps,dwmac";
+ reg = <0x8000 0x2000>;
+ interrupts = <10>;
+ interrupt-names = "macirq";
+ phy-mode = "rgmii";
+ snps,pbl = <32>;
+ clocks = <&gmacclk>;
+ clock-names = "stmmaceth";
+ phy-handle = <&phy0>;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,dwmac-mdio";
+ phy0: ethernet-phy@0 {
+ reg = <0>;
+ ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_00_NS>;
+ ti,tx-internal-delay = <DP83867_RGMIIDCTL_2_00_NS>;
+ ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
+ };
+ };
+ };
+
+ ohci@60000 {
+ compatible = "snps,hsdk-v1.0-ohci", "generic-ohci";
+ reg = <0x60000 0x100>;
+ interrupts = <15>;
+ };
+
+ ehci@40000 {
+ compatible = "snps,hsdk-v1.0-ehci", "generic-ehci";
+ reg = <0x40000 0x100>;
+ interrupts = <15>;
+ };
+
+ mmc@a000 {
+ compatible = "altr,socfpga-dw-mshc";
+ reg = <0xa000 0x400>;
+ num-slots = <1>;
+ fifo-depth = <16>;
+ card-detect-delay = <200>;
+ clocks = <&mmcclk_biu>, <&mmcclk_ciu>;
+ clock-names = "biu", "ciu";
+ interrupts = <12>;
+ bus-width = <4>;
+ };
+ };
+
+ memory@80000000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ device_type = "memory";
+ reg = <0x80000000 0x40000000>; /* 1 GiB */
+ };
+};
diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts
index 3772c40c245e..8d787b251f73 100644
--- a/arch/arc/boot/dts/nsim_hs.dts
+++ b/arch/arc/boot/dts/nsim_hs.dts
@@ -18,7 +18,7 @@
memory {
device_type = "memory";
- /* CONFIG_LINUX_LINK_BASE needs to match low mem start */
+ /* CONFIG_LINUX_RAM_BASE needs to match low mem start */
reg = <0x0 0x80000000 0x0 0x20000000 /* 512 MB low mem */
0x1 0x00000000 0x0 0x40000000>; /* 1 GB highmem */
};
diff --git a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
index 459fc656b759..48bb4b4cd234 100644
--- a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
@@ -104,7 +104,6 @@
mmc@0x15000 {
compatible = "snps,dw-mshc";
reg = <0x15000 0x400>;
- num-slots = <1>;
fifo-depth = <1024>;
card-detect-delay = <200>;
clocks = <&apbclk>, <&mmcclk>;
diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig
index 57b3e599322f..db04ea4dd2d9 100644
--- a/arch/arc/configs/haps_hs_defconfig
+++ b/arch/arc/configs/haps_hs_defconfig
@@ -21,7 +21,6 @@ CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_SIM=y
CONFIG_ISA_ARCV2=y
CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs"
CONFIG_PREEMPT=y
diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig
index f85985adebb2..821a2e562f3f 100644
--- a/arch/arc/configs/haps_hs_smp_defconfig
+++ b/arch/arc/configs/haps_hs_smp_defconfig
@@ -23,7 +23,6 @@ CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_SIM=y
CONFIG_ISA_ARCV2=y
CONFIG_SMP=y
CONFIG_ARC_BUILTIN_DTB_NAME="haps_hs_idu"
diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig
new file mode 100644
index 000000000000..9a3fcf446388
--- /dev/null
+++ b/arch/arc/configs/hsdk_defconfig
@@ -0,0 +1,80 @@
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_ARC_SOC_HSDK=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+CONFIG_LINUX_LINK_BASE=0x90000000
+CONFIG_LINUX_RAM_BASE=0x80000000
+CONFIG_ARC_BUILTIN_DTB_NAME="hsdk"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_DEVTMPFS=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+CONFIG_STMMAC_ETH=y
+CONFIG_MICREL_PHY=y
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_FB_UDL=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_DW=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT3_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
+CONFIG_CRYPTO_ECHAINIV=y
diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig
index ede625c76216..7c9c706ae7f6 100644
--- a/arch/arc/configs/nps_defconfig
+++ b/arch/arc/configs/nps_defconfig
@@ -39,7 +39,6 @@ CONFIG_IP_PNP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig
index b0066a749d4c..6dff83a238b8 100644
--- a/arch/arc/configs/nsim_700_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
@@ -23,7 +23,6 @@ CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_SIM=y
CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700"
CONFIG_PREEMPT=y
# CONFIG_COMPACTION is not set
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
index ebe9ebb92933..31ee51b987e7 100644
--- a/arch/arc/configs/nsim_hs_defconfig
+++ b/arch/arc/configs/nsim_hs_defconfig
@@ -26,7 +26,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_SIM=y
CONFIG_ISA_ARCV2=y
CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs"
CONFIG_PREEMPT=y
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
index 4bde43278be6..8d3b1f67cae4 100644
--- a/arch/arc/configs/nsim_hs_smp_defconfig
+++ b/arch/arc/configs/nsim_hs_smp_defconfig
@@ -24,7 +24,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_SIM=y
CONFIG_ISA_ARCV2=y
CONFIG_SMP=y
CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs_idu"
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index f6fb3d26557e..6168ce2ac2ef 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -23,7 +23,6 @@ CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_SIM=y
CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci"
# CONFIG_COMPACTION is not set
CONFIG_NET=y
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index b9f0fe00044b..a70bdeb2b3fd 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -23,7 +23,6 @@ CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_SIM=y
CONFIG_ISA_ARCV2=y
CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs"
# CONFIG_COMPACTION is not set
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index 155add7761ed..ef96406c446e 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -18,7 +18,6 @@ CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_SIM=y
CONFIG_ISA_ARCV2=y
CONFIG_SMP=y
# CONFIG_ARC_TIMERS_64BIT is not set
diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig
index 4c5118384eb5..f30182549395 100644
--- a/arch/arc/configs/tb10x_defconfig
+++ b/arch/arc/configs/tb10x_defconfig
@@ -38,7 +38,6 @@ CONFIG_IP_MULTICAST=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 54b54da6384c..11859287c52a 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -123,6 +123,8 @@ static inline void atomic_set(atomic_t *v, int i)
atomic_ops_unlock(flags);
}
+#define atomic_set_release(v, i) atomic_set((v), (i))
+
#endif
/*
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 19ebddffb279..8486f328cc5d 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -47,7 +47,8 @@
: "r"(data), "r"(ptr)); \
})
-#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
+/* Largest line length for either L1 or L2 is 128 bytes */
+#define ARCH_DMA_MINALIGN 128
extern void arc_cache_init(void);
extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
@@ -95,8 +96,12 @@ extern unsigned long perip_base, perip_end;
#define ARC_REG_SLC_CTRL 0x903
#define ARC_REG_SLC_FLUSH 0x904
#define ARC_REG_SLC_INVALIDATE 0x905
+#define ARC_AUX_SLC_IVDL 0x910
+#define ARC_AUX_SLC_FLDL 0x912
#define ARC_REG_SLC_RGN_START 0x914
+#define ARC_REG_SLC_RGN_START1 0x915
#define ARC_REG_SLC_RGN_END 0x916
+#define ARC_REG_SLC_RGN_END1 0x917
/* Bit val in SLC_CONTROL */
#define SLC_CTRL_DIS 0x001
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index 14c310f2e0b1..ec36d5b6d435 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -192,6 +192,12 @@
PUSHAX lp_start
PUSHAX erbta
+#ifdef CONFIG_ARC_PLAT_EZNPS
+ .word CTOP_INST_SCHD_RW
+ PUSHAX CTOP_AUX_GPA1
+ PUSHAX CTOP_AUX_EFLAGS
+#endif
+
lr r9, [ecr]
st r9, [sp, PT_event] /* EV_Trap expects r9 to have ECR */
.endm
@@ -208,6 +214,12 @@
* by hardware and that is not good.
*-------------------------------------------------------------*/
.macro EXCEPTION_EPILOGUE
+#ifdef CONFIG_ARC_PLAT_EZNPS
+ .word CTOP_INST_SCHD_RW
+ POPAX CTOP_AUX_EFLAGS
+ POPAX CTOP_AUX_GPA1
+#endif
+
POPAX erbta
POPAX lp_start
POPAX lp_end
@@ -265,6 +277,12 @@
PUSHAX lp_end
PUSHAX lp_start
PUSHAX bta_l\LVL\()
+
+#ifdef CONFIG_ARC_PLAT_EZNPS
+ .word CTOP_INST_SCHD_RW
+ PUSHAX CTOP_AUX_GPA1
+ PUSHAX CTOP_AUX_EFLAGS
+#endif
.endm
/*--------------------------------------------------------------
@@ -277,6 +295,12 @@
* by hardware and that is not good.
*-------------------------------------------------------------*/
.macro INTERRUPT_EPILOGUE LVL
+#ifdef CONFIG_ARC_PLAT_EZNPS
+ .word CTOP_INST_SCHD_RW
+ POPAX CTOP_AUX_EFLAGS
+ POPAX CTOP_AUX_GPA1
+#endif
+
POPAX bta_l\LVL\()
POPAX lp_start
POPAX lp_end
diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h
index 11e1b1f3acda..eb887dd13e74 100644
--- a/arch/arc/include/asm/futex.h
+++ b/arch/arc/include/asm/futex.h
@@ -73,20 +73,11 @@
#endif
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+ u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
- return -EFAULT;
-
#ifndef CONFIG_ARC_HAS_LLSC
preempt_disable(); /* to guarantee atomic r-m-w of futex op */
#endif
@@ -118,30 +109,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
preempt_enable();
#endif
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ:
- ret = (oldval == cmparg);
- break;
- case FUTEX_OP_CMP_NE:
- ret = (oldval != cmparg);
- break;
- case FUTEX_OP_CMP_LT:
- ret = (oldval < cmparg);
- break;
- case FUTEX_OP_CMP_GE:
- ret = (oldval >= cmparg);
- break;
- case FUTEX_OP_CMP_LE:
- ret = (oldval <= cmparg);
- break;
- case FUTEX_OP_CMP_GT:
- ret = (oldval > cmparg);
- break;
- default:
- ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = oldval;
+
return ret;
}
diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h
index a64c447b0337..8a4f77ea3238 100644
--- a/arch/arc/include/asm/irqflags-arcv2.h
+++ b/arch/arc/include/asm/irqflags-arcv2.h
@@ -47,9 +47,6 @@
#define ISA_INIT_STATUS_BITS (STATUS_IE_MASK | STATUS_AD_MASK | \
(ARCV2_IRQ_DEF_PRIO << 1))
-/* SLEEP needs default irq priority (<=) which can interrupt the doze */
-#define ISA_SLEEP_ARG (0x10 | ARCV2_IRQ_DEF_PRIO)
-
#ifndef __ASSEMBLY__
/*
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
index 4c6eed80cd8b..fcb80171fc34 100644
--- a/arch/arc/include/asm/irqflags-compact.h
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -43,8 +43,6 @@
#define ISA_INIT_STATUS_BITS STATUS_IE_MASK
-#define ISA_SLEEP_ARG 0x3
-
#ifndef __ASSEMBLY__
/******************************************************************
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index db7319e9b506..efb79fafff1d 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -94,6 +94,8 @@ static inline int is_pae40_enabled(void)
return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
}
+extern int pae40_exist_but_not_enab(void);
+
#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index 296c3426a6ad..109baa06831c 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -85,7 +85,7 @@ typedef pte_t * pgtable_t;
*/
#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
-#define ARCH_PFN_OFFSET virt_to_pfn(CONFIG_LINUX_LINK_BASE)
+#define ARCH_PFN_OFFSET virt_to_pfn(CONFIG_LINUX_RAM_BASE)
#ifdef CONFIG_FLATMEM
#define pfn_valid(pfn) (((pfn) - ARCH_PFN_OFFSET) < max_mapnr)
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 4104a0839214..d400a2161935 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -27,6 +27,13 @@ struct arc_fpu {
};
#endif
+#ifdef CONFIG_ARC_PLAT_EZNPS
+struct eznps_dp {
+ unsigned int eflags;
+ unsigned int gpa1;
+};
+#endif
+
/* Arch specific stuff which needs to be saved per task.
* However these items are not so important so as to earn a place in
* struct thread_info
@@ -38,6 +45,9 @@ struct thread_struct {
#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
struct arc_fpu fpu;
#endif
+#ifdef CONFIG_ARC_PLAT_EZNPS
+ struct eznps_dp dp;
+#endif
};
#define INIT_THREAD { \
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index 5297faa8a378..5a8cb22724a1 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -19,6 +19,11 @@
#ifdef CONFIG_ISA_ARCOMPACT
struct pt_regs {
+#ifdef CONFIG_ARC_PLAT_EZNPS
+ unsigned long eflags; /* Extended FLAGS */
+ unsigned long gpa1; /* General Purpose Aux */
+#endif
+
/* Real registers */
unsigned long bta; /* bta_l1, bta_l2, erbta */
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index 233d5ffe6ec7..47efc8451b70 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -16,11 +16,6 @@
#define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__)
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- smp_cond_load_acquire(&lock->slock, !VAL);
-}
-
#ifdef CONFIG_ARC_HAS_LLSC
static inline void arch_spin_lock(arch_spinlock_t *lock)
@@ -252,9 +247,15 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
__asm__ __volatile__(
"1: ex %0, [%1] \n"
+#ifdef CONFIG_EZNPS_MTM_EXT
+ " .word %3 \n"
+#endif
" breq %0, %2, 1b \n"
: "+&r" (val)
: "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
+#ifdef CONFIG_EZNPS_MTM_EXT
+ , "i"(CTOP_INST_SCHD_RW)
+#endif
: "memory");
/*
@@ -296,6 +297,12 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
*/
smp_mb();
+ /*
+ * EX is not really required here, a simple STore of 0 suffices.
+ * However this causes tasklist livelocks in SystemC based SMP virtual
+ * platforms where the systemc core scheduler uses EX as a cue for
+ * moving to next core. Do a git log of this file for details
+ */
__asm__ __volatile__(
" ex %0, [%1] \n"
: "+r" (val)
diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h
index 1b171ab5fec0..f7d07feeea61 100644
--- a/arch/arc/include/asm/switch_to.h
+++ b/arch/arc/include/asm/switch_to.h
@@ -26,10 +26,19 @@ extern void fpu_save_restore(struct task_struct *p, struct task_struct *n);
#endif /* !CONFIG_ARC_FPU_SAVE_RESTORE */
+#ifdef CONFIG_ARC_PLAT_EZNPS
+extern void dp_save_restore(struct task_struct *p, struct task_struct *n);
+#define ARC_EZNPS_DP_PREV(p, n) dp_save_restore(p, n)
+#else
+#define ARC_EZNPS_DP_PREV(p, n)
+
+#endif /* !CONFIG_ARC_PLAT_EZNPS */
+
struct task_struct *__switch_to(struct task_struct *p, struct task_struct *n);
#define switch_to(prev, next, last) \
do { \
+ ARC_EZNPS_DP_PREV(prev, next); \
ARC_FPU_PREV(prev, next); \
last = __switch_to(prev, next);\
ARC_FPU_NEXT(next); \
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
index 8942c5c3b4c5..2dc5f4296d44 100644
--- a/arch/arc/kernel/Makefile
+++ b/arch/arc/kernel/Makefile
@@ -12,7 +12,6 @@ obj-y := arcksyms.o setup.o irq.o reset.o ptrace.o process.o devtree.o
obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o
obj-$(CONFIG_ISA_ARCOMPACT) += entry-compact.o intc-compact.o
obj-$(CONFIG_ISA_ARCV2) += entry-arcv2.o intc-arcv2.o
-obj-$(CONFIG_PCI) += pcibios.o
obj-$(CONFIG_MODULES) += arcksyms.o module.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c
index 3b67f538f142..521ef3521a1c 100644
--- a/arch/arc/kernel/devtree.c
+++ b/arch/arc/kernel/devtree.c
@@ -29,8 +29,9 @@ static void __init arc_set_early_base_baud(unsigned long dt_root)
{
if (of_flat_dt_is_compatible(dt_root, "abilis,arc-tb10x"))
arc_base_baud = 166666666; /* Fixed 166.6MHz clk (TB10x) */
- else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp"))
- arc_base_baud = 33333333; /* Fixed 33MHz clk (AXS10x) */
+ else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp") ||
+ of_flat_dt_is_compatible(dt_root, "snps,hsdk"))
+ arc_base_baud = 33333333; /* Fixed 33MHz clk (AXS10x & HSDK) */
else if (of_flat_dt_is_compatible(dt_root, "ezchip,arc-nps"))
arc_base_baud = 800000000; /* Fixed 800MHz clk (NPS) */
else
diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S
index 9211707634dc..f285dbb28066 100644
--- a/arch/arc/kernel/entry-compact.S
+++ b/arch/arc/kernel/entry-compact.S
@@ -25,12 +25,12 @@
*
* vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK)
* -do_signal()invoked upon TIF_RESTORE_SIGMASK as well
- * -Wrappers for sys_{,rt_}sigsuspend() nolonger needed as they don't
+ * -Wrappers for sys_{,rt_}sigsuspend() no longer needed as they don't
* need ptregs anymore
*
* Vineetg: Oct 2009
* -In a rare scenario, Process gets a Priv-V exception and gets scheduled
- * out. Since we don't do FAKE RTIE for Priv-V, CPU excpetion state remains
+ * out. Since we don't do FAKE RTIE for Priv-V, CPU exception state remains
* active (AE bit enabled). This causes a double fault for a subseq valid
* exception. Thus FAKE RTIE needed in low level Priv-Violation handler.
* Instr Error could also cause similar scenario, so same there as well.
@@ -59,7 +59,7 @@
*/
#include <linux/errno.h>
-#include <linux/linkage.h> /* {EXTRY,EXIT} */
+#include <linux/linkage.h> /* {ENTRY,EXIT} */
#include <asm/entry.h>
#include <asm/irqflags.h>
@@ -80,8 +80,8 @@
.align 4
/* Each entry in the vector table must occupy 2 words. Since it is a jump
- * across sections (.vector to .text) we are gauranteed that 'j somewhere'
- * will use the 'j limm' form of the intrsuction as long as somewhere is in
+ * across sections (.vector to .text) we are guaranteed that 'j somewhere'
+ * will use the 'j limm' form of the instruction as long as somewhere is in
* a section other than .vector.
*/
@@ -105,13 +105,13 @@ VECTOR handle_interrupt_level1 ; Other devices
; ******************** Exceptions **********************
VECTOR EV_MachineCheck ; 0x100, Fatal Machine check (0x20)
-VECTOR EV_TLBMissI ; 0x108, Intruction TLB miss (0x21)
+VECTOR EV_TLBMissI ; 0x108, Instruction TLB miss (0x21)
VECTOR EV_TLBMissD ; 0x110, Data TLB miss (0x22)
VECTOR EV_TLBProtV ; 0x118, Protection Violation (0x23)
; or Misaligned Access
VECTOR EV_PrivilegeV ; 0x120, Privilege Violation (0x24)
VECTOR EV_Trap ; 0x128, Trap exception (0x25)
-VECTOR EV_Extension ; 0x130, Extn Intruction Excp (0x26)
+VECTOR EV_Extension ; 0x130, Extn Instruction Excp (0x26)
.rept 24
VECTOR reserved ; Reserved Exceptions
@@ -199,7 +199,7 @@ END(handle_interrupt_level2)
; ---------------------------------------------
; User Mode Memory Bus Error Interrupt Handler
-; (Kernel mode memory errors handled via seperate exception vectors)
+; (Kernel mode memory errors handled via separate exception vectors)
; ---------------------------------------------
ENTRY(mem_service)
@@ -273,7 +273,7 @@ ENTRY(EV_TLBProtV)
;------ (5) Type of Protection Violation? ----------
;
; ProtV Hardware Exception is triggered for Access Faults of 2 types
- ; -Access Violaton : 00_23_(00|01|02|03)_00
+ ; -Access Violation : 00_23_(00|01|02|03)_00
; x r w r+w
; -Unaligned Access : 00_23_04_00
;
@@ -327,7 +327,7 @@ END(call_do_page_fault)
.Lrestore_regs:
- # Interrpts are actually disabled from this point on, but will get
+ # Interrupts are actually disabled from this point on, but will get
# reenabled after we return from interrupt/exception.
# But irq tracer needs to be told now...
TRACE_ASM_IRQ_ENABLE
@@ -335,7 +335,7 @@ END(call_do_page_fault)
lr r10, [status32]
; Restore REG File. In case multiple Events outstanding,
- ; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None
+ ; use the same priority as rtie: EXCPN, L2 IRQ, L1 IRQ, None
; Note that we use realtime STATUS32 (not pt_regs->status32) to
; decide that.
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 1eea99beecc3..85d9ea4a0acc 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -92,6 +92,12 @@ ENTRY(EV_MachineCheck)
lr r0, [efa]
mov r1, sp
+ ; hardware auto-disables MMU, re-enable it to allow kernel vaddr
+ ; access for say stack unwinding of modules for crash dumps
+ lr r3, [ARC_REG_PID]
+ or r3, r3, MMU_ENABLE
+ sr r3, [ARC_REG_PID]
+
lsr r3, r2, 8
bmsk r3, r3, 7
brne r3, ECR_C_MCHK_DUP_TLB, 1f
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
index f928795fd07a..067ea362fb3e 100644
--- a/arch/arc/kernel/intc-arcv2.c
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -75,10 +75,20 @@ void arc_init_IRQ(void)
* Set a default priority for all available interrupts to prevent
* switching of register banks if Fast IRQ and multiple register banks
* are supported by CPU.
+ * Also disable private-per-core IRQ lines so faulty external HW won't
+ * trigger interrupt that kernel is not ready to handle.
*/
for (i = NR_EXCEPTIONS; i < irq_bcr.irqs + NR_EXCEPTIONS; i++) {
write_aux_reg(AUX_IRQ_SELECT, i);
write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO);
+
+ /*
+ * Only mask cpu private IRQs here.
+ * "common" interrupts are masked at IDU, otherwise it would
+ * need to be unmasked at each cpu, with IPIs
+ */
+ if (i < FIRST_EXT_IRQ)
+ write_aux_reg(AUX_IRQ_ENABLE, 0);
}
/* setup status32, don't enable intr yet as kernel doesn't want */
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
index 7e608c6b0a01..47b421fa0147 100644
--- a/arch/arc/kernel/intc-compact.c
+++ b/arch/arc/kernel/intc-compact.c
@@ -27,7 +27,7 @@
*/
void arc_init_IRQ(void)
{
- int level_mask = 0;
+ unsigned int level_mask = 0, i;
/* Is timer high priority Interrupt (Level2 in ARCompact jargon) */
level_mask |= IS_ENABLED(CONFIG_ARC_COMPACT_IRQ_LEVELS) << TIMER0_IRQ;
@@ -40,6 +40,18 @@ void arc_init_IRQ(void)
if (level_mask)
pr_info("Level-2 interrupts bitset %x\n", level_mask);
+
+ /*
+ * Disable all IRQ lines so faulty external hardware won't
+ * trigger interrupt that kernel is not ready to handle.
+ */
+ for (i = TIMER0_IRQ; i < NR_CPU_IRQS; i++) {
+ unsigned int ienb;
+
+ ienb = read_aux_reg(AUX_IENABLE);
+ ienb &= ~(1 << i);
+ write_aux_reg(AUX_IENABLE, ienb);
+ }
}
/*
diff --git a/arch/arc/kernel/pcibios.c b/arch/arc/kernel/pcibios.c
deleted file mode 100644
index 72e1d73d0bd6..000000000000
--- a/arch/arc/kernel/pcibios.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2014-2015 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/pci.h>
-
-/*
- * We don't have to worry about legacy ISA devices, so nothing to do here
- */
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
- resource_size_t size, resource_size_t align)
-{
- return res->start;
-}
-
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
-}
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index 2a018de6d6cd..5ac3b547453f 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -79,15 +79,40 @@ done:
return uval;
}
+#ifdef CONFIG_ISA_ARCV2
+
void arch_cpu_idle(void)
{
- /* sleep, but enable all interrupts before committing */
+ /* Re-enable interrupts <= default irq priority before committing SLEEP */
+ const unsigned int arg = 0x10 | ARCV2_IRQ_DEF_PRIO;
+
__asm__ __volatile__(
"sleep %0 \n"
:
- :"I"(ISA_SLEEP_ARG)); /* can't be "r" has to be embedded const */
+ :"I"(arg)); /* can't be "r" has to be embedded const */
+}
+
+#elif defined(CONFIG_EZNPS_MTM_EXT) /* ARC700 variant in NPS */
+
+void arch_cpu_idle(void)
+{
+ /* only the calling HW thread needs to sleep */
+ __asm__ __volatile__(
+ ".word %0 \n"
+ :
+ :"i"(CTOP_INST_HWSCHD_WFT_IE12));
+}
+
+#else /* ARC700 */
+
+void arch_cpu_idle(void)
+{
+ /* sleep, but enable both set E1/E2 (levels of interrupts) before committing */
+ __asm__ __volatile__("sleep 0x3 \n");
}
+#endif
+
asmlinkage void ret_from_fork(void);
/*
@@ -209,6 +234,10 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp)
*/
regs->status32 = STATUS_U_MASK | STATUS_L_MASK | ISA_INIT_STATUS_BITS;
+#ifdef CONFIG_EZNPS_MTM_EXT
+ regs->eflags = 0;
+#endif
+
/* bogus seed values for debugging */
regs->lp_start = 0x10;
regs->lp_end = 0x80;
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 666613fde91d..c4ffb441716c 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -385,13 +385,13 @@ void setup_processor(void)
read_arc_build_cfg_regs();
arc_init_IRQ();
- printk(arc_cpu_mumbojumbo(cpu_id, str, sizeof(str)));
+ pr_info("%s", arc_cpu_mumbojumbo(cpu_id, str, sizeof(str)));
arc_mmu_init();
arc_cache_init();
- printk(arc_extn_mumbojumbo(cpu_id, str, sizeof(str)));
- printk(arc_platform_smp_cpuinfo());
+ pr_info("%s", arc_extn_mumbojumbo(cpu_id, str, sizeof(str)));
+ pr_info("%s", arc_platform_smp_cpuinfo());
arc_chk_core_config();
}
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index ff83e78d0cfb..bcd7c9fc5d0f 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -80,7 +80,7 @@ int name(unsigned long address, struct pt_regs *regs) \
DO_ERROR_INFO(SIGILL, "Priv Op/Disabled Extn", do_privilege_fault, ILL_PRVOPC)
DO_ERROR_INFO(SIGILL, "Invalid Extn Insn", do_extension_fault, ILL_ILLOPC)
DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC)
-DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", do_memory_error, BUS_ADRERR)
+DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR)
DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT)
DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN)
@@ -103,7 +103,7 @@ int do_misaligned_access(unsigned long address, struct pt_regs *regs,
*/
void do_machine_check_fault(unsigned long address, struct pt_regs *regs)
{
- die("Machine Check Exception", regs, address);
+ die("Unhandled Machine Check Exception", regs, address);
}
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index f9caf79186d4..7e94476f3994 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -140,7 +140,7 @@ static void show_ecr_verbose(struct pt_regs *regs)
} else if (vec == ECR_V_ITLB_MISS) {
pr_cont("Insn could not be fetched\n");
} else if (vec == ECR_V_MACH_CHK) {
- pr_cont("%s\n", (cause_code == 0x0) ?
+ pr_cont("Machine Check (%s)\n", (cause_code == 0x0) ?
"Double Fault" : "Other Fatal Err");
} else if (vec == ECR_V_PROTV) {
@@ -233,6 +233,9 @@ void show_kernel_fault_diag(const char *str, struct pt_regs *regs,
{
current->thread.fault_address = address;
+ /* Show fault description */
+ pr_info("\n%s\n", str);
+
/* Caller and Callee regs */
show_regs(regs);
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index a867575a758b..eee924dfffa6 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -652,7 +652,7 @@ static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr,
#endif /* CONFIG_ARC_HAS_ICACHE */
-noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op)
+noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
{
#ifdef CONFIG_ISA_ARCV2
/*
@@ -665,6 +665,7 @@ noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op)
static DEFINE_SPINLOCK(lock);
unsigned long flags;
unsigned int ctrl;
+ phys_addr_t end;
spin_lock_irqsave(&lock, flags);
@@ -694,8 +695,19 @@ noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op)
* END needs to be setup before START (latter triggers the operation)
* END can't be same as START, so add (l2_line_sz - 1) to sz
*/
- write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1));
- write_aux_reg(ARC_REG_SLC_RGN_START, paddr);
+ end = paddr + sz + l2_line_sz - 1;
+ if (is_pae40_enabled())
+ write_aux_reg(ARC_REG_SLC_RGN_END1, upper_32_bits(end));
+
+ write_aux_reg(ARC_REG_SLC_RGN_END, lower_32_bits(end));
+
+ if (is_pae40_enabled())
+ write_aux_reg(ARC_REG_SLC_RGN_START1, upper_32_bits(paddr));
+
+ write_aux_reg(ARC_REG_SLC_RGN_START, lower_32_bits(paddr));
+
+ /* Make sure "busy" bit reports correct status, see STAR 9001165532 */
+ read_aux_reg(ARC_REG_SLC_CTRL);
while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
@@ -703,6 +715,58 @@ noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op)
#endif
}
+noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op)
+{
+#ifdef CONFIG_ISA_ARCV2
+ /*
+ * SLC is shared between all cores and concurrent aux operations from
+ * multiple cores need to be serialized using a spinlock
+ * A concurrent operation can be silently ignored and/or the old/new
+ * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop
+ * below)
+ */
+ static DEFINE_SPINLOCK(lock);
+
+ const unsigned long SLC_LINE_MASK = ~(l2_line_sz - 1);
+ unsigned int ctrl, cmd;
+ unsigned long flags;
+ int num_lines;
+
+ spin_lock_irqsave(&lock, flags);
+
+ ctrl = read_aux_reg(ARC_REG_SLC_CTRL);
+
+ /* Don't rely on default value of IM bit */
+ if (!(op & OP_FLUSH)) /* i.e. OP_INV */
+ ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */
+ else
+ ctrl |= SLC_CTRL_IM;
+
+ write_aux_reg(ARC_REG_SLC_CTRL, ctrl);
+
+ cmd = op & OP_INV ? ARC_AUX_SLC_IVDL : ARC_AUX_SLC_FLDL;
+
+ sz += paddr & ~SLC_LINE_MASK;
+ paddr &= SLC_LINE_MASK;
+
+ num_lines = DIV_ROUND_UP(sz, l2_line_sz);
+
+ while (num_lines-- > 0) {
+ write_aux_reg(cmd, paddr);
+ paddr += l2_line_sz;
+ }
+
+ /* Make sure "busy" bit reports correct status, see STAR 9001165532 */
+ read_aux_reg(ARC_REG_SLC_CTRL);
+
+ while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
+
+ spin_unlock_irqrestore(&lock, flags);
+#endif
+}
+
+#define slc_op(paddr, sz, op) slc_op_rgn(paddr, sz, op)
+
noinline static void slc_entire_op(const int op)
{
unsigned int ctrl, r = ARC_REG_SLC_CTRL;
@@ -1083,7 +1147,7 @@ SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
*/
noinline void __init arc_ioc_setup(void)
{
- unsigned int ap_sz;
+ unsigned int ioc_base, mem_sz;
/* Flush + invalidate + disable L1 dcache */
__dc_disable();
@@ -1092,18 +1156,29 @@ noinline void __init arc_ioc_setup(void)
if (read_aux_reg(ARC_REG_SLC_BCR))
slc_entire_op(OP_FLUSH_N_INV);
- /* IOC Aperture start: TDB: handle non default CONFIG_LINUX_LINK_BASE */
- write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000);
-
/*
- * IOC Aperture size:
- * decoded as 2 ^ (SIZE + 2) KB: so setting 0x11 implies 512M
+ * currently IOC Aperture covers entire DDR
* TBD: fix for PGU + 1GB of low mem
* TBD: fix for PAE
*/
- ap_sz = order_base_2(arc_get_mem_sz()/1024) - 2;
- write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, ap_sz);
+ mem_sz = arc_get_mem_sz();
+
+ if (!is_power_of_2(mem_sz) || mem_sz < 4096)
+ panic("IOC Aperture size must be power of 2 larger than 4KB");
+
+ /*
+ * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB,
+ * so setting 0x11 implies 512MB, 0x12 implies 1GB...
+ */
+ write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, order_base_2(mem_sz >> 10) - 2);
+
+ /* for now assume kernel base is start of IOC aperture */
+ ioc_base = CONFIG_LINUX_RAM_BASE;
+
+ if (ioc_base % mem_sz != 0)
+ panic("IOC Aperture start must be aligned to the size of the aperture");
+ write_aux_reg(ARC_REG_IO_COH_AP0_BASE, ioc_base >> 12);
write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1);
write_aux_reg(ARC_REG_IO_COH_ENABLE, 1);
@@ -1111,6 +1186,13 @@ noinline void __init arc_ioc_setup(void)
__dc_enable();
}
+/*
+ * Cache related boot time checks/setups only needed on master CPU:
+ * - Geometry checks (kernel build and hardware agree: e.g. L1_CACHE_BYTES)
+ * Assume SMP only, so all cores will have same cache config. A check on
+ * one core suffices for all
+ * - IOC setup / dma callbacks only need to be done once
+ */
void __init arc_cache_init_master(void)
{
unsigned int __maybe_unused cpu = smp_processor_id();
@@ -1188,14 +1270,29 @@ void __ref arc_cache_init(void)
unsigned int __maybe_unused cpu = smp_processor_id();
char str[256];
- printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
+ pr_info("%s", arc_cache_mumbojumbo(0, str, sizeof(str)));
- /*
- * Only master CPU needs to execute rest of function:
- * - Assume SMP so all cores will have same cache config so
- * any geomtry checks will be same for all
- * - IOC setup / dma callbacks only need to be setup once
- */
if (!cpu)
arc_cache_init_master();
+
+ /*
+ * In PAE regime, TLB and cache maintenance ops take wider addresses
+ * And even if PAE is not enabled in kernel, the upper 32-bits still need
+ * to be zeroed to keep the ops sane.
+ * As an optimization for more common !PAE enabled case, zero them out
+ * once at init, rather than checking/setting to 0 for every runtime op
+ */
+ if (is_isa_arcv2() && pae40_exist_but_not_enab()) {
+
+ if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE))
+ write_aux_reg(ARC_REG_IC_PTAG_HI, 0);
+
+ if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE))
+ write_aux_reg(ARC_REG_DC_PTAG_HI, 0);
+
+ if (l2_line_sz) {
+ write_aux_reg(ARC_REG_SLC_RGN_END1, 0);
+ write_aux_reg(ARC_REG_SLC_RGN_START1, 0);
+ }
+ }
}
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 71d3efff99d3..e9d93604ad0f 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -153,6 +153,19 @@ static void _dma_cache_sync(phys_addr_t paddr, size_t size,
}
}
+/*
+ * arc_dma_map_page - map a portion of a page for streaming DMA
+ *
+ * Ensure that any data held in the cache is appropriately discarded
+ * or written back.
+ *
+ * The device owns this memory once this call has completed. The CPU
+ * can regain ownership by calling dma_unmap_page().
+ *
+ * Note: while it takes struct page as arg, caller can "abuse" it to pass
+ * a region larger than PAGE_SIZE, provided it is physically contiguous
+ * and this still works correctly
+ */
static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, enum dma_data_direction dir,
unsigned long attrs)
@@ -165,6 +178,24 @@ static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page,
return plat_phys_to_dma(dev, paddr);
}
+/*
+ * arc_dma_unmap_page - tear down a streaming mapping made by dma_map_page()
+ *
+ * The DMA handle is translated back to a physical address and, unless the
+ * caller passed DMA_ATTR_SKIP_CPU_SYNC, _dma_cache_sync() is run over the
+ * buffer for the given direction. After this call, reads by the CPU to the
+ * buffer are guaranteed to see whatever the device wrote there.
+ *
+ * Note: historically this routine was not implemented for ARC
+ */
+static void arc_dma_unmap_page(struct device *dev, dma_addr_t handle,
+			       size_t size, enum dma_data_direction dir,
+			       unsigned long attrs)
+{
+	const phys_addr_t paddr = plat_dma_to_phys(dev, handle);
+
+	/* Caller asked for no cache maintenance: nothing more to do */
+	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+		return;
+
+	_dma_cache_sync(paddr, size, dir);
+}
+
static int arc_dma_map_sg(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir, unsigned long attrs)
{
@@ -178,6 +209,18 @@ static int arc_dma_map_sg(struct device *dev, struct scatterlist *sg,
return nents;
}
+/* Unmap each of the @nents scatterlist entries via arc_dma_unmap_page() */
+static void arc_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+			     int nents, enum dma_data_direction dir,
+			     unsigned long attrs)
+{
+	struct scatterlist *entry;
+	int idx;
+
+	for_each_sg(sg, entry, nents, idx) {
+		dma_addr_t handle = sg_dma_address(entry);
+		size_t len = sg_dma_len(entry);
+
+		arc_dma_unmap_page(dev, handle, len, dir, attrs);
+	}
+}
+
static void arc_dma_sync_single_for_cpu(struct device *dev,
dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
@@ -223,7 +266,9 @@ const struct dma_map_ops arc_dma_ops = {
.free = arc_dma_free,
.mmap = arc_dma_mmap,
.map_page = arc_dma_map_page,
+ .unmap_page = arc_dma_unmap_page,
.map_sg = arc_dma_map_sg,
+ .unmap_sg = arc_dma_unmap_sg,
.sync_single_for_device = arc_dma_sync_single_for_device,
.sync_single_for_cpu = arc_dma_sync_single_for_cpu,
.sync_sg_for_cpu = arc_dma_sync_sg_for_cpu,
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 162c97528872..a0b7bd6d030d 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -207,7 +207,7 @@ no_context:
/* Are we prepared to handle this kernel fault?
*
* (The kernel has valid exception-points in the source
- * when it acesses user-memory. When it fails in one
+ * when it accesses user-memory. When it fails in one
* of those points, we find it in a table and do a jump
* to some fixup code that loads an appropriate error
* code)
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 8c9415ed6280..ba145065c579 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -26,7 +26,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE);
char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE);
EXPORT_SYMBOL(empty_zero_page);
-static const unsigned long low_mem_start = CONFIG_LINUX_LINK_BASE;
+static const unsigned long low_mem_start = CONFIG_LINUX_RAM_BASE;
static unsigned long low_mem_sz;
#ifdef CONFIG_HIGHMEM
@@ -63,7 +63,7 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
if (!low_mem_sz) {
if (base != low_mem_start)
- panic("CONFIG_LINUX_LINK_BASE != DT memory { }");
+ panic("CONFIG_LINUX_RAM_BASE != DT memory { }");
low_mem_sz = size;
in_use = 1;
@@ -161,7 +161,7 @@ void __init setup_arch_memory(void)
* We can't use the helper free_area_init(zones[]) because it uses
* PAGE_OFFSET to compute the @min_low_pfn which would be wrong
* when our kernel doesn't start at PAGE_OFFSET, i.e.
- * PAGE_OFFSET != CONFIG_LINUX_LINK_BASE
+ * PAGE_OFFSET != CONFIG_LINUX_RAM_BASE
*/
free_area_init_node(0, /* node-id */
zones_size, /* num pages per zone */
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index d0126fdfe2d8..8ceefbf72fb0 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -104,6 +104,8 @@
/* A copy of the ASID from the PID reg is kept in asid_cache */
DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
+static int __read_mostly pae_exists;
+
/*
* Utility Routine to erase a J-TLB entry
* Caller needs to setup Index Reg (manually or via getIndex)
@@ -784,7 +786,7 @@ void read_decode_mmu_bcr(void)
mmu->u_dtlb = mmu4->u_dtlb * 4;
mmu->u_itlb = mmu4->u_itlb * 4;
mmu->sasid = mmu4->sasid;
- mmu->pae = mmu4->pae;
+ pae_exists = mmu->pae = mmu4->pae;
}
}
@@ -809,12 +811,17 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
return buf;
}
+/*
+ * Returns 1 when the MMU build config reported PAE (pae_exists) while
+ * is_pae40_enabled() is false, 0 otherwise.
+ */
+int pae40_exist_but_not_enab(void)
+{
+	if (!pae_exists)
+		return 0;
+
+	return !is_pae40_enabled();
+}
+
void arc_mmu_init(void)
{
char str[256];
struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
- printk(arc_mmu_mumbojumbo(0, str, sizeof(str)));
+ pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str)));
/*
* Can't be done in processor.h due to header include dependencies
@@ -859,6 +866,9 @@ void arc_mmu_init(void)
/* swapper_pg_dir is the pgd for the kernel, used by vmalloc */
write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir);
#endif
+
+ if (pae40_exist_but_not_enab())
+ write_aux_reg(ARC_REG_TLBPD1HI, 0);
}
/*
@@ -898,9 +908,6 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
local_irq_save(flags);
- /* re-enable the MMU */
- write_aux_reg(ARC_REG_PID, MMU_ENABLE | read_aux_reg(ARC_REG_PID));
-
/* loop thru all sets of TLB */
for (set = 0; set < mmu->sets; set++) {
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index b30e4e36bb00..0e1e47a67c73 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -274,6 +274,13 @@ ex_saved_reg1:
.macro COMMIT_ENTRY_TO_MMU
#if (CONFIG_ARC_MMU_VER < 4)
+#ifdef CONFIG_EZNPS_MTM_EXT
+ /* verify if entry for this vaddr+ASID already exists */
+ sr TLBProbe, [ARC_REG_TLBCOMMAND]
+ lr r0, [ARC_REG_TLBINDEX]
+ bbit0 r0, 31, 88f
+#endif
+
/* Get free TLB slot: Set = computed from vaddr, way = random */
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
@@ -287,6 +294,8 @@ ex_saved_reg1:
#else
sr TLBInsertEntry, [ARC_REG_TLBCOMMAND]
#endif
+
+88:
.endm
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
index 38ff349d7f2a..f1ac6790da5f 100644
--- a/arch/arc/plat-axs10x/axs10x.c
+++ b/arch/arc/plat-axs10x/axs10x.c
@@ -80,22 +80,6 @@ static void __init axs10x_enable_gpio_intc_wire(void)
iowrite32(1 << MB_TO_GPIO_IRQ, (void __iomem *) GPIO_INTEN);
}
-static inline void __init
-write_cgu_reg(uint32_t value, void __iomem *reg, void __iomem *lock_reg)
-{
- unsigned int loops = 128 * 1024, ctr;
-
- iowrite32(value, reg);
-
- ctr = loops;
- while (((ioread32(lock_reg) & 1) == 1) && ctr--) /* wait for unlock */
- cpu_relax();
-
- ctr = loops;
- while (((ioread32(lock_reg) & 1) == 0) && ctr--) /* wait for re-lock */
- cpu_relax();
-}
-
static void __init axs10x_print_board_ver(unsigned int creg, const char *str)
{
union ver {
@@ -314,7 +298,6 @@ static void __init axs101_early_init(void)
#ifdef CONFIG_AXS103
-#define AXC003_CGU 0xF0000000
#define AXC003_CREG 0xF0001000
#define AXC003_MST_AXI_TUNNEL 0
#define AXC003_MST_HS38 1
@@ -324,131 +307,38 @@ static void __init axs101_early_init(void)
#define CREG_CPU_TUN_IO_CTRL (AXC003_CREG + 0x494)
-union pll_reg {
- struct {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:17, noupd:1, bypass:1, edge:1, high:6, low:6;
-#else
- unsigned int low:6, high:6, edge:1, bypass:1, noupd:1, pad:17;
-#endif
- };
- unsigned int val;
-};
-
-static unsigned int __init axs103_get_freq(void)
-{
- union pll_reg idiv, fbdiv, odiv;
- unsigned int f = 33333333;
-
- idiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 0);
- fbdiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 4);
- odiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 8);
-
- if (idiv.bypass != 1)
- f = f / (idiv.low + idiv.high);
-
- if (fbdiv.bypass != 1)
- f = f * (fbdiv.low + fbdiv.high);
-
- if (odiv.bypass != 1)
- f = f / (odiv.low + odiv.high);
-
- f = (f + 500000) / 1000000; /* Rounding */
- return f;
-}
-
-static inline unsigned int __init encode_div(unsigned int id, int upd)
-{
- union pll_reg div;
-
- div.val = 0;
-
- div.noupd = !upd;
- div.bypass = id == 1 ? 1 : 0;
- div.edge = (id%2 == 0) ? 0 : 1; /* 0 = rising */
- div.low = (id%2 == 0) ? id >> 1 : (id >> 1)+1;
- div.high = id >> 1;
-
- return div.val;
-}
-
-noinline static void __init
-axs103_set_freq(unsigned int id, unsigned int fd, unsigned int od)
-{
- write_cgu_reg(encode_div(id, 0),
- (void __iomem *)AXC003_CGU + 0x80 + 0,
- (void __iomem *)AXC003_CGU + 0x110);
-
- write_cgu_reg(encode_div(fd, 0),
- (void __iomem *)AXC003_CGU + 0x80 + 4,
- (void __iomem *)AXC003_CGU + 0x110);
-
- write_cgu_reg(encode_div(od, 1),
- (void __iomem *)AXC003_CGU + 0x80 + 8,
- (void __iomem *)AXC003_CGU + 0x110);
-}
-
static void __init axs103_early_init(void)
{
- int offset = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk");
- const struct fdt_property *prop = fdt_get_property(initial_boot_params,
- offset,
- "clock-frequency",
- NULL);
- u32 freq = be32_to_cpu(*(u32*)(prop->data)) / 1000000, orig = freq;
-
+#ifdef CONFIG_ARC_MCIP
/*
* AXS103 configurations for SMP/QUAD configurations share device tree
- * which defaults to 90 MHz. However recent failures of Quad config
+ * which defaults to 100 MHz. However recent failures of Quad config
* revealed P&R timing violations so clamp it down to safe 50 MHz
* Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack
- *
- * This hack is really hacky as of now. Fix it properly by getting the
- * number of cores as return value of platform's early SMP callback
+ * of fudging the freq in DT
*/
-#ifdef CONFIG_ARC_MCIP
unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F;
- if (num_cores > 2)
- freq = 50;
-#endif
-
- switch (freq) {
- case 33:
- axs103_set_freq(1, 1, 1);
- break;
- case 50:
- axs103_set_freq(1, 30, 20);
- break;
- case 75:
- axs103_set_freq(2, 45, 10);
- break;
- case 90:
- axs103_set_freq(2, 54, 10);
- break;
- case 100:
- axs103_set_freq(1, 30, 10);
- break;
- case 125:
- axs103_set_freq(2, 45, 6);
- break;
- default:
+ if (num_cores > 2) {
+ u32 freq = 50, orig;
/*
- * In this case, core_frequency derived from
- * DT "clock-frequency" might not match with board value.
- * Hence update it to match the board value.
+ * TODO: use cpu node "cpu-freq" param instead of platform-specific
+ * "/cpu_card/core_clk" as it works only if we use fixed-clock for cpu.
*/
- freq = axs103_get_freq();
- break;
- }
-
- pr_info("Freq is %dMHz\n", freq);
-
- /* Patching .dtb in-place with new core clock value */
- if (freq != orig ) {
- freq = cpu_to_be32(freq * 1000000);
- fdt_setprop_inplace(initial_boot_params, offset,
- "clock-frequency", &freq, sizeof(freq));
+ int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk");
+ const struct fdt_property *prop;
+
+ prop = fdt_get_property(initial_boot_params, off,
+ "clock-frequency", NULL);
+ orig = be32_to_cpu(*(u32*)(prop->data)) / 1000000;
+
+ /* Patching .dtb in-place with new core clock value */
+ if (freq != orig ) {
+ freq = cpu_to_be32(freq * 1000000);
+ fdt_setprop_inplace(initial_boot_params, off,
+ "clock-frequency", &freq, sizeof(freq));
+ }
}
+#endif
/* Memory maps already config in pre-bootloader */
diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig
index 1595a38e50cd..e151e2067886 100644
--- a/arch/arc/plat-eznps/Kconfig
+++ b/arch/arc/plat-eznps/Kconfig
@@ -12,8 +12,8 @@ menuconfig ARC_PLAT_EZNPS
help
Support for EZchip development platforms,
based on ARC700 cores.
- We handle few flavours:
- - Hardware Emulator AKA HE which is FPGA based chasis
+ We handle few flavors:
+ - Hardware Emulator AKA HE which is FPGA based chassis
- Simulator based on MetaWare nSIM
- NPS400 chip based on ASIC
@@ -32,3 +32,25 @@ config EZNPS_MTM_EXT
any of them seem like CPU from Linux point of view.
All threads within same core share the execution unit of the
core and HW scheduler round robin between them.
+
+config EZNPS_MEM_ERROR_ALIGN
+ bool "ARC-EZchip Memory error as an exception"
+ depends on EZNPS_MTM_EXT
+ default n
+ help
+ On the real chip of the NPS, user memory errors are handled
+ as a machine check exception, which is fatal, whereas on the
+ simulator platform for NPS they are handled as a Level 2 interrupt
+ (as on a stock ARC700), which is recoverable. This option makes
+ the simulator behave like the hardware.
+
+config EZNPS_SHARED_AUX_REGS
+ bool "ARC-EZchip Shared Auxiliary Registers Per Core"
+ depends on ARC_PLAT_EZNPS
+ default y
+ help
+ On the real chip of the NPS, auxiliary registers are shared between
+ all the cpus of the core, whereas on simulator platform for NPS,
+ each cpu has a different set of auxiliary registers. Configuration
+ should be unset if auxiliary registers are not shared between the cpus
+ of the core, so there will be a need to initialize them per cpu.
diff --git a/arch/arc/plat-eznps/Makefile b/arch/arc/plat-eznps/Makefile
index 21091b199df0..8d4371706b2f 100644
--- a/arch/arc/plat-eznps/Makefile
+++ b/arch/arc/plat-eznps/Makefile
@@ -2,6 +2,6 @@
# Makefile for the linux kernel.
#
-obj-y := entry.o platform.o
+obj-y := entry.o platform.o ctop.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_EZNPS_MTM_EXT) += mtm.o
diff --git a/arch/arc/plat-eznps/ctop.c b/arch/arc/plat-eznps/ctop.c
new file mode 100644
index 000000000000..030bcd070a1b
--- /dev/null
+++ b/arch/arc/plat-eznps/ctop.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright(c) 2015 EZchip Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <plat/ctop.h>
+
+/*
+ * Swap the Data Plane auxiliary registers between two tasks: the current
+ * register contents are stored in @prev's thread.dp, and the values held
+ * in @next's thread.dp are written to the registers.
+ */
+void dp_save_restore(struct task_struct *prev, struct task_struct *next)
+{
+	struct eznps_dp *outgoing = &prev->thread.dp;
+	struct eznps_dp *incoming = &next->thread.dp;
+
+	/* EFLAGS: save for prev, then load for next */
+	outgoing->eflags = read_aux_reg(CTOP_AUX_EFLAGS);
+	write_aux_reg(CTOP_AUX_EFLAGS, incoming->eflags);
+
+	/* GPA1: save for prev, then load for next */
+	outgoing->gpa1 = read_aux_reg(CTOP_AUX_GPA1);
+	write_aux_reg(CTOP_AUX_GPA1, incoming->gpa1);
+}
diff --git a/arch/arc/plat-eznps/entry.S b/arch/arc/plat-eznps/entry.S
index 328261c27cda..091c92c32ab6 100644
--- a/arch/arc/plat-eznps/entry.S
+++ b/arch/arc/plat-eznps/entry.S
@@ -27,7 +27,7 @@
.align 1024 ; HW requirement for restart first PC
ENTRY(res_service)
-#ifdef CONFIG_EZNPS_MTM_EXT
+#if defined(CONFIG_EZNPS_MTM_EXT) && defined(CONFIG_EZNPS_SHARED_AUX_REGS)
; There is no work for HW thread id != 0
lr r3, [CTOP_AUX_THREAD_ID]
cmp r3, 0
diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h
index ee2e32df5e90..0c7d11022d0f 100644
--- a/arch/arc/plat-eznps/include/plat/ctop.h
+++ b/arch/arc/plat-eznps/include/plat/ctop.h
@@ -39,6 +39,7 @@
#define CTOP_AUX_LOGIC_CORE_ID (CTOP_AUX_BASE + 0x018)
#define CTOP_AUX_MT_CTRL (CTOP_AUX_BASE + 0x020)
#define CTOP_AUX_HW_COMPLY (CTOP_AUX_BASE + 0x024)
+#define CTOP_AUX_DPC (CTOP_AUX_BASE + 0x02C)
#define CTOP_AUX_LPC (CTOP_AUX_BASE + 0x030)
#define CTOP_AUX_EFLAGS (CTOP_AUX_BASE + 0x080)
#define CTOP_AUX_IACK (CTOP_AUX_BASE + 0x088)
@@ -46,6 +47,7 @@
#define CTOP_AUX_UDMC (CTOP_AUX_BASE + 0x300)
/* EZchip core instructions */
+#define CTOP_INST_HWSCHD_WFT_IE12 0x3E6F7344
#define CTOP_INST_HWSCHD_OFF_R4 0x3C6F00BF
#define CTOP_INST_HWSCHD_RESTORE_R4 0x3E6F7103
#define CTOP_INST_SCHD_RW 0x3E6F7004
diff --git a/arch/arc/plat-eznps/mtm.c b/arch/arc/plat-eznps/mtm.c
index aaaaffd3d940..2388de3d09ef 100644
--- a/arch/arc/plat-eznps/mtm.c
+++ b/arch/arc/plat-eznps/mtm.c
@@ -21,10 +21,22 @@
#include <plat/mtm.h>
#include <plat/smp.h>
-#define MT_CTRL_HS_CNT 0xFF
+#define MT_HS_CNT_MIN 0x01
+#define MT_HS_CNT_MAX 0xFF
#define MT_CTRL_ST_CNT 0xF
#define NPS_NUM_HW_THREADS 0x10
+static int mtm_hs_ctr = MT_HS_CNT_MAX;
+
+#ifdef CONFIG_EZNPS_MEM_ERROR_ALIGN
+/* Memory error handler: passes the faulting address and regs to die(), then returns 1 */
+int do_memory_error(unsigned long address, struct pt_regs *regs)
+{
+ die("Invalid Mem Access", regs, address);
+
+ return 1;
+}
+#endif
+
static void mtm_init_nat(int cpu)
{
struct nps_host_reg_mtm_cfg mtm_cfg;
@@ -98,6 +110,18 @@ void mtm_enable_core(unsigned int cpu)
int i;
struct nps_host_reg_aux_mt_ctrl mt_ctrl;
struct nps_host_reg_mtm_cfg mtm_cfg;
+ struct nps_host_reg_aux_dpc dpc;
+
+ /*
+ * Initializing dpc register in each CPU.
+ * Overwriting the init value of the DPC
+ * register so that CMEM and FMT virtual address
+ * spaces are accessible, and Data Plane HW
+ * facilities are enabled.
+ */
+ dpc.ien = 1;
+ dpc.men = 1;
+ write_aux_reg(CTOP_AUX_DPC, dpc.value);
if (NPS_CPU_TO_THREAD_NUM(cpu) != 0)
return;
@@ -118,9 +142,7 @@ void mtm_enable_core(unsigned int cpu)
/* Enable HW schedule, stall counter, mtm */
mt_ctrl.value = 0;
mt_ctrl.hsen = 1;
- mt_ctrl.hs_cnt = MT_CTRL_HS_CNT;
- mt_ctrl.sten = 1;
- mt_ctrl.st_cnt = MT_CTRL_ST_CNT;
+ mt_ctrl.hs_cnt = mtm_hs_ctr;
mt_ctrl.mten = 1;
write_aux_reg(CTOP_AUX_MT_CTRL, mt_ctrl.value);
@@ -131,3 +153,23 @@ void mtm_enable_core(unsigned int cpu)
*/
cpu_relax();
}
+
+/*
+ * Parse the "nps_mtm_hs_ctr" early parameter and, when it is a number in
+ * [MT_HS_CNT_MIN, MT_HS_CNT_MAX] (inclusive), store it in mtm_hs_ctr.
+ *
+ * Returns 0 on success and -EINVAL when the value is missing, is not a
+ * number, or is out of range; mtm_hs_ctr keeps its previous value then.
+ */
+static int __init set_mtm_hs_ctr(char *ctr_str)
+{
+	long hs_ctr;
+
+	/*
+	 * Guard against a parameter given without a value, and do not print
+	 * hs_ctr when parsing failed: kstrtol() leaves it unwritten on error.
+	 */
+	if (!ctr_str || kstrtol(ctr_str, 0, &hs_ctr)) {
+		pr_err("** Invalid @nps_mtm_hs_ctr [%s] needs to be [%d:%d] (incl)\n",
+		       ctr_str ? ctr_str : "", MT_HS_CNT_MIN, MT_HS_CNT_MAX);
+		return -EINVAL;
+	}
+
+	if (hs_ctr > MT_HS_CNT_MAX || hs_ctr < MT_HS_CNT_MIN) {
+		/* hs_ctr is a long, so it needs %ld rather than %d */
+		pr_err("** Invalid @nps_mtm_hs_ctr [%ld] needs to be [%d:%d] (incl)\n",
+		       hs_ctr, MT_HS_CNT_MIN, MT_HS_CNT_MAX);
+		return -EINVAL;
+	}
+
+	mtm_hs_ctr = hs_ctr;
+
+	return 0;
+}
+early_param("nps_mtm_hs_ctr", set_mtm_hs_ctr);
diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig
new file mode 100644
index 000000000000..5a6ed5afb009
--- /dev/null
+++ b/arch/arc/plat-hsdk/Kconfig
@@ -0,0 +1,9 @@
+# Copyright (C) 2017 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+menuconfig ARC_SOC_HSDK
+ bool "ARC HS Development Kit SOC"
diff --git a/arch/arc/plat-hsdk/Makefile b/arch/arc/plat-hsdk/Makefile
new file mode 100644
index 000000000000..9a50c511a672
--- /dev/null
+++ b/arch/arc/plat-hsdk/Makefile
@@ -0,0 +1,9 @@
+#
+# Copyright (C) 2017 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+obj-y := platform.o
diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c
new file mode 100644
index 000000000000..a2e7fd17e36d
--- /dev/null
+++ b/arch/arc/plat-hsdk/platform.c
@@ -0,0 +1,66 @@
+/*
+ * ARC HSDK Platform support code
+ *
+ * Copyright (C) 2017 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <asm/arcregs.h>
+#include <asm/io.h>
+#include <asm/mach_desc.h>
+
+#define ARC_CCM_UNUSED_ADDR 0x60000000
+
+/*
+ * Per-CPU platform hook: for whichever of ICCM/DCCM this CPU reports with a
+ * non-zero size, point its aux base register at ARC_CCM_UNUSED_ADDR.
+ */
+static void __init hsdk_init_per_cpu(unsigned int cpu)
+{
+	const int has_iccm = cpuinfo_arc700[cpu].iccm.sz != 0;
+	const int has_dccm = cpuinfo_arc700[cpu].dccm.sz != 0;
+
+	/*
+	 * ICCM is mapped to 0x7z by default, an area used for kernel
+	 * virtual mappings, so move it to the currently unused area.
+	 */
+	if (has_iccm)
+		write_aux_reg(ARC_REG_AUX_ICCM, ARC_CCM_UNUSED_ADDR);
+
+	/*
+	 * DCCM is mapped to 0x8z by default, an area used by the kernel,
+	 * so move it to the currently unused area too.
+	 */
+	if (has_dccm)
+		write_aux_reg(ARC_REG_AUX_DCCM, ARC_CCM_UNUSED_ADDR);
+}
+
+#define ARC_PERIPHERAL_BASE 0xf0000000
+#define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000)
+#define CREG_PAE (CREG_BASE + 0x180)
+#define CREG_PAE_UPDATE (CREG_BASE + 0x194)
+
+static void __init hsdk_init_early(void)
+{
+ /*
+ * Early platform hook: program the CREG PAE offset to zero.
+ *
+ * PAE remapping for DMA clients does not work due to an RTL bug, so
+ * CREG_PAE register must be programmed to all zeroes, otherwise it
+ * will cause problems with DMA to/from peripherals even if PAE40 is
+ * not used.
+ */
+
+ /* Default is 1, which means "PAE offset = 4GByte" */
+ writel_relaxed(0, (void __iomem *) CREG_PAE);
+
+ /* Writing 1 to CREG_PAE_UPDATE applies the CREG_PAE value set above */
+ writel(1, (void __iomem *) CREG_PAE_UPDATE);
+}
+
+static const char *hsdk_compat[] __initconst = {
+ "snps,hsdk",
+ NULL,
+};
+
+MACHINE_START(SIMULATION, "hsdk")
+ .dt_compat = hsdk_compat,
+ .init_early = hsdk_init_early,
+ .init_per_cpu = hsdk_init_per_cpu,
+MACHINE_END
diff --git a/arch/arc/plat-sim/Kconfig b/arch/arc/plat-sim/Kconfig
deleted file mode 100644
index ac6af96a82f3..000000000000
--- a/arch/arc/plat-sim/Kconfig
+++ /dev/null
@@ -1,13 +0,0 @@
-#
-# Copyright (C) 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-#
-
-menuconfig ARC_PLAT_SIM
- bool "ARC nSIM based simulation virtual platforms"
- help
- Support for nSIM based ARC simulation platforms
- This includes the standalone nSIM (uart only) vs. System C OSCI VP
diff --git a/arch/arc/plat-sim/platform.c b/arch/arc/plat-sim/platform.c
index aea87389e44b..5cda56b1a2ea 100644
--- a/arch/arc/plat-sim/platform.c
+++ b/arch/arc/plat-sim/platform.c
@@ -20,11 +20,14 @@
*/
static const char *simulation_compat[] __initconst = {
+#ifdef CONFIG_ISA_ARCOMPACT
"snps,nsim",
- "snps,nsim_hs",
"snps,nsimosci",
+#else
+ "snps,nsim_hs",
"snps,nsimosci_hs",
"snps,zebu_hs",
+#endif
NULL,
};
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 61a0cb15067e..f1b3f1d575d4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -50,7 +50,7 @@ config ARM
select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
select HAVE_ARCH_TRACEHOOK
select HAVE_ARM_SMCCC if CPU_V7
- select HAVE_CBPF_JIT
+ select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32
select HAVE_CC_STACKPROTECTOR
select HAVE_CONTEXT_TRACKING
select HAVE_C_RECORDMCOUNT
diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S
index a17ca8d78656..c94a88ae834d 100644
--- a/arch/arm/boot/compressed/efi-header.S
+++ b/arch/arm/boot/compressed/efi-header.S
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013-2015 Linaro Ltd
+ * Copyright (C) 2013-2017 Linaro Ltd
* Authors: Roy Franz <roy.franz@linaro.org>
* Ard Biesheuvel <ard.biesheuvel@linaro.org>
*
@@ -8,6 +8,9 @@
* published by the Free Software Foundation.
*/
+#include <linux/pe.h>
+#include <linux/sizes.h>
+
.macro __nop
#ifdef CONFIG_EFI_STUB
@ This is almost but not quite a NOP, since it does clobber the
@@ -15,7 +18,7 @@
@ PE/COFF expects the magic string "MZ" at offset 0, while the
@ ARM/Linux boot protocol expects an executable instruction
@ there.
- .inst 'M' | ('Z' << 8) | (0x1310 << 16) @ tstne r0, #0x4d000
+ .inst MZ_MAGIC | (0x1310 << 16) @ tstne r0, #0x4d000
#else
AR_CLASS( mov r0, r0 )
M_CLASS( nop.w )
@@ -34,96 +37,97 @@
@ The only 2 fields of the MSDOS header that are used are this
@ PE/COFF offset, and the "MZ" bytes at offset 0x0.
@
- .long pe_header - start @ Offset to the PE header.
+ .long pe_header - start @ Offset to the PE header.
pe_header:
- .ascii "PE\0\0"
+ .long PE_MAGIC
coff_header:
- .short 0x01c2 @ ARM or Thumb
- .short 2 @ nr_sections
- .long 0 @ TimeDateStamp
- .long 0 @ PointerToSymbolTable
- .long 1 @ NumberOfSymbols
- .short section_table - optional_header
- @ SizeOfOptionalHeader
- .short 0x306 @ Characteristics.
- @ IMAGE_FILE_32BIT_MACHINE |
- @ IMAGE_FILE_DEBUG_STRIPPED |
- @ IMAGE_FILE_EXECUTABLE_IMAGE |
- @ IMAGE_FILE_LINE_NUMS_STRIPPED
+ .short IMAGE_FILE_MACHINE_THUMB @ Machine
+ .short section_count @ NumberOfSections
+ .long 0 @ TimeDateStamp
+ .long 0 @ PointerToSymbolTable
+ .long 0 @ NumberOfSymbols
+ .short section_table - optional_header @ SizeOfOptionalHeader
+ .short IMAGE_FILE_32BIT_MACHINE | \
+ IMAGE_FILE_DEBUG_STRIPPED | \
+ IMAGE_FILE_EXECUTABLE_IMAGE | \
+ IMAGE_FILE_LINE_NUMS_STRIPPED @ Characteristics
+
+#define __pecoff_code_size (__pecoff_data_start - __efi_start)
optional_header:
- .short 0x10b @ PE32 format
- .byte 0x02 @ MajorLinkerVersion
- .byte 0x14 @ MinorLinkerVersion
- .long _end - __efi_start @ SizeOfCode
- .long 0 @ SizeOfInitializedData
- .long 0 @ SizeOfUninitializedData
- .long efi_stub_entry - start @ AddressOfEntryPoint
- .long start_offset @ BaseOfCode
- .long 0 @ data
+ .short PE_OPT_MAGIC_PE32 @ PE32 format
+ .byte 0x02 @ MajorLinkerVersion
+ .byte 0x14 @ MinorLinkerVersion
+ .long __pecoff_code_size @ SizeOfCode
+ .long __pecoff_data_size @ SizeOfInitializedData
+ .long 0 @ SizeOfUninitializedData
+ .long efi_stub_entry - start @ AddressOfEntryPoint
+ .long start_offset @ BaseOfCode
+ .long __pecoff_data_start - start @ BaseOfData
extra_header_fields:
- .long 0 @ ImageBase
- .long 0x200 @ SectionAlignment
- .long 0x200 @ FileAlignment
- .short 0 @ MajorOperatingSystemVersion
- .short 0 @ MinorOperatingSystemVersion
- .short 0 @ MajorImageVersion
- .short 0 @ MinorImageVersion
- .short 0 @ MajorSubsystemVersion
- .short 0 @ MinorSubsystemVersion
- .long 0 @ Win32VersionValue
+ .long 0 @ ImageBase
+ .long SZ_4K @ SectionAlignment
+ .long SZ_512 @ FileAlignment
+ .short 0 @ MajorOsVersion
+ .short 0 @ MinorOsVersion
+ .short 0 @ MajorImageVersion
+ .short 0 @ MinorImageVersion
+ .short 0 @ MajorSubsystemVersion
+ .short 0 @ MinorSubsystemVersion
+ .long 0 @ Win32VersionValue
- .long _end - start @ SizeOfImage
- .long start_offset @ SizeOfHeaders
- .long 0 @ CheckSum
- .short 0xa @ Subsystem (EFI application)
- .short 0 @ DllCharacteristics
- .long 0 @ SizeOfStackReserve
- .long 0 @ SizeOfStackCommit
- .long 0 @ SizeOfHeapReserve
- .long 0 @ SizeOfHeapCommit
- .long 0 @ LoaderFlags
- .long 0x6 @ NumberOfRvaAndSizes
+ .long __pecoff_end - start @ SizeOfImage
+ .long start_offset @ SizeOfHeaders
+ .long 0 @ CheckSum
+ .short IMAGE_SUBSYSTEM_EFI_APPLICATION @ Subsystem
+ .short 0 @ DllCharacteristics
+ .long 0 @ SizeOfStackReserve
+ .long 0 @ SizeOfStackCommit
+ .long 0 @ SizeOfHeapReserve
+ .long 0 @ SizeOfHeapCommit
+ .long 0 @ LoaderFlags
+ .long (section_table - .) / 8 @ NumberOfRvaAndSizes
- .quad 0 @ ExportTable
- .quad 0 @ ImportTable
- .quad 0 @ ResourceTable
- .quad 0 @ ExceptionTable
- .quad 0 @ CertificationTable
- .quad 0 @ BaseRelocationTable
+ .quad 0 @ ExportTable
+ .quad 0 @ ImportTable
+ .quad 0 @ ResourceTable
+ .quad 0 @ ExceptionTable
+ .quad 0 @ CertificationTable
+ .quad 0 @ BaseRelocationTable
section_table:
- @
- @ The EFI application loader requires a relocation section
- @ because EFI applications must be relocatable. This is a
- @ dummy section as far as we are concerned.
- @
- .ascii ".reloc\0\0"
- .long 0 @ VirtualSize
- .long 0 @ VirtualAddress
- .long 0 @ SizeOfRawData
- .long 0 @ PointerToRawData
- .long 0 @ PointerToRelocations
- .long 0 @ PointerToLineNumbers
- .short 0 @ NumberOfRelocations
- .short 0 @ NumberOfLineNumbers
- .long 0x42100040 @ Characteristics
-
.ascii ".text\0\0\0"
- .long _end - __efi_start @ VirtualSize
- .long __efi_start @ VirtualAddress
- .long _edata - __efi_start @ SizeOfRawData
- .long __efi_start @ PointerToRawData
- .long 0 @ PointerToRelocations
- .long 0 @ PointerToLineNumbers
- .short 0 @ NumberOfRelocations
- .short 0 @ NumberOfLineNumbers
- .long 0xe0500020 @ Characteristics
+ .long __pecoff_code_size @ VirtualSize
+ .long __efi_start @ VirtualAddress
+ .long __pecoff_code_size @ SizeOfRawData
+ .long __efi_start @ PointerToRawData
+ .long 0 @ PointerToRelocations
+ .long 0 @ PointerToLineNumbers
+ .short 0 @ NumberOfRelocations
+ .short 0 @ NumberOfLineNumbers
+ .long IMAGE_SCN_CNT_CODE | \
+ IMAGE_SCN_MEM_READ | \
+ IMAGE_SCN_MEM_EXECUTE @ Characteristics
+
+ .ascii ".data\0\0\0"
+ .long __pecoff_data_size @ VirtualSize
+ .long __pecoff_data_start - start @ VirtualAddress
+ .long __pecoff_data_rawsize @ SizeOfRawData
+ .long __pecoff_data_start - start @ PointerToRawData
+ .long 0 @ PointerToRelocations
+ .long 0 @ PointerToLineNumbers
+ .short 0 @ NumberOfRelocations
+ .short 0 @ NumberOfLineNumbers
+ .long IMAGE_SCN_CNT_INITIALIZED_DATA | \
+ IMAGE_SCN_MEM_READ | \
+ IMAGE_SCN_MEM_WRITE @ Characteristics
+
+ .set section_count, (. - section_table) / 40
- .align 9
+ .align 12
__efi_start:
#endif
.endm
diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S
index 81c493156ce8..7a4c59154361 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.S
+++ b/arch/arm/boot/compressed/vmlinux.lds.S
@@ -48,13 +48,6 @@ SECTIONS
*(.rodata)
*(.rodata.*)
}
- .data : {
- /*
- * The EFI stub always executes from RAM, and runs strictly before the
- * decompressor, so we can make an exception for its r/w data, and keep it
- */
- *(.data.efistub)
- }
.piggydata : {
*(.piggydata)
}
@@ -70,6 +63,26 @@ SECTIONS
/* ensure the zImage file size is always a multiple of 64 bits */
/* (without a dummy byte, ld just ignores the empty section) */
.pad : { BYTE(0); . = ALIGN(8); }
+
+#ifdef CONFIG_EFI_STUB
+ .data : ALIGN(4096) {
+ __pecoff_data_start = .;
+ /*
+ * The EFI stub always executes from RAM, and runs strictly before the
+ * decompressor, so we can make an exception for its r/w data, and keep it
+ */
+ *(.data.efistub)
+ __pecoff_data_end = .;
+
+ /*
+ * PE/COFF mandates a file size which is a multiple of 512 bytes if the
+ * section size equals or exceeds 4 KB
+ */
+ . = ALIGN(512);
+ }
+ __pecoff_data_rawsize = . - ADDR(.data);
+#endif
+
_edata = .;
_magic_sig = ZIMAGE_MAGIC(0x016f2818);
@@ -84,6 +97,9 @@ SECTIONS
. = ALIGN(8); /* the stack must be 64-bit aligned */
.stack : { *(.stack) }
+ PROVIDE(__pecoff_data_size = ALIGN(512) - ADDR(.data));
+ PROVIDE(__pecoff_end = ALIGN(512));
+
.stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) }
.stab.excl 0 : { *(.stab.excl) }
diff --git a/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi b/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi
index f92f95741207..a183b56283f8 100644
--- a/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi
+++ b/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi
@@ -266,6 +266,7 @@
&hdmicec {
status = "okay";
+ needs-hpd;
};
&hsi2c_4 {
diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi
index dfcc8e00cf1c..0ade3619f3c3 100644
--- a/arch/arm/boot/dts/imx25.dtsi
+++ b/arch/arm/boot/dts/imx25.dtsi
@@ -297,6 +297,7 @@
#address-cells = <1>;
#size-cells = <1>;
status = "disabled";
+ ranges;
adc: adc@50030800 {
compatible = "fsl,imx25-gcq";
diff --git a/arch/arm/boot/dts/imx6q-evi.dts b/arch/arm/boot/dts/imx6q-evi.dts
index 1f0f950dc11e..e0aea782c666 100644
--- a/arch/arm/boot/dts/imx6q-evi.dts
+++ b/arch/arm/boot/dts/imx6q-evi.dts
@@ -94,6 +94,15 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_ecspi1 &pinctrl_ecspi1cs>;
status = "okay";
+
+ fpga: fpga@0 {
+ compatible = "altr,fpga-passive-serial";
+ spi-max-frequency = <20000000>;
+ reg = <0>;
+ pinctrl-0 = <&pinctrl_fpgaspi>;
+ nconfig-gpios = <&gpio4 9 GPIO_ACTIVE_LOW>;
+ nstat-gpios = <&gpio4 11 GPIO_ACTIVE_LOW>;
+ };
};
&ecspi3 {
@@ -319,6 +328,13 @@
>;
};
+ pinctrl_fpgaspi: fpgaspigrp {
+ fsl,pins = <
+ MX6QDL_PAD_KEY_ROW1__GPIO4_IO09 0x1b0b0
+ MX6QDL_PAD_KEY_ROW2__GPIO4_IO11 0x1b0b0
+ >;
+ };
+
pinctrl_gpminand: gpminandgrp {
fsl,pins = <
MX6QDL_PAD_NANDF_CLE__NAND_CLE 0xb0b1
diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi
index aeaa5a6e4fcf..a24e4f1911ab 100644
--- a/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi
@@ -507,7 +507,7 @@
pinctrl_pcie: pciegrp {
fsl,pins = <
/* PCIe reset */
- MX6QDL_PAD_EIM_BCLK__GPIO6_IO31 0x030b0
+ MX6QDL_PAD_EIM_DA0__GPIO3_IO00 0x030b0
MX6QDL_PAD_EIM_DA4__GPIO3_IO04 0x030b0
>;
};
@@ -668,7 +668,7 @@
&pcie {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_pcie>;
- reset-gpio = <&gpio6 31 GPIO_ACTIVE_LOW>;
+ reset-gpio = <&gpio3 0 GPIO_ACTIVE_LOW>;
status = "okay";
};
diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts
index 54c45402286b..0a24d1bf3c39 100644
--- a/arch/arm/boot/dts/imx7d-sdb.dts
+++ b/arch/arm/boot/dts/imx7d-sdb.dts
@@ -557,6 +557,14 @@
>;
};
+ pinctrl_spi4: spi4grp {
+ fsl,pins = <
+ MX7D_PAD_GPIO1_IO09__GPIO1_IO9 0x59
+ MX7D_PAD_GPIO1_IO12__GPIO1_IO12 0x59
+ MX7D_PAD_GPIO1_IO13__GPIO1_IO13 0x59
+ >;
+ };
+
pinctrl_tsc2046_pendown: tsc2046_pendown {
fsl,pins = <
MX7D_PAD_EPDC_BDR1__GPIO2_IO29 0x59
@@ -697,13 +705,5 @@
fsl,pins = <
MX7D_PAD_LPSR_GPIO1_IO01__PWM1_OUT 0x110b0
>;
-
- pinctrl_spi4: spi4grp {
- fsl,pins = <
- MX7D_PAD_GPIO1_IO09__GPIO1_IO9 0x59
- MX7D_PAD_GPIO1_IO12__GPIO1_IO12 0x59
- MX7D_PAD_GPIO1_IO13__GPIO1_IO13 0x59
- >;
- };
};
};
diff --git a/arch/arm/boot/dts/imx7ulp-pinfunc.h b/arch/arm/boot/dts/imx7ulp-pinfunc.h
new file mode 100644
index 000000000000..fe511775b518
--- /dev/null
+++ b/arch/arm/boot/dts/imx7ulp-pinfunc.h
@@ -0,0 +1,468 @@
+/*
+ * Copyright 2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __DTS_IMX7ULP_PINFUNC_H
+#define __DTS_IMX7ULP_PINFUNC_H
+
+/*
+ * The pin function ID is a tuple of
+ * <mux_conf_reg input_reg mux_mode input_val>
+ */
+
+#define IMX7ULP_PAD_PTC0__PTC0 0x0000 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC0__TRACE_D15 0x0000 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC0__LPUART4_CTS_B 0x0000 0x0244 0x4 0x1
+#define IMX7ULP_PAD_PTC0__LPI2C4_SCL 0x0000 0x0278 0x5 0x1
+#define IMX7ULP_PAD_PTC0__TPM4_CLKIN 0x0000 0x0298 0x6 0x1
+#define IMX7ULP_PAD_PTC0__FB_AD0 0x0000 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC1__PTC1 0x0004 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC1__TRACE_D14 0x0004 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC1__LPUART4_RTS_B 0x0004 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTC1__LPI2C4_SDA 0x0004 0x027c 0x5 0x1
+#define IMX7ULP_PAD_PTC1__TPM4_CH0 0x0004 0x0280 0x6 0x1
+#define IMX7ULP_PAD_PTC1__FB_AD1 0x0004 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC2__PTC2 0x0008 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC2__TRACE_D13 0x0008 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC2__LPUART4_TX 0x0008 0x024c 0x4 0x1
+#define IMX7ULP_PAD_PTC2__LPI2C4_HREQ 0x0008 0x0274 0x5 0x1
+#define IMX7ULP_PAD_PTC2__TPM4_CH1 0x0008 0x0284 0x6 0x1
+#define IMX7ULP_PAD_PTC2__FB_AD2 0x0008 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC3__PTC3 0x000c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC3__TRACE_D12 0x000c 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC3__LPUART4_RX 0x000c 0x0248 0x4 0x1
+#define IMX7ULP_PAD_PTC3__TPM4_CH2 0x000c 0x0288 0x6 0x1
+#define IMX7ULP_PAD_PTC3__FB_AD3 0x000c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC4__PTC4 0x0010 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC4__TRACE_D11 0x0010 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC4__FXIO1_D0 0x0010 0x0204 0x2 0x1
+#define IMX7ULP_PAD_PTC4__LPSPI2_PCS1 0x0010 0x02a0 0x3 0x1
+#define IMX7ULP_PAD_PTC4__LPUART5_CTS_B 0x0010 0x0250 0x4 0x1
+#define IMX7ULP_PAD_PTC4__LPI2C5_SCL 0x0010 0x02bc 0x5 0x1
+#define IMX7ULP_PAD_PTC4__TPM4_CH3 0x0010 0x028c 0x6 0x1
+#define IMX7ULP_PAD_PTC4__FB_AD4 0x0010 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC5__PTC5 0x0014 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC5__TRACE_D10 0x0014 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC5__FXIO1_D1 0x0014 0x0208 0x2 0x1
+#define IMX7ULP_PAD_PTC5__LPSPI2_PCS2 0x0014 0x02a4 0x3 0x1
+#define IMX7ULP_PAD_PTC5__LPUART5_RTS_B 0x0014 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTC5__LPI2C5_SDA 0x0014 0x02c0 0x5 0x1
+#define IMX7ULP_PAD_PTC5__TPM4_CH4 0x0014 0x0290 0x6 0x1
+#define IMX7ULP_PAD_PTC5__FB_AD5 0x0014 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC6__PTC6 0x0018 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC6__TRACE_D9 0x0018 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC6__FXIO1_D2 0x0018 0x020c 0x2 0x1
+#define IMX7ULP_PAD_PTC6__LPSPI2_PCS3 0x0018 0x02a8 0x3 0x1
+#define IMX7ULP_PAD_PTC6__LPUART5_TX 0x0018 0x0258 0x4 0x1
+#define IMX7ULP_PAD_PTC6__LPI2C5_HREQ 0x0018 0x02b8 0x5 0x1
+#define IMX7ULP_PAD_PTC6__TPM4_CH5 0x0018 0x0294 0x6 0x1
+#define IMX7ULP_PAD_PTC6__FB_AD6 0x0018 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC7__PTC7 0x001c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC7__TRACE_D8 0x001c 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC7__FXIO1_D3 0x001c 0x0210 0x2 0x1
+#define IMX7ULP_PAD_PTC7__LPUART5_RX 0x001c 0x0254 0x4 0x1
+#define IMX7ULP_PAD_PTC7__TPM5_CH1 0x001c 0x02c8 0x6 0x1
+#define IMX7ULP_PAD_PTC7__FB_AD7 0x001c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC8__PTC8 0x0020 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC8__TRACE_D7 0x0020 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC8__FXIO1_D4 0x0020 0x0214 0x2 0x1
+#define IMX7ULP_PAD_PTC8__LPSPI2_SIN 0x0020 0x02b0 0x3 0x1
+#define IMX7ULP_PAD_PTC8__LPUART6_CTS_B 0x0020 0x025c 0x4 0x1
+#define IMX7ULP_PAD_PTC8__LPI2C6_SCL 0x0020 0x02fc 0x5 0x1
+#define IMX7ULP_PAD_PTC8__TPM5_CLKIN 0x0020 0x02cc 0x6 0x1
+#define IMX7ULP_PAD_PTC8__FB_AD8 0x0020 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC9__PTC9 0x0024 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC9__TRACE_D6 0x0024 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC9__FXIO1_D5 0x0024 0x0218 0x2 0x1
+#define IMX7ULP_PAD_PTC9__LPSPI2_SOUT 0x0024 0x02b4 0x3 0x1
+#define IMX7ULP_PAD_PTC9__LPUART6_RTS_B 0x0024 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTC9__LPI2C6_SDA 0x0024 0x0300 0x5 0x1
+#define IMX7ULP_PAD_PTC9__TPM5_CH0 0x0024 0x02c4 0x6 0x1
+#define IMX7ULP_PAD_PTC9__FB_AD9 0x0024 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC10__PTC10 0x0028 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC10__TRACE_D5 0x0028 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC10__FXIO1_D6 0x0028 0x021c 0x2 0x1
+#define IMX7ULP_PAD_PTC10__LPSPI2_SCK 0x0028 0x02ac 0x3 0x1
+#define IMX7ULP_PAD_PTC10__LPUART6_TX 0x0028 0x0264 0x4 0x1
+#define IMX7ULP_PAD_PTC10__LPI2C6_HREQ 0x0028 0x02f8 0x5 0x1
+#define IMX7ULP_PAD_PTC10__TPM7_CH3 0x0028 0x02e8 0x6 0x1
+#define IMX7ULP_PAD_PTC10__FB_AD10 0x0028 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC11__PTC11 0x002c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC11__TRACE_D4 0x002c 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC11__FXIO1_D7 0x002c 0x0220 0x2 0x1
+#define IMX7ULP_PAD_PTC11__LPSPI2_PCS0 0x002c 0x029c 0x3 0x1
+#define IMX7ULP_PAD_PTC11__LPUART6_RX 0x002c 0x0260 0x4 0x1
+#define IMX7ULP_PAD_PTC11__TPM7_CH4 0x002c 0x02ec 0x6 0x1
+#define IMX7ULP_PAD_PTC11__FB_AD11 0x002c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC12__PTC12 0x0030 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC12__TRACE_D3 0x0030 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC12__FXIO1_D8 0x0030 0x0224 0x2 0x1
+#define IMX7ULP_PAD_PTC12__LPSPI3_PCS1 0x0030 0x0314 0x3 0x1
+#define IMX7ULP_PAD_PTC12__LPUART7_CTS_B 0x0030 0x0268 0x4 0x1
+#define IMX7ULP_PAD_PTC12__LPI2C7_SCL 0x0030 0x0308 0x5 0x1
+#define IMX7ULP_PAD_PTC12__TPM7_CH5 0x0030 0x02f0 0x6 0x1
+#define IMX7ULP_PAD_PTC12__FB_AD12 0x0030 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC13__PTC13 0x0034 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC13__TRACE_D2 0x0034 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC13__FXIO1_D9 0x0034 0x0228 0x2 0x1
+#define IMX7ULP_PAD_PTC13__LPSPI3_PCS2 0x0034 0x0318 0x3 0x1
+#define IMX7ULP_PAD_PTC13__LPUART7_RTS_B 0x0034 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTC13__LPI2C7_SDA 0x0034 0x030c 0x5 0x1
+#define IMX7ULP_PAD_PTC13__TPM7_CLKIN 0x0034 0x02f4 0x6 0x1
+#define IMX7ULP_PAD_PTC13__FB_AD13 0x0034 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC14__PTC14 0x0038 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC14__TRACE_D1 0x0038 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC14__FXIO1_D10 0x0038 0x022c 0x2 0x1
+#define IMX7ULP_PAD_PTC14__LPSPI3_PCS3 0x0038 0x031c 0x3 0x1
+#define IMX7ULP_PAD_PTC14__LPUART7_TX 0x0038 0x0270 0x4 0x1
+#define IMX7ULP_PAD_PTC14__LPI2C7_HREQ 0x0038 0x0304 0x5 0x1
+#define IMX7ULP_PAD_PTC14__TPM7_CH0 0x0038 0x02dc 0x6 0x1
+#define IMX7ULP_PAD_PTC14__FB_AD14 0x0038 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC15__PTC15 0x003c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC15__TRACE_D0 0x003c 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC15__FXIO1_D11 0x003c 0x0230 0x2 0x1
+#define IMX7ULP_PAD_PTC15__LPUART7_RX 0x003c 0x026c 0x4 0x1
+#define IMX7ULP_PAD_PTC15__TPM7_CH1 0x003c 0x02e0 0x6 0x1
+#define IMX7ULP_PAD_PTC15__FB_AD15 0x003c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC16__PTC16 0x0040 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC16__TRACE_CLKOUT 0x0040 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTC16__FXIO1_D12 0x0040 0x0234 0x2 0x1
+#define IMX7ULP_PAD_PTC16__LPSPI3_SIN 0x0040 0x0324 0x3 0x1
+#define IMX7ULP_PAD_PTC16__TPM7_CH2 0x0040 0x02e4 0x6 0x1
+#define IMX7ULP_PAD_PTC16__FB_ALE_FB_CS1_B_FB_TS_B 0x0040 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC17__PTC17 0x0044 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC17__FXIO1_D13 0x0044 0x0238 0x2 0x1
+#define IMX7ULP_PAD_PTC17__LPSPI3_SOUT 0x0044 0x0328 0x3 0x1
+#define IMX7ULP_PAD_PTC17__TPM6_CLKIN 0x0044 0x02d8 0x6 0x1
+#define IMX7ULP_PAD_PTC17__FB_CS0_B 0x0044 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC18__PTC18 0x0048 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC18__FXIO1_D14 0x0048 0x023c 0x2 0x1
+#define IMX7ULP_PAD_PTC18__LPSPI3_SCK 0x0048 0x0320 0x3 0x1
+#define IMX7ULP_PAD_PTC18__TPM6_CH0 0x0048 0x02d0 0x6 0x1
+#define IMX7ULP_PAD_PTC18__FB_OE_B 0x0048 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTC19__PTC19 0x004c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTC19__FXIO1_D15 0x004c 0x0240 0x2 0x1
+#define IMX7ULP_PAD_PTC19__LPSPI3_PCS0 0x004c 0x0310 0x3 0x1
+#define IMX7ULP_PAD_PTC19__TPM6_CH1 0x004c 0x02d4 0x6 0x1
+#define IMX7ULP_PAD_PTC19__FB_A16 0x004c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTD0__PTD0 0x0080 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD0__SDHC0_RESET_B 0x0080 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD1__PTD1 0x0084 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD1__SDHC0_CMD 0x0084 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD2__PTD2 0x0088 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD2__SDHC0_CLK 0x0088 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD3__PTD3 0x008c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD3__SDHC0_D7 0x008c 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD4__PTD4 0x0090 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD4__SDHC0_D6 0x0090 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD5__PTD5 0x0094 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD5__SDHC0_D5 0x0094 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD6__PTD6 0x0098 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD6__SDHC0_D4 0x0098 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD7__PTD7 0x009c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD7__SDHC0_D3 0x009c 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD8__PTD8 0x00a0 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD8__TPM4_CLKIN 0x00a0 0x0298 0x6 0x2
+#define IMX7ULP_PAD_PTD8__SDHC0_D2 0x00a0 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD9__PTD9 0x00a4 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD9__TPM4_CH0 0x00a4 0x0280 0x6 0x2
+#define IMX7ULP_PAD_PTD9__SDHC0_D1 0x00a4 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD10__PTD10 0x00a8 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD10__TPM4_CH1 0x00a8 0x0284 0x6 0x2
+#define IMX7ULP_PAD_PTD10__SDHC0_D0 0x00a8 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTD11__PTD11 0x00ac 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTD11__TPM4_CH2 0x00ac 0x0288 0x6 0x2
+#define IMX7ULP_PAD_PTD11__SDHC0_DQS 0x00ac 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE0__PTE0 0x0100 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE0__FXIO1_D31 0x0100 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE0__LPSPI2_PCS1 0x0100 0x02a0 0x3 0x2
+#define IMX7ULP_PAD_PTE0__LPUART4_CTS_B 0x0100 0x0244 0x4 0x2
+#define IMX7ULP_PAD_PTE0__LPI2C4_SCL 0x0100 0x0278 0x5 0x2
+#define IMX7ULP_PAD_PTE0__SDHC1_D1 0x0100 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE0__FB_A25 0x0100 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE1__PTE1 0x0104 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE1__FXIO1_D30 0x0104 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE1__LPSPI2_PCS2 0x0104 0x02a4 0x3 0x2
+#define IMX7ULP_PAD_PTE1__LPUART4_RTS_B 0x0104 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTE1__LPI2C4_SDA 0x0104 0x027c 0x5 0x2
+#define IMX7ULP_PAD_PTE1__SDHC1_D0 0x0104 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE1__FB_A26 0x0104 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE2__PTE2 0x0108 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE2__FXIO1_D29 0x0108 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE2__LPSPI2_PCS3 0x0108 0x02a8 0x3 0x2
+#define IMX7ULP_PAD_PTE2__LPUART4_TX 0x0108 0x024c 0x4 0x2
+#define IMX7ULP_PAD_PTE2__LPI2C4_HREQ 0x0108 0x0274 0x5 0x2
+#define IMX7ULP_PAD_PTE2__SDHC1_CLK 0x0108 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE3__PTE3 0x010c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE3__FXIO1_D28 0x010c 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE3__LPUART4_RX 0x010c 0x0248 0x4 0x2
+#define IMX7ULP_PAD_PTE3__TPM5_CH1 0x010c 0x02c8 0x6 0x2
+#define IMX7ULP_PAD_PTE3__SDHC1_CMD 0x010c 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE4__PTE4 0x0110 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE4__FXIO1_D27 0x0110 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE4__LPSPI2_SIN 0x0110 0x02b0 0x3 0x2
+#define IMX7ULP_PAD_PTE4__LPUART5_CTS_B 0x0110 0x0250 0x4 0x2
+#define IMX7ULP_PAD_PTE4__LPI2C5_SCL 0x0110 0x02bc 0x5 0x2
+#define IMX7ULP_PAD_PTE4__TPM5_CLKIN 0x0110 0x02cc 0x6 0x2
+#define IMX7ULP_PAD_PTE4__SDHC1_D3 0x0110 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE5__PTE5 0x0114 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE5__FXIO1_D26 0x0114 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE5__LPSPI2_SOUT 0x0114 0x02b4 0x3 0x2
+#define IMX7ULP_PAD_PTE5__LPUART5_RTS_B 0x0114 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTE5__LPI2C5_SDA 0x0114 0x02c0 0x5 0x2
+#define IMX7ULP_PAD_PTE5__TPM5_CH0 0x0114 0x02c4 0x6 0x2
+#define IMX7ULP_PAD_PTE5__SDHC1_D2 0x0114 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE6__PTE6 0x0118 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE6__FXIO1_D25 0x0118 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE6__LPSPI2_SCK 0x0118 0x02ac 0x3 0x2
+#define IMX7ULP_PAD_PTE6__LPUART5_TX 0x0118 0x0258 0x4 0x2
+#define IMX7ULP_PAD_PTE6__LPI2C5_HREQ 0x0118 0x02b8 0x5 0x2
+#define IMX7ULP_PAD_PTE6__TPM7_CH3 0x0118 0x02e8 0x6 0x2
+#define IMX7ULP_PAD_PTE6__SDHC1_D4 0x0118 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE6__FB_A17 0x0118 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE7__PTE7 0x011c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE7__TRACE_D7 0x011c 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE7__VIU_FID 0x011c 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE7__FXIO1_D24 0x011c 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE7__LPSPI2_PCS0 0x011c 0x029c 0x3 0x2
+#define IMX7ULP_PAD_PTE7__LPUART5_RX 0x011c 0x0254 0x4 0x2
+#define IMX7ULP_PAD_PTE7__TPM7_CH4 0x011c 0x02ec 0x6 0x2
+#define IMX7ULP_PAD_PTE7__SDHC1_D5 0x011c 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE7__FB_A18 0x011c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE8__PTE8 0x0120 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE8__TRACE_D6 0x0120 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE8__VIU_D16 0x0120 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE8__FXIO1_D23 0x0120 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE8__LPSPI3_PCS1 0x0120 0x0314 0x3 0x2
+#define IMX7ULP_PAD_PTE8__LPUART6_CTS_B 0x0120 0x025c 0x4 0x2
+#define IMX7ULP_PAD_PTE8__LPI2C6_SCL 0x0120 0x02fc 0x5 0x2
+#define IMX7ULP_PAD_PTE8__TPM7_CH5 0x0120 0x02f0 0x6 0x2
+#define IMX7ULP_PAD_PTE8__SDHC1_WP 0x0120 0x0200 0x7 0x1
+#define IMX7ULP_PAD_PTE8__SDHC1_D6 0x0120 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE8__FB_CS3_B_FB_BE7_0_BLS31_24_B 0x0120 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE9__PTE9 0x0124 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE9__TRACE_D5 0x0124 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE9__VIU_D17 0x0124 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE9__FXIO1_D22 0x0124 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE9__LPSPI3_PCS2 0x0124 0x0318 0x3 0x2
+#define IMX7ULP_PAD_PTE9__LPUART6_RTS_B 0x0124 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTE9__LPI2C6_SDA 0x0124 0x0300 0x5 0x2
+#define IMX7ULP_PAD_PTE9__TPM7_CLKIN 0x0124 0x02f4 0x6 0x2
+#define IMX7ULP_PAD_PTE9__SDHC1_CD 0x0124 0x032c 0x7 0x1
+#define IMX7ULP_PAD_PTE9__SDHC1_D7 0x0124 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE9__FB_TBST_B_FB_CS2_B_FB_BE15_8_BLS23_16_B 0x0124 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE10__PTE10 0x0128 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE10__TRACE_D4 0x0128 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE10__VIU_D18 0x0128 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE10__FXIO1_D21 0x0128 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE10__LPSPI3_PCS3 0x0128 0x031c 0x3 0x2
+#define IMX7ULP_PAD_PTE10__LPUART6_TX 0x0128 0x0264 0x4 0x2
+#define IMX7ULP_PAD_PTE10__LPI2C6_HREQ 0x0128 0x02f8 0x5 0x2
+#define IMX7ULP_PAD_PTE10__TPM7_CH0 0x0128 0x02dc 0x6 0x2
+#define IMX7ULP_PAD_PTE10__SDHC1_VS 0x0128 0x0000 0x7 0x0
+#define IMX7ULP_PAD_PTE10__SDHC1_DQS 0x0128 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE10__FB_A19 0x0128 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE11__PTE11 0x012c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE11__TRACE_D3 0x012c 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE11__VIU_D19 0x012c 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE11__FXIO1_D20 0x012c 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE11__LPUART6_RX 0x012c 0x0260 0x4 0x2
+#define IMX7ULP_PAD_PTE11__TPM7_CH1 0x012c 0x02e0 0x6 0x2
+#define IMX7ULP_PAD_PTE11__SDHC1_RESET_B 0x012c 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE11__FB_A20 0x012c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE12__PTE12 0x0130 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE12__TRACE_D2 0x0130 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE12__VIU_D20 0x0130 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE12__FXIO1_D19 0x0130 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE12__LPSPI3_SIN 0x0130 0x0324 0x3 0x2
+#define IMX7ULP_PAD_PTE12__LPUART7_CTS_B 0x0130 0x0268 0x4 0x2
+#define IMX7ULP_PAD_PTE12__LPI2C7_SCL 0x0130 0x0308 0x5 0x2
+#define IMX7ULP_PAD_PTE12__TPM7_CH2 0x0130 0x02e4 0x6 0x2
+#define IMX7ULP_PAD_PTE12__SDHC1_WP 0x0130 0x0200 0x8 0x2
+#define IMX7ULP_PAD_PTE12__FB_A21 0x0130 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE13__PTE13 0x0134 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE13__TRACE_D1 0x0134 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE13__VIU_D21 0x0134 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE13__FXIO1_D18 0x0134 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE13__LPSPI3_SOUT 0x0134 0x0328 0x3 0x2
+#define IMX7ULP_PAD_PTE13__LPUART7_RTS_B 0x0134 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTE13__LPI2C7_SDA 0x0134 0x030c 0x5 0x2
+#define IMX7ULP_PAD_PTE13__TPM6_CLKIN 0x0134 0x02d8 0x6 0x2
+#define IMX7ULP_PAD_PTE13__SDHC1_CD 0x0134 0x032c 0x8 0x2
+#define IMX7ULP_PAD_PTE13__FB_A22 0x0134 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE14__PTE14 0x0138 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE14__TRACE_D0 0x0138 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE14__VIU_D22 0x0138 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE14__FXIO1_D17 0x0138 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE14__LPSPI3_SCK 0x0138 0x0320 0x3 0x2
+#define IMX7ULP_PAD_PTE14__LPUART7_TX 0x0138 0x0270 0x4 0x2
+#define IMX7ULP_PAD_PTE14__LPI2C7_HREQ 0x0138 0x0304 0x5 0x2
+#define IMX7ULP_PAD_PTE14__TPM6_CH0 0x0138 0x02d0 0x6 0x2
+#define IMX7ULP_PAD_PTE14__SDHC1_VS 0x0138 0x0000 0x8 0x0
+#define IMX7ULP_PAD_PTE14__FB_A23 0x0138 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTE15__PTE15 0x013c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTE15__TRACE_CLKOUT 0x013c 0x0000 0xa 0x0
+#define IMX7ULP_PAD_PTE15__VIU_D23 0x013c 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTE15__FXIO1_D16 0x013c 0x0000 0x2 0x0
+#define IMX7ULP_PAD_PTE15__LPSPI3_PCS0 0x013c 0x0310 0x3 0x2
+#define IMX7ULP_PAD_PTE15__LPUART7_RX 0x013c 0x026c 0x4 0x2
+#define IMX7ULP_PAD_PTE15__TPM6_CH1 0x013c 0x02d4 0x6 0x2
+#define IMX7ULP_PAD_PTE15__FB_A24 0x013c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF0__PTF0 0x0180 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF0__VIU_DE 0x0180 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF0__LPUART4_CTS_B 0x0180 0x0244 0x4 0x3
+#define IMX7ULP_PAD_PTF0__LPI2C4_SCL 0x0180 0x0278 0x5 0x3
+#define IMX7ULP_PAD_PTF0__TPM4_CLKIN 0x0180 0x0298 0x6 0x3
+#define IMX7ULP_PAD_PTF0__FB_RW_B 0x0180 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF1__PTF1 0x0184 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF1__VIU_HSYNC 0x0184 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF1__LPUART4_RTS_B 0x0184 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTF1__LPI2C4_SDA 0x0184 0x027c 0x5 0x3
+#define IMX7ULP_PAD_PTF1__TPM4_CH0 0x0184 0x0280 0x6 0x3
+#define IMX7ULP_PAD_PTF1__CLKOUT 0x0184 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF2__PTF2 0x0188 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF2__VIU_VSYNC 0x0188 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF2__LPUART4_TX 0x0188 0x024c 0x4 0x3
+#define IMX7ULP_PAD_PTF2__LPI2C4_HREQ 0x0188 0x0274 0x5 0x3
+#define IMX7ULP_PAD_PTF2__TPM4_CH1 0x0188 0x0284 0x6 0x3
+#define IMX7ULP_PAD_PTF2__FB_TSIZ1_FB_CS5_B_FB_BE23_16_BLS15_8_B 0x0188 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF3__PTF3 0x018c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF3__VIU_PCLK 0x018c 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF3__LPUART4_RX 0x018c 0x0248 0x4 0x3
+#define IMX7ULP_PAD_PTF3__TPM4_CH2 0x018c 0x0288 0x6 0x3
+#define IMX7ULP_PAD_PTF3__FB_AD16 0x018c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF4__PTF4 0x0190 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF4__VIU_D0 0x0190 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF4__FXIO1_D0 0x0190 0x0204 0x2 0x2
+#define IMX7ULP_PAD_PTF4__LPSPI2_PCS1 0x0190 0x02a0 0x3 0x3
+#define IMX7ULP_PAD_PTF4__LPUART5_CTS_B 0x0190 0x0250 0x4 0x3
+#define IMX7ULP_PAD_PTF4__LPI2C5_SCL 0x0190 0x02bc 0x5 0x3
+#define IMX7ULP_PAD_PTF4__TPM4_CH3 0x0190 0x028c 0x6 0x2
+#define IMX7ULP_PAD_PTF4__FB_AD17 0x0190 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF5__PTF5 0x0194 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF5__VIU_D1 0x0194 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF5__FXIO1_D1 0x0194 0x0208 0x2 0x2
+#define IMX7ULP_PAD_PTF5__LPSPI2_PCS2 0x0194 0x02a4 0x3 0x3
+#define IMX7ULP_PAD_PTF5__LPUART5_RTS_B 0x0194 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTF5__LPI2C5_SDA 0x0194 0x02c0 0x5 0x3
+#define IMX7ULP_PAD_PTF5__TPM4_CH4 0x0194 0x0290 0x6 0x2
+#define IMX7ULP_PAD_PTF5__FB_AD18 0x0194 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF6__PTF6 0x0198 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF6__VIU_D2 0x0198 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF6__FXIO1_D2 0x0198 0x020c 0x2 0x2
+#define IMX7ULP_PAD_PTF6__LPSPI2_PCS3 0x0198 0x02a8 0x3 0x3
+#define IMX7ULP_PAD_PTF6__LPUART5_TX 0x0198 0x0258 0x4 0x3
+#define IMX7ULP_PAD_PTF6__LPI2C5_HREQ 0x0198 0x02b8 0x5 0x3
+#define IMX7ULP_PAD_PTF6__TPM4_CH5 0x0198 0x0294 0x6 0x2
+#define IMX7ULP_PAD_PTF6__FB_AD19 0x0198 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF7__PTF7 0x019c 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF7__VIU_D3 0x019c 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF7__FXIO1_D3 0x019c 0x0210 0x2 0x2
+#define IMX7ULP_PAD_PTF7__LPUART5_RX 0x019c 0x0254 0x4 0x3
+#define IMX7ULP_PAD_PTF7__TPM5_CH1 0x019c 0x02c8 0x6 0x3
+#define IMX7ULP_PAD_PTF7__FB_AD20 0x019c 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF8__PTF8 0x01a0 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF8__USB1_ULPI_CLK 0x01a0 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF8__VIU_D4 0x01a0 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF8__FXIO1_D4 0x01a0 0x0214 0x2 0x2
+#define IMX7ULP_PAD_PTF8__LPSPI2_SIN 0x01a0 0x02b0 0x3 0x3
+#define IMX7ULP_PAD_PTF8__LPUART6_CTS_B 0x01a0 0x025c 0x4 0x3
+#define IMX7ULP_PAD_PTF8__LPI2C6_SCL 0x01a0 0x02fc 0x5 0x3
+#define IMX7ULP_PAD_PTF8__TPM5_CLKIN 0x01a0 0x02cc 0x6 0x3
+#define IMX7ULP_PAD_PTF8__FB_AD21 0x01a0 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF9__PTF9 0x01a4 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF9__USB1_ULPI_NXT 0x01a4 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF9__VIU_D5 0x01a4 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF9__FXIO1_D5 0x01a4 0x0218 0x2 0x2
+#define IMX7ULP_PAD_PTF9__LPSPI2_SOUT 0x01a4 0x02b4 0x3 0x3
+#define IMX7ULP_PAD_PTF9__LPUART6_RTS_B 0x01a4 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTF9__LPI2C6_SDA 0x01a4 0x0300 0x5 0x3
+#define IMX7ULP_PAD_PTF9__TPM5_CH0 0x01a4 0x02c4 0x6 0x3
+#define IMX7ULP_PAD_PTF9__FB_AD22 0x01a4 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF10__PTF10 0x01a8 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF10__USB1_ULPI_STP 0x01a8 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF10__VIU_D6 0x01a8 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF10__FXIO1_D6 0x01a8 0x021c 0x2 0x2
+#define IMX7ULP_PAD_PTF10__LPSPI2_SCK 0x01a8 0x02ac 0x3 0x3
+#define IMX7ULP_PAD_PTF10__LPUART6_TX 0x01a8 0x0264 0x4 0x3
+#define IMX7ULP_PAD_PTF10__LPI2C6_HREQ 0x01a8 0x02f8 0x5 0x3
+#define IMX7ULP_PAD_PTF10__TPM7_CH3 0x01a8 0x02e8 0x6 0x3
+#define IMX7ULP_PAD_PTF10__FB_AD23 0x01a8 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF11__PTF11 0x01ac 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF11__USB1_ULPI_DIR 0x01ac 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF11__VIU_D7 0x01ac 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF11__FXIO1_D7 0x01ac 0x0220 0x2 0x2
+#define IMX7ULP_PAD_PTF11__LPSPI2_PCS0 0x01ac 0x029c 0x3 0x3
+#define IMX7ULP_PAD_PTF11__LPUART6_RX 0x01ac 0x0260 0x4 0x3
+#define IMX7ULP_PAD_PTF11__TPM7_CH4 0x01ac 0x02ec 0x6 0x3
+#define IMX7ULP_PAD_PTF11__FB_CS4_B_FB_TSIZ0_FB_BE31_24_BLS7_0_B 0x01ac 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF12__PTF12 0x01b0 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF12__USB1_ULPI_DATA0 0x01b0 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF12__VIU_D8 0x01b0 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF12__FXIO1_D8 0x01b0 0x0224 0x2 0x2
+#define IMX7ULP_PAD_PTF12__LPSPI3_PCS1 0x01b0 0x0314 0x3 0x3
+#define IMX7ULP_PAD_PTF12__LPUART7_CTS_B 0x01b0 0x0268 0x4 0x3
+#define IMX7ULP_PAD_PTF12__LPI2C7_SCL 0x01b0 0x0308 0x5 0x3
+#define IMX7ULP_PAD_PTF12__TPM7_CH5 0x01b0 0x02f0 0x6 0x3
+#define IMX7ULP_PAD_PTF12__FB_AD24 0x01b0 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF13__PTF13 0x01b4 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF13__USB1_ULPI_DATA1 0x01b4 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF13__VIU_D9 0x01b4 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF13__FXIO1_D9 0x01b4 0x0228 0x2 0x2
+#define IMX7ULP_PAD_PTF13__LPSPI3_PCS2 0x01b4 0x0318 0x3 0x3
+#define IMX7ULP_PAD_PTF13__LPUART7_RTS_B 0x01b4 0x0000 0x4 0x0
+#define IMX7ULP_PAD_PTF13__LPI2C7_SDA 0x01b4 0x030c 0x5 0x3
+#define IMX7ULP_PAD_PTF13__TPM7_CLKIN 0x01b4 0x02f4 0x6 0x3
+#define IMX7ULP_PAD_PTF13__FB_AD25 0x01b4 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF14__PTF14 0x01b8 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF14__USB1_ULPI_DATA2 0x01b8 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF14__VIU_D10 0x01b8 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF14__FXIO1_D10 0x01b8 0x022c 0x2 0x2
+#define IMX7ULP_PAD_PTF14__LPSPI3_PCS3 0x01b8 0x031c 0x3 0x3
+#define IMX7ULP_PAD_PTF14__LPUART7_TX 0x01b8 0x0270 0x4 0x3
+#define IMX7ULP_PAD_PTF14__LPI2C7_HREQ 0x01b8 0x0304 0x5 0x3
+#define IMX7ULP_PAD_PTF14__TPM7_CH0 0x01b8 0x02dc 0x6 0x3
+#define IMX7ULP_PAD_PTF14__FB_AD26 0x01b8 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF15__PTF15 0x01bc 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF15__USB1_ULPI_DATA3 0x01bc 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF15__VIU_D11 0x01bc 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF15__FXIO1_D11 0x01bc 0x0230 0x2 0x2
+#define IMX7ULP_PAD_PTF15__LPUART7_RX 0x01bc 0x026c 0x4 0x3
+#define IMX7ULP_PAD_PTF15__TPM7_CH1 0x01bc 0x02e0 0x6 0x3
+#define IMX7ULP_PAD_PTF15__FB_AD27 0x01bc 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF16__PTF16 0x01c0 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF16__USB1_ULPI_DATA4 0x01c0 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF16__VIU_D12 0x01c0 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF16__FXIO1_D12 0x01c0 0x0234 0x2 0x2
+#define IMX7ULP_PAD_PTF16__LPSPI3_SIN 0x01c0 0x0324 0x3 0x3
+#define IMX7ULP_PAD_PTF16__TPM7_CH2 0x01c0 0x02e4 0x6 0x3
+#define IMX7ULP_PAD_PTF16__FB_AD28 0x01c0 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF17__PTF17 0x01c4 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF17__USB1_ULPI_DATA5 0x01c4 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF17__VIU_D13 0x01c4 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF17__FXIO1_D13 0x01c4 0x0238 0x2 0x2
+#define IMX7ULP_PAD_PTF17__LPSPI3_SOUT 0x01c4 0x0328 0x3 0x3
+#define IMX7ULP_PAD_PTF17__TPM6_CLKIN 0x01c4 0x02d8 0x6 0x3
+#define IMX7ULP_PAD_PTF17__FB_AD29 0x01c4 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF18__PTF18 0x01c8 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF18__USB1_ULPI_DATA6 0x01c8 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF18__VIU_D14 0x01c8 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF18__FXIO1_D14 0x01c8 0x023c 0x2 0x2
+#define IMX7ULP_PAD_PTF18__LPSPI3_SCK 0x01c8 0x0320 0x3 0x3
+#define IMX7ULP_PAD_PTF18__TPM6_CH0 0x01c8 0x02d0 0x6 0x3
+#define IMX7ULP_PAD_PTF18__FB_AD30 0x01c8 0x0000 0x9 0x0
+#define IMX7ULP_PAD_PTF19__PTF19 0x01cc 0x0000 0x1 0x0
+#define IMX7ULP_PAD_PTF19__USB1_ULPI_DATA7 0x01cc 0x0000 0xb 0x0
+#define IMX7ULP_PAD_PTF19__VIU_D15 0x01cc 0x0000 0xc 0x0
+#define IMX7ULP_PAD_PTF19__FXIO1_D15 0x01cc 0x0240 0x2 0x2
+#define IMX7ULP_PAD_PTF19__LPSPI3_PCS0 0x01cc 0x0310 0x3 0x3
+#define IMX7ULP_PAD_PTF19__TPM6_CH1 0x01cc 0x02d4 0x6 0x3
+#define IMX7ULP_PAD_PTF19__FB_AD31 0x01cc 0x0000 0x9 0x0
+
+#endif /* __DTS_IMX7ULP_PINFUNC_H */
diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi
index 7bb9df2c1460..9319e1f0f1d8 100644
--- a/arch/arm/boot/dts/ls1021a.dtsi
+++ b/arch/arm/boot/dts/ls1021a.dtsi
@@ -129,14 +129,14 @@
};
msi1: msi-controller@1570e00 {
- compatible = "fsl,1s1021a-msi";
+ compatible = "fsl,ls1021a-msi";
reg = <0x0 0x1570e00 0x0 0x8>;
msi-controller;
interrupts = <GIC_SPI 179 IRQ_TYPE_LEVEL_HIGH>;
};
msi2: msi-controller@1570e08 {
- compatible = "fsl,1s1021a-msi";
+ compatible = "fsl,ls1021a-msi";
reg = <0x0 0x1570e08 0x0 0x8>;
msi-controller;
interrupts = <GIC_SPI 180 IRQ_TYPE_LEVEL_HIGH>;
@@ -699,7 +699,7 @@
bus-range = <0x0 0xff>;
ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000 /* downstream I/O */
0x82000000 0x0 0x40000000 0x40 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */
- msi-parent = <&msi1>;
+ msi-parent = <&msi1>, <&msi2>;
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 7>;
interrupt-map = <0000 0 0 1 &gic GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>,
@@ -722,7 +722,7 @@
bus-range = <0x0 0xff>;
ranges = <0x81000000 0x0 0x00000000 0x48 0x00010000 0x0 0x00010000 /* downstream I/O */
0x82000000 0x0 0x40000000 0x48 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */
- msi-parent = <&msi2>;
+ msi-parent = <&msi1>, <&msi2>;
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 7>;
interrupt-map = <0000 0 0 1 &gic GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm/boot/dts/omap2420-n8x0-common.dtsi b/arch/arm/boot/dts/omap2420-n8x0-common.dtsi
index 7e5ffc583c90..91886231e5a8 100644
--- a/arch/arm/boot/dts/omap2420-n8x0-common.dtsi
+++ b/arch/arm/boot/dts/omap2420-n8x0-common.dtsi
@@ -15,8 +15,8 @@
>;
#address-cells = <1>;
#size-cells = <0>;
- retu_mfd: retu@1 {
- compatible = "retu-mfd";
+ retu: retu@1 {
+ compatible = "nokia,retu";
interrupt-parent = <&gpio4>;
interrupts = <12 IRQ_TYPE_EDGE_RISING>;
reg = <0x1>;
diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi
index df3366fa5409..cb47ae79a5f9 100644
--- a/arch/arm/boot/dts/omap3-n950-n9.dtsi
+++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi
@@ -265,6 +265,20 @@
&i2c2 {
clock-frequency = <400000>;
+
+ as3645a@30 {
+ reg = <0x30>;
+ compatible = "ams,as3645a";
+ flash {
+ flash-timeout-us = <150000>;
+ flash-max-microamp = <320000>;
+ led-max-microamp = <60000>;
+ peak-current-limit = <1750000>;
+ };
+ indicator {
+ led-max-microamp = <10000>;
+ };
+ };
};
&i2c3 {
diff --git a/arch/arm/boot/dts/rk3228-evb.dts b/arch/arm/boot/dts/rk3228-evb.dts
index 58834330a5ba..1be9daacc4f9 100644
--- a/arch/arm/boot/dts/rk3228-evb.dts
+++ b/arch/arm/boot/dts/rk3228-evb.dts
@@ -50,6 +50,16 @@
device_type = "memory";
reg = <0x60000000 0x40000000>;
};
+
+ vcc_phy: vcc-phy-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ regulator-name = "vcc_phy";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
};
&emmc {
@@ -60,6 +70,30 @@
status = "okay";
};
+&gmac {
+ assigned-clocks = <&cru SCLK_MAC_SRC>;
+ assigned-clock-rates = <50000000>;
+ clock_in_out = "output";
+ phy-supply = <&vcc_phy>;
+ phy-mode = "rmii";
+ phy-handle = <&phy>;
+ status = "okay";
+
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy: phy@0 {
+ compatible = "ethernet-phy-id1234.d400", "ethernet-phy-ieee802.3-c22";
+ reg = <0>;
+ clocks = <&cru SCLK_MAC_PHY>;
+ resets = <&cru SRST_MACPHY>;
+ phy-is-integrated;
+ };
+ };
+};
+
&tsadc {
status = "okay";
diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi
index cc06da394366..60e69aeacbdb 100644
--- a/arch/arm/boot/dts/sama5d2.dtsi
+++ b/arch/arm/boot/dts/sama5d2.dtsi
@@ -303,7 +303,7 @@
#size-cells = <1>;
atmel,smc = <&hsmc>;
reg = <0x10000000 0x10000000
- 0x40000000 0x30000000>;
+ 0x60000000 0x30000000>;
ranges = <0x0 0x0 0x10000000 0x10000000
0x1 0x0 0x60000000 0x10000000
0x2 0x0 0x70000000 0x10000000
@@ -1048,18 +1048,18 @@
};
hsmc: hsmc@f8014000 {
- compatible = "atmel,sama5d3-smc", "syscon", "simple-mfd";
+ compatible = "atmel,sama5d2-smc", "syscon", "simple-mfd";
reg = <0xf8014000 0x1000>;
- interrupts = <5 IRQ_TYPE_LEVEL_HIGH 6>;
+ interrupts = <17 IRQ_TYPE_LEVEL_HIGH 6>;
clocks = <&hsmc_clk>;
#address-cells = <1>;
#size-cells = <1>;
ranges;
- pmecc: ecc-engine@ffffc070 {
+ pmecc: ecc-engine@f8014070 {
compatible = "atmel,sama5d2-pmecc";
- reg = <0xffffc070 0x490>,
- <0xffffc500 0x100>;
+ reg = <0xf8014070 0x490>,
+ <0xf8014500 0x100>;
};
};
diff --git a/arch/arm/boot/dts/ste-hrefprev60.dtsi b/arch/arm/boot/dts/ste-hrefprev60.dtsi
index 5882a2606ac3..3f14b4df69b4 100644
--- a/arch/arm/boot/dts/ste-hrefprev60.dtsi
+++ b/arch/arm/boot/dts/ste-hrefprev60.dtsi
@@ -30,7 +30,7 @@
i2c@80004000 {
tps61052@33 {
- compatible = "tps61052";
+ compatible = "ti,tps61052";
reg = <0x33>;
};
diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
index 6713d0f2b3f4..b1502df7b509 100644
--- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
+++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
@@ -56,8 +56,6 @@
aliases {
serial0 = &uart0;
- /* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */
- ethernet0 = &emac;
ethernet1 = &xr819;
};
@@ -104,13 +102,6 @@
status = "okay";
};
-&emac {
- phy-handle = <&int_mii_phy>;
- phy-mode = "mii";
- allwinner,leds-active-low;
- status = "okay";
-};
-
&mmc0 {
pinctrl-names = "default";
pinctrl-0 = <&mmc0_pins_a>;
diff --git a/arch/arm/boot/dts/sun8i-h3-bananapi-m2-plus.dts b/arch/arm/boot/dts/sun8i-h3-bananapi-m2-plus.dts
index d756ff825116..a337af1de322 100644
--- a/arch/arm/boot/dts/sun8i-h3-bananapi-m2-plus.dts
+++ b/arch/arm/boot/dts/sun8i-h3-bananapi-m2-plus.dts
@@ -52,7 +52,6 @@
compatible = "sinovoip,bpi-m2-plus", "allwinner,sun8i-h3";
aliases {
- ethernet0 = &emac;
serial0 = &uart0;
serial1 = &uart1;
};
@@ -115,30 +114,12 @@
status = "okay";
};
-&emac {
- pinctrl-names = "default";
- pinctrl-0 = <&emac_rgmii_pins>;
- phy-supply = <&reg_gmac_3v3>;
- phy-handle = <&ext_rgmii_phy>;
- phy-mode = "rgmii";
-
- allwinner,leds-active-low;
- status = "okay";
-};
-
&ir {
pinctrl-names = "default";
pinctrl-0 = <&ir_pins_a>;
status = "okay";
};
-&mdio {
- ext_rgmii_phy: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <0>;
- };
-};
-
&mmc0 {
pinctrl-names = "default";
pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;
diff --git a/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts b/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts
index 78f6c24952dd..8d2cc6e9a03f 100644
--- a/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts
+++ b/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts
@@ -46,10 +46,3 @@
model = "FriendlyARM NanoPi NEO";
compatible = "friendlyarm,nanopi-neo", "allwinner,sun8i-h3";
};
-
-&emac {
- phy-handle = <&int_mii_phy>;
- phy-mode = "mii";
- allwinner,leds-active-low;
- status = "okay";
-};
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts
index 17cdeae19c6f..8ff71b1bb45b 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts
@@ -54,7 +54,6 @@
aliases {
serial0 = &uart0;
/* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */
- ethernet0 = &emac;
ethernet1 = &rtl8189;
};
@@ -118,13 +117,6 @@
status = "okay";
};
-&emac {
- phy-handle = <&int_mii_phy>;
- phy-mode = "mii";
- allwinner,leds-active-low;
- status = "okay";
-};
-
&ir {
pinctrl-names = "default";
pinctrl-0 = <&ir_pins_a>;
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
index 6880268e8b87..5fea430e0eb1 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
@@ -52,7 +52,6 @@
compatible = "xunlong,orangepi-one", "allwinner,sun8i-h3";
aliases {
- ethernet0 = &emac;
serial0 = &uart0;
};
@@ -98,13 +97,6 @@
status = "okay";
};
-&emac {
- phy-handle = <&int_mii_phy>;
- phy-mode = "mii";
- allwinner,leds-active-low;
- status = "okay";
-};
-
&mmc0 {
pinctrl-names = "default";
pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts
index a10281b455f5..8b93f5c781a7 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts
@@ -53,11 +53,6 @@
};
};
-&emac {
- /* LEDs changed to active high on the plus */
- /delete-property/ allwinner,leds-active-low;
-};
-
&mmc1 {
pinctrl-names = "default";
pinctrl-0 = <&mmc1_pins_a>;
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
index 998b60f8d295..1a044b17d6c6 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
@@ -52,7 +52,6 @@
compatible = "xunlong,orangepi-pc", "allwinner,sun8i-h3";
aliases {
- ethernet0 = &emac;
serial0 = &uart0;
};
@@ -114,13 +113,6 @@
status = "okay";
};
-&emac {
- phy-handle = <&int_mii_phy>;
- phy-mode = "mii";
- allwinner,leds-active-low;
- status = "okay";
-};
-
&ir {
pinctrl-names = "default";
pinctrl-0 = <&ir_pins_a>;
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-plus.dts
index 331ed683ac62..828ae7a526d9 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-plus.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-plus.dts
@@ -47,10 +47,6 @@
model = "Xunlong Orange Pi Plus / Plus 2";
compatible = "xunlong,orangepi-plus", "allwinner,sun8i-h3";
- aliases {
- ethernet0 = &emac;
- };
-
reg_gmac_3v3: gmac-3v3 {
compatible = "regulator-fixed";
regulator-name = "gmac-3v3";
@@ -78,24 +74,6 @@
status = "okay";
};
-&emac {
- pinctrl-names = "default";
- pinctrl-0 = <&emac_rgmii_pins>;
- phy-supply = <&reg_gmac_3v3>;
- phy-handle = <&ext_rgmii_phy>;
- phy-mode = "rgmii";
-
- allwinner,leds-active-low;
- status = "okay";
-};
-
-&mdio {
- ext_rgmii_phy: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <0>;
- };
-};
-
&mmc2 {
pinctrl-names = "default";
pinctrl-0 = <&mmc2_8bit_pins>;
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts
index 80026f3caafc..97920b12a944 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts
@@ -61,19 +61,3 @@
gpio = <&pio 3 6 GPIO_ACTIVE_HIGH>; /* PD6 */
};
};
-
-&emac {
- pinctrl-names = "default";
- pinctrl-0 = <&emac_rgmii_pins>;
- phy-supply = <&reg_gmac_3v3>;
- phy-handle = <&ext_rgmii_phy>;
- phy-mode = "rgmii";
- status = "okay";
-};
-
-&mdio {
- ext_rgmii_phy: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <1>;
- };
-};
diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi
index d38282b9e5d4..11240a8313c2 100644
--- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi
+++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi
@@ -391,32 +391,6 @@
clocks = <&osc24M>;
};
- emac: ethernet@1c30000 {
- compatible = "allwinner,sun8i-h3-emac";
- syscon = <&syscon>;
- reg = <0x01c30000 0x10000>;
- interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "macirq";
- resets = <&ccu RST_BUS_EMAC>;
- reset-names = "stmmaceth";
- clocks = <&ccu CLK_BUS_EMAC>;
- clock-names = "stmmaceth";
- #address-cells = <1>;
- #size-cells = <0>;
- status = "disabled";
-
- mdio: mdio {
- #address-cells = <1>;
- #size-cells = <0>;
- int_mii_phy: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <1>;
- clocks = <&ccu CLK_BUS_EPHY>;
- resets = <&ccu RST_BUS_EPHY>;
- };
- };
- };
-
spi0: spi@01c68000 {
compatible = "allwinner,sun8i-h3-spi";
reg = <0x01c68000 0x1000>;
diff --git a/arch/arm/boot/dts/tango4-smp8758.dtsi b/arch/arm/boot/dts/tango4-smp8758.dtsi
index d2e65c46bcc7..eca33d568690 100644
--- a/arch/arm/boot/dts/tango4-smp8758.dtsi
+++ b/arch/arm/boot/dts/tango4-smp8758.dtsi
@@ -13,7 +13,6 @@
reg = <0>;
clocks = <&clkgen CPU_CLK>;
clock-latency = <1>;
- operating-points = <1215000 0 607500 0 405000 0 243000 0 135000 0>;
};
cpu1: cpu@1 {
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index e74de69caeab..1736813bdea7 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -226,7 +226,7 @@ CONFIG_REGULATOR_MC13892=y
CONFIG_REGULATOR_PFUZE100=y
CONFIG_MEDIA_SUPPORT=y
CONFIG_MEDIA_CAMERA_SUPPORT=y
-CONFIG_MEDIA_RC_SUPPORT=y
+CONFIG_RC_CORE=y
CONFIG_RC_DEVICES=y
CONFIG_IR_GPIO_CIR=y
CONFIG_MEDIA_USB_SUPPORT=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 4d19c1b4b8e7..94d7e71c69c4 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -270,6 +270,7 @@ CONFIG_ICPLUS_PHY=y
CONFIG_REALTEK_PHY=y
CONFIG_MICREL_PHY=y
CONFIG_FIXED_PHY=y
+CONFIG_ROCKCHIP_PHY=y
CONFIG_USB_PEGASUS=y
CONFIG_USB_RTL8152=m
CONFIG_USB_USBNET=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index a120ae816260..0414acf731ce 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -304,7 +304,7 @@ CONFIG_REGULATOR_TPS65910=y
CONFIG_REGULATOR_TWL4030=y
CONFIG_MEDIA_SUPPORT=m
CONFIG_MEDIA_CAMERA_SUPPORT=y
-CONFIG_MEDIA_RC_SUPPORT=y
+CONFIG_RC_CORE=m
CONFIG_MEDIA_CONTROLLER=y
CONFIG_VIDEO_V4L2_SUBDEV_API=y
CONFIG_LIRC=m
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 0ec1d1ec130f..22cd559531a9 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -95,7 +95,7 @@ CONFIG_REGULATOR_FIXED_VOLTAGE=y
CONFIG_REGULATOR_AXP20X=y
CONFIG_REGULATOR_GPIO=y
CONFIG_MEDIA_SUPPORT=y
-CONFIG_MEDIA_RC_SUPPORT=y
+CONFIG_RC_CORE=y
CONFIG_RC_DEVICES=y
CONFIG_IR_SUNXI=y
CONFIG_DRM=y
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index b9adedcc5b2e..ec72752d5668 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -94,14 +94,15 @@ config CRYPTO_AES_ARM_CE
ARMv8 Crypto Extensions
config CRYPTO_GHASH_ARM_CE
- tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions"
+ tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions"
depends on KERNEL_MODE_NEON
select CRYPTO_HASH
select CRYPTO_CRYPTD
help
Use an implementation of GHASH (used by the GCM AEAD chaining mode)
that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
- that is part of the ARMv8 Crypto Extensions
+ that is part of the ARMv8 Crypto Extensions, or a slower variant that
+ uses the vmull.p8 instruction that is part of the basic NEON ISA.
config CRYPTO_CRCT10DIF_ARM_CE
tristate "CRCT10DIF digest algorithm using PMULL instructions"
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index 0f966a8ca1ce..d0a9cec73707 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -285,9 +285,7 @@ static int ctr_encrypt(struct skcipher_request *req)
ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc,
num_rounds(ctx), blocks, walk.iv);
- if (tdst != tsrc)
- memcpy(tdst, tsrc, nbytes);
- crypto_xor(tdst, tail, nbytes);
+ crypto_xor_cpy(tdst, tsrc, tail, nbytes);
err = skcipher_walk_done(&walk, 0);
}
kernel_neon_end();
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
index c817a86c4ca8..54b384084637 100644
--- a/arch/arm/crypto/aes-cipher-core.S
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -10,6 +10,7 @@
*/
#include <linux/linkage.h>
+#include <asm/cache.h>
.text
.align 5
@@ -32,19 +33,19 @@
.endif
.endm
- .macro __load, out, in, idx
+ .macro __load, out, in, idx, sz, op
.if __LINUX_ARM_ARCH__ < 7 && \idx > 0
- ldr \out, [ttab, \in, lsr #(8 * \idx) - 2]
+ ldr\op \out, [ttab, \in, lsr #(8 * \idx) - \sz]
.else
- ldr \out, [ttab, \in, lsl #2]
+ ldr\op \out, [ttab, \in, lsl #\sz]
.endif
.endm
- .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
+ .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op
__select \out0, \in0, 0
__select t0, \in1, 1
- __load \out0, \out0, 0
- __load t0, t0, 1
+ __load \out0, \out0, 0, \sz, \op
+ __load t0, t0, 1, \sz, \op
.if \enc
__select \out1, \in1, 0
@@ -53,10 +54,10 @@
__select \out1, \in3, 0
__select t1, \in0, 1
.endif
- __load \out1, \out1, 0
+ __load \out1, \out1, 0, \sz, \op
__select t2, \in2, 2
- __load t1, t1, 1
- __load t2, t2, 2
+ __load t1, t1, 1, \sz, \op
+ __load t2, t2, 2, \sz, \op
eor \out0, \out0, t0, ror #24
@@ -68,9 +69,9 @@
__select \t3, \in1, 2
__select \t4, \in2, 3
.endif
- __load \t3, \t3, 2
- __load t0, t0, 3
- __load \t4, \t4, 3
+ __load \t3, \t3, 2, \sz, \op
+ __load t0, t0, 3, \sz, \op
+ __load \t4, \t4, 3, \sz, \op
eor \out1, \out1, t1, ror #24
eor \out0, \out0, t2, ror #16
@@ -82,14 +83,14 @@
eor \out1, \out1, t2
.endm
- .macro fround, out0, out1, out2, out3, in0, in1, in2, in3
- __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
- __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+ .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+ __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+ __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
.endm
- .macro iround, out0, out1, out2, out3, in0, in1, in2, in3
- __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
- __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+ .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+ __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+ __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
.endm
.macro __rev, out, in
@@ -114,7 +115,7 @@
.endif
.endm
- .macro do_crypt, round, ttab, ltab
+ .macro do_crypt, round, ttab, ltab, bsz
push {r3-r11, lr}
ldr r4, [in]
@@ -146,9 +147,12 @@
1: subs rounds, rounds, #4
\round r8, r9, r10, r11, r4, r5, r6, r7
- __adrl ttab, \ltab, ls
+ bls 2f
\round r4, r5, r6, r7, r8, r9, r10, r11
- bhi 0b
+ b 0b
+
+2: __adrl ttab, \ltab
+ \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b
#ifdef CONFIG_CPU_BIG_ENDIAN
__rev r4, r4
@@ -170,10 +174,48 @@
.ltorg
.endm
+ .align L1_CACHE_SHIFT
+ .type __aes_arm_inverse_sbox, %object
+__aes_arm_inverse_sbox:
+ .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+ .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+ .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+ .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+ .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+ .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+ .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+ .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+ .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+ .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+ .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+ .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+ .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+ .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+ .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+ .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+ .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+ .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+ .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+ .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+ .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+ .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+ .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+ .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+ .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+ .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+ .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+ .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+ .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+ .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+ .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+ .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+ .size __aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
+
ENTRY(__aes_arm_encrypt)
- do_crypt fround, crypto_ft_tab, crypto_fl_tab
+ do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2
ENDPROC(__aes_arm_encrypt)
+ .align 5
ENTRY(__aes_arm_decrypt)
- do_crypt iround, crypto_it_tab, crypto_il_tab
+ do_crypt iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
ENDPROC(__aes_arm_decrypt)
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index c76377961444..18768f330449 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -221,9 +221,8 @@ static int ctr_encrypt(struct skcipher_request *req)
u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
- if (dst != src)
- memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
- crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
+ crypto_xor_cpy(dst, src, final,
+ walk.total % AES_BLOCK_SIZE);
err = skcipher_walk_done(&walk, 0);
break;
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
index f6ab8bcc9efe..2f78c10b1881 100644
--- a/arch/arm/crypto/ghash-ce-core.S
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -1,7 +1,7 @@
/*
- * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
+ * Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions.
*
- * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
@@ -12,40 +12,162 @@
#include <asm/assembler.h>
SHASH .req q0
- SHASH2 .req q1
- T1 .req q2
- T2 .req q3
- MASK .req q4
- XL .req q5
- XM .req q6
- XH .req q7
- IN1 .req q7
+ T1 .req q1
+ XL .req q2
+ XM .req q3
+ XH .req q4
+ IN1 .req q4
SHASH_L .req d0
SHASH_H .req d1
- SHASH2_L .req d2
- T1_L .req d4
- MASK_L .req d8
- XL_L .req d10
- XL_H .req d11
- XM_L .req d12
- XM_H .req d13
- XH_L .req d14
+ T1_L .req d2
+ T1_H .req d3
+ XL_L .req d4
+ XL_H .req d5
+ XM_L .req d6
+ XM_H .req d7
+ XH_L .req d8
+
+ t0l .req d10
+ t0h .req d11
+ t1l .req d12
+ t1h .req d13
+ t2l .req d14
+ t2h .req d15
+ t3l .req d16
+ t3h .req d17
+ t4l .req d18
+ t4h .req d19
+
+ t0q .req q5
+ t1q .req q6
+ t2q .req q7
+ t3q .req q8
+ t4q .req q9
+ T2 .req q9
+
+ s1l .req d20
+ s1h .req d21
+ s2l .req d22
+ s2h .req d23
+ s3l .req d24
+ s3h .req d25
+ s4l .req d26
+ s4h .req d27
+
+ MASK .req d28
+ SHASH2_p8 .req d28
+
+ k16 .req d29
+ k32 .req d30
+ k48 .req d31
+ SHASH2_p64 .req d31
.text
.fpu crypto-neon-fp-armv8
+ .macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4
+ vmull.p64 \rd, \rn, \rm
+ .endm
+
/*
- * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
- * struct ghash_key const *k, const char *head)
+ * This implementation of 64x64 -> 128 bit polynomial multiplication
+ * using vmull.p8 instructions (8x8 -> 16) is taken from the paper
+ * "Fast Software Polynomial Multiplication on ARM Processors Using
+ * the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and
+ * Ricardo Dahab (https://hal.inria.fr/hal-01506572)
+ *
+ * It has been slightly tweaked for in-order performance, and to allow
+ * 'rq' to overlap with 'ad' or 'bd'.
*/
-ENTRY(pmull_ghash_update)
- vld1.64 {SHASH}, [r3]
+ .macro __pmull_p8, rq, ad, bd, b1=t4l, b2=t3l, b3=t4l, b4=t3l
+ vext.8 t0l, \ad, \ad, #1 @ A1
+ .ifc \b1, t4l
+ vext.8 t4l, \bd, \bd, #1 @ B1
+ .endif
+ vmull.p8 t0q, t0l, \bd @ F = A1*B
+ vext.8 t1l, \ad, \ad, #2 @ A2
+ vmull.p8 t4q, \ad, \b1 @ E = A*B1
+ .ifc \b2, t3l
+ vext.8 t3l, \bd, \bd, #2 @ B2
+ .endif
+ vmull.p8 t1q, t1l, \bd @ H = A2*B
+ vext.8 t2l, \ad, \ad, #3 @ A3
+ vmull.p8 t3q, \ad, \b2 @ G = A*B2
+ veor t0q, t0q, t4q @ L = E + F
+ .ifc \b3, t4l
+ vext.8 t4l, \bd, \bd, #3 @ B3
+ .endif
+ vmull.p8 t2q, t2l, \bd @ J = A3*B
+ veor t0l, t0l, t0h @ t0 = (L) (P0 + P1) << 8
+ veor t1q, t1q, t3q @ M = G + H
+ .ifc \b4, t3l
+ vext.8 t3l, \bd, \bd, #4 @ B4
+ .endif
+ vmull.p8 t4q, \ad, \b3 @ I = A*B3
+ veor t1l, t1l, t1h @ t1 = (M) (P2 + P3) << 16
+ vmull.p8 t3q, \ad, \b4 @ K = A*B4
+ vand t0h, t0h, k48
+ vand t1h, t1h, k32
+ veor t2q, t2q, t4q @ N = I + J
+ veor t0l, t0l, t0h
+ veor t1l, t1l, t1h
+ veor t2l, t2l, t2h @ t2 = (N) (P4 + P5) << 24
+ vand t2h, t2h, k16
+ veor t3l, t3l, t3h @ t3 = (K) (P6 + P7) << 32
+ vmov.i64 t3h, #0
+ vext.8 t0q, t0q, t0q, #15
+ veor t2l, t2l, t2h
+ vext.8 t1q, t1q, t1q, #14
+ vmull.p8 \rq, \ad, \bd @ D = A*B
+ vext.8 t2q, t2q, t2q, #13
+ vext.8 t3q, t3q, t3q, #12
+ veor t0q, t0q, t1q
+ veor t2q, t2q, t3q
+ veor \rq, \rq, t0q
+ veor \rq, \rq, t2q
+ .endm
+
+ //
+ // PMULL (64x64->128) based reduction for CPUs that can do
+ // it in a single instruction.
+ //
+ .macro __pmull_reduce_p64
+ vmull.p64 T1, XL_L, MASK
+
+ veor XH_L, XH_L, XM_H
+ vext.8 T1, T1, T1, #8
+ veor XL_H, XL_H, XM_L
+ veor T1, T1, XL
+
+ vmull.p64 XL, T1_H, MASK
+ .endm
+
+ //
+ // Alternative reduction for CPUs that lack support for the
+ // 64x64->128 PMULL instruction
+ //
+ .macro __pmull_reduce_p8
+ veor XL_H, XL_H, XM_L
+ veor XH_L, XH_L, XM_H
+
+ vshl.i64 T1, XL, #57
+ vshl.i64 T2, XL, #62
+ veor T1, T1, T2
+ vshl.i64 T2, XL, #63
+ veor T1, T1, T2
+ veor XL_H, XL_H, T1_L
+ veor XH_L, XH_L, T1_H
+
+ vshr.u64 T1, XL, #1
+ veor XH, XH, XL
+ veor XL, XL, T1
+ vshr.u64 T1, T1, #6
+ vshr.u64 XL, XL, #1
+ .endm
+
+ .macro ghash_update, pn
vld1.64 {XL}, [r1]
- vmov.i8 MASK, #0xe1
- vext.8 SHASH2, SHASH, SHASH, #8
- vshl.u64 MASK, MASK, #57
- veor SHASH2, SHASH2, SHASH
/* do the head block first, if supplied */
ldr ip, [sp]
@@ -62,33 +184,59 @@ ENTRY(pmull_ghash_update)
#ifndef CONFIG_CPU_BIG_ENDIAN
vrev64.8 T1, T1
#endif
- vext.8 T2, XL, XL, #8
vext.8 IN1, T1, T1, #8
- veor T1, T1, T2
+ veor T1_L, T1_L, XL_H
veor XL, XL, IN1
- vmull.p64 XH, SHASH_H, XL_H @ a1 * b1
+ __pmull_\pn XH, XL_H, SHASH_H, s1h, s2h, s3h, s4h @ a1 * b1
veor T1, T1, XL
- vmull.p64 XL, SHASH_L, XL_L @ a0 * b0
- vmull.p64 XM, SHASH2_L, T1_L @ (a1 + a0)(b1 + b0)
+ __pmull_\pn XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l @ a0 * b0
+ __pmull_\pn XM, T1_L, SHASH2_\pn @ (a1+a0)(b1+b0)
- vext.8 T1, XL, XH, #8
- veor T2, XL, XH
+ veor T1, XL, XH
veor XM, XM, T1
- veor XM, XM, T2
- vmull.p64 T2, XL_L, MASK_L
- vmov XH_L, XM_H
- vmov XM_H, XL_L
+ __pmull_reduce_\pn
- veor XL, XM, T2
- vext.8 T2, XL, XL, #8
- vmull.p64 XL, XL_L, MASK_L
- veor T2, T2, XH
- veor XL, XL, T2
+ veor T1, T1, XH
+ veor XL, XL, T1
bne 0b
vst1.64 {XL}, [r1]
bx lr
-ENDPROC(pmull_ghash_update)
+ .endm
+
+ /*
+ * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+ * struct ghash_key const *k, const char *head)
+ */
+ENTRY(pmull_ghash_update_p64)
+ vld1.64 {SHASH}, [r3]
+ veor SHASH2_p64, SHASH_L, SHASH_H
+
+ vmov.i8 MASK, #0xe1
+ vshl.u64 MASK, MASK, #57
+
+ ghash_update p64
+ENDPROC(pmull_ghash_update_p64)
+
+ENTRY(pmull_ghash_update_p8)
+ vld1.64 {SHASH}, [r3]
+ veor SHASH2_p8, SHASH_L, SHASH_H
+
+ vext.8 s1l, SHASH_L, SHASH_L, #1
+ vext.8 s2l, SHASH_L, SHASH_L, #2
+ vext.8 s3l, SHASH_L, SHASH_L, #3
+ vext.8 s4l, SHASH_L, SHASH_L, #4
+ vext.8 s1h, SHASH_H, SHASH_H, #1
+ vext.8 s2h, SHASH_H, SHASH_H, #2
+ vext.8 s3h, SHASH_H, SHASH_H, #3
+ vext.8 s4h, SHASH_H, SHASH_H, #4
+
+ vmov.i64 k16, #0xffff
+ vmov.i64 k32, #0xffffffff
+ vmov.i64 k48, #0xffffffffffff
+
+ ghash_update p8
+ENDPROC(pmull_ghash_update_p8)
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index 6bac8bea9f1e..d9bb52cae2ac 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -22,6 +22,7 @@
MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("ghash");
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
@@ -41,8 +42,17 @@ struct ghash_async_ctx {
struct cryptd_ahash *cryptd_tfm;
};
-asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k, const char *head);
+asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+ struct ghash_key const *k,
+ const char *head);
+
+asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+ struct ghash_key const *k,
+ const char *head);
+
+static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
+ struct ghash_key const *k,
+ const char *head);
static int ghash_init(struct shash_desc *desc)
{
@@ -312,6 +322,14 @@ static int __init ghash_ce_mod_init(void)
{
int err;
+ if (!(elf_hwcap & HWCAP_NEON))
+ return -ENODEV;
+
+ if (elf_hwcap2 & HWCAP2_PMULL)
+ pmull_ghash_update = pmull_ghash_update_p64;
+ else
+ pmull_ghash_update = pmull_ghash_update_p8;
+
err = crypto_register_shash(&ghash_alg);
if (err)
return err;
@@ -332,5 +350,5 @@ static void __exit ghash_ce_mod_exit(void)
crypto_unregister_shash(&ghash_alg);
}
-module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+module_init(ghash_ce_mod_init);
module_exit(ghash_ce_mod_exit);
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index 27475904e096..eee269321923 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -276,6 +276,12 @@ static inline u64 __gic_readq_nonatomic(const volatile void __iomem *addr)
#define gicr_write_pendbaser(v, c) __gic_writeq_nonatomic(v, c)
/*
+ * GICR_xLPIR - only the lower bits are significant
+ */
+#define gic_read_lpir(c) readl_relaxed(c)
+#define gic_write_lpir(v, c) writel_relaxed(lower_32_bits(v), c)
+
+/*
* GITS_TYPER is an ID register and doesn't need atomicity.
*/
#define gits_read_typer(c) __gic_readq_nonatomic(c)
@@ -291,5 +297,33 @@ static inline u64 __gic_readq_nonatomic(const volatile void __iomem *addr)
*/
#define gits_write_cwriter(v, c) __gic_writeq_nonatomic(v, c)
+/*
+ * GITS_VPROPBASER - hi and lo bits may be accessed independently.
+ */
+#define gits_write_vpropbaser(v, c) __gic_writeq_nonatomic(v, c)
+
+/*
+ * GITS_VPENDBASER - the Valid bit (upper 32 bits, i.e. the word at addr + 4)
+ * must be cleared before changing anything else; val is then written whole.
+ */
+static inline void gits_write_vpendbaser(u64 val, void __iomem *addr)
+{
+	u32 tmp;
+
+	tmp = readl_relaxed(addr + 4);	/* high word holds the Valid bit */
+	if (tmp & (GICR_VPENDBASER_Valid >> 32)) {
+		tmp &= ~(GICR_VPENDBASER_Valid >> 32);
+		writel_relaxed(tmp, addr + 4);
+	}
+
+	/*
+	 * Use the fact that __gic_writeq_nonatomic writes the second
+	 * half of the 64bit quantity (and so Valid) after the first.
+	 */
+	__gic_writeq_nonatomic(val, addr);
+}
+
+#define gits_read_vpendbaser(c) __gic_readq_nonatomic(c)
+
#endif /* !__ASSEMBLY__ */
#endif /* !__ASM_ARCH_GICV3_H */
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index 6795368ad023..cc414382dab4 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -128,20 +128,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
#endif /* !SMP */
static inline int
-futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret, tmp;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
- return -EFAULT;
-
#ifndef CONFIG_SMP
preempt_disable();
#endif
@@ -172,17 +162,9 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
preempt_enable();
#endif
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = oldval;
+
return ret;
}
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index ebf020b02bc8..c8781450905b 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -227,7 +227,6 @@
#define HSR_DABT_S1PTW (_AC(1, UL) << 7)
#define HSR_DABT_CM (_AC(1, UL) << 8)
-#define HSR_DABT_EA (_AC(1, UL) << 9)
#define kvm_arm_exception_type \
{0, "RESET" }, \
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 9a8a45aaf19a..98089ffd91bb 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -149,11 +149,6 @@ static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu)
return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT;
}
-static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu)
-{
- return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_EA;
-}
-
static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW;
@@ -206,6 +201,25 @@ static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu)
return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE;
}
+static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu)
+{
+ switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
+ case FSC_SEA:
+ case FSC_SEA_TTW0:
+ case FSC_SEA_TTW1:
+ case FSC_SEA_TTW2:
+ case FSC_SEA_TTW3:
+ case FSC_SECC:
+ case FSC_SECC_TTW0:
+ case FSC_SECC_TTW1:
+ case FSC_SECC_TTW2:
+ case FSC_SECC_TTW3:
+ return true;
+ default:
+ return false;
+ }
+}
+
static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 127e2dd2e21c..4a879f6ff13b 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -225,12 +225,6 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
-/* We do not have shadow page tables, hence the empty hooks */
-static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
- unsigned long address)
-{
-}
-
struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
void kvm_arm_halt_guest(struct kvm *kvm);
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 4bec45442072..c030143c18c6 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -52,22 +52,6 @@ static inline void dsb_sev(void)
* memory.
*/
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- u16 owner = READ_ONCE(lock->tickets.owner);
-
- for (;;) {
- arch_spinlock_t tmp = READ_ONCE(*lock);
-
- if (tmp.tickets.owner == tmp.tickets.next ||
- tmp.tickets.owner != owner)
- break;
-
- wfe();
- }
- smp_acquire__after_ctrl_dep();
-}
-
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
static inline void arch_spin_lock(arch_spinlock_t *lock)
diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h
index cf4f3aad0fc1..fe1c6af3a1b1 100644
--- a/arch/arm/include/asm/string.h
+++ b/arch/arm/include/asm/string.h
@@ -24,6 +24,20 @@ extern void * memchr(const void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMSET
extern void * memset(void *, int, __kernel_size_t);
+#define __HAVE_ARCH_MEMSET32
+extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
+static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n)
+{
+ return __memset32(p, v, n * 4);
+}
+
+#define __HAVE_ARCH_MEMSET64
+extern void *__memset64(uint64_t *, uint32_t low, __kernel_size_t, uint32_t hi);
+static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
+{
+ return __memset64(p, v, n * 8, v >> 32);
+}
+
extern void __memzero(void *ptr, __kernel_size_t n);
#define memset(p,v,n) \
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 776757d1604a..1d468b527b7b 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -139,10 +139,11 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
#define TIF_NEED_RESCHED 1 /* rescheduling necessary */
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_UPROBE 3 /* breakpointed or singlestepping */
-#define TIF_SYSCALL_TRACE 4 /* syscall trace active */
-#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */
-#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
-#define TIF_SECCOMP 7 /* seccomp syscall filtering active */
+#define TIF_FSCHECK 4 /* Check FS is USER_DS on return */
+#define TIF_SYSCALL_TRACE 5 /* syscall trace active */
+#define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */
+#define TIF_SYSCALL_TRACEPOINT 7 /* syscall tracepoint instrumentation */
+#define TIF_SECCOMP 8 /* seccomp syscall filtering active */
#define TIF_NOHZ 12 /* in adaptive nohz mode */
#define TIF_USING_IWMMXT 17
@@ -153,6 +154,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_UPROBE (1 << TIF_UPROBE)
+#define _TIF_FSCHECK (1 << TIF_FSCHECK)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
@@ -166,8 +168,9 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
/*
* Change these and you break ASM code in entry-common.S
*/
-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
- _TIF_NOTIFY_RESUME | _TIF_UPROBE)
+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
+ _TIF_FSCHECK)
#endif /* __KERNEL__ */
#endif /* __ASM_ARM_THREAD_INFO_H */
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 3f2eb76243e3..d5562f9ce600 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -148,7 +148,8 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
}
static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
tlb->mm = mm;
tlb->fullmm = !(start | (end+1));
@@ -166,8 +167,14 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
}
static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+ unsigned long start, unsigned long end, bool force)
{
+ if (force) {
+ tlb->range_start = start;
+ tlb->range_end = end;
+ }
+
tlb_flush_mmu(tlb);
/* keep the page table cache within bounds */
diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index f555bb3664dc..683d9230984a 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -18,7 +18,6 @@ struct undef_hook {
void register_undef_hook(struct undef_hook *hook);
void unregister_undef_hook(struct undef_hook *hook);
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static inline int __in_irqentry_text(unsigned long ptr)
{
extern char __irqentry_text_start[];
@@ -27,12 +26,6 @@ static inline int __in_irqentry_text(unsigned long ptr)
return ptr >= (unsigned long)&__irqentry_text_start &&
ptr < (unsigned long)&__irqentry_text_end;
}
-#else
-static inline int __in_irqentry_text(unsigned long ptr)
-{
- return 0;
-}
-#endif
static inline int in_exception_text(unsigned long ptr)
{
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 0bf2347495f1..87936dd5d151 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -70,6 +70,8 @@ static inline void set_fs(mm_segment_t fs)
{
current_thread_info()->addr_limit = fs;
modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER);
+ /* On user-mode return, check fs is correct */
+ set_thread_flag(TIF_FSCHECK);
}
#define segment_eq(a, b) ((a) == (b))
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 8e8d20cdbce7..5266fd9ad6b4 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -87,6 +87,8 @@ EXPORT_SYMBOL(__raw_writesl);
EXPORT_SYMBOL(strchr);
EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(__memset32);
+EXPORT_SYMBOL(__memset64);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memchr);
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index eb5cd77bf1d8..e33c32d56193 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -41,7 +41,9 @@ ret_fast_syscall:
UNWIND(.cantunwind )
disable_irq_notrace @ disable interrupts
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
- tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+ tst r1, #_TIF_SYSCALL_WORK
+ bne fast_work_pending
+ tst r1, #_TIF_WORK_MASK
bne fast_work_pending
/* perform architecture specific actions before user return */
@@ -67,12 +69,15 @@ ret_fast_syscall:
str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
disable_irq_notrace @ disable interrupts
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
- tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+ tst r1, #_TIF_SYSCALL_WORK
+ bne fast_work_pending
+ tst r1, #_TIF_WORK_MASK
beq no_work_pending
UNWIND(.fnend )
ENDPROC(ret_fast_syscall)
/* Slower path - fall through to work_pending */
+fast_work_pending:
#endif
tst r1, #_TIF_SYSCALL_WORK
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 5814298ef0b7..e2de50bf8742 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -14,6 +14,7 @@
#include <linux/uaccess.h>
#include <linux/tracehook.h>
#include <linux/uprobes.h>
+#include <linux/syscalls.h>
#include <asm/elf.h>
#include <asm/cacheflush.h>
@@ -613,6 +614,10 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
* Update the trace code with the current status.
*/
trace_hardirqs_off();
+
+ /* Check valid user FS if needed */
+ addr_limit_user_check();
+
do {
if (likely(thread_flags & _TIF_NEED_RESCHED)) {
schedule();
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 54442e375354..cf8bf6bf87c4 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -67,7 +67,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) {
trace_kvm_wfx(*vcpu_pc(vcpu), true);
vcpu->stat.wfe_exit_stat++;
- kvm_vcpu_on_spin(vcpu);
+ kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
} else {
trace_kvm_wfx(*vcpu_pc(vcpu), false);
vcpu->stat.wfi_exit_stat++;
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 3c65e3bd790f..ed6d35d9cdb5 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -28,7 +28,7 @@ UNWIND( .fnstart )
1: orr r1, r1, r1, lsl #8
orr r1, r1, r1, lsl #16
mov r3, r1
- cmp r2, #16
+7: cmp r2, #16
blt 4f
#if ! CALGN(1)+0
@@ -41,7 +41,7 @@ UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r8, lr} )
mov r8, r1
- mov lr, r1
+ mov lr, r3
2: subs r2, r2, #64
stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time.
@@ -73,11 +73,11 @@ UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r4-r8, lr} )
mov r4, r1
- mov r5, r1
+ mov r5, r3
mov r6, r1
- mov r7, r1
+ mov r7, r3
mov r8, r1
- mov lr, r1
+ mov lr, r3
cmp r2, #96
tstgt ip, #31
@@ -114,7 +114,7 @@ UNWIND( .fnstart )
tst r2, #4
strne r1, [ip], #4
/*
- * When we get here, we've got less than 4 bytes to zero. We
+ * When we get here, we've got less than 4 bytes to set. We
* may have an unaligned pointer as well.
*/
5: tst r2, #2
@@ -135,3 +135,15 @@ UNWIND( .fnstart )
UNWIND( .fnend )
ENDPROC(memset)
ENDPROC(mmioset)
+
+ENTRY(__memset32)
+UNWIND( .fnstart )
+ mov r3, r1 @ copy r1 to r3 and fall into memset64
+UNWIND( .fnend )
+ENDPROC(__memset32)
+ENTRY(__memset64)
+UNWIND( .fnstart )
+ mov ip, r0 @ preserve r0 as return value
+ b 7b @ jump into the middle of memset
+UNWIND( .fnend )
+ENDPROC(__memset64)
diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig
index d735e5fc4772..195da38cb9a2 100644
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -1,7 +1,7 @@
menuconfig ARCH_AT91
bool "Atmel SoCs"
depends on ARCH_MULTI_V4T || ARCH_MULTI_V5 || ARCH_MULTI_V7 || ARM_SINGLE_ARMV7M
- select ARM_CPU_SUSPEND if PM
+ select ARM_CPU_SUSPEND if PM && ARCH_MULTI_V7
select COMMON_CLK_AT91
select GPIOLIB
select PINCTRL
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 667fddac3856..5036f996e694 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -608,6 +608,9 @@ static void __init at91_pm_init(void (*pm_idle)(void))
void __init at91rm9200_pm_init(void)
{
+ if (!IS_ENABLED(CONFIG_SOC_AT91RM9200))
+ return;
+
at91_dt_ramc();
/*
@@ -620,18 +623,27 @@ void __init at91rm9200_pm_init(void)
void __init at91sam9_pm_init(void)
{
+ if (!IS_ENABLED(CONFIG_SOC_AT91SAM9))
+ return;
+
at91_dt_ramc();
at91_pm_init(at91sam9_idle);
}
void __init sama5_pm_init(void)
{
+ if (!IS_ENABLED(CONFIG_SOC_SAMA5))
+ return;
+
at91_dt_ramc();
at91_pm_init(NULL);
}
void __init sama5d2_pm_init(void)
{
+ if (!IS_ENABLED(CONFIG_SOC_SAMA5D2))
+ return;
+
at91_pm_backup_init();
sama5_pm_init();
}
diff --git a/arch/arm/mach-hisi/Kconfig b/arch/arm/mach-hisi/Kconfig
index a3b091a4d344..65a048fa08ec 100644
--- a/arch/arm/mach-hisi/Kconfig
+++ b/arch/arm/mach-hisi/Kconfig
@@ -39,6 +39,7 @@ config ARCH_HIP04
select HAVE_ARM_ARCH_TIMER
select MCPM if SMP
select MCPM_QUAD_CLUSTER if SMP
+ select GENERIC_IRQ_EFFECTIVE_AFF_MASK
help
Support for Hisilicon HiP04 SoC family
diff --git a/arch/arm/mach-omap1/board-h2-mmc.c b/arch/arm/mach-omap1/board-h2-mmc.c
index 357be2debc9d..91bda9c802ff 100644
--- a/arch/arm/mach-omap1/board-h2-mmc.c
+++ b/arch/arm/mach-omap1/board-h2-mmc.c
@@ -14,7 +14,7 @@
#include <linux/gpio.h>
#include <linux/platform_device.h>
#include <linux/platform_data/gpio-omap.h>
-#include <linux/i2c/tps65010.h>
+#include <linux/mfd/tps65010.h>
#include "board-h2.h"
#include "mmc.h"
diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c
index 675254ee4b1e..dece47d76282 100644
--- a/arch/arm/mach-omap1/board-h2.c
+++ b/arch/arm/mach-omap1/board-h2.c
@@ -28,7 +28,7 @@
#include <linux/mtd/partitions.h>
#include <linux/mtd/physmap.h>
#include <linux/input.h>
-#include <linux/i2c/tps65010.h>
+#include <linux/mfd/tps65010.h>
#include <linux/smc91x.h>
#include <linux/omapfb.h>
#include <linux/platform_data/gpio-omap.h>
diff --git a/arch/arm/mach-omap1/board-h3-mmc.c b/arch/arm/mach-omap1/board-h3-mmc.c
index 4f58bfa5e754..692c267a9a90 100644
--- a/arch/arm/mach-omap1/board-h3-mmc.c
+++ b/arch/arm/mach-omap1/board-h3-mmc.c
@@ -14,7 +14,7 @@
#include <linux/gpio.h>
#include <linux/platform_device.h>
-#include <linux/i2c/tps65010.h>
+#include <linux/mfd/tps65010.h>
#include "common.h"
#include "board-h3.h"
diff --git a/arch/arm/mach-omap1/board-h3.c b/arch/arm/mach-omap1/board-h3.c
index e62f9d454f10..6d32beeb2d88 100644
--- a/arch/arm/mach-omap1/board-h3.c
+++ b/arch/arm/mach-omap1/board-h3.c
@@ -28,7 +28,7 @@
#include <linux/mtd/physmap.h>
#include <linux/input.h>
#include <linux/spi/spi.h>
-#include <linux/i2c/tps65010.h>
+#include <linux/mfd/tps65010.h>
#include <linux/smc91x.h>
#include <linux/omapfb.h>
#include <linux/platform_data/gpio-omap.h>
diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c
index ee8d9f553db4..06243c0b12d2 100644
--- a/arch/arm/mach-omap1/board-nokia770.c
+++ b/arch/arm/mach-omap1/board-nokia770.c
@@ -233,10 +233,10 @@ static struct platform_device nokia770_cbus_device = {
static struct i2c_board_info nokia770_i2c_board_info_2[] __initdata = {
{
- I2C_BOARD_INFO("retu-mfd", 0x01),
+ I2C_BOARD_INFO("retu", 0x01),
},
{
- I2C_BOARD_INFO("tahvo-mfd", 0x02),
+ I2C_BOARD_INFO("tahvo", 0x02),
},
};
diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c
index 95ac1929aede..d579f4e04137 100644
--- a/arch/arm/mach-omap1/board-osk.c
+++ b/arch/arm/mach-omap1/board-osk.c
@@ -38,7 +38,7 @@
#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>
#include <linux/mtd/physmap.h>
-#include <linux/i2c/tps65010.h>
+#include <linux/mfd/tps65010.h>
#include <linux/platform_data/gpio-omap.h>
#include <linux/platform_data/omap1_bl.h>
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index 779fb1f680b3..b3b3b3a19183 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -8,7 +8,7 @@ ccflags-y := -I$(srctree)/$(src)/include \
# Common support
obj-y := id.o io.o control.o devices.o fb.o timer.o pm.o \
common.o dma.o wd_timer.o display.o i2c.o hdq1w.o omap_hwmod.o \
- omap_device.o omap-headsmp.o sram.o drm.o
+ omap_device.o omap-headsmp.o sram.o
hwmod-common = omap_hwmod.o omap_hwmod_reset.o \
omap_hwmod_common_data.o
diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index b1e661bb5521..583fc39d84cd 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c
@@ -33,6 +33,7 @@ static void __init __maybe_unused omap_generic_init(void)
pdata_quirks_init(omap_dt_match_table);
omapdss_init_of();
+ omap_soc_device_init();
}
#ifdef CONFIG_SOC_OMAP2420
diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index 8cc6338fcb12..b5ad7fcb80ed 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -29,7 +29,7 @@
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/i2c.h>
-#include <linux/i2c/twl.h>
+#include <linux/mfd/twl.h>
#include <linux/i2c-omap.h>
#include <linux/reboot.h>
#include <linux/irqchip/irq-omap-intc.h>
diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c
index 8fa01c0ecdb2..b3f6eb5d04a2 100644
--- a/arch/arm/mach-omap2/display.c
+++ b/arch/arm/mach-omap2/display.c
@@ -66,6 +66,7 @@
*/
#define FRAMEDONE_IRQ_TIMEOUT 100
+#if defined(CONFIG_FB_OMAP2)
static struct platform_device omap_display_device = {
.name = "omapdss",
.id = -1,
@@ -163,6 +164,65 @@ static enum omapdss_version __init omap_display_get_version(void)
return OMAPDSS_VER_UNKNOWN;
}
+static int __init omapdss_init_fbdev(void)
+{
+ static struct omap_dss_board_info board_data = {
+ .dsi_enable_pads = omap_dsi_enable_pads,
+ .dsi_disable_pads = omap_dsi_disable_pads,
+ .set_min_bus_tput = omap_dss_set_min_bus_tput,
+ };
+ struct device_node *node;
+ int r;
+
+ board_data.version = omap_display_get_version();
+ if (board_data.version == OMAPDSS_VER_UNKNOWN) {
+ pr_err("DSS not supported on this SoC\n");
+ return -ENODEV;
+ }
+
+ omap_display_device.dev.platform_data = &board_data;
+
+ r = platform_device_register(&omap_display_device);
+ if (r < 0) {
+ pr_err("Unable to register omapdss device\n");
+ return r;
+ }
+
+ /* create vrfb device */
+ r = omap_init_vrfb();
+ if (r < 0) {
+ pr_err("Unable to register omapvrfb device\n");
+ return r;
+ }
+
+ /* create FB device */
+ r = omap_init_fb();
+ if (r < 0) {
+ pr_err("Unable to register omapfb device\n");
+ return r;
+ }
+
+ /* create V4L2 display device */
+ r = omap_init_vout();
+ if (r < 0) {
+ pr_err("Unable to register omap_vout device\n");
+ return r;
+ }
+
+ /* add DSI info for omap4 */
+ node = of_find_node_by_name(NULL, "omap4_padconf_global");
+ if (node)
+ omap4_dsi_mux_syscon = syscon_node_to_regmap(node);
+
+ return 0;
+}
+#else
+static inline int omapdss_init_fbdev(void)
+{
+ return 0;
+}
+#endif /* CONFIG_FB_OMAP2 */
+
static void dispc_disable_outputs(void)
{
u32 v, irq_mask = 0;
@@ -335,16 +395,9 @@ static struct device_node * __init omapdss_find_dss_of_node(void)
int __init omapdss_init_of(void)
{
int r;
- enum omapdss_version ver;
struct device_node *node;
struct platform_device *pdev;
- static struct omap_dss_board_info board_data = {
- .dsi_enable_pads = omap_dsi_enable_pads,
- .dsi_disable_pads = omap_dsi_disable_pads,
- .set_min_bus_tput = omap_dss_set_min_bus_tput,
- };
-
/* only create dss helper devices if dss is enabled in the .dts */
node = omapdss_find_dss_of_node();
@@ -354,13 +407,6 @@ int __init omapdss_init_of(void)
if (!of_device_is_available(node))
return 0;
- ver = omap_display_get_version();
-
- if (ver == OMAPDSS_VER_UNKNOWN) {
- pr_err("DSS not supported on this SoC\n");
- return -ENODEV;
- }
-
pdev = of_find_device_by_node(node);
if (!pdev) {
@@ -374,48 +420,5 @@ int __init omapdss_init_of(void)
return r;
}
- board_data.version = ver;
-
- omap_display_device.dev.platform_data = &board_data;
-
- r = platform_device_register(&omap_display_device);
- if (r < 0) {
- pr_err("Unable to register omapdss device\n");
- return r;
- }
-
- /* create DRM device */
- r = omap_init_drm();
- if (r < 0) {
- pr_err("Unable to register omapdrm device\n");
- return r;
- }
-
- /* create vrfb device */
- r = omap_init_vrfb();
- if (r < 0) {
- pr_err("Unable to register omapvrfb device\n");
- return r;
- }
-
- /* create FB device */
- r = omap_init_fb();
- if (r < 0) {
- pr_err("Unable to register omapfb device\n");
- return r;
- }
-
- /* create V4L2 display device */
- r = omap_init_vout();
- if (r < 0) {
- pr_err("Unable to register omap_vout device\n");
- return r;
- }
-
- /* add DSI info for omap4 */
- node = of_find_node_by_name(NULL, "omap4_padconf_global");
- if (node)
- omap4_dsi_mux_syscon = syscon_node_to_regmap(node);
-
- return 0;
+ return omapdss_init_fbdev();
}
diff --git a/arch/arm/mach-omap2/display.h b/arch/arm/mach-omap2/display.h
index 9a39646d4316..42ec2e99a2f4 100644
--- a/arch/arm/mach-omap2/display.h
+++ b/arch/arm/mach-omap2/display.h
@@ -26,7 +26,6 @@ struct omap_dss_dispc_dev_attr {
bool has_framedonetv_irq;
};
-int omap_init_drm(void);
int omap_init_vrfb(void);
int omap_init_fb(void);
int omap_init_vout(void);
diff --git a/arch/arm/mach-omap2/drm.c b/arch/arm/mach-omap2/drm.c
deleted file mode 100644
index 44fef961bb70..000000000000
--- a/arch/arm/mach-omap2/drm.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * DRM/KMS device registration for TI OMAP platforms
- *
- * Copyright (C) 2012 Texas Instruments
- * Author: Rob Clark <rob.clark@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/dma-mapping.h>
-#include <linux/platform_data/omap_drm.h>
-
-#include "soc.h"
-#include "display.h"
-
-#if IS_ENABLED(CONFIG_DRM_OMAP)
-
-static struct omap_drm_platform_data platform_data;
-
-static struct platform_device omap_drm_device = {
- .dev = {
- .coherent_dma_mask = DMA_BIT_MASK(32),
- .platform_data = &platform_data,
- },
- .name = "omapdrm",
- .id = 0,
-};
-
-int __init omap_init_drm(void)
-{
- platform_data.omaprev = GET_OMAP_TYPE;
-
- return platform_device_register(&omap_drm_device);
-
-}
-#else
-int __init omap_init_drm(void) { return 0; }
-#endif
diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index 1cd20e4d56b0..cb5d7314cf99 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -428,7 +428,6 @@ static void __init __maybe_unused omap_hwmod_init_postsetup(void)
static void __init __maybe_unused omap_common_late_init(void)
{
omap2_common_pm_late_init();
- omap_soc_device_init();
}
#ifdef CONFIG_SOC_OMAP2420
diff --git a/arch/arm/mach-omap2/omap_twl.c b/arch/arm/mach-omap2/omap_twl.c
index 1346b3ab34a5..295124b248ae 100644
--- a/arch/arm/mach-omap2/omap_twl.c
+++ b/arch/arm/mach-omap2/omap_twl.c
@@ -16,7 +16,7 @@
#include <linux/err.h>
#include <linux/io.h>
#include <linux/kernel.h>
-#include <linux/i2c/twl.h>
+#include <linux/mfd/twl.h>
#include "soc.h"
#include "voltage.h"
diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c
index e2c97728b3c6..9d662fed03ec 100644
--- a/arch/arm/mach-pxa/raumfeld.c
+++ b/arch/arm/mach-pxa/raumfeld.c
@@ -377,7 +377,7 @@ static struct gpiod_lookup_table raumfeld_rotary_gpios_table = {
},
};
-static struct property_entry raumfeld_rotary_properties[] = {
+static const struct property_entry raumfeld_rotary_properties[] __initconst = {
PROPERTY_ENTRY_INTEGER("rotary-encoder,steps-per-period", u32, 24),
PROPERTY_ENTRY_INTEGER("linux,axis", u32, REL_X),
PROPERTY_ENTRY_INTEGER("rotary-encoder,relative_axis", u32, 1),
diff --git a/arch/arm/mach-s3c24xx/mach-osiris-dvs.c b/arch/arm/mach-s3c24xx/mach-osiris-dvs.c
index 262ab0744748..6cac7da15e2b 100644
--- a/arch/arm/mach-s3c24xx/mach-osiris-dvs.c
+++ b/arch/arm/mach-s3c24xx/mach-osiris-dvs.c
@@ -17,7 +17,7 @@
#include <linux/cpufreq.h>
#include <linux/gpio.h>
-#include <linux/i2c/tps65010.h>
+#include <linux/mfd/tps65010.h>
#include <plat/cpu-freq.h>
#include <mach/gpio-samsung.h>
diff --git a/arch/arm/mach-s3c24xx/mach-osiris.c b/arch/arm/mach-s3c24xx/mach-osiris.c
index 70b0eb7d3134..64b1a0b7b803 100644
--- a/arch/arm/mach-s3c24xx/mach-osiris.c
+++ b/arch/arm/mach-s3c24xx/mach-osiris.c
@@ -24,7 +24,7 @@
#include <linux/io.h>
#include <linux/platform_device.h>
-#include <linux/i2c/tps65010.h>
+#include <linux/mfd/tps65010.h>
#include <asm/mach-types.h>
#include <asm/mach/arch.h>
diff --git a/arch/arm/mach-tegra/cpuidle-tegra114.c b/arch/arm/mach-tegra/cpuidle-tegra114.c
index d3aa9be16621..e3fbcfedf845 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra114.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra114.c
@@ -60,7 +60,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev,
return index;
}
-static void tegra114_idle_enter_freeze(struct cpuidle_device *dev,
+static void tegra114_idle_enter_s2idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
@@ -77,7 +77,7 @@ static struct cpuidle_driver tegra_idle_driver = {
#ifdef CONFIG_PM_SLEEP
[1] = {
.enter = tegra114_idle_power_down,
- .enter_freeze = tegra114_idle_enter_freeze,
+ .enter_s2idle = tegra114_idle_enter_s2idle,
.exit_latency = 500,
.target_residency = 1000,
.flags = CPUIDLE_FLAG_TIMER_STOP,
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index d5b9fa19b684..c199990e12b6 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1,6 +1,7 @@
/*
- * Just-In-Time compiler for BPF filters on 32bit ARM
+ * Just-In-Time compiler for eBPF filters on 32bit ARM
*
+ * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
* Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify it
@@ -8,6 +9,7 @@
* Free Software Foundation; version 2 of the License.
*/
+#include <linux/bpf.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/errno.h>
@@ -18,54 +20,101 @@
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>
-#include <asm/set_memory.h>
#include <asm/hwcap.h>
#include <asm/opcodes.h>
#include "bpf_jit_32.h"
+int bpf_jit_enable __read_mostly;
+
+#define STACK_OFFSET(k) (k)
+#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */
+#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */
+#define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */
+
+/* Flags used for JIT optimization */
+#define SEEN_CALL (1 << 0)
+
+#define FLAG_IMM_OVERFLOW (1 << 0)
+
/*
- * ABI:
+ * Map eBPF registers to ARM 32bit registers or stack scratch space.
+ *
+ * 1. First argument is passed using the arm 32bit registers and rest of the
+ * arguments are passed on stack scratch space.
+ * 2. First callee-saved argument is mapped to arm 32 bit registers and rest
+ * arguments are mapped to scratch space on stack.
+ * 3. We need two 64 bit temp registers to do complex operations on eBPF
+ * registers.
+ *
+ * As the eBPF registers are all 64 bit registers and arm has only 32 bit
+ * registers, we have to map each eBPF register to two arm 32 bit regs or
+ * scratch memory space and we have to build eBPF 64 bit register from those.
*
- * r0 scratch register
- * r4 BPF register A
- * r5 BPF register X
- * r6 pointer to the skb
- * r7 skb->data
- * r8 skb_headlen(skb)
*/
+static const u8 bpf2a32[][2] = {
+ /* return value from in-kernel function, and exit value from eBPF */
+ [BPF_REG_0] = {ARM_R1, ARM_R0},
+ /* arguments from eBPF program to in-kernel function */
+ [BPF_REG_1] = {ARM_R3, ARM_R2},
+ /* Stored on stack scratch space */
+ [BPF_REG_2] = {STACK_OFFSET(0), STACK_OFFSET(4)},
+ [BPF_REG_3] = {STACK_OFFSET(8), STACK_OFFSET(12)},
+ [BPF_REG_4] = {STACK_OFFSET(16), STACK_OFFSET(20)},
+ [BPF_REG_5] = {STACK_OFFSET(24), STACK_OFFSET(28)},
+ /* callee saved registers that in-kernel function will preserve */
+ [BPF_REG_6] = {ARM_R5, ARM_R4},
+ /* Stored on stack scratch space */
+ [BPF_REG_7] = {STACK_OFFSET(32), STACK_OFFSET(36)},
+ [BPF_REG_8] = {STACK_OFFSET(40), STACK_OFFSET(44)},
+ [BPF_REG_9] = {STACK_OFFSET(48), STACK_OFFSET(52)},
+ /* Read only Frame Pointer to access Stack */
+ [BPF_REG_FP] = {STACK_OFFSET(56), STACK_OFFSET(60)},
+ /* Temporary Register for internal BPF JIT, can be used
+ * for constant blinding and others.
+ */
+ [TMP_REG_1] = {ARM_R7, ARM_R6},
+ [TMP_REG_2] = {ARM_R10, ARM_R8},
+ /* Tail call count. Stored on stack scratch space. */
+ [TCALL_CNT] = {STACK_OFFSET(64), STACK_OFFSET(68)},
+ /* temporary register for blinding constants.
+ * Stored on stack scratch space.
+ */
+ [BPF_REG_AX] = {STACK_OFFSET(72), STACK_OFFSET(76)},
+};
-#define r_scratch ARM_R0
-/* r1-r3 are (also) used for the unaligned loads on the non-ARMv7 slowpath */
-#define r_off ARM_R1
-#define r_A ARM_R4
-#define r_X ARM_R5
-#define r_skb ARM_R6
-#define r_skb_data ARM_R7
-#define r_skb_hl ARM_R8
-
-#define SCRATCH_SP_OFFSET 0
-#define SCRATCH_OFF(k) (SCRATCH_SP_OFFSET + 4 * (k))
-
-#define SEEN_MEM ((1 << BPF_MEMWORDS) - 1)
-#define SEEN_MEM_WORD(k) (1 << (k))
-#define SEEN_X (1 << BPF_MEMWORDS)
-#define SEEN_CALL (1 << (BPF_MEMWORDS + 1))
-#define SEEN_SKB (1 << (BPF_MEMWORDS + 2))
-#define SEEN_DATA (1 << (BPF_MEMWORDS + 3))
+#define dst_lo dst[1]
+#define dst_hi dst[0]
+#define src_lo src[1]
+#define src_hi src[0]
-#define FLAG_NEED_X_RESET (1 << 0)
-#define FLAG_IMM_OVERFLOW (1 << 1)
+/*
+ * JIT Context:
+ *
+ * prog : bpf_prog
+ * idx : index of current last JITed instruction.
+ * prologue_bytes : bytes used in prologue.
+ * epilogue_offset : offset of epilogue starting.
+ * seen : bit mask used for JIT optimization.
+ * offsets : array of eBPF instruction offsets in
+ * JITed code.
+ * target : final JITed code.
+ * epilogue_bytes : no of bytes used in epilogue.
+ * imm_count : no of immediate counts used for global
+ * variables.
+ * imms : array of global variable addresses.
+ */
struct jit_ctx {
- const struct bpf_prog *skf;
- unsigned idx;
- unsigned prologue_bytes;
- int ret0_fp_idx;
+ const struct bpf_prog *prog;
+ unsigned int idx;
+ unsigned int prologue_bytes;
+ unsigned int epilogue_offset;
u32 seen;
u32 flags;
u32 *offsets;
u32 *target;
+ u32 stack_size;
#if __LINUX_ARM_ARCH__ < 7
u16 epilogue_bytes;
u16 imm_count;
@@ -73,68 +122,16 @@ struct jit_ctx {
#endif
};
-int bpf_jit_enable __read_mostly;
-
-static inline int call_neg_helper(struct sk_buff *skb, int offset, void *ret,
- unsigned int size)
-{
- void *ptr = bpf_internal_load_pointer_neg_helper(skb, offset, size);
-
- if (!ptr)
- return -EFAULT;
- memcpy(ret, ptr, size);
- return 0;
-}
-
-static u64 jit_get_skb_b(struct sk_buff *skb, int offset)
-{
- u8 ret;
- int err;
-
- if (offset < 0)
- err = call_neg_helper(skb, offset, &ret, 1);
- else
- err = skb_copy_bits(skb, offset, &ret, 1);
-
- return (u64)err << 32 | ret;
-}
-
-static u64 jit_get_skb_h(struct sk_buff *skb, int offset)
-{
- u16 ret;
- int err;
-
- if (offset < 0)
- err = call_neg_helper(skb, offset, &ret, 2);
- else
- err = skb_copy_bits(skb, offset, &ret, 2);
-
- return (u64)err << 32 | ntohs(ret);
-}
-
-static u64 jit_get_skb_w(struct sk_buff *skb, int offset)
-{
- u32 ret;
- int err;
-
- if (offset < 0)
- err = call_neg_helper(skb, offset, &ret, 4);
- else
- err = skb_copy_bits(skb, offset, &ret, 4);
-
- return (u64)err << 32 | ntohl(ret);
-}
-
/*
* Wrappers which handle both OABI and EABI and assures Thumb2 interworking
* (where the assembly routines like __aeabi_uidiv could cause problems).
*/
-static u32 jit_udiv(u32 dividend, u32 divisor)
+static u32 jit_udiv32(u32 dividend, u32 divisor)
{
return dividend / divisor;
}
-static u32 jit_mod(u32 dividend, u32 divisor)
+static u32 jit_mod32(u32 dividend, u32 divisor)
{
return dividend % divisor;
}
@@ -158,36 +155,22 @@ static inline void emit(u32 inst, struct jit_ctx *ctx)
_emit(ARM_COND_AL, inst, ctx);
}
-static u16 saved_regs(struct jit_ctx *ctx)
+/*
+ * Encodes x as an imm12 (8-bit value, 4-bit rotation); returns -1 if not encodable.
+ */
+static int16_t imm8m(u32 x)
{
- u16 ret = 0;
-
- if ((ctx->skf->len > 1) ||
- (ctx->skf->insns[0].code == (BPF_RET | BPF_A)))
- ret |= 1 << r_A;
-
-#ifdef CONFIG_FRAME_POINTER
- ret |= (1 << ARM_FP) | (1 << ARM_IP) | (1 << ARM_LR) | (1 << ARM_PC);
-#else
- if (ctx->seen & SEEN_CALL)
- ret |= 1 << ARM_LR;
-#endif
- if (ctx->seen & (SEEN_DATA | SEEN_SKB))
- ret |= 1 << r_skb;
- if (ctx->seen & SEEN_DATA)
- ret |= (1 << r_skb_data) | (1 << r_skb_hl);
- if (ctx->seen & SEEN_X)
- ret |= 1 << r_X;
-
- return ret;
-}
+ u32 rot;
-static inline int mem_words_used(struct jit_ctx *ctx)
-{
- /* yes, we do waste some stack space IF there are "holes" in the set" */
- return fls(ctx->seen & SEEN_MEM);
+ for (rot = 0; rot < 16; rot++)
+ if ((x & ~ror32(0xff, 2 * rot)) == 0)
+ return rol32(x, 2 * rot) | (rot << 8);
+ return -1;
}
+/*
+ * Initializes the JIT space with undefined instructions.
+ */
static void jit_fill_hole(void *area, unsigned int size)
{
u32 *ptr;
@@ -196,88 +179,34 @@ static void jit_fill_hole(void *area, unsigned int size)
*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
}
-static void build_prologue(struct jit_ctx *ctx)
-{
- u16 reg_set = saved_regs(ctx);
- u16 off;
-
-#ifdef CONFIG_FRAME_POINTER
- emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
- emit(ARM_PUSH(reg_set), ctx);
- emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
-#else
- if (reg_set)
- emit(ARM_PUSH(reg_set), ctx);
-#endif
-
- if (ctx->seen & (SEEN_DATA | SEEN_SKB))
- emit(ARM_MOV_R(r_skb, ARM_R0), ctx);
-
- if (ctx->seen & SEEN_DATA) {
- off = offsetof(struct sk_buff, data);
- emit(ARM_LDR_I(r_skb_data, r_skb, off), ctx);
- /* headlen = len - data_len */
- off = offsetof(struct sk_buff, len);
- emit(ARM_LDR_I(r_skb_hl, r_skb, off), ctx);
- off = offsetof(struct sk_buff, data_len);
- emit(ARM_LDR_I(r_scratch, r_skb, off), ctx);
- emit(ARM_SUB_R(r_skb_hl, r_skb_hl, r_scratch), ctx);
- }
-
- if (ctx->flags & FLAG_NEED_X_RESET)
- emit(ARM_MOV_I(r_X, 0), ctx);
-
- /* do not leak kernel data to userspace */
- if (bpf_needs_clear_a(&ctx->skf->insns[0]))
- emit(ARM_MOV_I(r_A, 0), ctx);
-
- /* stack space for the BPF_MEM words */
- if (ctx->seen & SEEN_MEM)
- emit(ARM_SUB_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);
-}
-
-static void build_epilogue(struct jit_ctx *ctx)
-{
- u16 reg_set = saved_regs(ctx);
-
- if (ctx->seen & SEEN_MEM)
- emit(ARM_ADD_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);
-
- reg_set &= ~(1 << ARM_LR);
+/* Round the stack size up to a multiple of 4 bytes */
+#define STACK_ALIGN(sz) (((sz) + 3) & ~3)
-#ifdef CONFIG_FRAME_POINTER
- /* the first instruction of the prologue was: mov ip, sp */
- reg_set &= ~(1 << ARM_IP);
- reg_set |= (1 << ARM_SP);
- emit(ARM_LDM(ARM_SP, reg_set), ctx);
-#else
- if (reg_set) {
- if (ctx->seen & SEEN_CALL)
- reg_set |= 1 << ARM_PC;
- emit(ARM_POP(reg_set), ctx);
- }
+/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
+ * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
+ * BPF_REG_FP and Tail call counts.
+ */
+#define SCRATCH_SIZE 80
- if (!(ctx->seen & SEEN_CALL))
- emit(ARM_BX(ARM_LR), ctx);
-#endif
-}
+/* total stack size used in JITed code */
+#define _STACK_SIZE \
+ (ctx->prog->aux->stack_depth + \
+ + SCRATCH_SIZE + \
+ + 4 /* extra for skb_copy_bits buffer */)
-static int16_t imm8m(u32 x)
-{
- u32 rot;
+#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
- for (rot = 0; rot < 16; rot++)
- if ((x & ~ror32(0xff, 2 * rot)) == 0)
- return rol32(x, 2 * rot) | (rot << 8);
+/* Get the offset of eBPF REGISTERs stored on scratch space. */
+#define STACK_VAR(off) (STACK_SIZE-off-4)
- return -1;
-}
+/* Offset of skb_copy_bits buffer */
+#define SKB_BUFFER STACK_VAR(SCRATCH_SIZE)
#if __LINUX_ARM_ARCH__ < 7
static u16 imm_offset(u32 k, struct jit_ctx *ctx)
{
- unsigned i = 0, offset;
+ unsigned int i = 0, offset;
u16 imm;
/* on the "fake" run we just count them (duplicates included) */
@@ -296,7 +225,7 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx)
ctx->imms[i] = k;
/* constants go just after the epilogue */
- offset = ctx->offsets[ctx->skf->len];
+ offset = ctx->offsets[ctx->prog->len - 1] * 4;
offset += ctx->prologue_bytes;
offset += ctx->epilogue_bytes;
offset += i * 4;
@@ -320,10 +249,22 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx)
#endif /* __LINUX_ARM_ARCH__ */
+static inline int bpf2a32_offset(int bpf_to, int bpf_from,
+ const struct jit_ctx *ctx) {
+ int to, from;
+
+ if (ctx->target == NULL)
+ return 0;
+ to = ctx->offsets[bpf_to];
+ from = ctx->offsets[bpf_from];
+
+ return to - from - 1;
+}
+
/*
* Move an immediate that's not an imm8m to a core register.
*/
-static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx)
+static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 7
emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx);
@@ -334,7 +275,7 @@ static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx)
#endif
}
-static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx)
+static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx)
{
int imm12 = imm8m(val);
@@ -344,676 +285,1594 @@ static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx)
emit_mov_i_no8m(rd, val, ctx);
}
-#if __LINUX_ARM_ARCH__ < 6
-
-static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
+static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
{
- _emit(cond, ARM_LDRB_I(ARM_R3, r_addr, 1), ctx);
- _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx);
- _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 3), ctx);
- _emit(cond, ARM_LSL_I(ARM_R3, ARM_R3, 16), ctx);
- _emit(cond, ARM_LDRB_I(ARM_R0, r_addr, 2), ctx);
- _emit(cond, ARM_ORR_S(ARM_R3, ARM_R3, ARM_R1, SRTYPE_LSL, 24), ctx);
- _emit(cond, ARM_ORR_R(ARM_R3, ARM_R3, ARM_R2), ctx);
- _emit(cond, ARM_ORR_S(r_res, ARM_R3, ARM_R0, SRTYPE_LSL, 8), ctx);
+ ctx->seen |= SEEN_CALL;
+#if __LINUX_ARM_ARCH__ < 5
+ emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+
+ if (elf_hwcap & HWCAP_THUMB)
+ emit(ARM_BX(tgt_reg), ctx);
+ else
+ emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
+#else
+ emit(ARM_BLX_R(tgt_reg), ctx);
+#endif
}
-static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
+static inline int epilogue_offset(const struct jit_ctx *ctx)
{
- _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx);
- _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 1), ctx);
- _emit(cond, ARM_ORR_S(r_res, ARM_R2, ARM_R1, SRTYPE_LSL, 8), ctx);
+ int to, from;
+ /* No need for 1st dummy run */
+ if (ctx->target == NULL)
+ return 0;
+ to = ctx->epilogue_offset;
+ from = ctx->idx;
+
+ return to - from - 2;
}
-static inline void emit_swap16(u8 r_dst, u8 r_src, struct jit_ctx *ctx)
+static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
{
- /* r_dst = (r_src << 8) | (r_src >> 8) */
- emit(ARM_LSL_I(ARM_R1, r_src, 8), ctx);
- emit(ARM_ORR_S(r_dst, ARM_R1, r_src, SRTYPE_LSR, 8), ctx);
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ s32 jmp_offset;
+
+ /* checks if divisor is zero or not. If it is, then
+ * exit directly.
+ */
+ emit(ARM_CMP_I(rn, 0), ctx);
+ _emit(ARM_COND_EQ, ARM_MOV_I(ARM_R0, 0), ctx);
+ jmp_offset = epilogue_offset(ctx);
+ _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
+#if __LINUX_ARM_ARCH__ == 7
+ if (elf_hwcap & HWCAP_IDIVA) {
+ if (op == BPF_DIV)
+ emit(ARM_UDIV(rd, rm, rn), ctx);
+ else {
+ emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
+ emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
+ }
+ return;
+ }
+#endif
/*
- * we need to mask out the bits set in r_dst[23:16] due to
- * the first shift instruction.
- *
- * note that 0x8ff is the encoded immediate 0x00ff0000.
+ * For BPF_ALU | BPF_DIV | BPF_K instructions:
+ * ARM_R0 and ARM_R1 contain the 1st argument of the bpf
+ * function, so we save them on the caller side to keep
+ * them from being destroyed within the callee.
+ * After the return from the callee, we restore ARM_R0
+ * and ARM_R1.
*/
- emit(ARM_BIC_I(r_dst, r_dst, 0x8ff), ctx);
-}
+ if (rn != ARM_R1) {
+ emit(ARM_MOV_R(tmp[0], ARM_R1), ctx);
+ emit(ARM_MOV_R(ARM_R1, rn), ctx);
+ }
+ if (rm != ARM_R0) {
+ emit(ARM_MOV_R(tmp[1], ARM_R0), ctx);
+ emit(ARM_MOV_R(ARM_R0, rm), ctx);
+ }
-#else /* ARMv6+ */
+ /* Call appropriate function */
+ ctx->seen |= SEEN_CALL;
+ emit_mov_i(ARM_IP, op == BPF_DIV ?
+ (u32)jit_udiv32 : (u32)jit_mod32, ctx);
+ emit_blx_r(ARM_IP, ctx);
-static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
-{
- _emit(cond, ARM_LDR_I(r_res, r_addr, 0), ctx);
-#ifdef __LITTLE_ENDIAN
- _emit(cond, ARM_REV(r_res, r_res), ctx);
-#endif
+ /* Save return value */
+ if (rd != ARM_R0)
+ emit(ARM_MOV_R(rd, ARM_R0), ctx);
+
+ /* Restore ARM_R0 and ARM_R1 */
+ if (rn != ARM_R1)
+ emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx);
+ if (rm != ARM_R0)
+ emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
}
-static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
+/* Checks whether BPF register is on scratch stack space or not. */
+static inline bool is_on_stack(u8 bpf_reg)
{
- _emit(cond, ARM_LDRH_I(r_res, r_addr, 0), ctx);
-#ifdef __LITTLE_ENDIAN
- _emit(cond, ARM_REV16(r_res, r_res), ctx);
-#endif
+ static u8 stack_regs[] = {BPF_REG_AX, BPF_REG_3, BPF_REG_4, BPF_REG_5,
+ BPF_REG_7, BPF_REG_8, BPF_REG_9, TCALL_CNT,
+ BPF_REG_2, BPF_REG_FP};
+ int i, reg_len = sizeof(stack_regs);
+
+ for (i = 0 ; i < reg_len ; i++) {
+ if (bpf_reg == stack_regs[i])
+ return true;
+ }
+ return false;
}
-static inline void emit_swap16(u8 r_dst __maybe_unused,
- u8 r_src __maybe_unused,
- struct jit_ctx *ctx __maybe_unused)
+static inline void emit_a32_mov_i(const u8 dst, const u32 val,
+ bool dstk, struct jit_ctx *ctx)
{
-#ifdef __LITTLE_ENDIAN
- emit(ARM_REV16(r_dst, r_src), ctx);
-#endif
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+
+ if (dstk) {
+ emit_mov_i(tmp[1], val, ctx);
+ emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(dst)), ctx);
+ } else {
+ emit_mov_i(dst, val, ctx);
+ }
}
-#endif /* __LINUX_ARM_ARCH__ < 6 */
+/* Sign extended move */
+static inline void emit_a32_mov_i64(const bool is64, const u8 dst[],
+ const u32 val, bool dstk,
+ struct jit_ctx *ctx) {
+ u32 hi = 0;
+ if (is64 && (val & (1<<31)))
+ hi = (u32)~0;
+ emit_a32_mov_i(dst_lo, val, dstk, ctx);
+ emit_a32_mov_i(dst_hi, hi, dstk, ctx);
+}
-/* Compute the immediate value for a PC-relative branch. */
-static inline u32 b_imm(unsigned tgt, struct jit_ctx *ctx)
-{
- u32 imm;
+static inline void emit_a32_add_r(const u8 dst, const u8 src,
+ const bool is64, const bool hi,
+ struct jit_ctx *ctx) {
+ /* 64 bit :
+ * adds dst_lo, dst_lo, src_lo
+ * adc dst_hi, dst_hi, src_hi
+ * 32 bit :
+ * add dst_lo, dst_lo, src_lo
+ */
+ if (!hi && is64)
+ emit(ARM_ADDS_R(dst, dst, src), ctx);
+ else if (hi && is64)
+ emit(ARM_ADC_R(dst, dst, src), ctx);
+ else
+ emit(ARM_ADD_R(dst, dst, src), ctx);
+}
- if (ctx->target == NULL)
- return 0;
- /*
- * BPF allows only forward jumps and the offset of the target is
- * still the one computed during the first pass.
+static inline void emit_a32_sub_r(const u8 dst, const u8 src,
+ const bool is64, const bool hi,
+ struct jit_ctx *ctx) {
+ /* 64 bit :
+ * subs dst_lo, dst_lo, src_lo
+ * sbc dst_hi, dst_hi, src_hi
+ * 32 bit :
+ * sub dst_lo, dst_lo, src_lo
*/
- imm = ctx->offsets[tgt] + ctx->prologue_bytes - (ctx->idx * 4 + 8);
+ if (!hi && is64)
+ emit(ARM_SUBS_R(dst, dst, src), ctx);
+ else if (hi && is64)
+ emit(ARM_SBC_R(dst, dst, src), ctx);
+ else
+ emit(ARM_SUB_R(dst, dst, src), ctx);
+}
- return imm >> 2;
+static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64,
+ const bool hi, const u8 op, struct jit_ctx *ctx){
+ switch (BPF_OP(op)) {
+ /* dst = dst + src */
+ case BPF_ADD:
+ emit_a32_add_r(dst, src, is64, hi, ctx);
+ break;
+ /* dst = dst - src */
+ case BPF_SUB:
+ emit_a32_sub_r(dst, src, is64, hi, ctx);
+ break;
+ /* dst = dst | src */
+ case BPF_OR:
+ emit(ARM_ORR_R(dst, dst, src), ctx);
+ break;
+ /* dst = dst & src */
+ case BPF_AND:
+ emit(ARM_AND_R(dst, dst, src), ctx);
+ break;
+ /* dst = dst ^ src */
+ case BPF_XOR:
+ emit(ARM_EOR_R(dst, dst, src), ctx);
+ break;
+ /* dst = dst * src */
+ case BPF_MUL:
+ emit(ARM_MUL(dst, dst, src), ctx);
+ break;
+ /* dst = dst << src */
+ case BPF_LSH:
+ emit(ARM_LSL_R(dst, dst, src), ctx);
+ break;
+ /* dst = dst >> src */
+ case BPF_RSH:
+ emit(ARM_LSR_R(dst, dst, src), ctx);
+ break;
+ /* dst = dst >> src (signed)*/
+ case BPF_ARSH:
+ emit(ARM_MOV_SR(dst, dst, SRTYPE_ASR, src), ctx);
+ break;
+ }
}
-#define OP_IMM3(op, r1, r2, imm_val, ctx) \
- do { \
- imm12 = imm8m(imm_val); \
- if (imm12 < 0) { \
- emit_mov_i_no8m(r_scratch, imm_val, ctx); \
- emit(op ## _R((r1), (r2), r_scratch), ctx); \
- } else { \
- emit(op ## _I((r1), (r2), imm12), ctx); \
- } \
- } while (0)
-
-static inline void emit_err_ret(u8 cond, struct jit_ctx *ctx)
-{
- if (ctx->ret0_fp_idx >= 0) {
- _emit(cond, ARM_B(b_imm(ctx->ret0_fp_idx, ctx)), ctx);
- /* NOP to keep the size constant between passes */
- emit(ARM_MOV_R(ARM_R0, ARM_R0), ctx);
+/* ALU operation (32 bit)
+ * dst = dst (op) src
+ */
+static inline void emit_a32_alu_r(const u8 dst, const u8 src,
+ bool dstk, bool sstk,
+ struct jit_ctx *ctx, const bool is64,
+ const bool hi, const u8 op) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rn = sstk ? tmp[1] : src;
+
+ if (sstk)
+ emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx);
+
+ /* ALU operation */
+ if (dstk) {
+ emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
+ emit_alu_r(tmp[0], rn, is64, hi, op, ctx);
+ emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
} else {
- _emit(cond, ARM_MOV_I(ARM_R0, 0), ctx);
- _emit(cond, ARM_B(b_imm(ctx->skf->len, ctx)), ctx);
+ emit_alu_r(dst, rn, is64, hi, op, ctx);
}
}
-static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
-{
-#if __LINUX_ARM_ARCH__ < 5
- emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+/* ALU operation (64 bit) */
+static inline void emit_a32_alu_r64(const bool is64, const u8 dst[],
+ const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx,
+ const u8 op) {
+ emit_a32_alu_r(dst_lo, src_lo, dstk, sstk, ctx, is64, false, op);
+ if (is64)
+ emit_a32_alu_r(dst_hi, src_hi, dstk, sstk, ctx, is64, true, op);
+ else
+ emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+}
- if (elf_hwcap & HWCAP_THUMB)
- emit(ARM_BX(tgt_reg), ctx);
+/* dst = src (4 bytes)*/
+static inline void emit_a32_mov_r(const u8 dst, const u8 src,
+ bool dstk, bool sstk,
+ struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rt = sstk ? tmp[0] : src;
+
+ if (sstk)
+ emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx);
+ if (dstk)
+ emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst)), ctx);
else
- emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
-#else
- emit(ARM_BLX_R(tgt_reg), ctx);
-#endif
+ emit(ARM_MOV_R(dst, rt), ctx);
}
-static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx,
- int bpf_op)
-{
-#if __LINUX_ARM_ARCH__ == 7
- if (elf_hwcap & HWCAP_IDIVA) {
- if (bpf_op == BPF_DIV)
- emit(ARM_UDIV(rd, rm, rn), ctx);
- else {
- emit(ARM_UDIV(ARM_R3, rm, rn), ctx);
- emit(ARM_MLS(rd, rn, ARM_R3, rm), ctx);
- }
- return;
+/* dst = src */
+static inline void emit_a32_mov_r64(const bool is64, const u8 dst[],
+ const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx) {
+ emit_a32_mov_r(dst_lo, src_lo, dstk, sstk, ctx);
+ if (is64) {
+ /* complete 8 byte move */
+ emit_a32_mov_r(dst_hi, src_hi, dstk, sstk, ctx);
+ } else {
+ /* Zero out high 4 bytes */
+ emit_a32_mov_i(dst_hi, 0, dstk, ctx);
}
-#endif
+}
- /*
- * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4
- * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into
- * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm
- * before using it as a source for ARM_R1.
- *
- * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is
- * ARM_R5 (r_X) so there is no particular register overlap
- * issues.
- */
- if (rn != ARM_R1)
- emit(ARM_MOV_R(ARM_R1, rn), ctx);
- if (rm != ARM_R0)
- emit(ARM_MOV_R(ARM_R0, rm), ctx);
+/* Shift operations */
+static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk,
+ struct jit_ctx *ctx, const u8 op) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rd = dstk ? tmp[0] : dst;
+
+ if (dstk)
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+
+ /* Do shift operation */
+ switch (op) {
+ case BPF_LSH:
+ emit(ARM_LSL_I(rd, rd, val), ctx);
+ break;
+ case BPF_RSH:
+ emit(ARM_LSR_I(rd, rd, val), ctx);
+ break;
+ case BPF_NEG:
+ emit(ARM_RSB_I(rd, rd, val), ctx);
+ break;
+ }
+ if (dstk)
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+}
+
+/* dst = -dst (64 bit) */
+static inline void emit_a32_neg64(const u8 dst[], bool dstk,
+ struct jit_ctx *ctx){
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rd = dstk ? tmp[1] : dst[1];
+ u8 rm = dstk ? tmp[0] : dst[0];
+
+ /* Setup Operand */
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do Negate Operation */
+ emit(ARM_RSBS_I(rd, rd, 0), ctx);
+ emit(ARM_RSC_I(rm, rm, 0), ctx);
+
+ if (dstk) {
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+}
+
+/* dst = dst << src */
+static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+
+ /* Setup Operands */
+ u8 rt = sstk ? tmp2[1] : src_lo;
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+
+ if (sstk)
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do LSH operation */
+ emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
+ emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
+ /* As we are using ARM_LR */
ctx->seen |= SEEN_CALL;
- emit_mov_i(ARM_R3, bpf_op == BPF_DIV ? (u32)jit_udiv : (u32)jit_mod,
- ctx);
- emit_blx_r(ARM_R3, ctx);
+ emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
+ emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
+ emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx);
+
+ if (dstk) {
+ emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ } else {
+ emit(ARM_MOV_R(rd, ARM_LR), ctx);
+ emit(ARM_MOV_R(rm, ARM_IP), ctx);
+ }
+}
- if (rd != ARM_R0)
- emit(ARM_MOV_R(rd, ARM_R0), ctx);
+/* dst = dst >> src (signed)*/
+static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ /* Setup Operands */
+ u8 rt = sstk ? tmp2[1] : src_lo;
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+
+ if (sstk)
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do the ARSH operation */
+ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
+ emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
+ /* As we are using ARM_LR */
+ ctx->seen |= SEEN_CALL;
+ emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
+ _emit(ARM_COND_MI, ARM_B(0), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx);
+ emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx);
+ if (dstk) {
+ emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ } else {
+ emit(ARM_MOV_R(rd, ARM_LR), ctx);
+ emit(ARM_MOV_R(rm, ARM_IP), ctx);
+ }
+}
+
+/* dst = dst >> src */
+static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ /* Setup Operands */
+ u8 rt = sstk ? tmp2[1] : src_lo;
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+
+ if (sstk)
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do LSR operation */
+ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
+ emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
+ /* As we are using ARM_LR */
+ ctx->seen |= SEEN_CALL;
+ emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
+ emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx);
+ if (dstk) {
+ emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ } else {
+ emit(ARM_MOV_R(rd, ARM_LR), ctx);
+ emit(ARM_MOV_R(rm, ARM_IP), ctx);
+ }
}
-static inline void update_on_xread(struct jit_ctx *ctx)
+/* dst = dst << val */
+static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk,
+ const u32 val, struct jit_ctx *ctx){
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ /* Setup operands */
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do LSH operation */
+ if (val < 32) {
+ emit(ARM_MOV_SI(tmp2[0], rm, SRTYPE_ASL, val), ctx);
+ emit(ARM_ORR_SI(rm, tmp2[0], rd, SRTYPE_LSR, 32 - val), ctx);
+ emit(ARM_MOV_SI(rd, rd, SRTYPE_ASL, val), ctx);
+ } else {
+ if (val == 32)
+ emit(ARM_MOV_R(rm, rd), ctx);
+ else
+ emit(ARM_MOV_SI(rm, rd, SRTYPE_ASL, val - 32), ctx);
+ emit(ARM_EOR_R(rd, rd, rd), ctx);
+ }
+
+ if (dstk) {
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+}
+
+/* dst = dst >> val */
+static inline void emit_a32_lsr_i64(const u8 dst[], bool dstk,
+ const u32 val, struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ /* Setup operands */
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do LSR operation */
+ if (val < 32) {
+ emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx);
+ emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx);
+ emit(ARM_MOV_SI(rm, rm, SRTYPE_LSR, val), ctx);
+ } else if (val == 32) {
+ emit(ARM_MOV_R(rd, rm), ctx);
+ emit(ARM_MOV_I(rm, 0), ctx);
+ } else {
+ emit(ARM_MOV_SI(rd, rm, SRTYPE_LSR, val - 32), ctx);
+ emit(ARM_MOV_I(rm, 0), ctx);
+ }
+
+ if (dstk) {
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+}
+
+/* dst = dst >> val (signed) */
+static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk,
+ const u32 val, struct jit_ctx *ctx){
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ /* Setup operands */
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do ARSH operation */
+ if (val < 32) {
+ emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx);
+ emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx);
+ emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, val), ctx);
+ } else if (val == 32) {
+ emit(ARM_MOV_R(rd, rm), ctx);
+ emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx);
+ } else {
+ emit(ARM_MOV_SI(rd, rm, SRTYPE_ASR, val - 32), ctx);
+ emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx);
+ }
+
+ if (dstk) {
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+}
+
+static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ /* Setup operands for multiplication */
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+ u8 rt = sstk ? tmp2[1] : src_lo;
+ u8 rn = sstk ? tmp2[0] : src_hi;
+
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+ if (sstk) {
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
+ emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_hi)), ctx);
+ }
+
+ /* Do Multiplication */
+ emit(ARM_MUL(ARM_IP, rd, rn), ctx);
+ emit(ARM_MUL(ARM_LR, rm, rt), ctx);
+ /* As we are using ARM_LR */
+ ctx->seen |= SEEN_CALL;
+ emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);
+
+ emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx);
+ emit(ARM_ADD_R(rm, ARM_LR, rm), ctx);
+ if (dstk) {
+ emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ } else {
+ emit(ARM_MOV_R(rd, ARM_IP), ctx);
+ }
+}
+
+/* *(size *)(dst + off) = src */
+static inline void emit_str_r(const u8 dst, const u8 src, bool dstk,
+ const s32 off, struct jit_ctx *ctx, const u8 sz){
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rd = dstk ? tmp[1] : dst;
+
+ if (dstk)
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+ if (off) {
+ emit_a32_mov_i(tmp[0], off, false, ctx);
+ emit(ARM_ADD_R(tmp[0], rd, tmp[0]), ctx);
+ rd = tmp[0];
+ }
+ switch (sz) {
+ case BPF_W:
+ /* Store a Word */
+ emit(ARM_STR_I(src, rd, 0), ctx);
+ break;
+ case BPF_H:
+ /* Store a HalfWord */
+ emit(ARM_STRH_I(src, rd, 0), ctx);
+ break;
+ case BPF_B:
+ /* Store a Byte */
+ emit(ARM_STRB_I(src, rd, 0), ctx);
+ break;
+ }
+}
+
+/* dst = *(size*)(src + off) */
+static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk,
+ const s32 off, struct jit_ctx *ctx, const u8 sz){
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rd = dstk ? tmp[1] : dst;
+ u8 rm = src;
+
+ if (off) {
+ emit_a32_mov_i(tmp[0], off, false, ctx);
+ emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
+ rm = tmp[0];
+ }
+ switch (sz) {
+ case BPF_W:
+ /* Load a Word */
+ emit(ARM_LDR_I(rd, rm, 0), ctx);
+ break;
+ case BPF_H:
+ /* Load a HalfWord */
+ emit(ARM_LDRH_I(rd, rm, 0), ctx);
+ break;
+ case BPF_B:
+ /* Load a Byte */
+ emit(ARM_LDRB_I(rd, rm, 0), ctx);
+ break;
+ }
+ if (dstk)
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+}
+
+/* Arithmetic Operation */
+static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
+ const u8 rn, struct jit_ctx *ctx, u8 op) {
+ switch (op) {
+ case BPF_JSET:
+ ctx->seen |= SEEN_CALL;
+ emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
+ emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
+ emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
+ break;
+ case BPF_JEQ:
+ case BPF_JNE:
+ case BPF_JGT:
+ case BPF_JGE:
+ case BPF_JLE:
+ case BPF_JLT:
+ emit(ARM_CMP_R(rd, rm), ctx);
+ _emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
+ break;
+ case BPF_JSLE:
+ case BPF_JSGT:
+ emit(ARM_CMP_R(rn, rt), ctx);
+ emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
+ break;
+ case BPF_JSLT:
+ case BPF_JSGE:
+ emit(ARM_CMP_R(rt, rn), ctx);
+ emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
+ break;
+ }
+}
+
+static int out_offset = -1; /* initialized on the first pass of build_body() */
+static int emit_bpf_tail_call(struct jit_ctx *ctx)
{
- if (!(ctx->seen & SEEN_X))
- ctx->flags |= FLAG_NEED_X_RESET;
- ctx->seen |= SEEN_X;
+ /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
+ const u8 *r2 = bpf2a32[BPF_REG_2];
+ const u8 *r3 = bpf2a32[BPF_REG_3];
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ const u8 *tcc = bpf2a32[TCALL_CNT];
+ const int idx0 = ctx->idx;
+#define cur_offset (ctx->idx - idx0)
+#define jmp_offset (out_offset - (cur_offset))
+ u32 off, lo, hi;
+
+ /* if (index >= array->map.max_entries)
+ * goto out;
+ */
+ off = offsetof(struct bpf_array, map.max_entries);
+ /* array->map.max_entries */
+ emit_a32_mov_i(tmp[1], off, false, ctx);
+ emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
+ emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx);
+ /* index (64 bit) */
+ emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
+ /* index >= array->map.max_entries */
+ emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx);
+ _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
+
+ /* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
+ * tail_call_cnt++;
+ */
+ lo = (u32)MAX_TAIL_CALL_CNT;
+ hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
+ emit(ARM_LDR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx);
+ emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx);
+ emit(ARM_CMP_I(tmp[0], hi), ctx);
+ _emit(ARM_COND_EQ, ARM_CMP_I(tmp[1], lo), ctx);
+ _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
+ emit(ARM_ADDS_I(tmp[1], tmp[1], 1), ctx);
+ emit(ARM_ADC_I(tmp[0], tmp[0], 0), ctx);
+ emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx);
+ emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx);
+
+ /* prog = array->ptrs[index]
+ * if (prog == NULL)
+ * goto out;
+ */
+ off = offsetof(struct bpf_array, ptrs);
+ emit_a32_mov_i(tmp[1], off, false, ctx);
+ emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
+ emit(ARM_ADD_R(tmp[1], tmp2[1], tmp[1]), ctx);
+ emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
+ emit(ARM_MOV_SI(tmp[0], tmp2[1], SRTYPE_ASL, 2), ctx);
+ emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx);
+ emit(ARM_CMP_I(tmp[1], 0), ctx);
+ _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
+
+ /* goto *(prog->bpf_func + prologue_size); */
+ off = offsetof(struct bpf_prog, bpf_func);
+ emit_a32_mov_i(tmp2[1], off, false, ctx);
+ emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx);
+ emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
+ emit(ARM_BX(tmp[1]), ctx);
+
+ /* out: */
+ if (out_offset == -1)
+ out_offset = cur_offset;
+ if (cur_offset != out_offset) {
+ pr_err_once("tail_call out_offset = %d, expected %d!\n",
+ cur_offset, out_offset);
+ return -1;
+ }
+ return 0;
+#undef cur_offset
+#undef jmp_offset
}
-static int build_body(struct jit_ctx *ctx)
+/* 0xabcd => 0xcdab */
+static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx)
{
- void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
- const struct bpf_prog *prog = ctx->skf;
- const struct sock_filter *inst;
- unsigned i, load_order, off, condt;
- int imm12;
- u32 k;
+#if __LINUX_ARM_ARCH__ < 6
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+
+ emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
+ emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx);
+ emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);
+ emit(ARM_ORR_SI(rd, tmp2[0], tmp2[1], SRTYPE_LSL, 8), ctx);
+#else /* ARMv6+ */
+ emit(ARM_REV16(rd, rn), ctx);
+#endif
+}
- for (i = 0; i < prog->len; i++) {
- u16 code;
+/* 0xabcdefgh => 0xghefcdab */
+static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx)
+{
+#if __LINUX_ARM_ARCH__ < 6
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+
+ emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
+ emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx);
+ emit(ARM_ORR_SI(ARM_IP, tmp2[0], tmp2[1], SRTYPE_LSL, 24), ctx);
+
+ emit(ARM_MOV_SI(tmp2[1], rn, SRTYPE_LSR, 8), ctx);
+ emit(ARM_AND_I(tmp2[1], tmp2[1], 0xff), ctx);
+ emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 16), ctx);
+ emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);
+ emit(ARM_MOV_SI(tmp2[0], tmp2[0], SRTYPE_LSL, 8), ctx);
+ emit(ARM_ORR_SI(tmp2[0], tmp2[0], tmp2[1], SRTYPE_LSL, 16), ctx);
+ emit(ARM_ORR_R(rd, ARM_IP, tmp2[0]), ctx);
+
+#else /* ARMv6+ */
+ emit(ARM_REV(rd, rn), ctx);
+#endif
+}
- inst = &(prog->insns[i]);
- /* K as an immediate value operand */
- k = inst->k;
- code = bpf_anc_helper(inst);
+// push the scratch stack register on top of the stack
+static inline void emit_push_r64(const u8 src[], const u8 shift,
+ struct jit_ctx *ctx)
+{
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ u16 reg_set = 0;
- /* compute offsets only in the fake pass */
- if (ctx->target == NULL)
- ctx->offsets[i] = ctx->idx * 4;
+ emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(src[1]+shift)), ctx);
+ emit(ARM_LDR_I(tmp2[0], ARM_SP, STACK_VAR(src[0]+shift)), ctx);
+
+ reg_set = (1 << tmp2[1]) | (1 << tmp2[0]);
+ emit(ARM_PUSH(reg_set), ctx);
+}
+
+static void build_prologue(struct jit_ctx *ctx)
+{
+ const u8 r0 = bpf2a32[BPF_REG_0][1];
+ const u8 r2 = bpf2a32[BPF_REG_1][1];
+ const u8 r3 = bpf2a32[BPF_REG_1][0];
+ const u8 r4 = bpf2a32[BPF_REG_6][1];
+ const u8 r5 = bpf2a32[BPF_REG_6][0];
+ const u8 r6 = bpf2a32[TMP_REG_1][1];
+ const u8 r7 = bpf2a32[TMP_REG_1][0];
+ const u8 r8 = bpf2a32[TMP_REG_2][1];
+ const u8 r10 = bpf2a32[TMP_REG_2][0];
+ const u8 fplo = bpf2a32[BPF_REG_FP][1];
+ const u8 fphi = bpf2a32[BPF_REG_FP][0];
+ const u8 sp = ARM_SP;
+ const u8 *tcc = bpf2a32[TCALL_CNT];
+
+ u16 reg_set = 0;
+
+ /*
+ * eBPF prog stack layout
+ *
+ * high
+ * original ARM_SP => +-----+ eBPF prologue
+ * |FP/LR|
+ * current ARM_FP => +-----+
+ * | ... | callee saved registers
+ * eBPF fp register => +-----+ <= (BPF_FP)
+ * | ... | eBPF JIT scratch space
+ * | | eBPF prog stack
+ * +-----+
+ * |RSVD | JIT scratchpad
+ * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE)
+ * | |
+ * | ... | Function call stack
+ * | |
+ * +-----+
+ * low
+ */
+
+ /* Save callee saved registers. */
+ reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
+#ifdef CONFIG_FRAME_POINTER
+ reg_set |= (1<<ARM_FP) | (1<<ARM_IP) | (1<<ARM_LR) | (1<<ARM_PC);
+ emit(ARM_MOV_R(ARM_IP, sp), ctx);
+ emit(ARM_PUSH(reg_set), ctx);
+ emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
+#else
+ /* Check if call instruction exists in BPF body */
+ if (ctx->seen & SEEN_CALL)
+ reg_set |= (1<<ARM_LR);
+ emit(ARM_PUSH(reg_set), ctx);
+#endif
+ /* Compute the eBPF frame pointer (SP - SCRATCH_SIZE) in IP for later */
+ emit(ARM_SUB_I(ARM_IP, sp, SCRATCH_SIZE), ctx);
+
+ ctx->stack_size = imm8m(STACK_SIZE);
+
+ /* Set up function call stack */
+ emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
- switch (code) {
- case BPF_LD | BPF_IMM:
- emit_mov_i(r_A, k, ctx);
+ /* Set up BPF prog stack base register */
+ emit_a32_mov_r(fplo, ARM_IP, true, false, ctx);
+ emit_a32_mov_i(fphi, 0, true, ctx);
+
+ /* mov r4, 0 */
+ emit(ARM_MOV_I(r4, 0), ctx);
+
+ /* Move BPF_CTX to BPF_R1 */
+ emit(ARM_MOV_R(r3, r4), ctx);
+ emit(ARM_MOV_R(r2, r0), ctx);
+ /* Initialize Tail Count */
+ emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[0])), ctx);
+ emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[1])), ctx);
+ /* end of prologue */
+}
+
+static void build_epilogue(struct jit_ctx *ctx)
+{
+ const u8 r4 = bpf2a32[BPF_REG_6][1];
+ const u8 r5 = bpf2a32[BPF_REG_6][0];
+ const u8 r6 = bpf2a32[TMP_REG_1][1];
+ const u8 r7 = bpf2a32[TMP_REG_1][0];
+ const u8 r8 = bpf2a32[TMP_REG_2][1];
+ const u8 r10 = bpf2a32[TMP_REG_2][0];
+ u16 reg_set = 0;
+
+ /* unwind function call stack */
+ emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
+
+ /* restore callee saved registers. */
+ reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
+#ifdef CONFIG_FRAME_POINTER
+ /* the first instruction of the prologue was: mov ip, sp */
+ reg_set |= (1<<ARM_FP) | (1<<ARM_SP) | (1<<ARM_PC);
+ emit(ARM_LDM(ARM_SP, reg_set), ctx);
+#else
+ if (ctx->seen & SEEN_CALL)
+ reg_set |= (1<<ARM_PC);
+ /* Restore callee saved registers. */
+ emit(ARM_POP(reg_set), ctx);
+ /* Return to the caller */
+ if (!(ctx->seen & SEEN_CALL))
+ emit(ARM_BX(ARM_LR), ctx);
+#endif
+}
+
+/*
+ * Convert an eBPF instruction to native instructions, i.e.
+ * JIT an eBPF instruction.
+ * Returns :
+ * 0 - Successfully JITed an 8-byte eBPF instruction
+ * >0 - Successfully JITed a 16-byte eBPF instruction
+ * <0 - Failed to JIT.
+ */
+static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+ const u8 code = insn->code;
+ const u8 *dst = bpf2a32[insn->dst_reg];
+ const u8 *src = bpf2a32[insn->src_reg];
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ const s16 off = insn->off;
+ const s32 imm = insn->imm;
+ const int i = insn - ctx->prog->insnsi;
+ const bool is64 = BPF_CLASS(code) == BPF_ALU64;
+ const bool dstk = is_on_stack(insn->dst_reg);
+ const bool sstk = is_on_stack(insn->src_reg);
+ u8 rd, rt, rm, rn;
+ s32 jmp_offset;
+
+#define check_imm(bits, imm) do { \
+ if ((((imm) > 0) && ((imm) >> (bits))) || \
+ (((imm) < 0) && (~(imm) >> (bits)))) { \
+ pr_info("[%2d] imm=%d(0x%x) out of range\n", \
+ i, imm, imm); \
+ return -EINVAL; \
+ } \
+} while (0)
+#define check_imm24(imm) check_imm(24, imm)
+
+ switch (code) {
+ /* ALU operations */
+
+ /* dst = src */
+ case BPF_ALU | BPF_MOV | BPF_K:
+ case BPF_ALU | BPF_MOV | BPF_X:
+ case BPF_ALU64 | BPF_MOV | BPF_K:
+ case BPF_ALU64 | BPF_MOV | BPF_X:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_a32_mov_r64(is64, dst, src, dstk, sstk, ctx);
break;
- case BPF_LD | BPF_W | BPF_LEN:
- ctx->seen |= SEEN_SKB;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
- emit(ARM_LDR_I(r_A, r_skb,
- offsetof(struct sk_buff, len)), ctx);
+ case BPF_K:
+ /* Sign-extend immediate value to destination reg */
+ emit_a32_mov_i64(is64, dst, imm, dstk, ctx);
break;
- case BPF_LD | BPF_MEM:
- /* A = scratch[k] */
- ctx->seen |= SEEN_MEM_WORD(k);
- emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
+ }
+ break;
+ /* dst = dst + src/imm */
+ /* dst = dst - src/imm */
+ /* dst = dst | src/imm */
+ /* dst = dst & src/imm */
+ /* dst = dst ^ src/imm */
+ /* dst = dst * src/imm */
+ /* dst = dst << src */
+ /* dst = dst >> src */
+ case BPF_ALU | BPF_ADD | BPF_K:
+ case BPF_ALU | BPF_ADD | BPF_X:
+ case BPF_ALU | BPF_SUB | BPF_K:
+ case BPF_ALU | BPF_SUB | BPF_X:
+ case BPF_ALU | BPF_OR | BPF_K:
+ case BPF_ALU | BPF_OR | BPF_X:
+ case BPF_ALU | BPF_AND | BPF_K:
+ case BPF_ALU | BPF_AND | BPF_X:
+ case BPF_ALU | BPF_XOR | BPF_K:
+ case BPF_ALU | BPF_XOR | BPF_X:
+ case BPF_ALU | BPF_MUL | BPF_K:
+ case BPF_ALU | BPF_MUL | BPF_X:
+ case BPF_ALU | BPF_LSH | BPF_X:
+ case BPF_ALU | BPF_RSH | BPF_X:
+ case BPF_ALU | BPF_ARSH | BPF_K:
+ case BPF_ALU | BPF_ARSH | BPF_X:
+ case BPF_ALU64 | BPF_ADD | BPF_K:
+ case BPF_ALU64 | BPF_ADD | BPF_X:
+ case BPF_ALU64 | BPF_SUB | BPF_K:
+ case BPF_ALU64 | BPF_SUB | BPF_X:
+ case BPF_ALU64 | BPF_OR | BPF_K:
+ case BPF_ALU64 | BPF_OR | BPF_X:
+ case BPF_ALU64 | BPF_AND | BPF_K:
+ case BPF_ALU64 | BPF_AND | BPF_X:
+ case BPF_ALU64 | BPF_XOR | BPF_K:
+ case BPF_ALU64 | BPF_XOR | BPF_X:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_a32_alu_r64(is64, dst, src, dstk, sstk,
+ ctx, BPF_OP(code));
break;
- case BPF_LD | BPF_W | BPF_ABS:
- load_order = 2;
- goto load;
- case BPF_LD | BPF_H | BPF_ABS:
- load_order = 1;
- goto load;
- case BPF_LD | BPF_B | BPF_ABS:
- load_order = 0;
-load:
- emit_mov_i(r_off, k, ctx);
-load_common:
- ctx->seen |= SEEN_DATA | SEEN_CALL;
-
- if (load_order > 0) {
- emit(ARM_SUB_I(r_scratch, r_skb_hl,
- 1 << load_order), ctx);
- emit(ARM_CMP_R(r_scratch, r_off), ctx);
- condt = ARM_COND_GE;
- } else {
- emit(ARM_CMP_R(r_skb_hl, r_off), ctx);
- condt = ARM_COND_HI;
- }
-
- /*
- * test for negative offset, only if we are
- * currently scheduled to take the fast
- * path. this will update the flags so that
- * the slowpath instruction are ignored if the
- * offset is negative.
- *
- * for loard_order == 0 the HI condition will
- * make loads at offset 0 take the slow path too.
+ case BPF_K:
+ /* Move immediate value to the temporary register
+ * and then do the ALU operation on the temporary
+ * register as this will sign-extend the immediate
+ * value into temporary reg and then it would be
+ * safe to do the operation on it.
*/
- _emit(condt, ARM_CMP_I(r_off, 0), ctx);
-
- _emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data),
- ctx);
-
- if (load_order == 0)
- _emit(condt, ARM_LDRB_I(r_A, r_scratch, 0),
- ctx);
- else if (load_order == 1)
- emit_load_be16(condt, r_A, r_scratch, ctx);
- else if (load_order == 2)
- emit_load_be32(condt, r_A, r_scratch, ctx);
-
- _emit(condt, ARM_B(b_imm(i + 1, ctx)), ctx);
-
- /* the slowpath */
- emit_mov_i(ARM_R3, (u32)load_func[load_order], ctx);
- emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
- /* the offset is already in R1 */
- emit_blx_r(ARM_R3, ctx);
- /* check the result of skb_copy_bits */
- emit(ARM_CMP_I(ARM_R1, 0), ctx);
- emit_err_ret(ARM_COND_NE, ctx);
- emit(ARM_MOV_R(r_A, ARM_R0), ctx);
+ emit_a32_mov_i64(is64, tmp2, imm, false, ctx);
+ emit_a32_alu_r64(is64, dst, tmp2, dstk, false,
+ ctx, BPF_OP(code));
break;
- case BPF_LD | BPF_W | BPF_IND:
- load_order = 2;
- goto load_ind;
- case BPF_LD | BPF_H | BPF_IND:
- load_order = 1;
- goto load_ind;
- case BPF_LD | BPF_B | BPF_IND:
- load_order = 0;
-load_ind:
- update_on_xread(ctx);
- OP_IMM3(ARM_ADD, r_off, r_X, k, ctx);
- goto load_common;
- case BPF_LDX | BPF_IMM:
- ctx->seen |= SEEN_X;
- emit_mov_i(r_X, k, ctx);
+ }
+ break;
+ /* dst = dst / src(imm) */
+ /* dst = dst % src(imm) */
+ case BPF_ALU | BPF_DIV | BPF_K:
+ case BPF_ALU | BPF_DIV | BPF_X:
+ case BPF_ALU | BPF_MOD | BPF_K:
+ case BPF_ALU | BPF_MOD | BPF_X:
+ rt = src_lo;
+ rd = dstk ? tmp2[1] : dst_lo;
+ if (dstk)
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ rt = sstk ? tmp2[0] : rt;
+ if (sstk)
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)),
+ ctx);
break;
- case BPF_LDX | BPF_W | BPF_LEN:
- ctx->seen |= SEEN_X | SEEN_SKB;
- emit(ARM_LDR_I(r_X, r_skb,
- offsetof(struct sk_buff, len)), ctx);
+ case BPF_K:
+ rt = tmp2[0];
+ emit_a32_mov_i(rt, imm, false, ctx);
break;
- case BPF_LDX | BPF_MEM:
- ctx->seen |= SEEN_X | SEEN_MEM_WORD(k);
- emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
+ }
+ emit_udivmod(rd, rd, rt, ctx, BPF_OP(code));
+ if (dstk)
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+ break;
+ case BPF_ALU64 | BPF_DIV | BPF_K:
+ case BPF_ALU64 | BPF_DIV | BPF_X:
+ case BPF_ALU64 | BPF_MOD | BPF_K:
+ case BPF_ALU64 | BPF_MOD | BPF_X:
+ goto notyet;
+ /* dst = dst >> imm */
+ /* dst = dst << imm */
+ case BPF_ALU | BPF_RSH | BPF_K:
+ case BPF_ALU | BPF_LSH | BPF_K:
+ if (unlikely(imm > 31))
+ return -EINVAL;
+ if (imm)
+ emit_a32_alu_i(dst_lo, imm, dstk, ctx, BPF_OP(code));
+ emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+ break;
+ /* dst = dst << imm */
+ case BPF_ALU64 | BPF_LSH | BPF_K:
+ if (unlikely(imm > 63))
+ return -EINVAL;
+ emit_a32_lsh_i64(dst, dstk, imm, ctx);
+ break;
+ /* dst = dst >> imm */
+ case BPF_ALU64 | BPF_RSH | BPF_K:
+ if (unlikely(imm > 63))
+ return -EINVAL;
+ emit_a32_lsr_i64(dst, dstk, imm, ctx);
+ break;
+ /* dst = dst << src */
+ case BPF_ALU64 | BPF_LSH | BPF_X:
+ emit_a32_lsh_r64(dst, src, dstk, sstk, ctx);
+ break;
+ /* dst = dst >> src */
+ case BPF_ALU64 | BPF_RSH | BPF_X:
+ emit_a32_lsr_r64(dst, src, dstk, sstk, ctx);
+ break;
+ /* dst = dst >> src (signed) */
+ case BPF_ALU64 | BPF_ARSH | BPF_X:
+ emit_a32_arsh_r64(dst, src, dstk, sstk, ctx);
+ break;
+ /* dst = dst >> imm (signed) */
+ case BPF_ALU64 | BPF_ARSH | BPF_K:
+ if (unlikely(imm > 63))
+ return -EINVAL;
+ emit_a32_arsh_i64(dst, dstk, imm, ctx);
+ break;
+ /* dst = -dst */
+ case BPF_ALU | BPF_NEG:
+ emit_a32_alu_i(dst_lo, 0, dstk, ctx, BPF_OP(code));
+ emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+ break;
+ /* dst = -dst (64 bit) */
+ case BPF_ALU64 | BPF_NEG:
+ emit_a32_neg64(dst, dstk, ctx);
+ break;
+ /* dst = dst * src/imm */
+ case BPF_ALU64 | BPF_MUL | BPF_X:
+ case BPF_ALU64 | BPF_MUL | BPF_K:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_a32_mul_r64(dst, src, dstk, sstk, ctx);
break;
- case BPF_LDX | BPF_B | BPF_MSH:
- /* x = ((*(frame + k)) & 0xf) << 2; */
- ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL;
- /* the interpreter should deal with the negative K */
- if ((int)k < 0)
- return -1;
- /* offset in r1: we might have to take the slow path */
- emit_mov_i(r_off, k, ctx);
- emit(ARM_CMP_R(r_skb_hl, r_off), ctx);
-
- /* load in r0: common with the slowpath */
- _emit(ARM_COND_HI, ARM_LDRB_R(ARM_R0, r_skb_data,
- ARM_R1), ctx);
- /*
- * emit_mov_i() might generate one or two instructions,
- * the same holds for emit_blx_r()
+ case BPF_K:
+ /* Move immediate value to the temporary register
+ * and then do the multiplication on it as this
+ * will sign-extend the immediate value into temp
+ * reg then it would be safe to do the operation
+ * on it.
*/
- _emit(ARM_COND_HI, ARM_B(b_imm(i + 1, ctx) - 2), ctx);
-
- emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
- /* r_off is r1 */
- emit_mov_i(ARM_R3, (u32)jit_get_skb_b, ctx);
- emit_blx_r(ARM_R3, ctx);
- /* check the return value of skb_copy_bits */
- emit(ARM_CMP_I(ARM_R1, 0), ctx);
- emit_err_ret(ARM_COND_NE, ctx);
-
- emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx);
- emit(ARM_LSL_I(r_X, r_X, 2), ctx);
- break;
- case BPF_ST:
- ctx->seen |= SEEN_MEM_WORD(k);
- emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
- break;
- case BPF_STX:
- update_on_xread(ctx);
- ctx->seen |= SEEN_MEM_WORD(k);
- emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
- break;
- case BPF_ALU | BPF_ADD | BPF_K:
- /* A += K */
- OP_IMM3(ARM_ADD, r_A, r_A, k, ctx);
- break;
- case BPF_ALU | BPF_ADD | BPF_X:
- update_on_xread(ctx);
- emit(ARM_ADD_R(r_A, r_A, r_X), ctx);
- break;
- case BPF_ALU | BPF_SUB | BPF_K:
- /* A -= K */
- OP_IMM3(ARM_SUB, r_A, r_A, k, ctx);
- break;
- case BPF_ALU | BPF_SUB | BPF_X:
- update_on_xread(ctx);
- emit(ARM_SUB_R(r_A, r_A, r_X), ctx);
- break;
- case BPF_ALU | BPF_MUL | BPF_K:
- /* A *= K */
- emit_mov_i(r_scratch, k, ctx);
- emit(ARM_MUL(r_A, r_A, r_scratch), ctx);
- break;
- case BPF_ALU | BPF_MUL | BPF_X:
- update_on_xread(ctx);
- emit(ARM_MUL(r_A, r_A, r_X), ctx);
+ emit_a32_mov_i64(is64, tmp2, imm, false, ctx);
+ emit_a32_mul_r64(dst, tmp2, dstk, false, ctx);
break;
- case BPF_ALU | BPF_DIV | BPF_K:
- if (k == 1)
- break;
- emit_mov_i(r_scratch, k, ctx);
- emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_DIV);
- break;
- case BPF_ALU | BPF_DIV | BPF_X:
- update_on_xread(ctx);
- emit(ARM_CMP_I(r_X, 0), ctx);
- emit_err_ret(ARM_COND_EQ, ctx);
- emit_udivmod(r_A, r_A, r_X, ctx, BPF_DIV);
+ }
+ break;
+ /* dst = htole(dst) */
+ /* dst = htobe(dst) */
+ case BPF_ALU | BPF_END | BPF_FROM_LE:
+ case BPF_ALU | BPF_END | BPF_FROM_BE:
+ rd = dstk ? tmp[0] : dst_hi;
+ rt = dstk ? tmp[1] : dst_lo;
+ if (dstk) {
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+ if (BPF_SRC(code) == BPF_FROM_LE)
+ goto emit_bswap_uxt;
+ switch (imm) {
+ case 16:
+ emit_rev16(rt, rt, ctx);
+ goto emit_bswap_uxt;
+ case 32:
+ emit_rev32(rt, rt, ctx);
+ goto emit_bswap_uxt;
+ case 64:
+ /* Because of the usage of ARM_LR */
+ ctx->seen |= SEEN_CALL;
+ emit_rev32(ARM_LR, rt, ctx);
+ emit_rev32(rt, rd, ctx);
+ emit(ARM_MOV_R(rd, ARM_LR), ctx);
break;
- case BPF_ALU | BPF_MOD | BPF_K:
- if (k == 1) {
- emit_mov_i(r_A, 0, ctx);
- break;
- }
- emit_mov_i(r_scratch, k, ctx);
- emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_MOD);
+ }
+ goto exit;
+emit_bswap_uxt:
+ switch (imm) {
+ case 16:
+ /* zero-extend 16 bits into 64 bits */
+#if __LINUX_ARM_ARCH__ < 6
+ emit_a32_mov_i(tmp2[1], 0xffff, false, ctx);
+ emit(ARM_AND_R(rt, rt, tmp2[1]), ctx);
+#else /* ARMv6+ */
+ emit(ARM_UXTH(rt, rt), ctx);
+#endif
+ emit(ARM_EOR_R(rd, rd, rd), ctx);
break;
- case BPF_ALU | BPF_MOD | BPF_X:
- update_on_xread(ctx);
- emit(ARM_CMP_I(r_X, 0), ctx);
- emit_err_ret(ARM_COND_EQ, ctx);
- emit_udivmod(r_A, r_A, r_X, ctx, BPF_MOD);
+ case 32:
+ /* zero-extend 32 bits into 64 bits */
+ emit(ARM_EOR_R(rd, rd, rd), ctx);
break;
- case BPF_ALU | BPF_OR | BPF_K:
- /* A |= K */
- OP_IMM3(ARM_ORR, r_A, r_A, k, ctx);
+ case 64:
+ /* nop */
break;
- case BPF_ALU | BPF_OR | BPF_X:
- update_on_xread(ctx);
- emit(ARM_ORR_R(r_A, r_A, r_X), ctx);
+ }
+exit:
+ if (dstk) {
+ emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+ break;
+ /* dst = imm64 */
+ case BPF_LD | BPF_IMM | BPF_DW:
+ {
+ const struct bpf_insn insn1 = insn[1];
+ u32 hi, lo = imm;
+
+ hi = insn1.imm;
+ emit_a32_mov_i(dst_lo, lo, dstk, ctx);
+ emit_a32_mov_i(dst_hi, hi, dstk, ctx);
+
+ return 1;
+ }
+ /* LDX: dst = *(size *)(src + off) */
+ case BPF_LDX | BPF_MEM | BPF_W:
+ case BPF_LDX | BPF_MEM | BPF_H:
+ case BPF_LDX | BPF_MEM | BPF_B:
+ case BPF_LDX | BPF_MEM | BPF_DW:
+ rn = sstk ? tmp2[1] : src_lo;
+ if (sstk)
+ emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
+ switch (BPF_SIZE(code)) {
+ case BPF_W:
+ /* Load a Word */
+ case BPF_H:
+ /* Load a Half-Word */
+ case BPF_B:
+ /* Load a Byte */
+ emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code));
+ emit_a32_mov_i(dst_hi, 0, dstk, ctx);
break;
- case BPF_ALU | BPF_XOR | BPF_K:
- /* A ^= K; */
- OP_IMM3(ARM_EOR, r_A, r_A, k, ctx);
+ case BPF_DW:
+ /* Load a double word */
+ emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W);
+ emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W);
break;
- case BPF_ANC | SKF_AD_ALU_XOR_X:
- case BPF_ALU | BPF_XOR | BPF_X:
- /* A ^= X */
- update_on_xread(ctx);
- emit(ARM_EOR_R(r_A, r_A, r_X), ctx);
+ }
+ break;
+ /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
+ case BPF_LD | BPF_ABS | BPF_W:
+ case BPF_LD | BPF_ABS | BPF_H:
+ case BPF_LD | BPF_ABS | BPF_B:
+ /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
+ case BPF_LD | BPF_IND | BPF_W:
+ case BPF_LD | BPF_IND | BPF_H:
+ case BPF_LD | BPF_IND | BPF_B:
+ {
+ const u8 r4 = bpf2a32[BPF_REG_6][1]; /* r4 = ptr to sk_buff */
+ const u8 r0 = bpf2a32[BPF_REG_0][1]; /*r0: struct sk_buff *skb*/
+ /* rtn value */
+ const u8 r1 = bpf2a32[BPF_REG_0][0]; /* r1: int k */
+ const u8 r2 = bpf2a32[BPF_REG_1][1]; /* r2: unsigned int size */
+ const u8 r3 = bpf2a32[BPF_REG_1][0]; /* r3: void *buffer */
+ const u8 r6 = bpf2a32[TMP_REG_1][1]; /* r6: void *(*func)(..) */
+ int size;
+
+ /* Setting up first argument */
+ emit(ARM_MOV_R(r0, r4), ctx);
+
+ /* Setting up second argument */
+ emit_a32_mov_i(r1, imm, false, ctx);
+ if (BPF_MODE(code) == BPF_IND)
+ emit_a32_alu_r(r1, src_lo, false, sstk, ctx,
+ false, false, BPF_ADD);
+
+ /* Setting up third argument */
+ switch (BPF_SIZE(code)) {
+ case BPF_W:
+ size = 4;
break;
- case BPF_ALU | BPF_AND | BPF_K:
- /* A &= K */
- OP_IMM3(ARM_AND, r_A, r_A, k, ctx);
+ case BPF_H:
+ size = 2;
break;
- case BPF_ALU | BPF_AND | BPF_X:
- update_on_xread(ctx);
- emit(ARM_AND_R(r_A, r_A, r_X), ctx);
+ case BPF_B:
+ size = 1;
break;
- case BPF_ALU | BPF_LSH | BPF_K:
- if (unlikely(k > 31))
- return -1;
- emit(ARM_LSL_I(r_A, r_A, k), ctx);
+ default:
+ return -EINVAL;
+ }
+ emit_a32_mov_i(r2, size, false, ctx);
+
+ /* Setting up fourth argument */
+ emit(ARM_ADD_I(r3, ARM_SP, imm8m(SKB_BUFFER)), ctx);
+
+ /* Setting up function pointer to call */
+ emit_a32_mov_i(r6, (unsigned int)bpf_load_pointer, false, ctx);
+ emit_blx_r(r6, ctx);
+
+ emit(ARM_EOR_R(r1, r1, r1), ctx);
+ /* Check whether the returned pointer is NULL.
+ * If it is NULL, jump to the epilogue;
+ * otherwise load the value from the returned address.
+ */
+ emit(ARM_CMP_I(r0, 0), ctx);
+ jmp_offset = epilogue_offset(ctx);
+ check_imm24(jmp_offset);
+ _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
+
+ /* Load value from the address */
+ switch (BPF_SIZE(code)) {
+ case BPF_W:
+ emit(ARM_LDR_I(r0, r0, 0), ctx);
+ emit_rev32(r0, r0, ctx);
break;
- case BPF_ALU | BPF_LSH | BPF_X:
- update_on_xread(ctx);
- emit(ARM_LSL_R(r_A, r_A, r_X), ctx);
+ case BPF_H:
+ emit(ARM_LDRH_I(r0, r0, 0), ctx);
+ emit_rev16(r0, r0, ctx);
break;
- case BPF_ALU | BPF_RSH | BPF_K:
- if (unlikely(k > 31))
- return -1;
- if (k)
- emit(ARM_LSR_I(r_A, r_A, k), ctx);
+ case BPF_B:
+ emit(ARM_LDRB_I(r0, r0, 0), ctx);
+ /* No need to reverse */
break;
- case BPF_ALU | BPF_RSH | BPF_X:
- update_on_xread(ctx);
- emit(ARM_LSR_R(r_A, r_A, r_X), ctx);
+ }
+ break;
+ }
+ /* ST: *(size *)(dst + off) = imm */
+ case BPF_ST | BPF_MEM | BPF_W:
+ case BPF_ST | BPF_MEM | BPF_H:
+ case BPF_ST | BPF_MEM | BPF_B:
+ case BPF_ST | BPF_MEM | BPF_DW:
+ switch (BPF_SIZE(code)) {
+ case BPF_DW:
+ /* Sign-extend immediate value into temp reg */
+ emit_a32_mov_i64(true, tmp2, imm, false, ctx);
+ emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, BPF_W);
+ emit_str_r(dst_lo, tmp2[0], dstk, off+4, ctx, BPF_W);
break;
- case BPF_ALU | BPF_NEG:
- /* A = -A */
- emit(ARM_RSB_I(r_A, r_A, 0), ctx);
+ case BPF_W:
+ case BPF_H:
+ case BPF_B:
+ emit_a32_mov_i(tmp2[1], imm, false, ctx);
+ emit_str_r(dst_lo, tmp2[1], dstk, off, ctx,
+ BPF_SIZE(code));
break;
- case BPF_JMP | BPF_JA:
- /* pc += K */
- emit(ARM_B(b_imm(i + k + 1, ctx)), ctx);
+ }
+ break;
+ /* STX XADD: lock *(u32 *)(dst + off) += src */
+ case BPF_STX | BPF_XADD | BPF_W:
+ /* STX XADD: lock *(u64 *)(dst + off) += src */
+ case BPF_STX | BPF_XADD | BPF_DW:
+ goto notyet;
+ /* STX: *(size *)(dst + off) = src */
+ case BPF_STX | BPF_MEM | BPF_W:
+ case BPF_STX | BPF_MEM | BPF_H:
+ case BPF_STX | BPF_MEM | BPF_B:
+ case BPF_STX | BPF_MEM | BPF_DW:
+ {
+ u8 sz = BPF_SIZE(code);
+
+ rn = sstk ? tmp2[1] : src_lo;
+ rm = sstk ? tmp2[0] : src_hi;
+ if (sstk) {
+ emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx);
+ }
+
+ /* Store the value */
+ if (BPF_SIZE(code) == BPF_DW) {
+ emit_str_r(dst_lo, rn, dstk, off, ctx, BPF_W);
+ emit_str_r(dst_lo, rm, dstk, off+4, ctx, BPF_W);
+ } else {
+ emit_str_r(dst_lo, rn, dstk, off, ctx, sz);
+ }
+ break;
+ }
+ /* PC += off if dst == src */
+ /* PC += off if dst > src */
+ /* PC += off if dst >= src */
+ /* PC += off if dst < src */
+ /* PC += off if dst <= src */
+ /* PC += off if dst != src */
+ /* PC += off if dst > src (signed) */
+ /* PC += off if dst >= src (signed) */
+ /* PC += off if dst < src (signed) */
+ /* PC += off if dst <= src (signed) */
+ /* PC += off if dst & src */
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ /* Setup source registers */
+ rm = sstk ? tmp2[0] : src_hi;
+ rn = sstk ? tmp2[1] : src_lo;
+ if (sstk) {
+ emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx);
+ }
+ goto go_jmp;
+ /* PC += off if dst == imm */
+ /* PC += off if dst > imm */
+ /* PC += off if dst >= imm */
+ /* PC += off if dst < imm */
+ /* PC += off if dst <= imm */
+ /* PC += off if dst != imm */
+ /* PC += off if dst > imm (signed) */
+ /* PC += off if dst >= imm (signed) */
+ /* PC += off if dst < imm (signed) */
+ /* PC += off if dst <= imm (signed) */
+ /* PC += off if dst & imm */
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JNE | BPF_K:
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
+ if (off == 0)
break;
- case BPF_JMP | BPF_JEQ | BPF_K:
- /* pc += (A == K) ? pc->jt : pc->jf */
- condt = ARM_COND_EQ;
- goto cmp_imm;
- case BPF_JMP | BPF_JGT | BPF_K:
- /* pc += (A > K) ? pc->jt : pc->jf */
- condt = ARM_COND_HI;
- goto cmp_imm;
- case BPF_JMP | BPF_JGE | BPF_K:
- /* pc += (A >= K) ? pc->jt : pc->jf */
- condt = ARM_COND_HS;
-cmp_imm:
- imm12 = imm8m(k);
- if (imm12 < 0) {
- emit_mov_i_no8m(r_scratch, k, ctx);
- emit(ARM_CMP_R(r_A, r_scratch), ctx);
- } else {
- emit(ARM_CMP_I(r_A, imm12), ctx);
- }
-cond_jump:
- if (inst->jt)
- _emit(condt, ARM_B(b_imm(i + inst->jt + 1,
- ctx)), ctx);
- if (inst->jf)
- _emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1,
- ctx)), ctx);
+ rm = tmp2[0];
+ rn = tmp2[1];
+ /* Sign-extend immediate value */
+ emit_a32_mov_i64(true, tmp2, imm, false, ctx);
+go_jmp:
+ /* Setup destination register */
+ rd = dstk ? tmp[0] : dst_hi;
+ rt = dstk ? tmp[1] : dst_lo;
+ if (dstk) {
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Check for the condition */
+ emit_ar_r(rd, rt, rm, rn, ctx, BPF_OP(code));
+
+ /* Setup JUMP instruction */
+ jmp_offset = bpf2a32_offset(i+off, i, ctx);
+ switch (BPF_OP(code)) {
+ case BPF_JNE:
+ case BPF_JSET:
+ _emit(ARM_COND_NE, ARM_B(jmp_offset), ctx);
break;
- case BPF_JMP | BPF_JEQ | BPF_X:
- /* pc += (A == X) ? pc->jt : pc->jf */
- condt = ARM_COND_EQ;
- goto cmp_x;
- case BPF_JMP | BPF_JGT | BPF_X:
- /* pc += (A > X) ? pc->jt : pc->jf */
- condt = ARM_COND_HI;
- goto cmp_x;
- case BPF_JMP | BPF_JGE | BPF_X:
- /* pc += (A >= X) ? pc->jt : pc->jf */
- condt = ARM_COND_CS;
-cmp_x:
- update_on_xread(ctx);
- emit(ARM_CMP_R(r_A, r_X), ctx);
- goto cond_jump;
- case BPF_JMP | BPF_JSET | BPF_K:
- /* pc += (A & K) ? pc->jt : pc->jf */
- condt = ARM_COND_NE;
- /* not set iff all zeroes iff Z==1 iff EQ */
-
- imm12 = imm8m(k);
- if (imm12 < 0) {
- emit_mov_i_no8m(r_scratch, k, ctx);
- emit(ARM_TST_R(r_A, r_scratch), ctx);
- } else {
- emit(ARM_TST_I(r_A, imm12), ctx);
- }
- goto cond_jump;
- case BPF_JMP | BPF_JSET | BPF_X:
- /* pc += (A & X) ? pc->jt : pc->jf */
- update_on_xread(ctx);
- condt = ARM_COND_NE;
- emit(ARM_TST_R(r_A, r_X), ctx);
- goto cond_jump;
- case BPF_RET | BPF_A:
- emit(ARM_MOV_R(ARM_R0, r_A), ctx);
- goto b_epilogue;
- case BPF_RET | BPF_K:
- if ((k == 0) && (ctx->ret0_fp_idx < 0))
- ctx->ret0_fp_idx = i;
- emit_mov_i(ARM_R0, k, ctx);
-b_epilogue:
- if (i != ctx->skf->len - 1)
- emit(ARM_B(b_imm(prog->len, ctx)), ctx);
+ case BPF_JEQ:
+ _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
break;
- case BPF_MISC | BPF_TAX:
- /* X = A */
- ctx->seen |= SEEN_X;
- emit(ARM_MOV_R(r_X, r_A), ctx);
+ case BPF_JGT:
+ _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
break;
- case BPF_MISC | BPF_TXA:
- /* A = X */
- update_on_xread(ctx);
- emit(ARM_MOV_R(r_A, r_X), ctx);
+ case BPF_JGE:
+ _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
break;
- case BPF_ANC | SKF_AD_PROTOCOL:
- /* A = ntohs(skb->protocol) */
- ctx->seen |= SEEN_SKB;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
- protocol) != 2);
- off = offsetof(struct sk_buff, protocol);
- emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx);
- emit_swap16(r_A, r_scratch, ctx);
+ case BPF_JSGT:
+ _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx);
break;
- case BPF_ANC | SKF_AD_CPU:
- /* r_scratch = current_thread_info() */
- OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx);
- /* A = current_thread_info()->cpu */
- BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4);
- off = offsetof(struct thread_info, cpu);
- emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
+ case BPF_JSGE:
+ _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx);
break;
- case BPF_ANC | SKF_AD_IFINDEX:
- case BPF_ANC | SKF_AD_HATYPE:
- /* A = skb->dev->ifindex */
- /* A = skb->dev->type */
- ctx->seen |= SEEN_SKB;
- off = offsetof(struct sk_buff, dev);
- emit(ARM_LDR_I(r_scratch, r_skb, off), ctx);
-
- emit(ARM_CMP_I(r_scratch, 0), ctx);
- emit_err_ret(ARM_COND_EQ, ctx);
-
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
- ifindex) != 4);
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
- type) != 2);
-
- if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
- off = offsetof(struct net_device, ifindex);
- emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
- } else {
- /*
- * offset of field "type" in "struct
- * net_device" is above what can be
- * used in the ldrh rd, [rn, #imm]
- * instruction, so load the offset in
- * a register and use ldrh rd, [rn, rm]
- */
- off = offsetof(struct net_device, type);
- emit_mov_i(ARM_R3, off, ctx);
- emit(ARM_LDRH_R(r_A, r_scratch, ARM_R3), ctx);
- }
+ case BPF_JLE:
+ _emit(ARM_COND_LS, ARM_B(jmp_offset), ctx);
break;
- case BPF_ANC | SKF_AD_MARK:
- ctx->seen |= SEEN_SKB;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
- off = offsetof(struct sk_buff, mark);
- emit(ARM_LDR_I(r_A, r_skb, off), ctx);
+ case BPF_JLT:
+ _emit(ARM_COND_CC, ARM_B(jmp_offset), ctx);
break;
- case BPF_ANC | SKF_AD_RXHASH:
- ctx->seen |= SEEN_SKB;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
- off = offsetof(struct sk_buff, hash);
- emit(ARM_LDR_I(r_A, r_skb, off), ctx);
+ case BPF_JSLT:
+ _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx);
break;
- case BPF_ANC | SKF_AD_VLAN_TAG:
- case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
- ctx->seen |= SEEN_SKB;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
- off = offsetof(struct sk_buff, vlan_tci);
- emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
- if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
- OP_IMM3(ARM_AND, r_A, r_A, ~VLAN_TAG_PRESENT, ctx);
- else {
- OP_IMM3(ARM_LSR, r_A, r_A, 12, ctx);
- OP_IMM3(ARM_AND, r_A, r_A, 0x1, ctx);
- }
+ case BPF_JSLE:
+ _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx);
break;
- case BPF_ANC | SKF_AD_PKTTYPE:
- ctx->seen |= SEEN_SKB;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
- __pkt_type_offset[0]) != 1);
- off = PKT_TYPE_OFFSET();
- emit(ARM_LDRB_I(r_A, r_skb, off), ctx);
- emit(ARM_AND_I(r_A, r_A, PKT_TYPE_MAX), ctx);
-#ifdef __BIG_ENDIAN_BITFIELD
- emit(ARM_LSR_I(r_A, r_A, 5), ctx);
-#endif
+ }
+ break;
+ /* JMP OFF */
+ case BPF_JMP | BPF_JA:
+ {
+ if (off == 0)
break;
- case BPF_ANC | SKF_AD_QUEUE:
- ctx->seen |= SEEN_SKB;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
- queue_mapping) != 2);
- BUILD_BUG_ON(offsetof(struct sk_buff,
- queue_mapping) > 0xff);
- off = offsetof(struct sk_buff, queue_mapping);
- emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
+ jmp_offset = bpf2a32_offset(i+off, i, ctx);
+ check_imm24(jmp_offset);
+ emit(ARM_B(jmp_offset), ctx);
+ break;
+ }
+ /* tail call */
+ case BPF_JMP | BPF_TAIL_CALL:
+ if (emit_bpf_tail_call(ctx))
+ return -EFAULT;
+ break;
+ /* function call */
+ case BPF_JMP | BPF_CALL:
+ {
+ const u8 *r0 = bpf2a32[BPF_REG_0];
+ const u8 *r1 = bpf2a32[BPF_REG_1];
+ const u8 *r2 = bpf2a32[BPF_REG_2];
+ const u8 *r3 = bpf2a32[BPF_REG_3];
+ const u8 *r4 = bpf2a32[BPF_REG_4];
+ const u8 *r5 = bpf2a32[BPF_REG_5];
+ const u32 func = (u32)__bpf_call_base + (u32)imm;
+
+ emit_a32_mov_r64(true, r0, r1, false, false, ctx);
+ emit_a32_mov_r64(true, r1, r2, false, true, ctx);
+ emit_push_r64(r5, 0, ctx);
+ emit_push_r64(r4, 8, ctx);
+ emit_push_r64(r3, 16, ctx);
+
+ emit_a32_mov_i(tmp[1], func, false, ctx);
+ emit_blx_r(tmp[1], ctx);
+
+ emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); // callee clean
+ break;
+ }
+ /* function return */
+ case BPF_JMP | BPF_EXIT:
+ /* Optimization: when last instruction is EXIT
+ * simply fallthrough to epilogue.
+ */
+ if (i == ctx->prog->len - 1)
break;
- case BPF_ANC | SKF_AD_PAY_OFFSET:
- ctx->seen |= SEEN_SKB | SEEN_CALL;
+ jmp_offset = epilogue_offset(ctx);
+ check_imm24(jmp_offset);
+ emit(ARM_B(jmp_offset), ctx);
+ break;
+notyet:
+ pr_info_once("*** NOT YET: opcode %02x ***\n", code);
+ return -EFAULT;
+ default:
+ pr_err_once("unknown opcode %02x\n", code);
+ return -EINVAL;
+ }
- emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
- emit_mov_i(ARM_R3, (unsigned int)skb_get_poff, ctx);
- emit_blx_r(ARM_R3, ctx);
- emit(ARM_MOV_R(r_A, ARM_R0), ctx);
- break;
- case BPF_LDX | BPF_W | BPF_ABS:
- /*
- * load a 32bit word from struct seccomp_data.
- * seccomp_check_filter() will already have checked
- * that k is 32bit aligned and lies within the
- * struct seccomp_data.
- */
- ctx->seen |= SEEN_SKB;
- emit(ARM_LDR_I(r_A, r_skb, k), ctx);
- break;
- default:
- return -1;
+ if (ctx->flags & FLAG_IMM_OVERFLOW)
+ /*
+ * this instruction generated an overflow when
+ * trying to access the literal pool, so
+ * delegate this filter to the kernel interpreter.
+ */
+ return -1;
+ return 0;
+}
+
+static int build_body(struct jit_ctx *ctx)
+{
+ const struct bpf_prog *prog = ctx->prog;
+ unsigned int i;
+
+ for (i = 0; i < prog->len; i++) {
+ const struct bpf_insn *insn = &(prog->insnsi[i]);
+ int ret;
+
+ ret = build_insn(insn, ctx);
+
+ /* A positive return means a 16-byte insn (64-bit immediate load) was JITed, so skip its second half. */
+ if (ret > 0) {
+ i++;
+ if (ctx->target == NULL)
+ ctx->offsets[i] = ctx->idx;
+ continue;
}
- if (ctx->flags & FLAG_IMM_OVERFLOW)
- /*
- * this instruction generated an overflow when
- * trying to access the literal pool, so
- * delegate this filter to the kernel interpreter.
- */
- return -1;
+ if (ctx->target == NULL)
+ ctx->offsets[i] = ctx->idx;
+
+ /* If unsuccessful, return with error code */
+ if (ret)
+ return ret;
}
+ return 0;
+}
- /* compute offsets only during the first pass */
- if (ctx->target == NULL)
- ctx->offsets[i] = ctx->idx * 4;
+static int validate_code(struct jit_ctx *ctx)
+{
+ int i;
+
+ for (i = 0; i < ctx->idx; i++) {
+ if (ctx->target[i] == __opcode_to_mem_arm(ARM_INST_UDF))
+ return -1;
+ }
return 0;
}
+void bpf_jit_compile(struct bpf_prog *prog)
+{
+ /* Nothing to do here. We support Internal BPF. */
+}
-void bpf_jit_compile(struct bpf_prog *fp)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
+ struct bpf_prog *tmp, *orig_prog = prog;
struct bpf_binary_header *header;
+ bool tmp_blinded = false;
struct jit_ctx ctx;
- unsigned tmp_idx;
- unsigned alloc_size;
- u8 *target_ptr;
+ unsigned int tmp_idx;
+ unsigned int image_size;
+ u8 *image_ptr;
+ /* If BPF JIT was not enabled then we must fall back to
+ * the interpreter.
+ */
if (!bpf_jit_enable)
- return;
+ return orig_prog;
- memset(&ctx, 0, sizeof(ctx));
- ctx.skf = fp;
- ctx.ret0_fp_idx = -1;
+ /* If constant blinding was enabled and we failed during blinding
+ * then we must fall back to the interpreter. Otherwise, we save
+ * the new JITed code.
+ */
+ tmp = bpf_jit_blind_constants(prog);
- ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL);
- if (ctx.offsets == NULL)
- return;
+ if (IS_ERR(tmp))
+ return orig_prog;
+ if (tmp != prog) {
+ tmp_blinded = true;
+ prog = tmp;
+ }
- /* fake pass to fill in the ctx->seen */
- if (unlikely(build_body(&ctx)))
+ memset(&ctx, 0, sizeof(ctx));
+ ctx.prog = prog;
+
+ /* If we cannot allocate memory for offsets[], then
+ * we must fall back to the interpreter.
+ */
+ ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
+ if (ctx.offsets == NULL) {
+ prog = orig_prog;
goto out;
+ }
+
+ /* 1) fake pass to find the length of the JITed code,
+ * to compute ctx->offsets and other context variables
+ * needed to compute final JITed code.
+ * Also, calculate random starting pointer/start of JITed code
+ * which is prefixed by random number of fault instructions.
+ *
+ * If the first pass fails then there is no chance of it
+ * being successful in the second pass, so just fall back
+ * to the interpreter.
+ */
+ if (build_body(&ctx)) {
+ prog = orig_prog;
+ goto out_off;
+ }
tmp_idx = ctx.idx;
build_prologue(&ctx);
ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
+ ctx.epilogue_offset = ctx.idx;
+
#if __LINUX_ARM_ARCH__ < 7
tmp_idx = ctx.idx;
build_epilogue(&ctx);
@@ -1021,64 +1880,83 @@ void bpf_jit_compile(struct bpf_prog *fp)
ctx.idx += ctx.imm_count;
if (ctx.imm_count) {
- ctx.imms = kzalloc(4 * ctx.imm_count, GFP_KERNEL);
- if (ctx.imms == NULL)
- goto out;
+ ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL);
+ if (ctx.imms == NULL) {
+ prog = orig_prog;
+ goto out_off;
+ }
}
#else
- /* there's nothing after the epilogue on ARMv7 */
+ /* there's nothing after the epilogue on ARMv7 */
build_epilogue(&ctx);
#endif
- alloc_size = 4 * ctx.idx;
- header = bpf_jit_binary_alloc(alloc_size, &target_ptr,
- 4, jit_fill_hole);
- if (header == NULL)
- goto out;
+ /* Now we can get the actual image size of the JITed arm code.
+ * Currently, we are not considering the THUMB-2 instructions
+ * for jit, although it can decrease the size of the image.
+ *
+ * As each arm instruction is of length 32bit, we are translating
+ * the number of JITed instructions into the size required to store the
+ * JITed code.
+ */
+ image_size = sizeof(u32) * ctx.idx;
- ctx.target = (u32 *) target_ptr;
+ /* Now we know the size of the structure to make */
+ header = bpf_jit_binary_alloc(image_size, &image_ptr,
+ sizeof(u32), jit_fill_hole);
+ /* If we cannot allocate memory for the structure, then
+ * we must fall back to the interpreter.
+ */
+ if (header == NULL) {
+ prog = orig_prog;
+ goto out_imms;
+ }
+
+ /* 2.) Actual pass to generate final JIT code */
+ ctx.target = (u32 *) image_ptr;
ctx.idx = 0;
build_prologue(&ctx);
+
+ /* If building the body of the JITed code fails somehow,
+ * we fall back to the interpreter.
+ */
if (build_body(&ctx) < 0) {
-#if __LINUX_ARM_ARCH__ < 7
- if (ctx.imm_count)
- kfree(ctx.imms);
-#endif
+ image_ptr = NULL;
bpf_jit_binary_free(header);
- goto out;
+ prog = orig_prog;
+ goto out_imms;
}
build_epilogue(&ctx);
+ /* 3.) Extra pass to validate JITed Code */
+ if (validate_code(&ctx)) {
+ image_ptr = NULL;
+ bpf_jit_binary_free(header);
+ prog = orig_prog;
+ goto out_imms;
+ }
flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx));
-#if __LINUX_ARM_ARCH__ < 7
- if (ctx.imm_count)
- kfree(ctx.imms);
-#endif
-
if (bpf_jit_enable > 1)
/* there are 2 passes here */
- bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
+ bpf_jit_dump(prog->len, image_size, 2, ctx.target);
set_memory_ro((unsigned long)header, header->pages);
- fp->bpf_func = (void *)ctx.target;
- fp->jited = 1;
-out:
+ prog->bpf_func = (void *)ctx.target;
+ prog->jited = 1;
+ prog->jited_len = image_size;
+
+out_imms:
+#if __LINUX_ARM_ARCH__ < 7
+ if (ctx.imm_count)
+ kfree(ctx.imms);
+#endif
+out_off:
kfree(ctx.offsets);
- return;
+out:
+ if (tmp_blinded)
+ bpf_jit_prog_release_other(prog, prog == orig_prog ?
+ tmp : orig_prog);
+ return prog;
}
-void bpf_jit_free(struct bpf_prog *fp)
-{
- unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
- struct bpf_binary_header *header = (void *)addr;
-
- if (!fp->jited)
- goto free_filter;
-
- set_memory_rw(addr, header->pages);
- bpf_jit_binary_free(header);
-
-free_filter:
- bpf_prog_unlock_free(fp);
-}
diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h
index c46fca2972f7..d5cf5f6208aa 100644
--- a/arch/arm/net/bpf_jit_32.h
+++ b/arch/arm/net/bpf_jit_32.h
@@ -11,6 +11,7 @@
#ifndef PFILTER_OPCODES_ARM_H
#define PFILTER_OPCODES_ARM_H
+/* ARM 32bit Registers */
#define ARM_R0 0
#define ARM_R1 1
#define ARM_R2 2
@@ -22,38 +23,43 @@
#define ARM_R8 8
#define ARM_R9 9
#define ARM_R10 10
-#define ARM_FP 11
-#define ARM_IP 12
-#define ARM_SP 13
-#define ARM_LR 14
-#define ARM_PC 15
-
-#define ARM_COND_EQ 0x0
-#define ARM_COND_NE 0x1
-#define ARM_COND_CS 0x2
+#define ARM_FP 11 /* Frame Pointer */
+#define ARM_IP 12 /* Intra-procedure scratch register */
+#define ARM_SP 13 /* Stack pointer: as load/store base reg */
+#define ARM_LR 14 /* Link Register */
+#define ARM_PC 15 /* Program counter */
+
+#define ARM_COND_EQ 0x0 /* == */
+#define ARM_COND_NE 0x1 /* != */
+#define ARM_COND_CS 0x2 /* unsigned >= */
#define ARM_COND_HS ARM_COND_CS
-#define ARM_COND_CC 0x3
+#define ARM_COND_CC 0x3 /* unsigned < */
#define ARM_COND_LO ARM_COND_CC
-#define ARM_COND_MI 0x4
-#define ARM_COND_PL 0x5
-#define ARM_COND_VS 0x6
-#define ARM_COND_VC 0x7
-#define ARM_COND_HI 0x8
-#define ARM_COND_LS 0x9
-#define ARM_COND_GE 0xa
-#define ARM_COND_LT 0xb
-#define ARM_COND_GT 0xc
-#define ARM_COND_LE 0xd
-#define ARM_COND_AL 0xe
+#define ARM_COND_MI 0x4 /* < 0 */
+#define ARM_COND_PL 0x5 /* >= 0 */
+#define ARM_COND_VS 0x6 /* Signed Overflow */
+#define ARM_COND_VC 0x7 /* No Signed Overflow */
+#define ARM_COND_HI 0x8 /* unsigned > */
+#define ARM_COND_LS 0x9 /* unsigned <= */
+#define ARM_COND_GE 0xa /* Signed >= */
+#define ARM_COND_LT 0xb /* Signed < */
+#define ARM_COND_GT 0xc /* Signed > */
+#define ARM_COND_LE 0xd /* Signed <= */
+#define ARM_COND_AL 0xe /* None */
/* register shift types */
#define SRTYPE_LSL 0
#define SRTYPE_LSR 1
#define SRTYPE_ASR 2
#define SRTYPE_ROR 3
+#define SRTYPE_ASL (SRTYPE_LSL)
#define ARM_INST_ADD_R 0x00800000
+#define ARM_INST_ADDS_R 0x00900000
+#define ARM_INST_ADC_R 0x00a00000
+#define ARM_INST_ADC_I 0x02a00000
#define ARM_INST_ADD_I 0x02800000
+#define ARM_INST_ADDS_I 0x02900000
#define ARM_INST_AND_R 0x00000000
#define ARM_INST_AND_I 0x02000000
@@ -76,8 +82,10 @@
#define ARM_INST_LDRH_I 0x01d000b0
#define ARM_INST_LDRH_R 0x019000b0
#define ARM_INST_LDR_I 0x05900000
+#define ARM_INST_LDR_R 0x07900000
#define ARM_INST_LDM 0x08900000
+#define ARM_INST_LDM_IA 0x08b00000
#define ARM_INST_LSL_I 0x01a00000
#define ARM_INST_LSL_R 0x01a00010
@@ -86,6 +94,7 @@
#define ARM_INST_LSR_R 0x01a00030
#define ARM_INST_MOV_R 0x01a00000
+#define ARM_INST_MOVS_R 0x01b00000
#define ARM_INST_MOV_I 0x03a00000
#define ARM_INST_MOVW 0x03000000
#define ARM_INST_MOVT 0x03400000
@@ -96,17 +105,28 @@
#define ARM_INST_PUSH 0x092d0000
#define ARM_INST_ORR_R 0x01800000
+#define ARM_INST_ORRS_R 0x01900000
#define ARM_INST_ORR_I 0x03800000
#define ARM_INST_REV 0x06bf0f30
#define ARM_INST_REV16 0x06bf0fb0
#define ARM_INST_RSB_I 0x02600000
+#define ARM_INST_RSBS_I 0x02700000
+#define ARM_INST_RSC_I 0x02e00000
#define ARM_INST_SUB_R 0x00400000
+#define ARM_INST_SUBS_R 0x00500000
+#define ARM_INST_RSB_R 0x00600000
#define ARM_INST_SUB_I 0x02400000
+#define ARM_INST_SUBS_I 0x02500000
+#define ARM_INST_SBC_I 0x02c00000
+#define ARM_INST_SBC_R 0x00c00000
+#define ARM_INST_SBCS_R 0x00d00000
#define ARM_INST_STR_I 0x05800000
+#define ARM_INST_STRB_I 0x05c00000
+#define ARM_INST_STRH_I 0x01c000b0
#define ARM_INST_TST_R 0x01100000
#define ARM_INST_TST_I 0x03100000
@@ -117,6 +137,8 @@
#define ARM_INST_MLS 0x00600090
+#define ARM_INST_UXTH 0x06ff0070
+
/*
* Use a suitable undefined instruction to use for ARM/Thumb2 faulting.
* We need to be careful not to conflict with those used by other modules
@@ -135,9 +157,15 @@
#define _AL3_R(op, rd, rn, rm) ((op ## _R) | (rd) << 12 | (rn) << 16 | (rm))
/* immediate */
#define _AL3_I(op, rd, rn, imm) ((op ## _I) | (rd) << 12 | (rn) << 16 | (imm))
+/* register with register-shift */
+#define _AL3_SR(inst) (inst | (1 << 4))
#define ARM_ADD_R(rd, rn, rm) _AL3_R(ARM_INST_ADD, rd, rn, rm)
+#define ARM_ADDS_R(rd, rn, rm) _AL3_R(ARM_INST_ADDS, rd, rn, rm)
#define ARM_ADD_I(rd, rn, imm) _AL3_I(ARM_INST_ADD, rd, rn, imm)
+#define ARM_ADDS_I(rd, rn, imm) _AL3_I(ARM_INST_ADDS, rd, rn, imm)
+#define ARM_ADC_R(rd, rn, rm) _AL3_R(ARM_INST_ADC, rd, rn, rm)
+#define ARM_ADC_I(rd, rn, imm) _AL3_I(ARM_INST_ADC, rd, rn, imm)
#define ARM_AND_R(rd, rn, rm) _AL3_R(ARM_INST_AND, rd, rn, rm)
#define ARM_AND_I(rd, rn, imm) _AL3_I(ARM_INST_AND, rd, rn, imm)
@@ -156,7 +184,9 @@
#define ARM_EOR_I(rd, rn, imm) _AL3_I(ARM_INST_EOR, rd, rn, imm)
#define ARM_LDR_I(rt, rn, off) (ARM_INST_LDR_I | (rt) << 12 | (rn) << 16 \
- | (off))
+ | ((off) & 0xfff))
+#define ARM_LDR_R(rt, rn, rm) (ARM_INST_LDR_R | (rt) << 12 | (rn) << 16 \
+ | (rm))
#define ARM_LDRB_I(rt, rn, off) (ARM_INST_LDRB_I | (rt) << 12 | (rn) << 16 \
| (off))
#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | (rt) << 12 | (rn) << 16 \
@@ -167,15 +197,23 @@
| (rm))
#define ARM_LDM(rn, regs) (ARM_INST_LDM | (rn) << 16 | (regs))
+#define ARM_LDM_IA(rn, regs) (ARM_INST_LDM_IA | (rn) << 16 | (regs))
#define ARM_LSL_R(rd, rn, rm) (_AL3_R(ARM_INST_LSL, rd, 0, rn) | (rm) << 8)
#define ARM_LSL_I(rd, rn, imm) (_AL3_I(ARM_INST_LSL, rd, 0, rn) | (imm) << 7)
#define ARM_LSR_R(rd, rn, rm) (_AL3_R(ARM_INST_LSR, rd, 0, rn) | (rm) << 8)
#define ARM_LSR_I(rd, rn, imm) (_AL3_I(ARM_INST_LSR, rd, 0, rn) | (imm) << 7)
+#define ARM_ASR_R(rd, rn, rm) (_AL3_R(ARM_INST_ASR, rd, 0, rn) | (rm) << 8)
+#define ARM_ASR_I(rd, rn, imm) (_AL3_I(ARM_INST_ASR, rd, 0, rn) | (imm) << 7)
#define ARM_MOV_R(rd, rm) _AL3_R(ARM_INST_MOV, rd, 0, rm)
+#define ARM_MOVS_R(rd, rm) _AL3_R(ARM_INST_MOVS, rd, 0, rm)
#define ARM_MOV_I(rd, imm) _AL3_I(ARM_INST_MOV, rd, 0, imm)
+#define ARM_MOV_SR(rd, rm, type, rs) \
+ (_AL3_SR(ARM_MOV_R(rd, rm)) | (type) << 5 | (rs) << 8)
+#define ARM_MOV_SI(rd, rm, type, imm6) \
+ (ARM_MOV_R(rd, rm) | (type) << 5 | (imm6) << 7)
#define ARM_MOVW(rd, imm) \
(ARM_INST_MOVW | ((imm) >> 12) << 16 | (rd) << 12 | ((imm) & 0x0fff))
@@ -190,19 +228,38 @@
#define ARM_ORR_R(rd, rn, rm) _AL3_R(ARM_INST_ORR, rd, rn, rm)
#define ARM_ORR_I(rd, rn, imm) _AL3_I(ARM_INST_ORR, rd, rn, imm)
-#define ARM_ORR_S(rd, rn, rm, type, rs) \
- (ARM_ORR_R(rd, rn, rm) | (type) << 5 | (rs) << 7)
+#define ARM_ORR_SR(rd, rn, rm, type, rs) \
+ (_AL3_SR(ARM_ORR_R(rd, rn, rm)) | (type) << 5 | (rs) << 8)
+#define ARM_ORRS_R(rd, rn, rm) _AL3_R(ARM_INST_ORRS, rd, rn, rm)
+#define ARM_ORRS_SR(rd, rn, rm, type, rs) \
+ (_AL3_SR(ARM_ORRS_R(rd, rn, rm)) | (type) << 5 | (rs) << 8)
+#define ARM_ORR_SI(rd, rn, rm, type, imm6) \
+ (ARM_ORR_R(rd, rn, rm) | (type) << 5 | (imm6) << 7)
+#define ARM_ORRS_SI(rd, rn, rm, type, imm6) \
+ (ARM_ORRS_R(rd, rn, rm) | (type) << 5 | (imm6) << 7)
#define ARM_REV(rd, rm) (ARM_INST_REV | (rd) << 12 | (rm))
#define ARM_REV16(rd, rm) (ARM_INST_REV16 | (rd) << 12 | (rm))
#define ARM_RSB_I(rd, rn, imm) _AL3_I(ARM_INST_RSB, rd, rn, imm)
+#define ARM_RSBS_I(rd, rn, imm) _AL3_I(ARM_INST_RSBS, rd, rn, imm)
+#define ARM_RSC_I(rd, rn, imm) _AL3_I(ARM_INST_RSC, rd, rn, imm)
#define ARM_SUB_R(rd, rn, rm) _AL3_R(ARM_INST_SUB, rd, rn, rm)
+#define ARM_SUBS_R(rd, rn, rm) _AL3_R(ARM_INST_SUBS, rd, rn, rm)
+#define ARM_RSB_R(rd, rn, rm) _AL3_R(ARM_INST_RSB, rd, rn, rm)
+#define ARM_SBC_R(rd, rn, rm) _AL3_R(ARM_INST_SBC, rd, rn, rm)
+#define ARM_SBCS_R(rd, rn, rm) _AL3_R(ARM_INST_SBCS, rd, rn, rm)
#define ARM_SUB_I(rd, rn, imm) _AL3_I(ARM_INST_SUB, rd, rn, imm)
+#define ARM_SUBS_I(rd, rn, imm) _AL3_I(ARM_INST_SUBS, rd, rn, imm)
+#define ARM_SBC_I(rd, rn, imm) _AL3_I(ARM_INST_SBC, rd, rn, imm)
#define ARM_STR_I(rt, rn, off) (ARM_INST_STR_I | (rt) << 12 | (rn) << 16 \
- | (off))
+ | ((off) & 0xfff))
+#define ARM_STRH_I(rt, rn, off) (ARM_INST_STRH_I | (rt) << 12 | (rn) << 16 \
+ | (((off) & 0xf0) << 4) | ((off) & 0xf))
+#define ARM_STRB_I(rt, rn, off) (ARM_INST_STRB_I | (rt) << 12 | (rn) << 16 \
+ | (((off) & 0xf0) << 4) | ((off) & 0xf))
#define ARM_TST_R(rn, rm) _AL3_R(ARM_INST_TST, 0, rn, rm)
#define ARM_TST_I(rn, imm) _AL3_I(ARM_INST_TST, 0, rn, imm)
@@ -214,5 +271,6 @@
#define ARM_MLS(rd, rn, rm, ra) (ARM_INST_MLS | (rd) << 16 | (rn) | (rm) << 8 \
| (ra) << 12)
+#define ARM_UXTH(rd, rm) (ARM_INST_UXTH | (rd) << 12 | (rm))
#endif /* PFILTER_OPCODES_ARM_H */
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index dfd908630631..0df64a6a56d4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -75,6 +75,7 @@ config ARM64
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_ARCH_VMAP_STACK
select HAVE_ARM_SMCCC
select HAVE_EBPF_JIT
select HAVE_C_RECORDMCOUNT
@@ -960,6 +961,18 @@ config ARM64_UAO
regular load/store instructions if the cpu does not implement the
feature.
+config ARM64_PMEM
+ bool "Enable support for persistent memory"
+ select ARCH_HAS_PMEM_API
+ select ARCH_HAS_UACCESS_FLUSHCACHE
+ help
+ Say Y to enable support for the persistent memory API based on the
+ ARMv8.2 DCPoP feature.
+
+ The feature is detected at runtime, and the kernel will use DC CVAC
+ operations if DC CVAP is not supported (following the behaviour of
+ DC CVAP itself if the system does not define a point of persistence).
+
endmenu
config ARM64_MODULE_CMODEL_LARGE
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
index 0d1f026d831a..6872135d7f84 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
@@ -67,14 +67,6 @@
};
};
-&emac {
- pinctrl-names = "default";
- pinctrl-0 = <&rgmii_pins>;
- phy-mode = "rgmii";
- phy-handle = <&ext_rgmii_phy>;
- status = "okay";
-};
-
&i2c1 {
pinctrl-names = "default";
pinctrl-0 = <&i2c1_pins>;
@@ -85,13 +77,6 @@
bias-pull-up;
};
-&mdio {
- ext_rgmii_phy: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <1>;
- };
-};
-
&mmc0 {
pinctrl-names = "default";
pinctrl-0 = <&mmc0_pins>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
index 24f1aac366d6..f82ccf332c0f 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
@@ -48,18 +48,3 @@
/* TODO: Camera, touchscreen, etc. */
};
-
-&emac {
- pinctrl-names = "default";
- pinctrl-0 = <&rgmii_pins>;
- phy-mode = "rgmii";
- phy-handle = <&ext_rgmii_phy>;
- status = "okay";
-};
-
-&mdio {
- ext_rgmii_phy: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <1>;
- };
-};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
index 08cda24ea194..7c533b6d4ba9 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
@@ -78,15 +78,6 @@
status = "okay";
};
-&emac {
- pinctrl-names = "default";
- pinctrl-0 = <&rmii_pins>;
- phy-mode = "rmii";
- phy-handle = <&ext_rmii_phy1>;
- status = "okay";
-
-};
-
&i2c1 {
pinctrl-names = "default";
pinctrl-0 = <&i2c1_pins>;
@@ -97,13 +88,6 @@
bias-pull-up;
};
-&mdio {
- ext_rmii_phy1: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <1>;
- };
-};
-
&mmc0 {
pinctrl-names = "default";
pinctrl-0 = <&mmc0_pins>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
index 17eb1cc5bf6b..d891a1a27f6c 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
@@ -76,21 +76,6 @@
status = "okay";
};
-&emac {
- pinctrl-names = "default";
- pinctrl-0 = <&rgmii_pins>;
- phy-mode = "rgmii";
- phy-handle = <&ext_rgmii_phy>;
- status = "okay";
-};
-
-&mdio {
- ext_rgmii_phy: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <1>;
- };
-};
-
&mmc2 {
pinctrl-names = "default";
pinctrl-0 = <&mmc2_pins>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
index bd0f33b77f57..68aadc9b96dc 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
@@ -449,26 +449,6 @@
#size-cells = <0>;
};
- emac: ethernet@1c30000 {
- compatible = "allwinner,sun50i-a64-emac";
- syscon = <&syscon>;
- reg = <0x01c30000 0x10000>;
- interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "macirq";
- resets = <&ccu RST_BUS_EMAC>;
- reset-names = "stmmaceth";
- clocks = <&ccu CLK_BUS_EMAC>;
- clock-names = "stmmaceth";
- status = "disabled";
- #address-cells = <1>;
- #size-cells = <0>;
-
- mdio: mdio {
- #address-cells = <1>;
- #size-cells = <0>;
- };
- };
-
gic: interrupt-controller@1c81000 {
compatible = "arm,gic-400";
reg = <0x01c81000 0x1000>,
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts
index 968908761194..1c2387bd5df6 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts
@@ -50,7 +50,6 @@
compatible = "friendlyarm,nanopi-neo2", "allwinner,sun50i-h5";
aliases {
- ethernet0 = &emac;
serial0 = &uart0;
};
@@ -109,22 +108,6 @@
status = "okay";
};
-&emac {
- pinctrl-names = "default";
- pinctrl-0 = <&emac_rgmii_pins>;
- phy-supply = <&reg_gmac_3v3>;
- phy-handle = <&ext_rgmii_phy>;
- phy-mode = "rgmii";
- status = "okay";
-};
-
-&mdio {
- ext_rgmii_phy: ethernet-phy@7 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <7>;
- };
-};
-
&mmc0 {
pinctrl-names = "default";
pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts
index a8296feee884..4f77c8470f6c 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts
@@ -59,7 +59,6 @@
};
aliases {
- ethernet0 = &emac;
serial0 = &uart0;
};
@@ -137,28 +136,12 @@
status = "okay";
};
-&emac {
- pinctrl-names = "default";
- pinctrl-0 = <&emac_rgmii_pins>;
- phy-supply = <&reg_gmac_3v3>;
- phy-handle = <&ext_rgmii_phy>;
- phy-mode = "rgmii";
- status = "okay";
-};
-
&ir {
pinctrl-names = "default";
pinctrl-0 = <&ir_pins_a>;
status = "okay";
};
-&mdio {
- ext_rgmii_phy: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <1>;
- };
-};
-
&mmc0 {
pinctrl-names = "default";
pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts
index d906b302cbcd..6be06873e5af 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts
@@ -54,7 +54,6 @@
compatible = "xunlong,orangepi-prime", "allwinner,sun50i-h5";
aliases {
- ethernet0 = &emac;
serial0 = &uart0;
};
@@ -144,28 +143,12 @@
status = "okay";
};
-&emac {
- pinctrl-names = "default";
- pinctrl-0 = <&emac_rgmii_pins>;
- phy-supply = <&reg_gmac_3v3>;
- phy-handle = <&ext_rgmii_phy>;
- phy-mode = "rgmii";
- status = "okay";
-};
-
&ir {
pinctrl-names = "default";
pinctrl-0 = <&ir_pins_a>;
status = "okay";
};
-&mdio {
- ext_rgmii_phy: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <1>;
- };
-};
-
&mmc0 {
pinctrl-names = "default";
pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
index 732e2e06f503..d9a720bff05d 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
@@ -120,5 +120,8 @@
};
&pio {
+ interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
compatible = "allwinner,sun50i-h5-pinctrl";
};
diff --git a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi
index e2b0da2c0bc7..105b2938082f 100644
--- a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi
+++ b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi
@@ -280,9 +280,6 @@
&decon {
status = "okay";
-
- i80-if-timings {
- };
};
&decon_tv {
@@ -1116,9 +1113,6 @@
&mic {
status = "okay";
-
- i80-if-timings {
- };
};
&pmu_system_controller {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
index 31fd77f82ced..d16b9cc1e825 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
@@ -653,21 +653,21 @@
};
msi1: msi-controller1@1571000 {
- compatible = "fsl,1s1043a-msi";
+ compatible = "fsl,ls1043a-msi";
reg = <0x0 0x1571000 0x0 0x8>;
msi-controller;
interrupts = <0 116 0x4>;
};
msi2: msi-controller2@1572000 {
- compatible = "fsl,1s1043a-msi";
+ compatible = "fsl,ls1043a-msi";
reg = <0x0 0x1572000 0x0 0x8>;
msi-controller;
interrupts = <0 126 0x4>;
};
msi3: msi-controller3@1573000 {
- compatible = "fsl,1s1043a-msi";
+ compatible = "fsl,ls1043a-msi";
reg = <0x0 0x1573000 0x0 0x8>;
msi-controller;
interrupts = <0 160 0x4>;
@@ -689,7 +689,7 @@
bus-range = <0x0 0xff>;
ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000 /* downstream I/O */
0x82000000 0x0 0x40000000 0x40 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */
- msi-parent = <&msi1>;
+ msi-parent = <&msi1>, <&msi2>, <&msi3>;
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 7>;
interrupt-map = <0000 0 0 1 &gic 0 110 0x4>,
@@ -714,7 +714,7 @@
bus-range = <0x0 0xff>;
ranges = <0x81000000 0x0 0x00000000 0x48 0x00010000 0x0 0x00010000 /* downstream I/O */
0x82000000 0x0 0x40000000 0x48 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */
- msi-parent = <&msi2>;
+ msi-parent = <&msi1>, <&msi2>, <&msi3>;
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 7>;
interrupt-map = <0000 0 0 1 &gic 0 120 0x4>,
@@ -739,7 +739,7 @@
bus-range = <0x0 0xff>;
ranges = <0x81000000 0x0 0x00000000 0x50 0x00010000 0x0 0x00010000 /* downstream I/O */
0x82000000 0x0 0x40000000 0x50 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */
- msi-parent = <&msi3>;
+ msi-parent = <&msi1>, <&msi2>, <&msi3>;
#interrupt-cells = <1>;
interrupt-map-mask = <0 0 0 7>;
interrupt-map = <0000 0 0 1 &gic 0 154 0x4>,
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
index dc1640be0345..c8ff0baddf1d 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
@@ -630,6 +630,37 @@
interrupts = <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clockgen 4 1>;
};
+
+ msi1: msi-controller@1580000 {
+ compatible = "fsl,ls1046a-msi";
+ msi-controller;
+ reg = <0x0 0x1580000 0x0 0x10000>;
+ interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ msi2: msi-controller@1590000 {
+ compatible = "fsl,ls1046a-msi";
+ msi-controller;
+ reg = <0x0 0x1590000 0x0 0x10000>;
+ interrupts = <GIC_SPI 126 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ msi3: msi-controller@15a0000 {
+ compatible = "fsl,ls1046a-msi";
+ msi-controller;
+ reg = <0x0 0x15a0000 0x0 0x10000>;
+ interrupts = <GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
};
reserved-memory {
diff --git a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi
index 1eb1f1e9aac4..4d360713ed12 100644
--- a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi
@@ -268,10 +268,10 @@
ap_gpio: gpio {
compatible = "marvell,armada-8k-gpio";
offset = <0x1040>;
- ngpios = <19>;
+ ngpios = <20>;
gpio-controller;
#gpio-cells = <2>;
- gpio-ranges = <&ap_pinctrl 0 0 19>;
+ gpio-ranges = <&ap_pinctrl 0 0 20>;
};
};
};
diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
index a451996f590a..f903957da504 100644
--- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi
+++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
@@ -45,7 +45,7 @@
stdout-path = "serial0:115200n8";
};
- audio_clkout: audio_clkout {
+ audio_clkout: audio-clkout {
/*
* This is same as <&rcar_sound 0>
* but needed to avoid cs2000/rcar_sound probe dead-lock
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-evb.dts b/arch/arm64/boot/dts/rockchip/rk3328-evb.dts
index cf272392cebf..b9f36dad17e6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-evb.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-evb.dts
@@ -50,6 +50,23 @@
chosen {
stdout-path = "serial2:1500000n8";
};
+
+ vcc_phy: vcc-phy-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc_phy";
+ regulator-always-on;
+ regulator-boot-on;
+ };
+};
+
+&gmac2phy {
+ phy-supply = <&vcc_phy>;
+ clock_in_out = "output";
+ assigned-clocks = <&cru SCLK_MAC2PHY_SRC>;
+ assigned-clock-rate = <50000000>;
+ assigned-clocks = <&cru SCLK_MAC2PHY>;
+ assigned-clock-parents = <&cru SCLK_MAC2PHY_SRC>;
+ status = "okay";
};
&uart2 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index 0be96cee27bd..d48bf5d9f8bd 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -63,6 +63,8 @@
i2c1 = &i2c1;
i2c2 = &i2c2;
i2c3 = &i2c3;
+ ethernet0 = &gmac2io;
+ ethernet1 = &gmac2phy;
};
cpus {
@@ -424,6 +426,43 @@
status = "disabled";
};
+ gmac2phy: ethernet@ff550000 {
+ compatible = "rockchip,rk3328-gmac";
+ reg = <0x0 0xff550000 0x0 0x10000>;
+ rockchip,grf = <&grf>;
+ interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq";
+ clocks = <&cru SCLK_MAC2PHY_SRC>, <&cru SCLK_MAC2PHY_RXTX>,
+ <&cru SCLK_MAC2PHY_RXTX>, <&cru SCLK_MAC2PHY_REF>,
+ <&cru ACLK_MAC2PHY>, <&cru PCLK_MAC2PHY>,
+ <&cru SCLK_MAC2PHY_OUT>;
+ clock-names = "stmmaceth", "mac_clk_rx",
+ "mac_clk_tx", "clk_mac_ref",
+ "aclk_mac", "pclk_mac",
+ "clk_macphy";
+ resets = <&cru SRST_GMAC2PHY_A>, <&cru SRST_MACPHY>;
+ reset-names = "stmmaceth", "mac-phy";
+ phy-mode = "rmii";
+ phy-handle = <&phy>;
+ status = "disabled";
+
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy: phy@0 {
+ compatible = "ethernet-phy-id1234.d400", "ethernet-phy-ieee802.3-c22";
+ reg = <0>;
+ clocks = <&cru SCLK_MAC2PHY_OUT>;
+ resets = <&cru SRST_MACPHY>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&fephyled_rxm1 &fephyled_linkm1>;
+ phy-is-integrated;
+ };
+ };
+ };
+
gic: interrupt-controller@ff811000 {
compatible = "arm,gic-400";
#interrupt-cells = <3>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index 69c56f7316c4..5b78ce16a87e 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -238,8 +238,10 @@
linux,pci-domain = <0>;
max-link-speed = <1>;
msi-map = <0x0 &its 0x0 0x1000>;
- phys = <&pcie_phy>;
- phy-names = "pcie-phy";
+ phys = <&pcie_phy 0>, <&pcie_phy 1>,
+ <&pcie_phy 2>, <&pcie_phy 3>;
+ phy-names = "pcie-phy-0", "pcie-phy-1",
+ "pcie-phy-2", "pcie-phy-3";
ranges = <0x83000000 0x0 0xfa000000 0x0 0xfa000000 0x0 0x1e00000
0x81000000 0x0 0xfbe00000 0x0 0xfbe00000 0x0 0x100000>;
resets = <&cru SRST_PCIE_CORE>, <&cru SRST_PCIE_MGMT>,
@@ -1295,7 +1297,7 @@
compatible = "rockchip,rk3399-pcie-phy";
clocks = <&cru SCLK_PCIEPHY_REF>;
clock-names = "refclk";
- #phy-cells = <0>;
+ #phy-cells = <1>;
resets = <&cru SRST_PCIEPHY>;
reset-names = "phy";
status = "disabled";
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index b4ca115b3be1..cdde4f56a281 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -203,6 +203,7 @@ CONFIG_MARVELL_PHY=m
CONFIG_MESON_GXL_PHY=m
CONFIG_MICREL_PHY=y
CONFIG_REALTEK_PHY=m
+CONFIG_ROCKCHIP_PHY=y
CONFIG_USB_PEGASUS=m
CONFIG_USB_RTL8150=m
CONFIG_USB_RTL8152=m
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index d92293747d63..7ca54a76f6b9 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -18,18 +18,23 @@ config CRYPTO_SHA512_ARM64
config CRYPTO_SHA1_ARM64_CE
tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON
select CRYPTO_HASH
+ select CRYPTO_SHA1
config CRYPTO_SHA2_ARM64_CE
tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON
select CRYPTO_HASH
+ select CRYPTO_SHA256_ARM64
config CRYPTO_GHASH_ARM64_CE
- tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
- depends on ARM64 && KERNEL_MODE_NEON
+ tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
+ depends on KERNEL_MODE_NEON
select CRYPTO_HASH
+ select CRYPTO_GF128MUL
+ select CRYPTO_AES
+ select CRYPTO_AES_ARM64
config CRYPTO_CRCT10DIF_ARM64_CE
tristate "CRCT10DIF digest algorithm using PMULL instructions"
@@ -49,25 +54,29 @@ config CRYPTO_AES_ARM64_CE
tristate "AES core cipher using ARMv8 Crypto Extensions"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_ALGAPI
+ select CRYPTO_AES_ARM64
config CRYPTO_AES_ARM64_CE_CCM
tristate "AES in CCM mode using ARMv8 Crypto Extensions"
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_AES_ARM64_CE
+ select CRYPTO_AES_ARM64
select CRYPTO_AEAD
config CRYPTO_AES_ARM64_CE_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_AES_ARM64_CE
+ select CRYPTO_AES_ARM64
select CRYPTO_SIMD
config CRYPTO_AES_ARM64_NEON_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
+ select CRYPTO_AES_ARM64
select CRYPTO_AES
select CRYPTO_SIMD
@@ -82,6 +91,7 @@ config CRYPTO_AES_ARM64_BS
depends on KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_AES_ARM64_NEON_BLK
+ select CRYPTO_AES_ARM64
select CRYPTO_SIMD
endif
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 3363560c79b7..e3a375c4cb83 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -1,7 +1,7 @@
/*
* aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
*
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -32,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data)
beq 8f /* out of input? */
cbnz w8, 0b
eor v0.16b, v0.16b, v1.16b
-1: ld1 {v3.16b}, [x4] /* load first round key */
+1: ld1 {v3.4s}, [x4] /* load first round key */
prfm pldl1strm, [x1]
cmp w5, #12 /* which key size? */
add x6, x4, #16
@@ -42,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data)
mov v5.16b, v3.16b
b 4f
2: mov v4.16b, v3.16b
- ld1 {v5.16b}, [x6], #16 /* load 2nd round key */
+ ld1 {v5.4s}, [x6], #16 /* load 2nd round key */
3: aese v0.16b, v4.16b
aesmc v0.16b, v0.16b
-4: ld1 {v3.16b}, [x6], #16 /* load next round key */
+4: ld1 {v3.4s}, [x6], #16 /* load next round key */
aese v0.16b, v5.16b
aesmc v0.16b, v0.16b
-5: ld1 {v4.16b}, [x6], #16 /* load next round key */
+5: ld1 {v4.4s}, [x6], #16 /* load next round key */
subs w7, w7, #3
aese v0.16b, v3.16b
aesmc v0.16b, v0.16b
- ld1 {v5.16b}, [x6], #16 /* load next round key */
+ ld1 {v5.4s}, [x6], #16 /* load next round key */
bpl 3b
aese v0.16b, v4.16b
subs w2, w2, #16 /* last data? */
@@ -90,7 +90,7 @@ ENDPROC(ce_aes_ccm_auth_data)
* u32 rounds);
*/
ENTRY(ce_aes_ccm_final)
- ld1 {v3.16b}, [x2], #16 /* load first round key */
+ ld1 {v3.4s}, [x2], #16 /* load first round key */
ld1 {v0.16b}, [x0] /* load mac */
cmp w3, #12 /* which key size? */
sub w3, w3, #2 /* modified # of rounds */
@@ -100,17 +100,17 @@ ENTRY(ce_aes_ccm_final)
mov v5.16b, v3.16b
b 2f
0: mov v4.16b, v3.16b
-1: ld1 {v5.16b}, [x2], #16 /* load next round key */
+1: ld1 {v5.4s}, [x2], #16 /* load next round key */
aese v0.16b, v4.16b
aesmc v0.16b, v0.16b
aese v1.16b, v4.16b
aesmc v1.16b, v1.16b
-2: ld1 {v3.16b}, [x2], #16 /* load next round key */
+2: ld1 {v3.4s}, [x2], #16 /* load next round key */
aese v0.16b, v5.16b
aesmc v0.16b, v0.16b
aese v1.16b, v5.16b
aesmc v1.16b, v1.16b
-3: ld1 {v4.16b}, [x2], #16 /* load next round key */
+3: ld1 {v4.4s}, [x2], #16 /* load next round key */
subs w3, w3, #3
aese v0.16b, v3.16b
aesmc v0.16b, v0.16b
@@ -137,31 +137,31 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
cmp w4, #12 /* which key size? */
sub w7, w4, #2 /* get modified # of rounds */
ins v1.d[1], x9 /* no carry in lower ctr */
- ld1 {v3.16b}, [x3] /* load first round key */
+ ld1 {v3.4s}, [x3] /* load first round key */
add x10, x3, #16
bmi 1f
bne 4f
mov v5.16b, v3.16b
b 3f
1: mov v4.16b, v3.16b
- ld1 {v5.16b}, [x10], #16 /* load 2nd round key */
+ ld1 {v5.4s}, [x10], #16 /* load 2nd round key */
2: /* inner loop: 3 rounds, 2x interleaved */
aese v0.16b, v4.16b
aesmc v0.16b, v0.16b
aese v1.16b, v4.16b
aesmc v1.16b, v1.16b
-3: ld1 {v3.16b}, [x10], #16 /* load next round key */
+3: ld1 {v3.4s}, [x10], #16 /* load next round key */
aese v0.16b, v5.16b
aesmc v0.16b, v0.16b
aese v1.16b, v5.16b
aesmc v1.16b, v1.16b
-4: ld1 {v4.16b}, [x10], #16 /* load next round key */
+4: ld1 {v4.4s}, [x10], #16 /* load next round key */
subs w7, w7, #3
aese v0.16b, v3.16b
aesmc v0.16b, v0.16b
aese v1.16b, v3.16b
aesmc v1.16b, v1.16b
- ld1 {v5.16b}, [x10], #16 /* load next round key */
+ ld1 {v5.4s}, [x10], #16 /* load next round key */
bpl 2b
aese v0.16b, v4.16b
aese v1.16b, v4.16b
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 6a7dbc7c83a6..a1254036f2b1 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -1,7 +1,7 @@
/*
* aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
*
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
*/
#include <asm/neon.h>
+#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/aes.h>
#include <crypto/scatterwalk.h>
@@ -44,6 +45,8 @@ asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
u32 rounds);
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
unsigned int key_len)
{
@@ -103,7 +106,45 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
return 0;
}
-static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
+/*
+ * ccm_update_mac() - fold a chunk of associated data into the CBC-MAC.
+ *
+ * @key:      expanded AES key; key->key_enc is used for encryption
+ * @mac:      running MAC block (AES_BLOCK_SIZE bytes), updated in place
+ * @in:       input bytes to absorb
+ * @abytes:   number of bytes at @in
+ * @macp:     in/out: number of bytes already XORed into the current MAC
+ *            block that has not been encrypted yet
+ * @use_neon: true to use the Crypto Extensions routine, false to use the
+ *            scalar __aes_arm64_encrypt() based path
+ *
+ * The scalar path encrypts the MAC block lazily: a block is encrypted right
+ * before the next input bytes are XORed in at offset 0.
+ */
+static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
+			   u32 abytes, u32 *macp, bool use_neon)
+{
+	if (likely(use_neon)) {
+		ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
+				     num_rounds(key));
+	} else {
+		/* top up a partially filled block left by an earlier call */
+		if (*macp > 0 && *macp < AES_BLOCK_SIZE) {
+			int added = min(abytes, AES_BLOCK_SIZE - *macp);
+
+			crypto_xor(&mac[*macp], in, added);
+
+			*macp += added;
+			in += added;
+			abytes -= added;
+		}
+
+		/* absorb all whole blocks, including an exact final one */
+		while (abytes >= AES_BLOCK_SIZE) {
+			__aes_arm64_encrypt(key->key_enc, mac, mac,
+					    num_rounds(key));
+			crypto_xor(mac, in, AES_BLOCK_SIZE);
+
+			in += AES_BLOCK_SIZE;
+			abytes -= AES_BLOCK_SIZE;
+		}
+
+		/*
+		 * Start a new partial block with whatever is left.  When
+		 * nothing is left, *macp must keep the value computed above:
+		 * resetting it to 0 here would forget a block that the top-up
+		 * step only partially filled, and the next chunk would then
+		 * be XORed in at the wrong offset.
+		 */
+		if (abytes > 0) {
+			__aes_arm64_encrypt(key->key_enc, mac, mac,
+					    num_rounds(key));
+			crypto_xor(mac, in, abytes);
+			*macp = abytes;
+		}
+	}
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
+ bool use_neon)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
@@ -122,8 +163,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
ltag.len = 6;
}
- ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
- num_rounds(ctx));
+ ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp, use_neon);
scatterwalk_start(&walk, req->src);
do {
@@ -135,8 +175,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
n = scatterwalk_clamp(&walk, len);
}
p = scatterwalk_map(&walk);
- ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
- num_rounds(ctx));
+ ccm_update_mac(ctx, mac, p, n, &macp, use_neon);
len -= n;
scatterwalk_unmap(p);
@@ -145,6 +184,56 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
} while (len);
}
+/*
+ * ccm_crypt_fallback() - scalar CCM en/decryption of an skcipher walk.
+ *
+ * Processes the data described by @walk one AES block at a time using
+ * __aes_arm64_encrypt() for both the counter-mode keystream and the MAC.
+ *
+ * @walk: walk that the caller has already started
+ * @mac:  running MAC block, updated in place
+ * @iv0:  counter block that is encrypted and XORed into @mac at the end
+ * @ctx:  expanded AES key
+ * @enc:  true: @mac absorbs src before the keystream XOR;
+ *        false: @mac absorbs dst after the keystream XOR
+ *
+ * Returns the last value returned by skcipher_walk_done(), or 0 if the walk
+ * had no data.
+ */
+static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[],
+ struct crypto_aes_ctx *ctx, bool enc)
+{
+ u8 buf[AES_BLOCK_SIZE];
+ int err = 0;
+
+ while (walk->nbytes) {
+ int blocks = walk->nbytes / AES_BLOCK_SIZE;
+ u32 tail = walk->nbytes % AES_BLOCK_SIZE;
+ u8 *dst = walk->dst.virt.addr;
+ u8 *src = walk->src.virt.addr;
+ u32 nbytes = walk->nbytes;
+
+ /*
+ * A trailing partial block is only consumed when this step covers
+ * everything that is left; otherwise it is handed back as 'tail'.
+ */
+ if (nbytes == walk->total && tail > 0) {
+ blocks++;
+ tail = 0;
+ }
+
+ /*
+ * NOTE(review): the loop below runs at least once and ends when
+ * --blocks reaches 0, so it assumes blocks >= 1 here - confirm
+ * that a non-final walk step is never shorter than one block.
+ */
+ do {
+ u32 bsize = AES_BLOCK_SIZE;
+
+ if (nbytes < AES_BLOCK_SIZE)
+ bsize = nbytes;
+
+ /* keystream block goes to buf, MAC block is encrypted in place */
+ crypto_inc(walk->iv, AES_BLOCK_SIZE);
+ __aes_arm64_encrypt(ctx->key_enc, buf, walk->iv,
+ num_rounds(ctx));
+ __aes_arm64_encrypt(ctx->key_enc, mac, mac,
+ num_rounds(ctx));
+ /*
+ * The MAC XOR reads src before dst is written when encrypting
+ * and reads dst after it is written when decrypting;
+ * presumably this keeps in-place operation (dst == src)
+ * correct.
+ */
+ if (enc)
+ crypto_xor(mac, src, bsize);
+ crypto_xor_cpy(dst, src, buf, bsize);
+ if (!enc)
+ crypto_xor(mac, dst, bsize);
+ dst += bsize;
+ src += bsize;
+ nbytes -= bsize;
+ } while (--blocks);
+
+ err = skcipher_walk_done(walk, tail);
+ }
+
+ /* finalize: mac = E(mac) ^ E(iv0), skipped if the walk failed */
+ if (!err) {
+ __aes_arm64_encrypt(ctx->key_enc, buf, iv0, num_rounds(ctx));
+ __aes_arm64_encrypt(ctx->key_enc, mac, mac, num_rounds(ctx));
+ crypto_xor(mac, buf, AES_BLOCK_SIZE);
+ }
+ return err;
+}
+
static int ccm_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -153,39 +242,46 @@ static int ccm_encrypt(struct aead_request *req)
u8 __aligned(8) mac[AES_BLOCK_SIZE];
u8 buf[AES_BLOCK_SIZE];
u32 len = req->cryptlen;
+ bool use_neon = may_use_simd();
int err;
err = ccm_init_mac(req, mac, len);
if (err)
return err;
- kernel_neon_begin_partial(6);
+ if (likely(use_neon))
+ kernel_neon_begin();
if (req->assoclen)
- ccm_calculate_auth_mac(req, mac);
+ ccm_calculate_auth_mac(req, mac, use_neon);
/* preserve the original iv for the final round */
memcpy(buf, req->iv, AES_BLOCK_SIZE);
err = skcipher_walk_aead_encrypt(&walk, req, true);
- while (walk.nbytes) {
- u32 tail = walk.nbytes % AES_BLOCK_SIZE;
-
- if (walk.nbytes == walk.total)
- tail = 0;
+ if (likely(use_neon)) {
+ while (walk.nbytes) {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
- ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- walk.nbytes - tail, ctx->key_enc,
- num_rounds(ctx), mac, walk.iv);
+ if (walk.nbytes == walk.total)
+ tail = 0;
- err = skcipher_walk_done(&walk, tail);
- }
- if (!err)
- ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+ ce_aes_ccm_encrypt(walk.dst.virt.addr,
+ walk.src.virt.addr,
+ walk.nbytes - tail, ctx->key_enc,
+ num_rounds(ctx), mac, walk.iv);
- kernel_neon_end();
+ err = skcipher_walk_done(&walk, tail);
+ }
+ if (!err)
+ ce_aes_ccm_final(mac, buf, ctx->key_enc,
+ num_rounds(ctx));
+ kernel_neon_end();
+ } else {
+ err = ccm_crypt_fallback(&walk, mac, buf, ctx, true);
+ }
if (err)
return err;
@@ -205,38 +301,46 @@ static int ccm_decrypt(struct aead_request *req)
u8 __aligned(8) mac[AES_BLOCK_SIZE];
u8 buf[AES_BLOCK_SIZE];
u32 len = req->cryptlen - authsize;
+ bool use_neon = may_use_simd();
int err;
err = ccm_init_mac(req, mac, len);
if (err)
return err;
- kernel_neon_begin_partial(6);
+ if (likely(use_neon))
+ kernel_neon_begin();
if (req->assoclen)
- ccm_calculate_auth_mac(req, mac);
+ ccm_calculate_auth_mac(req, mac, use_neon);
/* preserve the original iv for the final round */
memcpy(buf, req->iv, AES_BLOCK_SIZE);
err = skcipher_walk_aead_decrypt(&walk, req, true);
- while (walk.nbytes) {
- u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+ if (likely(use_neon)) {
+ while (walk.nbytes) {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
- if (walk.nbytes == walk.total)
- tail = 0;
+ if (walk.nbytes == walk.total)
+ tail = 0;
- ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- walk.nbytes - tail, ctx->key_enc,
- num_rounds(ctx), mac, walk.iv);
+ ce_aes_ccm_decrypt(walk.dst.virt.addr,
+ walk.src.virt.addr,
+ walk.nbytes - tail, ctx->key_enc,
+ num_rounds(ctx), mac, walk.iv);
- err = skcipher_walk_done(&walk, tail);
- }
- if (!err)
- ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+ err = skcipher_walk_done(&walk, tail);
+ }
+ if (!err)
+ ce_aes_ccm_final(mac, buf, ctx->key_enc,
+ num_rounds(ctx));
- kernel_neon_end();
+ kernel_neon_end();
+ } else {
+ err = ccm_crypt_fallback(&walk, mac, buf, ctx, false);
+ }
if (err)
return err;
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
index 50d9fe11d0c8..6a75cd75ed11 100644
--- a/arch/arm64/crypto/aes-ce-cipher.c
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -1,7 +1,7 @@
/*
* aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
*
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,8 @@
*/
#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
#include <crypto/aes.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
@@ -20,6 +22,9 @@ MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
struct aes_block {
u8 b[AES_BLOCK_SIZE];
};
@@ -44,27 +49,32 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
void *dummy0;
int dummy1;
- kernel_neon_begin_partial(4);
+ if (!may_use_simd()) {
+ __aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
+ return;
+ }
+
+ kernel_neon_begin();
__asm__(" ld1 {v0.16b}, %[in] ;"
- " ld1 {v1.16b}, [%[key]], #16 ;"
+ " ld1 {v1.4s}, [%[key]], #16 ;"
" cmp %w[rounds], #10 ;"
" bmi 0f ;"
" bne 3f ;"
" mov v3.16b, v1.16b ;"
" b 2f ;"
"0: mov v2.16b, v1.16b ;"
- " ld1 {v3.16b}, [%[key]], #16 ;"
+ " ld1 {v3.4s}, [%[key]], #16 ;"
"1: aese v0.16b, v2.16b ;"
" aesmc v0.16b, v0.16b ;"
- "2: ld1 {v1.16b}, [%[key]], #16 ;"
+ "2: ld1 {v1.4s}, [%[key]], #16 ;"
" aese v0.16b, v3.16b ;"
" aesmc v0.16b, v0.16b ;"
- "3: ld1 {v2.16b}, [%[key]], #16 ;"
+ "3: ld1 {v2.4s}, [%[key]], #16 ;"
" subs %w[rounds], %w[rounds], #3 ;"
" aese v0.16b, v1.16b ;"
" aesmc v0.16b, v0.16b ;"
- " ld1 {v3.16b}, [%[key]], #16 ;"
+ " ld1 {v3.4s}, [%[key]], #16 ;"
" bpl 1b ;"
" aese v0.16b, v2.16b ;"
" eor v0.16b, v0.16b, v3.16b ;"
@@ -89,27 +99,32 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
void *dummy0;
int dummy1;
- kernel_neon_begin_partial(4);
+ if (!may_use_simd()) {
+ __aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
+ return;
+ }
+
+ kernel_neon_begin();
__asm__(" ld1 {v0.16b}, %[in] ;"
- " ld1 {v1.16b}, [%[key]], #16 ;"
+ " ld1 {v1.4s}, [%[key]], #16 ;"
" cmp %w[rounds], #10 ;"
" bmi 0f ;"
" bne 3f ;"
" mov v3.16b, v1.16b ;"
" b 2f ;"
"0: mov v2.16b, v1.16b ;"
- " ld1 {v3.16b}, [%[key]], #16 ;"
+ " ld1 {v3.4s}, [%[key]], #16 ;"
"1: aesd v0.16b, v2.16b ;"
" aesimc v0.16b, v0.16b ;"
- "2: ld1 {v1.16b}, [%[key]], #16 ;"
+ "2: ld1 {v1.4s}, [%[key]], #16 ;"
" aesd v0.16b, v3.16b ;"
" aesimc v0.16b, v0.16b ;"
- "3: ld1 {v2.16b}, [%[key]], #16 ;"
+ "3: ld1 {v2.4s}, [%[key]], #16 ;"
" subs %w[rounds], %w[rounds], #3 ;"
" aesd v0.16b, v1.16b ;"
" aesimc v0.16b, v0.16b ;"
- " ld1 {v3.16b}, [%[key]], #16 ;"
+ " ld1 {v3.4s}, [%[key]], #16 ;"
" bpl 1b ;"
" aesd v0.16b, v2.16b ;"
" eor v0.16b, v0.16b, v3.16b ;"
@@ -165,20 +180,16 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
key_len != AES_KEYSIZE_256)
return -EINVAL;
- memcpy(ctx->key_enc, in_key, key_len);
ctx->key_length = key_len;
+ for (i = 0; i < kwords; i++)
+ ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
- kernel_neon_begin_partial(2);
+ kernel_neon_begin();
for (i = 0; i < sizeof(rcon); i++) {
u32 *rki = ctx->key_enc + (i * kwords);
u32 *rko = rki + kwords;
-#ifndef CONFIG_CPU_BIG_ENDIAN
rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
-#else
- rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
- rki[0];
-#endif
rko[1] = rko[0] ^ rki[1];
rko[2] = rko[1] ^ rki[2];
rko[3] = rko[2] ^ rki[3];
@@ -210,9 +221,9 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
key_dec[0] = key_enc[j];
for (i = 1, j--; j > 0; i++, j--)
- __asm__("ld1 {v0.16b}, %[in] ;"
+ __asm__("ld1 {v0.4s}, %[in] ;"
"aesimc v1.16b, v0.16b ;"
- "st1 {v1.16b}, %[out] ;"
+ "st1 {v1.4s}, %[out] ;"
: [out] "=Q"(key_dec[i])
: [in] "Q"(key_enc[j])
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index b46093d567e5..50330f5c3adc 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -2,7 +2,7 @@
* linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
* Crypto Extensions
*
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -22,11 +22,11 @@
cmp \rounds, #12
blo 2222f /* 128 bits */
beq 1111f /* 192 bits */
- ld1 {v17.16b-v18.16b}, [\rk], #32
-1111: ld1 {v19.16b-v20.16b}, [\rk], #32
-2222: ld1 {v21.16b-v24.16b}, [\rk], #64
- ld1 {v25.16b-v28.16b}, [\rk], #64
- ld1 {v29.16b-v31.16b}, [\rk]
+ ld1 {v17.4s-v18.4s}, [\rk], #32
+1111: ld1 {v19.4s-v20.4s}, [\rk], #32
+2222: ld1 {v21.4s-v24.4s}, [\rk], #64
+ ld1 {v25.4s-v28.4s}, [\rk], #64
+ ld1 {v29.4s-v31.4s}, [\rk]
.endm
/* prepare for encryption with key in rk[] */
diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S
index f2f9cc519309..6d2445d603cc 100644
--- a/arch/arm64/crypto/aes-cipher-core.S
+++ b/arch/arm64/crypto/aes-cipher-core.S
@@ -10,6 +10,7 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
+#include <asm/cache.h>
.text
@@ -17,94 +18,155 @@
out .req x1
in .req x2
rounds .req x3
- tt .req x4
- lt .req x2
+ tt .req x2
- .macro __pair, enc, reg0, reg1, in0, in1e, in1d, shift
+ .macro __pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
+ .ifc \op\shift, b0
+ ubfiz \reg0, \in0, #2, #8
+ ubfiz \reg1, \in1e, #2, #8
+ .else
ubfx \reg0, \in0, #\shift, #8
- .if \enc
ubfx \reg1, \in1e, #\shift, #8
- .else
- ubfx \reg1, \in1d, #\shift, #8
.endif
+
+ /*
+ * AArch64 cannot do byte size indexed loads from a table containing
+ * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
+ * valid instruction. So perform the shift explicitly first for the
+ * high bytes (the low byte is shifted implicitly by using ubfiz rather
+ * than ubfx above)
+ */
+ .ifnc \op, b
ldr \reg0, [tt, \reg0, uxtw #2]
ldr \reg1, [tt, \reg1, uxtw #2]
+ .else
+ .if \shift > 0
+ lsl \reg0, \reg0, #2
+ lsl \reg1, \reg1, #2
+ .endif
+ ldrb \reg0, [tt, \reg0, uxtw]
+ ldrb \reg1, [tt, \reg1, uxtw]
+ .endif
.endm
- .macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc
+ .macro __pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
+ ubfx \reg0, \in0, #\shift, #8
+ ubfx \reg1, \in1d, #\shift, #8
+ ldr\op \reg0, [tt, \reg0, uxtw #\sz]
+ ldr\op \reg1, [tt, \reg1, uxtw #\sz]
+ .endm
+
+ .macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
ldp \out0, \out1, [rk], #8
- __pair \enc, w13, w14, \in0, \in1, \in3, 0
- __pair \enc, w15, w16, \in1, \in2, \in0, 8
- __pair \enc, w17, w18, \in2, \in3, \in1, 16
- __pair \enc, \t0, \t1, \in3, \in0, \in2, 24
-
- eor \out0, \out0, w13
- eor \out1, \out1, w14
- eor \out0, \out0, w15, ror #24
- eor \out1, \out1, w16, ror #24
- eor \out0, \out0, w17, ror #16
- eor \out1, \out1, w18, ror #16
+ __pair\enc \sz, \op, w12, w13, \in0, \in1, \in3, 0
+ __pair\enc \sz, \op, w14, w15, \in1, \in2, \in0, 8
+ __pair\enc \sz, \op, w16, w17, \in2, \in3, \in1, 16
+ __pair\enc \sz, \op, \t0, \t1, \in3, \in0, \in2, 24
+
+ eor \out0, \out0, w12
+ eor \out1, \out1, w13
+ eor \out0, \out0, w14, ror #24
+ eor \out1, \out1, w15, ror #24
+ eor \out0, \out0, w16, ror #16
+ eor \out1, \out1, w17, ror #16
eor \out0, \out0, \t0, ror #8
eor \out1, \out1, \t1, ror #8
.endm
- .macro fround, out0, out1, out2, out3, in0, in1, in2, in3
- __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
- __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+ .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+ __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+ __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
.endm
- .macro iround, out0, out1, out2, out3, in0, in1, in2, in3
- __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
- __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+ .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+ __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+ __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
.endm
- .macro do_crypt, round, ttab, ltab
- ldp w5, w6, [in]
- ldp w7, w8, [in, #8]
- ldp w9, w10, [rk], #16
- ldp w11, w12, [rk, #-8]
+ .macro do_crypt, round, ttab, ltab, bsz
+ ldp w4, w5, [in]
+ ldp w6, w7, [in, #8]
+ ldp w8, w9, [rk], #16
+ ldp w10, w11, [rk, #-8]
+CPU_BE( rev w4, w4 )
CPU_BE( rev w5, w5 )
CPU_BE( rev w6, w6 )
CPU_BE( rev w7, w7 )
-CPU_BE( rev w8, w8 )
+ eor w4, w4, w8
eor w5, w5, w9
eor w6, w6, w10
eor w7, w7, w11
- eor w8, w8, w12
adr_l tt, \ttab
- adr_l lt, \ltab
tbnz rounds, #1, 1f
-0: \round w9, w10, w11, w12, w5, w6, w7, w8
- \round w5, w6, w7, w8, w9, w10, w11, w12
+0: \round w8, w9, w10, w11, w4, w5, w6, w7
+ \round w4, w5, w6, w7, w8, w9, w10, w11
1: subs rounds, rounds, #4
- \round w9, w10, w11, w12, w5, w6, w7, w8
- csel tt, tt, lt, hi
- \round w5, w6, w7, w8, w9, w10, w11, w12
- b.hi 0b
-
+ \round w8, w9, w10, w11, w4, w5, w6, w7
+ b.ls 3f
+2: \round w4, w5, w6, w7, w8, w9, w10, w11
+ b 0b
+3: adr_l tt, \ltab
+ \round w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
+
+CPU_BE( rev w4, w4 )
CPU_BE( rev w5, w5 )
CPU_BE( rev w6, w6 )
CPU_BE( rev w7, w7 )
-CPU_BE( rev w8, w8 )
- stp w5, w6, [out]
- stp w7, w8, [out, #8]
+ stp w4, w5, [out]
+ stp w6, w7, [out, #8]
ret
.endm
- .align 5
+ .align L1_CACHE_SHIFT
+ .type __aes_arm64_inverse_sbox, %object
+__aes_arm64_inverse_sbox:
+ .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+ .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+ .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+ .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+ .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+ .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+ .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+ .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+ .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+ .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+ .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+ .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+ .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+ .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+ .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+ .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+ .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+ .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+ .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+ .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+ .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+ .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+ .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+ .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+ .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+ .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+ .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+ .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+ .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+ .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+ .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+ .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+ .size __aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox
+
ENTRY(__aes_arm64_encrypt)
- do_crypt fround, crypto_ft_tab, crypto_fl_tab
+ do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2
ENDPROC(__aes_arm64_encrypt)
.align 5
ENTRY(__aes_arm64_decrypt)
- do_crypt iround, crypto_it_tab, crypto_il_tab
+ do_crypt iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
ENDPROC(__aes_arm64_decrypt)
diff --git a/arch/arm64/crypto/aes-ctr-fallback.h b/arch/arm64/crypto/aes-ctr-fallback.h
new file mode 100644
index 000000000000..c9285717b6b5
--- /dev/null
+++ b/arch/arm64/crypto/aes-ctr-fallback.h
@@ -0,0 +1,53 @@
+/*
+ * Fallback for sync aes(ctr) in contexts where kernel mode NEON
+ * is not allowed
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <crypto/internal/skcipher.h>
+
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
+/*
+ * aes_ctr_encrypt_fallback() - scalar CTR mode en/decryption of a request.
+ *
+ * Walks @req and XORs the data with a keystream produced by encrypting
+ * walk.iv with __aes_arm64_encrypt(), incrementing walk.iv after every
+ * block.
+ *
+ * @ctx: expanded AES key; key_enc and key_length are used
+ * @req: skcipher request to process
+ *
+ * Returns the last value returned by skcipher_walk_done(), or the value of
+ * skcipher_walk_virt() if the walk yields no data.
+ */
+static inline int aes_ctr_encrypt_fallback(struct crypto_aes_ctx *ctx,
+ struct skcipher_request *req)
+{
+ struct skcipher_walk walk;
+ u8 buf[AES_BLOCK_SIZE];
+ int err;
+
+ /* NOTE(review): presumably walk.nbytes is 0 when this fails - confirm */
+ err = skcipher_walk_virt(&walk, req, true);
+
+ while (walk.nbytes > 0) {
+ u8 *dst = walk.dst.virt.addr;
+ u8 *src = walk.src.virt.addr;
+ int nbytes = walk.nbytes;
+ int tail = 0;
+
+ /*
+ * More data follows this step: process whole blocks only and hand
+ * the remainder back to the walk as 'tail'.
+ */
+ if (nbytes < walk.total) {
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+ tail = walk.nbytes % AES_BLOCK_SIZE;
+ }
+
+ do {
+ /* only the final block of the request can be short */
+ int bsize = min(nbytes, AES_BLOCK_SIZE);
+
+ /* round count derived from the key length: 6 + key_length / 4 */
+ __aes_arm64_encrypt(ctx->key_enc, buf, walk.iv,
+ 6 + ctx->key_length / 4);
+ crypto_xor_cpy(dst, src, buf, bsize);
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+
+ /*
+ * The pointers and the counter always advance by a full block;
+ * nbytes is signed so a short last block drives it negative and
+ * ends the loop.
+ */
+ dst += AES_BLOCK_SIZE;
+ src += AES_BLOCK_SIZE;
+ nbytes -= AES_BLOCK_SIZE;
+ } while (nbytes > 0);
+
+ err = skcipher_walk_done(&walk, tail);
+ }
+ return err;
+}
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index bcf596b0197e..998ba519a026 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -10,6 +10,7 @@
#include <asm/neon.h>
#include <asm/hwcap.h>
+#include <asm/simd.h>
#include <crypto/aes.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
@@ -19,6 +20,7 @@
#include <crypto/xts.h>
#include "aes-ce-setkey.h"
+#include "aes-ctr-fallback.h"
#ifdef USE_V8_CRYPTO_EXTENSIONS
#define MODE "ce"
@@ -241,9 +243,7 @@ static int ctr_encrypt(struct skcipher_request *req)
aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
blocks, walk.iv, first);
- if (tdst != tsrc)
- memcpy(tdst, tsrc, nbytes);
- crypto_xor(tdst, tail, nbytes);
+ crypto_xor_cpy(tdst, tsrc, tail, nbytes);
err = skcipher_walk_done(&walk, 0);
}
kernel_neon_end();
@@ -251,6 +251,17 @@ static int ctr_encrypt(struct skcipher_request *req)
return err;
}
+/*
+ * ctr(aes) handler that may be called where SIMD is not usable: takes the
+ * NEON path when may_use_simd() allows it, the scalar fallback otherwise.
+ */
+static int ctr_encrypt_sync(struct skcipher_request *req)
+{
+	if (may_use_simd())
+		return ctr_encrypt(req);
+
+	return aes_ctr_encrypt_fallback(
+			crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)), req);
+}
+
static int xts_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -357,8 +368,8 @@ static struct skcipher_alg aes_algs[] = { {
.ivsize = AES_BLOCK_SIZE,
.chunksize = AES_BLOCK_SIZE,
.setkey = skcipher_aes_setkey,
- .encrypt = ctr_encrypt,
- .decrypt = ctr_encrypt,
+ .encrypt = ctr_encrypt_sync,
+ .decrypt = ctr_encrypt_sync,
}, {
.base = {
.cra_name = "__xts(aes)",
@@ -460,11 +471,35 @@ static int mac_init(struct shash_desc *desc)
return 0;
}
+/*
+ * mac_do_update() - absorb @blocks full blocks from @in into digest @dg.
+ *
+ * @enc_before: encrypt @dg before the first block is XORed in
+ * @enc_after:  encrypt @dg after the last block has been XORed in
+ *
+ * Uses aes_mac_update() under kernel_neon_begin()/kernel_neon_end() when
+ * SIMD may be used, and __aes_arm64_encrypt() otherwise.
+ */
+static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
+			  u8 dg[], int enc_before, int enc_after)
+{
+	int rounds = 6 + ctx->key_length / 4;
+
+	if (!may_use_simd()) {
+		if (enc_before)
+			__aes_arm64_encrypt(ctx->key_enc, dg, dg, rounds);
+
+		while (blocks != 0) {
+			blocks--;
+			crypto_xor(dg, in, AES_BLOCK_SIZE);
+
+			/* the last block is only encrypted on request */
+			if (enc_after || blocks != 0)
+				__aes_arm64_encrypt(ctx->key_enc, dg, dg,
+						    rounds);
+			in += AES_BLOCK_SIZE;
+		}
+		return;
+	}
+
+	kernel_neon_begin();
+	aes_mac_update(in, ctx->key_enc, rounds, blocks, dg, enc_before,
+		       enc_after);
+	kernel_neon_end();
+}
+
static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
{
struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
- int rounds = 6 + tctx->key.key_length / 4;
while (len > 0) {
unsigned int l;
@@ -476,10 +511,8 @@ static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
len %= AES_BLOCK_SIZE;
- kernel_neon_begin();
- aes_mac_update(p, tctx->key.key_enc, rounds, blocks,
- ctx->dg, (ctx->len != 0), (len != 0));
- kernel_neon_end();
+ mac_do_update(&tctx->key, p, blocks, ctx->dg,
+ (ctx->len != 0), (len != 0));
p += blocks * AES_BLOCK_SIZE;
@@ -507,11 +540,8 @@ static int cbcmac_final(struct shash_desc *desc, u8 *out)
{
struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
- int rounds = 6 + tctx->key.key_length / 4;
- kernel_neon_begin();
- aes_mac_update(NULL, tctx->key.key_enc, rounds, 0, ctx->dg, 1, 0);
- kernel_neon_end();
+ mac_do_update(&tctx->key, NULL, 0, ctx->dg, 1, 0);
memcpy(out, ctx->dg, AES_BLOCK_SIZE);
@@ -522,7 +552,6 @@ static int cmac_final(struct shash_desc *desc, u8 *out)
{
struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
- int rounds = 6 + tctx->key.key_length / 4;
u8 *consts = tctx->consts;
if (ctx->len != AES_BLOCK_SIZE) {
@@ -530,9 +559,7 @@ static int cmac_final(struct shash_desc *desc, u8 *out)
consts += AES_BLOCK_SIZE;
}
- kernel_neon_begin();
- aes_mac_update(consts, tctx->key.key_enc, rounds, 1, ctx->dg, 0, 1);
- kernel_neon_end();
+ mac_do_update(&tctx->key, consts, 1, ctx->dg, 0, 1);
memcpy(out, ctx->dg, AES_BLOCK_SIZE);
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index db2501d93550..c55d68ccb89f 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -1,7 +1,7 @@
/*
* Bit sliced AES using NEON instructions
*
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -9,12 +9,15 @@
*/
#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/aes.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
#include <linux/module.h>
+#include "aes-ctr-fallback.h"
+
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
@@ -58,6 +61,11 @@ struct aesbs_cbc_ctx {
u32 enc[AES_MAX_KEYLENGTH_U32];
};
+/*
+ * Context of the ctr(aes) bit-sliced driver: the bit-sliced key schedule
+ * plus an ordinary AES key schedule that aes_ctr_encrypt_fallback() uses
+ * when NEON may not be used.
+ */
+struct aesbs_ctr_ctx {
+ struct aesbs_ctx key; /* must be first member */
+ struct crypto_aes_ctx fallback;
+};
+
struct aesbs_xts_ctx {
struct aesbs_ctx key;
u32 twkey[AES_MAX_KEYLENGTH_U32];
@@ -196,6 +204,25 @@ static int cbc_decrypt(struct skcipher_request *req)
return err;
}
+/*
+ * Set the key for the ctr(aes) bit-sliced driver: expand it into the
+ * fallback key schedule first, then convert that schedule into bit-sliced
+ * form under kernel_neon_begin()/kernel_neon_end().
+ *
+ * Returns 0 on success or the error from crypto_aes_expand_key().
+ */
+static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key,
+				 unsigned int key_len)
+{
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err = crypto_aes_expand_key(&ctx->fallback, in_key, key_len);
+
+	if (!err) {
+		ctx->key.rounds = 6 + key_len / 4;
+
+		kernel_neon_begin();
+		aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc,
+				  ctx->key.rounds);
+		kernel_neon_end();
+	}
+	return err;
+}
+
static int ctr_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -224,9 +251,8 @@ static int ctr_encrypt(struct skcipher_request *req)
u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
- if (dst != src)
- memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
- crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
+ crypto_xor_cpy(dst, src, final,
+ walk.total % AES_BLOCK_SIZE);
err = skcipher_walk_done(&walk, 0);
break;
@@ -260,6 +286,17 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
return aesbs_setkey(tfm, in_key, key_len);
}
+/*
+ * ctr(aes) handler that may be called where SIMD is not usable: takes the
+ * bit-sliced NEON path when may_use_simd() allows it, otherwise the scalar
+ * fallback with the key schedule kept in ctx->fallback.
+ */
+static int ctr_encrypt_sync(struct skcipher_request *req)
+{
+	struct aesbs_ctr_ctx *ctx;
+
+	if (may_use_simd())
+		return ctr_encrypt(req);
+
+	ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	return aes_ctr_encrypt_fallback(&ctx->fallback, req);
+}
+
static int __xts_crypt(struct skcipher_request *req,
void (*fn)(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]))
@@ -356,7 +393,7 @@ static struct skcipher_alg aes_algs[] = { {
.base.cra_driver_name = "ctr-aes-neonbs",
.base.cra_priority = 250 - 1,
.base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct aesbs_ctx),
+ .base.cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = AES_MIN_KEY_SIZE,
@@ -364,9 +401,9 @@ static struct skcipher_alg aes_algs[] = { {
.chunksize = AES_BLOCK_SIZE,
.walksize = 8 * AES_BLOCK_SIZE,
.ivsize = AES_BLOCK_SIZE,
- .setkey = aesbs_setkey,
- .encrypt = ctr_encrypt,
- .decrypt = ctr_encrypt,
+ .setkey = aesbs_ctr_setkey_sync,
+ .encrypt = ctr_encrypt_sync,
+ .decrypt = ctr_encrypt_sync,
}, {
.base.cra_name = "__xts(aes)",
.base.cra_driver_name = "__xts-aes-neonbs",
diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c
index a7cd575ea223..cbdb75d15cd0 100644
--- a/arch/arm64/crypto/chacha20-neon-glue.c
+++ b/arch/arm64/crypto/chacha20-neon-glue.c
@@ -1,7 +1,7 @@
/*
* ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
*
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -26,6 +26,7 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
+#include <asm/simd.h>
asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
@@ -64,7 +65,7 @@ static int chacha20_neon(struct skcipher_request *req)
u32 state[16];
int err;
- if (req->cryptlen <= CHACHA20_BLOCK_SIZE)
+ if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
return crypto_chacha20_crypt(req);
err = skcipher_walk_virt(&walk, req, true);
diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c
index eccb1ae90064..624f4137918c 100644
--- a/arch/arm64/crypto/crc32-ce-glue.c
+++ b/arch/arm64/crypto/crc32-ce-glue.c
@@ -1,7 +1,7 @@
/*
* Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
*
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -19,6 +19,7 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
+#include <asm/simd.h>
#include <asm/unaligned.h>
#define PMULL_MIN_LEN 64L /* minimum size of buffer
@@ -105,10 +106,10 @@ static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
length -= l;
}
- if (length >= PMULL_MIN_LEN) {
+ if (length >= PMULL_MIN_LEN && may_use_simd()) {
l = round_down(length, SCALE_F);
- kernel_neon_begin_partial(10);
+ kernel_neon_begin();
*crc = crc32_pmull_le(data, l, *crc);
kernel_neon_end();
@@ -137,10 +138,10 @@ static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
length -= l;
}
- if (length >= PMULL_MIN_LEN) {
+ if (length >= PMULL_MIN_LEN && may_use_simd()) {
l = round_down(length, SCALE_F);
- kernel_neon_begin_partial(10);
+ kernel_neon_begin();
*crc = crc32c_pmull_le(data, l, *crc);
kernel_neon_end();
diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c
index 60cb590c2590..96f0cae4a022 100644
--- a/arch/arm64/crypto/crct10dif-ce-glue.c
+++ b/arch/arm64/crypto/crct10dif-ce-glue.c
@@ -1,7 +1,7 @@
/*
* Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
*
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -18,6 +18,7 @@
#include <crypto/internal/hash.h>
#include <asm/neon.h>
+#include <asm/simd.h>
#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U
@@ -48,9 +49,13 @@ static int crct10dif_update(struct shash_desc *desc, const u8 *data,
}
if (length > 0) {
- kernel_neon_begin_partial(14);
- *crc = crc_t10dif_pmull(*crc, data, length);
- kernel_neon_end();
+ if (may_use_simd()) {
+ kernel_neon_begin();
+ *crc = crc_t10dif_pmull(*crc, data, length);
+ kernel_neon_end();
+ } else {
+ *crc = crc_t10dif_generic(*crc, data, length);
+ }
}
return 0;
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index f0bb9f0b524f..11ebf1ae248a 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -1,7 +1,7 @@
/*
* Accelerated GHASH implementation with ARMv8 PMULL instructions.
*
- * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
@@ -11,31 +11,215 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
- SHASH .req v0
- SHASH2 .req v1
- T1 .req v2
- T2 .req v3
- MASK .req v4
- XL .req v5
- XM .req v6
- XH .req v7
- IN1 .req v7
+ SHASH .req v0
+ SHASH2 .req v1
+ T1 .req v2
+ T2 .req v3
+ MASK .req v4
+ XL .req v5
+ XM .req v6
+ XH .req v7
+ IN1 .req v7
+
+ k00_16 .req v8
+ k32_48 .req v9
+
+ t3 .req v10
+ t4 .req v11
+ t5 .req v12
+ t6 .req v13
+ t7 .req v14
+ t8 .req v15
+ t9 .req v16
+
+ perm1 .req v17
+ perm2 .req v18
+ perm3 .req v19
+
+ sh1 .req v20
+ sh2 .req v21
+ sh3 .req v22
+ sh4 .req v23
+
+ ss1 .req v24
+ ss2 .req v25
+ ss3 .req v26
+ ss4 .req v27
.text
.arch armv8-a+crypto
- /*
- * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
- * struct ghash_key const *k, const char *head)
- */
-ENTRY(pmull_ghash_update)
+ /* \rd = polynomial product of the low 64-bit lanes (.1d) of \rn and \rm */
+ .macro __pmull_p64, rd, rn, rm
+ pmull \rd\().1q, \rn\().1d, \rm\().1d
+ .endm
+
+ /* \rd = polynomial product of \rn and \rm using pmull2 on the .2d operands */
+ .macro __pmull2_p64, rd, rn, rm
+ pmull2 \rd\().1q, \rn\().2d, \rm\().2d
+ .endm
+
+ /*
+ * 8-bit pmull based multiply of the low halves of \ad and \bd into \rq.
+ * Prepares the byte-rotated copies A1..A3 of \ad in t3/t5/t7 and then
+ * expands __pmull_p8_\bd, so \bd must name an operand for which such a
+ * macro exists (SHASH or SHASH2 in this file).
+ */
+ .macro __pmull_p8, rq, ad, bd
+ ext t3.8b, \ad\().8b, \ad\().8b, #1 // A1
+ ext t5.8b, \ad\().8b, \ad\().8b, #2 // A2
+ ext t7.8b, \ad\().8b, \ad\().8b, #3 // A3
+
+ __pmull_p8_\bd \rq, \ad
+ .endm
+
+ /*
+ * 16b variant of __pmull_p8: A1..A3 are produced with tbl lookups through
+ * perm1..perm3 (set up by __pmull_pre_p8) before expanding
+ * __pmull2_p8_\bd.
+ */
+ .macro __pmull2_p8, rq, ad, bd
+ tbl t3.16b, {\ad\().16b}, perm1.16b // A1
+ tbl t5.16b, {\ad\().16b}, perm2.16b // A2
+ tbl t7.16b, {\ad\().16b}, perm3.16b // A3
+
+ __pmull2_p8_\bd \rq, \ad
+ .endm
+
+ /* __pmull_p8 back end for B = SHASH: 8b operands, pmull, rotated copies sh1..sh4 */
+ .macro __pmull_p8_SHASH, rq, ad
+ __pmull_p8_tail \rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
+ .endm
+
+ /* __pmull_p8 back end for B = SHASH2: 8b operands, pmull, rotated copies ss1..ss4 */
+ .macro __pmull_p8_SHASH2, rq, ad
+ __pmull_p8_tail \rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
+ .endm
+
+ /* __pmull2_p8 back end for B = SHASH: 16b operands, pmull2, rotated copies sh1..sh4 */
+ .macro __pmull2_p8_SHASH, rq, ad
+ __pmull_p8_tail \rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
+ .endm
+
+ /*
+ * Shared body of the 8-bit pmull based multiply.
+ * \rq result register
+ * \ad, \bd operands A and B, already carrying their .8b/.16b suffix
+ * \nb arrangement used for the temporaries (8b or 16b)
+ * \t empty for pmull, 2 for pmull2
+ * \b1..\b4 precomputed byte-rotated copies of B
+ * Expects the rotated copies A1..A3 of A in t3/t5/t7 and the masks
+ * k00_16/k32_48 from __pmull_pre_p8. Clobbers t3-t9.
+ */
+ .macro __pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
+ pmull\t t3.8h, t3.\nb, \bd // F = A1*B
+ pmull\t t4.8h, \ad, \b1\().\nb // E = A*B1
+ pmull\t t5.8h, t5.\nb, \bd // H = A2*B
+ pmull\t t6.8h, \ad, \b2\().\nb // G = A*B2
+ pmull\t t7.8h, t7.\nb, \bd // J = A3*B
+ pmull\t t8.8h, \ad, \b3\().\nb // I = A*B3
+ pmull\t t9.8h, \ad, \b4\().\nb // K = A*B4
+ pmull\t \rq\().8h, \ad, \bd // D = A*B
+
+ eor t3.16b, t3.16b, t4.16b // L = E + F
+ eor t5.16b, t5.16b, t6.16b // M = G + H
+ eor t7.16b, t7.16b, t8.16b // N = I + J
+
+ uzp1 t4.2d, t3.2d, t5.2d
+ uzp2 t3.2d, t3.2d, t5.2d
+ uzp1 t6.2d, t7.2d, t9.2d
+ uzp2 t7.2d, t7.2d, t9.2d
+
+ // t3 = (L) (P0 + P1) << 8
+ // t5 = (M) (P2 + P3) << 16
+ eor t4.16b, t4.16b, t3.16b
+ and t3.16b, t3.16b, k32_48.16b
+
+ // t7 = (N) (P4 + P5) << 24
+ // t9 = (K) (P6 + P7) << 32
+ eor t6.16b, t6.16b, t7.16b
+ and t7.16b, t7.16b, k00_16.16b
+
+ eor t4.16b, t4.16b, t3.16b
+ eor t6.16b, t6.16b, t7.16b
+
+ zip2 t5.2d, t4.2d, t3.2d
+ zip1 t3.2d, t4.2d, t3.2d
+ zip2 t9.2d, t6.2d, t7.2d
+ zip1 t7.2d, t6.2d, t7.2d
+
+ ext t3.16b, t3.16b, t3.16b, #15
+ ext t5.16b, t5.16b, t5.16b, #14
+ ext t7.16b, t7.16b, t7.16b, #13
+ ext t9.16b, t9.16b, t9.16b, #12
+
+ eor t3.16b, t3.16b, t5.16b
+ eor t7.16b, t7.16b, t9.16b
+ eor \rq\().16b, \rq\().16b, t3.16b
+ eor \rq\().16b, \rq\().16b, t7.16b
+ .endm
+
+ /* p64 setup: MASK = 0xe1 in every byte, then each 64-bit lane shifted left by 57 */
+ .macro __pmull_pre_p64
+ movi MASK.16b, #0xe1
+ shl MASK.2d, MASK.2d, #57
+ .endm
+
+ /*
+ * p8 setup: builds the masks k00_16/k32_48, the tbl permutation vectors
+ * perm1..perm3 (a fourth one is kept in T1), and the byte-rotated copies
+ * of SHASH (sh1..sh4) and SHASH2 (ss1..ss4) used by __pmull_p8_tail.
+ * SHASH and SHASH2 must already be loaded. Clobbers x5 and T1.
+ */
+ .macro __pmull_pre_p8
+ // k00_16 := 0x0000000000000000_000000000000ffff
+ // k32_48 := 0x00000000ffffffff_0000ffffffffffff
+ movi k32_48.2d, #0xffffffff
+ mov k32_48.h[2], k32_48.h[0]
+ ushr k00_16.2d, k32_48.2d, #32
+
+ // prepare the permutation vectors
+ mov_q x5, 0x080f0e0d0c0b0a09
+ movi T1.8b, #8
+ dup perm1.2d, x5
+ eor perm1.16b, perm1.16b, T1.16b
+ ushr perm2.2d, perm1.2d, #8
+ ushr perm3.2d, perm1.2d, #16
+ ushr T1.2d, perm1.2d, #24
+ sli perm2.2d, perm1.2d, #56
+ sli perm3.2d, perm1.2d, #48
+ sli T1.2d, perm1.2d, #40
+
+ // precompute loop invariants
+ tbl sh1.16b, {SHASH.16b}, perm1.16b
+ tbl sh2.16b, {SHASH.16b}, perm2.16b
+ tbl sh3.16b, {SHASH.16b}, perm3.16b
+ tbl sh4.16b, {SHASH.16b}, T1.16b
+ ext ss1.8b, SHASH2.8b, SHASH2.8b, #1
+ ext ss2.8b, SHASH2.8b, SHASH2.8b, #2
+ ext ss3.8b, SHASH2.8b, SHASH2.8b, #3
+ ext ss4.8b, SHASH2.8b, SHASH2.8b, #4
+ .endm
+
+ //
+ // PMULL (64x64->128) based reduction for CPUs that can do
+ // it in a single instruction.
+ //
+ // Reads XL, XM, T1 and MASK; updates XL, XM and XH and leaves an
+ // intermediate value in T2 that the code following the macro
+ // expansion in __pmull_ghash folds into XL.
+ //
+ .macro __pmull_reduce_p64
+ pmull T2.1q, XL.1d, MASK.1d
+ eor XM.16b, XM.16b, T1.16b
+
+ mov XH.d[0], XM.d[1]
+ mov XM.d[1], XL.d[0]
+
+ eor XL.16b, XM.16b, T2.16b
+ ext T2.16b, XL.16b, XL.16b, #8
+ pmull XL.1q, XL.1d, MASK.1d
+ .endm
+
+ //
+ // Alternative reduction for CPUs that lack support for the
+ // 64x64->128 PMULL instruction
+ //
+ // Uses only shifts, eor, ext and lane moves (no MASK). Reads XL, XM
+ // and T1; updates XL, XM and XH and clobbers T1 and T2. As with the
+ // p64 variant, T2 is consumed by the code following the expansion.
+ //
+ .macro __pmull_reduce_p8
+ eor XM.16b, XM.16b, T1.16b
+
+ mov XL.d[1], XM.d[0]
+ mov XH.d[0], XM.d[1]
+
+ shl T1.2d, XL.2d, #57
+ shl T2.2d, XL.2d, #62
+ eor T2.16b, T2.16b, T1.16b
+ shl T1.2d, XL.2d, #63
+ eor T2.16b, T2.16b, T1.16b
+ ext T1.16b, XL.16b, XH.16b, #8
+ eor T2.16b, T2.16b, T1.16b
+
+ mov XL.d[1], T2.d[0]
+ mov XH.d[0], T2.d[1]
+
+ ushr T2.2d, XL.2d, #1
+ eor XH.16b, XH.16b, XL.16b
+ eor XL.16b, XL.16b, T2.16b
+ ushr T2.2d, T2.2d, #6
+ ushr XL.2d, XL.2d, #1
+ .endm
+
+ .macro __pmull_ghash, pn
ld1 {SHASH.2d}, [x3]
ld1 {XL.2d}, [x1]
- movi MASK.16b, #0xe1
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
- shl MASK.2d, MASK.2d, #57
eor SHASH2.16b, SHASH2.16b, SHASH.16b
+ __pmull_pre_\pn
+
/* do the head block first, if supplied */
cbz x4, 0f
ld1 {T1.2d}, [x4]
@@ -52,28 +236,209 @@ CPU_LE( rev64 T1.16b, T1.16b )
eor T1.16b, T1.16b, T2.16b
eor XL.16b, XL.16b, IN1.16b
+ __pmull2_\pn XH, XL, SHASH // a1 * b1
+ eor T1.16b, T1.16b, XL.16b
+ __pmull_\pn XL, XL, SHASH // a0 * b0
+ __pmull_\pn XM, T1, SHASH2 // (a1 + a0)(b1 + b0)
+
+ eor T2.16b, XL.16b, XH.16b
+ ext T1.16b, XL.16b, XH.16b, #8
+ eor XM.16b, XM.16b, T2.16b
+
+ __pmull_reduce_\pn
+
+ eor T2.16b, T2.16b, XH.16b
+ eor XL.16b, XL.16b, T2.16b
+
+ cbnz w0, 0b
+
+ st1 {XL.2d}, [x1]
+ ret
+ .endm
+
+ /*
+ * void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+ * struct ghash_key const *k, const char *head)
+ *
+ * GHASH update expanded from __pmull_ghash with the p64 (64x64 pmull)
+ * helper macros.
+ */
+ENTRY(pmull_ghash_update_p64)
+ __pmull_ghash p64
+ENDPROC(pmull_ghash_update_p64)
+
+ /*
+ * void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+ * struct ghash_key const *k, const char *head)
+ *
+ * Same interface as pmull_ghash_update_p64, expanded from __pmull_ghash
+ * with the p8 (8-bit pmull) helper macros.
+ */
+ENTRY(pmull_ghash_update_p8)
+ __pmull_ghash p8
+ENDPROC(pmull_ghash_update_p8)
+
+ KS .req v8
+ CTR .req v9
+ INP .req v10
+
+ /*
+ * Load the AES round keys at \rk into the top of the vector register
+ * file so that the last 11 keys always end up in v21-v31: \rounds < 12
+ * loads v21-v31 only, \rounds == 12 additionally v19-v20, and larger
+ * values v17-v31. \rk is post-incremented by all but the final load.
+ */
+ .macro load_round_keys, rounds, rk
+ cmp \rounds, #12
+ blo 2222f /* 128 bits */
+ beq 1111f /* 192 bits */
+ ld1 {v17.4s-v18.4s}, [\rk], #32
+1111: ld1 {v19.4s-v20.4s}, [\rk], #32
+2222: ld1 {v21.4s-v24.4s}, [\rk], #64
+ ld1 {v25.4s-v28.4s}, [\rk], #64
+ ld1 {v29.4s-v31.4s}, [\rk]
+ .endm
+
+ /* One full AES round on \state with round key \key: aese followed by aesmc */
+ .macro enc_round, state, key
+ aese \state\().16b, \key\().16b
+ aesmc \state\().16b, \state\().16b
+ .endm
+
+ /*
+ * Encrypt the single block in \state in place using the round keys that
+ * load_round_keys placed in v17-v31. \rounds selects how many of the
+ * leading rounds (v17-v20) are executed; the final round is aese with
+ * v30 followed by an eor with v31 instead of aesmc.
+ */
+ .macro enc_block, state, rounds
+ cmp \rounds, #12
+ b.lo 2222f /* 128 bits */
+ b.eq 1111f /* 192 bits */
+ enc_round \state, v17
+ enc_round \state, v18
+1111: enc_round \state, v19
+ enc_round \state, v20
+2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+ enc_round \state, \key
+ .endr
+ aese \state\().16b, v30.16b
+ eor \state\().16b, \state\().16b, v31.16b
+ .endm
+
+ /*
+ * Body shared by pmull_gcm_encrypt (\enc == 1) and pmull_gcm_decrypt
+ * (\enc == 0); the AES rounds on the counter block are interleaved
+ * with the GHASH update of the ciphertext block.
+ *
+ * Register use, following the C prototypes of the two entry points:
+ * w0 blocks, x1 dg[], x2 dst[], x3 src[], x4 ghash key, x5 ctr[],
+ * w6 rounds, x7 ks[] (encrypt only).
+ *
+ * The round keys are not loaded here; they are taken from v17-v31 as
+ * left there by load_round_keys.
+ *
+ * \enc == 1: the input is XORed with KS and stored before it is hashed,
+ * and the keystream produced in an iteration is consumed by the next
+ * one; KS is loaded from / written back to ks[] around the loop.
+ * \enc == 0: the input is hashed as is and XORed with the keystream
+ * produced in the same iteration.
+ *
+ * The loop decrements w0 before testing it, so it runs at least once.
+ */
+ .macro pmull_gcm_do_crypt, enc
+ ld1 {SHASH.2d}, [x4]
+ ld1 {XL.2d}, [x1]
+ ldr x8, [x5, #8] // load lower counter
+
+ movi MASK.16b, #0xe1
+ ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
+CPU_LE( rev x8, x8 )
+ shl MASK.2d, MASK.2d, #57
+ eor SHASH2.16b, SHASH2.16b, SHASH.16b
+
+ .if \enc == 1
+ ld1 {KS.16b}, [x7]
+ .endif
+
+0: ld1 {CTR.8b}, [x5] // load upper counter
+ ld1 {INP.16b}, [x3], #16
+ rev x9, x8
+ add x8, x8, #1
+ sub w0, w0, #1
+ ins CTR.d[1], x9 // set lower counter
+
+ .if \enc == 1
+ eor INP.16b, INP.16b, KS.16b // encrypt input
+ st1 {INP.16b}, [x2], #16
+ .endif
+
+ rev64 T1.16b, INP.16b
+
+ cmp w6, #12
+ b.ge 2f // AES-192/256?
+
+1: enc_round CTR, v21
+
+ ext T2.16b, XL.16b, XL.16b, #8
+ ext IN1.16b, T1.16b, T1.16b, #8
+
+ enc_round CTR, v22
+
+ eor T1.16b, T1.16b, T2.16b
+ eor XL.16b, XL.16b, IN1.16b
+
+ enc_round CTR, v23
+
pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1
eor T1.16b, T1.16b, XL.16b
+
+ enc_round CTR, v24
+
pmull XL.1q, SHASH.1d, XL.1d // a0 * b0
pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0)
+ enc_round CTR, v25
+
ext T1.16b, XL.16b, XH.16b, #8
eor T2.16b, XL.16b, XH.16b
eor XM.16b, XM.16b, T1.16b
+
+ enc_round CTR, v26
+
eor XM.16b, XM.16b, T2.16b
pmull T2.1q, XL.1d, MASK.1d
+ enc_round CTR, v27
+
mov XH.d[0], XM.d[1]
mov XM.d[1], XL.d[0]
+ enc_round CTR, v28
+
eor XL.16b, XM.16b, T2.16b
+
+ enc_round CTR, v29
+
ext T2.16b, XL.16b, XL.16b, #8
+
+ aese CTR.16b, v30.16b
+
pmull XL.1q, XL.1d, MASK.1d
eor T2.16b, T2.16b, XH.16b
+
+ eor KS.16b, CTR.16b, v31.16b
+
eor XL.16b, XL.16b, T2.16b
+ .if \enc == 0
+ eor INP.16b, INP.16b, KS.16b
+ st1 {INP.16b}, [x2], #16
+ .endif
+
cbnz w0, 0b
+CPU_LE( rev x8, x8 )
st1 {XL.2d}, [x1]
+ str x8, [x5, #8] // store lower counter
+
+ .if \enc == 1
+ st1 {KS.16b}, [x7]
+ .endif
+
+ ret
+
+2: b.eq 3f // AES-192?
+ enc_round CTR, v17
+ enc_round CTR, v18
+3: enc_round CTR, v19
+ enc_round CTR, v20
+ b 1b
+ .endm
+
+ /*
+ * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
+ * struct ghash_key const *k, u8 ctr[],
+ * int rounds, u8 ks[])
+ *
+ * ks[] is read on entry as the keystream for the first block and is
+ * overwritten on return with the keystream generated last. The lower
+ * 64 bits of ctr[] are advanced by the number of blocks processed.
+ */
+ENTRY(pmull_gcm_encrypt)
+ pmull_gcm_do_crypt 1
+ENDPROC(pmull_gcm_encrypt)
+
+ /*
+ * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
+ * struct ghash_key const *k, u8 ctr[],
+ * int rounds)
+ *
+ * Unlike pmull_gcm_encrypt there is no ks[] argument: each block is
+ * XORed with the keystream generated from the current ctr[] value.
+ */
+ENTRY(pmull_gcm_decrypt)
+ pmull_gcm_do_crypt 0
+ENDPROC(pmull_gcm_decrypt)
+
+ /*
+ * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
+ *
+ * Encrypt the single block at src into dst. When rk is non-NULL the
+ * round keys are (re)loaded into v17-v31 first; when rk is NULL the
+ * load is skipped and whatever round keys are already in those
+ * registers are used.
+ */
+ENTRY(pmull_gcm_encrypt_block)
+ cbz x2, 0f
+ load_round_keys w3, x2
+0: ld1 {v0.16b}, [x1]
+ enc_block v0, w3
+ st1 {v0.16b}, [x0]
ret
-ENDPROC(pmull_ghash_update)
+ENDPROC(pmull_gcm_encrypt_block)
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 833ec1e3f3e9..cfc9c92814fd 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -1,7 +1,7 @@
/*
* Accelerated GHASH implementation with ARMv8 PMULL instructions.
*
- * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
@@ -9,22 +9,33 @@
*/
#include <asm/neon.h>
+#include <asm/simd.h>
#include <asm/unaligned.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
-MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("ghash");
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
+#define GCM_IV_SIZE 12
struct ghash_key {
u64 a;
u64 b;
+ be128 k; /* verbatim copy of the key passed to setkey; used by the non-SIMD fallback in ghash_do_update() */
};
struct ghash_desc_ctx {
@@ -33,8 +44,35 @@ struct ghash_desc_ctx {
u32 count;
};
-asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k, const char *head);
+/*
+ * Transform context of the gcm(aes) AEAD: the expanded AES key and the
+ * GHASH key that gcm_setkey() derives from it.
+ */
+struct gcm_aes_ctx {
+ struct crypto_aes_ctx aes_key;
+ struct ghash_key ghash_key;
+};
+
+asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+ struct ghash_key const *k,
+ const char *head);
+
+asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+ struct ghash_key const *k,
+ const char *head);
+
+static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
+ struct ghash_key const *k,
+ const char *head);
+
+asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
+ const u8 src[], struct ghash_key const *k,
+ u8 ctr[], int rounds, u8 ks[]);
+
+asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
+ const u8 src[], struct ghash_key const *k,
+ u8 ctr[], int rounds);
+
+asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
+ u32 const rk[], int rounds);
+
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
static int ghash_init(struct shash_desc *desc)
{
@@ -44,6 +82,36 @@ static int ghash_init(struct shash_desc *desc)
return 0;
}
+/*
+ * Fold @blocks GHASH blocks from @src into the digest @dg, preceded by the
+ * single block at @head when @head is non-NULL.
+ *
+ * Uses the selected pmull_ghash_update() routine when SIMD may be used in
+ * the current context; otherwise each block is XORed into the state and
+ * multiplied by the raw key copy (key->k) with gf128mul_lle().
+ */
+static void ghash_do_update(int blocks, u64 dg[], const char *src,
+			    struct ghash_key *key, const char *head)
+{
+	be128 acc;
+
+	if (likely(may_use_simd())) {
+		kernel_neon_begin();
+		pmull_ghash_update(blocks, dg, src, key, head);
+		kernel_neon_end();
+		return;
+	}
+
+	/* dg[] holds the state as two CPU-endian words, high word in dg[1] */
+	acc.a = cpu_to_be64(dg[1]);
+	acc.b = cpu_to_be64(dg[0]);
+
+	do {
+		const u8 *in;
+
+		if (head) {
+			/* the head block is extra: it does not count as a src block */
+			in = head;
+			head = NULL;
+			blocks++;
+		} else {
+			in = src;
+			src += GHASH_BLOCK_SIZE;
+		}
+
+		crypto_xor((u8 *)&acc, in, GHASH_BLOCK_SIZE);
+		gf128mul_lle(&acc, &key->k);
+	} while (--blocks);
+
+	dg[0] = be64_to_cpu(acc.b);
+	dg[1] = be64_to_cpu(acc.a);
+}
+
static int ghash_update(struct shash_desc *desc, const u8 *src,
unsigned int len)
{
@@ -67,10 +135,9 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
blocks = len / GHASH_BLOCK_SIZE;
len %= GHASH_BLOCK_SIZE;
- kernel_neon_begin_partial(8);
- pmull_ghash_update(blocks, ctx->digest, src, key,
- partial ? ctx->buf : NULL);
- kernel_neon_end();
+ ghash_do_update(blocks, ctx->digest, src, key,
+ partial ? ctx->buf : NULL);
+
src += blocks * GHASH_BLOCK_SIZE;
partial = 0;
}
@@ -89,9 +156,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
- kernel_neon_begin_partial(8);
- pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
- kernel_neon_end();
+ ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
}
put_unaligned_be64(ctx->digest[1], dst);
put_unaligned_be64(ctx->digest[0], dst + 8);
@@ -100,16 +165,13 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
return 0;
}
-static int ghash_setkey(struct crypto_shash *tfm,
- const u8 *inkey, unsigned int keylen)
+static int __ghash_setkey(struct ghash_key *key,
+ const u8 *inkey, unsigned int keylen)
{
- struct ghash_key *key = crypto_shash_ctx(tfm);
u64 a, b;
- if (keylen != GHASH_BLOCK_SIZE) {
- crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
- return -EINVAL;
- }
+ /* needed for the fallback */
+ memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
/* perform multiplication by 'x' in GF(2^128) */
b = get_unaligned_be64(inkey);
@@ -124,33 +186,418 @@ static int ghash_setkey(struct crypto_shash *tfm,
return 0;
}
+/*
+ * shash .setkey hook: accept only keys of exactly GHASH_BLOCK_SIZE bytes
+ * and pass them on to __ghash_setkey(); any other length flags the tfm
+ * with CRYPTO_TFM_RES_BAD_KEY_LEN and fails with -EINVAL.
+ */
+static int ghash_setkey(struct crypto_shash *tfm,
+			const u8 *inkey, unsigned int keylen)
+{
+	if (keylen == GHASH_BLOCK_SIZE)
+		return __ghash_setkey(crypto_shash_ctx(tfm), inkey, keylen);
+
+	crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	return -EINVAL;
+}
+
static struct shash_alg ghash_alg = {
- .digestsize = GHASH_DIGEST_SIZE,
- .init = ghash_init,
- .update = ghash_update,
- .final = ghash_final,
- .setkey = ghash_setkey,
- .descsize = sizeof(struct ghash_desc_ctx),
- .base = {
- .cra_name = "ghash",
- .cra_driver_name = "ghash-ce",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_SHASH,
- .cra_blocksize = GHASH_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ghash_key),
- .cra_module = THIS_MODULE,
- },
+ .base.cra_name = "ghash",
+ .base.cra_driver_name = "ghash-ce",
+ .base.cra_priority = 200,
+ .base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .base.cra_blocksize = GHASH_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct ghash_key),
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = GHASH_DIGEST_SIZE,
+ .init = ghash_init,
+ .update = ghash_update,
+ .final = ghash_final,
+ .setkey = ghash_setkey,
+ .descsize = sizeof(struct ghash_desc_ctx),
+};
+
+/*
+ * Number of AES rounds for the key held in @ctx. AES uses 10, 12 and 14
+ * rounds for 128, 192 and 256 bit keys, i.e. 6 plus the key length in
+ * 32-bit words.
+ */
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+	unsigned int key_words = ctx->key_length / 4;
+
+	return 6 + key_words;
+}
+
+/*
+ * AEAD .setkey hook: expand the AES key into the context and derive the
+ * GHASH key from it by encrypting an all-zero block.
+ *
+ * Returns 0 on success, or -EINVAL (with CRYPTO_TFM_RES_BAD_KEY_LEN set on
+ * the tfm) when crypto_aes_expand_key() rejects the key.
+ */
+static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
+		      unsigned int keylen)
+{
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
+	u8 key[GHASH_BLOCK_SIZE];
+	int ret;
+
+	ret = crypto_aes_expand_key(&ctx->aes_key, inkey, keylen);
+	if (ret) {
+		tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	/* the hash key is the encryption of the all-zero block */
+	__aes_arm64_encrypt(ctx->aes_key.key_enc, key, (u8[AES_BLOCK_SIZE]){},
+			    num_rounds(&ctx->aes_key));
+
+	ret = __ghash_setkey(&ctx->ghash_key, key, sizeof(key));
+
+	/*
+	 * The hash key is secret key material; wipe the temporary copy so
+	 * it does not linger on the stack after we return.
+	 */
+	memzero_explicit(key, sizeof(key));
+
+	return ret;
+}
+
+/*
+ * AEAD .setauthsize hook: tag lengths of 4, 8 and 12 to 16 bytes are
+ * accepted, everything else is rejected with -EINVAL. @tfm is not used.
+ */
+static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	if (authsize == 4 || authsize == 8)
+		return 0;
+
+	if (authsize >= 12 && authsize <= 16)
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * Feed @count bytes at @src into the GHASH state @dg.
+ *
+ * @buf / @buf_count carry a partially filled block between calls: input is
+ * first used to top up @buf, then all whole blocks are hashed (with @buf as
+ * the head block if it holds data), and any remainder is stashed in @buf.
+ */
+static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
+			   int *buf_count, struct gcm_aes_ctx *ctx)
+{
+	int pending = *buf_count;
+
+	if (pending > 0) {
+		int fill = min(count, GHASH_BLOCK_SIZE - pending);
+
+		memcpy(buf + pending, src, fill);
+
+		pending += fill;
+		src += fill;
+		count -= fill;
+	}
+
+	if (count >= GHASH_BLOCK_SIZE || pending == GHASH_BLOCK_SIZE) {
+		int blocks = count / GHASH_BLOCK_SIZE;
+
+		ghash_do_update(blocks, dg, src, &ctx->ghash_key,
+				pending ? buf : NULL);
+
+		src += blocks * GHASH_BLOCK_SIZE;
+		count %= GHASH_BLOCK_SIZE;
+		pending = 0;
+	}
+
+	if (count > 0) {
+		memcpy(buf, src, count);
+		pending = count;
+	}
+
+	*buf_count = pending;
+}
+
+/*
+ * Hash the associated data of @req into @dg: req->assoclen bytes are read
+ * from the start of req->src and fed through gcm_update_mac(); a trailing
+ * partial block is zero-padded to GHASH_BLOCK_SIZE before being hashed.
+ *
+ * The walk loop runs at least once; both callers in this file only call
+ * this function when req->assoclen is non-zero.
+ */
+static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+ u8 buf[GHASH_BLOCK_SIZE];
+ struct scatter_walk walk;
+ u32 len = req->assoclen;
+ int buf_count = 0;
+
+ scatterwalk_start(&walk, req->src);
+
+ do {
+ u32 n = scatterwalk_clamp(&walk, len);
+ u8 *p;
+
+ /* nothing left in this scatterlist entry: move on to the next one */
+ if (!n) {
+ scatterwalk_start(&walk, sg_next(walk.sg));
+ n = scatterwalk_clamp(&walk, len);
+ }
+ p = scatterwalk_map(&walk);
+
+ gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
+ len -= n;
+
+ scatterwalk_unmap(p);
+ scatterwalk_advance(&walk, n);
+ scatterwalk_done(&walk, 0, len);
+ } while (len);
+
+ /* flush the final partial block, padded with zeroes */
+ if (buf_count) {
+ memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
+ ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+ }
+}
+
+/*
+ * Finish the GCM tag computation: hash the length block (bit lengths of
+ * the associated data and of the payload, each as a 64-bit big-endian
+ * value) into @dg and XOR the resulting digest into @tag, which the caller
+ * has preloaded with the encrypted initial counter block.
+ *
+ * @cryptlen is the payload length in bytes, excluding the auth tag.
+ */
+static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx,
+		      u64 dg[], u8 tag[], int cryptlen)
+{
+	u8 mac[AES_BLOCK_SIZE];
+	u128 lengths;
+
+	/*
+	 * Widen to 64 bits before converting bytes to bits: done in 32-bit
+	 * arithmetic, the multiplication wraps for lengths of 512 MiB and
+	 * up (and overflows the signed @cryptlen from 256 MiB). The u32
+	 * cast recovers the unsigned byte count the callers passed in.
+	 */
+	lengths.a = cpu_to_be64((u64)req->assoclen * 8);
+	lengths.b = cpu_to_be64((u64)(u32)cryptlen * 8);
+
+	ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL);
+
+	put_unaligned_be64(dg[1], mac);
+	put_unaligned_be64(dg[0], mac + 8);
+
+	crypto_xor(tag, mac, AES_BLOCK_SIZE);
+}
+
+/*
+ * AEAD .encrypt hook for gcm(aes).
+ *
+ * Hashes the associated data, encrypts the payload in counter mode while
+ * hashing the resulting ciphertext, and writes the authentication tag
+ * behind the ciphertext in req->dst. The counter block with value 1 is
+ * encrypted into tag[] (combined with the digest in gcm_final()); the
+ * payload keystream starts at counter value 2.
+ *
+ * Returns 0 on success or the error reported by the skcipher walk.
+ */
+static int gcm_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+ struct skcipher_walk walk;
+ u8 iv[AES_BLOCK_SIZE];
+ u8 ks[AES_BLOCK_SIZE];
+ u8 tag[AES_BLOCK_SIZE];
+ u64 dg[2] = {};
+ int err;
+
+ if (req->assoclen)
+ gcm_calculate_auth_mac(req, dg);
+
+ memcpy(iv, req->iv, GCM_IV_SIZE);
+ put_unaligned_be32(1, iv + GCM_IV_SIZE);
+
+ if (likely(may_use_simd())) {
+ kernel_neon_begin();
+
+ /*
+ * The first call loads the round keys; passing NULL afterwards
+ * makes pmull_gcm_encrypt_block() reuse them. ks[] is primed with
+ * the keystream for counter 2, so iv[] moves on to counter 3.
+ */
+ pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
+ num_rounds(&ctx->aes_key));
+ put_unaligned_be32(2, iv + GCM_IV_SIZE);
+ pmull_gcm_encrypt_block(ks, iv, NULL,
+ num_rounds(&ctx->aes_key));
+ put_unaligned_be32(3, iv + GCM_IV_SIZE);
+
+ err = skcipher_walk_aead_encrypt(&walk, req, true);
+
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+ pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
+ walk.src.virt.addr, &ctx->ghash_key,
+ iv, num_rounds(&ctx->aes_key), ks);
+
+ err = skcipher_walk_done(&walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ kernel_neon_end();
+ } else {
+ /* scalar fallback: encrypt block by block, then hash the ciphertext */
+ __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
+ num_rounds(&ctx->aes_key));
+ put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+ err = skcipher_walk_aead_encrypt(&walk, req, true);
+
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ int blocks = walk.nbytes / AES_BLOCK_SIZE;
+ u8 *dst = walk.dst.virt.addr;
+ u8 *src = walk.src.virt.addr;
+
+ do {
+ __aes_arm64_encrypt(ctx->aes_key.key_enc,
+ ks, iv,
+ num_rounds(&ctx->aes_key));
+ crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE);
+ crypto_inc(iv, AES_BLOCK_SIZE);
+
+ dst += AES_BLOCK_SIZE;
+ src += AES_BLOCK_SIZE;
+ } while (--blocks > 0);
+
+ ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg,
+ walk.dst.virt.addr, &ctx->ghash_key,
+ NULL);
+
+ err = skcipher_walk_done(&walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ /* keystream for the trailing partial block, consumed below */
+ if (walk.nbytes)
+ __aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv,
+ num_rounds(&ctx->aes_key));
+ }
+
+ /* handle the tail */
+ if (walk.nbytes) {
+ u8 buf[GHASH_BLOCK_SIZE];
+
+ crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks,
+ walk.nbytes);
+
+ /* hash the partial ciphertext block, zero-padded to a full block */
+ memcpy(buf, walk.dst.virt.addr, walk.nbytes);
+ memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
+ ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+
+ err = skcipher_walk_done(&walk, 0);
+ }
+
+ if (err)
+ return err;
+
+ gcm_final(req, ctx, dg, tag, req->cryptlen);
+
+ /* copy authtag to end of dst */
+ scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
+ crypto_aead_authsize(aead), 1);
+
+ return 0;
+}
+
+/*
+ * AEAD .decrypt hook for gcm(aes).
+ *
+ * Hashes the associated data and the ciphertext, decrypts the payload in
+ * counter mode (starting at counter value 2), then recomputes the tag and
+ * compares its first authsize bytes against the tag stored behind the
+ * ciphertext in req->src.
+ *
+ * Returns 0 on success, the error reported by the skcipher walk, or
+ * -EBADMSG when the tags differ.
+ *
+ * NOTE(review): the plaintext is written to the destination before the
+ * tag comparison takes place.
+ */
+static int gcm_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+ unsigned int authsize = crypto_aead_authsize(aead);
+ struct skcipher_walk walk;
+ u8 iv[AES_BLOCK_SIZE];
+ u8 tag[AES_BLOCK_SIZE];
+ u8 buf[GHASH_BLOCK_SIZE];
+ u64 dg[2] = {};
+ int err;
+
+ if (req->assoclen)
+ gcm_calculate_auth_mac(req, dg);
+
+ memcpy(iv, req->iv, GCM_IV_SIZE);
+ put_unaligned_be32(1, iv + GCM_IV_SIZE);
+
+ if (likely(may_use_simd())) {
+ kernel_neon_begin();
+
+ /* this call also loads the round keys used by the calls below */
+ pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
+ num_rounds(&ctx->aes_key));
+ put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+ err = skcipher_walk_aead_decrypt(&walk, req, true);
+
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+ pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
+ walk.src.virt.addr, &ctx->ghash_key,
+ iv, num_rounds(&ctx->aes_key));
+
+ err = skcipher_walk_done(&walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ /* turn iv[] in place into the keystream for the partial tail block */
+ if (walk.nbytes)
+ pmull_gcm_encrypt_block(iv, iv, NULL,
+ num_rounds(&ctx->aes_key));
+
+ kernel_neon_end();
+ } else {
+ /* scalar fallback: hash the ciphertext first, then decrypt it */
+ __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
+ num_rounds(&ctx->aes_key));
+ put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+ err = skcipher_walk_aead_decrypt(&walk, req, true);
+
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ int blocks = walk.nbytes / AES_BLOCK_SIZE;
+ u8 *dst = walk.dst.virt.addr;
+ u8 *src = walk.src.virt.addr;
+
+ ghash_do_update(blocks, dg, walk.src.virt.addr,
+ &ctx->ghash_key, NULL);
+
+ do {
+ __aes_arm64_encrypt(ctx->aes_key.key_enc,
+ buf, iv,
+ num_rounds(&ctx->aes_key));
+ crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
+ crypto_inc(iv, AES_BLOCK_SIZE);
+
+ dst += AES_BLOCK_SIZE;
+ src += AES_BLOCK_SIZE;
+ } while (--blocks > 0);
+
+ err = skcipher_walk_done(&walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ /* turn iv[] in place into the keystream for the partial tail block */
+ if (walk.nbytes)
+ __aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv,
+ num_rounds(&ctx->aes_key));
+ }
+
+ /* handle the tail */
+ if (walk.nbytes) {
+ /* hash the zero-padded ciphertext before it is overwritten */
+ memcpy(buf, walk.src.virt.addr, walk.nbytes);
+ memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
+ ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+
+ crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv,
+ walk.nbytes);
+
+ err = skcipher_walk_done(&walk, 0);
+ }
+
+ if (err)
+ return err;
+
+ gcm_final(req, ctx, dg, tag, req->cryptlen - authsize);
+
+ /* compare calculated auth tag with the stored one */
+ scatterwalk_map_and_copy(buf, req->src,
+ req->assoclen + req->cryptlen - authsize,
+ authsize, 0);
+
+ if (crypto_memneq(tag, buf, authsize))
+ return -EBADMSG;
+ return 0;
+}
+
+/*
+ * AEAD algorithm descriptor for gcm(aes); laid out base-first like
+ * ghash_alg above.
+ */
+static struct aead_alg gcm_aes_alg = {
+	.base.cra_name		= "gcm(aes)",
+	.base.cra_driver_name	= "gcm-aes-ce",
+	.base.cra_priority	= 300,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct gcm_aes_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.ivsize			= GCM_IV_SIZE,
+	.chunksize		= AES_BLOCK_SIZE,
+	.maxauthsize		= AES_BLOCK_SIZE,
+	.setkey			= gcm_setkey,
+	.setauthsize		= gcm_setauthsize,
+	.encrypt		= gcm_encrypt,
+	.decrypt		= gcm_decrypt,
};
+/*
+ * Module init. Fails with -ENODEV without ASIMD. Selects the p64 or p8
+ * GHASH routine depending on PMULL support and registers the ghash shash;
+ * the gcm(aes) AEAD is registered only when PMULL is available, and the
+ * shash is unregistered again if that registration fails.
+ */
static int __init ghash_ce_mod_init(void)
{
- return crypto_register_shash(&ghash_alg);
+ int ret;
+
+ if (!(elf_hwcap & HWCAP_ASIMD))
+ return -ENODEV;
+
+ if (elf_hwcap & HWCAP_PMULL)
+ pmull_ghash_update = pmull_ghash_update_p64;
+
+ else
+ pmull_ghash_update = pmull_ghash_update_p8;
+
+ ret = crypto_register_shash(&ghash_alg);
+ if (ret)
+ return ret;
+
+ if (elf_hwcap & HWCAP_PMULL) {
+ ret = crypto_register_aead(&gcm_aes_alg);
+ if (ret)
+ crypto_unregister_shash(&ghash_alg);
+ }
+ return ret;
}
static void __exit ghash_ce_mod_exit(void)
{
crypto_unregister_shash(&ghash_alg);
+	/*
+	 * ghash_ce_mod_init() registers gcm_aes_alg only on CPUs with PMULL;
+	 * unregistering an algorithm that was never registered is invalid,
+	 * so apply the same condition here.
+	 */
+	if (elf_hwcap & HWCAP_PMULL)
+		crypto_unregister_aead(&gcm_aes_alg);
}
-module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+static const struct cpu_feature ghash_cpu_feature[] = {
+ { cpu_feature(PMULL) }, { }
+};
+MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature);
+
+module_init(ghash_ce_mod_init);
module_exit(ghash_ce_mod_exit);
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index ea319c055f5d..efbeb3e0dcfb 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -1,7 +1,7 @@
/*
* sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
*
- * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
*/
#include <asm/neon.h>
+#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
@@ -37,8 +38,11 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
{
struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+ if (!may_use_simd())
+ return crypto_sha1_update(desc, data, len);
+
sctx->finalize = 0;
- kernel_neon_begin_partial(16);
+ kernel_neon_begin();
sha1_base_do_update(desc, data, len,
(sha1_block_fn *)sha1_ce_transform);
kernel_neon_end();
@@ -52,13 +56,16 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
struct sha1_ce_state *sctx = shash_desc_ctx(desc);
bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
+ if (!may_use_simd())
+ return crypto_sha1_finup(desc, data, len, out);
+
/*
* Allow the asm code to perform the finalization if there is no
* partial data and the input is a round multiple of the block size.
*/
sctx->finalize = finalize;
- kernel_neon_begin_partial(16);
+ kernel_neon_begin();
sha1_base_do_update(desc, data, len,
(sha1_block_fn *)sha1_ce_transform);
if (!finalize)
@@ -71,8 +78,11 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out)
{
struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+ if (!may_use_simd())
+ return crypto_sha1_finup(desc, NULL, 0, out);
+
sctx->finalize = 0;
- kernel_neon_begin_partial(16);
+ kernel_neon_begin();
sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
kernel_neon_end();
return sha1_base_finish(desc, out);
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 0ed9486f75dd..fd1ff2b13dfa 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -1,7 +1,7 @@
/*
* sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
*
- * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
*/
#include <asm/neon.h>
+#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
@@ -34,13 +35,19 @@ const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
finalize);
+asmlinkage void sha256_block_data_order(u32 *digest, u8 const *src, int blocks);
+
static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+ if (!may_use_simd())
+ return sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha256_block_data_order);
+
sctx->finalize = 0;
- kernel_neon_begin_partial(28);
+ kernel_neon_begin();
sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha2_ce_transform);
kernel_neon_end();
@@ -54,13 +61,22 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
struct sha256_ce_state *sctx = shash_desc_ctx(desc);
bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
+ if (!may_use_simd()) {
+ if (len)
+ sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha256_block_data_order);
+ sha256_base_do_finalize(desc,
+ (sha256_block_fn *)sha256_block_data_order);
+ return sha256_base_finish(desc, out);
+ }
+
/*
* Allow the asm code to perform the finalization if there is no
* partial data and the input is a round multiple of the block size.
*/
sctx->finalize = finalize;
- kernel_neon_begin_partial(28);
+ kernel_neon_begin();
sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha2_ce_transform);
if (!finalize)
@@ -74,8 +90,14 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out)
{
struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+ if (!may_use_simd()) {
+ sha256_base_do_finalize(desc,
+ (sha256_block_fn *)sha256_block_data_order);
+ return sha256_base_finish(desc, out);
+ }
+
sctx->finalize = 0;
- kernel_neon_begin_partial(28);
+ kernel_neon_begin();
sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
kernel_neon_end();
return sha256_base_finish(desc, out);
diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c
index a2226f841960..b064d925fe2a 100644
--- a/arch/arm64/crypto/sha256-glue.c
+++ b/arch/arm64/crypto/sha256-glue.c
@@ -29,6 +29,7 @@ MODULE_ALIAS_CRYPTO("sha256");
asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
unsigned int num_blks);
+EXPORT_SYMBOL(sha256_block_data_order);
asmlinkage void sha256_block_neon(u32 *digest, const void *data,
unsigned int num_blks);
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index f81c7b685fc6..2326e39d5892 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -20,7 +20,6 @@ generic-y += rwsem.h
generic-y += segment.h
generic-y += serial.h
generic-y += set_memory.h
-generic-y += simd.h
generic-y += sizes.h
generic-y += switch_to.h
generic-y += trace_clock.h
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 8cef47fa2218..b7e3f74822da 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -116,6 +116,8 @@ static inline void gic_write_bpr1(u32 val)
#define gic_read_typer(c) readq_relaxed(c)
#define gic_write_irouter(v, c) writeq_relaxed(v, c)
+#define gic_read_lpir(c) readq_relaxed(c)
+#define gic_write_lpir(v, c) writeq_relaxed(v, c)
#define gic_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
@@ -133,5 +135,10 @@ static inline void gic_write_bpr1(u32 val)
#define gicr_write_pendbaser(v, c) writeq_relaxed(v, c)
#define gicr_read_pendbaser(c) readq_relaxed(c)
+#define gits_write_vpropbaser(v, c) writeq_relaxed(v, c)
+
+#define gits_write_vpendbaser(v, c) writeq_relaxed(v, c)
+#define gits_read_vpendbaser(c) readq_relaxed(c)
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_ARCH_GICV3_H */
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index 74d08e44a651..a652ce0a5cb2 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -65,13 +65,13 @@ DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *,
u64 _val; \
if (needs_unstable_timer_counter_workaround()) { \
const struct arch_timer_erratum_workaround *wa; \
- preempt_disable(); \
+ preempt_disable_notrace(); \
wa = __this_cpu_read(timer_unstable_counter_workaround); \
if (wa && wa->read_##reg) \
_val = wa->read_##reg(); \
else \
_val = read_sysreg(reg); \
- preempt_enable(); \
+ preempt_enable_notrace(); \
} else { \
_val = read_sysreg(reg); \
} \
diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h
new file mode 100644
index 000000000000..636e755bcdca
--- /dev/null
+++ b/arch/arm64/include/asm/asm-bug.h
@@ -0,0 +1,54 @@
+#ifndef __ASM_ASM_BUG_H
+/*
+ * Copyright (C) 2017 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#define __ASM_ASM_BUG_H
+
+#include <asm/brk-imm.h>
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)
+#define __BUGVERBOSE_LOCATION(file, line) \
+ .pushsection .rodata.str,"aMS",@progbits,1; \
+ 2: .string file; \
+ .popsection; \
+ \
+ .long 2b - 0b; \
+ .short line;
+#else
+#define _BUGVERBOSE_LOCATION(file, line)
+#endif
+
+#ifdef CONFIG_GENERIC_BUG
+
+#define __BUG_ENTRY(flags) \
+ .pushsection __bug_table,"aw"; \
+ .align 2; \
+ 0: .long 1f - 0b; \
+_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \
+ .short flags; \
+ .popsection; \
+ 1:
+#else
+#define __BUG_ENTRY(flags)
+#endif
+
+#define ASM_BUG_FLAGS(flags) \
+ __BUG_ENTRY(flags) \
+ brk BUG_BRK_IMM
+
+#define ASM_BUG() ASM_BUG_FLAGS(0)
+
+#endif /* __ASM_ASM_BUG_H */
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 1b67c3782d00..d58a6253c6ab 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -230,12 +230,18 @@ lr .req x30 // link register
.endm
/*
- * @dst: Result of per_cpu(sym, smp_processor_id())
+ * @dst: Result of per_cpu(sym, smp_processor_id()), can be SP for
+ * non-module code
* @sym: The name of the per-cpu variable
* @tmp: scratch register
*/
.macro adr_this_cpu, dst, sym, tmp
+#ifndef MODULE
+ adrp \tmp, \sym
+ add \dst, \tmp, #:lo12:\sym
+#else
adr_l \dst, \sym
+#endif
mrs \tmp, tpidr_el1
add \dst, \dst, \tmp
.endm
@@ -353,6 +359,12 @@ alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
alternative_else
dc civac, \kaddr
alternative_endif
+ .elseif (\op == cvap)
+alternative_if ARM64_HAS_DCPOP
+ sys 3, c7, c12, 1, \kaddr // dc cvap
+alternative_else
+ dc cvac, \kaddr
+alternative_endif
.else
dc \op, \kaddr
.endif
@@ -403,6 +415,17 @@ alternative_endif
.size __pi_##x, . - x; \
ENDPROC(x)
+/*
+ * Annotate a function as being unsuitable for kprobes.
+ */
+#ifdef CONFIG_KPROBES
+#define NOKPROBE(x) \
+ .pushsection "_kprobe_blacklist", "aw"; \
+ .quad x; \
+ .popsection;
+#else
+#define NOKPROBE(x)
+#endif
/*
* Emit a 64-bit absolute little endian symbol reference in a way that
* ensures that it will be resolved at build time, even when building a
diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h
index a02a57186f56..d7dc43752705 100644
--- a/arch/arm64/include/asm/bug.h
+++ b/arch/arm64/include/asm/bug.h
@@ -18,41 +18,12 @@
#ifndef _ARCH_ARM64_ASM_BUG_H
#define _ARCH_ARM64_ASM_BUG_H
-#include <asm/brk-imm.h>
+#include <linux/stringify.h>
-#ifdef CONFIG_DEBUG_BUGVERBOSE
-#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)
-#define __BUGVERBOSE_LOCATION(file, line) \
- ".pushsection .rodata.str,\"aMS\",@progbits,1\n" \
- "2: .string \"" file "\"\n\t" \
- ".popsection\n\t" \
- \
- ".long 2b - 0b\n\t" \
- ".short " #line "\n\t"
-#else
-#define _BUGVERBOSE_LOCATION(file, line)
-#endif
-
-#ifdef CONFIG_GENERIC_BUG
-
-#define __BUG_ENTRY(flags) \
- ".pushsection __bug_table,\"aw\"\n\t" \
- ".align 2\n\t" \
- "0: .long 1f - 0b\n\t" \
-_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \
- ".short " #flags "\n\t" \
- ".popsection\n" \
- "1: "
-#else
-#define __BUG_ENTRY(flags) ""
-#endif
+#include <asm/asm-bug.h>
#define __BUG_FLAGS(flags) \
- asm volatile ( \
- __BUG_ENTRY(flags) \
- "brk %[imm]" :: [imm] "i" (BUG_BRK_IMM) \
- );
-
+ asm volatile (__stringify(ASM_BUG_FLAGS(flags)));
#define BUG() do { \
__BUG_FLAGS(0); \
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index d74a284abdc2..76d1cc85d5b1 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -67,7 +67,9 @@
*/
extern void flush_icache_range(unsigned long start, unsigned long end);
extern void __flush_dcache_area(void *addr, size_t len);
+extern void __inval_dcache_area(void *addr, size_t len);
extern void __clean_dcache_area_poc(void *addr, size_t len);
+extern void __clean_dcache_area_pop(void *addr, size_t len);
extern void __clean_dcache_area_pou(void *addr, size_t len);
extern long __flush_cache_user_range(unsigned long start, unsigned long end);
extern void sync_icache_aliases(void *kaddr, unsigned long len);
@@ -150,6 +152,6 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
{
}
-int set_memory_valid(unsigned long addr, unsigned long size, int enable);
+int set_memory_valid(unsigned long addr, int numpages, int enable);
#endif
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 8d2272c6822c..8da621627d7c 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -39,7 +39,8 @@
#define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18
#define ARM64_WORKAROUND_858921 19
#define ARM64_WORKAROUND_CAVIUM_30115 20
+#define ARM64_HAS_DCPOP 21
-#define ARM64_NCAPS 21
+#define ARM64_NCAPS 22
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 8f3043aba873..b93904b16fc2 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -3,7 +3,9 @@
#include <asm/boot.h>
#include <asm/cpufeature.h>
+#include <asm/fpsimd.h>
#include <asm/io.h>
+#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/neon.h>
#include <asm/ptrace.h>
@@ -20,8 +22,8 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
#define arch_efi_call_virt_setup() \
({ \
- kernel_neon_begin(); \
efi_virtmap_load(); \
+ __efi_fpsimd_begin(); \
})
#define arch_efi_call_virt(p, f, args...) \
@@ -33,8 +35,8 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
#define arch_efi_call_virt_teardown() \
({ \
+ __efi_fpsimd_end(); \
efi_virtmap_unload(); \
- kernel_neon_end(); \
})
#define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
@@ -48,6 +50,13 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
*/
#define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */
+/*
+ * In some configurations (e.g. VMAP_STACK && 64K pages), stacks built into the
+ * kernel need greater alignment than we require the segments to be padded to.
+ */
+#define EFI_KIMG_ALIGN \
+ (SEGMENT_ALIGN > THREAD_ALIGN ? SEGMENT_ALIGN : THREAD_ALIGN)
+
/* on arm64, the FDT may be located anywhere in system RAM */
static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base)
{
@@ -81,6 +90,9 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base,
#define alloc_screen_info(x...) &screen_info
#define free_screen_info(x...)
+/* redeclare as 'hidden' so the compiler will generate relative references */
+extern struct screen_info screen_info __attribute__((__visibility__("hidden")));
+
static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
{
}
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index acae781f7359..33be513ef24c 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -114,10 +114,10 @@
/*
* This is the base location for PIE (ET_DYN with INTERP) loads. On
- * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * 64-bit, this is above 4GB to leave the entire 32-bit address
* space open for things that want to use the area for 32-bit pointers.
*/
-#define ELF_ET_DYN_BASE 0x100000000UL
+#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3)
#ifndef __ASSEMBLY__
@@ -139,7 +139,6 @@ typedef struct user_fpsimd_state elf_fpregset_t;
#define SET_PERSONALITY(ex) \
({ \
- clear_bit(TIF_32BIT, &current->mm->context.flags); \
clear_thread_flag(TIF_32BIT); \
current->personality &= ~READ_IMPLIES_EXEC; \
})
@@ -195,7 +194,6 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
*/
#define COMPAT_SET_PERSONALITY(ex) \
({ \
- set_bit(TIF_32BIT, &current->mm->context.flags); \
set_thread_flag(TIF_32BIT); \
})
#define COMPAT_ARCH_DLINFO
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 8cabd57b6348..66ed8b6b9976 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -77,16 +77,23 @@
#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
-#define ESR_ELx_IL (UL(1) << 25)
+#define ESR_ELx_IL_SHIFT (25)
+#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT)
#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
/* ISS field definitions shared by different classes */
-#define ESR_ELx_WNR (UL(1) << 6)
+#define ESR_ELx_WNR_SHIFT (6)
+#define ESR_ELx_WNR (UL(1) << ESR_ELx_WNR_SHIFT)
/* Shared ISS field definitions for Data/Instruction aborts */
-#define ESR_ELx_FnV (UL(1) << 10)
-#define ESR_ELx_EA (UL(1) << 9)
-#define ESR_ELx_S1PTW (UL(1) << 7)
+#define ESR_ELx_SET_SHIFT (11)
+#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT)
+#define ESR_ELx_FnV_SHIFT (10)
+#define ESR_ELx_FnV (UL(1) << ESR_ELx_FnV_SHIFT)
+#define ESR_ELx_EA_SHIFT (9)
+#define ESR_ELx_EA (UL(1) << ESR_ELx_EA_SHIFT)
+#define ESR_ELx_S1PTW_SHIFT (7)
+#define ESR_ELx_S1PTW (UL(1) << ESR_ELx_S1PTW_SHIFT)
/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */
#define ESR_ELx_FSC (0x3F)
@@ -97,15 +104,20 @@
#define ESR_ELx_FSC_PERM (0x0C)
/* ISS field definitions for Data Aborts */
-#define ESR_ELx_ISV (UL(1) << 24)
+#define ESR_ELx_ISV_SHIFT (24)
+#define ESR_ELx_ISV (UL(1) << ESR_ELx_ISV_SHIFT)
#define ESR_ELx_SAS_SHIFT (22)
#define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT)
-#define ESR_ELx_SSE (UL(1) << 21)
+#define ESR_ELx_SSE_SHIFT (21)
+#define ESR_ELx_SSE (UL(1) << ESR_ELx_SSE_SHIFT)
#define ESR_ELx_SRT_SHIFT (16)
#define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT)
-#define ESR_ELx_SF (UL(1) << 15)
-#define ESR_ELx_AR (UL(1) << 14)
-#define ESR_ELx_CM (UL(1) << 8)
+#define ESR_ELx_SF_SHIFT (15)
+#define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT)
+#define ESR_ELx_AR_SHIFT (14)
+#define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT)
+#define ESR_ELx_CM_SHIFT (8)
+#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT)
/* ISS field definitions for exceptions taken in to Hyp */
#define ESR_ELx_CV (UL(1) << 24)
@@ -157,9 +169,10 @@
/*
* User space cache operations have the following sysreg encoding
* in System instructions.
- * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 14 }, WRITE (L=0)
+ * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 12, 14 }, WRITE (L=0)
*/
#define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14
+#define ESR_ELx_SYS64_ISS_CRM_DC_CVAP 12
#define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11
#define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10
#define ESR_ELx_SYS64_ISS_CRM_IC_IVAU 5
@@ -209,6 +222,13 @@
#ifndef __ASSEMBLY__
#include <asm/types.h>
+static inline bool esr_is_data_abort(u32 esr)
+{
+ const u32 ec = ESR_ELx_EC(esr);
+
+ return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
+}
+
const char *esr_get_class_string(u32 esr);
#endif /* __ASSEMBLY */
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 50f559f574fe..410c48163c6a 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -41,16 +41,6 @@ struct fpsimd_state {
unsigned int cpu;
};
-/*
- * Struct for stacking the bottom 'n' FP/SIMD registers.
- */
-struct fpsimd_partial_state {
- u32 fpsr;
- u32 fpcr;
- u32 num_regs;
- __uint128_t vregs[32];
-};
-
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */
@@ -77,9 +67,9 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);
extern void fpsimd_flush_task_state(struct task_struct *target);
-extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
- u32 num_regs);
-extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
+/* For use by EFI runtime services calls only */
+extern void __efi_fpsimd_begin(void);
+extern void __efi_fpsimd_end(void);
#endif
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index a2daf1293028..0f5fdd388b0d 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -75,59 +75,3 @@
ldr w\tmpnr, [\state, #16 * 2 + 4]
fpsimd_restore_fpcr x\tmpnr, \state
.endm
-
-.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
- mrs x\tmpnr1, fpsr
- str w\numnr, [\state, #8]
- mrs x\tmpnr2, fpcr
- stp w\tmpnr1, w\tmpnr2, [\state]
- adr x\tmpnr1, 0f
- add \state, \state, x\numnr, lsl #4
- sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
- br x\tmpnr1
- stp q30, q31, [\state, #-16 * 30 - 16]
- stp q28, q29, [\state, #-16 * 28 - 16]
- stp q26, q27, [\state, #-16 * 26 - 16]
- stp q24, q25, [\state, #-16 * 24 - 16]
- stp q22, q23, [\state, #-16 * 22 - 16]
- stp q20, q21, [\state, #-16 * 20 - 16]
- stp q18, q19, [\state, #-16 * 18 - 16]
- stp q16, q17, [\state, #-16 * 16 - 16]
- stp q14, q15, [\state, #-16 * 14 - 16]
- stp q12, q13, [\state, #-16 * 12 - 16]
- stp q10, q11, [\state, #-16 * 10 - 16]
- stp q8, q9, [\state, #-16 * 8 - 16]
- stp q6, q7, [\state, #-16 * 6 - 16]
- stp q4, q5, [\state, #-16 * 4 - 16]
- stp q2, q3, [\state, #-16 * 2 - 16]
- stp q0, q1, [\state, #-16 * 0 - 16]
-0:
-.endm
-
-.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
- ldp w\tmpnr1, w\tmpnr2, [\state]
- msr fpsr, x\tmpnr1
- fpsimd_restore_fpcr x\tmpnr2, x\tmpnr1
- adr x\tmpnr1, 0f
- ldr w\tmpnr2, [\state, #8]
- add \state, \state, x\tmpnr2, lsl #4
- sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
- br x\tmpnr1
- ldp q30, q31, [\state, #-16 * 30 - 16]
- ldp q28, q29, [\state, #-16 * 28 - 16]
- ldp q26, q27, [\state, #-16 * 26 - 16]
- ldp q24, q25, [\state, #-16 * 24 - 16]
- ldp q22, q23, [\state, #-16 * 22 - 16]
- ldp q20, q21, [\state, #-16 * 20 - 16]
- ldp q18, q19, [\state, #-16 * 18 - 16]
- ldp q16, q17, [\state, #-16 * 16 - 16]
- ldp q14, q15, [\state, #-16 * 14 - 16]
- ldp q12, q13, [\state, #-16 * 12 - 16]
- ldp q10, q11, [\state, #-16 * 10 - 16]
- ldp q8, q9, [\state, #-16 * 8 - 16]
- ldp q6, q7, [\state, #-16 * 6 - 16]
- ldp q4, q5, [\state, #-16 * 4 - 16]
- ldp q2, q3, [\state, #-16 * 2 - 16]
- ldp q0, q1, [\state, #-16 * 0 - 16]
-0:
-.endm
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index f32b42e8725d..5bb2fd4674e7 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -48,20 +48,10 @@ do { \
} while (0)
static inline int
-futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (int)(encoded_op << 8) >> 20;
- int cmparg = (int)(encoded_op << 20) >> 20;
int oldval = 0, ret, tmp;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1U << (oparg & 0x1f);
-
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
- return -EFAULT;
-
pagefault_disable();
switch (op) {
@@ -91,17 +81,9 @@ futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
pagefault_enable();
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = oldval;
+
return ret;
}
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 793bd73b0d07..1dca41bea16a 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -18,7 +18,6 @@
#ifndef __ASM_HUGETLB_H
#define __ASM_HUGETLB_H
-#include <asm-generic/hugetlb.h>
#include <asm/page.h>
static inline pte_t huge_ptep_get(pte_t *ptep)
@@ -82,6 +81,14 @@ extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep);
extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep);
+extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long sz);
+#define huge_pte_clear huge_pte_clear
+extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned long sz);
+#define set_huge_swap_pte_at set_huge_swap_pte_at
+
+#include <asm-generic/hugetlb.h>
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
static inline bool gigantic_page_supported(void) { return true; }
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index b77197d941fc..5e6f77239064 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -1,45 +1,12 @@
#ifndef __ASM_IRQ_H
#define __ASM_IRQ_H
-#define IRQ_STACK_SIZE THREAD_SIZE
-#define IRQ_STACK_START_SP THREAD_START_SP
-
#ifndef __ASSEMBLER__
-#include <linux/percpu.h>
-
#include <asm-generic/irq.h>
-#include <asm/thread_info.h>
struct pt_regs;
-DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
-
-/*
- * The highest address on the stack, and the first to be used. Used to
- * find the dummy-stack frame put down by el?_irq() in entry.S, which
- * is structured as follows:
- *
- * ------------
- * | | <- irq_stack_ptr
- * top ------------
- * | x19 | <- irq_stack_ptr - 0x08
- * ------------
- * | x29 | <- irq_stack_ptr - 0x10
- * ------------
- *
- * where x19 holds a copy of the task stack pointer where the struct pt_regs
- * from kernel_entry can be found.
- *
- */
-#define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP)
-
-/*
- * The offset from irq_stack_ptr where entry.S will store the original
- * stack pointer. Used by unwind_frame() and dump_backtrace().
- */
-#define IRQ_STACK_TO_TASK_STACK(ptr) (*((unsigned long *)((ptr) - 0x08)))
-
extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
static inline int nr_legacy_irqs(void)
@@ -47,14 +14,5 @@ static inline int nr_legacy_irqs(void)
return 0;
}
-static inline bool on_irq_stack(unsigned long sp, int cpu)
-{
- /* variable names the same as kernel/stacktrace.c */
- unsigned long low = (unsigned long)per_cpu(irq_stack, cpu);
- unsigned long high = low + IRQ_STACK_START_SP;
-
- return (low <= sp && sp <= high);
-}
-
#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index fe39e6841326..e5df3fce0008 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -188,11 +188,6 @@ static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
}
-static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
-{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_EA);
-}
-
static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
{
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
@@ -240,6 +235,25 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
}
+static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
+{
+ switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
+ case FSC_SEA:
+ case FSC_SEA_TTW0:
+ case FSC_SEA_TTW1:
+ case FSC_SEA_TTW2:
+ case FSC_SEA_TTW3:
+ case FSC_SECC:
+ case FSC_SECC_TTW0:
+ case FSC_SECC_TTW1:
+ case FSC_SECC_TTW2:
+ case FSC_SECC_TTW3:
+ return true;
+ default:
+ return false;
+ }
+}
+
static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
{
u32 esr = kvm_vcpu_get_hsr(vcpu);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d68630007b14..e923b58606e2 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -326,12 +326,6 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
-/* We do not have shadow page tables, hence the empty hooks */
-static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
- unsigned long address)
-{
-}
-
struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
void kvm_arm_halt_guest(struct kvm *kvm);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index a89cc22abadc..672c8684d5c2 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -175,18 +175,15 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
static inline void kvm_set_s2pte_readonly(pte_t *pte)
{
- pteval_t pteval;
- unsigned long tmp;
-
- asm volatile("// kvm_set_s2pte_readonly\n"
- " prfm pstl1strm, %2\n"
- "1: ldxr %0, %2\n"
- " and %0, %0, %3 // clear PTE_S2_RDWR\n"
- " orr %0, %0, %4 // set PTE_S2_RDONLY\n"
- " stxr %w1, %0, %2\n"
- " cbnz %w1, 1b\n"
- : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*pte))
- : "L" (~PTE_S2_RDWR), "L" (PTE_S2_RDONLY));
+ pteval_t old_pteval, pteval;
+
+ pteval = READ_ONCE(pte_val(*pte));
+ do {
+ old_pteval = pteval;
+ pteval &= ~PTE_S2_RDWR;
+ pteval |= PTE_S2_RDONLY;
+ pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval);
+ } while (pteval != old_pteval);
}
static inline bool kvm_s2pte_readonly(pte_t *pte)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index ef39dcb9ca6a..3585a5e26151 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -25,6 +25,7 @@
#include <linux/const.h>
#include <linux/types.h>
#include <asm/bug.h>
+#include <asm/page-def.h>
#include <asm/sizes.h>
/*
@@ -103,6 +104,58 @@
#define KASAN_SHADOW_SIZE (0)
#endif
+#define MIN_THREAD_SHIFT 14
+
+/*
+ * VMAP'd stacks are allocated at page granularity, so we must ensure that such
+ * stacks are a multiple of page size.
+ */
+#if defined(CONFIG_VMAP_STACK) && (MIN_THREAD_SHIFT < PAGE_SHIFT)
+#define THREAD_SHIFT PAGE_SHIFT
+#else
+#define THREAD_SHIFT MIN_THREAD_SHIFT
+#endif
+
+#if THREAD_SHIFT >= PAGE_SHIFT
+#define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT)
+#endif
+
+#define THREAD_SIZE (UL(1) << THREAD_SHIFT)
+
+/*
+ * By aligning VMAP'd stacks to 2 * THREAD_SIZE, we can detect overflow by
+ * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry
+ * assembly.
+ */
+#ifdef CONFIG_VMAP_STACK
+#define THREAD_ALIGN (2 * THREAD_SIZE)
+#else
+#define THREAD_ALIGN THREAD_SIZE
+#endif
+
+#define IRQ_STACK_SIZE THREAD_SIZE
+
+#define OVERFLOW_STACK_SIZE SZ_4K
+
+/*
+ * Alignment of kernel segments (e.g. .text, .data).
+ */
+#if defined(CONFIG_DEBUG_ALIGN_RODATA)
+/*
+ * 4 KB granule: 1 level 2 entry
+ * 16 KB granule: 128 level 3 entries, with contiguous bit
+ * 64 KB granule: 32 level 3 entries, with contiguous bit
+ */
+#define SEGMENT_ALIGN SZ_2M
+#else
+/*
+ * 4 KB granule: 16 level 3 entries, with contiguous bit
+ * 16 KB granule: 4 level 3 entries, without contiguous bit
+ * 64 KB granule: 1 level 3 entry
+ */
+#define SEGMENT_ALIGN SZ_64K
+#endif
+
/*
* Memory types available.
*/
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 5468c834b072..0d34bf0a89c7 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -16,6 +16,8 @@
#ifndef __ASM_MMU_H
#define __ASM_MMU_H
+#define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */
+
typedef struct {
atomic64_t id;
void *vdso;
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index ad4cdc966c0f..f922eaf780f9 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -8,12 +8,22 @@
* published by the Free Software Foundation.
*/
+#ifndef __ASM_NEON_H
+#define __ASM_NEON_H
+
#include <linux/types.h>
#include <asm/fpsimd.h>
#define cpu_has_neon() system_supports_fpsimd()
-#define kernel_neon_begin() kernel_neon_begin_partial(32)
-
-void kernel_neon_begin_partial(u32 num_regs);
+void kernel_neon_begin(void);
void kernel_neon_end(void);
+
+/*
+ * Temporary macro to allow the crypto code to compile. Note that the
+ * semantics of kernel_neon_begin_partial() are now different from the
+ * original as it does not allow being called in an interrupt context.
+ */
+#define kernel_neon_begin_partial(num_regs) kernel_neon_begin()
+
+#endif /* ! __ASM_NEON_H */
diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
index bf466d1876e3..ef7b23863a7c 100644
--- a/arch/arm64/include/asm/numa.h
+++ b/arch/arm64/include/asm/numa.h
@@ -7,9 +7,6 @@
#define NR_NODE_MEMBLKS (MAX_NUMNODES * 2)
-/* currently, arm64 implements flat NUMA topology */
-#define parent_node(node) (node)
-
int __node_distance(int from, int to);
#define node_distance(a, b) __node_distance(a, b)
diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h
new file mode 100644
index 000000000000..01591a29dc2e
--- /dev/null
+++ b/arch/arm64/include/asm/page-def.h
@@ -0,0 +1,34 @@
+/*
+ * Based on arch/arm/include/asm/page.h
+ *
+ * Copyright (C) 1995-2003 Russell King
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_PAGE_DEF_H
+#define __ASM_PAGE_DEF_H
+
+#include <linux/const.h>
+
+/* PAGE_SHIFT determines the page size */
+/* CONT_SHIFT determines the number of pages which can be tracked together */
+#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT
+#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT
+#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
+#define CONT_MASK (~(CONT_SIZE-1))
+
+#endif /* __ASM_PAGE_DEF_H */
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 8472c6def5ef..60d02c81a3a2 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -19,17 +19,7 @@
#ifndef __ASM_PAGE_H
#define __ASM_PAGE_H
-#include <linux/const.h>
-
-/* PAGE_SHIFT determines the page size */
-/* CONT_SHIFT determines the number of pages which can be tracked together */
-#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT
-#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT
-#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
-
-#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
-#define CONT_MASK (~(CONT_SIZE-1))
+#include <asm/page-def.h>
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 2142c7726e76..0a5635fb0ef9 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -63,23 +63,21 @@
#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
-#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
+#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
-#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN)
+#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
+#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
#define __P000 PAGE_NONE
#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
+#define __P010 PAGE_READONLY
+#define __P011 PAGE_READONLY
#define __P100 PAGE_EXECONLY
#define __P101 PAGE_READONLY_EXEC
-#define __P110 PAGE_COPY_EXEC
-#define __P111 PAGE_COPY_EXEC
+#define __P110 PAGE_READONLY_EXEC
+#define __P111 PAGE_READONLY_EXEC
#define __S000 PAGE_NONE
#define __S001 PAGE_READONLY
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 6eae342ced6b..bc4e92337d16 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -39,6 +39,7 @@
#ifndef __ASSEMBLY__
+#include <asm/cmpxchg.h>
#include <asm/fixmap.h>
#include <linux/mmdebug.h>
@@ -84,11 +85,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
(__boundary - 1 < (end) - 1) ? __boundary : (end); \
})
-#ifdef CONFIG_ARM64_HW_AFDBM
#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
-#else
-#define pte_hw_dirty(pte) (0)
-#endif
#define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY))
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
@@ -124,12 +121,16 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
static inline pte_t pte_wrprotect(pte_t pte)
{
- return clear_pte_bit(pte, __pgprot(PTE_WRITE));
+ pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
+ pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+ return pte;
}
static inline pte_t pte_mkwrite(pte_t pte)
{
- return set_pte_bit(pte, __pgprot(PTE_WRITE));
+ pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
+ pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+ return pte;
}
static inline pte_t pte_mkclean(pte_t pte)
@@ -168,11 +169,6 @@ static inline pte_t pte_mknoncont(pte_t pte)
return clear_pte_bit(pte, __pgprot(PTE_CONT));
}
-static inline pte_t pte_clear_rdonly(pte_t pte)
-{
- return clear_pte_bit(pte, __pgprot(PTE_RDONLY));
-}
-
static inline pte_t pte_mkpresent(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_VALID));
@@ -220,22 +216,15 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- if (pte_present(pte)) {
- if (pte_sw_dirty(pte) && pte_write(pte))
- pte_val(pte) &= ~PTE_RDONLY;
- else
- pte_val(pte) |= PTE_RDONLY;
- if (pte_user_exec(pte) && !pte_special(pte))
- __sync_icache_dcache(pte, addr);
- }
+ if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
+ __sync_icache_dcache(pte, addr);
/*
* If the existing pte is valid, check for potential race with
* hardware updates of the pte (ptep_set_access_flags safely changes
* valid ptes without going through an invalid entry).
*/
- if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) &&
- pte_valid(*ptep) && pte_valid(pte)) {
+ if (pte_valid(*ptep) && pte_valid(pte)) {
VM_WARN_ONCE(!pte_young(pte),
"%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
__func__, pte_val(*ptep), pte_val(pte));
@@ -571,7 +560,6 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
}
-#ifdef CONFIG_ARM64_HW_AFDBM
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
@@ -593,20 +581,17 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int __ptep_test_and_clear_young(pte_t *ptep)
{
- pteval_t pteval;
- unsigned int tmp, res;
+ pte_t old_pte, pte;
- asm volatile("// __ptep_test_and_clear_young\n"
- " prfm pstl1strm, %2\n"
- "1: ldxr %0, %2\n"
- " ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n"
- " and %0, %0, %4 // clear PTE_AF\n"
- " stxr %w1, %0, %2\n"
- " cbnz %w1, 1b\n"
- : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)), "=&r" (res)
- : "L" (~PTE_AF), "I" (ilog2(PTE_AF)));
+ pte = READ_ONCE(*ptep);
+ do {
+ old_pte = pte;
+ pte = pte_mkold(pte);
+ pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
+ pte_val(old_pte), pte_val(pte));
+ } while (pte_val(pte) != pte_val(old_pte));
- return res;
+ return pte_young(pte);
}
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
@@ -630,17 +615,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
- pteval_t old_pteval;
- unsigned int tmp;
-
- asm volatile("// ptep_get_and_clear\n"
- " prfm pstl1strm, %2\n"
- "1: ldxr %0, %2\n"
- " stxr %w1, xzr, %2\n"
- " cbnz %w1, 1b\n"
- : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)));
-
- return __pte(old_pteval);
+ return __pte(xchg_relaxed(&pte_val(*ptep), 0));
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -653,27 +628,32 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
- * ptep_set_wrprotect - mark read-only while trasferring potential hardware
- * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
+ * ptep_set_wrprotect - mark read-only while preserving the hardware update of
+ * the Access Flag.
*/
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
- pteval_t pteval;
- unsigned long tmp;
+ pte_t old_pte, pte;
- asm volatile("// ptep_set_wrprotect\n"
- " prfm pstl1strm, %2\n"
- "1: ldxr %0, %2\n"
- " tst %0, %4 // check for hw dirty (!PTE_RDONLY)\n"
- " csel %1, %3, xzr, eq // set PTE_DIRTY|PTE_RDONLY if dirty\n"
- " orr %0, %0, %1 // if !dirty, PTE_RDONLY is already set\n"
- " and %0, %0, %5 // clear PTE_WRITE/PTE_DBM\n"
- " stxr %w1, %0, %2\n"
- " cbnz %w1, 1b\n"
- : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
- : "r" (PTE_DIRTY|PTE_RDONLY), "L" (PTE_RDONLY), "L" (~PTE_WRITE)
- : "cc");
+ /*
+ * ptep_set_wrprotect() is only called on CoW mappings which are
+ * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE &&
+ * PTE_RDONLY) or writable and software-dirty (PTE_WRITE &&
+ * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and
+ * protection_map[]. There is no race with the hardware update of the
+ * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM)
+ * is set.
+ */
+ VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep),
+ "%s: potential race with hardware DBM", __func__);
+ pte = READ_ONCE(*ptep);
+ do {
+ old_pte = pte;
+ pte = pte_wrprotect(pte);
+ pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
+ pte_val(old_pte), pte_val(pte));
+ } while (pte_val(pte) != pte_val(old_pte));
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -684,7 +664,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
}
#endif
-#endif /* CONFIG_ARM64_HW_AFDBM */
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 64c9e78f9882..29adab8138c3 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -112,7 +112,7 @@ void tls_preserve_current_state(void);
static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
{
memset(regs, 0, sizeof(*regs));
- regs->syscallno = ~0UL;
+ forget_syscall(regs);
regs->pc = pc;
}
@@ -159,7 +159,7 @@ extern struct task_struct *cpu_switch_to(struct task_struct *prev,
struct task_struct *next);
#define task_pt_regs(p) \
- ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
+ ((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
#define KSTK_EIP(tsk) ((unsigned long)task_pt_regs(tsk)->pc)
#define KSTK_ESP(tsk) user_stack_pointer(task_pt_regs(tsk))
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 11403fdd0a50..6069d66e0bc2 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -72,8 +72,19 @@
#define COMPAT_PT_TEXT_ADDR 0x10000
#define COMPAT_PT_DATA_ADDR 0x10004
#define COMPAT_PT_TEXT_END_ADDR 0x10008
+
+/*
+ * If pt_regs.syscallno == NO_SYSCALL, then the thread is not executing
+ * a syscall -- i.e., its most recent entry into the kernel from
+ * userspace was not via SVC, or otherwise a tracer cancelled the syscall.
+ *
+ * This must have the value -1, for ABI compatibility with ptrace etc.
+ */
+#define NO_SYSCALL (-1)
+
#ifndef __ASSEMBLY__
#include <linux/bug.h>
+#include <linux/types.h>
/* sizeof(struct user) for AArch32 */
#define COMPAT_USER_SZ 296
@@ -116,11 +127,29 @@ struct pt_regs {
};
};
u64 orig_x0;
- u64 syscallno;
+#ifdef __AARCH64EB__
+ u32 unused2;
+ s32 syscallno;
+#else
+ s32 syscallno;
+ u32 unused2;
+#endif
+
u64 orig_addr_limit;
u64 unused; // maintain 16 byte alignment
+ u64 stackframe[2];
};
+static inline bool in_syscall(struct pt_regs const *regs)
+{
+ return regs->syscallno != NO_SYSCALL;
+}
+
+static inline void forget_syscall(struct pt_regs *regs)
+{
+ regs->syscallno = NO_SYSCALL;
+}
+
#define MAX_REG_OFFSET offsetof(struct pt_regs, pstate)
#define arch_has_single_step() (1)
diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h
index eeaa97559bab..81abea0b7650 100644
--- a/arch/arm64/include/asm/signal32.h
+++ b/arch/arm64/include/asm/signal32.h
@@ -22,8 +22,6 @@
#define AARCH32_KERN_SIGRET_CODE_OFFSET 0x500
-extern const compat_ulong_t aarch32_sigret_code[6];
-
int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs);
int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
new file mode 100644
index 000000000000..fa8b3fe932e6
--- /dev/null
+++ b/arch/arm64/include/asm/simd.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef __ASM_SIMD_H
+#define __ASM_SIMD_H
+
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+DECLARE_PER_CPU(bool, kernel_neon_busy);
+
+/*
+ * may_use_simd - whether it is allowable at this time to issue SIMD
+ * instructions or access the SIMD register file
+ *
+ * Callers must not assume that the result remains true beyond the next
+ * preempt_enable() or return from softirq context.
+ */
+static __must_check inline bool may_use_simd(void)
+{
+ /*
+ * The raw_cpu_read() is racy if called with preemption enabled.
+ * This is not a bug: kernel_neon_busy is only set when
+ * preemption is disabled, so we cannot migrate to another CPU
+ * while it is set, nor can we migrate to a CPU where it is set.
+ * So, if we find it clear on some CPU then we're guaranteed to
+ * find it clear on any CPU we could migrate to.
+ *
+ * If we are in between kernel_neon_begin()...kernel_neon_end(),
+ * the flag will be set, but preemption is also disabled, so we
+ * can't migrate to another CPU and spuriously see it become
+ * false.
+ */
+ return !in_irq() && !irqs_disabled() && !in_nmi() &&
+ !raw_cpu_read(kernel_neon_busy);
+}
+
+#else /* ! CONFIG_KERNEL_MODE_NEON */
+
+static __must_check inline bool may_use_simd(void) {
+ return false;
+}
+
+#endif /* ! CONFIG_KERNEL_MODE_NEON */
+
+#endif
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 55f08c5acfad..f82b447bd34f 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -148,7 +148,7 @@ static inline void cpu_panic_kernel(void)
*/
bool cpus_are_stuck_in_kernel(void);
-extern void smp_send_crash_stop(void);
+extern void crash_smp_send_stop(void);
extern bool smp_crash_stop_failed(void);
#endif /* ifndef __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index cae331d553f8..95ad7102b63c 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -26,58 +26,6 @@
* The memory barriers are implicit with the load-acquire and store-release
* instructions.
*/
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- unsigned int tmp;
- arch_spinlock_t lockval;
- u32 owner;
-
- /*
- * Ensure prior spin_lock operations to other locks have completed
- * on this CPU before we test whether "lock" is locked.
- */
- smp_mb();
- owner = READ_ONCE(lock->owner) << 16;
-
- asm volatile(
-" sevl\n"
-"1: wfe\n"
-"2: ldaxr %w0, %2\n"
- /* Is the lock free? */
-" eor %w1, %w0, %w0, ror #16\n"
-" cbz %w1, 3f\n"
- /* Lock taken -- has there been a subsequent unlock->lock transition? */
-" eor %w1, %w3, %w0, lsl #16\n"
-" cbz %w1, 1b\n"
- /*
- * The owner has been updated, so there was an unlock->lock
- * transition that we missed. That means we can rely on the
- * store-release of the unlock operation paired with the
- * load-acquire of the lock operation to publish any of our
- * previous stores to the new lock owner and therefore don't
- * need to bother with the writeback below.
- */
-" b 4f\n"
-"3:\n"
- /*
- * Serialise against any concurrent lockers by writing back the
- * unlocked lock value
- */
- ARM64_LSE_ATOMIC_INSN(
- /* LL/SC */
-" stxr %w1, %w0, %2\n"
- __nops(2),
- /* LSE atomics */
-" mov %w1, %w0\n"
-" cas %w0, %w0, %2\n"
-" eor %w1, %w1, %w0\n")
- /* Somebody else wrote to the lock, GOTO 10 and reload the value */
-" cbnz %w1, 2b\n"
-"4:"
- : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
- : "r" (owner)
- : "memory");
-}
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
@@ -176,7 +124,11 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
- smp_mb(); /* See arch_spin_unlock_wait */
+ /*
+ * Ensure prior spin_lock operations to other locks have completed
+ * on this CPU before we test whether "lock" is locked.
+ */
+ smp_mb(); /* ^^^ */
return !arch_spin_value_unlocked(READ_ONCE(*lock));
}
@@ -358,14 +310,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
#define arch_read_relax(lock) cpu_relax()
#define arch_write_relax(lock) cpu_relax()
-/*
- * Accesses appearing in program order before a spin_lock() operation
- * can be reordered with accesses inside the critical section, by virtue
- * of arch_spin_lock being constructed using acquire semantics.
- *
- * In cases where this is problematic (e.g. try_to_wake_up), an
- * smp_mb__before_spinlock() can restore the required ordering.
- */
-#define smp_mb__before_spinlock() smp_mb()
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock() smp_mb()
#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 5b6eafccc5d8..6ad30776e984 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -16,11 +16,15 @@
#ifndef __ASM_STACKTRACE_H
#define __ASM_STACKTRACE_H
-struct task_struct;
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/memory.h>
+#include <asm/ptrace.h>
struct stackframe {
unsigned long fp;
- unsigned long sp;
unsigned long pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
unsigned int graph;
@@ -32,4 +36,57 @@ extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
int (*fn)(struct stackframe *, void *), void *data);
extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk);
+DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
+
+static inline bool on_irq_stack(unsigned long sp)
+{
+ unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr);
+ unsigned long high = low + IRQ_STACK_SIZE;
+
+ if (!low)
+ return false;
+
+ return (low <= sp && sp < high);
+}
+
+static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp)
+{
+ unsigned long low = (unsigned long)task_stack_page(tsk);
+ unsigned long high = low + THREAD_SIZE;
+
+ return (low <= sp && sp < high);
+}
+
+#ifdef CONFIG_VMAP_STACK
+DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
+
+static inline bool on_overflow_stack(unsigned long sp)
+{
+ unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack);
+ unsigned long high = low + OVERFLOW_STACK_SIZE;
+
+ return (low <= sp && sp < high);
+}
+#else
+static inline bool on_overflow_stack(unsigned long sp) { return false; }
+#endif
+
+/*
+ * We can only safely access per-cpu stacks from current in a non-preemptible
+ * context.
+ */
+static inline bool on_accessible_stack(struct task_struct *tsk, unsigned long sp)
+{
+ if (on_task_stack(tsk, sp))
+ return true;
+ if (tsk != current || preemptible())
+ return false;
+ if (on_irq_stack(sp))
+ return true;
+ if (on_overflow_stack(sp))
+ return true;
+
+ return false;
+}
+
#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index d0aa42907569..dd95d33a5bd5 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h
@@ -52,6 +52,10 @@ extern void *__memset(void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMCMP
extern int memcmp(const void *, const void *, size_t);
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+#define __HAVE_ARCH_MEMCPY_FLUSHCACHE
+void memcpy_flushcache(void *dst, const void *src, size_t cnt);
+#endif
#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 248339e4aaf5..f707fed5886f 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -329,6 +329,7 @@
#define ID_AA64ISAR1_LRCPC_SHIFT 20
#define ID_AA64ISAR1_FCMA_SHIFT 16
#define ID_AA64ISAR1_JSCVT_SHIFT 12
+#define ID_AA64ISAR1_DPB_SHIFT 0
/* id_aa64pfr0 */
#define ID_AA64PFR0_GIC_SHIFT 24
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 46c3b93cf865..ddded6497a8a 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -23,19 +23,11 @@
#include <linux/compiler.h>
-#ifdef CONFIG_ARM64_4K_PAGES
-#define THREAD_SIZE_ORDER 2
-#elif defined(CONFIG_ARM64_16K_PAGES)
-#define THREAD_SIZE_ORDER 0
-#endif
-
-#define THREAD_SIZE 16384
-#define THREAD_START_SP (THREAD_SIZE - 16)
-
#ifndef __ASSEMBLY__
struct task_struct;
+#include <asm/memory.h>
#include <asm/stack_pointer.h>
#include <asm/types.h>
@@ -68,6 +60,9 @@ struct thread_info {
#define thread_saved_fp(tsk) \
((unsigned long)(tsk->thread.cpu_context.fp))
+void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
+
#endif
/*
@@ -86,6 +81,7 @@ struct thread_info {
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
+#define TIF_FSCHECK 5 /* Check FS is USER_DS on return */
#define TIF_NOHZ 7
#define TIF_SYSCALL_TRACE 8
#define TIF_SYSCALL_AUDIT 9
@@ -107,11 +103,12 @@ struct thread_info {
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_UPROBE (1 << TIF_UPROBE)
+#define _TIF_FSCHECK (1 << TIF_FSCHECK)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
- _TIF_UPROBE)
+ _TIF_UPROBE | _TIF_FSCHECK)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 02e9035b0685..d131501c6222 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -37,18 +37,11 @@ void unregister_undef_hook(struct undef_hook *hook);
void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr);
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static inline int __in_irqentry_text(unsigned long ptr)
{
return ptr >= (unsigned long)&__irqentry_text_start &&
ptr < (unsigned long)&__irqentry_text_end;
}
-#else
-static inline int __in_irqentry_text(unsigned long ptr)
-{
- return 0;
-}
-#endif
static inline int in_exception_text(unsigned long ptr)
{
@@ -60,4 +53,9 @@ static inline int in_exception_text(unsigned long ptr)
return in ? : __in_irqentry_text(ptr);
}
+static inline int in_entry_text(unsigned long ptr)
+{
+ return ptr >= (unsigned long)&__entry_text_start &&
+ ptr < (unsigned long)&__entry_text_end;
+}
#endif
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index fab46a0ea223..fc0f9eb66039 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -45,6 +45,9 @@ static inline void set_fs(mm_segment_t fs)
{
current_thread_info()->addr_limit = fs;
+ /* On user-mode return, check fs is correct */
+ set_thread_flag(TIF_FSCHECK);
+
/*
* Enable/disable UAO so that copy_to_user() etc can access
* kernel memory with the unprivileged instructions.
@@ -347,4 +350,16 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count);
extern __must_check long strnlen_user(const char __user *str, long n);
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+struct page;
+void memcpy_page_flushcache(char *to, struct page *page, size_t offset, size_t len);
+extern unsigned long __must_check __copy_user_flushcache(void *to, const void __user *from, unsigned long n);
+
+static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
+{
+ kasan_check_write(dst, size);
+ return __copy_user_flushcache(dst, src, size);
+}
+#endif
+
#endif /* __ASM_UACCESS_H */
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 4e187ce2a811..4b9344cba83a 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -35,5 +35,6 @@
#define HWCAP_JSCVT (1 << 13)
#define HWCAP_FCMA (1 << 14)
#define HWCAP_LRCPC (1 << 15)
+#define HWCAP_DCPOP (1 << 16)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index e25c11e727fe..b3162715ed78 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -95,7 +95,7 @@ static int __init dt_scan_depth1_nodes(unsigned long node,
* __acpi_map_table() will be called before page_init(), so early_ioremap()
* or early_memremap() should be called here for ACPI table mapping.
*/
-char *__init __acpi_map_table(unsigned long phys, unsigned long size)
+void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size)
{
if (!size)
return NULL;
@@ -103,7 +103,7 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
return early_memremap(phys, size);
}
-void __init __acpi_unmap_table(char *map, unsigned long size)
+void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
{
if (!map || !size)
return;
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index b3bb7ef97bc8..71bf088f1e4b 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -75,6 +75,7 @@ int main(void)
DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0));
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit));
+ DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
BLANK();
DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9f9e0064c8c1..cd52d365d1f0 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -120,6 +120,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -888,6 +889,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.min_field_value = 0,
.matches = has_no_fpsimd,
},
+#ifdef CONFIG_ARM64_PMEM
+ {
+ .desc = "Data cache clean to Point of Persistence",
+ .capability = ARM64_HAS_DCPOP,
+ .def_scope = SCOPE_SYSTEM,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64ISAR1_EL1,
+ .field_pos = ID_AA64ISAR1_DPB_SHIFT,
+ .min_field_value = 1,
+ },
+#endif
{},
};
@@ -916,6 +928,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC),
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index f495ee5049fd..311885962830 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -68,6 +68,7 @@ static const char *const hwcap_str[] = {
"jscvt",
"fcma",
"lrcpc",
+ "dcpop",
NULL
};
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index c44a82f146b1..6a27cd6dbfa6 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -41,27 +41,3 @@ ENTRY(fpsimd_load_state)
fpsimd_restore x0, 8
ret
ENDPROC(fpsimd_load_state)
-
-#ifdef CONFIG_KERNEL_MODE_NEON
-
-/*
- * Save the bottom n FP registers.
- *
- * x0 - pointer to struct fpsimd_partial_state
- */
-ENTRY(fpsimd_save_partial_state)
- fpsimd_save_partial x0, 1, 8, 9
- ret
-ENDPROC(fpsimd_save_partial_state)
-
-/*
- * Load the bottom n FP registers.
- *
- * x0 - pointer to struct fpsimd_partial_state
- */
-ENTRY(fpsimd_load_partial_state)
- fpsimd_restore_partial x0, 8, 9
- ret
-ENDPROC(fpsimd_load_partial_state)
-
-#endif
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b738880350f9..e1c59d4008a8 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -69,8 +69,55 @@
#define BAD_FIQ 2
#define BAD_ERROR 3
- .macro kernel_entry, el, regsize = 64
+ .macro kernel_ventry label
+ .align 7
sub sp, sp, #S_FRAME_SIZE
+#ifdef CONFIG_VMAP_STACK
+ /*
+ * Test whether the SP has overflowed, without corrupting a GPR.
+ * Task and IRQ stacks are aligned to (1 << THREAD_SHIFT).
+ */
+ add sp, sp, x0 // sp' = sp + x0
+ sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp
+ tbnz x0, #THREAD_SHIFT, 0f
+ sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0
+ sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp
+ b \label
+
+0:
+ /*
+ * Either we've just detected an overflow, or we've taken an exception
+ * while on the overflow stack. Either way, we won't return to
+ * userspace, and can clobber EL0 registers to free up GPRs.
+ */
+
+ /* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. */
+ msr tpidr_el0, x0
+
+ /* Recover the original x0 value and stash it in tpidrro_el0 */
+ sub x0, sp, x0
+ msr tpidrro_el0, x0
+
+ /* Switch to the overflow stack */
+ adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0
+
+ /*
+ * Check whether we were already on the overflow stack. This may happen
+ * after panic() re-enables interrupts.
+ */
+ mrs x0, tpidr_el0 // sp of interrupted context
+ sub x0, sp, x0 // delta with top of overflow stack
+ tst x0, #~(OVERFLOW_STACK_SIZE - 1) // within range?
+ b.ne __bad_stack // no? -> bad stack pointer
+
+ /* We were already on the overflow stack. Restore sp/x0 and carry on. */
+ sub sp, sp, x0
+ mrs x0, tpidrro_el0
+#endif
+ b \label
+ .endm
+
+ .macro kernel_entry, el, regsize = 64
.if \regsize == 32
mov w0, w0 // zero upper 32 bits of x0
.endif
@@ -111,6 +158,18 @@
mrs x23, spsr_el1
stp lr, x21, [sp, #S_LR]
+ /*
+ * In order to be able to dump the contents of struct pt_regs at the
+ * time the exception was taken (in case we attempt to walk the call
+ * stack later), chain it together with the stack frames.
+ */
+ .if \el == 0
+ stp xzr, xzr, [sp, #S_STACKFRAME]
+ .else
+ stp x29, x22, [sp, #S_STACKFRAME]
+ .endif
+ add x29, sp, #S_STACKFRAME
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Set the TTBR0 PAN bit in SPSR. When the exception is taken from
@@ -138,12 +197,10 @@ alternative_else_nop_endif
stp x22, x23, [sp, #S_PC]
- /*
- * Set syscallno to -1 by default (overridden later if real syscall).
- */
+ /* Not in a syscall by default (el0_svc overwrites for real syscall) */
.if \el == 0
- mvn x21, xzr
- str x21, [sp, #S_SYSCALLNO]
+ mov w21, #NO_SYSCALL
+ str w21, [sp, #S_SYSCALLNO]
.endif
/*
@@ -259,20 +316,12 @@ alternative_else_nop_endif
and x25, x25, #~(THREAD_SIZE - 1)
cbnz x25, 9998f
- adr_this_cpu x25, irq_stack, x26
- mov x26, #IRQ_STACK_START_SP
+ ldr_this_cpu x25, irq_stack_ptr, x26
+ mov x26, #IRQ_STACK_SIZE
add x26, x25, x26
/* switch to the irq stack */
mov sp, x26
-
- /*
- * Add a dummy stack frame, this non-standard format is fixed up
- * by unwind_frame()
- */
- stp x29, x19, [sp, #-16]!
- mov x29, sp
-
9998:
.endm
@@ -290,8 +339,9 @@ alternative_else_nop_endif
*
* x7 is reserved for the system call number in 32-bit mode.
*/
-sc_nr .req x25 // number of system calls
-scno .req x26 // syscall number
+wsc_nr .req w25 // number of system calls
+wscno .req w26 // syscall number
+xscno .req x26 // syscall number (zero-extended)
stbl .req x27 // syscall table pointer
tsk .req x28 // current thread_info
@@ -315,34 +365,62 @@ tsk .req x28 // current thread_info
.align 11
ENTRY(vectors)
- ventry el1_sync_invalid // Synchronous EL1t
- ventry el1_irq_invalid // IRQ EL1t
- ventry el1_fiq_invalid // FIQ EL1t
- ventry el1_error_invalid // Error EL1t
+ kernel_ventry el1_sync_invalid // Synchronous EL1t
+ kernel_ventry el1_irq_invalid // IRQ EL1t
+ kernel_ventry el1_fiq_invalid // FIQ EL1t
+ kernel_ventry el1_error_invalid // Error EL1t
- ventry el1_sync // Synchronous EL1h
- ventry el1_irq // IRQ EL1h
- ventry el1_fiq_invalid // FIQ EL1h
- ventry el1_error_invalid // Error EL1h
+ kernel_ventry el1_sync // Synchronous EL1h
+ kernel_ventry el1_irq // IRQ EL1h
+ kernel_ventry el1_fiq_invalid // FIQ EL1h
+ kernel_ventry el1_error_invalid // Error EL1h
- ventry el0_sync // Synchronous 64-bit EL0
- ventry el0_irq // IRQ 64-bit EL0
- ventry el0_fiq_invalid // FIQ 64-bit EL0
- ventry el0_error_invalid // Error 64-bit EL0
+ kernel_ventry el0_sync // Synchronous 64-bit EL0
+ kernel_ventry el0_irq // IRQ 64-bit EL0
+ kernel_ventry el0_fiq_invalid // FIQ 64-bit EL0
+ kernel_ventry el0_error_invalid // Error 64-bit EL0
#ifdef CONFIG_COMPAT
- ventry el0_sync_compat // Synchronous 32-bit EL0
- ventry el0_irq_compat // IRQ 32-bit EL0
- ventry el0_fiq_invalid_compat // FIQ 32-bit EL0
- ventry el0_error_invalid_compat // Error 32-bit EL0
+ kernel_ventry el0_sync_compat // Synchronous 32-bit EL0
+ kernel_ventry el0_irq_compat // IRQ 32-bit EL0
+ kernel_ventry el0_fiq_invalid_compat // FIQ 32-bit EL0
+ kernel_ventry el0_error_invalid_compat // Error 32-bit EL0
#else
- ventry el0_sync_invalid // Synchronous 32-bit EL0
- ventry el0_irq_invalid // IRQ 32-bit EL0
- ventry el0_fiq_invalid // FIQ 32-bit EL0
- ventry el0_error_invalid // Error 32-bit EL0
+ kernel_ventry el0_sync_invalid // Synchronous 32-bit EL0
+ kernel_ventry el0_irq_invalid // IRQ 32-bit EL0
+ kernel_ventry el0_fiq_invalid // FIQ 32-bit EL0
+ kernel_ventry el0_error_invalid // Error 32-bit EL0
#endif
END(vectors)
+#ifdef CONFIG_VMAP_STACK
+ /*
+ * We detected an overflow in kernel_ventry, which switched to the
+ * overflow stack. Stash the exception regs, and head to our overflow
+ * handler.
+ */
+__bad_stack:
+ /* Restore the original x0 value */
+ mrs x0, tpidrro_el0
+
+ /*
+ * Store the original GPRs to the new stack. The original SP (minus
+ * S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry.
+ */
+ sub sp, sp, #S_FRAME_SIZE
+ kernel_entry 1
+ mrs x0, tpidr_el0
+ add x0, x0, #S_FRAME_SIZE
+ str x0, [sp, #S_SP]
+
+ /* Stash the regs for handle_bad_stack */
+ mov x0, sp
+
+ /* Time to die */
+ bl handle_bad_stack
+ ASM_BUG()
+#endif /* CONFIG_VMAP_STACK */
+
/*
* Invalid mode handlers
*/
@@ -351,7 +429,8 @@ END(vectors)
mov x0, sp
mov x1, #\reason
mrs x2, esr_el1
- b bad_mode
+ bl bad_mode
+ ASM_BUG()
.endm
el0_sync_invalid:
@@ -448,14 +527,16 @@ el1_sp_pc:
mrs x0, far_el1
enable_dbg
mov x2, sp
- b do_sp_pc_abort
+ bl do_sp_pc_abort
+ ASM_BUG()
el1_undef:
/*
* Undefined instruction
*/
enable_dbg
mov x0, sp
- b do_undefinstr
+ bl do_undefinstr
+ ASM_BUG()
el1_dbg:
/*
* Debug exception handling
@@ -473,7 +554,8 @@ el1_inv:
mov x0, sp
mov x2, x1
mov x1, #BAD_SYNC
- b bad_mode
+ bl bad_mode
+ ASM_BUG()
ENDPROC(el1_sync)
.align 6
@@ -577,8 +659,8 @@ el0_svc_compat:
* AArch32 syscall handling
*/
adrp stbl, compat_sys_call_table // load compat syscall table pointer
- uxtw scno, w7 // syscall number in w7 (r7)
- mov sc_nr, #__NR_compat_syscalls
+ mov wscno, w7 // syscall number in w7 (r7)
+ mov wsc_nr, #__NR_compat_syscalls
b el0_svc_naked
.align 6
@@ -707,38 +789,6 @@ el0_irq_naked:
ENDPROC(el0_irq)
/*
- * Register switch for AArch64. The callee-saved registers need to be saved
- * and restored. On entry:
- * x0 = previous task_struct (must be preserved across the switch)
- * x1 = next task_struct
- * Previous and next are guaranteed not to be the same.
- *
- */
-ENTRY(cpu_switch_to)
- mov x10, #THREAD_CPU_CONTEXT
- add x8, x0, x10
- mov x9, sp
- stp x19, x20, [x8], #16 // store callee-saved registers
- stp x21, x22, [x8], #16
- stp x23, x24, [x8], #16
- stp x25, x26, [x8], #16
- stp x27, x28, [x8], #16
- stp x29, x9, [x8], #16
- str lr, [x8]
- add x8, x1, x10
- ldp x19, x20, [x8], #16 // restore callee-saved registers
- ldp x21, x22, [x8], #16
- ldp x23, x24, [x8], #16
- ldp x25, x26, [x8], #16
- ldp x27, x28, [x8], #16
- ldp x29, x9, [x8], #16
- ldr lr, [x8]
- mov sp, x9
- msr sp_el0, x1
- ret
-ENDPROC(cpu_switch_to)
-
-/*
* This is the fast syscall return path. We do as little as possible here,
* and this includes saving x0 back into the kernel stack.
*/
@@ -781,36 +831,24 @@ finish_ret_to_user:
ENDPROC(ret_to_user)
/*
- * This is how we return from a fork.
- */
-ENTRY(ret_from_fork)
- bl schedule_tail
- cbz x19, 1f // not a kernel thread
- mov x0, x20
- blr x19
-1: get_thread_info tsk
- b ret_to_user
-ENDPROC(ret_from_fork)
-
-/*
* SVC handler.
*/
.align 6
el0_svc:
adrp stbl, sys_call_table // load syscall table pointer
- uxtw scno, w8 // syscall number in w8
- mov sc_nr, #__NR_syscalls
+ mov wscno, w8 // syscall number in w8
+ mov wsc_nr, #__NR_syscalls
el0_svc_naked: // compat entry point
- stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number
+ stp x0, xscno, [sp, #S_ORIG_X0] // save the original x0 and syscall number
enable_dbg_and_irq
ct_user_exit 1
ldr x16, [tsk, #TSK_TI_FLAGS] // check for syscall hooks
tst x16, #_TIF_SYSCALL_WORK
b.ne __sys_trace
- cmp scno, sc_nr // check upper syscall limit
+ cmp wscno, wsc_nr // check upper syscall limit
b.hs ni_sys
- ldr x16, [stbl, scno, lsl #3] // address in the syscall table
+ ldr x16, [stbl, xscno, lsl #3] // address in the syscall table
blr x16 // call sys_* routine
b ret_fast_syscall
ni_sys:
@@ -824,24 +862,23 @@ ENDPROC(el0_svc)
* switches, and waiting for our parent to respond.
*/
__sys_trace:
- mov w0, #-1 // set default errno for
- cmp scno, x0 // user-issued syscall(-1)
+ cmp wscno, #NO_SYSCALL // user-issued syscall(-1)?
b.ne 1f
- mov x0, #-ENOSYS
+ mov x0, #-ENOSYS // set default errno if so
str x0, [sp, #S_X0]
1: mov x0, sp
bl syscall_trace_enter
- cmp w0, #-1 // skip the syscall?
+ cmp w0, #NO_SYSCALL // skip the syscall?
b.eq __sys_trace_return_skipped
- uxtw scno, w0 // syscall number (possibly new)
+ mov wscno, w0 // syscall number (possibly new)
mov x1, sp // pointer to regs
- cmp scno, sc_nr // check upper syscall limit
+ cmp wscno, wsc_nr // check upper syscall limit
b.hs __ni_sys_trace
ldp x0, x1, [sp] // restore the syscall args
ldp x2, x3, [sp, #S_X2]
ldp x4, x5, [sp, #S_X4]
ldp x6, x7, [sp, #S_X6]
- ldr x16, [stbl, scno, lsl #3] // address in the syscall table
+ ldr x16, [stbl, xscno, lsl #3] // address in the syscall table
blr x16 // call sys_* routine
__sys_trace_return:
@@ -865,3 +902,49 @@ ENTRY(sys_rt_sigreturn_wrapper)
mov x0, sp
b sys_rt_sigreturn
ENDPROC(sys_rt_sigreturn_wrapper)
+
+/*
+ * Register switch for AArch64. The callee-saved registers need to be saved
+ * and restored. On entry:
+ * x0 = previous task_struct (must be preserved across the switch)
+ * x1 = next task_struct
+ * Previous and next are guaranteed not to be the same.
+ *
+ */
+ENTRY(cpu_switch_to)
+ mov x10, #THREAD_CPU_CONTEXT
+ add x8, x0, x10
+ mov x9, sp
+ stp x19, x20, [x8], #16 // store callee-saved registers
+ stp x21, x22, [x8], #16
+ stp x23, x24, [x8], #16
+ stp x25, x26, [x8], #16
+ stp x27, x28, [x8], #16
+ stp x29, x9, [x8], #16
+ str lr, [x8]
+ add x8, x1, x10
+ ldp x19, x20, [x8], #16 // restore callee-saved registers
+ ldp x21, x22, [x8], #16
+ ldp x23, x24, [x8], #16
+ ldp x25, x26, [x8], #16
+ ldp x27, x28, [x8], #16
+ ldp x29, x9, [x8], #16
+ ldr lr, [x8]
+ mov sp, x9
+ msr sp_el0, x1
+ ret
+ENDPROC(cpu_switch_to)
+NOKPROBE(cpu_switch_to)
+
+/*
+ * This is how we return from a fork.
+ */
+ENTRY(ret_from_fork)
+ bl schedule_tail
+ cbz x19, 1f // not a kernel thread
+ mov x0, x20
+ blr x19
+1: get_thread_info tsk
+ b ret_to_user
+ENDPROC(ret_from_fork)
+NOKPROBE(ret_from_fork)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 06da8ea16bbe..3a68cf38a6b3 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -17,16 +17,19 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/bottom_half.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
#include <linux/sched/signal.h>
#include <linux/signal.h>
-#include <linux/hardirq.h>
#include <asm/fpsimd.h>
#include <asm/cputype.h>
+#include <asm/simd.h>
#define FPEXC_IOF (1 << 0)
#define FPEXC_DZF (1 << 1)
@@ -62,6 +65,13 @@
* CPU currently contain the most recent userland FPSIMD state of the current
* task.
*
+ * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
+ * save the task's FPSIMD context back to task_struct from softirq context.
+ * To prevent this from racing with the manipulation of the task's FPSIMD state
+ * from task context and thereby corrupting the state, it is necessary to
+ * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
+ * flag with local_bh_disable() unless softirqs are already masked.
+ *
* For a certain task, the sequence may look something like this:
* - the task gets scheduled in; if both the task's fpsimd_state.cpu field
* contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
@@ -161,9 +171,14 @@ void fpsimd_flush_thread(void)
{
if (!system_supports_fpsimd())
return;
+
+ local_bh_disable();
+
memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
fpsimd_flush_task_state(current);
set_thread_flag(TIF_FOREIGN_FPSTATE);
+
+ local_bh_enable();
}
/*
@@ -174,10 +189,13 @@ void fpsimd_preserve_current_state(void)
{
if (!system_supports_fpsimd())
return;
- preempt_disable();
+
+ local_bh_disable();
+
if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state);
- preempt_enable();
+
+ local_bh_enable();
}
/*
@@ -189,15 +207,18 @@ void fpsimd_restore_current_state(void)
{
if (!system_supports_fpsimd())
return;
- preempt_disable();
+
+ local_bh_disable();
+
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
struct fpsimd_state *st = &current->thread.fpsimd_state;
fpsimd_load_state(st);
- this_cpu_write(fpsimd_last_state, st);
+ __this_cpu_write(fpsimd_last_state, st);
st->cpu = smp_processor_id();
}
- preempt_enable();
+
+ local_bh_enable();
}
/*
@@ -209,15 +230,18 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
{
if (!system_supports_fpsimd())
return;
- preempt_disable();
+
+ local_bh_disable();
+
fpsimd_load_state(state);
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
struct fpsimd_state *st = &current->thread.fpsimd_state;
- this_cpu_write(fpsimd_last_state, st);
+ __this_cpu_write(fpsimd_last_state, st);
st->cpu = smp_processor_id();
}
- preempt_enable();
+
+ local_bh_enable();
}
/*
@@ -230,52 +254,122 @@ void fpsimd_flush_task_state(struct task_struct *t)
#ifdef CONFIG_KERNEL_MODE_NEON
-static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
-static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
+DEFINE_PER_CPU(bool, kernel_neon_busy);
+EXPORT_PER_CPU_SYMBOL(kernel_neon_busy);
/*
* Kernel-side NEON support functions
*/
-void kernel_neon_begin_partial(u32 num_regs)
+
+/*
+ * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling
+ * context
+ *
+ * Must not be called unless may_use_simd() returns true.
+ * Task context in the FPSIMD registers is saved back to memory as necessary.
+ *
+ * A matching call to kernel_neon_end() must be made before returning from the
+ * calling context.
+ *
+ * The caller may freely use the FPSIMD registers until kernel_neon_end() is
+ * called.
+ */
+void kernel_neon_begin(void)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
- if (in_interrupt()) {
- struct fpsimd_partial_state *s = this_cpu_ptr(
- in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
- BUG_ON(num_regs > 32);
- fpsimd_save_partial_state(s, roundup(num_regs, 2));
- } else {
- /*
- * Save the userland FPSIMD state if we have one and if we
- * haven't done so already. Clear fpsimd_last_state to indicate
- * that there is no longer userland FPSIMD state in the
- * registers.
- */
- preempt_disable();
- if (current->mm &&
- !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
- fpsimd_save_state(&current->thread.fpsimd_state);
- this_cpu_write(fpsimd_last_state, NULL);
- }
+ BUG_ON(!may_use_simd());
+
+ local_bh_disable();
+
+ __this_cpu_write(kernel_neon_busy, true);
+
+ /* Save unsaved task fpsimd state, if any: */
+ if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
+ fpsimd_save_state(&current->thread.fpsimd_state);
+
+ /* Invalidate any task state remaining in the fpsimd regs: */
+ __this_cpu_write(fpsimd_last_state, NULL);
+
+ preempt_disable();
+
+ local_bh_enable();
}
-EXPORT_SYMBOL(kernel_neon_begin_partial);
+EXPORT_SYMBOL(kernel_neon_begin);
+/*
+ * kernel_neon_end(): give the CPU FPSIMD registers back to the current task
+ *
+ * Must be called from a context in which kernel_neon_begin() was previously
+ * called, with no call to kernel_neon_end() in the meantime.
+ *
+ * The caller must not use the FPSIMD registers after this function is called,
+ * unless kernel_neon_begin() is called again in the meantime.
+ */
void kernel_neon_end(void)
{
+ bool busy;
+
if (!system_supports_fpsimd())
return;
- if (in_interrupt()) {
- struct fpsimd_partial_state *s = this_cpu_ptr(
- in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
- fpsimd_load_partial_state(s);
- } else {
- preempt_enable();
- }
+
+ busy = __this_cpu_xchg(kernel_neon_busy, false);
+ WARN_ON(!busy); /* No matching kernel_neon_begin()? */
+
+ preempt_enable();
}
EXPORT_SYMBOL(kernel_neon_end);
+static DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state);
+static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
+
+/*
+ * EFI runtime services support functions
+ *
+ * The ABI for EFI runtime services allows EFI to use FPSIMD during the call.
+ * This means that for EFI (and only for EFI), we have to assume that FPSIMD
+ * is always used rather than being an optional accelerator.
+ *
+ * These functions provide the necessary support for ensuring FPSIMD
+ * save/restore in the contexts from which EFI is used.
+ *
+ * Do not use them for any other purpose -- if tempted to do so, you are
+ * either doing something wrong or you need to propose some refactoring.
+ */
+
+/*
+ * __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services call
+ */
+void __efi_fpsimd_begin(void)
+{
+ if (!system_supports_fpsimd())
+ return;
+
+ WARN_ON(preemptible());
+
+ if (may_use_simd())
+ kernel_neon_begin();
+ else {
+ fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
+ __this_cpu_write(efi_fpsimd_state_used, true);
+ }
+}
+
+/*
+ * __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call
+ */
+void __efi_fpsimd_end(void)
+{
+ if (!system_supports_fpsimd())
+ return;
+
+ if (__this_cpu_xchg(efi_fpsimd_state_used, false))
+ fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
+ else
+ kernel_neon_end();
+}
+
#endif /* CONFIG_KERNEL_MODE_NEON */
#ifdef CONFIG_CPU_PM
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 973df7de7bf8..7434ec0c7a27 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -143,8 +143,8 @@ preserve_boot_args:
dmb sy // needed before dc ivac with
// MMU off
- add x1, x0, #0x20 // 4 x 8 bytes
- b __inval_cache_range // tail call
+ mov x1, #0x20 // 4 x 8 bytes
+ b __inval_dcache_area // tail call
ENDPROC(preserve_boot_args)
/*
@@ -221,20 +221,20 @@ __create_page_tables:
* dirty cache lines being evicted.
*/
adrp x0, idmap_pg_dir
- adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
- bl __inval_cache_range
+ ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
+ bl __inval_dcache_area
/*
* Clear the idmap and swapper page tables.
*/
adrp x0, idmap_pg_dir
- adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
+ ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
1: stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
- cmp x0, x6
- b.lo 1b
+ subs x1, x1, #64
+ b.ne 1b
mov x7, SWAPPER_MM_MMUFLAGS
@@ -307,9 +307,9 @@ __create_page_tables:
* tables again to remove any speculatively loaded cache lines.
*/
adrp x0, idmap_pg_dir
- adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
+ ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
dmb sy
- bl __inval_cache_range
+ bl __inval_dcache_area
ret x28
ENDPROC(__create_page_tables)
@@ -354,7 +354,6 @@ __primary_switched:
tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized?
b.ne 0f
mov x0, x21 // pass FDT address in x0
- mov x1, x23 // pass modulo offset in x1
bl kaslr_early_init // parse FDT for KASLR options
cbz x0, 0f // KASLR disabled? just proceed
orr x23, x23, x0 // record KASLR offset
@@ -362,6 +361,9 @@ __primary_switched:
ret // to __primary_switch()
0:
#endif
+ add sp, sp, #16
+ mov x29, #0
+ mov x30, #0
b start_kernel
ENDPROC(__primary_switched)
@@ -617,6 +619,7 @@ __secondary_switched:
ldr x2, [x0, #CPU_BOOT_TASK]
msr sp_el0, x2
mov x29, #0
+ mov x30, #0
b secondary_start_kernel
ENDPROC(__secondary_switched)
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index a44e13942d30..095d3c170f5d 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
* read only (code, rodata). Clear the RDONLY bit from
* the temporary mappings we use during restore.
*/
- set_pte(dst_pte, pte_clear_rdonly(pte));
+ set_pte(dst_pte, pte_mkwrite(pte));
} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
/*
* debug_pagealloc will removed the PTE_VALID bit if
@@ -343,7 +343,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
*/
BUG_ON(!pfn_valid(pte_pfn(pte)));
- set_pte(dst_pte, pte_mkpresent(pte_clear_rdonly(pte)));
+ set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte)));
}
}
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 2386b26c0712..713561e5bcab 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -23,15 +23,16 @@
#include <linux/kernel_stat.h>
#include <linux/irq.h>
+#include <linux/memory.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/irqchip.h>
#include <linux/seq_file.h>
+#include <linux/vmalloc.h>
unsigned long irq_err_count;
-/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
-DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16);
+DEFINE_PER_CPU(unsigned long *, irq_stack_ptr);
int arch_show_interrupts(struct seq_file *p, int prec)
{
@@ -50,8 +51,43 @@ void __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
handle_arch_irq = handle_irq;
}
+#ifdef CONFIG_VMAP_STACK
+static void init_irq_stacks(void)
+{
+ int cpu;
+ unsigned long *p;
+
+ for_each_possible_cpu(cpu) {
+ /*
+ * To ensure that VMAP'd stack overflow detection works
+ * correctly, the IRQ stacks need to have the same
+ * alignment as other stacks.
+ */
+ p = __vmalloc_node_range(IRQ_STACK_SIZE, THREAD_ALIGN,
+ VMALLOC_START, VMALLOC_END,
+ THREADINFO_GFP, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ per_cpu(irq_stack_ptr, cpu) = p;
+ }
+}
+#else
+/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
+DEFINE_PER_CPU_ALIGNED(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
+
+static void init_irq_stacks(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu);
+}
+#endif
+
void __init init_IRQ(void)
{
+ init_irq_stacks();
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index a9710efb8c01..47080c49cc7e 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -75,7 +75,7 @@ extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size,
* containing function pointers) to be reinitialized, and zero-initialized
* .bss variables will be reset to 0.
*/
-u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset)
+u64 __init kaslr_early_init(u64 dt_phys)
{
void *fdt;
u64 seed, offset, mask, module_range;
@@ -131,15 +131,17 @@ u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset)
/*
* The kernel Image should not extend across a 1GB/32MB/512MB alignment
* boundary (for 4KB/16KB/64KB granule kernels, respectively). If this
- * happens, increase the KASLR offset by the size of the kernel image
- * rounded up by SWAPPER_BLOCK_SIZE.
+ * happens, round down the KASLR offset by (1 << SWAPPER_TABLE_SHIFT).
+ *
+ * NOTE: The references to _text and _end below will already take the
+ * modulo offset (the physical displacement modulo 2 MB) into
+ * account, given that the physical placement is controlled by
+ * the loader, and will not change as a result of the virtual
+ * mapping we choose.
*/
- if ((((u64)_text + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT) !=
- (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT)) {
- u64 kimg_sz = _end - _text;
- offset = (offset + round_up(kimg_sz, SWAPPER_BLOCK_SIZE))
- & mask;
- }
+ if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) !=
+ (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT))
+ offset = round_down(offset, 1 << SWAPPER_TABLE_SHIFT);
if (IS_ENABLED(CONFIG_KASAN))
/*
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 481f54a866c5..11121f608eb5 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -252,7 +252,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
local_irq_disable();
/* shutdown non-crashing cpus */
- smp_send_crash_stop();
+ crash_smp_send_stop();
/* for crashing cpu */
crash_save_cpu(regs, smp_processor_id());
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index e2b7e4f9cc31..0e2ea1c78542 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -22,23 +22,6 @@
#include <linux/pci-ecam.h>
#include <linux/slab.h>
-/*
- * Called after each bus is probed, but before its children are examined
- */
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
- /* nothing to do, expected to be removed in the future */
-}
-
-/*
- * We don't have to worry about legacy ISA devices, so nothing to do here
- */
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
- resource_size_t size, resource_size_t align)
-{
- return res->start;
-}
-
#ifdef CONFIG_ACPI
/*
* Try to assign the IRQ number when probing a new device
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index 713ca824f266..bcafd7dcfe8b 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -162,7 +162,6 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
}
frame.fp = regs->regs[29];
- frame.sp = regs->sp;
frame.pc = regs->pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
frame.graph = current->curr_ret_stack;
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index b5798ba21189..9eaef51f83ff 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -202,55 +202,6 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
};
-/* ARM Cortex-A53 HW events mapping. */
-static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
-};
-
-/* ARM Cortex-A57 and Cortex-A72 events mapping. */
-static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
-};
-
-static const unsigned armv8_thunder_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
-};
-
-/* Broadcom Vulcan events mapping */
-static const unsigned armv8_vulcan_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_BR_RETIRED,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
-};
-
static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
@@ -281,27 +232,10 @@ static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREF_LINEFILL,
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
-
- [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE,
- [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL,
- [C(LL)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE,
- [C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL,
-
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL,
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
-
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+ [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+ [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
};
static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
@@ -314,18 +248,26 @@ static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
-
[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
+ [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+ [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
+};
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+
+ [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+ [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
+
+ [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+ [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
};
static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
@@ -340,8 +282,6 @@ static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS,
[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS,
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
[C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS,
[C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS,
@@ -349,13 +289,6 @@ static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
-
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
-
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
};
static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
@@ -368,22 +301,11 @@ static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
-
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
- [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB,
-
[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
-
[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
};
@@ -846,17 +768,14 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
struct hw_perf_event *hwc = &event->hw;
unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
- /* Always place a cycle counter into the cycle counter. */
+ /* Always prefer to place a cycle counter into the cycle counter. */
if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) {
- if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
- return -EAGAIN;
-
- return ARMV8_IDX_CYCLE_COUNTER;
+ if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
+ return ARMV8_IDX_CYCLE_COUNTER;
}
/*
- * For anything other than a cycle counter, try and use
- * the events counters
+ * Otherwise use events counters
*/
for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) {
if (!test_and_set_bit(idx, cpuc->used_mask))
@@ -924,7 +843,13 @@ static void armv8pmu_reset(void *info)
ARMV8_PMU_PMCR_LC);
}
-static int armv8_pmuv3_map_event(struct perf_event *event)
+static int __armv8_pmuv3_map_event(struct perf_event *event,
+ const unsigned (*extra_event_map)
+ [PERF_COUNT_HW_MAX],
+ const unsigned (*extra_cache_map)
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX])
{
int hw_event_id;
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
@@ -932,44 +857,47 @@ static int armv8_pmuv3_map_event(struct perf_event *event)
hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map,
&armv8_pmuv3_perf_cache_map,
ARMV8_PMU_EVTYPE_EVENT);
- if (hw_event_id < 0)
- return hw_event_id;
- /* disable micro/arch events not supported by this PMU */
- if ((hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) &&
- !test_bit(hw_event_id, armpmu->pmceid_bitmap)) {
- return -EOPNOTSUPP;
+ /* Only expose micro/arch events supported by this PMU */
+ if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS)
+ && test_bit(hw_event_id, armpmu->pmceid_bitmap)) {
+ return hw_event_id;
}
- return hw_event_id;
+ return armpmu_map_event(event, extra_event_map, extra_cache_map,
+ ARMV8_PMU_EVTYPE_EVENT);
+}
+
+static int armv8_pmuv3_map_event(struct perf_event *event)
+{
+ return __armv8_pmuv3_map_event(event, NULL, NULL);
}
static int armv8_a53_map_event(struct perf_event *event)
{
- return armpmu_map_event(event, &armv8_a53_perf_map,
- &armv8_a53_perf_cache_map,
- ARMV8_PMU_EVTYPE_EVENT);
+ return __armv8_pmuv3_map_event(event, NULL, &armv8_a53_perf_cache_map);
}
static int armv8_a57_map_event(struct perf_event *event)
{
- return armpmu_map_event(event, &armv8_a57_perf_map,
- &armv8_a57_perf_cache_map,
- ARMV8_PMU_EVTYPE_EVENT);
+ return __armv8_pmuv3_map_event(event, NULL, &armv8_a57_perf_cache_map);
+}
+
+static int armv8_a73_map_event(struct perf_event *event)
+{
+ return __armv8_pmuv3_map_event(event, NULL, &armv8_a73_perf_cache_map);
}
static int armv8_thunder_map_event(struct perf_event *event)
{
- return armpmu_map_event(event, &armv8_thunder_perf_map,
- &armv8_thunder_perf_cache_map,
- ARMV8_PMU_EVTYPE_EVENT);
+ return __armv8_pmuv3_map_event(event, NULL,
+ &armv8_thunder_perf_cache_map);
}
static int armv8_vulcan_map_event(struct perf_event *event)
{
- return armpmu_map_event(event, &armv8_vulcan_perf_map,
- &armv8_vulcan_perf_cache_map,
- ARMV8_PMU_EVTYPE_EVENT);
+ return __armv8_pmuv3_map_event(event, NULL,
+ &armv8_vulcan_perf_cache_map);
}
struct armv8pmu_probe_info {
@@ -1062,6 +990,22 @@ static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
return 0;
}
+/*
+ * Initialise @cpu_pmu for the "arm,cortex-a35-pmu" compatible.
+ *
+ * Runs the common armv8_pmu_init() setup first and returns its error code
+ * unchanged if that fails. On success it sets the PMU name, installs the
+ * event-mapping callback, points the EVENTS and FORMATS attribute group
+ * slots at the PMUv3 groups and returns 0.
+ */
+static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ int ret = armv8_pmu_init(cpu_pmu);
+ if (ret)
+ return ret;
+
+ cpu_pmu->name = "armv8_cortex_a35";
+ /* NOTE(review): reuses the Cortex-A53 event mapping - presumably intentional, confirm */
+ cpu_pmu->map_event = armv8_a53_map_event;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv8_pmuv3_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv8_pmuv3_format_attr_group;
+
+ return 0;
+}
+}
+
static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
{
int ret = armv8_pmu_init(cpu_pmu);
@@ -1110,6 +1054,22 @@ static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
return 0;
}
+/*
+ * Initialise @cpu_pmu for the "arm,cortex-a73-pmu" compatible.
+ *
+ * Runs the common armv8_pmu_init() setup first and returns its error code
+ * unchanged if that fails. On success it sets the PMU name, installs
+ * armv8_a73_map_event() as the event-mapping callback, points the EVENTS
+ * and FORMATS attribute group slots at the PMUv3 groups and returns 0.
+ */
+static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ int ret = armv8_pmu_init(cpu_pmu);
+ if (ret)
+ return ret;
+
+ cpu_pmu->name = "armv8_cortex_a73";
+ cpu_pmu->map_event = armv8_a73_map_event;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+ &armv8_pmuv3_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+ &armv8_pmuv3_format_attr_group;
+
+ return 0;
+}
+}
+
static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
{
int ret = armv8_pmu_init(cpu_pmu);
@@ -1144,9 +1104,11 @@ static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init},
+ {.compatible = "arm,cortex-a35-pmu", .data = armv8_a35_pmu_init},
{.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init},
{.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init},
{.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init},
+ {.compatible = "arm,cortex-a73-pmu", .data = armv8_a73_pmu_init},
{.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init},
{.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init},
{},
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index 26c998534dca..636ca0119c0e 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -40,7 +40,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
probe_opcode_t insn;
/* TODO: Currently we do not support AARCH32 instruction probing */
- if (test_bit(TIF_32BIT, &mm->context.flags))
+ if (mm->context.flags & MMCF_AARCH32)
return -ENOTSUPP;
else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE))
return -EINVAL;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 659ae8094ed5..2dc0f8482210 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -360,6 +360,8 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
/*
* Complete any pending TLB or cache maintenance on this CPU in case
* the thread migrates to a different CPU.
+ * This full barrier is also required by the membarrier system
+ * call.
*/
dsb(ish);
@@ -382,15 +384,12 @@ unsigned long get_wchan(struct task_struct *p)
return 0;
frame.fp = thread_saved_fp(p);
- frame.sp = thread_saved_sp(p);
frame.pc = thread_saved_pc(p);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
frame.graph = p->curr_ret_stack;
#endif
do {
- if (frame.sp < stack_page ||
- frame.sp >= stack_page + THREAD_SIZE ||
- unwind_frame(p, &frame))
+ if (unwind_frame(p, &frame))
goto out;
if (!in_sched_functions(frame.pc)) {
ret = frame.pc;
@@ -417,3 +416,11 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
else
return randomize_page(mm->brk, SZ_1G);
}
+
+/*
+ * Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY.
+ *
+ * Records in the mm context flags whether the current task is a compat
+ * task: the flags become MMCF_AARCH32 when is_compat_task() is true and 0
+ * otherwise. The flags word is overwritten, not OR-ed into.
+ */
+void arch_setup_new_exec(void)
+{
+ current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
+}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 1b38c0150aec..9cbb6123208f 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -42,6 +42,7 @@
#include <asm/compat.h>
#include <asm/debug-monitors.h>
#include <asm/pgtable.h>
+#include <asm/stacktrace.h>
#include <asm/syscall.h>
#include <asm/traps.h>
#include <asm/system_misc.h>
@@ -127,7 +128,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
{
return ((addr & ~(THREAD_SIZE - 1)) ==
(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) ||
- on_irq_stack(addr, raw_smp_processor_id());
+ on_irq_stack(addr);
}
/**
@@ -1363,7 +1364,7 @@ static void tracehook_report_syscall(struct pt_regs *regs,
if (dir == PTRACE_SYSCALL_EXIT)
tracehook_report_syscall_exit(regs, 0);
else if (tracehook_report_syscall_entry(regs))
- regs->syscallno = ~0UL;
+ forget_syscall(regs);
regs->regs[regno] = saved_reg;
}
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index 12a87f2600f2..933adbc0f654 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -42,7 +42,6 @@ void *return_address(unsigned int level)
data.addr = NULL;
frame.fp = (unsigned long)__builtin_frame_address(0);
- frame.sp = current_stack_pointer;
frame.pc = (unsigned long)return_address; /* dummy */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
frame.graph = current->curr_ret_stack;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 089c3747995d..c45214f8fb54 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -29,6 +29,7 @@
#include <linux/string.h>
#include <linux/tracehook.h>
#include <linux/ratelimit.h>
+#include <linux/syscalls.h>
#include <asm/debug-monitors.h>
#include <asm/elf.h>
@@ -36,6 +37,7 @@
#include <asm/ucontext.h>
#include <asm/unistd.h>
#include <asm/fpsimd.h>
+#include <asm/ptrace.h>
#include <asm/signal32.h>
#include <asm/vdso.h>
@@ -387,7 +389,7 @@ static int restore_sigframe(struct pt_regs *regs,
/*
* Avoid sys_rt_sigreturn() restarting.
*/
- regs->syscallno = ~0UL;
+ forget_syscall(regs);
err |= !valid_user_regs(&regs->user_regs, current);
if (err == 0)
@@ -673,13 +675,12 @@ static void do_signal(struct pt_regs *regs)
{
unsigned long continue_addr = 0, restart_addr = 0;
int retval = 0;
- int syscall = (int)regs->syscallno;
struct ksignal ksig;
/*
* If we were from a system call, check for system call restarting...
*/
- if (syscall >= 0) {
+ if (in_syscall(regs)) {
continue_addr = regs->pc;
restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4);
retval = regs->regs[0];
@@ -687,7 +688,7 @@ static void do_signal(struct pt_regs *regs)
/*
* Avoid additional syscall restarting via ret_to_user.
*/
- regs->syscallno = ~0UL;
+ forget_syscall(regs);
/*
* Prepare for system call restart. We do this here so that a
@@ -731,7 +732,7 @@ static void do_signal(struct pt_regs *regs)
* Handle restarting a different system call. As above, if a debugger
* has chosen to restart at a different PC, ignore the restart.
*/
- if (syscall >= 0 && regs->pc == restart_addr) {
+ if (in_syscall(regs) && regs->pc == restart_addr) {
if (retval == -ERESTART_RESTARTBLOCK)
setup_restart_syscall(regs);
user_rewind_single_step(current);
@@ -749,6 +750,10 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
* Update the trace code with the current status.
*/
trace_hardirqs_off();
+
+ /* Check valid user FS if needed */
+ addr_limit_user_check();
+
do {
if (thread_flags & _TIF_NEED_RESCHED) {
schedule();
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index c747a0fc5d7d..4e5a664be04b 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -354,7 +354,7 @@ static int compat_restore_sigframe(struct pt_regs *regs,
/*
* Avoid compat_sys_sigreturn() restarting.
*/
- regs->syscallno = ~0UL;
+ forget_syscall(regs);
err |= !valid_user_regs(&regs->user_regs, current);
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dc66e6ec3a99..9f7195a5773e 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -154,7 +154,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
* page tables.
*/
secondary_data.task = idle;
- secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
+ secondary_data.stack = task_stack_page(idle) + THREAD_SIZE;
update_cpu_boot_status(CPU_MMU_OFF);
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
@@ -690,7 +690,7 @@ void __init smp_init_cpus(void)
acpi_parse_gic_cpu_interface, 0);
if (cpu_count > nr_cpu_ids)
- pr_warn("Number of cores (%d) exceeds configured maximum of %d - clipping\n",
+ pr_warn("Number of cores (%d) exceeds configured maximum of %u - clipping\n",
cpu_count, nr_cpu_ids);
if (!bootcpu_valid) {
@@ -977,11 +977,21 @@ void smp_send_stop(void)
}
#ifdef CONFIG_KEXEC_CORE
-void smp_send_crash_stop(void)
+void crash_smp_send_stop(void)
{
+ static int cpus_stopped;
cpumask_t mask;
unsigned long timeout;
+ /*
+ * This function can be called twice in panic path, but obviously
+ * we execute this only once.
+ */
+ if (cpus_stopped)
+ return;
+
+ cpus_stopped = 1;
+
if (num_online_cpus() == 1)
return;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 09d37d66b630..3144584617e7 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -42,33 +42,17 @@
*/
int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
{
- unsigned long high, low;
unsigned long fp = frame->fp;
- unsigned long irq_stack_ptr;
+
+ if (fp & 0xf)
+ return -EINVAL;
if (!tsk)
tsk = current;
- /*
- * Switching between stacks is valid when tracing current and in
- * non-preemptible context.
- */
- if (tsk == current && !preemptible())
- irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
- else
- irq_stack_ptr = 0;
-
- low = frame->sp;
- /* irq stacks are not THREAD_SIZE aligned */
- if (on_irq_stack(frame->sp, raw_smp_processor_id()))
- high = irq_stack_ptr;
- else
- high = ALIGN(low, THREAD_SIZE) - 0x20;
-
- if (fp < low || fp > high || fp & 0xf)
+ if (!on_accessible_stack(tsk, fp))
return -EINVAL;
- frame->sp = fp + 0x10;
frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
@@ -86,34 +70,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
/*
- * Check whether we are going to walk through from interrupt stack
- * to task stack.
- * If we reach the end of the stack - and its an interrupt stack,
- * unpack the dummy frame to find the original elr.
- *
- * Check the frame->fp we read from the bottom of the irq_stack,
- * and the original task stack pointer are both in current->stack.
+ * Frames created upon entry from EL0 have NULL FP and PC values, so
+ * don't bother reporting these. Frames created by __noreturn functions
+ * might have a valid FP even if PC is bogus, so only terminate where
+ * both are NULL.
*/
- if (frame->sp == irq_stack_ptr) {
- struct pt_regs *irq_args;
- unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
-
- if (object_is_on_stack((void *)orig_sp) &&
- object_is_on_stack((void *)frame->fp)) {
- frame->sp = orig_sp;
-
- /* orig_sp is the saved pt_regs, find the elr */
- irq_args = (struct pt_regs *)orig_sp;
- frame->pc = irq_args->pc;
- } else {
- /*
- * This frame has a non-standard format, and we
- * didn't fix it, because the data looked wrong.
- * Refuse to output this frame.
- */
- return -EINVAL;
- }
- }
+ if (!frame->fp && !frame->pc)
+ return -EINVAL;
return 0;
}
@@ -167,7 +130,6 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
data.no_sched_functions = 0;
frame.fp = regs->regs[29];
- frame.sp = regs->sp;
frame.pc = regs->pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
frame.graph = current->curr_ret_stack;
@@ -192,12 +154,10 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
if (tsk != current) {
data.no_sched_functions = 1;
frame.fp = thread_saved_fp(tsk);
- frame.sp = thread_saved_sp(tsk);
frame.pc = thread_saved_pc(tsk);
} else {
data.no_sched_functions = 0;
frame.fp = (unsigned long)__builtin_frame_address(0);
- frame.sp = current_stack_pointer;
frame.pc = (unsigned long)save_stack_trace_tsk;
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index da33c90248e9..a4391280fba9 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -50,7 +50,6 @@ unsigned long profile_pc(struct pt_regs *regs)
return regs->pc;
frame.fp = regs->regs[29];
- frame.sp = regs->sp;
frame.pc = regs->pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
frame.graph = -1; /* no task info */
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 8a62648848e5..5ea4b85aee0e 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -32,6 +32,7 @@
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
+#include <linux/sizes.h>
#include <linux/syscalls.h>
#include <linux/mm_types.h>
@@ -41,6 +42,7 @@
#include <asm/esr.h>
#include <asm/insn.h>
#include <asm/traps.h>
+#include <asm/smp.h>
#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
#include <asm/exception.h>
@@ -143,7 +145,6 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
struct stackframe frame;
- unsigned long irq_stack_ptr;
int skip;
pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
@@ -154,25 +155,14 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
if (!try_get_task_stack(tsk))
return;
- /*
- * Switching between stacks is valid when tracing current and in
- * non-preemptible context.
- */
- if (tsk == current && !preemptible())
- irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
- else
- irq_stack_ptr = 0;
-
if (tsk == current) {
frame.fp = (unsigned long)__builtin_frame_address(0);
- frame.sp = current_stack_pointer;
frame.pc = (unsigned long)dump_backtrace;
} else {
/*
* task blocked in __switch_to
*/
frame.fp = thread_saved_fp(tsk);
- frame.sp = thread_saved_sp(tsk);
frame.pc = thread_saved_pc(tsk);
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -182,13 +172,12 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
skip = !!regs;
printk("Call trace:\n");
while (1) {
- unsigned long where = frame.pc;
unsigned long stack;
int ret;
/* skip until specified stack frame */
if (!skip) {
- dump_backtrace_entry(where);
+ dump_backtrace_entry(frame.pc);
} else if (frame.fp == regs->regs[29]) {
skip = 0;
/*
@@ -203,20 +192,12 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
ret = unwind_frame(tsk, &frame);
if (ret < 0)
break;
- stack = frame.sp;
- if (in_exception_text(where)) {
- /*
- * If we switched to the irq_stack before calling this
- * exception handler, then the pt_regs will be on the
- * task stack. The easiest way to tell is if the large
- * pt_regs would overlap with the end of the irq_stack.
- */
- if (stack < irq_stack_ptr &&
- (stack + sizeof(struct pt_regs)) > irq_stack_ptr)
- stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
+ if (in_entry_text(frame.pc)) {
+ stack = frame.fp - offsetof(struct pt_regs, stackframe);
- dump_mem("", "Exception stack", stack,
- stack + sizeof(struct pt_regs));
+ if (on_accessible_stack(tsk, stack))
+ dump_mem("", "Exception stack", stack,
+ stack + sizeof(struct pt_regs));
}
}
@@ -257,8 +238,6 @@ static int __die(const char *str, int err, struct pt_regs *regs)
end_of_stack(tsk));
if (!user_mode(regs)) {
- dump_mem(KERN_EMERG, "Stack: ", regs->sp,
- THREAD_SIZE + (unsigned long)task_stack_page(tsk));
dump_backtrace(regs, tsk);
dump_instr(KERN_EMERG, regs);
}
@@ -484,6 +463,9 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
case ESR_ELx_SYS64_ISS_CRM_DC_CVAC: /* DC CVAC, gets promoted */
__user_cache_maint("dc civac", address, ret);
break;
+ case ESR_ELx_SYS64_ISS_CRM_DC_CVAP: /* DC CVAP */
+ __user_cache_maint("sys 3, c7, c12, 1", address, ret);
+ break;
case ESR_ELx_SYS64_ISS_CRM_DC_CIVAC: /* DC CIVAC */
__user_cache_maint("dc civac", address, ret);
break;
@@ -593,7 +575,7 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs)
if (show_unhandled_signals_ratelimited()) {
pr_info("%s[%d]: syscall %d\n", current->comm,
- task_pid_nr(current), (int)regs->syscallno);
+ task_pid_nr(current), regs->syscallno);
dump_instr("", regs);
if (user_mode(regs))
__show_regs(regs);
@@ -689,6 +671,43 @@ asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
force_sig_info(info.si_signo, &info, current);
}
+#ifdef CONFIG_VMAP_STACK
+
+DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
+ __aligned(16);
+
+/*
+ * Report that there is not enough stack left to handle an exception, then
+ * panic.
+ *
+ * Prints ESR_EL1 and FAR_EL1, the address ranges of the current task
+ * stack, this CPU's IRQ stack and this CPU's overflow stack, and the
+ * register state in @regs. It then calls nmi_panic() followed by
+ * cpu_park_loop().
+ */
+asmlinkage void handle_bad_stack(struct pt_regs *regs)
+{
+ unsigned long tsk_stk = (unsigned long)current->stack;
+ unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
+ unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
+ unsigned int esr = read_sysreg(esr_el1);
+ unsigned long far = read_sysreg(far_el1);
+
+ console_verbose();
+ /* Terminate the line like every other message emitted here. */
+ pr_emerg("Insufficient stack space to handle exception!\n");
+
+ pr_emerg("ESR: 0x%08x -- %s\n", esr, esr_get_class_string(esr));
+ pr_emerg("FAR: 0x%016lx\n", far);
+
+ pr_emerg("Task stack: [0x%016lx..0x%016lx]\n",
+ tsk_stk, tsk_stk + THREAD_SIZE);
+ pr_emerg("IRQ stack: [0x%016lx..0x%016lx]\n",
+ irq_stk, irq_stk + THREAD_SIZE);
+ pr_emerg("Overflow stack: [0x%016lx..0x%016lx]\n",
+ ovf_stk, ovf_stk + OVERFLOW_STACK_SIZE);
+
+ __show_regs(regs);
+
+ /*
+ * We use nmi_panic to limit the potential for recursive overflows, and
+ * to get a better stack trace.
+ */
+ nmi_panic(NULL, "kernel stack overflow");
+ cpu_park_loop();
+}
+#endif
+
void __pte_error(const char *file, int line, unsigned long val)
{
pr_err("%s:%d: bad pte %016lx.\n", file, line, val);
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index e8f759f764f2..2d419006ad43 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -110,12 +110,27 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp)
}
#endif /* CONFIG_COMPAT */
+/*
+ * mremap hook installed on the "[vdso]" special mapping.
+ *
+ * Returns -EINVAL unless the new VMA is exactly as large as the vDSO image
+ * (vdso_end - vdso_start). Otherwise it records the new start address in
+ * current->mm->context.vdso and returns 0.
+ */
+static int vdso_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+ unsigned long vdso_size = vdso_end - vdso_start;
+
+ if (vdso_size != new_size)
+ return -EINVAL;
+
+ current->mm->context.vdso = (void *)new_vma->vm_start;
+
+ return 0;
+}
+
static struct vm_special_mapping vdso_spec[2] __ro_after_init = {
{
.name = "[vvar]",
},
{
.name = "[vdso]",
+ .mremap = vdso_mremap,
},
};
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 987a00ee446c..fe56c268a7d9 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -72,22 +72,6 @@ PECOFF_FILE_ALIGNMENT = 0x200;
#define PECOFF_EDATA_PADDING
#endif
-#if defined(CONFIG_DEBUG_ALIGN_RODATA)
-/*
- * 4 KB granule: 1 level 2 entry
- * 16 KB granule: 128 level 3 entries, with contiguous bit
- * 64 KB granule: 32 level 3 entries, with contiguous bit
- */
-#define SEGMENT_ALIGN SZ_2M
-#else
-/*
- * 4 KB granule: 16 level 3 entries, with contiguous bit
- * 16 KB granule: 4 level 3 entries, without contiguous bit
- * 64 KB granule: 1 level 3 entry
- */
-#define SEGMENT_ALIGN SZ_64K
-#endif
-
SECTIONS
{
/*
@@ -192,7 +176,7 @@ SECTIONS
_data = .;
_sdata = .;
- RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
/*
* Data written with the MMU off but read with the MMU on requires
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 17d8a1677a0b..7debb74843a0 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -84,7 +84,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
vcpu->stat.wfe_exit_stat++;
- kvm_vcpu_on_spin(vcpu);
+ kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
} else {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
vcpu->stat.wfi_exit_stat++;
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index b81f4091c909..a81f5e10fc8c 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -70,7 +70,7 @@ u32 __hyp_text __init_stage2_translation(void)
* Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2.
*/
tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
- if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && tmp)
+ if (tmp)
val |= VTCR_EL2_HA;
/*
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index 116786d2e8e8..c77d508b7462 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -208,29 +208,12 @@ static void vgic_v3_access_apr_reg(struct kvm_vcpu *vcpu,
static bool access_gic_aprn(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r, u8 apr)
{
- struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu;
u8 idx = r->Op2 & 3;
- /*
- * num_pri_bits are initialized with HW supported values.
- * We can rely safely on num_pri_bits even if VM has not
- * restored ICC_CTLR_EL1 before restoring APnR registers.
- */
- switch (vgic_v3_cpu->num_pri_bits) {
- case 7:
- vgic_v3_access_apr_reg(vcpu, p, apr, idx);
- break;
- case 6:
- if (idx > 1)
- goto err;
- vgic_v3_access_apr_reg(vcpu, p, apr, idx);
- break;
- default:
- if (idx > 0)
- goto err;
- vgic_v3_access_apr_reg(vcpu, p, apr, idx);
- }
+ if (idx > vgic_v3_max_apr_idx(vcpu))
+ goto err;
+ vgic_v3_access_apr_reg(vcpu, p, apr, idx);
return true;
err:
if (!p->is_write)
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index c86b7909ef31..a0abc142c92b 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -17,3 +17,5 @@ CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \
-fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \
-fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \
-fcall-saved-x18
+
+lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c
new file mode 100644
index 000000000000..b6ceafdb8b72
--- /dev/null
+++ b/arch/arm64/lib/uaccess_flushcache.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/uaccess.h>
+#include <asm/barrier.h>
+#include <asm/cacheflush.h>
+
+/*
+ * Copy @cnt bytes from @src to @dst with memcpy(), then pass the
+ * destination range to __clean_dcache_area_pop().
+ */
+void memcpy_flushcache(void *dst, const void *src, size_t cnt)
+{
+ /*
+ * We assume this should not be called with @dst pointing to
+ * non-cacheable memory, such that we don't need an explicit
+ * barrier to order the cache maintenance against the memcpy.
+ */
+ memcpy(dst, src, cnt);
+ __clean_dcache_area_pop(dst, cnt);
+}
+EXPORT_SYMBOL_GPL(memcpy_flushcache);
+
+/*
+ * Page-based wrapper around memcpy_flushcache(): copies @len bytes to @to,
+ * reading from @offset bytes past page_address(@page).
+ */
+void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
+ size_t len)
+{
+ memcpy_flushcache(to, page_address(page) + offset, len);
+}
+
+/*
+ * Copy @n bytes from user memory at @from to @to using
+ * __arch_copy_from_user(), then pass the first (n - rc) bytes of @to to
+ * __clean_dcache_area_pop(), where rc is the copy routine's return value.
+ * Returns rc unchanged.
+ *
+ * NOTE(review): presumably rc is the number of bytes NOT copied, so that
+ * (n - rc) is the length actually written - confirm against
+ * __arch_copy_from_user().
+ */
+unsigned long __copy_user_flushcache(void *to, const void __user *from,
+ unsigned long n)
+{
+ unsigned long rc = __arch_copy_from_user(to, from, n);
+
+ /* See above */
+ __clean_dcache_area_pop(to, n - rc);
+ return rc;
+}
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 83c27b6e6dca..7f1dbe962cf5 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -109,20 +109,25 @@ ENTRY(__clean_dcache_area_pou)
ENDPROC(__clean_dcache_area_pou)
/*
- * __dma_inv_area(start, size)
- * - start - virtual start address of region
+ * __inval_dcache_area(kaddr, size)
+ *
+ * Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * are invalidated. Any partial lines at the ends of the interval are
+ * also cleaned to PoC to prevent data loss.
+ *
+ * - kaddr - kernel address
* - size - size in question
*/
-__dma_inv_area:
- add x1, x1, x0
+ENTRY(__inval_dcache_area)
/* FALLTHROUGH */
/*
- * __inval_cache_range(start, end)
- * - start - start address of region
- * - end - end address of region
+ * __dma_inv_area(start, size)
+ * - start - virtual start address of region
+ * - size - size in question
*/
-ENTRY(__inval_cache_range)
+__dma_inv_area:
+ add x1, x1, x0
dcache_line_size x2, x3
sub x3, x2, #1
tst x1, x3 // end cache line aligned?
@@ -140,7 +145,7 @@ ENTRY(__inval_cache_range)
b.lo 2b
dsb sy
ret
-ENDPIPROC(__inval_cache_range)
+ENDPIPROC(__inval_dcache_area)
ENDPROC(__dma_inv_area)
/*
@@ -167,6 +172,20 @@ ENDPIPROC(__clean_dcache_area_poc)
ENDPROC(__dma_clean_area)
/*
+ * __clean_dcache_area_pop(kaddr, size)
+ *
+ * Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * are cleaned to the PoP.
+ *
+ * Implemented with the dcache_by_line_op macro, invoked with the "cvap"
+ * operation and the "sy" domain; x2 and x3 are passed to it as well.
+ *
+ * - kaddr - kernel address
+ * - size - size in question
+ */
+ENTRY(__clean_dcache_area_pop)
+ dcache_by_line_op cvap, sy, x0, x1, x2, x3
+ ret
+ENDPIPROC(__clean_dcache_area_pop)
+
+/*
* __dma_flush_area(start, size)
*
* clean & invalidate D / U line
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index f27d4dd04384..614af886b7ef 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -42,7 +42,7 @@ static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
return prot;
}
-static struct gen_pool *atomic_pool;
+static struct gen_pool *atomic_pool __ro_after_init;
#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
@@ -425,7 +425,7 @@ static int __init atomic_pool_init(void)
gen_pool_set_algo(atomic_pool,
gen_pool_first_fit_order_align,
- (void *)PAGE_SHIFT);
+ NULL);
pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
atomic_pool_size / 1024);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 2509e4fe6992..89993c4be1be 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -34,6 +34,7 @@
#include <linux/hugetlb.h>
#include <asm/bug.h>
+#include <asm/cmpxchg.h>
#include <asm/cpufeature.h>
#include <asm/exception.h>
#include <asm/debug-monitors.h>
@@ -82,6 +83,49 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
}
#endif
+/*
+ * Print the data-abort specific fields of @esr.
+ *
+ * When ESR_ELx_ISV is set, the access size, SSE, SRT, SF and AR fields are
+ * decoded. Otherwise the ISS field is printed as a raw hexadecimal value.
+ * CM and WnR are printed in both cases.
+ */
+static void data_abort_decode(unsigned int esr)
+{
+ pr_alert("Data abort info:\n");
+
+ if (esr & ESR_ELx_ISV) {
+ pr_alert(" Access size = %u byte(s)\n",
+ 1U << ((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT));
+ pr_alert(" SSE = %lu, SRT = %lu\n",
+ (esr & ESR_ELx_SSE) >> ESR_ELx_SSE_SHIFT,
+ (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT);
+ pr_alert(" SF = %lu, AR = %lu\n",
+ (esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT,
+ (esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT);
+ } else {
+ /* The "0x" prefix needs a hex conversion; %lu printed decimal. */
+ pr_alert(" ISV = 0, ISS = 0x%08lx\n", esr & ESR_ELx_ISS_MASK);
+ }
+
+ pr_alert(" CM = %lu, WnR = %lu\n",
+ (esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT,
+ (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
+}
+
+/*
+ * Decode mem abort information
+ *
+ * Prints the exception class string and the IL, SET, FnV, EA and S1PTW
+ * fields of @esr. When esr_is_data_abort() reports a data abort, the
+ * data-abort specific fields are printed as well via data_abort_decode().
+ */
+static void mem_abort_decode(unsigned int esr)
+{
+ pr_alert("Mem abort info:\n");
+
+ pr_alert(" Exception class = %s, IL = %u bits\n",
+ esr_get_class_string(esr),
+ (esr & ESR_ELx_IL) ? 32 : 16);
+ pr_alert(" SET = %lu, FnV = %lu\n",
+ (esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT,
+ (esr & ESR_ELx_FnV) >> ESR_ELx_FnV_SHIFT);
+ pr_alert(" EA = %lu, S1PTW = %lu\n",
+ (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT,
+ (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT);
+
+ if (esr_is_data_abort(esr))
+ data_abort_decode(esr);
+}
+
/*
* Dump out the page tables associated with 'addr' in the currently active mm.
*/
@@ -139,7 +183,6 @@ void show_pte(unsigned long addr)
pr_cont("\n");
}
-#ifdef CONFIG_ARM64_HW_AFDBM
/*
* This function sets the access flags (dirty, accessed), as well as write
* permission, and only to a more permissive setting.
@@ -154,18 +197,13 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
pte_t entry, int dirty)
{
- pteval_t old_pteval;
- unsigned int tmp;
+ pteval_t old_pteval, pteval;
if (pte_same(*ptep, entry))
return 0;
/* only preserve the access flags and write permission */
- pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY;
-
- /* set PTE_RDONLY if actual read-only or clean PTE */
- if (!pte_write(entry) || !pte_sw_dirty(entry))
- pte_val(entry) |= PTE_RDONLY;
+ pte_val(entry) &= PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY;
/*
* Setting the flags must be done atomically to avoid racing with the
@@ -174,21 +212,18 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
* (calculated as: a & b == ~(~a | ~b)).
*/
pte_val(entry) ^= PTE_RDONLY;
- asm volatile("// ptep_set_access_flags\n"
- " prfm pstl1strm, %2\n"
- "1: ldxr %0, %2\n"
- " eor %0, %0, %3 // negate PTE_RDONLY in *ptep\n"
- " orr %0, %0, %4 // set flags\n"
- " eor %0, %0, %3 // negate final PTE_RDONLY\n"
- " stxr %w1, %0, %2\n"
- " cbnz %w1, 1b\n"
- : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
- : "L" (PTE_RDONLY), "r" (pte_val(entry)));
+ pteval = READ_ONCE(pte_val(*ptep));
+ do {
+ old_pteval = pteval;
+ pteval ^= PTE_RDONLY;
+ pteval |= pte_val(entry);
+ pteval ^= PTE_RDONLY;
+ pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
+ } while (pteval != old_pteval);
flush_tlb_fix_spurious_fault(vma, address);
return 1;
}
-#endif
static bool is_el1_instruction_abort(unsigned int esr)
{
@@ -248,6 +283,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg,
addr);
+ mem_abort_decode(esr);
+
show_pte(addr);
die("Oops", regs, esr);
bust_spinlocks(0);
@@ -435,8 +472,11 @@ retry:
* the mmap_sem because it would already be released
* in __lock_page_or_retry in mm/filemap.c.
*/
- if (fatal_signal_pending(current))
+ if (fatal_signal_pending(current)) {
+ if (!user_mode(regs))
+ goto no_context;
return 0;
+ }
/*
* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
@@ -702,6 +742,8 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
pr_alert("Unhandled fault: %s (0x%08x) at 0x%016lx\n",
inf->name, esr, addr);
+ mem_abort_decode(esr);
+
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 21a8d828cbf4..e36ed5087b5c 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -83,3 +83,19 @@ EXPORT_SYMBOL(flush_dcache_page);
* Additional functions defined in assembly.
*/
EXPORT_SYMBOL(flush_icache_range);
+
+#ifdef CONFIG_ARCH_HAS_PMEM_API
+void arch_wb_cache_pmem(void *addr, size_t size)
+{
+ /* Ensure order against any prior non-cacheable writes */
+ dmb(osh);
+ __clean_dcache_area_pop(addr, size);
+}
+EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
+
+void arch_invalidate_pmem(void *addr, size_t size)
+{
+ __inval_dcache_area(addr, size);
+}
+EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+#endif
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 656e0ece2289..6cb0fa92a651 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -41,6 +41,16 @@ int pud_huge(pud_t pud)
#endif
}
+/*
+ * Select all bits except the pfn
+ */
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+ unsigned long pfn = pte_pfn(pte);
+
+ return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
+}
+
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, size_t *pgsize)
{
@@ -58,15 +68,107 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr,
return CONT_PTES;
}
+static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
+{
+ int contig_ptes = 0;
+
+ *pgsize = size;
+
+ switch (size) {
+#ifdef CONFIG_ARM64_4K_PAGES
+ case PUD_SIZE:
+#endif
+ case PMD_SIZE:
+ contig_ptes = 1;
+ break;
+ case CONT_PMD_SIZE:
+ *pgsize = PMD_SIZE;
+ contig_ptes = CONT_PMDS;
+ break;
+ case CONT_PTE_SIZE:
+ *pgsize = PAGE_SIZE;
+ contig_ptes = CONT_PTES;
+ break;
+ }
+
+ return contig_ptes;
+}
+
+/*
+ * Changing some bits of contiguous entries requires us to follow a
+ * Break-Before-Make approach, breaking the whole contiguous set
+ * before we can change any entries. See ARM DDI 0487A.k_iss10775,
+ * "Misprogramming of the Contiguous bit", page D4-1762.
+ *
+ * This helper performs the break step.
+ */
+static pte_t get_clear_flush(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep,
+ unsigned long pgsize,
+ unsigned long ncontig)
+{
+ struct vm_area_struct vma = { .vm_mm = mm };
+ pte_t orig_pte = huge_ptep_get(ptep);
+ bool valid = pte_valid(orig_pte);
+ unsigned long i, saddr = addr;
+
+ for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
+ pte_t pte = ptep_get_and_clear(mm, addr, ptep);
+
+ /*
+ * If HW_AFDBM is enabled, then the HW could turn on
+ * the dirty bit for any page in the set, so check
+ * them all. All hugetlb entries are already young.
+ */
+ if (pte_dirty(pte))
+ orig_pte = pte_mkdirty(orig_pte);
+ }
+
+ if (valid)
+ flush_tlb_range(&vma, saddr, addr);
+ return orig_pte;
+}
+
+/*
+ * Changing some bits of contiguous entries requires us to follow a
+ * Break-Before-Make approach, breaking the whole contiguous set
+ * before we can change any entries. See ARM DDI 0487A.k_iss10775,
+ * "Misprogramming of the Contiguous bit", page D4-1762.
+ *
+ * This helper performs the break step for use cases where the
+ * original pte is not needed.
+ */
+static void clear_flush(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep,
+ unsigned long pgsize,
+ unsigned long ncontig)
+{
+ struct vm_area_struct vma = { .vm_mm = mm };
+ unsigned long i, saddr = addr;
+
+ for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
+ pte_clear(mm, addr, ptep);
+
+ flush_tlb_range(&vma, saddr, addr);
+}
+
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
size_t pgsize;
int i;
int ncontig;
- unsigned long pfn;
+ unsigned long pfn, dpfn;
pgprot_t hugeprot;
+ /*
+ * Code needs to be expanded to handle huge swap and migration
+ * entries. Needed for HUGETLB and MEMORY_FAILURE.
+ */
+ WARN_ON(!pte_present(pte));
+
if (!pte_cont(pte)) {
set_pte_at(mm, addr, ptep, pte);
return;
@@ -74,17 +176,30 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
pfn = pte_pfn(pte);
- hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
- for (i = 0; i < ncontig; i++) {
+ dpfn = pgsize >> PAGE_SHIFT;
+ hugeprot = pte_pgprot(pte);
+
+ clear_flush(mm, addr, ptep, pgsize, ncontig);
+
+ for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
pte_val(pfn_pte(pfn, hugeprot)));
set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
- ptep++;
- pfn += pgsize >> PAGE_SHIFT;
- addr += pgsize;
}
}
+void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned long sz)
+{
+ int i, ncontig;
+ size_t pgsize;
+
+ ncontig = num_contig_ptes(sz, &pgsize);
+
+ for (i = 0; i < ncontig; i++, ptep++)
+ set_pte(ptep, pte);
+}
+
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
@@ -144,19 +259,28 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
return NULL;
pud = pud_offset(pgd, addr);
- if (pud_none(*pud))
+ if (sz != PUD_SIZE && pud_none(*pud))
return NULL;
- /* swap or huge page */
- if (!pud_present(*pud) || pud_huge(*pud))
+ /* hugepage or swap? */
+ if (pud_huge(*pud) || !pud_present(*pud))
return (pte_t *)pud;
/* table; check the next level */
+ if (sz == CONT_PMD_SIZE)
+ addr &= CONT_PMD_MASK;
+
pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
+ if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
+ pmd_none(*pmd))
return NULL;
- if (!pmd_present(*pmd) || pmd_huge(*pmd))
+ if (pmd_huge(*pmd) || !pmd_present(*pmd))
return (pte_t *)pmd;
+ if (sz == CONT_PTE_SIZE) {
+ pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK));
+ return pte;
+ }
+
return NULL;
}
@@ -176,111 +300,133 @@ pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
return entry;
}
+void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long sz)
+{
+ int i, ncontig;
+ size_t pgsize;
+
+ ncontig = num_contig_ptes(sz, &pgsize);
+
+ for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
+ pte_clear(mm, addr, ptep);
+}
+
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- pte_t pte;
+ int ncontig;
+ size_t pgsize;
+ pte_t orig_pte = huge_ptep_get(ptep);
- if (pte_cont(*ptep)) {
- int ncontig, i;
- size_t pgsize;
- bool is_dirty = false;
-
- ncontig = find_num_contig(mm, addr, ptep, &pgsize);
- /* save the 1st pte to return */
- pte = ptep_get_and_clear(mm, addr, ptep);
- for (i = 1, addr += pgsize; i < ncontig; ++i, addr += pgsize) {
- /*
- * If HW_AFDBM is enabled, then the HW could
- * turn on the dirty bit for any of the page
- * in the set, so check them all.
- */
- ++ptep;
- if (pte_dirty(ptep_get_and_clear(mm, addr, ptep)))
- is_dirty = true;
- }
- if (is_dirty)
- return pte_mkdirty(pte);
- else
- return pte;
- } else {
+ if (!pte_cont(orig_pte))
return ptep_get_and_clear(mm, addr, ptep);
- }
+
+ ncontig = find_num_contig(mm, addr, ptep, &pgsize);
+
+ return get_clear_flush(mm, addr, ptep, pgsize, ncontig);
}
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty)
{
- if (pte_cont(pte)) {
- int ncontig, i, changed = 0;
- size_t pgsize = 0;
- unsigned long pfn = pte_pfn(pte);
- /* Select all bits except the pfn */
- pgprot_t hugeprot =
- __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^
- pte_val(pte));
-
- pfn = pte_pfn(pte);
- ncontig = find_num_contig(vma->vm_mm, addr, ptep,
- &pgsize);
- for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize) {
- changed |= ptep_set_access_flags(vma, addr, ptep,
- pfn_pte(pfn,
- hugeprot),
- dirty);
- pfn += pgsize >> PAGE_SHIFT;
- }
- return changed;
- } else {
+ int ncontig, i, changed = 0;
+ size_t pgsize = 0;
+ unsigned long pfn = pte_pfn(pte), dpfn;
+ pgprot_t hugeprot;
+ pte_t orig_pte;
+
+ if (!pte_cont(pte))
return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
- }
+
+ ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
+ dpfn = pgsize >> PAGE_SHIFT;
+
+ orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
+ if (!pte_same(orig_pte, pte))
+ changed = 1;
+
+ /* Make sure we don't lose the dirty state */
+ if (pte_dirty(orig_pte))
+ pte = pte_mkdirty(pte);
+
+ hugeprot = pte_pgprot(pte);
+ for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
+ set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));
+
+ return changed;
}
void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- if (pte_cont(*ptep)) {
- int ncontig, i;
- size_t pgsize = 0;
-
- ncontig = find_num_contig(mm, addr, ptep, &pgsize);
- for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize)
- ptep_set_wrprotect(mm, addr, ptep);
- } else {
+ unsigned long pfn, dpfn;
+ pgprot_t hugeprot;
+ int ncontig, i;
+ size_t pgsize;
+ pte_t pte;
+
+ if (!pte_cont(*ptep)) {
ptep_set_wrprotect(mm, addr, ptep);
+ return;
}
+
+ ncontig = find_num_contig(mm, addr, ptep, &pgsize);
+ dpfn = pgsize >> PAGE_SHIFT;
+
+ pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);
+ pte = pte_wrprotect(pte);
+
+ hugeprot = pte_pgprot(pte);
+ pfn = pte_pfn(pte);
+
+ for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
+ set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}
void huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
- if (pte_cont(*ptep)) {
- int ncontig, i;
- size_t pgsize = 0;
-
- ncontig = find_num_contig(vma->vm_mm, addr, ptep,
- &pgsize);
- for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize)
- ptep_clear_flush(vma, addr, ptep);
- } else {
+ size_t pgsize;
+ int ncontig;
+
+ if (!pte_cont(*ptep)) {
ptep_clear_flush(vma, addr, ptep);
+ return;
}
+
+ ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
+ clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
}
static __init int setup_hugepagesz(char *opt)
{
unsigned long ps = memparse(opt, &opt);
- if (ps == PMD_SIZE) {
- hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
- } else if (ps == PUD_SIZE) {
- hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
- } else {
- hugetlb_bad_size();
- pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
- return 0;
+ switch (ps) {
+#ifdef CONFIG_ARM64_4K_PAGES
+ case PUD_SIZE:
+#endif
+ case PMD_SIZE * CONT_PMDS:
+ case PMD_SIZE:
+ case PAGE_SIZE * CONT_PTES:
+ hugetlb_add_hstate(ilog2(ps) - PAGE_SHIFT);
+ return 1;
}
- return 1;
+
+ hugetlb_bad_size();
+ pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
+ return 0;
}
__setup("hugepagesz=", setup_hugepagesz);
+
+#ifdef CONFIG_ARM64_64K_PAGES
+static __init int add_default_hugepagesz(void)
+{
+ if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
+ hugetlb_add_hstate(CONT_PTE_SHIFT);
+ return 0;
+}
+arch_initcall(add_default_hugepagesz);
+#endif
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index b02a9268dfbf..783de51a6c4e 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -44,8 +44,12 @@
#define A64_COND_NE AARCH64_INSN_COND_NE /* != */
#define A64_COND_CS AARCH64_INSN_COND_CS /* unsigned >= */
#define A64_COND_HI AARCH64_INSN_COND_HI /* unsigned > */
+#define A64_COND_LS AARCH64_INSN_COND_LS /* unsigned <= */
+#define A64_COND_CC AARCH64_INSN_COND_CC /* unsigned < */
#define A64_COND_GE AARCH64_INSN_COND_GE /* signed >= */
#define A64_COND_GT AARCH64_INSN_COND_GT /* signed > */
+#define A64_COND_LE AARCH64_INSN_COND_LE /* signed <= */
+#define A64_COND_LT AARCH64_INSN_COND_LT /* signed < */
#define A64_B_(cond, imm19) A64_COND_BRANCH(cond, (imm19) << 2)
/* Unconditional branch (immediate) */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index f32144b2e07f..ba38d403abb2 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -527,10 +527,14 @@ emit_bswap_uxt:
/* IF (dst COND src) JUMP off */
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP | BPF_JNE | BPF_X:
case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
emit(A64_CMP(1, dst, src), ctx);
emit_cond_jmp:
jmp_offset = bpf2a64_offset(i + off, i, ctx);
@@ -542,9 +546,15 @@ emit_cond_jmp:
case BPF_JGT:
jmp_cond = A64_COND_HI;
break;
+ case BPF_JLT:
+ jmp_cond = A64_COND_CC;
+ break;
case BPF_JGE:
jmp_cond = A64_COND_CS;
break;
+ case BPF_JLE:
+ jmp_cond = A64_COND_LS;
+ break;
case BPF_JSET:
case BPF_JNE:
jmp_cond = A64_COND_NE;
@@ -552,9 +562,15 @@ emit_cond_jmp:
case BPF_JSGT:
jmp_cond = A64_COND_GT;
break;
+ case BPF_JSLT:
+ jmp_cond = A64_COND_LT;
+ break;
case BPF_JSGE:
jmp_cond = A64_COND_GE;
break;
+ case BPF_JSLE:
+ jmp_cond = A64_COND_LE;
+ break;
default:
return -EFAULT;
}
@@ -566,10 +582,14 @@ emit_cond_jmp:
/* IF (dst COND imm) JUMP off */
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
emit_a64_mov_i(1, tmp, imm, ctx);
emit(A64_CMP(1, dst, tmp), ctx);
goto emit_cond_jmp;
diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h
index c58f4a83ed6f..f6431439d15d 100644
--- a/arch/blackfin/include/asm/spinlock.h
+++ b/arch/blackfin/include/asm/spinlock.h
@@ -48,11 +48,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
__raw_spin_unlock_asm(&lock->lock);
}
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- smp_cond_load_acquire(&lock->lock, !VAL);
-}
-
static inline int arch_read_can_lock(arch_rwlock_t *rw)
{
return __raw_uncached_fetch_asm(&rw->lock) > 0;
diff --git a/arch/blackfin/kernel/module.c b/arch/blackfin/kernel/module.c
index 0188c933b155..15af5768c403 100644
--- a/arch/blackfin/kernel/module.c
+++ b/arch/blackfin/kernel/module.c
@@ -4,8 +4,6 @@
* Licensed under the GPL-2 or later
*/
-#define pr_fmt(fmt) "module %s: " fmt, mod->name
-
#include <linux/moduleloader.h>
#include <linux/elf.h>
#include <linux/vmalloc.h>
@@ -16,6 +14,11 @@
#include <asm/cacheflush.h>
#include <linux/uaccess.h>
+#define mod_err(mod, fmt, ...) \
+ pr_err("module %s: " fmt, (mod)->name, ##__VA_ARGS__)
+#define mod_debug(mod, fmt, ...) \
+ pr_debug("module %s: " fmt, (mod)->name, ##__VA_ARGS__)
+
/* Transfer the section to the L1 memory */
int
module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
@@ -44,7 +47,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
dest = l1_inst_sram_alloc(s->sh_size);
mod->arch.text_l1 = dest;
if (dest == NULL) {
- pr_err("L1 inst memory allocation failed\n");
+ mod_err(mod, "L1 inst memory allocation failed\n");
return -1;
}
dma_memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -56,7 +59,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
dest = l1_data_sram_alloc(s->sh_size);
mod->arch.data_a_l1 = dest;
if (dest == NULL) {
- pr_err("L1 data memory allocation failed\n");
+ mod_err(mod, "L1 data memory allocation failed\n");
return -1;
}
memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -68,7 +71,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
dest = l1_data_sram_zalloc(s->sh_size);
mod->arch.bss_a_l1 = dest;
if (dest == NULL) {
- pr_err("L1 data memory allocation failed\n");
+ mod_err(mod, "L1 data memory allocation failed\n");
return -1;
}
@@ -77,7 +80,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
dest = l1_data_B_sram_alloc(s->sh_size);
mod->arch.data_b_l1 = dest;
if (dest == NULL) {
- pr_err("L1 data memory allocation failed\n");
+ mod_err(mod, "L1 data memory allocation failed\n");
return -1;
}
memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -87,7 +90,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
dest = l1_data_B_sram_alloc(s->sh_size);
mod->arch.bss_b_l1 = dest;
if (dest == NULL) {
- pr_err("L1 data memory allocation failed\n");
+ mod_err(mod, "L1 data memory allocation failed\n");
return -1;
}
memset(dest, 0, s->sh_size);
@@ -99,7 +102,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
dest = l2_sram_alloc(s->sh_size);
mod->arch.text_l2 = dest;
if (dest == NULL) {
- pr_err("L2 SRAM allocation failed\n");
+ mod_err(mod, "L2 SRAM allocation failed\n");
return -1;
}
memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -111,7 +114,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
dest = l2_sram_alloc(s->sh_size);
mod->arch.data_l2 = dest;
if (dest == NULL) {
- pr_err("L2 SRAM allocation failed\n");
+ mod_err(mod, "L2 SRAM allocation failed\n");
return -1;
}
memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -123,7 +126,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
dest = l2_sram_zalloc(s->sh_size);
mod->arch.bss_l2 = dest;
if (dest == NULL) {
- pr_err("L2 SRAM allocation failed\n");
+ mod_err(mod, "L2 SRAM allocation failed\n");
return -1;
}
@@ -157,8 +160,8 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
Elf32_Sym *sym;
unsigned long location, value, size;
- pr_debug("applying relocate section %u to %u\n",
- relsec, sechdrs[relsec].sh_info);
+ mod_debug(mod, "applying relocate section %u to %u\n",
+ relsec, sechdrs[relsec].sh_info);
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
/* This is where to make the change */
@@ -174,14 +177,14 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
#ifdef CONFIG_SMP
if (location >= COREB_L1_DATA_A_START) {
- pr_err("cannot relocate in L1: %u (SMP kernel)\n",
+ mod_err(mod, "cannot relocate in L1: %u (SMP kernel)\n",
ELF32_R_TYPE(rel[i].r_info));
return -ENOEXEC;
}
#endif
- pr_debug("location is %lx, value is %lx type is %d\n",
- location, value, ELF32_R_TYPE(rel[i].r_info));
+ mod_debug(mod, "location is %lx, value is %lx type is %d\n",
+ location, value, ELF32_R_TYPE(rel[i].r_info));
switch (ELF32_R_TYPE(rel[i].r_info)) {
@@ -200,12 +203,12 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
case R_BFIN_PCREL12_JUMP:
case R_BFIN_PCREL12_JUMP_S:
case R_BFIN_PCREL10:
- pr_err("unsupported relocation: %u (no -mlong-calls?)\n",
+ mod_err(mod, "unsupported relocation: %u (no -mlong-calls?)\n",
ELF32_R_TYPE(rel[i].r_info));
return -ENOEXEC;
default:
- pr_err("unknown relocation: %u\n",
+ mod_err(mod, "unknown relocation: %u\n",
ELF32_R_TYPE(rel[i].r_info));
return -ENOEXEC;
}
@@ -222,7 +225,7 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
isram_memcpy((void *)location, &value, size);
break;
default:
- pr_err("invalid relocation for %#lx\n", location);
+ mod_err(mod, "invalid relocation for %#lx\n", location);
return -ENOEXEC;
}
}
diff --git a/arch/c6x/configs/dsk6455_defconfig b/arch/c6x/configs/dsk6455_defconfig
index 4663487c67a1..d764ea4cce7f 100644
--- a/arch/c6x/configs/dsk6455_defconfig
+++ b/arch/c6x/configs/dsk6455_defconfig
@@ -1,5 +1,4 @@
CONFIG_SOC_TMS320C6455=y
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_SPARSE_IRQ=y
@@ -25,7 +24,6 @@ CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=2
CONFIG_BLK_DEV_RAM_SIZE=17000
-CONFIG_MISC_DEVICES=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
diff --git a/arch/c6x/configs/evmc6457_defconfig b/arch/c6x/configs/evmc6457_defconfig
index bba40e195ec4..05d0b4a25ab1 100644
--- a/arch/c6x/configs/evmc6457_defconfig
+++ b/arch/c6x/configs/evmc6457_defconfig
@@ -1,5 +1,4 @@
CONFIG_SOC_TMS320C6457=y
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_SPARSE_IRQ=y
@@ -26,7 +25,6 @@ CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=2
CONFIG_BLK_DEV_RAM_SIZE=17000
-CONFIG_MISC_DEVICES=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
diff --git a/arch/c6x/configs/evmc6472_defconfig b/arch/c6x/configs/evmc6472_defconfig
index 8c46155f6d31..8d81fcf86b0e 100644
--- a/arch/c6x/configs/evmc6472_defconfig
+++ b/arch/c6x/configs/evmc6472_defconfig
@@ -1,5 +1,4 @@
CONFIG_SOC_TMS320C6472=y
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_SPARSE_IRQ=y
@@ -27,7 +26,6 @@ CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=2
CONFIG_BLK_DEV_RAM_SIZE=17000
-CONFIG_MISC_DEVICES=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
diff --git a/arch/c6x/configs/evmc6474_defconfig b/arch/c6x/configs/evmc6474_defconfig
index 15533f632313..8156a98f3958 100644
--- a/arch/c6x/configs/evmc6474_defconfig
+++ b/arch/c6x/configs/evmc6474_defconfig
@@ -1,5 +1,4 @@
CONFIG_SOC_TMS320C6474=y
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_SPARSE_IRQ=y
@@ -27,7 +26,6 @@ CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=2
CONFIG_BLK_DEV_RAM_SIZE=17000
-CONFIG_MISC_DEVICES=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
diff --git a/arch/c6x/configs/evmc6678_defconfig b/arch/c6x/configs/evmc6678_defconfig
index 5f126d4905b1..c4f433c25b69 100644
--- a/arch/c6x/configs/evmc6678_defconfig
+++ b/arch/c6x/configs/evmc6678_defconfig
@@ -1,5 +1,4 @@
CONFIG_SOC_TMS320C6678=y
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_SPARSE_IRQ=y
@@ -27,7 +26,6 @@ CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=2
CONFIG_BLK_DEV_RAM_SIZE=17000
-CONFIG_MISC_DEVICES=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
diff --git a/arch/c6x/platforms/megamod-pic.c b/arch/c6x/platforms/megamod-pic.c
index 43afc03e4125..9519fa5f97d0 100644
--- a/arch/c6x/platforms/megamod-pic.c
+++ b/arch/c6x/platforms/megamod-pic.c
@@ -208,14 +208,14 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np)
pic = kzalloc(sizeof(struct megamod_pic), GFP_KERNEL);
if (!pic) {
- pr_err("%s: Could not alloc PIC structure.\n", np->full_name);
+ pr_err("%pOF: Could not alloc PIC structure.\n", np);
return NULL;
}
pic->irqhost = irq_domain_add_linear(np, NR_COMBINERS * 32,
&megamod_domain_ops, pic);
if (!pic->irqhost) {
- pr_err("%s: Could not alloc host.\n", np->full_name);
+ pr_err("%pOF: Could not alloc host.\n", np);
goto error_free;
}
@@ -225,7 +225,7 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np)
pic->regs = of_iomap(np, 0);
if (!pic->regs) {
- pr_err("%s: Could not map registers.\n", np->full_name);
+ pr_err("%pOF: Could not map registers.\n", np);
goto error_free;
}
@@ -253,8 +253,8 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np)
irq_data = irq_get_irq_data(irq);
if (!irq_data) {
- pr_err("%s: combiner-%d no irq_data for virq %d!\n",
- np->full_name, i, irq);
+ pr_err("%pOF: combiner-%d no irq_data for virq %d!\n",
+ np, i, irq);
continue;
}
@@ -265,16 +265,16 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np)
* of the core priority interrupts (4 - 15).
*/
if (hwirq < 4 || hwirq >= NR_PRIORITY_IRQS) {
- pr_err("%s: combiner-%d core irq %ld out of range!\n",
- np->full_name, i, hwirq);
+ pr_err("%pOF: combiner-%d core irq %ld out of range!\n",
+ np, i, hwirq);
continue;
}
/* record the mapping */
mapping[hwirq - 4] = i;
- pr_debug("%s: combiner-%d cascading to hwirq %ld\n",
- np->full_name, i, hwirq);
+ pr_debug("%pOF: combiner-%d cascading to hwirq %ld\n",
+ np, i, hwirq);
cascade_data[i].pic = pic;
cascade_data[i].index = i;
@@ -290,8 +290,8 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np)
/* Finally, set up the MUX registers */
for (i = 0; i < NR_MUX_OUTPUTS; i++) {
if (mapping[i] != IRQ_UNMAPPED) {
- pr_debug("%s: setting mux %d to priority %d\n",
- np->full_name, mapping[i], i + 4);
+ pr_debug("%pOF: setting mux %d to priority %d\n",
+ np, mapping[i], i + 4);
set_megamod_mux(pic, mapping[i], i);
}
}
diff --git a/arch/c6x/platforms/plldata.c b/arch/c6x/platforms/plldata.c
index 755359eb6286..e8b6cc6a7b5a 100644
--- a/arch/c6x/platforms/plldata.c
+++ b/arch/c6x/platforms/plldata.c
@@ -436,8 +436,8 @@ void __init c64x_setup_clocks(void)
err = of_property_read_u32(node, "clock-frequency", &val);
if (err || val == 0) {
- pr_err("%s: no clock-frequency found! Using %dMHz\n",
- node->full_name, (int)val / 1000000);
+ pr_err("%pOF: no clock-frequency found! Using %dMHz\n",
+ node, (int)val / 1000000);
val = 25000000;
}
clkin1.rate = val;
diff --git a/arch/c6x/platforms/timer64.c b/arch/c6x/platforms/timer64.c
index 0bd0452ded80..241a9a607193 100644
--- a/arch/c6x/platforms/timer64.c
+++ b/arch/c6x/platforms/timer64.c
@@ -204,14 +204,14 @@ void __init timer64_init(void)
timer = of_iomap(np, 0);
if (!timer) {
- pr_debug("%s: Cannot map timer registers.\n", np->full_name);
+ pr_debug("%pOF: Cannot map timer registers.\n", np);
goto out;
}
- pr_debug("%s: Timer registers=%p.\n", np->full_name, timer);
+ pr_debug("%pOF: Timer registers=%p.\n", np, timer);
cd->irq = irq_of_parse_and_map(np, 0);
if (cd->irq == NO_IRQ) {
- pr_debug("%s: Cannot find interrupt.\n", np->full_name);
+ pr_debug("%pOF: Cannot find interrupt.\n", np);
iounmap(timer);
goto out;
}
@@ -229,7 +229,7 @@ void __init timer64_init(void)
dscr_set_devstate(timer64_devstate_id, DSCR_DEVSTATE_ENABLED);
}
- pr_debug("%s: Timer irq=%d.\n", np->full_name, cd->irq);
+ pr_debug("%pOF: Timer irq=%d.\n", np, cd->irq);
clockevents_calc_mult_shift(cd, c6x_core_freq / TIMER_DIVISOR, 5);
diff --git a/arch/cris/arch-v32/drivers/pci/bios.c b/arch/cris/arch-v32/drivers/pci/bios.c
index 394c2a73d5e2..5cc622c0225e 100644
--- a/arch/cris/arch-v32/drivers/pci/bios.c
+++ b/arch/cris/arch-v32/drivers/pci/bios.c
@@ -2,10 +2,6 @@
#include <linux/kernel.h>
#include <hwregs/intr_vect.h>
-void pcibios_fixup_bus(struct pci_bus *b)
-{
-}
-
void pcibios_set_master(struct pci_dev *dev)
{
u8 lat;
diff --git a/arch/cris/arch-v32/mach-a3/arbiter.c b/arch/cris/arch-v32/mach-a3/arbiter.c
index ab5c421a4de8..735a9b0abdb8 100644
--- a/arch/cris/arch-v32/mach-a3/arbiter.c
+++ b/arch/cris/arch-v32/mach-a3/arbiter.c
@@ -227,7 +227,7 @@ static void crisv32_arbiter_config(int arbiter, int region, int unused_slots)
}
}
-extern char _stext, _etext;
+extern char _stext[], _etext[];
static void crisv32_arbiter_init(void)
{
@@ -265,7 +265,7 @@ static void crisv32_arbiter_init(void)
#ifndef CONFIG_ETRAX_KGDB
/* Global watch for writes to kernel text segment. */
- crisv32_arbiter_watch(virt_to_phys(&_stext), &_etext - &_stext,
+ crisv32_arbiter_watch(virt_to_phys(_stext), _etext - _stext,
MARB_CLIENTS(arbiter_all_clients, arbiter_bar_all_clients),
arbiter_all_write, NULL);
#endif
diff --git a/arch/cris/arch-v32/mach-fs/arbiter.c b/arch/cris/arch-v32/mach-fs/arbiter.c
index c97f4d8120f9..047c70bdbb23 100644
--- a/arch/cris/arch-v32/mach-fs/arbiter.c
+++ b/arch/cris/arch-v32/mach-fs/arbiter.c
@@ -158,7 +158,7 @@ static void crisv32_arbiter_config(int region, int unused_slots)
}
}
-extern char _stext, _etext;
+extern char _stext[], _etext[];
static void crisv32_arbiter_init(void)
{
@@ -190,7 +190,7 @@ static void crisv32_arbiter_init(void)
#ifndef CONFIG_ETRAX_KGDB
/* Global watch for writes to kernel text segment. */
- crisv32_arbiter_watch(virt_to_phys(&_stext), &_etext - &_stext,
+ crisv32_arbiter_watch(virt_to_phys(_stext), _etext - _stext,
arbiter_all_clients, arbiter_all_write, NULL);
#endif
}
diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c
index a01636a12a6e..d98131c45bb5 100644
--- a/arch/cris/kernel/traps.c
+++ b/arch/cris/kernel/traps.c
@@ -42,7 +42,7 @@ void (*nmi_handler)(struct pt_regs *);
void show_trace(unsigned long *stack)
{
unsigned long addr, module_start, module_end;
- extern char _stext, _etext;
+ extern char _stext[], _etext[];
int i;
pr_err("\nCall Trace: ");
@@ -69,8 +69,8 @@ void show_trace(unsigned long *stack)
* down the cause of the crash will be able to figure
* out the call path that was taken.
*/
- if (((addr >= (unsigned long)&_stext) &&
- (addr <= (unsigned long)&_etext)) ||
+ if (((addr >= (unsigned long)_stext) &&
+ (addr <= (unsigned long)_etext)) ||
((addr >= module_start) && (addr <= module_end))) {
#ifdef CONFIG_KALLSYMS
print_ip_sym(addr);
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index eefd9a4ed156..1cce8243449e 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -17,6 +17,9 @@ config FRV
select HAVE_DEBUG_STACKOVERFLOW
select ARCH_NO_COHERENT_DMA_MMAP
+config CPU_BIG_ENDIAN
+ def_bool y
+
config ZONE_DMA
bool
default y
diff --git a/arch/frv/include/asm/futex.h b/arch/frv/include/asm/futex.h
index 2e1da71e27a4..ab346f5f8820 100644
--- a/arch/frv/include/asm/futex.h
+++ b/arch/frv/include/asm/futex.h
@@ -7,7 +7,8 @@
#include <asm/errno.h>
#include <linux/uaccess.h>
-extern int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr);
+extern int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+ u32 __user *uaddr);
static inline int
futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index f1e3b20dce9f..9abf02d6855a 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -102,5 +102,7 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/frv/kernel/futex.c b/arch/frv/kernel/futex.c
index d155ca9e5098..37f7b2bf7f73 100644
--- a/arch/frv/kernel/futex.c
+++ b/arch/frv/kernel/futex.c
@@ -186,20 +186,10 @@ static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr, int *_o
/*
* do the futex operations
*/
-int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
- return -EFAULT;
-
pagefault_disable();
switch (op) {
@@ -225,18 +215,9 @@ int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
pagefault_enable();
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS; break;
- }
- }
+ if (!ret)
+ *oval = oldval;
return ret;
-} /* end futex_atomic_op_inuser() */
+} /* end arch_futex_atomic_op_inuser() */
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 6e3d36f37a02..3089f7fe2abd 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -23,6 +23,9 @@ config H8300
select HAVE_ARCH_HASH
select CPU_NO_EFFICIENT_FFS
+config CPU_BIG_ENDIAN
+ def_bool y
+
config RWSEM_GENERIC_SPINLOCK
def_bool y
diff --git a/arch/h8300/include/asm/traps.h b/arch/h8300/include/asm/traps.h
index 15e701130b27..1c5a30ec2df8 100644
--- a/arch/h8300/include/asm/traps.h
+++ b/arch/h8300/include/asm/traps.h
@@ -33,9 +33,9 @@ extern unsigned long *_interrupt_redirect_table;
#define TRAP2_VEC 10
#define TRAP3_VEC 11
-extern char _start, _etext;
+extern char _start[], _etext[];
#define check_kernel_text(addr) \
- ((addr >= (unsigned long)(&_start)) && \
- (addr < (unsigned long)(&_etext)) && !(addr & 1))
+ ((addr >= (unsigned long)(_start)) && \
+ (addr < (unsigned long)(_etext)) && !(addr & 1))
#endif /* _H8300_TRAPS_H */
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index a62ba368b27d..fb3dfb2a667e 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -42,6 +42,8 @@ static inline void atomic_set(atomic_t *v, int new)
);
}
+#define atomic_set_release(v, i) atomic_set((v), (i))
+
/**
* atomic_read - reads a word, atomically
* @v: pointer to atomic value
diff --git a/arch/hexagon/include/asm/futex.h b/arch/hexagon/include/asm/futex.h
index 7e597f8434da..c607b77c8215 100644
--- a/arch/hexagon/include/asm/futex.h
+++ b/arch/hexagon/include/asm/futex.h
@@ -31,18 +31,9 @@
static inline int
-futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
- return -EFAULT;
pagefault_disable();
@@ -72,30 +63,9 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
pagefault_enable();
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ:
- ret = (oldval == cmparg);
- break;
- case FUTEX_OP_CMP_NE:
- ret = (oldval != cmparg);
- break;
- case FUTEX_OP_CMP_LT:
- ret = (oldval < cmparg);
- break;
- case FUTEX_OP_CMP_GE:
- ret = (oldval >= cmparg);
- break;
- case FUTEX_OP_CMP_LE:
- ret = (oldval <= cmparg);
- break;
- case FUTEX_OP_CMP_GT:
- ret = (oldval > cmparg);
- break;
- default:
- ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = oldval;
+
return ret;
}
diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h
index a1c55788c5d6..53a8d5885887 100644
--- a/arch/hexagon/include/asm/spinlock.h
+++ b/arch/hexagon/include/asm/spinlock.h
@@ -179,11 +179,6 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
*/
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- smp_cond_load_acquire(&lock->lock, !VAL);
-}
-
#define arch_spin_is_locked(x) ((x)->lock != 0)
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
index a3d0211970e9..c86a947f5368 100644
--- a/arch/ia64/include/asm/acpi.h
+++ b/arch/ia64/include/asm/acpi.h
@@ -112,8 +112,6 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf)
buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP;
}
-#define acpi_unlazy_tlb(x)
-
#ifdef CONFIG_ACPI_NUMA
extern cpumask_t early_cpu_possible_map;
#define for_each_possible_early_cpu(cpu) \
diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h
index 76acbcd5c060..6d67dc1eaf2b 100644
--- a/arch/ia64/include/asm/futex.h
+++ b/arch/ia64/include/asm/futex.h
@@ -45,18 +45,9 @@ do { \
} while (0)
static inline int
-futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
- return -EFAULT;
pagefault_disable();
@@ -84,17 +75,9 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
pagefault_enable();
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = oldval;
+
return ret;
}
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index ca9e76149a4a..df2c121164b8 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -76,22 +76,6 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
ACCESS_ONCE(*p) = (tmp + 2) & ~1;
}
-static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock)
-{
- int *p = (int *)&lock->lock, ticket;
-
- ia64_invala();
-
- for (;;) {
- asm volatile ("ld4.c.nc %0=[%1]" : "=r"(ticket) : "r"(p) : "memory");
- if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
- return;
- cpu_relax();
- }
-
- smp_acquire__after_ctrl_dep();
-}
-
static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
{
long tmp = ACCESS_ONCE(lock->lock);
@@ -143,11 +127,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
arch_spin_lock(lock);
}
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- __ticket_spin_unlock_wait(lock);
-}
-
#define arch_read_can_lock(rw) (*(volatile int *)(rw) >= 0)
#define arch_write_can_lock(rw) (*(volatile int *)(rw) == 0)
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index fced197b9626..cbe5ac3699bf 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -168,7 +168,8 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb)
static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
tlb->mm = mm;
tlb->max = ARRAY_SIZE(tlb->local);
@@ -185,8 +186,11 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
* collected.
*/
static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+ unsigned long start, unsigned long end, bool force)
{
+ if (force)
+ tlb->need_flush = 1;
/*
* Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
* tlb->end_addr.
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 5dd5c5d0d642..002eb85a6941 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -111,4 +111,6 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 7508c306aa9e..1d29b2f8726b 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -159,12 +159,12 @@ int acpi_request_vector(u32 int_type)
return vector;
}
-char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
+void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size)
{
- return __va(phys_addr);
+ return __va(phys);
}
-void __init __acpi_unmap_table(char *map, unsigned long size)
+void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
{
}
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 121295637d0d..81416000c5e0 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -757,14 +757,14 @@ efi_memmap_intersects (unsigned long phys_addr, unsigned long size)
return 0;
}
-u32
+int
efi_mem_type (unsigned long phys_addr)
{
efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
if (md)
return md->type;
- return 0;
+ return -EINVAL;
}
u64
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 4068bde623dc..f5ec736100ee 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -411,13 +411,6 @@ pcibios_disable_device (struct pci_dev *dev)
acpi_pci_irq_disable(dev);
}
-resource_size_t
-pcibios_align_resource (void *data, const struct resource *res,
- resource_size_t size, resource_size_t align)
-{
- return res->start;
-}
-
/**
* ia64_pci_get_legacy_mem - generic legacy mem routine
* @bus: bus to get legacy memory base address for
diff --git a/arch/m32r/configs/m32104ut_defconfig b/arch/m32r/configs/m32104ut_defconfig
index be30e094db71..4aa42acbd512 100644
--- a/arch/m32r/configs/m32104ut_defconfig
+++ b/arch/m32r/configs/m32104ut_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
@@ -40,7 +39,6 @@ CONFIG_NETFILTER_XT_MATCH_REALM=m
CONFIG_NETFILTER_XT_MATCH_SCTP=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
-CONFIG_IP_NF_QUEUE=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_ADDRTYPE=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -48,7 +46,6 @@ CONFIG_IP_NF_MATCH_TTL=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
@@ -106,7 +103,6 @@ CONFIG_SENSORS_SMSC47M1=m
CONFIG_SENSORS_W83781D=m
CONFIG_SENSORS_W83L785TS=m
CONFIG_SENSORS_W83627HF=m
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
diff --git a/arch/m32r/configs/m32700ut.smp_defconfig b/arch/m32r/configs/m32700ut.smp_defconfig
index a3d727ed6a16..41a0495b65df 100644
--- a/arch/m32r/configs/m32700ut.smp_defconfig
+++ b/arch/m32r/configs/m32700ut.smp_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
@@ -30,7 +29,6 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
# CONFIG_IPV6 is not set
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_REDBOOT_PARTS=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=m
@@ -63,7 +61,6 @@ CONFIG_SERIAL_M32R_SIO_CONSOLE=y
CONFIG_SERIAL_M32R_PLDSIO=y
CONFIG_HW_RANDOM=y
CONFIG_DS1302=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_S1D13XXX=y
diff --git a/arch/m32r/configs/m32700ut.up_defconfig b/arch/m32r/configs/m32700ut.up_defconfig
index b8334163099d..20078a866f45 100644
--- a/arch/m32r/configs/m32700ut.up_defconfig
+++ b/arch/m32r/configs/m32700ut.up_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
@@ -29,7 +28,6 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
# CONFIG_IPV6 is not set
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_REDBOOT_PARTS=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=m
@@ -62,7 +60,6 @@ CONFIG_SERIAL_M32R_SIO_CONSOLE=y
CONFIG_SERIAL_M32R_PLDSIO=y
CONFIG_HW_RANDOM=y
CONFIG_DS1302=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_S1D13XXX=y
diff --git a/arch/m32r/configs/mappi.nommu_defconfig b/arch/m32r/configs/mappi.nommu_defconfig
index 7c90ce2fc42b..4bf3820e054a 100644
--- a/arch/m32r/configs/mappi.nommu_defconfig
+++ b/arch/m32r/configs/mappi.nommu_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
CONFIG_LOG_BUF_SHIFT=14
@@ -39,7 +38,6 @@ CONFIG_NETDEVICES=y
# CONFIG_VT is not set
CONFIG_SERIAL_M32R_SIO_CONSOLE=y
CONFIG_HW_RANDOM=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
CONFIG_NFS_FS=y
diff --git a/arch/m32r/configs/mappi.smp_defconfig b/arch/m32r/configs/mappi.smp_defconfig
index 367d07cebcd3..f9ed7bdbf4de 100644
--- a/arch/m32r/configs/mappi.smp_defconfig
+++ b/arch/m32r/configs/mappi.smp_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
@@ -31,9 +30,7 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_IPV6 is not set
# CONFIG_STANDALONE is not set
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_REDBOOT_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_NBD=m
@@ -50,7 +47,6 @@ CONFIG_NETDEVICES=y
# CONFIG_VT is not set
CONFIG_SERIAL_M32R_SIO_CONSOLE=y
CONFIG_HW_RANDOM=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
CONFIG_ISO9660_FS=y
diff --git a/arch/m32r/configs/mappi.up_defconfig b/arch/m32r/configs/mappi.up_defconfig
index cb11384386ce..289ae7421e12 100644
--- a/arch/m32r/configs/mappi.up_defconfig
+++ b/arch/m32r/configs/mappi.up_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
@@ -29,9 +28,7 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_IPV6 is not set
# CONFIG_STANDALONE is not set
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_REDBOOT_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_NBD=m
@@ -48,7 +45,6 @@ CONFIG_NETDEVICES=y
# CONFIG_VT is not set
CONFIG_SERIAL_M32R_SIO_CONSOLE=y
CONFIG_HW_RANDOM=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
CONFIG_ISO9660_FS=y
diff --git a/arch/m32r/configs/mappi2.opsp_defconfig b/arch/m32r/configs/mappi2.opsp_defconfig
index 3bff779259b4..2852f6e7e246 100644
--- a/arch/m32r/configs/mappi2.opsp_defconfig
+++ b/arch/m32r/configs/mappi2.opsp_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
@@ -50,7 +49,6 @@ CONFIG_SMC91X=y
# CONFIG_SERIO_I8042 is not set
CONFIG_SERIAL_M32R_SIO_CONSOLE=y
CONFIG_HW_RANDOM=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
# CONFIG_VGA_CONSOLE is not set
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
diff --git a/arch/m32r/configs/mappi2.vdec2_defconfig b/arch/m32r/configs/mappi2.vdec2_defconfig
index 75246c9c1af8..8da4dbad8510 100644
--- a/arch/m32r/configs/mappi2.vdec2_defconfig
+++ b/arch/m32r/configs/mappi2.vdec2_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
@@ -49,7 +48,6 @@ CONFIG_SMC91X=y
# CONFIG_SERIO_I8042 is not set
CONFIG_SERIAL_M32R_SIO_CONSOLE=y
CONFIG_HW_RANDOM=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
# CONFIG_VGA_CONSOLE is not set
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
diff --git a/arch/m32r/configs/mappi3.smp_defconfig b/arch/m32r/configs/mappi3.smp_defconfig
index 27cefd41ac1f..5605b23e2faf 100644
--- a/arch/m32r/configs/mappi3.smp_defconfig
+++ b/arch/m32r/configs/mappi3.smp_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
@@ -29,9 +28,7 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
# CONFIG_IPV6 is not set
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_REDBOOT_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_NBD=m
@@ -50,7 +47,6 @@ CONFIG_SMC91X=y
# CONFIG_VT is not set
CONFIG_SERIAL_M32R_SIO_CONSOLE=y
CONFIG_HW_RANDOM=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
CONFIG_ISO9660_FS=y
diff --git a/arch/m32r/configs/oaks32r_defconfig b/arch/m32r/configs/oaks32r_defconfig
index 5087a510ca4f..5ccab127f6ad 100644
--- a/arch/m32r/configs/oaks32r_defconfig
+++ b/arch/m32r/configs/oaks32r_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
@@ -37,7 +36,6 @@ CONFIG_NETDEVICES=y
# CONFIG_VT is not set
CONFIG_SERIAL_M32R_SIO_CONSOLE=y
CONFIG_HW_RANDOM=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_EXT2_FS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
diff --git a/arch/m32r/configs/opsput_defconfig b/arch/m32r/configs/opsput_defconfig
index 50c6f525db20..3ce1d08355e5 100644
--- a/arch/m32r/configs/opsput_defconfig
+++ b/arch/m32r/configs/opsput_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
@@ -46,7 +45,6 @@ CONFIG_SERIAL_M32R_SIO_CONSOLE=y
CONFIG_SERIAL_M32R_PLDSIO=y
CONFIG_HW_RANDOM=y
CONFIG_DS1302=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
CONFIG_ISO9660_FS=m
diff --git a/arch/m32r/configs/usrv_defconfig b/arch/m32r/configs/usrv_defconfig
index a3cfaaedab60..cb8c051c3d46 100644
--- a/arch/m32r/configs/usrv_defconfig
+++ b/arch/m32r/configs/usrv_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -34,9 +33,6 @@ CONFIG_INET_ESP=y
CONFIG_INET_IPCOMP=y
# CONFIG_IPV6 is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_ADV_OPTIONS=y
@@ -62,7 +58,6 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_SERIAL_M32R_SIO is not set
# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_FS_XATTR is not set
diff --git a/arch/m32r/include/asm/flat.h b/arch/m32r/include/asm/flat.h
index 455ce7ddbf14..dfcb0e4eb256 100644
--- a/arch/m32r/include/asm/flat.h
+++ b/arch/m32r/include/asm/flat.h
@@ -95,7 +95,7 @@ static inline unsigned long m32r_flat_get_addr_from_rp (u32 *rp,
return ~0; /* bogus value */
}
-static inline void flat_put_addr_at_rp(u32 *rp, u32 addr, u32 relval)
+static inline int flat_put_addr_at_rp(u32 *rp, u32 addr, u32 relval)
{
unsigned int reloc = flat_m32r_get_reloc_type (relval);
if (reloc & 0xf0) {
@@ -133,6 +133,7 @@ static inline void flat_put_addr_at_rp(u32 *rp, u32 addr, u32 relval)
break;
}
}
+ return 0;
}
// kludge - text_len is a local variable in the only user.
diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h
index 323c7fc953cd..a56825592b90 100644
--- a/arch/m32r/include/asm/spinlock.h
+++ b/arch/m32r/include/asm/spinlock.h
@@ -30,11 +30,6 @@
#define arch_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0)
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- smp_cond_load_acquire(&lock->slock, VAL > 0);
-}
-
/**
* arch_spin_trylock - Try spin lock and return a result
* @lock: Pointer to the lock variable
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index f8f7b47e247f..e268e51a38d1 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -102,4 +102,6 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 5abb548f0e70..353d90487c2b 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -24,6 +24,9 @@ config M68K
select OLD_SIGSUSPEND3
select OLD_SIGACTION
+config CPU_BIG_ENDIAN
+ def_bool y
+
config RWSEM_GENERIC_SPINLOCK
bool
default y
diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c
index 6a640be48568..3097fa2ca746 100644
--- a/arch/m68k/coldfire/pci.c
+++ b/arch/m68k/coldfire/pci.c
@@ -243,6 +243,13 @@ static struct resource mcf_pci_io = {
.flags = IORESOURCE_IO,
};
+static struct resource busn_resource = {
+ .name = "PCI busn",
+ .start = 0,
+ .end = 255,
+ .flags = IORESOURCE_BUS,
+};
+
/*
* Interrupt mapping and setting.
*/
@@ -258,6 +265,13 @@ static int mcf_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
static int __init mcf_pci_init(void)
{
+ struct pci_host_bridge *bridge;
+ int ret;
+
+ bridge = pci_alloc_host_bridge(0);
+ if (!bridge)
+ return -ENOMEM;
+
pr_info("ColdFire: PCI bus initialization...\n");
/* Reset the external PCI bus */
@@ -312,14 +326,28 @@ static int __init mcf_pci_init(void)
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(msecs_to_jiffies(200));
- rootbus = pci_scan_bus(0, &mcf_pci_ops, NULL);
- if (!rootbus)
- return -ENODEV;
+
+ pci_add_resource(&bridge->windows, &ioport_resource);
+ pci_add_resource(&bridge->windows, &iomem_resource);
+ pci_add_resource(&bridge->windows, &busn_resource);
+ bridge->dev.parent = NULL;
+ bridge->sysdata = NULL;
+ bridge->busnr = 0;
+ bridge->ops = &mcf_pci_ops;
+ bridge->swizzle_irq = pci_common_swizzle;
+ bridge->map_irq = mcf_pci_map_irq;
+
+ ret = pci_scan_root_bus_bridge(bridge);
+ if (ret) {
+ pci_free_host_bridge(bridge);
+ return ret;
+ }
+
+ rootbus = bridge->bus;
rootbus->resource[0] = &mcf_pci_io;
rootbus->resource[1] = &mcf_pci_mem;
- pci_fixup_irqs(pci_common_swizzle, mcf_pci_map_irq);
pci_bus_size_bridges(rootbus);
pci_bus_assign_resources(rootbus);
pci_bus_add_devices(rootbus);
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index ddff1164aff0..54191f6fc715 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -49,6 +49,7 @@ CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_INET=y
@@ -465,8 +466,10 @@ CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
# CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_MSM6242=m
CONFIG_RTC_DRV_RP5C01=m
# CONFIG_IOMMU_SUPPORT is not set
@@ -477,7 +480,6 @@ CONFIG_SERIAL_CONSOLE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FS_ENCRYPTION=m
@@ -587,12 +589,13 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 17384dc959a5..fb4663904428 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -47,6 +47,7 @@ CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_INET=y
@@ -427,8 +428,10 @@ CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
# CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_HEARTBEAT=y
@@ -436,7 +439,6 @@ CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FS_ENCRYPTION=m
@@ -546,12 +548,13 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 53a641d62f85..4ab393e86e52 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -47,6 +47,7 @@ CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_INET=y
@@ -442,7 +443,10 @@ CONFIG_DMASOUND_ATARI=m
CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
+# CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_HEARTBEAT=y
@@ -457,7 +461,6 @@ CONFIG_ATARI_DSP56K=m
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FS_ENCRYPTION=m
@@ -567,12 +570,13 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 3925ae3a5eb3..1dd8d697545b 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -45,6 +45,7 @@ CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_INET=y
@@ -420,15 +421,16 @@ CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
# CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FS_ENCRYPTION=m
@@ -538,12 +540,13 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index f4a134b390b4..02b39f50076e 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -47,6 +47,7 @@ CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_INET=y
@@ -430,15 +431,16 @@ CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
# CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FS_ENCRYPTION=m
@@ -548,12 +550,13 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 9ed0cef632b7..044dcb2bf8fb 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -46,6 +46,7 @@ CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_INET=y
@@ -452,15 +453,16 @@ CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
# CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FS_ENCRYPTION=m
@@ -570,12 +572,13 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index efed0d48fd53..3ad04682077a 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -56,6 +56,7 @@ CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_INET=y
@@ -520,8 +521,10 @@ CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
# CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_MSM6242=m
CONFIG_RTC_DRV_RP5C01=m
CONFIG_RTC_DRV_GENERIC=m
@@ -540,7 +543,6 @@ CONFIG_SERIAL_CONSOLE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FS_ENCRYPTION=m
@@ -650,12 +652,13 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 9040457c7f9c..dc2dd61948cd 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -44,6 +44,7 @@ CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_INET=y
@@ -420,15 +421,16 @@ CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
# CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FS_ENCRYPTION=m
@@ -538,12 +540,13 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 8b17f00e0484..54e7b523fc3d 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -45,6 +45,7 @@ CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_INET=y
@@ -420,15 +421,16 @@ CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
# CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FS_ENCRYPTION=m
@@ -538,12 +540,13 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 5f3718c62c85..d63d8a15f6db 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -45,6 +45,7 @@ CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_INET=y
@@ -442,8 +443,10 @@ CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
# CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_HEARTBEAT=y
@@ -451,7 +454,6 @@ CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FS_ENCRYPTION=m
@@ -561,12 +563,13 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 8c979a68fca5..d0924c22f52a 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -42,6 +42,7 @@ CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_INET=y
@@ -422,15 +423,16 @@ CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
# CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FS_ENCRYPTION=m
@@ -540,12 +542,13 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_RSA=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index a1e79530e806..3001ee1e5dc5 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -42,6 +42,7 @@ CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
+CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_INET=y
@@ -422,15 +423,16 @@ CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
# CONFIG_HID_GENERIC is not set
+# CONFIG_HID_ITE is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
+# CONFIG_RTC_NVMEM is not set
CONFIG_RTC_DRV_GENERIC=m
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_PROC_HARDWARE=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
-CONFIG_XFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FS_ENCRYPTION=m
@@ -540,12 +542,13 @@ CONFIG_TEST_BITMAP=m
CONFIG_TEST_UUID=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
-CONFIG_TEST_LKM=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
diff --git a/arch/m68k/include/asm/asm-prototypes.h b/arch/m68k/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..22ccb9c97576
--- /dev/null
+++ b/arch/m68k/include/asm/asm-prototypes.h
@@ -0,0 +1,5 @@
+extern int __divsi3(int, int);
+extern int __modsi3(int, int);
+extern int __mulsi3(int, int);
+extern unsigned int __udivsi3(unsigned int, unsigned int);
+extern unsigned int __umodsi3(unsigned int, unsigned int);
diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index 8aa8792e3174..d96348a52362 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -357,6 +357,17 @@ static void cuda_shutdown(void)
struct adb_request req;
if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_POWERDOWN) < 0)
return;
+
+ /* Avoid infinite polling loop when PSU is not under Cuda control */
+ switch (macintosh_config->ident) {
+ case MAC_MODEL_C660:
+ case MAC_MODEL_Q605:
+ case MAC_MODEL_Q605_ACC:
+ case MAC_MODEL_P475:
+ case MAC_MODEL_P475F:
+ return;
+ }
+
while (!req.complete)
cuda_poll();
}
@@ -463,8 +474,9 @@ void mac_poweroff(void)
pmu_shutdown();
#endif
}
- local_irq_enable();
+
pr_crit("It is now safe to turn off your Macintosh.\n");
+ local_irq_disable();
while(1);
}
@@ -554,8 +566,8 @@ void mac_reset(void)
}
/* should never get here */
- local_irq_enable();
pr_crit("Restart failed. Please restart manually.\n");
+ local_irq_disable();
while(1);
}
diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig
index 5b7a45d99cfb..7d8b322e5101 100644
--- a/arch/metag/Kconfig
+++ b/arch/metag/Kconfig
@@ -26,6 +26,7 @@ config METAG
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNDERSCORE_SYMBOL_PREFIX
select IRQ_DOMAIN
+ select GENERIC_IRQ_EFFECTIVE_AFF_MASK
select MODULES_USE_ELF_RELA
select OF
select OF_EARLY_FLATTREE
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h
index 6c1380a8a0d4..eee779f26cc4 100644
--- a/arch/metag/include/asm/atomic_lock1.h
+++ b/arch/metag/include/asm/atomic_lock1.h
@@ -37,6 +37,8 @@ static inline int atomic_set(atomic_t *v, int i)
return i;
}
+#define atomic_set_release(v, i) atomic_set((v), (i))
+
#define ATOMIC_OP(op, c_op) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h
index c0c7a22be1ae..ddf7fe5708a6 100644
--- a/arch/metag/include/asm/spinlock.h
+++ b/arch/metag/include/asm/spinlock.h
@@ -15,11 +15,6 @@
* locked.
*/
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- smp_cond_load_acquire(&lock->lock, !VAL);
-}
-
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
diff --git a/arch/metag/include/asm/topology.h b/arch/metag/include/asm/topology.h
index e95f874ded1b..707c7f7b6bea 100644
--- a/arch/metag/include/asm/topology.h
+++ b/arch/metag/include/asm/topology.h
@@ -4,7 +4,6 @@
#ifdef CONFIG_NUMA
#define cpu_to_node(cpu) ((void)(cpu), 0)
-#define parent_node(node) ((void)(node), 0)
#define cpumask_of_node(node) ((void)node, cpu_online_mask)
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 4ed8ebf33509..9d26abdf0dc1 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -36,6 +36,22 @@ config MICROBLAZE
select VIRT_TO_BUS
select CPU_NO_EFFICIENT_FFS
+# Endianness selection
+choice
+ prompt "Endianness selection"
+ default CPU_BIG_ENDIAN
+ help
+ microblaze architectures can be configured for either little or
+ big endian formats. Be sure to select the appropriate mode.
+
+config CPU_BIG_ENDIAN
+ bool "Big endian"
+
+config CPU_LITTLE_ENDIAN
+ bool "Little endian"
+
+endchoice
+
config SWAP
def_bool n
diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index 740f2b82a182..1f6c486826a0 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -35,6 +35,8 @@ endif
CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_DIV) += -mno-xl-soft-div
CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_BARREL) += -mxl-barrel-shift
CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare
+CPUFLAGS-$(CONFIG_CPU_BIG_ENDIAN) += -mbig-endian
+CPUFLAGS-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian
CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER))
diff --git a/arch/microblaze/include/asm/flat.h b/arch/microblaze/include/asm/flat.h
index f23c3d266bae..3d2747d4c967 100644
--- a/arch/microblaze/include/asm/flat.h
+++ b/arch/microblaze/include/asm/flat.h
@@ -60,7 +60,7 @@ static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags,
* unaligned.
*/
-static inline void
+static inline int
flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 relval)
{
u32 *p = (__force u32 *)rp;
diff --git a/arch/microblaze/include/asm/futex.h b/arch/microblaze/include/asm/futex.h
index 01848f056f43..a9dad9e5e132 100644
--- a/arch/microblaze/include/asm/futex.h
+++ b/arch/microblaze/include/asm/futex.h
@@ -29,18 +29,9 @@
})
static inline int
-futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
- return -EFAULT;
pagefault_disable();
@@ -66,30 +57,9 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
pagefault_enable();
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ:
- ret = (oldval == cmparg);
- break;
- case FUTEX_OP_CMP_NE:
- ret = (oldval != cmparg);
- break;
- case FUTEX_OP_CMP_LT:
- ret = (oldval < cmparg);
- break;
- case FUTEX_OP_CMP_GE:
- ret = (oldval >= cmparg);
- break;
- case FUTEX_OP_CMP_LE:
- ret = (oldval <= cmparg);
- break;
- case FUTEX_OP_CMP_GT:
- ret = (oldval > cmparg);
- break;
- default:
- ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = oldval;
+
return ret;
}
diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h
index efd4983cb697..114b93488193 100644
--- a/arch/microblaze/include/asm/pci.h
+++ b/arch/microblaze/include/asm/pci.h
@@ -81,9 +81,6 @@ extern pgprot_t pci_phys_mem_access_prot(struct file *file,
#define HAVE_ARCH_PCI_RESOURCE_TO_USER
-extern void pcibios_setup_bus_devices(struct pci_bus *bus);
-extern void pcibios_setup_bus_self(struct pci_bus *bus);
-
/* This part of code was originally in xilinx-pci.h */
#ifdef CONFIG_PCI_XILINX
extern void __init xilinx_pci_init(void);
diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c
index ea2d83f1f4bb..7de941cbbd94 100644
--- a/arch/microblaze/kernel/timer.c
+++ b/arch/microblaze/kernel/timer.c
@@ -293,7 +293,7 @@ static int __init xilinx_timer_init(struct device_node *timer)
return -EINVAL;
}
- pr_info("%s: irq=%d\n", timer->full_name, irq);
+ pr_info("%pOF: irq=%d\n", timer, irq);
clk = of_clk_get(timer, 0);
if (IS_ERR(clk)) {
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index 404fb38d06b7..ae79e8638d50 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -508,8 +508,8 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
struct of_pci_range range;
struct of_pci_range_parser parser;
- pr_info("PCI host bridge %s %s ranges:\n",
- dev->full_name, primary ? "(primary)" : "");
+ pr_info("PCI host bridge %pOF %s ranges:\n",
+ dev, primary ? "(primary)" : "");
/* Check for ranges property */
if (of_pci_range_parser_init(&parser, dev))
@@ -678,144 +678,6 @@ static void pcibios_fixup_resources(struct pci_dev *dev)
}
DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources);
-/* This function tries to figure out if a bridge resource has been initialized
- * by the firmware or not. It doesn't have to be absolutely bullet proof, but
- * things go more smoothly when it gets it right. It should covers cases such
- * as Apple "closed" bridge resources and bare-metal pSeries unassigned bridges
- */
-static int pcibios_uninitialized_bridge_resource(struct pci_bus *bus,
- struct resource *res)
-{
- struct pci_controller *hose = pci_bus_to_host(bus);
- struct pci_dev *dev = bus->self;
- resource_size_t offset;
- u16 command;
- int i;
-
- /* Job is a bit different between memory and IO */
- if (res->flags & IORESOURCE_MEM) {
- /* If the BAR is non-0 (res != pci_mem_offset) then it's
- * probably been initialized by somebody
- */
- if (res->start != hose->pci_mem_offset)
- return 0;
-
- /* The BAR is 0, let's check if memory decoding is enabled on
- * the bridge. If not, we consider it unassigned
- */
- pci_read_config_word(dev, PCI_COMMAND, &command);
- if ((command & PCI_COMMAND_MEMORY) == 0)
- return 1;
-
- /* Memory decoding is enabled and the BAR is 0. If any of
- * the bridge resources covers that starting address (0 then
- * it's good enough for us for memory
- */
- for (i = 0; i < 3; i++) {
- if ((hose->mem_resources[i].flags & IORESOURCE_MEM) &&
- hose->mem_resources[i].start == hose->pci_mem_offset)
- return 0;
- }
-
- /* Well, it starts at 0 and we know it will collide so we may as
- * well consider it as unassigned. That covers the Apple case.
- */
- return 1;
- } else {
- /* If the BAR is non-0, then we consider it assigned */
- offset = (unsigned long)hose->io_base_virt - _IO_BASE;
- if (((res->start - offset) & 0xfffffffful) != 0)
- return 0;
-
- /* Here, we are a bit different than memory as typically IO
- * space starting at low addresses -is- valid. What we do
- * instead if that we consider as unassigned anything that
- * doesn't have IO enabled in the PCI command register,
- * and that's it.
- */
- pci_read_config_word(dev, PCI_COMMAND, &command);
- if (command & PCI_COMMAND_IO)
- return 0;
-
- /* It's starting at 0 and IO is disabled in the bridge, consider
- * it unassigned
- */
- return 1;
- }
-}
-
-/* Fixup resources of a PCI<->PCI bridge */
-static void pcibios_fixup_bridge(struct pci_bus *bus)
-{
- struct resource *res;
- int i;
-
- struct pci_dev *dev = bus->self;
-
- pci_bus_for_each_resource(bus, res, i) {
- if (!res)
- continue;
- if (!res->flags)
- continue;
- if (i >= 3 && bus->self->transparent)
- continue;
-
- pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x] fixup...\n",
- pci_name(dev), i,
- (unsigned long long)res->start,
- (unsigned long long)res->end,
- (unsigned int)res->flags);
-
- /* Try to detect uninitialized P2P bridge resources,
- * and clear them out so they get re-assigned later
- */
- if (pcibios_uninitialized_bridge_resource(bus, res)) {
- res->flags = 0;
- pr_debug("PCI:%s (unassigned)\n",
- pci_name(dev));
- } else {
- pr_debug("PCI:%s %016llx-%016llx\n",
- pci_name(dev),
- (unsigned long long)res->start,
- (unsigned long long)res->end);
- }
- }
-}
-
-void pcibios_setup_bus_self(struct pci_bus *bus)
-{
- /* Fix up the bus resources for P2P bridges */
- if (bus->self != NULL)
- pcibios_fixup_bridge(bus);
-}
-
-void pcibios_setup_bus_devices(struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- pr_debug("PCI: Fixup bus devices %d (%s)\n",
- bus->number, bus->self ? pci_name(bus->self) : "PHB");
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- /* Setup OF node pointer in archdata */
- dev->dev.of_node = pci_device_to_OF_node(dev);
-
- /* Fixup NUMA node as it may not be setup yet by the generic
- * code and is needed by the DMA init
- */
- set_dev_node(&dev->dev, pcibus_to_node(dev->bus));
-
- /* Read default IRQs and fixup if necessary */
- dev->irq = of_irq_parse_and_map_pci(dev, 0, 0);
- }
-}
-
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
- /* nothing to do */
-}
-EXPORT_SYMBOL(pcibios_fixup_bus);
-
/*
* We need to avoid collisions with `mirrored' VGA ports
* and other strange ISA hardware, so we always want the
@@ -829,13 +691,6 @@ EXPORT_SYMBOL(pcibios_fixup_bus);
* but we want to try to avoid allocating at 0x2900-0x2bff
* which might have be mirrored at 0x0100-0x03ff..
*/
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
- resource_size_t size, resource_size_t align)
-{
- return res->start;
-}
-EXPORT_SYMBOL(pcibios_align_resource);
-
int pcibios_add_device(struct pci_dev *dev)
{
dev->irq = of_irq_parse_and_map_pci(dev, 0, 0);
@@ -1219,8 +1074,8 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose,
if (!res->flags) {
pr_warn("PCI: I/O resource not set for host ");
- pr_cont("bridge %s (domain %d)\n",
- hose->dn->full_name, hose->global_number);
+ pr_cont("bridge %pOF (domain %d)\n",
+ hose->dn, hose->global_number);
/* Workaround for lack of IO resource only on 32-bit */
res->start = (unsigned long)hose->io_base_virt - isa_io_base;
res->end = res->start + IO_SPACE_LIMIT;
@@ -1241,8 +1096,8 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose,
if (i > 0)
continue;
pr_err("PCI: Memory resource 0 not set for ");
- pr_cont("host bridge %s (domain %d)\n",
- hose->dn->full_name, hose->global_number);
+ pr_cont("host bridge %pOF (domain %d)\n",
+ hose->dn, hose->global_number);
/* Workaround for lack of MEM resource only on 32-bit */
res->start = hose->pci_mem_offset;
@@ -1270,7 +1125,7 @@ static void pcibios_scan_phb(struct pci_controller *hose)
struct pci_bus *bus;
struct device_node *node = hose->dn;
- pr_debug("PCI: Scanning PHB %s\n", of_node_full_name(node));
+ pr_debug("PCI: Scanning PHB %pOF\n", node);
pcibios_setup_phb_resources(hose, &resources);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 8dd20358464f..48d91d5be4e9 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2260,7 +2260,7 @@ config CPU_R4K_CACHE_TLB
config MIPS_MT_SMP
bool "MIPS MT SMP support (1 TC on each available VPE)"
- depends on SYS_SUPPORTS_MULTITHREADING && !CPU_MIPSR6
+ depends on SYS_SUPPORTS_MULTITHREADING && !CPU_MIPSR6 && !CPU_MICROMIPS
select CPU_MIPSR2_IRQ_VI
select CPU_MIPSR2_IRQ_EI
select SYNC_R4K
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 04343625b929..bc2708c9ada4 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -243,8 +243,21 @@ include arch/mips/Kbuild.platforms
ifdef CONFIG_PHYSICAL_START
load-y = $(CONFIG_PHYSICAL_START)
endif
-entry-y = 0x$(shell $(NM) vmlinux 2>/dev/null \
+
+entry-noisa-y = 0x$(shell $(NM) vmlinux 2>/dev/null \
| grep "\bkernel_entry\b" | cut -f1 -d \ )
+ifdef CONFIG_CPU_MICROMIPS
+ #
+ # Set the ISA bit, since the kernel_entry symbol in the ELF will have it
+ # clear which would lead to images containing addresses which bootloaders may
+ # jump to as MIPS32 code.
+ #
+ entry-y = $(patsubst %0,%1,$(patsubst %2,%3,$(patsubst %4,%5, \
+ $(patsubst %6,%7,$(patsubst %8,%9,$(patsubst %a,%b, \
+ $(patsubst %c,%d,$(patsubst %e,%f,$(entry-noisa-y)))))))))
+else
+ entry-y = $(entry-noisa-y)
+endif
cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic
drivers-$(CONFIG_PCI) += arch/mips/pci/
diff --git a/arch/mips/boot/compressed/.gitignore b/arch/mips/boot/compressed/.gitignore
new file mode 100644
index 000000000000..ebae133f1d00
--- /dev/null
+++ b/arch/mips/boot/compressed/.gitignore
@@ -0,0 +1,2 @@
+ashldi3.c
+bswapsi.c
diff --git a/arch/mips/cavium-octeon/octeon-usb.c b/arch/mips/cavium-octeon/octeon-usb.c
index 542be1cd0f32..bfdfaf32d2c4 100644
--- a/arch/mips/cavium-octeon/octeon-usb.c
+++ b/arch/mips/cavium-octeon/octeon-usb.c
@@ -13,9 +13,9 @@
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/of_platform.h>
+#include <linux/io.h>
#include <asm/octeon/octeon.h>
-#include <asm/octeon/cvmx-gpio-defs.h>
/* USB Control Register */
union cvm_usbdrd_uctl_ctl {
diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig
index 7d32fbbca962..3598d58aac30 100644
--- a/arch/mips/configs/pistachio_defconfig
+++ b/arch/mips/configs/pistachio_defconfig
@@ -207,7 +207,7 @@ CONFIG_IMGPDC_WDT=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
CONFIG_REGULATOR_GPIO=y
CONFIG_MEDIA_SUPPORT=y
-CONFIG_MEDIA_RC_SUPPORT=y
+CONFIG_RC_CORE=y
# CONFIG_RC_DECODERS is not set
CONFIG_RC_DEVICES=y
CONFIG_IR_IMG=y
diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S
index 1910223a9c02..cea2bb1621e6 100644
--- a/arch/mips/dec/int-handler.S
+++ b/arch/mips/dec/int-handler.S
@@ -147,23 +147,12 @@
* Find irq with highest priority
*/
# open coded PTR_LA t1, cpu_mask_nr_tbl
-#if (_MIPS_SZPTR == 32)
+#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32)
# open coded la t1, cpu_mask_nr_tbl
lui t1, %hi(cpu_mask_nr_tbl)
addiu t1, %lo(cpu_mask_nr_tbl)
-
-#endif
-#if (_MIPS_SZPTR == 64)
- # open coded dla t1, cpu_mask_nr_tbl
- .set push
- .set noat
- lui t1, %highest(cpu_mask_nr_tbl)
- lui AT, %hi(cpu_mask_nr_tbl)
- daddiu t1, t1, %higher(cpu_mask_nr_tbl)
- daddiu AT, AT, %lo(cpu_mask_nr_tbl)
- dsll t1, 32
- daddu t1, t1, AT
- .set pop
+#else
+#error GCC `-msym32' option required for 64-bit DECstation builds
#endif
1: lw t2,(t1)
nop
@@ -214,23 +203,12 @@
* Find irq with highest priority
*/
# open coded PTR_LA t1,asic_mask_nr_tbl
-#if (_MIPS_SZPTR == 32)
+#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32)
# open coded la t1, asic_mask_nr_tbl
lui t1, %hi(asic_mask_nr_tbl)
addiu t1, %lo(asic_mask_nr_tbl)
-
-#endif
-#if (_MIPS_SZPTR == 64)
- # open coded dla t1, asic_mask_nr_tbl
- .set push
- .set noat
- lui t1, %highest(asic_mask_nr_tbl)
- lui AT, %hi(asic_mask_nr_tbl)
- daddiu t1, t1, %higher(asic_mask_nr_tbl)
- daddiu AT, AT, %lo(asic_mask_nr_tbl)
- dsll t1, 32
- daddu t1, t1, AT
- .set pop
+#else
+#error GCC `-msym32' option required for 64-bit DECstation builds
#endif
2: lw t2,(t1)
nop
diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h
index fc67947ed658..8b14c2706aa5 100644
--- a/arch/mips/include/asm/cache.h
+++ b/arch/mips/include/asm/cache.h
@@ -9,6 +9,8 @@
#ifndef _ASM_CACHE_H
#define _ASM_CACHE_H
+#include <kmalloc.h>
+
#define L1_CACHE_SHIFT CONFIG_MIPS_L1_CACHE_SHIFT
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 8baa9033b181..721b698bfe3c 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -428,6 +428,9 @@
#ifndef cpu_scache_line_size
#define cpu_scache_line_size() cpu_data[0].scache.linesz
#endif
+#ifndef cpu_tcache_line_size
+#define cpu_tcache_line_size() cpu_data[0].tcache.linesz
+#endif
#ifndef cpu_hwrena_impl_bits
#define cpu_hwrena_impl_bits 0
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index 1de190bdfb9c..a9e61ea54ca9 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -83,18 +83,9 @@
}
static inline int
-futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
- return -EFAULT;
pagefault_disable();
@@ -125,17 +116,9 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
pagefault_enable();
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = oldval;
+
return ret;
}
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 2998479fd4e8..a9af1d2dcd69 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -938,11 +938,6 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
-static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
- unsigned long address)
-{
-}
-
/* Emulation */
int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out);
enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause);
diff --git a/arch/mips/include/asm/octeon/cvmx-l2c-defs.h b/arch/mips/include/asm/octeon/cvmx-l2c-defs.h
index d045973ddb33..3ea84acf1814 100644
--- a/arch/mips/include/asm/octeon/cvmx-l2c-defs.h
+++ b/arch/mips/include/asm/octeon/cvmx-l2c-defs.h
@@ -33,6 +33,10 @@
#define CVMX_L2C_DBG (CVMX_ADD_IO_SEG(0x0001180080000030ull))
#define CVMX_L2C_CFG (CVMX_ADD_IO_SEG(0x0001180080000000ull))
#define CVMX_L2C_CTL (CVMX_ADD_IO_SEG(0x0001180080800000ull))
+#define CVMX_L2C_ERR_TDTX(block_id) \
+ (CVMX_ADD_IO_SEG(0x0001180080A007E0ull) + ((block_id) & 3) * 0x40000ull)
+#define CVMX_L2C_ERR_TTGX(block_id) \
+ (CVMX_ADD_IO_SEG(0x0001180080A007E8ull) + ((block_id) & 3) * 0x40000ull)
#define CVMX_L2C_LCKBASE (CVMX_ADD_IO_SEG(0x0001180080000058ull))
#define CVMX_L2C_LCKOFF (CVMX_ADD_IO_SEG(0x0001180080000060ull))
#define CVMX_L2C_PFCTL (CVMX_ADD_IO_SEG(0x0001180080000090ull))
@@ -66,9 +70,40 @@
((offset) & 1) * 8)
#define CVMX_L2C_WPAR_PPX(offset) (CVMX_ADD_IO_SEG(0x0001180080840000ull) + \
((offset) & 31) * 8)
-#define CVMX_L2D_FUS3 (CVMX_ADD_IO_SEG(0x00011800800007B8ull))
+union cvmx_l2c_err_tdtx {
+ uint64_t u64;
+ struct cvmx_l2c_err_tdtx_s {
+ __BITFIELD_FIELD(uint64_t dbe:1,
+ __BITFIELD_FIELD(uint64_t sbe:1,
+ __BITFIELD_FIELD(uint64_t vdbe:1,
+ __BITFIELD_FIELD(uint64_t vsbe:1,
+ __BITFIELD_FIELD(uint64_t syn:10,
+ __BITFIELD_FIELD(uint64_t reserved_22_49:28,
+ __BITFIELD_FIELD(uint64_t wayidx:18,
+ __BITFIELD_FIELD(uint64_t reserved_2_3:2,
+ __BITFIELD_FIELD(uint64_t type:2,
+ ;)))))))))
+ } s;
+};
+
+union cvmx_l2c_err_ttgx {
+ uint64_t u64;
+ struct cvmx_l2c_err_ttgx_s {
+ __BITFIELD_FIELD(uint64_t dbe:1,
+ __BITFIELD_FIELD(uint64_t sbe:1,
+ __BITFIELD_FIELD(uint64_t noway:1,
+ __BITFIELD_FIELD(uint64_t reserved_56_60:5,
+ __BITFIELD_FIELD(uint64_t syn:6,
+ __BITFIELD_FIELD(uint64_t reserved_22_49:28,
+ __BITFIELD_FIELD(uint64_t wayidx:15,
+ __BITFIELD_FIELD(uint64_t reserved_2_6:5,
+ __BITFIELD_FIELD(uint64_t type:2,
+ ;)))))))))
+ } s;
+};
+
union cvmx_l2c_cfg {
uint64_t u64;
struct cvmx_l2c_cfg_s {
diff --git a/arch/mips/include/asm/octeon/cvmx-l2d-defs.h b/arch/mips/include/asm/octeon/cvmx-l2d-defs.h
new file mode 100644
index 000000000000..a951ad5d65ad
--- /dev/null
+++ b/arch/mips/include/asm/octeon/cvmx-l2d-defs.h
@@ -0,0 +1,60 @@
+/***********************license start***************
+ * Author: Cavium Networks
+ *
+ * Contact: support@caviumnetworks.com
+ * This file is part of the OCTEON SDK
+ *
+ * Copyright (c) 2003-2017 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this file; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * or visit http://www.gnu.org/licenses/.
+ *
+ * This file may also be available under a different license from Cavium.
+ * Contact Cavium Networks for more information
+ ***********************license end**************************************/
+
+#ifndef __CVMX_L2D_DEFS_H__
+#define __CVMX_L2D_DEFS_H__
+
+#define CVMX_L2D_ERR (CVMX_ADD_IO_SEG(0x0001180080000010ull))
+#define CVMX_L2D_FUS3 (CVMX_ADD_IO_SEG(0x00011800800007B8ull))
+
+
+union cvmx_l2d_err {
+ uint64_t u64;
+ struct cvmx_l2d_err_s {
+ __BITFIELD_FIELD(uint64_t reserved_6_63:58,
+ __BITFIELD_FIELD(uint64_t bmhclsel:1,
+ __BITFIELD_FIELD(uint64_t ded_err:1,
+ __BITFIELD_FIELD(uint64_t sec_err:1,
+ __BITFIELD_FIELD(uint64_t ded_intena:1,
+ __BITFIELD_FIELD(uint64_t sec_intena:1,
+ __BITFIELD_FIELD(uint64_t ecc_ena:1,
+ ;)))))))
+ } s;
+};
+
+union cvmx_l2d_fus3 {
+ uint64_t u64;
+ struct cvmx_l2d_fus3_s {
+ __BITFIELD_FIELD(uint64_t reserved_40_63:24,
+ __BITFIELD_FIELD(uint64_t ema_ctl:3,
+ __BITFIELD_FIELD(uint64_t reserved_34_36:3,
+ __BITFIELD_FIELD(uint64_t q3fus:34,
+ ;))))
+ } s;
+};
+
+#endif
diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h
index 9742202f2a32..e638735cc3ac 100644
--- a/arch/mips/include/asm/octeon/cvmx.h
+++ b/arch/mips/include/asm/octeon/cvmx.h
@@ -62,6 +62,7 @@ enum cvmx_mips_space {
#include <asm/octeon/cvmx-iob-defs.h>
#include <asm/octeon/cvmx-ipd-defs.h>
#include <asm/octeon/cvmx-l2c-defs.h>
+#include <asm/octeon/cvmx-l2d-defs.h>
#include <asm/octeon/cvmx-l2t-defs.h>
#include <asm/octeon/cvmx-led-defs.h>
#include <asm/octeon/cvmx-mio-defs.h>
diff --git a/arch/mips/include/asm/vga.h b/arch/mips/include/asm/vga.h
index f82c83749a08..975ff51f80c4 100644
--- a/arch/mips/include/asm/vga.h
+++ b/arch/mips/include/asm/vga.h
@@ -6,6 +6,7 @@
#ifndef _ASM_VGA_H
#define _ASM_VGA_H
+#include <linux/string.h>
#include <asm/addrspace.h>
#include <asm/byteorder.h>
@@ -40,9 +41,15 @@ static inline u16 scr_readw(volatile const u16 *addr)
return le16_to_cpu(*addr);
}
+static inline void scr_memsetw(u16 *s, u16 v, unsigned int count)
+{
+ memset16(s, cpu_to_le16(v), count / 2);
+}
+
#define scr_memcpyw(d, s, c) memcpy(d, s, c)
#define scr_memmovew(d, s, c) memmove(d, s, c)
#define VT_BUF_HAVE_MEMCPYW
#define VT_BUF_HAVE_MEMMOVEW
+#define VT_BUF_HAVE_MEMSETW
#endif /* _ASM_VGA_H */
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index 655e2fb5395b..da3216007fe0 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -91,20 +91,12 @@
overrides the coredump filter bits */
#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */
+#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
+
/* compatibility flags */
#define MAP_FILE 0
-/*
- * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
- * This gives us 6 bits, which is enough until someone invents 128 bit address
- * spaces.
- *
- * Assume these are all power of twos.
- * When 0 use the default page size.
- */
-#define MAP_HUGE_SHIFT 26
-#define MAP_HUGE_MASK 0x3f
-
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 882823bec153..6c755bc07975 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -120,4 +120,6 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 6dd13641a418..1395654cfc8d 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -872,15 +872,13 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
if (unlikely(test_thread_flag(TIF_SECCOMP))) {
int ret, i;
struct seccomp_data sd;
+ unsigned long args[6];
sd.nr = syscall;
sd.arch = syscall_get_arch();
- for (i = 0; i < 6; i++) {
- unsigned long v, r;
-
- r = mips_get_syscall_arg(&v, current, regs, i);
- sd.args[i] = r ? 0 : v;
- }
+ syscall_get_arguments(current, regs, 0, 6, args);
+ for (i = 0; i < 6; i++)
+ sd.args[i] = args[i];
sd.instruction_pointer = KSTK_EIP(current);
ret = __secure_computing(&sd);
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 27c2f90eeb21..a9a7d78803cd 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -190,12 +190,6 @@ illegal_syscall:
sll t1, t0, 2
beqz v0, einval
lw t2, sys_call_table(t1) # syscall routine
- sw a0, PT_R2(sp) # call routine directly on restart
-
- /* Some syscalls like execve get their arguments from struct pt_regs
- and claim zero arguments in the syscall table. Thus we have to
- assume the worst case and shuffle around all potential arguments.
- If you want performance, don't use indirect syscalls. */
move a0, a1 # shift argument registers
move a1, a2
@@ -207,11 +201,6 @@ illegal_syscall:
sw t4, 16(sp)
sw t5, 20(sp)
sw t6, 24(sp)
- sw a0, PT_R4(sp) # .. and push back a0 - a3, some
- sw a1, PT_R5(sp) # syscalls expect them there
- sw a2, PT_R6(sp)
- sw a3, PT_R7(sp)
- sw a3, PT_R26(sp) # update a3 for syscall restarting
jr t2
/* Unreached */
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index c30bc520885f..9ebe3e2403b1 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -198,7 +198,6 @@ LEAF(sys32_syscall)
dsll t1, t0, 3
beqz v0, einval
ld t2, sys32_call_table(t1) # syscall routine
- sd a0, PT_R2(sp) # call routine directly on restart
move a0, a1 # shift argument registers
move a1, a2
@@ -207,11 +206,6 @@ LEAF(sys32_syscall)
move a4, a5
move a5, a6
move a6, a7
- sd a0, PT_R4(sp) # ... and push back a0 - a3, some
- sd a1, PT_R5(sp) # syscalls expect them there
- sd a2, PT_R6(sp)
- sd a3, PT_R7(sp)
- sd a3, PT_R26(sp) # update a3 for syscall restarting
jr t2
/* Unreached */
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 770d4d1516cb..c7cbddfcdc3b 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -376,9 +376,6 @@ asmlinkage void start_secondary(void)
cpumask_set_cpu(cpu, &cpu_coherent_mask);
notify_cpu_starting(cpu);
- complete(&cpu_running);
- synchronise_count_slave(cpu);
-
set_cpu_online(cpu, true);
set_cpu_sibling_map(cpu);
@@ -386,6 +383,9 @@ asmlinkage void start_secondary(void)
calculate_cpu_foreign_map();
+ complete(&cpu_running);
+ synchronise_count_slave(cpu);
+
/*
* irq will be enabled in ->smp_finish(), enabling it too early
* is dangerous.
@@ -648,12 +648,12 @@ EXPORT_SYMBOL(flush_tlb_one);
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
static DEFINE_PER_CPU(atomic_t, tick_broadcast_count);
-static DEFINE_PER_CPU(struct call_single_data, tick_broadcast_csd);
+static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd);
void tick_broadcast(const struct cpumask *mask)
{
atomic_t *count;
- struct call_single_data *csd;
+ call_single_data_t *csd;
int cpu;
for_each_cpu(cpu, mask) {
@@ -674,7 +674,7 @@ static void tick_broadcast_callee(void *info)
static int __init tick_broadcast_init(void)
{
- struct call_single_data *csd;
+ call_single_data_t *csd;
int cpu;
for (cpu = 0; cpu < NR_CPUS; cpu++) {
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index d4b2ad18eef2..bce2a6431430 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -98,6 +98,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return !!(vcpu->arch.pending_exceptions);
}
+/* Stub implementation: always returns false, whatever the state of @vcpu. */
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
return 1;
diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index 3f74f6c1f065..9fea6c6bbf49 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -48,7 +48,7 @@
#include "uasm.c"
-static const struct insn const insn_table[insn_invalid] = {
+static const struct insn insn_table[insn_invalid] = {
[insn_addiu] = {M(addiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
[insn_addu] = {M(spec_op, 0, 0, 0, 0, addu_op), RS | RT | RD},
[insn_and] = {M(spec_op, 0, 0, 0, 0, and_op), RS | RT | RD},
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
new file mode 100644
index 000000000000..7646891c4e9b
--- /dev/null
+++ b/arch/mips/net/ebpf_jit.c
@@ -0,0 +1,1995 @@
+/*
+ * Just-In-Time compiler for eBPF filters on MIPS
+ *
+ * Copyright (c) 2017 Cavium, Inc.
+ *
+ * Based on code from:
+ *
+ * Copyright (c) 2014 Imagination Technologies Ltd.
+ * Author: Markos Chandras <markos.chandras@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/slab.h>
+#include <asm/bitops.h>
+#include <asm/byteorder.h>
+#include <asm/cacheflush.h>
+#include <asm/cpu-features.h>
+#include <asm/uasm.h>
+
+/* Registers used by JIT */
+#define MIPS_R_ZERO 0
+#define MIPS_R_AT 1
+#define MIPS_R_V0 2 /* BPF_R0 */
+#define MIPS_R_V1 3
+#define MIPS_R_A0 4 /* BPF_R1 */
+#define MIPS_R_A1 5 /* BPF_R2 */
+#define MIPS_R_A2 6 /* BPF_R3 */
+#define MIPS_R_A3 7 /* BPF_R4 */
+#define MIPS_R_A4 8 /* BPF_R5 */
+#define MIPS_R_T4 12 /* BPF_AX */
+#define MIPS_R_T5 13
+#define MIPS_R_T6 14
+#define MIPS_R_T7 15
+#define MIPS_R_S0 16 /* BPF_R6 */
+#define MIPS_R_S1 17 /* BPF_R7 */
+#define MIPS_R_S2 18 /* BPF_R8 */
+#define MIPS_R_S3 19 /* BPF_R9 */
+#define MIPS_R_S4 20 /* BPF_TCC */
+#define MIPS_R_S5 21
+#define MIPS_R_S6 22
+#define MIPS_R_S7 23
+#define MIPS_R_T8 24
+#define MIPS_R_T9 25
+#define MIPS_R_SP 29
+#define MIPS_R_RA 31
+
+/* eBPF flags */
+#define EBPF_SAVE_S0 BIT(0)
+#define EBPF_SAVE_S1 BIT(1)
+#define EBPF_SAVE_S2 BIT(2)
+#define EBPF_SAVE_S3 BIT(3)
+#define EBPF_SAVE_S4 BIT(4)
+#define EBPF_SAVE_RA BIT(5)
+#define EBPF_SEEN_FP BIT(6)
+#define EBPF_SEEN_TC BIT(7)
+#define EBPF_TCC_IN_V1 BIT(8)
+
+/*
+ * For the mips64 ISA, we need to track the value range or type for
+ * each JIT register. The BPF machine requires zero extended 32-bit
+ * values, but the mips64 ISA requires sign extended 32-bit values.
+ * At each point in the BPF program we track the state of every
+ * register so that we can zero extend or sign extend as the BPF
+ * semantics require.
+ */
+enum reg_val_type {
+ /* uninitialized */
+ REG_UNKNOWN,
+ /* not known to be 32-bit compatible. */
+ REG_64BIT,
+ /* 32-bit compatible, no truncation needed for 64-bit ops. */
+ REG_64BIT_32BIT,
+ /* 32-bit compatible, need truncation for 64-bit ops. */
+ REG_32BIT,
+ /* 32-bit zero extended. */
+ REG_32BIT_ZERO_EX,
+ /* 32-bit no sign/zero extension needed. */
+ REG_32BIT_POS
+};
+
+/*
+ * high bit of offsets indicates if long branch conversion done at
+ * this insn.
+ */
+#define OFFSETS_B_CONV BIT(31)
+
+/**
+ * struct jit_ctx - JIT context
+ * @skf:		The BPF program being compiled
+ * @stack_size:		Total stack adjustment in bytes, set by the prologue
+ * @tmp_offset:		eBPF $sp offset to 8-byte temporary memory
+ * @idx:		Index (in 32-bit instruction words) of the next
+ *			instruction to be emitted
+ * @flags:		JIT flags (EBPF_SAVE_*, EBPF_SEEN_*, EBPF_TCC_IN_V1)
+ * @offsets:		Per-instruction byte offsets into the generated code;
+ *			the top bit is the OFFSETS_B_CONV marker
+ * @target:		Memory location for the compiled filter; while NULL,
+ *			instructions are only counted, not written
+ * @reg_val_types:	Packed enum reg_val_type for each register, 3 bits
+ *			per register, one u64 per instruction index
+ * @long_b_conversion:	Set when a conditional branch was newly converted to
+ *			the long (inverted branch over a jump) form
+ * @gen_b_offsets:	When clear, b_imm() returns 0 instead of a real
+ *			branch offset
+ * @use_bbit_insns:	Not referenced in this part of the file; presumably
+ *			selects use of bbit0/bbit1 instructions - TODO confirm
+ */
+struct jit_ctx {
+	const struct bpf_prog *skf;
+	int stack_size;
+	int tmp_offset;
+	u32 idx;
+	u32 flags;
+	u32 *offsets;
+	u32 *target;
+	u64 *reg_val_types;
+	unsigned int long_b_conversion:1;
+	unsigned int gen_b_offsets:1;
+	unsigned int use_bbit_insns:1;
+};
+
+/*
+ * Store @type as the value type of register @reg in the packed word @rvt.
+ * Each register owns a 3-bit field at bit position reg * 3.
+ */
+static void set_reg_val_type(u64 *rvt, int reg, enum reg_val_type type)
+{
+	const int shift = reg * 3;
+	u64 packed = *rvt;
+
+	/* Clear the old field, then drop the new type into place. */
+	packed &= ~(7ull << shift);
+	packed |= (u64)type << shift;
+	*rvt = packed;
+}
+
+/*
+ * Fetch the value type recorded for register @reg at instruction @index.
+ * The types are packed 3 bits per register in ctx->reg_val_types[index].
+ */
+static enum reg_val_type get_reg_val_type(const struct jit_ctx *ctx,
+					  int index, int reg)
+{
+	u64 packed = ctx->reg_val_types[index];
+
+	packed >>= reg * 3;
+	return packed & 7;
+}
+
+/*
+ * Emit one instruction via uasm_i_<func>() if the JIT memory space has been
+ * allocated; in every case advance the instruction index, so that a pass
+ * with a NULL target still counts how many instructions would be generated.
+ *
+ * Every use of the ctx argument is parenthesized so the macro stays correct
+ * when it is handed an expression rather than a plain identifier.
+ */
+#define emit_instr(ctx, func, ...)			\
+do {							\
+	if ((ctx)->target != NULL) {			\
+		u32 *p = &(ctx)->target[(ctx)->idx];	\
+		uasm_i_##func(&p, ##__VA_ARGS__);	\
+	}						\
+	(ctx)->idx++;					\
+} while (0)
+
+/*
+ * Compute the operand for an absolute jump to the code generated for BPF
+ * instruction @target_idx.
+ *
+ * Returns 0 while no output buffer exists, (unsigned int)-1 when the start
+ * of the buffer and the target differ in any address bit above the low 28,
+ * and otherwise the low 28 bits of the target address.
+ */
+static unsigned int j_target(struct jit_ctx *ctx, int target_idx)
+{
+	const unsigned long region_mask = ~0x0ffffffful;
+	unsigned long base_va, target_va;
+
+	if (!ctx->target)
+		return 0;
+
+	base_va = (unsigned long)ctx->target;
+	target_va = base_va + (ctx->offsets[target_idx] & ~OFFSETS_B_CONV);
+
+	/* Any differing bit above the low 28 puts the target out of reach. */
+	if ((base_va ^ target_va) & region_mask)
+		return (unsigned int)-1;
+
+	return target_va & ~region_mask;
+}
+
+/*
+ * Compute the immediate value for a PC-relative branch to BPF instruction
+ * @tgt.  Returns 0 while branch offsets are not being generated.
+ *
+ * Branch on MIPS:
+ *   I:   target_offset <- sign_extend(offset)
+ *   I+1: PC += target_offset (delay slot)
+ *
+ * ctx->idx currently points to the branch instruction, but the offset is
+ * applied relative to the delay slot, hence the extra 4 bytes subtracted.
+ */
+static u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
+{
+	u32 tgt_off, delay_slot_off;
+
+	if (!ctx->gen_b_offsets)
+		return 0;
+
+	/* Strip the long-branch marker to get the plain byte offset. */
+	tgt_off = ctx->offsets[tgt] & ~OFFSETS_B_CONV;
+	delay_slot_off = ctx->idx * 4 + 4;
+
+	return tgt_off - delay_slot_off;
+}
+
+int bpf_jit_enable __read_mostly;
+
+/* Selects which operand of a BPF instruction ebpf_to_mips_reg() translates. */
+enum which_ebpf_reg {
+	/* insn->src_reg; BPF_REG_10 is accepted */
+	src_reg,
+	/* insn->src_reg; BPF_REG_10 is rejected */
+	src_reg_no_fp,
+	/* insn->dst_reg; BPF_REG_10 is rejected */
+	dst_reg,
+	/* insn->dst_reg; BPF_REG_10 is accepted */
+	dst_reg_fp_ok
+};
+
+/*
+ * For eBPF, the register mapping naturally falls out of the
+ * requirements of eBPF and the MIPS n64 ABI.  We don't maintain a
+ * separate frame pointer, so BPF_REG_10 relative accesses are
+ * adjusted to be $sp relative.
+ *
+ * Translates the source or destination register of @insn (chosen by @w)
+ * into a MIPS register number.  As a side effect, the matching
+ * EBPF_SAVE_S* flag is set for BPF_REG_6..BPF_REG_9 and EBPF_SEEN_FP for
+ * BPF_REG_10.
+ *
+ * Returns the MIPS register number, or -EINVAL (after a WARN) for an
+ * unknown register or for BPF_REG_10 where @w does not permit it.
+ *
+ * File-local helper, hence static.
+ */
+static int ebpf_to_mips_reg(struct jit_ctx *ctx, const struct bpf_insn *insn,
+			    enum which_ebpf_reg w)
+{
+	int ebpf_reg = (w == src_reg || w == src_reg_no_fp) ?
+		insn->src_reg : insn->dst_reg;
+
+	switch (ebpf_reg) {
+	case BPF_REG_0:
+		return MIPS_R_V0;
+	case BPF_REG_1:
+		return MIPS_R_A0;
+	case BPF_REG_2:
+		return MIPS_R_A1;
+	case BPF_REG_3:
+		return MIPS_R_A2;
+	case BPF_REG_4:
+		return MIPS_R_A3;
+	case BPF_REG_5:
+		return MIPS_R_A4;
+	case BPF_REG_6:
+		/* $s0-$s3 must be saved by the prologue once they are used */
+		ctx->flags |= EBPF_SAVE_S0;
+		return MIPS_R_S0;
+	case BPF_REG_7:
+		ctx->flags |= EBPF_SAVE_S1;
+		return MIPS_R_S1;
+	case BPF_REG_8:
+		ctx->flags |= EBPF_SAVE_S2;
+		return MIPS_R_S2;
+	case BPF_REG_9:
+		ctx->flags |= EBPF_SAVE_S3;
+		return MIPS_R_S3;
+	case BPF_REG_10:
+		if (w == dst_reg || w == src_reg_no_fp)
+			goto bad_reg;
+		ctx->flags |= EBPF_SEEN_FP;
+		/*
+		 * Needs special handling, return something that
+		 * cannot be clobbered just in case.
+		 */
+		return MIPS_R_ZERO;
+	case BPF_REG_AX:
+		return MIPS_R_T4;
+	default:
+bad_reg:
+		WARN(1, "Illegal bpf reg: %d\n", ebpf_reg);
+		return -EINVAL;
+	}
+}
+/*
+ * eBPF stack frame will be something like:
+ *
+ * Entry $sp ------> +--------------------------------+
+ * | $ra (optional) |
+ * +--------------------------------+
+ * | $s0 (optional) |
+ * +--------------------------------+
+ * | $s1 (optional) |
+ * +--------------------------------+
+ * | $s2 (optional) |
+ * +--------------------------------+
+ * | $s3 (optional) |
+ * +--------------------------------+
+ * | $s4 (optional) |
+ * +--------------------------------+
+ * | tmp-storage (if $ra saved) |
+ * $sp + tmp_offset --> +--------------------------------+ <--BPF_REG_10
+ * | BPF_REG_10 relative storage |
+ * | MAX_BPF_STACK (optional) |
+ * | . |
+ * | . |
+ * | . |
+ * $sp --------> +--------------------------------+
+ *
+ * If BPF_REG_10 is never referenced, then the MAX_BPF_STACK sized
+ * area is not allocated.
+ */
+/*
+ * Emit the function prologue: initialize the tail call count in $v1,
+ * allocate the stack frame and save $ra / $s0-$s4 as requested by
+ * ctx->flags.  Records the frame size in ctx->stack_size and the size of
+ * the BPF locals area in ctx->tmp_offset.  Always returns 0.
+ *
+ * The save order here must match the restore order in build_int_epilogue().
+ */
+static int gen_int_prologue(struct jit_ctx *ctx)
+{
+	int stack_adjust = 0;
+	int store_offset;
+	int locals_size;
+
+	if (ctx->flags & EBPF_SAVE_RA)
+		/*
+		 * If RA we are doing a function call and may need
+		 * extra 8-byte tmp area.
+		 */
+		stack_adjust += 16;
+	if (ctx->flags & EBPF_SAVE_S0)
+		stack_adjust += 8;
+	if (ctx->flags & EBPF_SAVE_S1)
+		stack_adjust += 8;
+	if (ctx->flags & EBPF_SAVE_S2)
+		stack_adjust += 8;
+	if (ctx->flags & EBPF_SAVE_S3)
+		stack_adjust += 8;
+	if (ctx->flags & EBPF_SAVE_S4)
+		stack_adjust += 8;
+
+	/* The locals area must keep the 8-byte slots above it aligned. */
+	BUILD_BUG_ON(MAX_BPF_STACK & 7);
+	locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0;
+
+	stack_adjust += locals_size;
+	ctx->tmp_offset = locals_size;
+
+	ctx->stack_size = stack_adjust;
+
+	/*
+	 * First instruction initializes the tail call count (TCC).
+	 * On tail call we skip this instruction, and the TCC is
+	 * passed in $v1 from the caller.
+	 */
+	emit_instr(ctx, daddiu, MIPS_R_V1, MIPS_R_ZERO, MAX_TAIL_CALL_CNT);
+	if (stack_adjust)
+		emit_instr(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, -stack_adjust);
+	else
+		/* No frame: nothing to save, and no TCC copy to $s4 either. */
+		return 0;
+
+	/* Saved registers are stored from the top of the frame downwards. */
+	store_offset = stack_adjust - 8;
+
+	if (ctx->flags & EBPF_SAVE_RA) {
+		emit_instr(ctx, sd, MIPS_R_RA, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S0) {
+		emit_instr(ctx, sd, MIPS_R_S0, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S1) {
+		emit_instr(ctx, sd, MIPS_R_S1, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S2) {
+		emit_instr(ctx, sd, MIPS_R_S2, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S3) {
+		emit_instr(ctx, sd, MIPS_R_S3, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S4) {
+		emit_instr(ctx, sd, MIPS_R_S4, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+
+	/* Keep the TCC in $s4 unless it was decided to leave it in $v1. */
+	if ((ctx->flags & EBPF_SEEN_TC) && !(ctx->flags & EBPF_TCC_IN_V1))
+		emit_instr(ctx, daddu, MIPS_R_S4, MIPS_R_V1, MIPS_R_ZERO);
+
+	return 0;
+}
+
+/*
+ * Emit the function epilogue: restore $ra / $s0-$s4 as requested by
+ * ctx->flags, jump through @dest_reg and release the stack frame in the
+ * instruction following the jump.  Always returns 0.
+ *
+ * @dest_reg is MIPS_R_RA for a normal return; emit_bpf_tail_call() passes
+ * MIPS_R_T9 to jump into the next program instead.
+ *
+ * The restore order here must match the save order in gen_int_prologue().
+ */
+static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg)
+{
+	const struct bpf_prog *prog = ctx->skf;
+	int stack_adjust = ctx->stack_size;
+	int store_offset = stack_adjust - 8;
+	int r0 = MIPS_R_V0;
+
+	/* Only on a real return: consult the R0 state recorded at prog->len. */
+	if (dest_reg == MIPS_R_RA &&
+	    get_reg_val_type(ctx, prog->len, BPF_REG_0) == REG_32BIT_ZERO_EX)
+		/* Don't let zero extended value escape. */
+		emit_instr(ctx, sll, r0, r0, 0);
+
+	if (ctx->flags & EBPF_SAVE_RA) {
+		emit_instr(ctx, ld, MIPS_R_RA, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S0) {
+		emit_instr(ctx, ld, MIPS_R_S0, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S1) {
+		emit_instr(ctx, ld, MIPS_R_S1, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S2) {
+		emit_instr(ctx, ld, MIPS_R_S2, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S3) {
+		emit_instr(ctx, ld, MIPS_R_S3, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	if (ctx->flags & EBPF_SAVE_S4) {
+		emit_instr(ctx, ld, MIPS_R_S4, store_offset, MIPS_R_SP);
+		store_offset -= 8;
+	}
+	emit_instr(ctx, jr, dest_reg);
+
+	/* Instruction after the jr (its delay slot): pop the frame or nop. */
+	if (stack_adjust)
+		emit_instr(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, stack_adjust);
+	else
+		emit_instr(ctx, nop);
+
+	return 0;
+}
+
+/*
+ * Load the 32-bit immediate of @insn into MIPS register @reg: a single
+ * addiu when it fits in a signed 16-bit field, otherwise lui + addiu.
+ */
+static void gen_imm_to_reg(const struct bpf_insn *insn, int reg,
+			   struct jit_ctx *ctx)
+{
+	int lo16, hi;
+
+	if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) {
+		emit_instr(ctx, addiu, reg, MIPS_R_ZERO, insn->imm);
+		return;
+	}
+
+	/*
+	 * The low half is taken as a signed 16-bit quantity and is
+	 * subtracted from the immediate before the upper half is extracted.
+	 */
+	lo16 = (s16)(insn->imm & 0xffff);
+	hi = insn->imm - lo16;
+
+	emit_instr(ctx, lui, reg, hi >> 16);
+	emit_instr(ctx, addiu, reg, reg, lo16);
+}
+
+/*
+ * Emit code for an ALU/ALU64 instruction with an immediate operand
+ * (MOV, ADD, SUB, AND, OR, XOR, LSH, RSH, ARSH).
+ *
+ * @insn: the BPF instruction
+ * @ctx:  JIT context
+ * @idx:  index of @insn, used to look up the tracked type of the
+ *        destination register
+ *
+ * When the immediate fits the range of the MIPS immediate form of the
+ * operation a single instruction is emitted; otherwise the immediate is
+ * first materialized (in the destination for MOV, in $at for the rest)
+ * and the register form is used.
+ *
+ * Returns 0 on success, a negative errno if the destination register is
+ * invalid or the operation is not handled.
+ */
+static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
+			int idx)
+{
+	int upper_bound, lower_bound;
+	int dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+
+	if (dst < 0)
+		return dst;
+
+	/* Range of immediates that the single-instruction form can encode. */
+	switch (BPF_OP(insn->code)) {
+	case BPF_MOV:
+	case BPF_ADD:
+		upper_bound = S16_MAX;
+		lower_bound = S16_MIN;
+		break;
+	case BPF_SUB:
+		/* SUB is emitted as an add of -imm, so the range is mirrored */
+		upper_bound = -(int)S16_MIN;
+		lower_bound = -(int)S16_MAX;
+		break;
+	case BPF_AND:
+	case BPF_OR:
+	case BPF_XOR:
+		upper_bound = 0xffff;
+		lower_bound = 0;
+		break;
+	case BPF_RSH:
+	case BPF_LSH:
+	case BPF_ARSH:
+		/* Shift amounts are truncated, no need for bounds */
+		upper_bound = S32_MAX;
+		lower_bound = S32_MIN;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/*
+	 * Immediate move clobbers the register, so no sign/zero
+	 * extension needed.
+	 */
+	if (BPF_CLASS(insn->code) == BPF_ALU64 &&
+	    BPF_OP(insn->code) != BPF_MOV &&
+	    get_reg_val_type(ctx, idx, insn->dst_reg) == REG_32BIT)
+		emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+	/* BPF_ALU | BPF_LSH doesn't need separate sign extension */
+	if (BPF_CLASS(insn->code) == BPF_ALU &&
+	    BPF_OP(insn->code) != BPF_LSH &&
+	    BPF_OP(insn->code) != BPF_MOV &&
+	    get_reg_val_type(ctx, idx, insn->dst_reg) != REG_32BIT)
+		emit_instr(ctx, sll, dst, dst, 0);
+
+	if (insn->imm >= lower_bound && insn->imm <= upper_bound) {
+		/* single insn immediate case */
+		switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) {
+		case BPF_ALU64 | BPF_MOV:
+			emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, insn->imm);
+			break;
+		case BPF_ALU64 | BPF_AND:
+		case BPF_ALU | BPF_AND:
+			emit_instr(ctx, andi, dst, dst, insn->imm);
+			break;
+		case BPF_ALU64 | BPF_OR:
+		case BPF_ALU | BPF_OR:
+			emit_instr(ctx, ori, dst, dst, insn->imm);
+			break;
+		case BPF_ALU64 | BPF_XOR:
+		case BPF_ALU | BPF_XOR:
+			emit_instr(ctx, xori, dst, dst, insn->imm);
+			break;
+		case BPF_ALU64 | BPF_ADD:
+			emit_instr(ctx, daddiu, dst, dst, insn->imm);
+			break;
+		case BPF_ALU64 | BPF_SUB:
+			emit_instr(ctx, daddiu, dst, dst, -insn->imm);
+			break;
+		case BPF_ALU64 | BPF_RSH:
+			emit_instr(ctx, dsrl_safe, dst, dst, insn->imm & 0x3f);
+			break;
+		case BPF_ALU | BPF_RSH:
+			emit_instr(ctx, srl, dst, dst, insn->imm & 0x1f);
+			break;
+		case BPF_ALU64 | BPF_LSH:
+			emit_instr(ctx, dsll_safe, dst, dst, insn->imm & 0x3f);
+			break;
+		case BPF_ALU | BPF_LSH:
+			emit_instr(ctx, sll, dst, dst, insn->imm & 0x1f);
+			break;
+		case BPF_ALU64 | BPF_ARSH:
+			emit_instr(ctx, dsra_safe, dst, dst, insn->imm & 0x3f);
+			break;
+		case BPF_ALU | BPF_ARSH:
+			emit_instr(ctx, sra, dst, dst, insn->imm & 0x1f);
+			break;
+		case BPF_ALU | BPF_MOV:
+			emit_instr(ctx, addiu, dst, MIPS_R_ZERO, insn->imm);
+			break;
+		case BPF_ALU | BPF_ADD:
+			emit_instr(ctx, addiu, dst, dst, insn->imm);
+			break;
+		case BPF_ALU | BPF_SUB:
+			emit_instr(ctx, addiu, dst, dst, -insn->imm);
+			break;
+		default:
+			return -EINVAL;
+		}
+	} else {
+		/* multi insn immediate case */
+		if (BPF_OP(insn->code) == BPF_MOV) {
+			/* MOV: build the constant directly in the destination */
+			gen_imm_to_reg(insn, dst, ctx);
+		} else {
+			/* Others: build the constant in $at, then use the register form */
+			gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+			switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) {
+			case BPF_ALU64 | BPF_AND:
+			case BPF_ALU | BPF_AND:
+				emit_instr(ctx, and, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU64 | BPF_OR:
+			case BPF_ALU | BPF_OR:
+				emit_instr(ctx, or, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU64 | BPF_XOR:
+			case BPF_ALU | BPF_XOR:
+				emit_instr(ctx, xor, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU64 | BPF_ADD:
+				emit_instr(ctx, daddu, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU64 | BPF_SUB:
+				emit_instr(ctx, dsubu, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU | BPF_ADD:
+				emit_instr(ctx, addu, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU | BPF_SUB:
+				emit_instr(ctx, subu, dst, dst, MIPS_R_AT);
+				break;
+			default:
+				return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Plain function wrapper that forwards all of its arguments to
+ * skb_header_pointer() and returns its result unchanged.
+ * NOTE(review): "ool" presumably stands for "out of line", i.e. this gives
+ * the JIT a real function to call - confirm against the call sites.
+ */
+static void * __must_check
+ool_skb_header_pointer(const struct sk_buff *skb, int offset,
+		       int len, void *buffer)
+{
+	return skb_header_pointer(skb, offset, len, buffer);
+}
+
+/*
+ * Map the BPF_SIZE field of @insn to an access width in bytes
+ * (1, 2, 4 or 8).  Returns 0 for any other size encoding.
+ */
+static int size_to_len(const struct bpf_insn *insn)
+{
+	int len = 0;
+
+	switch (BPF_SIZE(insn->code)) {
+	case BPF_B:
+		len = 1;
+		break;
+	case BPF_H:
+		len = 2;
+		break;
+	case BPF_W:
+		len = 4;
+		break;
+	case BPF_DW:
+		len = 8;
+		break;
+	}
+
+	return len;
+}
+
+/*
+ * Emit code that loads the 64-bit constant @value into MIPS register @dst.
+ *
+ * Three strategies, cheapest first:
+ *  - the value is a sign-extended 16-bit quantity: one daddiu;
+ *  - the value is a sign-extended negative 32-bit quantity, or lies
+ *    between 0x10000 and 0x7fffffff: lui + ori;
+ *  - anything else: walk the four 16-bit parts from most to least
+ *    significant, emitting lui/ori for non-zero parts and dsll_safe to
+ *    move what has been accumulated so far into position.
+ */
+static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value)
+{
+	if (value >= 0xffffffffffff8000ull || value < 0x8000ull) {
+		emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, (int)value);
+	} else if (value >= 0xffffffff80000000ull ||
+		   (value < 0x80000000 && value > 0xffff)) {
+		emit_instr(ctx, lui, dst, (s32)(s16)(value >> 16));
+		emit_instr(ctx, ori, dst, dst, (unsigned int)(value & 0xffff));
+	} else {
+		int i;
+		/* true once any non-zero part has been loaded into dst */
+		bool seen_part = false;
+		/* shift still owed to the bits already sitting in dst */
+		int needed_shift = 0;
+
+		for (i = 0; i < 4; i++) {
+			u64 part = (value >> (16 * (3 - i))) & 0xffff;
+
+			/*
+			 * Flush the pending shift before or-ing in another
+			 * part, and at the last part even if it is zero.
+			 */
+			if (seen_part && needed_shift > 0 && (part || i == 3)) {
+				emit_instr(ctx, dsll_safe, dst, dst, needed_shift);
+				needed_shift = 0;
+			}
+			if (part) {
+				if (i == 0 || (!seen_part && i < 3 && part < 0x8000)) {
+					emit_instr(ctx, lui, dst, (s32)(s16)part);
+					/* credit 16 bits: offsets the += 16 below */
+					needed_shift = -16;
+				} else {
+					emit_instr(ctx, ori, dst,
+						   seen_part ? dst : MIPS_R_ZERO,
+						   (unsigned int)part);
+				}
+				seen_part = true;
+			}
+			if (seen_part)
+				needed_shift += 16;
+		}
+	}
+}
+
+/*
+ * Emit the code for a BPF tail call located at BPF instruction @this_idx.
+ *
+ * The generated code reads the program array from $a1 and the index from
+ * $a2, and falls through to the next BPF instruction ("out") when the
+ * index is out of range, the tail call count is exhausted or the selected
+ * slot is empty.  Otherwise it runs the epilogue and jumps to the target
+ * program's bpf_func + 4, skipping the TCC initialization there and
+ * passing the decremented TCC in $v1.
+ *
+ * Sets EBPF_SEEN_TC in ctx->flags.  Returns the result of
+ * build_int_epilogue().
+ */
+static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx)
+{
+	int off, b_off;
+
+	ctx->flags |= EBPF_SEEN_TC;
+	/*
+	 * if (index >= array->map.max_entries)
+	 *     goto out;
+	 */
+	off = offsetof(struct bpf_array, map.max_entries);
+	emit_instr(ctx, lwu, MIPS_R_T5, off, MIPS_R_A1);
+	/*
+	 * $at = (index < max_entries).  Testing it this way round also
+	 * rejects index == max_entries, which would otherwise read one
+	 * slot past the end of array->ptrs[].
+	 */
+	emit_instr(ctx, sltu, MIPS_R_AT, MIPS_R_A2, MIPS_R_T5);
+	b_off = b_imm(this_idx + 1, ctx);
+	emit_instr(ctx, beq, MIPS_R_AT, MIPS_R_ZERO, b_off);
+	/*
+	 * if (--TCC < 0)
+	 *     goto out;
+	 */
+	/* Delay slot */
+	emit_instr(ctx, daddiu, MIPS_R_T5,
+		   (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4, -1);
+	b_off = b_imm(this_idx + 1, ctx);
+	emit_instr(ctx, bltz, MIPS_R_T5, b_off);
+	/*
+	 * prog = array->ptrs[index];
+	 * if (prog == NULL)
+	 *     goto out;
+	 */
+	/* Delay slot */
+	emit_instr(ctx, dsll, MIPS_R_T8, MIPS_R_A2, 3);
+	emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, MIPS_R_A1);
+	off = offsetof(struct bpf_array, ptrs);
+	emit_instr(ctx, ld, MIPS_R_AT, off, MIPS_R_T8);
+	b_off = b_imm(this_idx + 1, ctx);
+	emit_instr(ctx, beq, MIPS_R_AT, MIPS_R_ZERO, b_off);
+	/* Delay slot */
+	emit_instr(ctx, nop);
+
+	/* goto *(prog->bpf_func + 4); */
+	off = offsetof(struct bpf_prog, bpf_func);
+	emit_instr(ctx, ld, MIPS_R_T9, off, MIPS_R_AT);
+	/* All systems are go... propagate TCC */
+	emit_instr(ctx, daddu, MIPS_R_V1, MIPS_R_T5, MIPS_R_ZERO);
+	/* Skip first instruction (TCC initialization) */
+	emit_instr(ctx, daddiu, MIPS_R_T9, MIPS_R_T9, 4);
+	return build_int_epilogue(ctx, MIPS_R_T9);
+}
+
+/*
+ * Tell whether byte offset @b_off lies outside the range
+ * [-0x20000, 0x1ffff] accepted for a branch.
+ */
+static bool is_bad_offset(int b_off)
+{
+	const int lowest = -0x20000;
+	const int highest = 0x1ffff;
+
+	return !(b_off >= lowest && b_off <= highest);
+}
+
+/* Returns the number of insn slots consumed. */
+static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
+ int this_idx, int exit_idx)
+{
+ int src, dst, r, td, ts, mem_off, b_off;
+ bool need_swap, did_move, cmp_eq;
+ unsigned int target;
+ u64 t64;
+ s64 t64s;
+ int bpf_op = BPF_OP(insn->code);
+
+ switch (insn->code) {
+ case BPF_ALU64 | BPF_ADD | BPF_K: /* ALU64_IMM */
+ case BPF_ALU64 | BPF_SUB | BPF_K: /* ALU64_IMM */
+ case BPF_ALU64 | BPF_OR | BPF_K: /* ALU64_IMM */
+ case BPF_ALU64 | BPF_AND | BPF_K: /* ALU64_IMM */
+ case BPF_ALU64 | BPF_LSH | BPF_K: /* ALU64_IMM */
+ case BPF_ALU64 | BPF_RSH | BPF_K: /* ALU64_IMM */
+ case BPF_ALU64 | BPF_XOR | BPF_K: /* ALU64_IMM */
+ case BPF_ALU64 | BPF_ARSH | BPF_K: /* ALU64_IMM */
+ case BPF_ALU64 | BPF_MOV | BPF_K: /* ALU64_IMM */
+ case BPF_ALU | BPF_MOV | BPF_K: /* ALU32_IMM */
+ case BPF_ALU | BPF_ADD | BPF_K: /* ALU32_IMM */
+ case BPF_ALU | BPF_SUB | BPF_K: /* ALU32_IMM */
+ case BPF_ALU | BPF_OR | BPF_K: /* ALU64_IMM */
+ case BPF_ALU | BPF_AND | BPF_K: /* ALU64_IMM */
+ case BPF_ALU | BPF_LSH | BPF_K: /* ALU64_IMM */
+ case BPF_ALU | BPF_RSH | BPF_K: /* ALU64_IMM */
+ case BPF_ALU | BPF_XOR | BPF_K: /* ALU64_IMM */
+ case BPF_ALU | BPF_ARSH | BPF_K: /* ALU64_IMM */
+ r = gen_imm_insn(insn, ctx, this_idx);
+ if (r < 0)
+ return r;
+ break;
+ case BPF_ALU64 | BPF_MUL | BPF_K: /* ALU64_IMM */
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+ if (dst < 0)
+ return dst;
+ if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
+ emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+ if (insn->imm == 1) /* Mult by 1 is a nop */
+ break;
+ gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+ emit_instr(ctx, dmultu, MIPS_R_AT, dst);
+ emit_instr(ctx, mflo, dst);
+ break;
+ case BPF_ALU64 | BPF_NEG | BPF_K: /* ALU64_IMM */
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+ if (dst < 0)
+ return dst;
+ if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
+ emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+ emit_instr(ctx, dsubu, dst, MIPS_R_ZERO, dst);
+ break;
+ case BPF_ALU | BPF_MUL | BPF_K: /* ALU_IMM */
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+ if (dst < 0)
+ return dst;
+ td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+ if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) {
+ /* sign extend */
+ emit_instr(ctx, sll, dst, dst, 0);
+ }
+ if (insn->imm == 1) /* Mult by 1 is a nop */
+ break;
+ gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+ emit_instr(ctx, multu, dst, MIPS_R_AT);
+ emit_instr(ctx, mflo, dst);
+ break;
+ case BPF_ALU | BPF_NEG | BPF_K: /* ALU_IMM */
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+ if (dst < 0)
+ return dst;
+ td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+ if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) {
+ /* sign extend */
+ emit_instr(ctx, sll, dst, dst, 0);
+ }
+ emit_instr(ctx, subu, dst, MIPS_R_ZERO, dst);
+ break;
+ case BPF_ALU | BPF_DIV | BPF_K: /* ALU_IMM */
+ case BPF_ALU | BPF_MOD | BPF_K: /* ALU_IMM */
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+ if (dst < 0)
+ return dst;
+ if (insn->imm == 0) { /* Div by zero */
+ b_off = b_imm(exit_idx, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off);
+ emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO);
+ }
+ td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+ if (td == REG_64BIT || td == REG_32BIT_ZERO_EX)
+ /* sign extend */
+ emit_instr(ctx, sll, dst, dst, 0);
+ if (insn->imm == 1) {
+ /* div by 1 is a nop, mod by 1 is zero */
+ if (bpf_op == BPF_MOD)
+ emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO);
+ break;
+ }
+ gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+ emit_instr(ctx, divu, dst, MIPS_R_AT);
+ if (bpf_op == BPF_DIV)
+ emit_instr(ctx, mflo, dst);
+ else
+ emit_instr(ctx, mfhi, dst);
+ break;
+ case BPF_ALU64 | BPF_DIV | BPF_K: /* ALU_IMM */
+ case BPF_ALU64 | BPF_MOD | BPF_K: /* ALU_IMM */
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+ if (dst < 0)
+ return dst;
+ if (insn->imm == 0) { /* Div by zero */
+ b_off = b_imm(exit_idx, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off);
+ emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO);
+ }
+ if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
+ emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+
+ if (insn->imm == 1) {
+ /* div by 1 is a nop, mod by 1 is zero */
+ if (bpf_op == BPF_MOD)
+ emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO);
+ break;
+ }
+ gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+ emit_instr(ctx, ddivu, dst, MIPS_R_AT);
+ if (bpf_op == BPF_DIV)
+ emit_instr(ctx, mflo, dst);
+ else
+ emit_instr(ctx, mfhi, dst);
+ break;
+ case BPF_ALU64 | BPF_MOV | BPF_X: /* ALU64_REG */
+ case BPF_ALU64 | BPF_ADD | BPF_X: /* ALU64_REG */
+ case BPF_ALU64 | BPF_SUB | BPF_X: /* ALU64_REG */
+ case BPF_ALU64 | BPF_XOR | BPF_X: /* ALU64_REG */
+ case BPF_ALU64 | BPF_OR | BPF_X: /* ALU64_REG */
+ case BPF_ALU64 | BPF_AND | BPF_X: /* ALU64_REG */
+ case BPF_ALU64 | BPF_MUL | BPF_X: /* ALU64_REG */
+ case BPF_ALU64 | BPF_DIV | BPF_X: /* ALU64_REG */
+ case BPF_ALU64 | BPF_MOD | BPF_X: /* ALU64_REG */
+ case BPF_ALU64 | BPF_LSH | BPF_X: /* ALU64_REG */
+ case BPF_ALU64 | BPF_RSH | BPF_X: /* ALU64_REG */
+ case BPF_ALU64 | BPF_ARSH | BPF_X: /* ALU64_REG */
+ src = ebpf_to_mips_reg(ctx, insn, src_reg);
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+ if (src < 0 || dst < 0)
+ return -EINVAL;
+ if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
+ emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+ did_move = false;
+ if (insn->src_reg == BPF_REG_10) {
+ if (bpf_op == BPF_MOV) {
+ emit_instr(ctx, daddiu, dst, MIPS_R_SP, MAX_BPF_STACK);
+ did_move = true;
+ } else {
+ emit_instr(ctx, daddiu, MIPS_R_AT, MIPS_R_SP, MAX_BPF_STACK);
+ src = MIPS_R_AT;
+ }
+ } else if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
+ int tmp_reg = MIPS_R_AT;
+
+ if (bpf_op == BPF_MOV) {
+ tmp_reg = dst;
+ did_move = true;
+ }
+ emit_instr(ctx, daddu, tmp_reg, src, MIPS_R_ZERO);
+ emit_instr(ctx, dinsu, tmp_reg, MIPS_R_ZERO, 32, 32);
+ src = MIPS_R_AT;
+ }
+ switch (bpf_op) {
+ case BPF_MOV:
+ if (!did_move)
+ emit_instr(ctx, daddu, dst, src, MIPS_R_ZERO);
+ break;
+ case BPF_ADD:
+ emit_instr(ctx, daddu, dst, dst, src);
+ break;
+ case BPF_SUB:
+ emit_instr(ctx, dsubu, dst, dst, src);
+ break;
+ case BPF_XOR:
+ emit_instr(ctx, xor, dst, dst, src);
+ break;
+ case BPF_OR:
+ emit_instr(ctx, or, dst, dst, src);
+ break;
+ case BPF_AND:
+ emit_instr(ctx, and, dst, dst, src);
+ break;
+ case BPF_MUL:
+ emit_instr(ctx, dmultu, dst, src);
+ emit_instr(ctx, mflo, dst);
+ break;
+ case BPF_DIV:
+ case BPF_MOD:
+ b_off = b_imm(exit_idx, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
+ emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
+ emit_instr(ctx, ddivu, dst, src);
+ if (bpf_op == BPF_DIV)
+ emit_instr(ctx, mflo, dst);
+ else
+ emit_instr(ctx, mfhi, dst);
+ break;
+ case BPF_LSH:
+ emit_instr(ctx, dsllv, dst, dst, src);
+ break;
+ case BPF_RSH:
+ emit_instr(ctx, dsrlv, dst, dst, src);
+ break;
+ case BPF_ARSH:
+ emit_instr(ctx, dsrav, dst, dst, src);
+ break;
+ default:
+ pr_err("ALU64_REG NOT HANDLED\n");
+ return -EINVAL;
+ }
+ break;
+ case BPF_ALU | BPF_MOV | BPF_X: /* ALU_REG */
+ case BPF_ALU | BPF_ADD | BPF_X: /* ALU_REG */
+ case BPF_ALU | BPF_SUB | BPF_X: /* ALU_REG */
+ case BPF_ALU | BPF_XOR | BPF_X: /* ALU_REG */
+ case BPF_ALU | BPF_OR | BPF_X: /* ALU_REG */
+ case BPF_ALU | BPF_AND | BPF_X: /* ALU_REG */
+ case BPF_ALU | BPF_MUL | BPF_X: /* ALU_REG */
+ case BPF_ALU | BPF_DIV | BPF_X: /* ALU_REG */
+ case BPF_ALU | BPF_MOD | BPF_X: /* ALU_REG */
+ case BPF_ALU | BPF_LSH | BPF_X: /* ALU_REG */
+ case BPF_ALU | BPF_RSH | BPF_X: /* ALU_REG */
+ src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+ if (src < 0 || dst < 0)
+ return -EINVAL;
+ td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+ if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) {
+ /* sign extend */
+ emit_instr(ctx, sll, dst, dst, 0);
+ }
+ did_move = false;
+ ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
+ if (ts == REG_64BIT || ts == REG_32BIT_ZERO_EX) {
+ int tmp_reg = MIPS_R_AT;
+
+ if (bpf_op == BPF_MOV) {
+ tmp_reg = dst;
+ did_move = true;
+ }
+ /* sign extend */
+ emit_instr(ctx, sll, tmp_reg, src, 0);
+ src = MIPS_R_AT;
+ }
+ switch (bpf_op) {
+ case BPF_MOV:
+ if (!did_move)
+ emit_instr(ctx, addu, dst, src, MIPS_R_ZERO);
+ break;
+ case BPF_ADD:
+ emit_instr(ctx, addu, dst, dst, src);
+ break;
+ case BPF_SUB:
+ emit_instr(ctx, subu, dst, dst, src);
+ break;
+ case BPF_XOR:
+ emit_instr(ctx, xor, dst, dst, src);
+ break;
+ case BPF_OR:
+ emit_instr(ctx, or, dst, dst, src);
+ break;
+ case BPF_AND:
+ emit_instr(ctx, and, dst, dst, src);
+ break;
+ case BPF_MUL:
+ emit_instr(ctx, mul, dst, dst, src);
+ break;
+ case BPF_DIV:
+ case BPF_MOD:
+ b_off = b_imm(exit_idx, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
+ emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
+ emit_instr(ctx, divu, dst, src);
+ if (bpf_op == BPF_DIV)
+ emit_instr(ctx, mflo, dst);
+ else
+ emit_instr(ctx, mfhi, dst);
+ break;
+ case BPF_LSH:
+ emit_instr(ctx, sllv, dst, dst, src);
+ break;
+ case BPF_RSH:
+ emit_instr(ctx, srlv, dst, dst, src);
+ break;
+ default:
+ pr_err("ALU_REG NOT HANDLED\n");
+ return -EINVAL;
+ }
+ break;
+ case BPF_JMP | BPF_EXIT:
+ if (this_idx + 1 < exit_idx) {
+ b_off = b_imm(exit_idx, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off);
+ emit_instr(ctx, nop);
+ }
+ break;
+ case BPF_JMP | BPF_JEQ | BPF_K: /* JMP_IMM */
+ case BPF_JMP | BPF_JNE | BPF_K: /* JMP_IMM */
+ cmp_eq = (bpf_op == BPF_JEQ);
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok);
+ if (dst < 0)
+ return dst;
+ if (insn->imm == 0) {
+ src = MIPS_R_ZERO;
+ } else {
+ gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+ src = MIPS_R_AT;
+ }
+ goto jeq_common;
+ case BPF_JMP | BPF_JEQ | BPF_X: /* JMP_REG */
+ case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+ if (src < 0 || dst < 0)
+ return -EINVAL;
+ td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+ ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
+ if (td == REG_32BIT && ts != REG_32BIT) {
+ emit_instr(ctx, sll, MIPS_R_AT, src, 0);
+ src = MIPS_R_AT;
+ } else if (ts == REG_32BIT && td != REG_32BIT) {
+ emit_instr(ctx, sll, MIPS_R_AT, dst, 0);
+ dst = MIPS_R_AT;
+ }
+ if (bpf_op == BPF_JSET) {
+ emit_instr(ctx, and, MIPS_R_AT, dst, src);
+ cmp_eq = false;
+ dst = MIPS_R_AT;
+ src = MIPS_R_ZERO;
+ } else if (bpf_op == BPF_JSGT || bpf_op == BPF_JSLE) {
+ emit_instr(ctx, dsubu, MIPS_R_AT, dst, src);
+ if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
+ b_off = b_imm(exit_idx, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ if (bpf_op == BPF_JSGT)
+ emit_instr(ctx, blez, MIPS_R_AT, b_off);
+ else
+ emit_instr(ctx, bgtz, MIPS_R_AT, b_off);
+ emit_instr(ctx, nop);
+ return 2; /* We consumed the exit. */
+ }
+ b_off = b_imm(this_idx + insn->off + 1, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ if (bpf_op == BPF_JSGT)
+ emit_instr(ctx, bgtz, MIPS_R_AT, b_off);
+ else
+ emit_instr(ctx, blez, MIPS_R_AT, b_off);
+ emit_instr(ctx, nop);
+ break;
+ } else if (bpf_op == BPF_JSGE || bpf_op == BPF_JSLT) {
+ emit_instr(ctx, slt, MIPS_R_AT, dst, src);
+ cmp_eq = bpf_op == BPF_JSGE;
+ dst = MIPS_R_AT;
+ src = MIPS_R_ZERO;
+ } else if (bpf_op == BPF_JGT || bpf_op == BPF_JLE) {
+ /* dst or src could be AT */
+ emit_instr(ctx, dsubu, MIPS_R_T8, dst, src);
+ emit_instr(ctx, sltu, MIPS_R_AT, dst, src);
+ /* SP known to be non-zero, movz becomes boolean not */
+ emit_instr(ctx, movz, MIPS_R_T9, MIPS_R_SP, MIPS_R_T8);
+ emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_ZERO, MIPS_R_T8);
+ emit_instr(ctx, or, MIPS_R_AT, MIPS_R_T9, MIPS_R_AT);
+ cmp_eq = bpf_op == BPF_JGT;
+ dst = MIPS_R_AT;
+ src = MIPS_R_ZERO;
+ } else if (bpf_op == BPF_JGE || bpf_op == BPF_JLT) {
+ emit_instr(ctx, sltu, MIPS_R_AT, dst, src);
+ cmp_eq = bpf_op == BPF_JGE;
+ dst = MIPS_R_AT;
+ src = MIPS_R_ZERO;
+ } else { /* JNE/JEQ case */
+ cmp_eq = (bpf_op == BPF_JEQ);
+ }
+jeq_common:
+ /*
+ * If the next insn is EXIT and we are jumping around
+ * only it, invert the sense of the compare and
+ * conditionally jump to the exit. Poor man's branch
+ * chaining.
+ */
+ if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
+ b_off = b_imm(exit_idx, ctx);
+ if (is_bad_offset(b_off)) {
+ target = j_target(ctx, exit_idx);
+ if (target == (unsigned int)-1)
+ return -E2BIG;
+ cmp_eq = !cmp_eq;
+ b_off = 4 * 3;
+ if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
+ ctx->offsets[this_idx] |= OFFSETS_B_CONV;
+ ctx->long_b_conversion = 1;
+ }
+ }
+
+ if (cmp_eq)
+ emit_instr(ctx, bne, dst, src, b_off);
+ else
+ emit_instr(ctx, beq, dst, src, b_off);
+ emit_instr(ctx, nop);
+ if (ctx->offsets[this_idx] & OFFSETS_B_CONV) {
+ emit_instr(ctx, j, target);
+ emit_instr(ctx, nop);
+ }
+ return 2; /* We consumed the exit. */
+ }
+ b_off = b_imm(this_idx + insn->off + 1, ctx);
+ if (is_bad_offset(b_off)) {
+ target = j_target(ctx, this_idx + insn->off + 1);
+ if (target == (unsigned int)-1)
+ return -E2BIG;
+ cmp_eq = !cmp_eq;
+ b_off = 4 * 3;
+ if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
+ ctx->offsets[this_idx] |= OFFSETS_B_CONV;
+ ctx->long_b_conversion = 1;
+ }
+ }
+
+ if (cmp_eq)
+ emit_instr(ctx, beq, dst, src, b_off);
+ else
+ emit_instr(ctx, bne, dst, src, b_off);
+ emit_instr(ctx, nop);
+ if (ctx->offsets[this_idx] & OFFSETS_B_CONV) {
+ emit_instr(ctx, j, target);
+ emit_instr(ctx, nop);
+ }
+ break;
+ case BPF_JMP | BPF_JSGT | BPF_K: /* JMP_IMM */
+ case BPF_JMP | BPF_JSGE | BPF_K: /* JMP_IMM */
+ case BPF_JMP | BPF_JSLT | BPF_K: /* JMP_IMM */
+ case BPF_JMP | BPF_JSLE | BPF_K: /* JMP_IMM */
+ cmp_eq = (bpf_op == BPF_JSGE);
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok);
+ if (dst < 0)
+ return dst;
+
+ if (insn->imm == 0) {
+ if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
+ b_off = b_imm(exit_idx, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ switch (bpf_op) {
+ case BPF_JSGT:
+ emit_instr(ctx, blez, dst, b_off);
+ break;
+ case BPF_JSGE:
+ emit_instr(ctx, bltz, dst, b_off);
+ break;
+ case BPF_JSLT:
+ emit_instr(ctx, bgez, dst, b_off);
+ break;
+ case BPF_JSLE:
+ emit_instr(ctx, bgtz, dst, b_off);
+ break;
+ }
+ emit_instr(ctx, nop);
+ return 2; /* We consumed the exit. */
+ }
+ b_off = b_imm(this_idx + insn->off + 1, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ switch (bpf_op) {
+ case BPF_JSGT:
+ emit_instr(ctx, bgtz, dst, b_off);
+ break;
+ case BPF_JSGE:
+ emit_instr(ctx, bgez, dst, b_off);
+ break;
+ case BPF_JSLT:
+ emit_instr(ctx, bltz, dst, b_off);
+ break;
+ case BPF_JSLE:
+ emit_instr(ctx, blez, dst, b_off);
+ break;
+ }
+ emit_instr(ctx, nop);
+ break;
+ }
+ /*
+ * only "LT" compare available, so we use imm + 1 for both
+ * "GT" and "LE": x > imm is !(x < imm + 1), and x <= imm
+ * is x < imm + 1.
+ */
+ if (bpf_op == BPF_JSGT)
+ t64s = insn->imm + 1;
+ else if (bpf_op == BPF_JSLE)
+ t64s = insn->imm + 1;
+ else
+ t64s = insn->imm;
+
+ cmp_eq = bpf_op == BPF_JSGT || bpf_op == BPF_JSGE;
+ if (t64s >= S16_MIN && t64s <= S16_MAX) {
+ emit_instr(ctx, slti, MIPS_R_AT, dst, (int)t64s);
+ src = MIPS_R_AT;
+ dst = MIPS_R_ZERO;
+ goto jeq_common;
+ }
+ emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s);
+ emit_instr(ctx, slt, MIPS_R_AT, dst, MIPS_R_AT);
+ src = MIPS_R_AT;
+ dst = MIPS_R_ZERO;
+ goto jeq_common;
+
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
+ cmp_eq = (bpf_op == BPF_JGE);
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok);
+ if (dst < 0)
+ return dst;
+ /*
+ * only "LT" compare available, so we use imm + 1 for both
+ * "GT" and "LE": x > imm is !(x < imm + 1), and x <= imm
+ * is x < imm + 1.
+ */
+ if (bpf_op == BPF_JGT)
+ t64s = (u64)(u32)(insn->imm) + 1;
+ else if (bpf_op == BPF_JLE)
+ t64s = (u64)(u32)(insn->imm) + 1;
+ else
+ t64s = (u64)(u32)(insn->imm);
+
+ cmp_eq = bpf_op == BPF_JGT || bpf_op == BPF_JGE;
+
+ emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s);
+ emit_instr(ctx, sltu, MIPS_R_AT, dst, MIPS_R_AT);
+ src = MIPS_R_AT;
+ dst = MIPS_R_ZERO;
+ goto jeq_common;
+
+ case BPF_JMP | BPF_JSET | BPF_K: /* JMP_IMM */
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok);
+ if (dst < 0)
+ return dst;
+
+ if (ctx->use_bbit_insns && hweight32((u32)insn->imm) == 1) {
+ if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
+ b_off = b_imm(exit_idx, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ emit_instr(ctx, bbit0, dst, ffs((u32)insn->imm) - 1, b_off);
+ emit_instr(ctx, nop);
+ return 2; /* We consumed the exit. */
+ }
+ b_off = b_imm(this_idx + insn->off + 1, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ emit_instr(ctx, bbit1, dst, ffs((u32)insn->imm) - 1, b_off);
+ emit_instr(ctx, nop);
+ break;
+ }
+ t64 = (u32)insn->imm;
+ emit_const_to_reg(ctx, MIPS_R_AT, t64);
+ emit_instr(ctx, and, MIPS_R_AT, dst, MIPS_R_AT);
+ src = MIPS_R_AT;
+ dst = MIPS_R_ZERO;
+ cmp_eq = false;
+ goto jeq_common;
+
+ case BPF_JMP | BPF_JA:
+ /*
+ * Prefer relative branch for easier debugging, but
+ * fall back if needed.
+ */
+ b_off = b_imm(this_idx + insn->off + 1, ctx);
+ if (is_bad_offset(b_off)) {
+ target = j_target(ctx, this_idx + insn->off + 1);
+ if (target == (unsigned int)-1)
+ return -E2BIG;
+ emit_instr(ctx, j, target);
+ } else {
+ emit_instr(ctx, b, b_off);
+ }
+ emit_instr(ctx, nop);
+ break;
+ case BPF_LD | BPF_DW | BPF_IMM:
+ if (insn->src_reg != 0)
+ return -EINVAL;
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+ if (dst < 0)
+ return dst;
+ t64 = ((u64)(u32)insn->imm) | ((u64)(insn + 1)->imm << 32);
+ emit_const_to_reg(ctx, dst, t64);
+ return 2; /* Double slot insn */
+
+ case BPF_JMP | BPF_CALL:
+ ctx->flags |= EBPF_SAVE_RA;
+ t64s = (s64)insn->imm + (s64)__bpf_call_base;
+ emit_const_to_reg(ctx, MIPS_R_T9, (u64)t64s);
+ emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
+ /* delay slot */
+ emit_instr(ctx, nop);
+ break;
+
+ case BPF_JMP | BPF_TAIL_CALL:
+ if (emit_bpf_tail_call(ctx, this_idx))
+ return -EINVAL;
+ break;
+
+ case BPF_LD | BPF_B | BPF_ABS:
+ case BPF_LD | BPF_H | BPF_ABS:
+ case BPF_LD | BPF_W | BPF_ABS:
+ case BPF_LD | BPF_DW | BPF_ABS:
+ ctx->flags |= EBPF_SAVE_RA;
+
+ gen_imm_to_reg(insn, MIPS_R_A1, ctx);
+ emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
+
+ if (insn->imm < 0) {
+ emit_const_to_reg(ctx, MIPS_R_T9, (u64)bpf_internal_load_pointer_neg_helper);
+ } else {
+ emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
+ emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
+ }
+ goto ld_skb_common;
+
+ case BPF_LD | BPF_B | BPF_IND:
+ case BPF_LD | BPF_H | BPF_IND:
+ case BPF_LD | BPF_W | BPF_IND:
+ case BPF_LD | BPF_DW | BPF_IND:
+ ctx->flags |= EBPF_SAVE_RA;
+ src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
+ if (src < 0)
+ return src;
+ ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
+ if (ts == REG_32BIT_ZERO_EX) {
+ /* sign extend */
+ emit_instr(ctx, sll, MIPS_R_A1, src, 0);
+ src = MIPS_R_A1;
+ }
+ if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) {
+ emit_instr(ctx, daddiu, MIPS_R_A1, src, insn->imm);
+ } else {
+ gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+ emit_instr(ctx, daddu, MIPS_R_A1, MIPS_R_AT, src);
+ }
+ /* truncate to 32-bit int */
+ emit_instr(ctx, sll, MIPS_R_A1, MIPS_R_A1, 0);
+ emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
+ emit_instr(ctx, slt, MIPS_R_AT, MIPS_R_A1, MIPS_R_ZERO);
+
+ emit_const_to_reg(ctx, MIPS_R_T8, (u64)bpf_internal_load_pointer_neg_helper);
+ emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
+ emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
+ emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_T8, MIPS_R_AT);
+
+ld_skb_common:
+ emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
+ /* delay slot move */
+ emit_instr(ctx, daddu, MIPS_R_A0, MIPS_R_S0, MIPS_R_ZERO);
+
+ /* Check the error value */
+ b_off = b_imm(exit_idx, ctx);
+ if (is_bad_offset(b_off)) {
+ target = j_target(ctx, exit_idx);
+ if (target == (unsigned int)-1)
+ return -E2BIG;
+
+ if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
+ ctx->offsets[this_idx] |= OFFSETS_B_CONV;
+ ctx->long_b_conversion = 1;
+ }
+ emit_instr(ctx, bne, MIPS_R_V0, MIPS_R_ZERO, 4 * 3);
+ emit_instr(ctx, nop);
+ emit_instr(ctx, j, target);
+ emit_instr(ctx, nop);
+ } else {
+ emit_instr(ctx, beq, MIPS_R_V0, MIPS_R_ZERO, b_off);
+ emit_instr(ctx, nop);
+ }
+
+#ifdef __BIG_ENDIAN
+ need_swap = false;
+#else
+ need_swap = true;
+#endif
+ dst = MIPS_R_V0;
+ switch (BPF_SIZE(insn->code)) {
+ case BPF_B:
+ emit_instr(ctx, lbu, dst, 0, MIPS_R_V0);
+ break;
+ case BPF_H:
+ emit_instr(ctx, lhu, dst, 0, MIPS_R_V0);
+ if (need_swap)
+ emit_instr(ctx, wsbh, dst, dst);
+ break;
+ case BPF_W:
+ emit_instr(ctx, lw, dst, 0, MIPS_R_V0);
+ if (need_swap) {
+ emit_instr(ctx, wsbh, dst, dst);
+ emit_instr(ctx, rotr, dst, dst, 16);
+ }
+ break;
+ case BPF_DW:
+ emit_instr(ctx, ld, dst, 0, MIPS_R_V0);
+ if (need_swap) {
+ emit_instr(ctx, dsbh, dst, dst);
+ emit_instr(ctx, dshd, dst, dst);
+ }
+ break;
+ }
+
+ break;
+ case BPF_ALU | BPF_END | BPF_FROM_BE:
+ case BPF_ALU | BPF_END | BPF_FROM_LE:
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+ if (dst < 0)
+ return dst;
+ td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+ if (insn->imm == 64 && td == REG_32BIT)
+ emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+
+ if (insn->imm != 64 &&
+ (td == REG_64BIT || td == REG_32BIT_ZERO_EX)) {
+ /* sign extend */
+ emit_instr(ctx, sll, dst, dst, 0);
+ }
+
+#ifdef __BIG_ENDIAN
+ need_swap = (BPF_SRC(insn->code) == BPF_FROM_LE);
+#else
+ need_swap = (BPF_SRC(insn->code) == BPF_FROM_BE);
+#endif
+ if (insn->imm == 16) {
+ if (need_swap)
+ emit_instr(ctx, wsbh, dst, dst);
+ emit_instr(ctx, andi, dst, dst, 0xffff);
+ } else if (insn->imm == 32) {
+ if (need_swap) {
+ emit_instr(ctx, wsbh, dst, dst);
+ emit_instr(ctx, rotr, dst, dst, 16);
+ }
+ } else { /* 64-bit*/
+ if (need_swap) {
+ emit_instr(ctx, dsbh, dst, dst);
+ emit_instr(ctx, dshd, dst, dst);
+ }
+ }
+ break;
+
+ case BPF_ST | BPF_B | BPF_MEM:
+ case BPF_ST | BPF_H | BPF_MEM:
+ case BPF_ST | BPF_W | BPF_MEM:
+ case BPF_ST | BPF_DW | BPF_MEM:
+ if (insn->dst_reg == BPF_REG_10) {
+ ctx->flags |= EBPF_SEEN_FP;
+ dst = MIPS_R_SP;
+ mem_off = insn->off + MAX_BPF_STACK;
+ } else {
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+ if (dst < 0)
+ return dst;
+ mem_off = insn->off;
+ }
+ gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+ switch (BPF_SIZE(insn->code)) {
+ case BPF_B:
+ emit_instr(ctx, sb, MIPS_R_AT, mem_off, dst);
+ break;
+ case BPF_H:
+ emit_instr(ctx, sh, MIPS_R_AT, mem_off, dst);
+ break;
+ case BPF_W:
+ emit_instr(ctx, sw, MIPS_R_AT, mem_off, dst);
+ break;
+ case BPF_DW:
+ emit_instr(ctx, sd, MIPS_R_AT, mem_off, dst);
+ break;
+ }
+ break;
+
+ case BPF_LDX | BPF_B | BPF_MEM:
+ case BPF_LDX | BPF_H | BPF_MEM:
+ case BPF_LDX | BPF_W | BPF_MEM:
+ case BPF_LDX | BPF_DW | BPF_MEM:
+ if (insn->src_reg == BPF_REG_10) {
+ ctx->flags |= EBPF_SEEN_FP;
+ src = MIPS_R_SP;
+ mem_off = insn->off + MAX_BPF_STACK;
+ } else {
+ src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
+ if (src < 0)
+ return src;
+ mem_off = insn->off;
+ }
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+ if (dst < 0)
+ return dst;
+ switch (BPF_SIZE(insn->code)) {
+ case BPF_B:
+ emit_instr(ctx, lbu, dst, mem_off, src);
+ break;
+ case BPF_H:
+ emit_instr(ctx, lhu, dst, mem_off, src);
+ break;
+ case BPF_W:
+ emit_instr(ctx, lw, dst, mem_off, src);
+ break;
+ case BPF_DW:
+ emit_instr(ctx, ld, dst, mem_off, src);
+ break;
+ }
+ break;
+
+ case BPF_STX | BPF_B | BPF_MEM:
+ case BPF_STX | BPF_H | BPF_MEM:
+ case BPF_STX | BPF_W | BPF_MEM:
+ case BPF_STX | BPF_DW | BPF_MEM:
+ case BPF_STX | BPF_W | BPF_XADD:
+ case BPF_STX | BPF_DW | BPF_XADD:
+ if (insn->dst_reg == BPF_REG_10) {
+ ctx->flags |= EBPF_SEEN_FP;
+ dst = MIPS_R_SP;
+ mem_off = insn->off + MAX_BPF_STACK;
+ } else {
+ dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
+ if (dst < 0)
+ return dst;
+ mem_off = insn->off;
+ }
+ src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
+ if (src < 0)
+ return dst;
+ if (BPF_MODE(insn->code) == BPF_XADD) {
+ switch (BPF_SIZE(insn->code)) {
+ case BPF_W:
+ if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
+ emit_instr(ctx, sll, MIPS_R_AT, src, 0);
+ src = MIPS_R_AT;
+ }
+ emit_instr(ctx, ll, MIPS_R_T8, mem_off, dst);
+ emit_instr(ctx, addu, MIPS_R_T8, MIPS_R_T8, src);
+ emit_instr(ctx, sc, MIPS_R_T8, mem_off, dst);
+ /*
+ * On failure back up to LL (-4
+ * instructions of 4 bytes each
+ */
+ emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4);
+ emit_instr(ctx, nop);
+ break;
+ case BPF_DW:
+ if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
+ emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO);
+ emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32);
+ src = MIPS_R_AT;
+ }
+ emit_instr(ctx, lld, MIPS_R_T8, mem_off, dst);
+ emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, src);
+ emit_instr(ctx, scd, MIPS_R_T8, mem_off, dst);
+ emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4);
+ emit_instr(ctx, nop);
+ break;
+ }
+ } else { /* BPF_MEM */
+ switch (BPF_SIZE(insn->code)) {
+ case BPF_B:
+ emit_instr(ctx, sb, src, mem_off, dst);
+ break;
+ case BPF_H:
+ emit_instr(ctx, sh, src, mem_off, dst);
+ break;
+ case BPF_W:
+ emit_instr(ctx, sw, src, mem_off, dst);
+ break;
+ case BPF_DW:
+ if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
+ emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO);
+ emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32);
+ src = MIPS_R_AT;
+ }
+ emit_instr(ctx, sd, src, mem_off, dst);
+ break;
+ }
+ }
+ break;
+
+ default:
+ pr_err("NOT HANDLED %d - (%02x)\n",
+ this_idx, (unsigned int)insn->code);
+ return -EINVAL;
+ }
+ return 1;
+}
+
+#define RVT_VISITED_MASK 0xc000000000000000ull /* bits 62..63: per-insn visit state */
+#define RVT_FALL_THROUGH 0x4000000000000000ull /* bit 62: fall-through edge has been followed */
+#define RVT_BRANCH_TAKEN 0x8000000000000000ull /* bit 63: branch-taken edge has been followed */
+#define RVT_DONE (RVT_FALL_THROUGH | RVT_BRANCH_TAKEN) /* both bits set: nothing left to follow */
+
+/*
+ * Run build_one_insn() over every visited eBPF insn of the program.
+ *
+ * While ctx->target is NULL (no output buffer yet) the byte offset of
+ * each insn, of the epilogue, and of every EXIT insn is recorded in
+ * ctx->offsets[].
+ *
+ * Returns 0 on success or the negative value returned by
+ * build_one_insn().
+ */
+static int build_int_body(struct jit_ctx *ctx)
+{
+	const struct bpf_prog *prog = ctx->skf;
+	int pos = 0;
+	int n;
+
+	while (pos < prog->len) {
+		int consumed;
+
+		/* No visit bits set: dead instruction, don't emit it. */
+		if (!(ctx->reg_val_types[pos] & RVT_VISITED_MASK)) {
+			pos++;
+			continue;
+		}
+
+		/* Record the insn's byte offset, preserving its OFFSETS_B_CONV flag. */
+		if (!ctx->target)
+			ctx->offsets[pos] = (ctx->offsets[pos] & OFFSETS_B_CONV) | (ctx->idx * 4);
+
+		/* build_one_insn() reports how many eBPF slots it consumed. */
+		consumed = build_one_insn(prog->insnsi + pos, ctx, pos, prog->len);
+		if (consumed < 0)
+			return consumed;
+		pos += consumed;
+	}
+
+	if (ctx->target)
+		return 0;
+
+	/* epilogue offset */
+	ctx->offsets[pos] = ctx->idx * 4;
+
+	/*
+	 * All exits have an offset of the epilogue, some offsets may
+	 * not have been set due to branch-around threading, so set
+	 * them now.
+	 */
+	for (n = 0; n < prog->len; n++)
+		if (prog->insnsi[n].code == (BPF_JMP | BPF_EXIT))
+			ctx->offsets[n] = ctx->idx * 4;
+
+	return 0;
+}
+
+/*
+ * Walk the program linearly from start_idx, storing in rvt[idx] the
+ * register value types in effect on entry to each insn (exit_rvt, seeded
+ * from initial_rvt) and updating exit_rvt with the type each insn gives
+ * its destination register. Visit state is kept in the RVT_VISITED_MASK
+ * bits of rvt[idx].
+ *
+ * If follow_taken is true, the first conditional branch met is followed
+ * along its taken edge; every later conditional branch falls through.
+ * The walk stops at the first BPF_EXIT or when idx runs past prog->len.
+ *
+ * return the last idx processed, or negative for error
+ * NOTE(review): no path visible in this function returns a negative
+ * value.
+ */
+static int reg_val_propagate_range(struct jit_ctx *ctx, u64 initial_rvt,
+ int start_idx, bool follow_taken)
+{
+ const struct bpf_prog *prog = ctx->skf;
+ const struct bpf_insn *insn;
+ u64 exit_rvt = initial_rvt;
+ u64 *rvt = ctx->reg_val_types;
+ int idx;
+ int reg;
+
+ for (idx = start_idx; idx < prog->len; idx++) {
+ rvt[idx] = (rvt[idx] & RVT_VISITED_MASK) | exit_rvt; /* keep visit bits, record types on entry */
+ insn = prog->insnsi + idx;
+ switch (BPF_CLASS(insn->code)) {
+ case BPF_ALU:
+ switch (BPF_OP(insn->code)) {
+ case BPF_ADD:
+ case BPF_SUB:
+ case BPF_MUL:
+ case BPF_DIV:
+ case BPF_OR:
+ case BPF_AND:
+ case BPF_LSH:
+ case BPF_RSH:
+ case BPF_NEG:
+ case BPF_MOD:
+ case BPF_XOR:
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+ break;
+ case BPF_MOV:
+ if (BPF_SRC(insn->code)) {
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+ } else {
+ /* IMM to REG move*/
+ if (insn->imm >= 0)
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+ else
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+ }
+ break;
+ case BPF_END:
+ if (insn->imm == 64)
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+ else if (insn->imm == 32)
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+ else /* insn->imm == 16 */
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+ break;
+ } /* any other ALU op leaves the recorded type of dst_reg unchanged */
+ rvt[idx] |= RVT_DONE;
+ break;
+ case BPF_ALU64:
+ switch (BPF_OP(insn->code)) {
+ case BPF_MOV:
+ if (BPF_SRC(insn->code)) {
+ /* REG to REG move*/
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+ } else {
+ /* IMM to REG move*/
+ if (insn->imm >= 0)
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+ else
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT);
+ }
+ break;
+ default:
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+ }
+ rvt[idx] |= RVT_DONE;
+ break;
+ case BPF_LD:
+ switch (BPF_SIZE(insn->code)) {
+ case BPF_DW:
+ if (BPF_MODE(insn->code) == BPF_IMM) {
+ s64 val;
+
+ val = (s64)((u32)insn->imm | ((u64)(insn + 1)->imm << 32)); /* 64-bit immediate spans two insn slots */
+ if (val > 0 && val <= S32_MAX)
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+ else if (val >= S32_MIN && val <= S32_MAX)
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT);
+ else
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+ rvt[idx] |= RVT_DONE;
+ idx++; /* step onto the second slot; it is marked done after the switch */
+ } else {
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+ }
+ break;
+ case BPF_B:
+ case BPF_H:
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+ break;
+ case BPF_W:
+ if (BPF_MODE(insn->code) == BPF_IMM)
+ set_reg_val_type(&exit_rvt, insn->dst_reg,
+ insn->imm >= 0 ? REG_32BIT_POS : REG_32BIT);
+ else
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+ break;
+ }
+ rvt[idx] |= RVT_DONE;
+ break;
+ case BPF_LDX:
+ switch (BPF_SIZE(insn->code)) {
+ case BPF_DW:
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+ break;
+ case BPF_B:
+ case BPF_H:
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+ break;
+ case BPF_W:
+ set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+ break;
+ }
+ rvt[idx] |= RVT_DONE;
+ break;
+ case BPF_JMP:
+ switch (BPF_OP(insn->code)) {
+ case BPF_EXIT:
+ rvt[idx] = RVT_DONE | exit_rvt;
+ rvt[prog->len] = exit_rvt; /* extra slot at index prog->len holds the types at exit */
+ return idx;
+ case BPF_JA:
+ rvt[idx] |= RVT_DONE;
+ idx += insn->off; /* with the loop's idx++ this continues at idx + off + 1 */
+ break;
+ case BPF_JEQ:
+ case BPF_JGT:
+ case BPF_JGE:
+ case BPF_JLT:
+ case BPF_JLE:
+ case BPF_JSET:
+ case BPF_JNE:
+ case BPF_JSGT:
+ case BPF_JSGE:
+ case BPF_JSLT:
+ case BPF_JSLE:
+ if (follow_taken) {
+ rvt[idx] |= RVT_BRANCH_TAKEN;
+ idx += insn->off; /* with the loop's idx++ this continues at idx + off + 1 */
+ follow_taken = false; /* only the first conditional branch is taken */
+ } else {
+ rvt[idx] |= RVT_FALL_THROUGH;
+ }
+ break;
+ case BPF_CALL:
+ set_reg_val_type(&exit_rvt, BPF_REG_0, REG_64BIT);
+ /* Upon call return, argument registers are clobbered. */
+ for (reg = BPF_REG_0; reg <= BPF_REG_5; reg++)
+ set_reg_val_type(&exit_rvt, reg, REG_64BIT);
+
+ rvt[idx] |= RVT_DONE;
+ break;
+ default:
+ WARN(1, "Unhandled BPF_JMP case.\n");
+ rvt[idx] |= RVT_DONE;
+ break;
+ }
+ break;
+ default:
+ rvt[idx] |= RVT_DONE; /* remaining classes do not change any recorded type */
+ break;
+ }
+ }
+ return idx;
+}
+
+/*
+ * Record, for every eBPF insn, whether each register holds a 32-bit or
+ * a 64-bit value on entry to that insn, so that unneeded sign and zero
+ * extension operations can be omitted.
+ *
+ * Confluence of control paths with conflicting ranges is not handled
+ * yet, but this is good enough for most sane code.
+ *
+ * Always returns 0.
+ */
+static int reg_val_propagate(struct jit_ctx *ctx)
+{
+	const struct bpf_prog *prog = ctx->skf;
+	/*
+	 * 11 registers * 3 bits/reg leaves top bits free for other
+	 * uses. Bit-62..63 used to see if we have visited an insn.
+	 */
+	u64 entry_rvt = 0;
+	int reg;
+	int i;
+
+	/* Upon entry, argument registers are 64-bit. */
+	for (reg = BPF_REG_1; reg <= BPF_REG_5; reg++)
+		set_reg_val_type(&entry_rvt, reg, REG_64BIT);
+
+	/* Initial walk: every conditional branch falls through. */
+	reg_val_propagate_range(ctx, entry_rvt, 0, false);
+
+	/*
+	 * Repeatedly look for the first conditional branch that has had
+	 * only one of its edges followed, and walk the other edge. The
+	 * scan restarts from insn 0 after each walk and the loop ends
+	 * when a full scan finds no such branch; any insn still
+	 * unvisited at that point is dead.
+	 */
+	for (;;) {
+		u64 visited = 0;
+		u64 rvt = 0;
+
+		for (i = 0; i < prog->len; i++) {
+			rvt = ctx->reg_val_types[i];
+			visited = rvt & RVT_VISITED_MASK;
+			if (visited != RVT_DONE && visited != 0)
+				break;
+		}
+		if (i >= prog->len)
+			break;
+
+		if (visited == RVT_FALL_THROUGH) {
+			reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, true);
+		} else { /* RVT_BRANCH_TAKEN */
+			WARN(1, "Unexpected RVT_BRANCH_TAKEN case.\n");
+			reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, false);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Fill callback handed to bpf_jit_binary_alloc(): writes one
+ * uasm_i_break(BRK_BUG) word for every whole u32 in [area, area + size).
+ */
+static void jit_fill_hole(void *area, unsigned int size)
+{
+	u32 *cursor = area;
+
+	/* We are guaranteed to have aligned memory. */
+	while (size >= sizeof(u32)) {
+		uasm_i_break(&cursor, BRK_BUG); /* advances cursor */
+		size -= sizeof(u32);
+	}
+}
+
+/*
+ * JIT-compile an eBPF program.
+ *
+ * Returns the program that should be run: on success the (possibly
+ * constant-blinded) program with bpf_func, jited and jited_len filled
+ * in; otherwise the original program unchanged, so the caller falls
+ * back to the interpreter. Nothing is compiled when bpf_jit_enable is
+ * clear or the CPU lacks cpu_has_mips64r2.
+ */
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+{
+	struct bpf_prog *orig_prog = prog;
+	bool tmp_blinded = false;
+	struct bpf_prog *tmp;
+	struct bpf_binary_header *header = NULL;
+	struct jit_ctx ctx;
+	unsigned int image_size;
+	u8 *image_ptr;
+
+	if (!bpf_jit_enable || !cpu_has_mips64r2)
+		return prog;
+
+	tmp = bpf_jit_blind_constants(prog);
+	/* If blinding was requested and we failed during blinding,
+	 * we must fall back to the interpreter.
+	 */
+	if (IS_ERR(tmp))
+		return orig_prog;
+	if (tmp != prog) {
+		tmp_blinded = true;
+		prog = tmp;
+	}
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	/* Only the Octeon family gets the bbit0/bbit1 branch instructions. */
+	preempt_disable();
+	switch (current_cpu_type()) {
+	case CPU_CAVIUM_OCTEON:
+	case CPU_CAVIUM_OCTEON_PLUS:
+	case CPU_CAVIUM_OCTEON2:
+	case CPU_CAVIUM_OCTEON3:
+		ctx.use_bbit_insns = 1;
+		break;
+	default:
+		ctx.use_bbit_insns = 0;
+	}
+	preempt_enable();
+
+	/* One extra slot each: index prog->len describes the epilogue/exit. */
+	ctx.offsets = kcalloc(prog->len + 1, sizeof(*ctx.offsets), GFP_KERNEL);
+	if (ctx.offsets == NULL)
+		goto out_err;
+
+	ctx.reg_val_types = kcalloc(prog->len + 1, sizeof(*ctx.reg_val_types), GFP_KERNEL);
+	if (ctx.reg_val_types == NULL)
+		goto out_err;
+
+	ctx.skf = prog;
+
+	if (reg_val_propagate(&ctx))
+		goto out_err;
+
+	/*
+	 * First pass discovers used resources and instruction offsets
+	 * assuming short branches are used.
+	 */
+	if (build_int_body(&ctx))
+		goto out_err;
+
+	/*
+	 * If no calls are made (EBPF_SAVE_RA), then tail call count
+	 * in $v1, else we must save in $s4.
+	 */
+	if (ctx.flags & EBPF_SEEN_TC) {
+		if (ctx.flags & EBPF_SAVE_RA)
+			ctx.flags |= EBPF_SAVE_S4;
+		else
+			ctx.flags |= EBPF_TCC_IN_V1;
+	}
+
+	/*
+	 * Second pass generates offsets, if any branches are out of
+	 * range a jump-around long sequence is generated, and we have
+	 * to try again from the beginning to generate the new
+	 * offsets. This is done until no additional conversions are
+	 * necessary.
+	 */
+	do {
+		ctx.idx = 0;
+		ctx.gen_b_offsets = 1;
+		ctx.long_b_conversion = 0;
+		if (gen_int_prologue(&ctx))
+			goto out_err;
+		if (build_int_body(&ctx))
+			goto out_err;
+		if (build_int_epilogue(&ctx, MIPS_R_RA))
+			goto out_err;
+	} while (ctx.long_b_conversion);
+
+	/* ctx.idx counts 4-byte instructions. */
+	image_size = 4 * ctx.idx;
+
+	header = bpf_jit_binary_alloc(image_size, &image_ptr,
+				      sizeof(u32), jit_fill_hole);
+	if (header == NULL)
+		goto out_err;
+
+	ctx.target = (u32 *)image_ptr;
+
+	/* Third pass generates the code */
+	ctx.idx = 0;
+	if (gen_int_prologue(&ctx))
+		goto out_err;
+	if (build_int_body(&ctx))
+		goto out_err;
+	if (build_int_epilogue(&ctx, MIPS_R_RA))
+		goto out_err;
+
+	/*
+	 * Update the icache. ctx.target is a u32 pointer, so the end of
+	 * the image is &ctx.target[ctx.idx]; scaling ctx.idx by
+	 * sizeof(u32) again would flush four times the image size.
+	 */
+	flush_icache_range((unsigned long)ctx.target,
+			   (unsigned long)&ctx.target[ctx.idx]);
+
+	if (bpf_jit_enable > 1)
+		/* Dump JIT code */
+		bpf_jit_dump(prog->len, image_size, 2, ctx.target);
+
+	bpf_jit_binary_lock_ro(header);
+	prog->bpf_func = (void *)ctx.target;
+	prog->jited = 1;
+	prog->jited_len = image_size;
+out_normal:
+	/* Release whichever of the blinded/original pair is not being returned. */
+	if (tmp_blinded)
+		bpf_jit_prog_release_other(prog, prog == orig_prog ?
+					   tmp : orig_prog);
+	kfree(ctx.offsets);
+	kfree(ctx.reg_val_types);
+
+	return prog;
+
+out_err:
+	/* Fall back to the original program and drop any allocated image. */
+	prog = orig_prog;
+	if (header)
+		bpf_jit_binary_free(header);
+	goto out_normal;
+}
diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c
index 174575a9a112..fc7726088103 100644
--- a/arch/mips/pci/pci-legacy.c
+++ b/arch/mips/pci/pci-legacy.c
@@ -78,6 +78,12 @@ static void pcibios_scanbus(struct pci_controller *hose)
static int need_domain_info;
LIST_HEAD(resources);
struct pci_bus *bus;
+ struct pci_host_bridge *bridge;
+ int ret;
+
+ bridge = pci_alloc_host_bridge(0);
+ if (!bridge)
+ return;
if (hose->get_busno && pci_has_flag(PCI_PROBE_ONLY))
next_busno = (*hose->get_busno)();
@@ -87,18 +93,24 @@ static void pcibios_scanbus(struct pci_controller *hose)
pci_add_resource_offset(&resources,
hose->io_resource, hose->io_offset);
pci_add_resource(&resources, hose->busn_resource);
- bus = pci_scan_root_bus(NULL, next_busno, hose->pci_ops, hose,
- &resources);
- hose->bus = bus;
+ list_splice_init(&resources, &bridge->windows);
+ bridge->dev.parent = NULL;
+ bridge->sysdata = hose;
+ bridge->busnr = next_busno;
+ bridge->ops = hose->pci_ops;
+ bridge->swizzle_irq = pci_common_swizzle;
+ bridge->map_irq = pcibios_map_irq;
+ ret = pci_scan_root_bus_bridge(bridge);
+ if (ret) {
+ pci_free_host_bridge(bridge);
+ return;
+ }
+
+ hose->bus = bus = bridge->bus;
need_domain_info = need_domain_info || pci_domain_nr(bus);
set_pci_need_domain_info(hose, need_domain_info);
- if (!bus) {
- pci_free_resource_list(&resources);
- return;
- }
-
next_busno = bus->busn_res.end + 1;
/* Don't allow 8-bit bus number overflow inside the hose -
reserve some space for bridges. */
@@ -224,8 +236,6 @@ static int __init pcibios_init(void)
list_for_each_entry(hose, &controllers, list)
pcibios_scanbus(hose);
- pci_fixup_irqs(pci_common_swizzle, pcibios_map_irq);
-
pci_initialized = 1;
return 0;
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index bd67ac74fe2d..9632436d74d7 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -28,16 +28,15 @@ EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
static int __init pcibios_set_cache_line_size(void)
{
- struct cpuinfo_mips *c = &current_cpu_data;
unsigned int lsize;
/*
* Set PCI cacheline size to that of the highest level in the
* cache hierarchy.
*/
- lsize = c->dcache.linesz;
- lsize = c->scache.linesz ? : lsize;
- lsize = c->tcache.linesz ? : lsize;
+ lsize = cpu_dcache_line_size();
+ lsize = cpu_scache_line_size() ? : lsize;
+ lsize = cpu_tcache_line_size() ? : lsize;
BUG_ON(!lsize);
diff --git a/arch/mips/vdso/gettimeofday.c b/arch/mips/vdso/gettimeofday.c
index 974276e828b2..e2690d7ca4dd 100644
--- a/arch/mips/vdso/gettimeofday.c
+++ b/arch/mips/vdso/gettimeofday.c
@@ -35,7 +35,8 @@ static __always_inline long gettimeofday_fallback(struct timeval *_tv,
" syscall\n"
: "=r" (ret), "=r" (error)
: "r" (tv), "r" (tz), "r" (nr)
- : "memory");
+ : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
+ "$14", "$15", "$24", "$25", "hi", "lo", "memory");
return error ? -ret : ret;
}
@@ -55,7 +56,8 @@ static __always_inline long clock_gettime_fallback(clockid_t _clkid,
" syscall\n"
: "=r" (ret), "=r" (error)
: "r" (clkid), "r" (ts), "r" (nr)
- : "memory");
+ : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
+ "$14", "$15", "$24", "$25", "hi", "lo", "memory");
return error ? -ret : ret;
}
diff --git a/arch/mn10300/configs/asb2303_defconfig b/arch/mn10300/configs/asb2303_defconfig
index 1fd41ec1dfb5..d06dae131139 100644
--- a/arch/mn10300/configs/asb2303_defconfig
+++ b/arch/mn10300/configs/asb2303_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_TINY_RCU=y
@@ -28,16 +27,13 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_MTD=y
CONFIG_MTD_DEBUG=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_REDBOOT_PARTS=y
CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_CFI=y
CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_CFI_ADV_OPTIONS=y
@@ -48,8 +44,6 @@ CONFIG_MTD_PHYSMAP=y
CONFIG_NETDEVICES=y
CONFIG_NET_ETHERNET=y
CONFIG_SMC91X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_WLAN is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
diff --git a/arch/mn10300/configs/asb2364_defconfig b/arch/mn10300/configs/asb2364_defconfig
index cd0a6cb17dee..b1d80cee97ee 100644
--- a/arch/mn10300/configs/asb2364_defconfig
+++ b/arch/mn10300/configs/asb2364_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -40,7 +39,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
CONFIG_IPV6=y
# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
@@ -50,10 +48,8 @@ CONFIG_IPV6=y
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_DEBUG=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_REDBOOT_PARTS=y
CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_CFI=y
CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_CFI_ADV_OPTIONS=y
@@ -64,8 +60,6 @@ CONFIG_MTD_PHYSMAP=y
CONFIG_NETDEVICES=y
CONFIG_NET_ETHERNET=y
CONFIG_SMSC911X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -77,7 +71,6 @@ CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
-# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_PROC_KCORE=y
# CONFIG_PROC_PAGE_MONITOR is not set
@@ -93,4 +86,3 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h
index 9c7b8f7942d8..fe413b41df6c 100644
--- a/arch/mn10300/include/asm/spinlock.h
+++ b/arch/mn10300/include/asm/spinlock.h
@@ -26,11 +26,6 @@
#define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) != 0)
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- smp_cond_load_acquire(&lock->slock, !VAL);
-}
-
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
asm volatile(
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index c710db354ff2..ac82a3f26dbf 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -102,4 +102,6 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 1e95920b0737..a0f2e4a323c1 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -29,6 +29,9 @@ config OPENRISC
select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1
select NO_BOOTMEM
+config CPU_BIG_ENDIAN
+ def_bool y
+
config MMU
def_bool y
diff --git a/arch/openrisc/include/asm/futex.h b/arch/openrisc/include/asm/futex.h
index 778087341977..8fed278a24b8 100644
--- a/arch/openrisc/include/asm/futex.h
+++ b/arch/openrisc/include/asm/futex.h
@@ -30,20 +30,10 @@
})
static inline int
-futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
- return -EFAULT;
-
pagefault_disable();
switch (op) {
@@ -68,30 +58,9 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
pagefault_enable();
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ:
- ret = (oldval == cmparg);
- break;
- case FUTEX_OP_CMP_NE:
- ret = (oldval != cmparg);
- break;
- case FUTEX_OP_CMP_LT:
- ret = (oldval < cmparg);
- break;
- case FUTEX_OP_CMP_GE:
- ret = (oldval >= cmparg);
- break;
- case FUTEX_OP_CMP_LE:
- ret = (oldval <= cmparg);
- break;
- case FUTEX_OP_CMP_GT:
- ret = (oldval > cmparg);
- break;
- default:
- ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = oldval;
+
return ret;
}
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index dda1f558ef35..ba7b7ddc3844 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -9,6 +9,9 @@ config PARISC
select ARCH_WANT_FRAME_POINTERS
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_STRICT_KERNEL_RWX
+ select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_WANTS_UBSAN_NO_NULL
+ select ARCH_SUPPORTS_MEMORY_FAILURE
select RTC_CLASS
select RTC_DRV_GENERIC
select INIT_ALL_POSSIBLE
@@ -17,6 +20,12 @@ config PARISC
select BUG
select BUILDTIME_EXTABLE_SORT
select HAVE_PERF_EVENTS
+ select HAVE_KERNEL_BZIP2
+ select HAVE_KERNEL_GZIP
+ select HAVE_KERNEL_LZ4
+ select HAVE_KERNEL_LZMA
+ select HAVE_KERNEL_LZO
+ select HAVE_KERNEL_XZ
select GENERIC_ATOMIC64 if !64BIT
select GENERIC_IRQ_PROBE
select GENERIC_PCI_IOMAP
@@ -50,6 +59,9 @@ config PARISC
config CPU_BIG_ENDIAN
def_bool y
+config CPU_BIG_ENDIAN
+ def_bool y
+
config MMU
def_bool y
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 75cb451b1f03..58fae5d2449d 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -24,15 +24,20 @@ KBUILD_DEFCONFIG := default_defconfig
NM = sh $(srctree)/arch/parisc/nm
CHECKFLAGS += -D__hppa__=1
LIBGCC = $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
+export LIBGCC
ifdef CONFIG_64BIT
UTS_MACHINE := parisc64
CHECKFLAGS += -D__LP64__=1 -m64
CC_ARCHES = hppa64
+LD_BFD := elf64-hppa-linux
else # 32-bit
CC_ARCHES = hppa hppa2.0 hppa1.1
+LD_BFD := elf32-hppa-linux
endif
+export LD_BFD
+
ifneq ($(SUBARCH),$(UTS_MACHINE))
ifeq ($(CROSS_COMPILE),)
CC_SUFFIXES = linux linux-gnu unknown-linux-gnu
@@ -88,6 +93,8 @@ libs-y += arch/parisc/lib/ $(LIBGCC)
drivers-$(CONFIG_OPROFILE) += arch/parisc/oprofile/
+boot := arch/parisc/boot
+
PALO := $(shell if (which palo 2>&1); then : ; \
elif [ -x /sbin/palo ]; then echo /sbin/palo; \
fi)
@@ -116,11 +123,14 @@ INSTALL_TARGETS = zinstall install
PHONY += bzImage $(BOOT_TARGETS) $(INSTALL_TARGETS)
-bzImage zImage: vmlinuz
+zImage: vmlinuz
Image: vmlinux
-vmlinuz: vmlinux
- @gzip -cf -9 $< > $@
+bzImage: vmlinux
+ $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+vmlinuz: bzImage
+ $(OBJCOPY) $(boot)/bzImage $@
install:
$(CONFIG_SHELL) $(src)/arch/parisc/install.sh \
diff --git a/arch/parisc/boot/.gitignore b/arch/parisc/boot/.gitignore
new file mode 100644
index 000000000000..017d5912ad2d
--- /dev/null
+++ b/arch/parisc/boot/.gitignore
@@ -0,0 +1,2 @@
+image
+bzImage
diff --git a/arch/parisc/boot/Makefile b/arch/parisc/boot/Makefile
new file mode 100644
index 000000000000..cad68a584884
--- /dev/null
+++ b/arch/parisc/boot/Makefile
@@ -0,0 +1,26 @@
+#
+# Makefile for the linux parisc-specific parts of the boot image creator.
+#
+
+COMPILE_VERSION := __linux_compile_version_id__`hostname | \
+ tr -c '[0-9A-Za-z]' '_'`__`date | \
+ tr -c '[0-9A-Za-z]' '_'`_t
+
+ccflags-y := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I.
+
+targets := image
+targets += bzImage
+subdir- := compressed
+
+$(obj)/image: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/bzImage: $(obj)/compressed/vmlinux FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/compressed/vmlinux: FORCE
+ $(Q)$(MAKE) $(build)=$(obj)/compressed $@
+
+install: $(CONFIGURE) $(obj)/bzImage
+ sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
+ System.map "$(INSTALL_PATH)"
diff --git a/arch/parisc/boot/compressed/.gitignore b/arch/parisc/boot/compressed/.gitignore
new file mode 100644
index 000000000000..ae06b9b4c02f
--- /dev/null
+++ b/arch/parisc/boot/compressed/.gitignore
@@ -0,0 +1,3 @@
+sizes.h
+vmlinux
+vmlinux.lds
diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile
new file mode 100644
index 000000000000..5450a11c9d10
--- /dev/null
+++ b/arch/parisc/boot/compressed/Makefile
@@ -0,0 +1,86 @@
+#
+# linux/arch/parisc/boot/compressed/Makefile
+#
+# create a compressed self-extracting vmlinux image from the original vmlinux
+#
+
+KCOV_INSTRUMENT := n
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+
+targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
+targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
+targets += misc.o piggy.o sizes.h head.o real2.o firmware.o
+
+KBUILD_CFLAGS := -D__KERNEL__ -O2 -DBOOTLOADER
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks
+KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs
+ifndef CONFIG_64BIT
+KBUILD_CFLAGS += -mfast-indirect-calls
+endif
+
+OBJECTS += $(obj)/head.o $(obj)/real2.o $(obj)/firmware.o $(obj)/misc.o $(obj)/piggy.o
+
+# LDFLAGS_vmlinux := -X --whole-archive -e startup -T
+LDFLAGS_vmlinux := -X -e startup --as-needed -T
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(LIBGCC)
+ $(call if_changed,ld)
+
+sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\|parisc_kernel_start\)$$/\#define SZ\2 0x\1/p'
+
+quiet_cmd_sizes = GEN $@
+ cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@
+
+$(obj)/sizes.h: vmlinux
+ $(call if_changed,sizes)
+
+AFLAGS_head.o += -I$(objtree)/$(obj) -DBOOTLOADER
+$(obj)/head.o: $(obj)/sizes.h
+
+CFLAGS_misc.o += -I$(objtree)/$(obj)
+$(obj)/misc.o: $(obj)/sizes.h
+
+$(obj)/firmware.o: $(obj)/firmware.c
+$(obj)/firmware.c: $(srctree)/arch/$(SRCARCH)/kernel/firmware.c
+ $(call cmd,shipped)
+
+AFLAGS_real2.o += -DBOOTLOADER
+$(obj)/real2.o: $(obj)/real2.S
+$(obj)/real2.S: $(srctree)/arch/$(SRCARCH)/kernel/real2.S
+ $(call cmd,shipped)
+
+$(obj)/misc.o: $(obj)/sizes.h
+
+CPPFLAGS_vmlinux.lds += -I$(objtree)/$(obj) -DBOOTLOADER
+$(obj)/vmlinux.lds: $(obj)/sizes.h
+
+OBJCOPYFLAGS_vmlinux.bin := -O binary -R .comment -S
+$(obj)/vmlinux.bin: vmlinux
+ $(call if_changed,objcopy)
+
+vmlinux.bin.all-y := $(obj)/vmlinux.bin
+
+suffix-$(CONFIG_KERNEL_GZIP) := gz
+suffix-$(CONFIG_KERNEL_BZIP2) := bz2
+suffix-$(CONFIG_KERNEL_LZ4) := lz4
+suffix-$(CONFIG_KERNEL_LZMA) := lzma
+suffix-$(CONFIG_KERNEL_LZO) := lzo
+suffix-$(CONFIG_KERNEL_XZ) := xz
+
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y)
+ $(call if_changed,gzip)
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y)
+ $(call if_changed,bzip2)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y)
+ $(call if_changed,lz4)
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y)
+ $(call if_changed,lzma)
+$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y)
+ $(call if_changed,lzo)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y)
+ $(call if_changed,xzkern)
+
+LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T
+$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y)
+ $(call if_changed,ld)
diff --git a/arch/parisc/boot/compressed/head.S b/arch/parisc/boot/compressed/head.S
new file mode 100644
index 000000000000..5aba20fa48aa
--- /dev/null
+++ b/arch/parisc/boot/compressed/head.S
@@ -0,0 +1,85 @@
+/*
+ * Startup glue code to uncompress the kernel
+ *
+ * (C) 2017 Helge Deller <deller@gmx.de>
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/psw.h>
+#include <asm/pdc.h>
+#include <asm/assembly.h>
+#include "sizes.h"
+
+#define BOOTADDR(x) (x)
+
+#ifndef CONFIG_64BIT
+ .import $global$ /* forward declaration */
+#endif /*!CONFIG_64BIT*/
+
+ __HEAD
+
+ENTRY(startup)
+ .level LEVEL
+
+#define PSW_W_SM 0x200
+#define PSW_W_BIT 36
+
+ ;! nuke the W bit, saving original value
+ .level 2.0
+ rsm PSW_W_SM, %r1
+
+ .level 1.1
+ extrw,u %r1, PSW_W_BIT-32, 1, %r1
+ copy %r1, %arg0
+
+ /* Make sure sr4-sr7 are set to zero for the kernel address space */
+ mtsp %r0,%sr4
+ mtsp %r0,%sr5
+ mtsp %r0,%sr6
+ mtsp %r0,%sr7
+
+ /* Clear BSS */
+
+ .import _bss,data
+ .import _ebss,data
+
+ load32 BOOTADDR(_bss),%r3
+ load32 BOOTADDR(_ebss),%r4
+ ldo FRAME_SIZE(%r4),%sp /* stack at end of bss */
+$bss_loop:
+ cmpb,<<,n %r3,%r4,$bss_loop
+ stw,ma %r0,4(%r3)
+
+ /* Initialize the global data pointer */
+ loadgp
+
+ /* arg0..arg4 were set by palo. */
+ copy %arg1, %r6 /* command line */
+ copy %arg2, %r7 /* rd-start */
+ copy %arg3, %r8 /* rd-end */
+ load32 BOOTADDR(decompress_kernel),%r3
+
+#ifdef CONFIG_64BIT
+ .level LEVEL
+ ssm PSW_W_SM, %r0 /* set W-bit */
+ depdi 0, 31, 32, %r3
+#endif
+ load32 BOOTADDR(startup_continue), %r2
+ bv,n 0(%r3)
+
+startup_continue:
+#ifdef CONFIG_64BIT
+ .level LEVEL
+ rsm PSW_W_SM, %r0 /* clear W-bit */
+#endif
+
+ load32 KERNEL_BINARY_TEXT_START, %arg0 /* free mem */
+ copy %r6, %arg1 /* command line */
+ copy %r7, %arg2 /* rd-start */
+ copy %r8, %arg3 /* rd-end */
+
+ bv,n 0(%ret0)
+END(startup)
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
new file mode 100644
index 000000000000..13a4bf9ac4da
--- /dev/null
+++ b/arch/parisc/boot/compressed/misc.c
@@ -0,0 +1,301 @@
+/*
+ * Definitions and wrapper functions for kernel decompressor
+ *
+ * (C) 2017 Helge Deller <deller@gmx.de>
+ */
+
+#include <linux/uaccess.h>
+#include <asm/unaligned.h>
+#include <asm/page.h>
+#include "sizes.h"
+
+/*
+ * gzip declarations
+ */
+#define STATIC static
+
+#undef memmove
+#define memmove memmove
+#define memzero(s, n) memset((s), 0, (n))
+
+#define malloc malloc_gzip
+#define free free_gzip
+
+/* Symbols defined by linker scripts */
+extern char input_data[];
+extern int input_len;
+extern __le32 output_len; /* at unaligned address, little-endian */
+extern char _text, _end;
+extern char _bss, _ebss;
+extern char _startcode_end;
+extern void startup_continue(void *entry, unsigned long cmdline,
+ unsigned long rd_start, unsigned long rd_end) __noreturn;
+
+void error(char *m) __noreturn;
+
+static unsigned long free_mem_ptr;
+static unsigned long free_mem_end_ptr;
+
+#ifdef CONFIG_KERNEL_GZIP
+#include "../../../../lib/decompress_inflate.c"
+#endif
+
+#ifdef CONFIG_KERNEL_BZIP2
+#include "../../../../lib/decompress_bunzip2.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZMA
+#include "../../../../lib/decompress_unlzma.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZO
+#include "../../../../lib/decompress_unlzo.c"
+#endif
+
+#ifdef CONFIG_KERNEL_XZ
+#include "../../../../lib/decompress_unxz.c"
+#endif
+
+void *memmove(void *dest, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dest;
+
+ if (d <= s) {
+ while (n--)
+ *d++ = *s++;
+ } else {
+ d += n;
+ s += n;
+ while (n--)
+ *--d = *--s;
+ }
+ return dest;
+}
+
+void *memset(void *s, int c, size_t count)
+{
+ char *xs = (char *)s;
+
+ while (count--)
+ *xs++ = c;
+ return s;
+}
+
+void *memcpy(void *d, const void *s, size_t len)
+{
+ char *dest = (char *)d;
+ const char *source = (const char *)s;
+
+ while (len--)
+ *dest++ = *source++;
+ return d;
+}
+
+size_t strlen(const char *s)
+{
+ const char *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ ;
+ return sc - s;
+}
+
+char *strchr(const char *s, int c)
+{
+ while (*s) {
+ if (*s == (char)c)
+ return (char *)s;
+ ++s;
+ }
+ return NULL;
+}
+
+int puts(const char *s)
+{
+ const char *nuline = s;
+
+ while ((nuline = strchr(s, '\n')) != NULL) {
+ if (nuline != s)
+ pdc_iodc_print(s, nuline - s);
+ pdc_iodc_print("\r\n", 2);
+ s = nuline + 1;
+ }
+ if (*s != '\0')
+ pdc_iodc_print(s, strlen(s));
+
+ return 0;
+}
+
+static int putchar(int c)
+{
+ char buf[2];
+
+ buf[0] = c;
+ buf[1] = '\0';
+ puts(buf);
+ return c;
+}
+
+void __noreturn error(char *x)
+{
+ puts("\n\n");
+ puts(x);
+ puts("\n\n -- System halted");
+ while (1) /* wait forever */
+ ;
+}
+
+static int print_hex(unsigned long num)
+{
+ const char hex[] = "0123456789abcdef";
+ char str[40];
+ int i = sizeof(str)-1;
+
+ str[i--] = '\0';
+ do {
+ str[i--] = hex[num & 0x0f];
+ num >>= 4;
+ } while (num);
+
+ str[i--] = 'x';
+ str[i] = '0';
+ puts(&str[i]);
+
+ return 0;
+}
+
+int printf(const char *fmt, ...)
+{
+ va_list args;
+ int i = 0;
+
+ va_start(args, fmt);
+
+ while (fmt[i]) {
+ if (fmt[i] != '%') {
+put:
+ putchar(fmt[i++]);
+ continue;
+ }
+
+ if (fmt[++i] == '%')
+ goto put;
+ ++i;
+ print_hex(va_arg(args, unsigned long));
+ }
+
+ va_end(args);
+ return 0;
+}
+
+/* helper functions for libgcc */
+void abort(void)
+{
+ error("aborted.");
+}
+
+#undef malloc
+void *malloc(size_t size)
+{
+ return malloc_gzip(size);
+}
+
+#undef free
+void free(void *ptr)
+{
+ return free_gzip(ptr);
+}
+
+
+static void flush_data_cache(char *start, unsigned long length)
+{
+ char *end = start + length;
+
+ do {
+ asm volatile("fdc 0(%0)" : : "r" (start));
+ asm volatile("fic 0(%%sr0,%0)" : : "r" (start));
+ start += 16;
+ } while (start < end);
+ asm volatile("fdc 0(%0)" : : "r" (end));
+
+ asm ("sync");
+}
+
+unsigned long decompress_kernel(unsigned int started_wide,
+ unsigned int command_line,
+ const unsigned int rd_start,
+ const unsigned int rd_end)
+{
+ char *output;
+ unsigned long len, len_all;
+
+#ifdef CONFIG_64BIT
+ parisc_narrow_firmware = 0;
+#endif
+
+ set_firmware_width_unlocked();
+
+ putchar('U'); /* if you get this p and no more, string storage */
+ /* in $GLOBAL$ is wrong or %dp is wrong */
+ puts("ncompressing ...\n");
+
+ output = (char *) KERNEL_BINARY_TEXT_START;
+ len_all = __pa(SZ_end) - __pa(SZparisc_kernel_start);
+
+ if ((unsigned long) &_startcode_end > (unsigned long) output)
+ error("Bootcode overlaps kernel code");
+
+ len = get_unaligned_le32(&output_len);
+ if (len > len_all)
+ error("Output len too big.");
+ else
+ memset(&output[len], 0, len_all - len);
+
+ /*
+ * Initialize free_mem_ptr and free_mem_end_ptr.
+ */
+ free_mem_ptr = (unsigned long) &_ebss;
+ free_mem_ptr += 2*1024*1024; /* leave 2 MB for stack */
+
+ /* Limit memory for bootloader to 1GB */
+ #define ARTIFICIAL_LIMIT (1*1024*1024*1024)
+ free_mem_end_ptr = PAGE0->imm_max_mem;
+ if (free_mem_end_ptr > ARTIFICIAL_LIMIT)
+ free_mem_end_ptr = ARTIFICIAL_LIMIT;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+ /* if we have ramdisk this is at end of memory */
+ if (rd_start && rd_start < free_mem_end_ptr)
+ free_mem_end_ptr = rd_start;
+#endif
+
+#ifdef DEBUG
+ printf("startcode_end = %x\n", &_startcode_end);
+ printf("commandline = %x\n", command_line);
+ printf("rd_start = %x\n", rd_start);
+ printf("rd_end = %x\n", rd_end);
+
+ printf("free_ptr = %x\n", free_mem_ptr);
+ printf("free_ptr_end = %x\n", free_mem_end_ptr);
+
+ printf("input_data = %x\n", input_data);
+ printf("input_len = %x\n", input_len);
+ printf("output = %x\n", output);
+ printf("output_len = %x\n", len);
+ printf("output_max = %x\n", len_all);
+#endif
+
+ __decompress(input_data, input_len, NULL, NULL,
+ output, 0, NULL, error);
+
+ flush_data_cache(output, len);
+
+ printf("Booting kernel ...\n\n");
+
+ return (unsigned long) output;
+}
diff --git a/arch/parisc/boot/compressed/vmlinux.lds.S b/arch/parisc/boot/compressed/vmlinux.lds.S
new file mode 100644
index 000000000000..a4ce3314e78e
--- /dev/null
+++ b/arch/parisc/boot/compressed/vmlinux.lds.S
@@ -0,0 +1,101 @@
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/page.h>
+#include "sizes.h"
+
+#ifndef CONFIG_64BIT
+OUTPUT_FORMAT("elf32-hppa-linux")
+OUTPUT_ARCH(hppa)
+#else
+OUTPUT_FORMAT("elf64-hppa-linux")
+OUTPUT_ARCH(hppa:hppa2.0w)
+#endif
+
+ENTRY(startup)
+
+SECTIONS
+{
+ /* palo loads at 0x60000 */
+ /* loaded kernel will move to 0x10000 */
+ . = 0xe0000; /* should not overwrite palo code */
+
+ .head.text : {
+ _head = . ;
+ HEAD_TEXT
+ _ehead = . ;
+ }
+
+ /* keep __gp below 0x1000000 */
+#ifdef CONFIG_64BIT
+ . = ALIGN(16);
+ /* Linkage tables */
+ .opd : {
+ *(.opd)
+ } PROVIDE (__gp = .);
+ .plt : {
+ *(.plt)
+ }
+ .dlt : {
+ *(.dlt)
+ }
+#endif
+ _startcode_end = .;
+
+ /* bootloader code and data starts behind area of extracted kernel */
+ . = (SZ_end - SZparisc_kernel_start + KERNEL_BINARY_TEXT_START);
+
+ /* align on next page boundary */
+ . = ALIGN(4096);
+ .text : {
+ _text = .; /* Text */
+ *(.text)
+ *(.text.*)
+ _etext = . ;
+ }
+ . = ALIGN(8);
+ .data : {
+ _data = . ;
+ *(.data)
+ *(.data.*)
+ _edata = . ;
+ }
+ . = ALIGN(8);
+ .rodata : {
+ _rodata = . ;
+ *(.rodata) /* read-only data */
+ *(.rodata.*)
+ _erodata = . ;
+ }
+ . = ALIGN(8);
+ .rodata.compressed : {
+ *(.rodata.compressed)
+ }
+ . = ALIGN(8);
+ .bss : {
+ _bss = . ;
+ *(.bss)
+ *(.bss.*)
+ *(COMMON)
+ . = ALIGN(4096);
+ _ebss = .;
+ }
+
+ STABS_DEBUG
+ .note 0 : { *(.note) }
+
+ /* Sections to be discarded */
+ DISCARDS
+ /DISCARD/ : {
+#ifdef CONFIG_64BIT
+ /* temporary hack until binutils is fixed to not emit these
+ * for static binaries
+ */
+ *(.PARISC.unwind) /* no unwind data */
+ *(.interp)
+ *(.dynsym)
+ *(.dynstr)
+ *(.dynamic)
+ *(.hash)
+ *(.gnu.hash)
+#endif
+ }
+}
diff --git a/arch/parisc/boot/compressed/vmlinux.scr b/arch/parisc/boot/compressed/vmlinux.scr
new file mode 100644
index 000000000000..dac2d142bcfa
--- /dev/null
+++ b/arch/parisc/boot/compressed/vmlinux.scr
@@ -0,0 +1,10 @@
+SECTIONS
+{
+ .rodata.compressed : {
+ input_len = .;
+ LONG(input_data_end - input_data) input_data = .;
+ *(.data)
+ output_len = . - 4; /* can be at unaligned address */
+ input_data_end = .;
+ }
+}
diff --git a/arch/parisc/boot/install.sh b/arch/parisc/boot/install.sh
new file mode 100644
index 000000000000..8f7c365fad83
--- /dev/null
+++ b/arch/parisc/boot/install.sh
@@ -0,0 +1,65 @@
+#!/bin/sh
+#
+# arch/parisc/boot/install.sh, derived from arch/i386/boot/install.sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+#
+# "make install" script for parisc architecture
+#
+# Arguments:
+# $1 - kernel version
+# $2 - kernel image file
+# $3 - kernel map file
+# $4 - default install path (blank if root directory)
+#
+
+verify () {
+ if [ ! -f "$1" ]; then
+ echo "" 1>&2
+ echo " *** Missing file: $1" 1>&2
+ echo ' *** You need to run "make" before "make install".' 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+}
+
+# Make sure the files actually exist
+
+verify "$2"
+verify "$3"
+
+# User may have a custom install script
+
+if [ -n "${INSTALLKERNEL}" ]; then
+ if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+ if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
+fi
+
+# Default install
+
+if [ "$(basename $2)" = "zImage" ]; then
+# Compressed install
+ echo "Installing compressed kernel"
+ base=vmlinuz
+else
+# Normal install
+ echo "Installing normal kernel"
+ base=vmlinux
+fi
+
+if [ -f $4/$base-$1 ]; then
+ mv $4/$base-$1 $4/$base-$1.old
+fi
+cat $2 > $4/$base-$1
+
+# Install system map file
+if [ -f $4/System.map-$1 ]; then
+ mv $4/System.map-$1 $4/System.map-$1.old
+fi
+cp $3 $4/System.map-$1
diff --git a/arch/parisc/configs/c3000_defconfig b/arch/parisc/configs/c3000_defconfig
index 0764d3971cf6..8d41a73bd71b 100644
--- a/arch/parisc/configs/c3000_defconfig
+++ b/arch/parisc/configs/c3000_defconfig
@@ -31,7 +31,6 @@ CONFIG_IP_PNP_BOOTP=y
CONFIG_INET6_IPCOMP=m
CONFIG_IPV6_TUNNEL=m
CONFIG_NETFILTER=y
-CONFIG_NETFILTER_DEBUG=y
CONFIG_NET_PKTGEN=m
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 5394b9c5f914..17b98a87e5e2 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -65,6 +65,8 @@ static __inline__ void atomic_set(atomic_t *v, int i)
_atomic_spin_unlock_irqrestore(v, flags);
}
+#define atomic_set_release(v, i) atomic_set((v), (i))
+
static __inline__ int atomic_read(const atomic_t *v)
{
return READ_ONCE((v)->counter);
diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h
index 0ba14300cd8e..c601aab2fb36 100644
--- a/arch/parisc/include/asm/futex.h
+++ b/arch/parisc/include/asm/futex.h
@@ -32,22 +32,12 @@ _futex_spin_unlock_irqrestore(u32 __user *uaddr, unsigned long int *flags)
}
static inline int
-futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
unsigned long int flags;
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval, ret;
u32 tmp;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(*uaddr)))
- return -EFAULT;
-
_futex_spin_lock_irqsave(uaddr, &flags);
pagefault_disable();
@@ -85,17 +75,9 @@ out_pagefault_enable:
pagefault_enable();
_futex_spin_unlock_irqrestore(uaddr, &flags);
- if (ret == 0) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = oldval;
+
return ret;
}
diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h
index a81226257878..e4a657094058 100644
--- a/arch/parisc/include/asm/mmu_context.h
+++ b/arch/parisc/include/asm/mmu_context.h
@@ -63,6 +63,9 @@ static inline void switch_mm(struct mm_struct *prev,
{
unsigned long flags;
+ if (prev == next)
+ return;
+
local_irq_save(flags);
switch_mm_irqs_off(prev, next, tsk);
local_irq_restore(flags);
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 80e742a1c162..bfed09d80bae 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -116,11 +116,15 @@ extern int npmem_ranges;
/* This governs the relationship between virtual and physical addresses.
* If you alter it, make sure to take care of our various fixed mapping
* segments in fixmap.h */
+#if defined(BOOTLOADER)
+#define __PAGE_OFFSET (0) /* bootloader uses physical addresses */
+#else
#ifdef CONFIG_64BIT
#define __PAGE_OFFSET (0x40000000) /* 1GB */
#else
#define __PAGE_OFFSET (0x10000000) /* 256MB */
#endif
+#endif /* BOOTLOADER */
#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h
index 7569627a032b..26b4455baa83 100644
--- a/arch/parisc/include/asm/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@@ -5,6 +5,8 @@
#if !defined(__ASSEMBLY__)
+extern int parisc_narrow_firmware;
+
extern int pdc_type;
extern unsigned long parisc_cell_num; /* cell number the CPU runs on (PAT) */
extern unsigned long parisc_cell_loc; /* cell location of CPU (PAT) */
diff --git a/arch/parisc/include/asm/pdcpat.h b/arch/parisc/include/asm/pdcpat.h
index e3c0586260d8..a468a172ee33 100644
--- a/arch/parisc/include/asm/pdcpat.h
+++ b/arch/parisc/include/asm/pdcpat.h
@@ -223,6 +223,18 @@ struct pdc_pat_mem_retinfo { /* PDC_PAT_MEM/PDC_PAT_MEM_PD_INFO (return info) */
unsigned long clear_time; /* last PDT clear time (since Jan 1970) */
};
+struct pdc_pat_mem_cell_pdt_retinfo { /* PDC_PAT_MEM/PDC_PAT_MEM_CELL_INFO */
+ u64 reserved:32;
+ u64 cs:1; /* clear status: cleared since the last call? */
+ u64 current_pdt_entries:15;
+ u64 ic:1; /* interleaving had to be changed ? */
+ u64 max_pdt_entries:15;
+ unsigned long good_mem;
+ unsigned long first_dbe_loc; /* first location of double bit error */
+ unsigned long clear_time; /* last PDT clear time (since Jan 1970) */
+};
+
+
struct pdc_pat_mem_read_pd_retinfo { /* PDC_PAT_MEM/PDC_PAT_MEM_PD_READ */
unsigned long actual_count_bytes;
unsigned long pdt_entries;
@@ -325,6 +337,8 @@ extern int pdc_pat_io_pci_cfg_read(unsigned long pci_addr, int pci_size, u32 *va
extern int pdc_pat_io_pci_cfg_write(unsigned long pci_addr, int pci_size, u32 val);
extern int pdc_pat_mem_pdt_info(struct pdc_pat_mem_retinfo *rinfo);
+extern int pdc_pat_mem_pdt_cell_info(struct pdc_pat_mem_cell_pdt_retinfo *rinfo,
+ unsigned long cell);
extern int pdc_pat_mem_read_cell_pdt(struct pdc_pat_mem_read_pd_retinfo *pret,
unsigned long *pdt_entries_ptr, unsigned long max_entries);
extern int pdc_pat_mem_read_pd_pdt(struct pdc_pat_mem_read_pd_retinfo *pret,
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index e32936cd7f10..55bfe4affca3 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -14,13 +14,6 @@ static inline int arch_spin_is_locked(arch_spinlock_t *x)
#define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0)
-static inline void arch_spin_unlock_wait(arch_spinlock_t *x)
-{
- volatile unsigned int *a = __ldcw_align(x);
-
- smp_cond_load_acquire(a, VAL);
-}
-
static inline void arch_spin_lock_flags(arch_spinlock_t *x,
unsigned long flags)
{
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index 5979745815a5..775b5d5e41a1 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -40,9 +40,6 @@
#define MADV_SEQUENTIAL 2 /* expect sequential page references */
#define MADV_WILLNEED 3 /* will need these pages */
#define MADV_DONTNEED 4 /* don't need these pages */
-#define MADV_SPACEAVAIL 5 /* insure that resources are reserved */
-#define MADV_VPS_PURGE 6 /* Purge pages from VM page cache */
-#define MADV_VPS_INHERIT 7 /* Inherit parents page size */
/* common/generic parameters */
#define MADV_FREE 8 /* free pages only if memory pressure */
@@ -60,21 +57,16 @@
overrides the coredump filter bits */
#define MADV_DODUMP 70 /* Clear the MADV_NODUMP flag */
+#define MADV_WIPEONFORK 71 /* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 72 /* Undo MADV_WIPEONFORK */
+
+#define MADV_HWPOISON 100 /* poison a page for testing */
+#define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */
+
/* compatibility flags */
#define MAP_FILE 0
#define MAP_VARIABLE 0
-/*
- * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
- * This gives us 6 bits, which is enough until someone invents 128 bit address
- * spaces.
- *
- * Assume these are all power of twos.
- * When 0 use the default page size.
- */
-#define MAP_HUGE_SHIFT 26
-#define MAP_HUGE_MASK 0x3f
-
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index a0d4dc9f4eb2..3b2bf7ae703b 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -101,4 +101,6 @@
#define SO_PEERGROUPS 0x4034
+#define SO_ZEROCOPY 0x4035
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index f622a311d04a..ab80e5c6f651 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -69,7 +69,15 @@
#include <asm/pdcpat.h>
#include <asm/processor.h> /* for boot_cpu_data */
+#if defined(BOOTLOADER)
+# undef spin_lock_irqsave
+# define spin_lock_irqsave(a, b) { b = 1; }
+# undef spin_unlock_irqrestore
+# define spin_unlock_irqrestore(a, b)
+#else
static DEFINE_SPINLOCK(pdc_lock);
+#endif
+
extern unsigned long pdc_result[NUM_PDC_RESULT];
extern unsigned long pdc_result2[NUM_PDC_RESULT];
@@ -142,8 +150,8 @@ static void convert_to_wide(unsigned long *addr)
int i;
unsigned int *p = (unsigned int *)addr;
- if(unlikely(parisc_narrow_firmware)) {
- for(i = 31; i >= 0; --i)
+ if (unlikely(parisc_narrow_firmware)) {
+ for (i = (NUM_PDC_RESULT-1); i >= 0; --i)
addr[i] = p[i];
}
#endif
@@ -186,6 +194,8 @@ void set_firmware_width(void)
}
#endif /*CONFIG_64BIT*/
+
+#if !defined(BOOTLOADER)
/**
* pdc_emergency_unlock - Unlock the linux pdc lock
*
@@ -979,16 +989,22 @@ int pdc_mem_pdt_read_entries(struct pdc_mem_read_pdt *pret,
spin_lock_irqsave(&pdc_lock, flags);
retval = mem_pdc_call(PDC_MEM, PDC_MEM_READ_PDT, __pa(pdc_result),
- __pa(pdc_result2));
+ __pa(pdt_entries_ptr));
if (retval == PDC_OK) {
convert_to_wide(pdc_result);
memcpy(pret, pdc_result, sizeof(*pret));
- convert_to_wide(pdc_result2);
- memcpy(pdt_entries_ptr, pdc_result2,
- pret->pdt_entries * sizeof(*pdt_entries_ptr));
}
spin_unlock_irqrestore(&pdc_lock, flags);
+#ifdef CONFIG_64BIT
+ /*
+ * 64-bit kernels should not call this PDT function in narrow mode.
+ * The pdt_entries_ptr array above will now contain 32-bit values
+ */
+ if (WARN_ON_ONCE((retval == PDC_OK) && parisc_narrow_firmware))
+ return PDC_ERROR;
+#endif
+
return retval;
}
@@ -1143,6 +1159,8 @@ void pdc_io_reset_devices(void)
spin_unlock_irqrestore(&pdc_lock, flags);
}
+#endif /* defined(BOOTLOADER) */
+
/* locked by pdc_console_lock */
static int __attribute__((aligned(8))) iodc_retbuf[32];
static char __attribute__((aligned(64))) iodc_dbuf[4096];
@@ -1187,6 +1205,7 @@ print:
return i;
}
+#if !defined(BOOTLOADER)
/**
* pdc_iodc_getc - Read a character (non-blocking) from the PDC console.
*
@@ -1440,6 +1459,29 @@ int pdc_pat_mem_pdt_info(struct pdc_pat_mem_retinfo *rinfo)
}
/**
+ * pdc_pat_mem_pdt_cell_info - Retrieve information about page deallocation
+ * table of a cell
+ * @rinfo: memory pdt information
+ * @cell: cell number
+ *
+ */
+int pdc_pat_mem_pdt_cell_info(struct pdc_pat_mem_cell_pdt_retinfo *rinfo,
+ unsigned long cell)
+{
+ int retval;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pdc_lock, flags);
+ retval = mem_pdc_call(PDC_PAT_MEM, PDC_PAT_MEM_CELL_INFO,
+ __pa(&pdc_result), cell);
+ if (retval == PDC_OK)
+ memcpy(rinfo, &pdc_result, sizeof(*rinfo));
+ spin_unlock_irqrestore(&pdc_lock, flags);
+
+ return retval;
+}
+
+/**
* pdc_pat_mem_read_cell_pdt - Read PDT entries from (old) PAT firmware
* @pret: array of PDT entries
* @pdt_entries_ptr: ptr to hold number of PDT entries
@@ -1455,14 +1497,14 @@ int pdc_pat_mem_read_cell_pdt(struct pdc_pat_mem_read_pd_retinfo *pret,
spin_lock_irqsave(&pdc_lock, flags);
/* PDC_PAT_MEM_CELL_READ is available on early PAT machines only */
retval = mem_pdc_call(PDC_PAT_MEM, PDC_PAT_MEM_CELL_READ,
- __pa(&pdc_result), parisc_cell_num, __pa(&pdc_result2));
+ __pa(&pdc_result), parisc_cell_num,
+ __pa(pdt_entries_ptr));
if (retval == PDC_OK) {
/* build up return value as for PDC_PAT_MEM_PD_READ */
entries = min(pdc_result[0], max_entries);
pret->pdt_entries = entries;
pret->actual_count_bytes = entries * sizeof(unsigned long);
- memcpy(pdt_entries_ptr, &pdc_result2, pret->actual_count_bytes);
}
spin_unlock_irqrestore(&pdc_lock, flags);
@@ -1474,6 +1516,8 @@ int pdc_pat_mem_read_cell_pdt(struct pdc_pat_mem_read_pd_retinfo *pret,
* pdc_pat_mem_read_pd_pdt - Read PDT entries from (newer) PAT firmware
* @pret: array of PDT entries
* @pdt_entries_ptr: ptr to hold number of PDT entries
+ * @count: number of bytes to read
+ * @offset: offset to start (in bytes)
*
*/
int pdc_pat_mem_read_pd_pdt(struct pdc_pat_mem_read_pd_retinfo *pret,
@@ -1524,6 +1568,7 @@ int pdc_pat_mem_get_dimm_phys_location(
return retval;
}
#endif /* CONFIG_64BIT */
+#endif /* defined(BOOTLOADER) */
/***************** 32-bit real-mode calls ***********/
@@ -1633,4 +1678,3 @@ long real64_call(unsigned long fn, ...)
}
#endif /* CONFIG_64BIT */
-
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 5f0067a62738..bd4c0a7471d3 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -41,7 +41,7 @@ static unsigned long pcxl_used_bytes __read_mostly = 0;
static unsigned long pcxl_used_pages __read_mostly = 0;
extern unsigned long pcxl_dma_start; /* Start of pcxl dma mapping area */
-static spinlock_t pcxl_res_lock;
+static DEFINE_SPINLOCK(pcxl_res_lock);
static char *pcxl_res_map;
static int pcxl_res_hint;
static int pcxl_res_size;
@@ -390,7 +390,6 @@ pcxl_dma_init(void)
if (pcxl_dma_start == 0)
return 0;
- spin_lock_init(&pcxl_res_lock);
pcxl_res_size = PCXL_DMA_MAP_SIZE >> (PAGE_SHIFT + 3);
pcxl_res_hint = 0;
pcxl_res_map = (char *)__get_free_pages(GFP_KERNEL,
diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c
index d02874ecb94d..05730a83895c 100644
--- a/arch/parisc/kernel/pdt.c
+++ b/arch/parisc/kernel/pdt.c
@@ -1,19 +1,20 @@
/*
* Page Deallocation Table (PDT) support
*
- * The Page Deallocation Table (PDT) holds a table with pointers to bad
- * memory (broken RAM modules) which is maintained by firmware.
+ * The Page Deallocation Table (PDT) is maintained by firmware and holds a
+ * list of memory addresses in which memory errors were detected.
+ * The list contains both single-bit (correctable) and double-bit
+ * (uncorrectable) errors.
*
* Copyright 2017 by Helge Deller <deller@gmx.de>
*
- * TODO:
- * - check regularily for new bad memory
- * - add userspace interface with procfs or sysfs
- * - increase number of PDT entries dynamically
+ * possible future enhancements:
+ * - add userspace interface via procfs or sysfs to clear PDT
*/
#include <linux/memblock.h>
#include <linux/seq_file.h>
+#include <linux/kthread.h>
#include <asm/pdc.h>
#include <asm/pdcpat.h>
@@ -24,11 +25,16 @@ enum pdt_access_type {
PDT_NONE,
PDT_PDC,
PDT_PAT_NEW,
- PDT_PAT_OLD
+ PDT_PAT_CELL
};
static enum pdt_access_type pdt_type;
+/* PDT poll interval: 1 minute if errors, 5 minutes if everything OK. */
+#define PDT_POLL_INTERVAL_DEFAULT (5*60*HZ)
+#define PDT_POLL_INTERVAL_SHORT (1*60*HZ)
+static unsigned long pdt_poll_interval = PDT_POLL_INTERVAL_DEFAULT;
+
/* global PDT status information */
static struct pdc_mem_retinfo pdt_status;
@@ -36,6 +42,21 @@ static struct pdc_mem_retinfo pdt_status;
#define MAX_PDT_ENTRIES (MAX_PDT_TABLE_SIZE / sizeof(unsigned long))
static unsigned long pdt_entry[MAX_PDT_ENTRIES] __page_aligned_bss;
+/*
+ * Constants for the pdt_entry format:
+ * A pdt_entry holds the physical address in bits 0-57, bits 58-61 are
+ * reserved, bit 62 is the perm bit and bit 63 is the error_type bit.
+ * The perm bit indicates whether the error has been verified as a permanent
+ * error (value of 1) or has not been verified, and may be transient (value
+ * of 0). The error_type bit indicates whether the error is a single bit error
+ * (value of 1) or a multiple bit error.
+ * On non-PAT machines phys_addr is encoded in bits 0-59 and error_type in bit
+ * 63. Those machines don't provide the perm bit.
+ */
+
+#define PDT_ADDR_PHYS_MASK (pdt_type != PDT_PDC ? ~0x3f : ~0x0f)
+#define PDT_ADDR_PERM_ERR (pdt_type != PDT_PDC ? 2UL : 0UL)
+#define PDT_ADDR_SINGLE_ERR 1UL
/* report PDT entries via /proc/meminfo */
void arch_report_meminfo(struct seq_file *m)
@@ -49,6 +70,68 @@ void arch_report_meminfo(struct seq_file *m)
pdt_status.pdt_entries);
}
+static int get_info_pat_new(void)
+{
+ struct pdc_pat_mem_retinfo pat_rinfo;
+ int ret;
+
+ /* newer PAT machines like C8000 report info for all cells */
+ if (is_pdc_pat())
+ ret = pdc_pat_mem_pdt_info(&pat_rinfo);
+ else
+ return PDC_BAD_PROC;
+
+ pdt_status.pdt_size = pat_rinfo.max_pdt_entries;
+ pdt_status.pdt_entries = pat_rinfo.current_pdt_entries;
+ pdt_status.pdt_status = 0;
+ pdt_status.first_dbe_loc = pat_rinfo.first_dbe_loc;
+ pdt_status.good_mem = pat_rinfo.good_mem;
+
+ return ret;
+}
+
+static int get_info_pat_cell(void)
+{
+ struct pdc_pat_mem_cell_pdt_retinfo cell_rinfo;
+ int ret;
+
+ /* older PAT machines like rp5470 report cell info only */
+ if (is_pdc_pat())
+ ret = pdc_pat_mem_pdt_cell_info(&cell_rinfo, parisc_cell_num);
+ else
+ return PDC_BAD_PROC;
+
+ pdt_status.pdt_size = cell_rinfo.max_pdt_entries;
+ pdt_status.pdt_entries = cell_rinfo.current_pdt_entries;
+ pdt_status.pdt_status = 0;
+ pdt_status.first_dbe_loc = cell_rinfo.first_dbe_loc;
+ pdt_status.good_mem = cell_rinfo.good_mem;
+
+ return ret;
+}
+
+static void report_mem_err(unsigned long pde)
+{
+ struct pdc_pat_mem_phys_mem_location loc;
+ unsigned long addr;
+ char dimm_txt[32];
+
+ addr = pde & PDT_ADDR_PHYS_MASK;
+
+ /* show DIMM slot description on PAT machines */
+ if (is_pdc_pat()) {
+ pdc_pat_mem_get_dimm_phys_location(&loc, addr);
+ sprintf(dimm_txt, "DIMM slot %02x, ", loc.dimm_slot);
+ } else
+ dimm_txt[0] = 0;
+
+ pr_warn("PDT: BAD MEMORY at 0x%08lx, %s%s%s-bit error.\n",
+ addr, dimm_txt,
+ pde & PDT_ADDR_PERM_ERR ? "permanent ":"",
+ pde & PDT_ADDR_SINGLE_ERR ? "single":"multi");
+}
+
+
/*
* pdc_pdt_init()
*
@@ -63,18 +146,17 @@ void __init pdc_pdt_init(void)
unsigned long entries;
struct pdc_mem_read_pdt pdt_read_ret;
- if (is_pdc_pat()) {
- struct pdc_pat_mem_retinfo pat_rinfo;
+ pdt_type = PDT_PAT_NEW;
+ ret = get_info_pat_new();
- pdt_type = PDT_PAT_NEW;
- ret = pdc_pat_mem_pdt_info(&pat_rinfo);
- pdt_status.pdt_size = pat_rinfo.max_pdt_entries;
- pdt_status.pdt_entries = pat_rinfo.current_pdt_entries;
- pdt_status.pdt_status = 0;
- pdt_status.first_dbe_loc = pat_rinfo.first_dbe_loc;
- pdt_status.good_mem = pat_rinfo.good_mem;
- } else {
+ if (ret != PDC_OK) {
+ pdt_type = PDT_PAT_CELL;
+ ret = get_info_pat_cell();
+ }
+
+ if (ret != PDC_OK) {
pdt_type = PDT_PDC;
+ /* non-PAT machines provide the standard PDC call */
ret = pdc_mem_pdt_info(&pdt_status);
}
@@ -86,13 +168,17 @@ void __init pdc_pdt_init(void)
}
entries = pdt_status.pdt_entries;
- WARN_ON(entries > MAX_PDT_ENTRIES);
+ if (WARN_ON(entries > MAX_PDT_ENTRIES))
+ entries = pdt_status.pdt_entries = MAX_PDT_ENTRIES;
- pr_info("PDT: size %lu, entries %lu, status %lu, dbe_loc 0x%lx,"
- " good_mem %lu\n",
+ pr_info("PDT: type %s, size %lu, entries %lu, status %lu, dbe_loc 0x%lx,"
+ " good_mem %lu MB\n",
+ pdt_type == PDT_PDC ? __stringify(PDT_PDC) :
+ pdt_type == PDT_PAT_CELL ? __stringify(PDT_PAT_CELL)
+ : __stringify(PDT_PAT_NEW),
pdt_status.pdt_size, pdt_status.pdt_entries,
pdt_status.pdt_status, pdt_status.first_dbe_loc,
- pdt_status.good_mem);
+ pdt_status.good_mem / 1024 / 1024);
if (entries == 0) {
pr_info("PDT: Firmware reports all memory OK.\n");
@@ -112,15 +198,12 @@ void __init pdc_pdt_init(void)
#ifdef CONFIG_64BIT
struct pdc_pat_mem_read_pd_retinfo pat_pret;
- /* try old obsolete PAT firmware function first */
- pdt_type = PDT_PAT_OLD;
- ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
- MAX_PDT_ENTRIES);
- if (ret != PDC_OK) {
- pdt_type = PDT_PAT_NEW;
+ if (pdt_type == PDT_PAT_CELL)
+ ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
+ MAX_PDT_ENTRIES);
+ else
ret = pdc_pat_mem_read_pd_pdt(&pat_pret, pdt_entry,
MAX_PDT_TABLE_SIZE, 0);
- }
#else
ret = PDC_BAD_PROC;
#endif
@@ -128,27 +211,142 @@ void __init pdc_pdt_init(void)
if (ret != PDC_OK) {
pdt_type = PDT_NONE;
- pr_debug("PDT type %d, retval = %d\n", pdt_type, ret);
+ pr_warn("PDT: Get PDT entries failed with %d\n", ret);
return;
}
for (i = 0; i < pdt_status.pdt_entries; i++) {
- struct pdc_pat_mem_phys_mem_location loc;
+ report_mem_err(pdt_entry[i]);
+
+ /* mark memory page bad */
+ memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE);
+ }
+}
- /* get DIMM slot number */
- loc.dimm_slot = 0xff;
+
+/*
+ * This is the PDT kernel thread main loop.
+ */
+
+static int pdt_mainloop(void *unused)
+{
+ struct pdc_mem_read_pdt pdt_read_ret;
+ struct pdc_pat_mem_read_pd_retinfo pat_pret __maybe_unused;
+ unsigned long old_num_entries;
+ unsigned long *bad_mem_ptr;
+ int num, ret;
+
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ old_num_entries = pdt_status.pdt_entries;
+
+ schedule_timeout(pdt_poll_interval);
+ if (kthread_should_stop())
+ break;
+
+ /* Do we have new PDT entries? */
+ switch (pdt_type) {
+ case PDT_PAT_NEW:
+ ret = get_info_pat_new();
+ break;
+ case PDT_PAT_CELL:
+ ret = get_info_pat_cell();
+ break;
+ default:
+ ret = pdc_mem_pdt_info(&pdt_status);
+ break;
+ }
+
+ if (ret != PDC_OK) {
+ pr_warn("PDT: unexpected failure %d\n", ret);
+ return -EINVAL;
+ }
+
+ /* if no new PDT entries, just wait again */
+ num = pdt_status.pdt_entries - old_num_entries;
+ if (num <= 0)
+ continue;
+
+ /* decrease poll interval in case we found memory errors */
+ if (pdt_status.pdt_entries &&
+ pdt_poll_interval == PDT_POLL_INTERVAL_DEFAULT)
+ pdt_poll_interval = PDT_POLL_INTERVAL_SHORT;
+
+ /* limit entries to get */
+ if (num > MAX_PDT_ENTRIES) {
+ num = MAX_PDT_ENTRIES;
+ pdt_status.pdt_entries = old_num_entries + num;
+ }
+
+ /* get new entries */
+ switch (pdt_type) {
#ifdef CONFIG_64BIT
- pdc_pat_mem_get_dimm_phys_location(&loc, pdt_entry[i]);
+ case PDT_PAT_CELL:
+ if (pdt_status.pdt_entries > MAX_PDT_ENTRIES) {
+ pr_crit("PDT: too many entries.\n");
+ return -ENOMEM;
+ }
+ ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
+ MAX_PDT_ENTRIES);
+ bad_mem_ptr = &pdt_entry[old_num_entries];
+ break;
+ case PDT_PAT_NEW:
+ ret = pdc_pat_mem_read_pd_pdt(&pat_pret,
+ pdt_entry,
+ num * sizeof(unsigned long),
+ old_num_entries * sizeof(unsigned long));
+ bad_mem_ptr = &pdt_entry[0];
+ break;
#endif
+ default:
+ ret = pdc_mem_pdt_read_entries(&pdt_read_ret,
+ pdt_entry);
+ bad_mem_ptr = &pdt_entry[old_num_entries];
+ break;
+ }
- pr_warn("PDT: BAD PAGE #%d at 0x%08lx, "
- "DIMM slot %02x (error_type = %lu)\n",
- i,
- pdt_entry[i] & PAGE_MASK,
- loc.dimm_slot,
- pdt_entry[i] & 1);
+ /* report and mark memory broken */
+ while (num--) {
+ unsigned long pde = *bad_mem_ptr++;
- /* mark memory page bad */
- memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE);
+ report_mem_err(pde);
+
+#ifdef CONFIG_MEMORY_FAILURE
+ if ((pde & PDT_ADDR_PERM_ERR) ||
+ ((pde & PDT_ADDR_SINGLE_ERR) == 0))
+ memory_failure(pde >> PAGE_SHIFT, 0, 0);
+ else
+ soft_offline_page(
+ pfn_to_page(pde >> PAGE_SHIFT), 0);
+#else
+ pr_crit("PDT: memory error at 0x%lx ignored.\n"
+ "Rebuild kernel with CONFIG_MEMORY_FAILURE=y "
+ "for real handling.\n",
+ pde & PDT_ADDR_PHYS_MASK);
+#endif
+
+ }
}
+
+ return 0;
}
+
+
+static int __init pdt_initcall(void)
+{
+ struct task_struct *kpdtd_task;
+
+ if (pdt_type == PDT_NONE)
+ return -ENODEV;
+
+ kpdtd_task = kthread_create(pdt_mainloop, NULL, "kpdtd");
+ if (IS_ERR(kpdtd_task))
+ return PTR_ERR(kpdtd_task);
+
+ wake_up_process(kpdtd_task);
+
+ return 0;
+}
+
+late_initcall(pdt_initcall);
diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c
index 6017a5af2e6e..0813359049ae 100644
--- a/arch/parisc/kernel/perf.c
+++ b/arch/parisc/kernel/perf.c
@@ -69,7 +69,7 @@ struct rdr_tbl_ent {
static int perf_processor_interface __read_mostly = UNKNOWN_INTF;
static int perf_enabled __read_mostly;
-static spinlock_t perf_lock;
+static DEFINE_SPINLOCK(perf_lock);
struct parisc_device *cpu_device __read_mostly;
/* RDRs to write for PCX-W */
@@ -533,8 +533,6 @@ static int __init perf_init(void)
/* Patch the images to match the system */
perf_patch_images();
- spin_lock_init(&perf_lock);
-
/* TODO: this only lets us access the first cpu.. what to do for SMP? */
cpu_device = per_cpu(cpu_data, 0).dev;
printk("Performance monitoring counters enabled for %s\n",
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index 0ab32779dfa7..a778bd3c107c 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -30,6 +30,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/seq_file.h>
+#include <linux/random.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <asm/param.h>
@@ -89,7 +90,7 @@ init_percpu_prof(unsigned long cpunum)
* (return 1). If so, initialize the chip and tell other partners in crime
* they have work to do.
*/
-static int processor_probe(struct parisc_device *dev)
+static int __init processor_probe(struct parisc_device *dev)
{
unsigned long txn_addr;
unsigned long cpuid;
@@ -237,28 +238,45 @@ static int processor_probe(struct parisc_device *dev)
*/
void __init collect_boot_cpu_data(void)
{
+ unsigned long cr16_seed;
+
memset(&boot_cpu_data, 0, sizeof(boot_cpu_data));
+ cr16_seed = get_cycles();
+ add_device_randomness(&cr16_seed, sizeof(cr16_seed));
+
boot_cpu_data.cpu_hz = 100 * PAGE0->mem_10msec; /* Hz of this PARISC */
/* get CPU-Model Information... */
#define p ((unsigned long *)&boot_cpu_data.pdc.model)
- if (pdc_model_info(&boot_cpu_data.pdc.model) == PDC_OK)
+ if (pdc_model_info(&boot_cpu_data.pdc.model) == PDC_OK) {
printk(KERN_INFO
"model %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n",
p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8]);
+
+ add_device_randomness(&boot_cpu_data.pdc.model,
+ sizeof(boot_cpu_data.pdc.model));
+ }
#undef p
- if (pdc_model_versions(&boot_cpu_data.pdc.versions, 0) == PDC_OK)
+ if (pdc_model_versions(&boot_cpu_data.pdc.versions, 0) == PDC_OK) {
printk(KERN_INFO "vers %08lx\n",
boot_cpu_data.pdc.versions);
- if (pdc_model_cpuid(&boot_cpu_data.pdc.cpuid) == PDC_OK)
+ add_device_randomness(&boot_cpu_data.pdc.versions,
+ sizeof(boot_cpu_data.pdc.versions));
+ }
+
+ if (pdc_model_cpuid(&boot_cpu_data.pdc.cpuid) == PDC_OK) {
printk(KERN_INFO "CPUID vers %ld rev %ld (0x%08lx)\n",
(boot_cpu_data.pdc.cpuid >> 5) & 127,
boot_cpu_data.pdc.cpuid & 31,
boot_cpu_data.pdc.cpuid);
+ add_device_randomness(&boot_cpu_data.pdc.cpuid,
+ sizeof(boot_cpu_data.pdc.cpuid));
+ }
+
if (pdc_model_capabilities(&boot_cpu_data.pdc.capabilities) == PDC_OK)
printk(KERN_INFO "capabilities 0x%lx\n",
boot_cpu_data.pdc.capabilities);
@@ -414,12 +432,12 @@ show_cpuinfo (struct seq_file *m, void *v)
return 0;
}
-static const struct parisc_device_id processor_tbl[] = {
+static const struct parisc_device_id processor_tbl[] __initconst = {
{ HPHW_NPROC, HVERSION_REV_ANY_ID, HVERSION_ANY_ID, SVERSION_ANY_ID },
{ 0, }
};
-static struct parisc_driver cpu_driver = {
+static struct parisc_driver cpu_driver __refdata = {
.name = "CPU",
.id_table = processor_tbl,
.probe = processor_probe
diff --git a/arch/parisc/kernel/real2.S b/arch/parisc/kernel/real2.S
index 1db58e546230..cc9963421a19 100644
--- a/arch/parisc/kernel/real2.S
+++ b/arch/parisc/kernel/real2.S
@@ -162,6 +162,7 @@ ENDPROC_CFI(restore_control_regs)
.text
.align 128
ENTRY_CFI(rfi_virt2real)
+#if !defined(BOOTLOADER)
/* switch to real mode... */
rsm PSW_SM_I,%r0
load32 PA(rfi_v2r_1), %r1
@@ -191,6 +192,7 @@ ENTRY_CFI(rfi_virt2real)
nop
rfi_v2r_1:
tophys_r1 %r2
+#endif /* !defined(BOOTLOADER) */
bv 0(%r2)
nop
ENDPROC_CFI(rfi_virt2real)
@@ -198,6 +200,7 @@ ENDPROC_CFI(rfi_virt2real)
.text
.align 128
ENTRY_CFI(rfi_real2virt)
+#if !defined(BOOTLOADER)
rsm PSW_SM_I,%r0
load32 (rfi_r2v_1), %r1
nop
@@ -226,6 +229,7 @@ ENTRY_CFI(rfi_real2virt)
nop
rfi_r2v_1:
tovirt_r1 %r2
+#endif /* !defined(BOOTLOADER) */
bv 0(%r2)
nop
ENDPROC_CFI(rfi_real2virt)
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 1b73690477c5..48dc7d4d20bb 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -34,7 +34,7 @@
extern struct unwind_table_entry __start___unwind[];
extern struct unwind_table_entry __stop___unwind[];
-static spinlock_t unwind_lock;
+static DEFINE_SPINLOCK(unwind_lock);
/*
* the kernel unwind block is not dynamically allocated so that
* we can call unwind_init as early in the bootup process as
@@ -181,8 +181,6 @@ int __init unwind_init(void)
start = (long)&__start___unwind[0];
stop = (long)&__stop___unwind[0];
- spin_lock_init(&unwind_lock);
-
printk("unwind_init: start = 0x%lx, end = 0x%lx, entries = %lu\n",
start, stop,
(stop - start) / sizeof(struct unwind_table_entry));
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index 99115cd9e790..865a7f796c7f 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -27,8 +27,6 @@
#include <linux/compiler.h>
#include <linux/uaccess.h>
-DECLARE_PER_CPU(struct exception_data, exception_data);
-
#define get_user_space() (uaccess_kernel() ? 0 : mfsp(3))
#define get_kernel_space() (0)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 36f858c37ca7..809c468edab1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -85,6 +85,17 @@ config NMI_IPI
depends on SMP && (DEBUGGER || KEXEC_CORE || HARDLOCKUP_DETECTOR)
default y
+config PPC_WATCHDOG
+ bool
+ depends on HARDLOCKUP_DETECTOR
+ depends on HAVE_HARDLOCKUP_DETECTOR_ARCH
+ default y
+ help
+ This is a placeholder when the powerpc hardlockup detector
+ watchdog is selected (arch/powerpc/kernel/watchdog.c). It is
+ selected via the generic lockup detector menu which is why we
+ have no standalone config option for it here.
+
config STACKTRACE_SUPPORT
bool
default y
@@ -165,7 +176,7 @@ config PPC
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
- select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S_64 && !RELOCATABLE && !HIBERNATION)
+ select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select HAVE_CBPF_JIT if !PPC64
select HAVE_CONTEXT_TRACKING if PPC64
@@ -199,7 +210,7 @@ config PPC
select HAVE_OPTPROBES if PPC64
select HAVE_PERF_EVENTS
select HAVE_PERF_EVENTS_NMI if PPC64
- select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
+ select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE if SMP
@@ -356,10 +367,6 @@ config PPC_ADV_DEBUG_DAC_RANGE
depends on PPC_ADV_DEBUG_REGS && 44x
default y
-config PPC_EMULATE_SSTEP
- bool
- default y if KPROBES || UPROBES || XMON || HAVE_HW_BREAKPOINT
-
config ZONE_DMA32
bool
default y if PPC64
@@ -394,7 +401,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE
config MATH_EMULATION
bool "Math emulation"
- depends on 4xx || 8xx || PPC_MPC832x || BOOKE
+ depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE
---help---
Some PowerPC chips designed for embedded applications do not have
a floating-point unit and therefore do not implement the
@@ -956,9 +963,9 @@ config PPC_PCI_CHOICE
config PCI
bool "PCI support" if PPC_PCI_CHOICE
- default y if !40x && !CPM2 && !8xx && !PPC_83xx \
+ default y if !40x && !CPM2 && !PPC_8xx && !PPC_83xx \
&& !PPC_85xx && !PPC_86xx && !GAMECUBE_COMMON
- default PCI_QSPAN if !4xx && !CPM2 && 8xx
+ default PCI_QSPAN if PPC_8xx
select GENERIC_PCI_IOMAP
help
Find out whether your system includes a PCI bus. PCI is the name of
@@ -974,7 +981,7 @@ config PCI_SYSCALL
config PCI_QSPAN
bool "QSpan PCI"
- depends on !4xx && !CPM2 && 8xx
+ depends on PPC_8xx
select PPC_I8259
help
Say Y here if you have a system based on a Motorola 8xx-series
@@ -1165,12 +1172,23 @@ config CONSISTENT_SIZE
config PIN_TLB
bool "Pinned Kernel TLBs (860 ONLY)"
- depends on ADVANCED_OPTIONS && 8xx
+ depends on ADVANCED_OPTIONS && PPC_8xx && \
+ !DEBUG_PAGEALLOC && !STRICT_KERNEL_RWX
+
+config PIN_TLB_DATA
+ bool "Pinned TLB for DATA"
+ depends on PIN_TLB
+ default y
config PIN_TLB_IMMR
bool "Pinned TLB for IMMR"
depends on PIN_TLB
default y
+
+config PIN_TLB_TEXT
+ bool "Pinned TLB for TEXT"
+ depends on PIN_TLB
+ default y
endmenu
if PPC64
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index e2b3e7a00c9e..1381693a4a51 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -250,7 +250,7 @@ KBUILD_AFLAGS += $(aflags-y)
KBUILD_CFLAGS += $(cflags-y)
head-y := arch/powerpc/kernel/head_$(BITS).o
-head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o
+head-$(CONFIG_PPC_8xx) := arch/powerpc/kernel/head_8xx.o
head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o
head-$(CONFIG_44x) := arch/powerpc/kernel/head_44x.o
head-$(CONFIG_FSL_BOOKE) := arch/powerpc/kernel/head_fsl_booke.o
@@ -317,6 +317,10 @@ PHONY += ppc64le_defconfig
ppc64le_defconfig:
$(call merge_into_defconfig,ppc64_defconfig,le)
+PHONY += powernv_be_defconfig
+powernv_be_defconfig:
+ $(call merge_into_defconfig,powernv_defconfig,be)
+
PHONY += mpc85xx_defconfig
mpc85xx_defconfig:
$(call merge_into_defconfig,mpc85xx_basic_defconfig,\
diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c
index 9d3bd4c45a24..f7da65169124 100644
--- a/arch/powerpc/boot/4xx.c
+++ b/arch/powerpc/boot/4xx.c
@@ -564,7 +564,7 @@ void ibm405gp_fixup_clocks(unsigned int sys_clk, unsigned int ser_clk)
fbdv = 16;
cbdv = ((pllmr & 0x00060000) >> 17) + 1; /* CPU:PLB */
opdv = ((pllmr & 0x00018000) >> 15) + 1; /* PLB:OPB */
- ppdv = ((pllmr & 0x00001800) >> 13) + 1; /* PLB:PCI */
+ ppdv = ((pllmr & 0x00006000) >> 13) + 1; /* PLB:PCI */
epdv = ((pllmr & 0x00001800) >> 11) + 2; /* PLB:EBC */
udiv = ((cpc0_cr0 & 0x3e) >> 1) + 1;
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 6f952fe1f084..c4e6fe35c075 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -107,17 +107,18 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \
$(libfdt) libfdt-wrapper.c \
ns16550.c serial.c simple_alloc.c div64.S util.S \
elf_util.c $(zlib-y) devtree.c stdlib.c \
- oflib.c ofconsole.c cuboot.c mpsc.c cpm-serial.c \
- uartlite.c mpc52xx-psc.c opal.c
+ oflib.c ofconsole.c cuboot.c cpm-serial.c \
+ uartlite.c opal.c
+src-wlib-$(CONFIG_PPC_MPC52XX) += mpc52xx-psc.c
src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S
ifndef CONFIG_PPC64_BOOT_WRAPPER
src-wlib-y += crtsavres.S
endif
src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c
src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c
-src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c fsl-soc.c
+src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c
src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c
-src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c
+src-wlib-$(CONFIG_EMBEDDED6xx) += mpsc.c mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c
src-plat-y := of.c epapr.c
src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \
@@ -132,7 +133,7 @@ src-plat-$(CONFIG_44x) += treeboot-ebony.c cuboot-ebony.c treeboot-bamboo.c \
treeboot-iss4xx.c treeboot-currituck.c \
treeboot-akebono.c \
simpleboot.c fixed-head.S virtex.c
-src-plat-$(CONFIG_8xx) += cuboot-8xx.c fixed-head.S ep88xc.c redboot-8xx.c
+src-plat-$(CONFIG_PPC_8xx) += cuboot-8xx.c fixed-head.S ep88xc.c redboot-8xx.c
src-plat-$(CONFIG_PPC_MPC52xx) += cuboot-52xx.c
src-plat-$(CONFIG_PPC_82xx) += cuboot-pq2.c fixed-head.S ep8248e.c cuboot-824x.c
src-plat-$(CONFIG_PPC_83xx) += cuboot-83xx.c fixed-head.S redboot-83xx.c
diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index 12866ccb5694..dcf2f15e6797 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -26,17 +26,17 @@ _zimage_start_opd:
#ifdef __powerpc64__
.balign 8
-p_start: .llong _start
-p_etext: .llong _etext
-p_bss_start: .llong __bss_start
-p_end: .llong _end
-
-p_toc: .llong __toc_start + 0x8000 - p_base
-p_dyn: .llong __dynamic_start - p_base
-p_rela: .llong __rela_dyn_start - p_base
-p_prom: .llong 0
+p_start: .8byte _start
+p_etext: .8byte _etext
+p_bss_start: .8byte __bss_start
+p_end: .8byte _end
+
+p_toc: .8byte __toc_start + 0x8000 - p_base
+p_dyn: .8byte __dynamic_start - p_base
+p_rela: .8byte __rela_dyn_start - p_base
+p_prom: .8byte 0
.weak _platform_stack_top
-p_pstack: .llong _platform_stack_top
+p_pstack: .8byte _platform_stack_top
#else
p_start: .long _start
p_etext: .long _etext
diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts
index 475953ada707..f10a64aeb83b 100644
--- a/arch/powerpc/boot/dts/fsp2.dts
+++ b/arch/powerpc/boot/dts/fsp2.dts
@@ -52,6 +52,7 @@
clocks {
mmc_clk: mmc_clk {
compatible = "fixed-clock";
+ #clock-cells = <0>;
clock-frequency = <50000000>;
clock-output-names = "mmc_clk";
};
@@ -359,20 +360,6 @@
interrupts = <31 0x4 15 0x84>;
};
- mmc0: sdhci@020c0000 {
- compatible = "st,sdhci-stih407", "st,sdhci";
- status = "disabled";
- reg = <0x020c0000 0x20000>;
- reg-names = "mmc";
- interrupt-parent = <&UIC1_3>;
- interrupts = <21 0x4 22 0x4>;
- interrupt-names = "mmcirq";
- pinctrl-names = "default";
- pinctrl-0 = <>;
- clock-names = "mmc";
- clocks = <&mmc_clk>;
- };
-
plb6 {
compatible = "ibm,plb6";
#address-cells = <2>;
@@ -501,6 +488,24 @@
/*RXDE*/ 4 &UIC1_2 13 0x4>;
};
+ mmc0: mmc@20c0000 {
+ compatible = "st,sdhci-stih407", "st,sdhci";
+ reg = <0x020c0000 0x20000>;
+ reg-names = "mmc";
+ interrupts = <21 0x4>;
+ interrupt-parent = <&UIC1_3>;
+ interrupt-names = "mmcirq";
+ pinctrl-names = "default";
+ pinctrl-0 = <>;
+ clock-names = "mmc";
+ clocks = <&mmc_clk>;
+ bus-width = <4>;
+ non-removable;
+ sd-uhs-sdr50;
+ sd-uhs-sdr104;
+ sd-uhs-ddr50;
+ };
+
opb {
compatible = "ibm,opb";
#address-cells = <1>;
diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h
index 68e388ee94fe..c63299f9fdd9 100644
--- a/arch/powerpc/boot/ppc_asm.h
+++ b/arch/powerpc/boot/ppc_asm.h
@@ -80,4 +80,12 @@
.long 0xa6037b7d; /* mtsrr1 r11 */ \
.long 0x2400004c /* rfid */
+#ifdef CONFIG_PPC_8xx
+#define MFTBL(dest) mftb dest
+#define MFTBU(dest) mftbu dest
+#else
+#define MFTBL(dest) mfspr dest, SPRN_TBRL
+#define MFTBU(dest) mfspr dest, SPRN_TBRU
+#endif
+
#endif /* _PPC64_PPC_ASM_H */
diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c
index e04c1e4063ae..7b5c02b1afd0 100644
--- a/arch/powerpc/boot/serial.c
+++ b/arch/powerpc/boot/serial.c
@@ -120,15 +120,19 @@ int serial_console_init(void)
if (dt_is_compatible(devp, "ns16550") ||
dt_is_compatible(devp, "pnpPNP,501"))
rc = ns16550_console_init(devp, &serial_cd);
+#ifdef CONFIG_EMBEDDED6xx
else if (dt_is_compatible(devp, "marvell,mv64360-mpsc"))
rc = mpsc_console_init(devp, &serial_cd);
+#endif
else if (dt_is_compatible(devp, "fsl,cpm1-scc-uart") ||
dt_is_compatible(devp, "fsl,cpm1-smc-uart") ||
dt_is_compatible(devp, "fsl,cpm2-scc-uart") ||
dt_is_compatible(devp, "fsl,cpm2-smc-uart"))
rc = cpm_console_init(devp, &serial_cd);
+#ifdef CONFIG_PPC_MPC52XX
else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart"))
rc = mpc5200_psc_console_init(devp, &serial_cd);
+#endif
else if (dt_is_compatible(devp, "xlnx,opb-uartlite-1.00.b") ||
dt_is_compatible(devp, "xlnx,xps-uartlite-1.00.a"))
rc = uartlite_console_init(devp, &serial_cd);
diff --git a/arch/powerpc/boot/util.S b/arch/powerpc/boot/util.S
index 243b8497d58b..ec069177d942 100644
--- a/arch/powerpc/boot/util.S
+++ b/arch/powerpc/boot/util.S
@@ -71,32 +71,18 @@ udelay:
add r4,r4,r5
addi r4,r4,-1
divw r4,r4,r5 /* BUS ticks */
-#ifdef CONFIG_8xx
-1: mftbu r5
- mftb r6
- mftbu r7
-#else
-1: mfspr r5, SPRN_TBRU
- mfspr r6, SPRN_TBRL
- mfspr r7, SPRN_TBRU
-#endif
+1: MFTBU(r5)
+ MFTBL(r6)
+ MFTBU(r7)
cmpw 0,r5,r7
bne 1b /* Get [synced] base time */
addc r9,r6,r4 /* Compute end time */
addze r8,r5
-#ifdef CONFIG_8xx
-2: mftbu r5
-#else
-2: mfspr r5, SPRN_TBRU
-#endif
+2: MFTBU(r5)
cmpw 0,r5,r8
blt 2b
bgt 3f
-#ifdef CONFIG_8xx
- mftb r6
-#else
- mfspr r6, SPRN_TBRL
-#endif
+ MFTBL(r6)
cmpw 0,r6,r9
blt 2b
3: blr
diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig
index 3438ed99c088..e57344c3b0d7 100644
--- a/arch/powerpc/configs/40x/acadia_defconfig
+++ b/arch/powerpc/configs/40x/acadia_defconfig
@@ -64,4 +64,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/ep405_defconfig b/arch/powerpc/configs/40x/ep405_defconfig
index 36c44c0b560c..0f66f8a87be8 100644
--- a/arch/powerpc/configs/40x/ep405_defconfig
+++ b/arch/powerpc/configs/40x/ep405_defconfig
@@ -64,4 +64,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig
index ad2156c6e2fc..b5cc7426c21f 100644
--- a/arch/powerpc/configs/40x/kilauea_defconfig
+++ b/arch/powerpc/configs/40x/kilauea_defconfig
@@ -72,4 +72,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/klondike_defconfig b/arch/powerpc/configs/40x/klondike_defconfig
index 28adb782ec51..caab658d1da1 100644
--- a/arch/powerpc/configs/40x/klondike_defconfig
+++ b/arch/powerpc/configs/40x/klondike_defconfig
@@ -26,7 +26,6 @@ CONFIG_SCSI_SAS_ATTRS=y
# CONFIG_VT is not set
# CONFIG_UNIX98_PTYS is not set
# CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
# CONFIG_USB_SUPPORT is not set
diff --git a/arch/powerpc/configs/40x/makalu_defconfig b/arch/powerpc/configs/40x/makalu_defconfig
index a00f434c4d47..e0b1489b7c7b 100644
--- a/arch/powerpc/configs/40x/makalu_defconfig
+++ b/arch/powerpc/configs/40x/makalu_defconfig
@@ -62,4 +62,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/obs600_defconfig b/arch/powerpc/configs/40x/obs600_defconfig
index e500e6a12b3e..aac06d2ad01a 100644
--- a/arch/powerpc/configs/40x/obs600_defconfig
+++ b/arch/powerpc/configs/40x/obs600_defconfig
@@ -72,4 +72,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/virtex_defconfig b/arch/powerpc/configs/40x/virtex_defconfig
index 65dc084a154c..a2b2770eee8f 100644
--- a/arch/powerpc/configs/40x/virtex_defconfig
+++ b/arch/powerpc/configs/40x/virtex_defconfig
@@ -41,9 +41,9 @@ CONFIG_NETDEVICES=y
CONFIG_SERIO_XILINX_XPS_PS2=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_UARTLITE=y
CONFIG_SERIAL_UARTLITE_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_XILINX_HWICAP=y
CONFIG_GPIOLIB=y
CONFIG_GPIO_SYSFS=y
@@ -74,4 +74,3 @@ CONFIG_FONT_8x16=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_KERNEL=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/walnut_defconfig b/arch/powerpc/configs/40x/walnut_defconfig
index 567f99bd64a3..6faa03cd661c 100644
--- a/arch/powerpc/configs/40x/walnut_defconfig
+++ b/arch/powerpc/configs/40x/walnut_defconfig
@@ -57,4 +57,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig
index 143b2fbddb46..9fcd361607e2 100644
--- a/arch/powerpc/configs/44x/akebono_defconfig
+++ b/arch/powerpc/configs/44x/akebono_defconfig
@@ -123,7 +123,6 @@ CONFIG_NLS_DEFAULT="n"
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_XMON=y
@@ -135,5 +134,4 @@ CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_SHA1_PPC=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig
index 477d99fefd9a..6f3a6ecc81e7 100644
--- a/arch/powerpc/configs/44x/bamboo_defconfig
+++ b/arch/powerpc/configs/44x/bamboo_defconfig
@@ -55,4 +55,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig
index 3799a26de6f4..5f1df5fe4453 100644
--- a/arch/powerpc/configs/44x/currituck_defconfig
+++ b/arch/powerpc/configs/44x/currituck_defconfig
@@ -81,7 +81,6 @@ CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_NLS_DEFAULT="n"
CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_XMON=y
@@ -94,5 +93,4 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/ebony_defconfig b/arch/powerpc/configs/44x/ebony_defconfig
index c265f54ab9e5..e2b6578993d5 100644
--- a/arch/powerpc/configs/44x/ebony_defconfig
+++ b/arch/powerpc/configs/44x/ebony_defconfig
@@ -59,5 +59,4 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig
index bb6bd6d90821..f6dc23fef683 100644
--- a/arch/powerpc/configs/44x/eiger_defconfig
+++ b/arch/powerpc/configs/44x/eiger_defconfig
@@ -84,18 +84,14 @@ CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_CTS=y
-CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_LRW=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_XTS=y
-CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_XCBC=y
CONFIG_CRYPTO_MD4=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_SHA1=y
-CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=y
CONFIG_CRYPTO_ARC4=y
CONFIG_CRYPTO_BLOWFISH=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig
index e8e6a6999852..bae6b26bcfba 100644
--- a/arch/powerpc/configs/44x/fsp2_defconfig
+++ b/arch/powerpc/configs/44x/fsp2_defconfig
@@ -92,8 +92,10 @@ CONFIG_MMC_DEBUG=y
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_SDHCI_OF_ARASAN=y
+CONFIG_MMC_SDHCI_ST=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_M41T80=y
+CONFIG_RESET_CONTROLLER=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
@@ -115,7 +117,6 @@ CONFIG_PRINTK_TIME=y
CONFIG_MESSAGE_LOGLEVEL_DEFAULT=3
CONFIG_DYNAMIC_DEBUG=y
CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_CRYPTO_CBC=y
diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig
index 060f2edddb71..4453a4590b1a 100644
--- a/arch/powerpc/configs/44x/icon_defconfig
+++ b/arch/powerpc/configs/44x/icon_defconfig
@@ -47,8 +47,6 @@ CONFIG_FUSION_LOGGING=y
CONFIG_NETDEVICES=y
CONFIG_IBM_EMAC=y
# CONFIG_WLAN is not set
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=640
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=480
# CONFIG_MOUSE_PS2_ALPS is not set
# CONFIG_MOUSE_PS2_LOGIPS2PP is not set
# CONFIG_MOUSE_PS2_SYNAPTICS is not set
@@ -94,4 +92,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig
index 115a6b2be18b..d24bfa6ecd62 100644
--- a/arch/powerpc/configs/44x/iss476-smp_defconfig
+++ b/arch/powerpc/configs/44x/iss476-smp_defconfig
@@ -63,7 +63,6 @@ CONFIG_TMPFS=y
CONFIG_CRAMFS=y
# CONFIG_NETWORK_FILESYSTEMS is not set
CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_PPC_EARLY_DEBUG=y
@@ -72,5 +71,4 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig
index b999048c4ae6..5d3f685a7af8 100644
--- a/arch/powerpc/configs/44x/katmai_defconfig
+++ b/arch/powerpc/configs/44x/katmai_defconfig
@@ -60,4 +60,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig
index b8c9ee45d0a2..7b8355a5698d 100644
--- a/arch/powerpc/configs/44x/rainier_defconfig
+++ b/arch/powerpc/configs/44x/rainier_defconfig
@@ -66,4 +66,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig
index a4bb048448da..918cfb63f0c8 100644
--- a/arch/powerpc/configs/44x/redwood_defconfig
+++ b/arch/powerpc/configs/44x/redwood_defconfig
@@ -83,18 +83,14 @@ CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_CTS=y
-CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_LRW=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_XTS=y
-CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_XCBC=y
CONFIG_CRYPTO_MD4=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_SHA1=y
-CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=y
CONFIG_CRYPTO_ARC4=y
CONFIG_CRYPTO_BLOWFISH=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig
index b3792fd8111d..1e04122912f3 100644
--- a/arch/powerpc/configs/44x/sequoia_defconfig
+++ b/arch/powerpc/configs/44x/sequoia_defconfig
@@ -67,4 +67,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig
index ff6f86241418..42cc7b4ed95f 100644
--- a/arch/powerpc/configs/44x/taishan_defconfig
+++ b/arch/powerpc/configs/44x/taishan_defconfig
@@ -61,4 +61,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/virtex5_defconfig b/arch/powerpc/configs/44x/virtex5_defconfig
index ce052064bcbb..99cc3dc02df1 100644
--- a/arch/powerpc/configs/44x/virtex5_defconfig
+++ b/arch/powerpc/configs/44x/virtex5_defconfig
@@ -40,9 +40,9 @@ CONFIG_NETDEVICES=y
CONFIG_SERIO_XILINX_XPS_PS2=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_UARTLITE=y
CONFIG_SERIAL_UARTLITE_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_XILINX_HWICAP=y
CONFIG_GPIOLIB=y
CONFIG_GPIO_SYSFS=y
@@ -73,4 +73,3 @@ CONFIG_FONT_8x16=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_KERNEL=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig
index ab932488e68b..b5c866073efd 100644
--- a/arch/powerpc/configs/44x/warp_defconfig
+++ b/arch/powerpc/configs/44x/warp_defconfig
@@ -97,4 +97,3 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_SCHED_DEBUG is not set
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/52xx/cm5200_defconfig b/arch/powerpc/configs/52xx/cm5200_defconfig
index c1faac800806..73948e88ac82 100644
--- a/arch/powerpc/configs/52xx/cm5200_defconfig
+++ b/arch/powerpc/configs/52xx/cm5200_defconfig
@@ -77,4 +77,3 @@ CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/52xx/lite5200b_defconfig b/arch/powerpc/configs/52xx/lite5200b_defconfig
index 9493b02ac660..6fc7f786c83c 100644
--- a/arch/powerpc/configs/52xx/lite5200b_defconfig
+++ b/arch/powerpc/configs/52xx/lite5200b_defconfig
@@ -14,6 +14,7 @@ CONFIG_PPC_MPC52xx=y
CONFIG_PPC_MPC5200_SIMPLE=y
CONFIG_PPC_LITE5200=y
# CONFIG_PPC_PMAC is not set
+CONFIG_GEN_RTC=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -44,7 +45,6 @@ CONFIG_SERIAL_MPC52xx=y
CONFIG_SERIAL_MPC52xx_CONSOLE=y
CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
@@ -62,4 +62,3 @@ CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig
index fe8126bc1655..ae2a1f74103b 100644
--- a/arch/powerpc/configs/52xx/motionpro_defconfig
+++ b/arch/powerpc/configs/52xx/motionpro_defconfig
@@ -41,16 +41,16 @@ CONFIG_ATA=y
CONFIG_PATA_MPC52xx=y
CONFIG_NETDEVICES=y
CONFIG_FEC_MPC52xx=y
-CONFIG_MARVELL_PHY=y
+CONFIG_MDIO_BITBANG=y
+CONFIG_BROADCOM_PHY=y
+CONFIG_CICADA_PHY=y
CONFIG_DAVICOM_PHY=y
-CONFIG_QSEMI_PHY=y
+CONFIG_ICPLUS_PHY=y
CONFIG_LXT_PHY=y
-CONFIG_CICADA_PHY=y
-CONFIG_VITESSE_PHY=y
+CONFIG_MARVELL_PHY=y
+CONFIG_QSEMI_PHY=y
CONFIG_SMSC_PHY=y
-CONFIG_BROADCOM_PHY=y
-CONFIG_ICPLUS_PHY=y
-CONFIG_MDIO_BITBANG=y
+CONFIG_VITESSE_PHY=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -90,4 +90,3 @@ CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/52xx/tqm5200_defconfig b/arch/powerpc/configs/52xx/tqm5200_defconfig
index b8b316b884aa..0777e6efd22d 100644
--- a/arch/powerpc/configs/52xx/tqm5200_defconfig
+++ b/arch/powerpc/configs/52xx/tqm5200_defconfig
@@ -48,7 +48,6 @@ CONFIG_PATA_PLATFORM=y
CONFIG_NETDEVICES=y
CONFIG_FEC_MPC52xx=y
CONFIG_LXT_PHY=y
-CONFIG_FIXED_PHY=y
CONFIG_SERIAL_MPC52xx=y
CONFIG_SERIAL_MPC52xx_CONSOLE=y
CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
@@ -92,4 +91,3 @@ CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/asp8347_defconfig b/arch/powerpc/configs/83xx/asp8347_defconfig
index b60cac088a7b..dd884df32dfd 100644
--- a/arch/powerpc/configs/83xx/asp8347_defconfig
+++ b/arch/powerpc/configs/83xx/asp8347_defconfig
@@ -42,7 +42,6 @@ CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -71,4 +70,3 @@ CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/kmeter1_defconfig b/arch/powerpc/configs/83xx/kmeter1_defconfig
index 9547dcdd6489..d21b5cb365f2 100644
--- a/arch/powerpc/configs/83xx/kmeter1_defconfig
+++ b/arch/powerpc/configs/83xx/kmeter1_defconfig
@@ -55,7 +55,6 @@ CONFIG_HDLC=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_HW_RANDOM=y
diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
index 80aa844c1428..1f69f4edf074 100644
--- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
@@ -48,8 +48,6 @@ CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_E100=y
CONFIG_CICADA_PHY=y
-CONFIG_FIXED_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -87,4 +85,3 @@ CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
index d89d13bc6901..797fc3ffddee 100644
--- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
@@ -47,7 +47,6 @@ CONFIG_MD_RAID1=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_E100=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -85,4 +84,3 @@ CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig
index e789518a2881..4f914906ee4b 100644
--- a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig
+++ b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig
@@ -14,7 +14,6 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_MPC832x_MDS=y
-CONFIG_QUICC_ENGINE=y
CONFIG_MATH_EMULATION=y
CONFIG_PCI=y
CONFIG_NET=y
@@ -36,7 +35,6 @@ CONFIG_SCSI=y
CONFIG_NETDEVICES=y
CONFIG_UCC_GETH=y
CONFIG_DAVICOM_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -50,6 +48,7 @@ CONFIG_I2C_MPC=y
CONFIG_WATCHDOG=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1374=y
+CONFIG_QUICC_ENGINE=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
@@ -59,4 +58,3 @@ CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
index 917a49ca2bd1..a484eb8401e8 100644
--- a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
@@ -14,7 +14,7 @@ CONFIG_LDM_PARTITION=y
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_MPC832x_RDB=y
-CONFIG_QUICC_ENGINE=y
+CONFIG_GEN_RTC=y
CONFIG_MATH_EMULATION=y
CONFIG_PCI=y
CONFIG_NET=y
@@ -38,7 +38,6 @@ CONFIG_NETDEVICES=y
CONFIG_UCC_GETH=y
CONFIG_E1000=y
CONFIG_ICPLUS_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -46,7 +45,6 @@ CONFIG_ICPLUS_PHY=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_HW_RANDOM=y
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
@@ -62,6 +60,7 @@ CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
CONFIG_USB_STORAGE=y
CONFIG_MMC=y
CONFIG_MMC_SPI=y
+CONFIG_QUICC_ENGINE=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
@@ -78,4 +77,3 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
index 00f636e95cc8..37f4d93b3f81 100644
--- a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
+++ b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
@@ -49,7 +49,6 @@ CONFIG_MD_RAID1=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_CICADA_PHY=y
-CONFIG_FIXED_PHY=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -84,4 +83,3 @@ CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
index a539d44d1dba..7adb6708a761 100644
--- a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
+++ b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
@@ -75,4 +75,3 @@ CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig
index 9f0ddc830c82..d7ce3551529d 100644
--- a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig
+++ b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig
@@ -35,7 +35,6 @@ CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_E100=y
CONFIG_MARVELL_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -58,4 +57,3 @@ CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig
index ceed4c1f0ab5..92134cee3f37 100644
--- a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig
+++ b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig
@@ -14,7 +14,6 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_MPC836x_MDS=y
-CONFIG_QUICC_ENGINE=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -41,7 +40,6 @@ CONFIG_SCSI=y
CONFIG_NETDEVICES=y
CONFIG_UCC_GETH=y
CONFIG_MARVELL_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -55,6 +53,7 @@ CONFIG_I2C_MPC=y
CONFIG_WATCHDOG=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1374=y
+CONFIG_QUICC_ENGINE=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
@@ -64,4 +63,3 @@ CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig
index a6819bf3ef5e..97f7ea5f205f 100644
--- a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig
+++ b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig
@@ -12,7 +12,6 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_MPC836x_RDK=y
-CONFIG_QUICC_ENGINE=y
CONFIG_QE_GPIO=y
CONFIG_PCI=y
CONFIG_NET=y
@@ -39,11 +38,9 @@ CONFIG_BLK_DEV_RAM_SIZE=32768
CONFIG_NETDEVICES=y
CONFIG_UCC_GETH=y
CONFIG_BROADCOM_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_QE=y
@@ -63,6 +60,7 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_USB_SUPPORT is not set
+CONFIG_QUICC_ENGINE=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
@@ -72,4 +70,3 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_PPC_EARLY_DEBUG=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig
index 4bd1992e4d98..ee7510a33d06 100644
--- a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig
+++ b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig
@@ -11,6 +11,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_MPC837x_MDS=y
+CONFIG_GEN_RTC=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -35,7 +36,6 @@ CONFIG_SATA_FSL=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_MARVELL_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -43,7 +43,6 @@ CONFIG_MARVELL_PHY=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
@@ -58,4 +57,3 @@ CONFIG_ROOT_NFS=y
CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
index 2d4bb63882b8..8966a9af4230 100644
--- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
@@ -11,6 +11,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_MPC837x_RDB=y
+CONFIG_GEN_RTC=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -41,9 +42,7 @@ CONFIG_MD_RAID456=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_MARVELL_PHY=y
-CONFIG_FIXED_PHY=y
CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -51,7 +50,6 @@ CONFIG_INPUT_FF_MEMLESS=m
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
@@ -86,4 +84,3 @@ CONFIG_CRC_T10DIF=y
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/sbc834x_defconfig b/arch/powerpc/configs/83xx/sbc834x_defconfig
index b3380dbd1925..7d74699334da 100644
--- a/arch/powerpc/configs/83xx/sbc834x_defconfig
+++ b/arch/powerpc/configs/83xx/sbc834x_defconfig
@@ -11,6 +11,7 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_SBC834x=y
+CONFIG_GEN_RTC=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -41,7 +42,6 @@ CONFIG_BLK_DEV_SD=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_BROADCOM_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -52,7 +52,6 @@ CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_NR_UARTS=2
CONFIG_SERIAL_8250_RUNTIME_UARTS=2
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
@@ -72,5 +71,4 @@ CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
index a917f7afb4f9..dd98f43b2fb8 100644
--- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig
+++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
@@ -22,7 +22,6 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_GE_IMP3A=y
-CONFIG_QUICC_ENGINE=y
CONFIG_QE_GPIO=y
CONFIG_CPM2=y
CONFIG_HIGHMEM=y
@@ -161,6 +160,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_RX8581=y
CONFIG_DMADEVICES=y
CONFIG_FSL_DMA=y
+CONFIG_QUICC_ENGINE=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
@@ -233,5 +233,4 @@ CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_SHA512=m
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DEV_TALITOS=y
diff --git a/arch/powerpc/configs/85xx/ksi8560_defconfig b/arch/powerpc/configs/85xx/ksi8560_defconfig
index bd814dfb0bbd..9ce6f48cfb61 100644
--- a/arch/powerpc/configs/85xx/ksi8560_defconfig
+++ b/arch/powerpc/configs/85xx/ksi8560_defconfig
@@ -8,6 +8,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_MSDOS_PARTITION is not set
CONFIG_KSI8560=y
CONFIG_CPM2=y
+CONFIG_GEN_RTC=y
CONFIG_HIGHMEM=y
CONFIG_BINFMT_MISC=y
CONFIG_MATH_EMULATION=y
@@ -39,14 +40,12 @@ CONFIG_FS_ENET=y
CONFIG_FS_ENET_MDIO_FCC=y
CONFIG_GIANFAR=y
CONFIG_MARVELL_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
@@ -57,4 +56,3 @@ CONFIG_DEBUG_FS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_MUTEXES=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
index 32af10def641..5fbc3f904046 100644
--- a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
@@ -9,6 +9,7 @@ CONFIG_EXPERT=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_MSDOS_PARTITION is not set
CONFIG_MPC8540_ADS=y
+CONFIG_GEN_RTC=y
CONFIG_BINFMT_MISC=y
CONFIG_MATH_EMULATION=y
# CONFIG_SECCOMP is not set
@@ -30,7 +31,6 @@ CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -38,7 +38,6 @@ CONFIG_GIANFAR=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
@@ -47,4 +46,3 @@ CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_MUTEXES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
index a52b2170ee33..ff981d7905c7 100644
--- a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
@@ -7,6 +7,7 @@ CONFIG_EXPERT=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_MSDOS_PARTITION is not set
CONFIG_MPC8560_ADS=y
+CONFIG_GEN_RTC=y
CONFIG_BINFMT_MISC=y
CONFIG_MATH_EMULATION=y
# CONFIG_SECCOMP is not set
@@ -32,16 +33,14 @@ CONFIG_FS_ENET=y
# CONFIG_FS_ENET_HAS_SCC is not set
CONFIG_GIANFAR=y
CONFIG_E1000=y
-CONFIG_MARVELL_PHY=y
CONFIG_DAVICOM_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_MARVELL_PHY=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
@@ -50,4 +49,3 @@ CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_MUTEXES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
index 002bb48abaa3..974f0706d777 100644
--- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
+++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
@@ -9,6 +9,7 @@ CONFIG_EXPERT=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_MSDOS_PARTITION is not set
CONFIG_MPC85xx_CDS=y
+CONFIG_GEN_RTC=y
CONFIG_BINFMT_MISC=y
CONFIG_MATH_EMULATION=y
# CONFIG_SECCOMP is not set
@@ -35,7 +36,6 @@ CONFIG_BLK_DEV_VIA82CXXX=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_E1000=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -43,7 +43,6 @@ CONFIG_E1000=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
@@ -52,4 +51,3 @@ CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_MUTEXES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/sbc8548_defconfig b/arch/powerpc/configs/85xx/sbc8548_defconfig
index 97ae02377cf3..7e3e84a842e4 100644
--- a/arch/powerpc/configs/85xx/sbc8548_defconfig
+++ b/arch/powerpc/configs/85xx/sbc8548_defconfig
@@ -6,6 +6,7 @@ CONFIG_EXPERT=y
CONFIG_SLAB=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_SBC8548=y
+CONFIG_GEN_RTC=y
CONFIG_BINFMT_MISC=y
CONFIG_MATH_EMULATION=y
# CONFIG_SECCOMP is not set
@@ -36,7 +37,6 @@ CONFIG_BLK_DEV_RAM=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_BROADCOM_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -44,10 +44,8 @@ CONFIG_BROADCOM_PHY=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
# CONFIG_USB_SUPPORT is not set
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/socrates_defconfig b/arch/powerpc/configs/85xx/socrates_defconfig
index 13579cb30539..6106fadbbd8b 100644
--- a/arch/powerpc/configs/85xx/socrates_defconfig
+++ b/arch/powerpc/configs/85xx/socrates_defconfig
@@ -42,8 +42,6 @@ CONFIG_BLK_DEV_SD=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_MARVELL_PHY=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=800
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=480
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -86,4 +84,3 @@ CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_FONTS=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig
index 384926f3ce1d..5b9cc01b9098 100644
--- a/arch/powerpc/configs/85xx/stx_gp3_defconfig
+++ b/arch/powerpc/configs/85xx/stx_gp3_defconfig
@@ -39,8 +39,6 @@ CONFIG_SCSI_CONSTANTS=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_MARVELL_PHY=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1280
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1024
CONFIG_INPUT_JOYDEV=m
CONFIG_INPUT_EVDEV=m
# CONFIG_VT is not set
@@ -68,4 +66,3 @@ CONFIG_CRC_T10DIF=m
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_BDI_SWITCH=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/tqm8540_defconfig b/arch/powerpc/configs/85xx/tqm8540_defconfig
index 908f3885f4a5..98982a0e82d8 100644
--- a/arch/powerpc/configs/85xx/tqm8540_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8540_defconfig
@@ -9,6 +9,7 @@ CONFIG_EXPERT=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_MSDOS_PARTITION is not set
CONFIG_TQM8540=y
+CONFIG_GEN_RTC=y
CONFIG_MATH_EMULATION=y
CONFIG_PCI=y
CONFIG_NET=y
@@ -35,14 +36,12 @@ CONFIG_BLK_DEV_VIA82CXXX=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_E100=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
@@ -56,4 +55,3 @@ CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/tqm8541_defconfig b/arch/powerpc/configs/85xx/tqm8541_defconfig
index f47e57610b7c..a6e21db1dafe 100644
--- a/arch/powerpc/configs/85xx/tqm8541_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8541_defconfig
@@ -9,6 +9,7 @@ CONFIG_EXPERT=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_MSDOS_PARTITION is not set
CONFIG_TQM8541=y
+CONFIG_GEN_RTC=y
CONFIG_MATH_EMULATION=y
CONFIG_PCI=y
CONFIG_NET=y
@@ -35,7 +36,6 @@ CONFIG_BLK_DEV_VIA82CXXX=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_E100=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -44,7 +44,6 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
@@ -58,4 +57,3 @@ CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/tqm8548_defconfig b/arch/powerpc/configs/85xx/tqm8548_defconfig
index 42f5d0a7698e..2697e4e8a761 100644
--- a/arch/powerpc/configs/85xx/tqm8548_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8548_defconfig
@@ -43,7 +43,6 @@ CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -66,4 +65,3 @@ CONFIG_ROOT_NFS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_MUTEXES=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/tqm8555_defconfig b/arch/powerpc/configs/85xx/tqm8555_defconfig
index 71552b7929cd..ca1de3979474 100644
--- a/arch/powerpc/configs/85xx/tqm8555_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8555_defconfig
@@ -9,6 +9,7 @@ CONFIG_EXPERT=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_MSDOS_PARTITION is not set
CONFIG_TQM8555=y
+CONFIG_GEN_RTC=y
CONFIG_MATH_EMULATION=y
CONFIG_PCI=y
CONFIG_NET=y
@@ -35,7 +36,6 @@ CONFIG_BLK_DEV_VIA82CXXX=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_E100=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -44,7 +44,6 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
@@ -58,4 +57,3 @@ CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/tqm8560_defconfig b/arch/powerpc/configs/85xx/tqm8560_defconfig
index 25aac973d6d7..ca3b8c8ef30f 100644
--- a/arch/powerpc/configs/85xx/tqm8560_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8560_defconfig
@@ -9,6 +9,7 @@ CONFIG_EXPERT=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_MSDOS_PARTITION is not set
CONFIG_TQM8560=y
+CONFIG_GEN_RTC=y
CONFIG_MATH_EMULATION=y
CONFIG_PCI=y
CONFIG_NET=y
@@ -35,7 +36,6 @@ CONFIG_BLK_DEV_VIA82CXXX=y
CONFIG_NETDEVICES=y
CONFIG_GIANFAR=y
CONFIG_E100=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -44,7 +44,6 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
@@ -58,4 +57,3 @@ CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
index 72900b84d3e0..6531139a8a8d 100644
--- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
+++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
@@ -54,7 +54,6 @@ CONFIG_IP_PIMSM_V2=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-CONFIG_IPV6=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
CONFIG_MTD_REDBOOT_PARTS=y
@@ -86,7 +85,6 @@ CONFIG_DUMMY=y
CONFIG_GIANFAR=y
CONFIG_E1000=y
CONFIG_BROADCOM_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
CONFIG_SERIO_LIBPS2=y
@@ -107,8 +105,8 @@ CONFIG_SENSORS_LM90=y
CONFIG_WATCHDOG=y
CONFIG_USB=y
CONFIG_USB_MON=y
-CONFIG_USB_ISP1760=y
CONFIG_USB_STORAGE=y
+CONFIG_USB_ISP1760=y
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
CONFIG_LEDS_PCA955X=y
@@ -143,4 +141,3 @@ CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig
index 6a3f825452e9..935ea3ade7de 100644
--- a/arch/powerpc/configs/adder875_defconfig
+++ b/arch/powerpc/configs/adder875_defconfig
@@ -12,6 +12,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_IOSCHED_CFQ is not set
CONFIG_PPC_ADDER875=y
CONFIG_8xx_COPYBACK=y
+CONFIG_GEN_RTC=y
CONFIG_HZ_1000=y
# CONFIG_SECCOMP is not set
CONFIG_NET=y
@@ -41,7 +42,6 @@ CONFIG_DAVICOM_PHY=y
# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
# CONFIG_HWMON is not set
CONFIG_THERMAL=y
# CONFIG_USB_SUPPORT is not set
diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig
index 8d3e3c41258d..12f397d403c6 100644
--- a/arch/powerpc/configs/amigaone_defconfig
+++ b/arch/powerpc/configs/amigaone_defconfig
@@ -45,7 +45,6 @@ CONFIG_PARPORT_PC_FIFO=y
CONFIG_BLK_DEV_FD=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
-CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=y
CONFIG_BLK_DEV_SR=y
@@ -120,5 +119,4 @@ CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
CONFIG_CRYPTO_CBC=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/be.config b/arch/powerpc/configs/be.config
new file mode 100644
index 000000000000..c5cdc99a6530
--- /dev/null
+++ b/arch/powerpc/configs/be.config
@@ -0,0 +1 @@
+CONFIG_CPU_BIG_ENDIAN=y
diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig
index 7c9d95370150..f1552af9eecc 100644
--- a/arch/powerpc/configs/c2k_defconfig
+++ b/arch/powerpc/configs/c2k_defconfig
@@ -27,6 +27,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=m
CONFIG_CPU_FREQ_GOV_ONDEMAND=m
+CONFIG_GEN_RTC=y
CONFIG_HIGHMEM=y
CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_BINFMT_MISC=y
@@ -197,7 +198,6 @@ CONFIG_TUN=m
# CONFIG_ATM_DRIVERS is not set
CONFIG_MV643XX_ETH=y
CONFIG_VITESSE_PHY=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -209,7 +209,6 @@ CONFIG_SERIAL_NONSTANDARD=y
CONFIG_SERIAL_MPSC=y
CONFIG_SERIAL_MPSC_CONSOLE=y
CONFIG_NVRAM=m
-CONFIG_GEN_RTC=m
CONFIG_RAW_DRIVER=y
CONFIG_MAX_RAW_DEVS=8192
CONFIG_I2C=m
@@ -390,7 +389,6 @@ CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SHA1=y
@@ -402,4 +400,3 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index aa564599e368..560a93a84efe 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -4,7 +4,6 @@ CONFIG_ALTIVEC=y
CONFIG_SMP=y
CONFIG_NR_CPUS=4
CONFIG_SYSVIPC=y
-CONFIG_FHANDLE=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
@@ -34,10 +33,10 @@ CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_GEN_RTC=y
CONFIG_BINFMT_MISC=m
CONFIG_IRQ_ALL_CPUS=y
CONFIG_NUMA=y
-CONFIG_MEMORY_HOTREMOVE=y
CONFIG_PPC_64K_PAGES=y
CONFIG_SCHED_SMT=y
CONFIG_PCIEPORTBUS=y
@@ -53,7 +52,6 @@ CONFIG_IP_PNP_RARP=y
CONFIG_NET_IPIP=y
CONFIG_SYN_COOKIES=y
# CONFIG_INET_XFRM_MODE_BEET is not set
-CONFIG_IPV6=y
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
CONFIG_INET6_IPCOMP=m
@@ -141,7 +139,6 @@ CONFIG_SKY2=m
CONFIG_GELIC_NET=m
CONFIG_GELIC_WIRELESS=y
CONFIG_SPIDER_NET=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO_I8042 is not set
@@ -149,8 +146,6 @@ CONFIG_SPIDER_NET=y
CONFIG_SERIAL_NONSTANDARD=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_TXX9_NR_UARTS=2
-CONFIG_SERIAL_TXX9_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_HVC_RTAS=y
CONFIG_IPMI_HANDLER=m
@@ -159,7 +154,6 @@ CONFIG_IPMI_SI=m
CONFIG_IPMI_WATCHDOG=m
CONFIG_IPMI_POWEROFF=m
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_WATCHDOG=y
# CONFIG_VGA_CONSOLE is not set
@@ -207,7 +201,6 @@ CONFIG_NLS_ISO8859_13=m
CONFIG_NLS_ISO8859_14=m
CONFIG_NLS_ISO8859_15=m
# CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_MUTEXES=y
diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig
index 1f6f90cd8aff..a203b1cf67d3 100644
--- a/arch/powerpc/configs/chrp32_defconfig
+++ b/arch/powerpc/configs/chrp32_defconfig
@@ -16,6 +16,7 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_MAC_PARTITION=y
# CONFIG_PPC_PMAC is not set
+CONFIG_GEN_RTC=y
CONFIG_HIGHMEM=y
CONFIG_BINFMT_MISC=y
CONFIG_IRQ_ALL_CPUS=y
@@ -79,7 +80,6 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_HW_RANDOM is not set
CONFIG_NVRAM=y
-CONFIG_GEN_RTC=y
# CONFIG_HWMON is not set
CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
@@ -124,5 +124,4 @@ CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
CONFIG_CRYPTO_CBC=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig
index 3403b85f9d81..2e6c8a45ae88 100644
--- a/arch/powerpc/configs/ep8248e_defconfig
+++ b/arch/powerpc/configs/ep8248e_defconfig
@@ -70,5 +70,4 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig
index 95411aeeeb8d..7cb590e8f8fd 100644
--- a/arch/powerpc/configs/ep88xc_defconfig
+++ b/arch/powerpc/configs/ep88xc_defconfig
@@ -14,6 +14,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_IOSCHED_CFQ is not set
CONFIG_PPC_EP88XC=y
CONFIG_8xx_COPYBACK=y
+CONFIG_GEN_RTC=y
CONFIG_HZ_100=y
# CONFIG_SECCOMP is not set
CONFIG_NET=y
@@ -45,7 +46,6 @@ CONFIG_LXT_PHY=y
# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
# CONFIG_HWMON is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_DNOTIFY is not set
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index e18f2e06553f..e084fa548d73 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -4,7 +4,6 @@ CONFIG_SMP=y
CONFIG_NR_CPUS=4
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
@@ -25,6 +24,7 @@ CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_PMAC64=y
+CONFIG_GEN_RTC=y
CONFIG_KEXEC=y
CONFIG_IRQ_ALL_CPUS=y
CONFIG_PCI_MSI=y
@@ -115,7 +115,6 @@ CONFIG_USB_USBNET=m
# CONFIG_USB_NET_NET1080 is not set
# CONFIG_USB_NET_CDC_SUBSET is not set
# CONFIG_USB_NET_ZAURUS is not set
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_JOYDEV=m
CONFIG_INPUT_EVDEV=y
# CONFIG_KEYBOARD_ATKBD is not set
@@ -123,7 +122,6 @@ CONFIG_INPUT_EVDEV=y
# CONFIG_SERIO_I8042 is not set
# CONFIG_SERIO_SERPORT is not set
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
CONFIG_RAW_DRIVER=y
CONFIG_I2C_CHARDEV=y
CONFIG_AGP=m
@@ -213,20 +211,20 @@ CONFIG_USB_SERIAL_CYBERJACK=m
CONFIG_USB_SERIAL_XIRCOM=m
CONFIG_USB_SERIAL_OMNINET=m
CONFIG_USB_APPLEDISPLAY=m
-CONFIG_FS_DAX=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
-CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=y
CONFIG_REISERFS_FS_XATTR=y
CONFIG_REISERFS_FS_POSIX_ACL=y
CONFIG_REISERFS_FS_SECURITY=y
CONFIG_XFS_FS=m
CONFIG_XFS_POSIX_ACL=y
+CONFIG_FS_DAX=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
@@ -254,14 +252,12 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_ISO8859_15=y
CONFIG_NLS_UTF8=y
CONFIG_CRC_T10DIF=y
-CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_LATENCYTOP=y
CONFIG_BOOTX_TEXT=y
CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
@@ -276,5 +272,4 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig
index c0eec4a5df4e..79bbc8238b32 100644
--- a/arch/powerpc/configs/gamecube_defconfig
+++ b/arch/powerpc/configs/gamecube_defconfig
@@ -45,7 +45,6 @@ CONFIG_BLK_DEV_RAM_COUNT=2
CONFIG_NETDEVICES=y
# CONFIG_WLAN is not set
CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV is not set
CONFIG_INPUT_JOYDEV=y
CONFIG_INPUT_EVDEV=y
# CONFIG_KEYBOARD_ATKBD is not set
@@ -54,7 +53,6 @@ CONFIG_INPUT_JOYSTICK=y
# CONFIG_SERIO_I8042 is not set
# CONFIG_SERIO_SERPORT is not set
CONFIG_LEGACY_PTY_COUNT=64
-# CONFIG_DEVKMEM is not set
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
CONFIG_FB=y
diff --git a/arch/powerpc/configs/holly_defconfig b/arch/powerpc/configs/holly_defconfig
index e56e80090529..71d8d2430b6c 100644
--- a/arch/powerpc/configs/holly_defconfig
+++ b/arch/powerpc/configs/holly_defconfig
@@ -11,6 +11,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_PMAC is not set
CONFIG_EMBEDDED6xx=y
CONFIG_PPC_HOLLY=y
+CONFIG_GEN_RTC=y
CONFIG_BINFMT_MISC=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyS0,115200"
@@ -37,7 +38,6 @@ CONFIG_NETDEVICES=y
CONFIG_VORTEX=y
CONFIG_TSI108_ETH=y
CONFIG_PHYLIB=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -49,7 +49,6 @@ CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig
index b413c19d7031..477794c41d50 100644
--- a/arch/powerpc/configs/linkstation_defconfig
+++ b/arch/powerpc/configs/linkstation_defconfig
@@ -26,7 +26,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CT_PROTO_SCTP=m
CONFIG_NF_CONNTRACK_AMANDA=m
CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_H323=m
@@ -79,7 +78,6 @@ CONFIG_NET_TULIP=y
CONFIG_TULIP=y
CONFIG_TULIP_MMIO=y
CONFIG_R8169=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=m
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -142,4 +140,3 @@ CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_DEFLATE=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig
index c4018179e219..078cdb427fc9 100644
--- a/arch/powerpc/configs/maple_defconfig
+++ b/arch/powerpc/configs/maple_defconfig
@@ -3,7 +3,6 @@ CONFIG_SMP=y
CONFIG_NR_CPUS=4
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
@@ -24,6 +23,7 @@ CONFIG_MAC_PARTITION=y
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_MAPLE=y
CONFIG_UDBG_RTAS_CONSOLE=y
+CONFIG_GEN_RTC=y
CONFIG_KEXEC=y
CONFIG_IRQ_ALL_CPUS=y
CONFIG_PCI_MSI=y
@@ -53,9 +53,6 @@ CONFIG_AMD8111_ETH=y
CONFIG_TIGON3=y
CONFIG_E1000=y
CONFIG_USB_PEGASUS=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1600
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1200
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -63,7 +60,6 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_HVC_RTAS=y
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_AMD8111=y
@@ -100,8 +96,8 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49W=y
CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
CONFIG_USB_SERIAL_TI=m
CONFIG_EXT2_FS=y
-CONFIG_FS_DAX=y
CONFIG_EXT4_FS=y
+CONFIG_FS_DAX=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
@@ -127,5 +123,4 @@ CONFIG_BOOTX_TEXT=y
CONFIG_PPC_EARLY_DEBUG=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig
index 197acaa026eb..5d5f08e5b8d9 100644
--- a/arch/powerpc/configs/mgcoge_defconfig
+++ b/arch/powerpc/configs/mgcoge_defconfig
@@ -46,7 +46,6 @@ CONFIG_BLK_DEV_RAM=y
CONFIG_NETDEVICES=y
CONFIG_FS_ENET=y
CONFIG_FS_ENET_MDIO_FCC=y
-CONFIG_FIXED_PHY=y
# CONFIG_WLAN is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
@@ -83,5 +82,4 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_BDI_SWITCH=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig
index 0b4854cf26cb..10be5773ad5d 100644
--- a/arch/powerpc/configs/mpc512x_defconfig
+++ b/arch/powerpc/configs/mpc512x_defconfig
@@ -12,6 +12,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_IOSCHED_CFQ is not set
# CONFIG_PPC_CHRP is not set
CONFIG_PPC_MPC512x=y
+CONFIG_MPC512x_LPBFIFO=y
CONFIG_MPC5121_ADS=y
CONFIG_MPC512x_GENERIC=y
CONFIG_PDM360NG=y
@@ -61,25 +62,22 @@ CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_SG=y
CONFIG_NETDEVICES=y
CONFIG_FS_ENET=y
-CONFIG_MARVELL_PHY=y
-CONFIG_DAVICOM_PHY=y
-CONFIG_QSEMI_PHY=y
-CONFIG_LXT_PHY=y
-CONFIG_CICADA_PHY=y
-CONFIG_VITESSE_PHY=y
-CONFIG_SMSC_PHY=y
+CONFIG_MDIO_BITBANG=y
CONFIG_BROADCOM_PHY=y
+CONFIG_CICADA_PHY=y
+CONFIG_DAVICOM_PHY=y
CONFIG_ICPLUS_PHY=y
-CONFIG_REALTEK_PHY=y
+CONFIG_LSI_ET1011C_PHY=y
+CONFIG_LXT_PHY=y
+CONFIG_MARVELL_PHY=y
CONFIG_NATIONAL_PHY=y
+CONFIG_QSEMI_PHY=y
+CONFIG_REALTEK_PHY=y
+CONFIG_SMSC_PHY=y
CONFIG_STE10XP=y
-CONFIG_LSI_ET1011C_PHY=y
-CONFIG_FIXED_PHY=y
-CONFIG_MDIO_BITBANG=y
+CONFIG_VITESSE_PHY=y
# CONFIG_WLAN is not set
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=y
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_MPC52xx=y
CONFIG_SERIAL_MPC52xx_CONSOLE=y
CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
@@ -111,10 +109,9 @@ CONFIG_RTC_DRV_M41T80=y
CONFIG_RTC_DRV_MPC5121=y
CONFIG_DMADEVICES=y
CONFIG_MPC512X_DMA=y
-CONFIG_MPC512x_LPBFIFO=y
-CONFIG_FS_DAX=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
+CONFIG_FS_DAX=y
# CONFIG_DNOTIFY is not set
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
@@ -126,5 +123,4 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig
index 88336d0df0d6..7a2b2aa37def 100644
--- a/arch/powerpc/configs/mpc5200_defconfig
+++ b/arch/powerpc/configs/mpc5200_defconfig
@@ -50,7 +50,6 @@ CONFIG_NETDEVICES=y
CONFIG_FEC_MPC52xx=y
CONFIG_AMD_PHY=y
CONFIG_LXT_PHY=y
-CONFIG_FIXED_PHY=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -71,12 +70,10 @@ CONFIG_SENSORS_LM87=m
CONFIG_WATCHDOG=y
CONFIG_MFD_SM501=m
CONFIG_DRM=y
-CONFIG_FB=y
CONFIG_FB_FOREIGN_ENDIAN=y
CONFIG_FB_RADEON=y
CONFIG_FB_SM501=m
# CONFIG_VGA_CONSOLE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=y
CONFIG_SND=y
@@ -130,4 +127,3 @@ CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/mpc7448_hpc2_defconfig b/arch/powerpc/configs/mpc7448_hpc2_defconfig
index d933326b4cf9..4b14c02b437c 100644
--- a/arch/powerpc/configs/mpc7448_hpc2_defconfig
+++ b/arch/powerpc/configs/mpc7448_hpc2_defconfig
@@ -11,6 +11,7 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_PMAC is not set
CONFIG_EMBEDDED6xx=y
CONFIG_MPC7448HPC2=y
+CONFIG_GEN_RTC=y
CONFIG_BINFMT_MISC=y
# CONFIG_SECCOMP is not set
CONFIG_NET=y
@@ -38,7 +39,6 @@ CONFIG_8139TOO=y
# CONFIG_8139TOO_PIO is not set
CONFIG_TSI108_ETH=y
CONFIG_PHYLIB=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -46,7 +46,6 @@ CONFIG_PHYLIB=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
@@ -54,4 +53,3 @@ CONFIG_TMPFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_CRC_T10DIF=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/mpc8272_ads_defconfig b/arch/powerpc/configs/mpc8272_ads_defconfig
index 4cb0f617c0d6..b1e88b64536b 100644
--- a/arch/powerpc/configs/mpc8272_ads_defconfig
+++ b/arch/powerpc/configs/mpc8272_ads_defconfig
@@ -77,5 +77,4 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig
index 6574477fd726..d1b82035d35f 100644
--- a/arch/powerpc/configs/mpc83xx_defconfig
+++ b/arch/powerpc/configs/mpc83xx_defconfig
@@ -21,7 +21,6 @@ CONFIG_MPC837x_MDS=y
CONFIG_MPC837x_RDB=y
CONFIG_SBC834x=y
CONFIG_ASP834x=y
-CONFIG_QUICC_ENGINE=y
CONFIG_QE_GPIO=y
CONFIG_MATH_EMULATION=y
CONFIG_PCI=y
@@ -60,13 +59,11 @@ CONFIG_SATA_SIL=y
CONFIG_NETDEVICES=y
CONFIG_UCC_GETH=y
CONFIG_GIANFAR=y
-CONFIG_MARVELL_PHY=y
CONFIG_DAVICOM_PHY=y
-CONFIG_VITESSE_PHY=y
CONFIG_ICPLUS_PHY=y
-CONFIG_FIXED_PHY=y
+CONFIG_MARVELL_PHY=y
+CONFIG_VITESSE_PHY=y
CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -99,6 +96,7 @@ CONFIG_USB_EHCI_FSL=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_RTC_DRV_DS1374=y
+CONFIG_QUICC_ENGINE=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
@@ -109,7 +107,5 @@ CONFIG_ROOT_NFS=y
CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DEV_TALITOS=y
diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig
index 998454471a48..f1f176c29fa3 100644
--- a/arch/powerpc/configs/mpc866_ads_defconfig
+++ b/arch/powerpc/configs/mpc866_ads_defconfig
@@ -14,6 +14,7 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_MPC86XADS=y
CONFIG_8xx_COPYBACK=y
CONFIG_8xx_CPU6=y
+CONFIG_GEN_RTC=y
CONFIG_HZ_1000=y
CONFIG_MATH_EMULATION=y
# CONFIG_SECCOMP is not set
@@ -28,12 +29,10 @@ CONFIG_SYN_COOKIES=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_NETDEVICES=y
CONFIG_FS_ENET=y
-CONFIG_FIXED_PHY=y
# CONFIG_VT is not set
# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT4_FS=y
@@ -43,4 +42,3 @@ CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_CRC_CCITT=y
CONFIG_CRC32_SLICEBY4=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/mpc86xx_basic_defconfig b/arch/powerpc/configs/mpc86xx_basic_defconfig
index 3283f0586e11..67bd1fa036ee 100644
--- a/arch/powerpc/configs/mpc86xx_basic_defconfig
+++ b/arch/powerpc/configs/mpc86xx_basic_defconfig
@@ -1,11 +1,11 @@
-CONFIG_HIGHMEM=y
-CONFIG_KEXEC=y
CONFIG_PPC_86xx=y
-CONFIG_PROC_KCORE=y
+CONFIG_MPC8641_HPCN=y
+CONFIG_SBC8641D=y
+CONFIG_MPC8610_HPCD=y
CONFIG_GEF_PPC9A=y
CONFIG_GEF_SBC310=y
CONFIG_GEF_SBC610=y
-CONFIG_MPC8610_HPCD=y
-CONFIG_MPC8641_HPCN=y
-CONFIG_SBC8641D=y
CONFIG_MVME7100=y
+CONFIG_HIGHMEM=y
+CONFIG_KEXEC=y
+CONFIG_PROC_KCORE=y
diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig
index 91f53f1bec5d..ec3fcc2bf737 100644
--- a/arch/powerpc/configs/mpc885_ads_defconfig
+++ b/arch/powerpc/configs/mpc885_ads_defconfig
@@ -13,6 +13,7 @@ CONFIG_EXPERT=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_IOSCHED_CFQ is not set
CONFIG_8xx_COPYBACK=y
+CONFIG_GEN_RTC=y
CONFIG_HZ_100=y
# CONFIG_SECCOMP is not set
CONFIG_NET=y
@@ -51,7 +52,6 @@ CONFIG_DAVICOM_PHY=y
# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
# CONFIG_HWMON is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_DNOTIFY is not set
diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig
index 139add95a16a..63e38c7220f1 100644
--- a/arch/powerpc/configs/mvme5100_defconfig
+++ b/arch/powerpc/configs/mvme5100_defconfig
@@ -35,7 +35,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CT_PROTO_SCTP=m
CONFIG_NF_CONNTRACK_AMANDA=m
CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_H323=m
@@ -70,7 +69,6 @@ CONFIG_TUN=m
# CONFIG_NET_VENDOR_3COM is not set
CONFIG_E100=y
# CONFIG_WLAN is not set
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -130,4 +128,3 @@ CONFIG_CRYPTO_DES=y
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_DEFLATE=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index fe43ff47bd2f..8cf4a46bef86 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -3,7 +3,6 @@ CONFIG_ALTIVEC=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_SYSVIPC=y
-CONFIG_FHANDLE=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BLK_DEV_INITRD=y
@@ -145,6 +144,7 @@ CONFIG_EDAC=y
CONFIG_EDAC_PASEMI=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
+CONFIG_RAS=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
@@ -167,7 +167,6 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
CONFIG_CRC_CCITT=y
CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
@@ -175,7 +174,5 @@ CONFIG_DETECT_HUNG_TASK=y
CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
CONFIG_CRYPTO_MD4=y
-CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=y
CONFIG_CRYPTO_BLOWFISH=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index fc1e7a7388b8..8e798b1fbc99 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -21,6 +21,7 @@ CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_PMAC=y
CONFIG_PPC601_SYNC_FIX=y
+CONFIG_GEN_RTC=y
CONFIG_HIGHMEM=y
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
@@ -179,10 +180,10 @@ CONFIG_PPP_ASYNC=y
CONFIG_PPP_SYNC_TTY=m
CONFIG_USB_USBNET=m
# CONFIG_USB_NET_CDC_SUBSET is not set
-CONFIG_PRISM54=m
CONFIG_B43=m
CONFIG_B43LEGACY=m
CONFIG_P54_COMMON=m
+CONFIG_PRISM54=m
CONFIG_INPUT_EVDEV=y
# CONFIG_KEYBOARD_ATKBD is not set
# CONFIG_MOUSE_PS2 is not set
@@ -193,7 +194,6 @@ CONFIG_SERIAL_8250=m
CONFIG_SERIAL_PMACZILOG=m
CONFIG_SERIAL_PMACZILOG_TTYS=y
CONFIG_NVRAM=y
-CONFIG_GEN_RTC=y
CONFIG_I2C_CHARDEV=m
CONFIG_APM_POWER=y
CONFIG_BATTERY_PMU=y
@@ -201,8 +201,9 @@ CONFIG_HWMON=m
CONFIG_AGP=m
CONFIG_AGP_UNINORTH=m
CONFIG_DRM=m
-CONFIG_DRM_R128=m
CONFIG_DRM_RADEON=m
+CONFIG_DRM_LEGACY=y
+CONFIG_DRM_R128=m
CONFIG_FB=y
CONFIG_FB_OF=y
CONFIG_FB_CONTROL=y
@@ -300,8 +301,6 @@ CONFIG_NFSD_V4=y
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_ISO8859_1=m
CONFIG_CRC_T10DIF=y
-CONFIG_LIBCRC32C=m
-CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
@@ -310,7 +309,6 @@ CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
CONFIG_BOOTX_TEXT=y
CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_SHA512=m
@@ -325,4 +323,3 @@ CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_DEFLATE=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 0695ce047d56..caee834760d2 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -1,10 +1,8 @@
CONFIG_PPC64=y
-CONFIG_SMP=y
CONFIG_NR_CPUS=2048
CONFIG_CPU_LITTLE_ENDIAN=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
CONFIG_AUDIT=y
CONFIG_IRQ_DOMAIN_DEBUG=y
CONFIG_NO_HZ=y
@@ -26,8 +24,8 @@ CONFIG_CGROUP_FREEZER=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CGROUP_CPUACCT=y
-CONFIG_CGROUP_BPF=y
CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_BPF=y
CONFIG_USER_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_BPF_SYSCALL=y
@@ -62,7 +60,6 @@ CONFIG_PPC_64K_PAGES=y
CONFIG_PPC_SUBPAGE_PROT=y
CONFIG_SCHED_SMT=y
CONFIG_PM=y
-CONFIG_PCI_MSI=y
CONFIG_HOTPLUG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -158,7 +155,6 @@ CONFIG_NETCONSOLE=y
CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
-CONFIG_VHOST_NET=m
CONFIG_VORTEX=m
CONFIG_ACENIC=m
CONFIG_ACENIC_OMIT_TIGON_I=y
@@ -184,16 +180,13 @@ CONFIG_PPP_DEFLATE=m
CONFIG_PPPOE=m
CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=m
CONFIG_INPUT_MISC=y
# CONFIG_SERIO_SERPORT is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_JSM=m
CONFIG_VIRTIO_CONSOLE=m
-CONFIG_POWERNV_OP_PANEL=m
CONFIG_IPMI_HANDLER=y
CONFIG_IPMI_DEVICE_INTERFACE=y
CONFIG_IPMI_POWERNV=y
@@ -293,11 +286,15 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_LATENCYTOP=y
+CONFIG_FTRACE=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_FUNCTION_GRAPH_TRACER=y
CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
CONFIG_CODE_PATCHING_SELFTEST=y
CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
@@ -309,6 +306,7 @@ CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_CRC32C_VPMSUM=m
CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA1_PPC=m
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
@@ -317,14 +315,13 @@ CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
-CONFIG_CRYPTO_SHA1_PPC=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
CONFIG_CRYPTO_DEV_NX=y
CONFIG_CRYPTO_DEV_VMX=y
-CONFIG_CRYPTO_DEV_VMX_ENCRYPT=m
CONFIG_VIRTUALIZATION=y
CONFIG_KVM_BOOK3S_64=m
CONFIG_KVM_BOOK3S_64_HV=m
+CONFIG_VHOST_NET=m
diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig
index 370c0bbcff71..10fb1df63b46 100644
--- a/arch/powerpc/configs/ppc40x_defconfig
+++ b/arch/powerpc/configs/ppc40x_defconfig
@@ -51,9 +51,9 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_UARTLITE=y
CONFIG_SERIAL_UARTLITE_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
CONFIG_XILINX_HWICAP=m
CONFIG_I2C=m
@@ -85,4 +85,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig
index 2766e8f590bc..66dd6bf45cde 100644
--- a/arch/powerpc/configs/ppc44x_defconfig
+++ b/arch/powerpc/configs/ppc44x_defconfig
@@ -68,9 +68,9 @@ CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_SERIAL_8250_PCI is not set
CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_UARTLITE=y
CONFIG_SERIAL_UARTLITE_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
CONFIG_XILINX_HWICAP=m
CONFIG_I2C=m
@@ -94,7 +94,6 @@ CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_UBIFS_FS=m
-CONFIG_LOGFS=m
CONFIG_CRAMFS=y
CONFIG_SQUASHFS=m
CONFIG_SQUASHFS_XATTR=y
@@ -108,6 +107,5 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
CONFIG_VIRTUALIZATION=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 5175028c56ce..791db775a09c 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -1,8 +1,6 @@
CONFIG_PPC64=y
-CONFIG_SMP=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
CONFIG_IRQ_DOMAIN_DEBUG=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
@@ -28,9 +26,10 @@ CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PPC_SPLPAR=y
+CONFIG_DTL=y
CONFIG_SCANLOG=m
CONFIG_PPC_SMLPAR=y
-CONFIG_DTL=y
+CONFIG_IBMEBUS=y
CONFIG_PPC_MAPLE=y
CONFIG_PPC_PASEMI=y
CONFIG_PPC_PASEMI_IOMMU=y
@@ -41,9 +40,8 @@ CONFIG_PS3_FLASH=m
CONFIG_PS3_LPM=m
CONFIG_PPC_IBM_CELL_BLADE=y
CONFIG_RTAS_FLASH=m
-CONFIG_IBMEBUS=y
-CONFIG_CPU_FREQ_PMAC64=y
CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_PMAC64=y
CONFIG_HZ_100=y
CONFIG_BINFMT_MISC=m
CONFIG_PPC_TRANSACTIONAL_MEM=y
@@ -51,14 +49,15 @@ CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_CRASH_DUMP=y
CONFIG_IRQ_ALL_CPUS=y
-CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_PPC_64K_PAGES=y
CONFIG_SCHED_SMT=y
-CONFIG_PCCARD=y
-CONFIG_ELECTRA_CF=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_RPA=m
CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
+CONFIG_PCCARD=y
+CONFIG_ELECTRA_CF=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -154,7 +153,6 @@ CONFIG_DUMMY=m
CONFIG_NETCONSOLE=y
CONFIG_TUN=m
CONFIG_VIRTIO_NET=m
-CONFIG_VHOST_NET=m
CONFIG_VORTEX=m
CONFIG_ACENIC=m
CONFIG_ACENIC_OMIT_TIGON_I=y
@@ -181,15 +179,14 @@ CONFIG_SUNGEM=y
CONFIG_GELIC_NET=m
CONFIG_GELIC_WIRELESS=y
CONFIG_SPIDER_NET=m
-CONFIG_MARVELL_PHY=y
CONFIG_BROADCOM_PHY=m
+CONFIG_MARVELL_PHY=y
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
CONFIG_PPPOE=m
CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=m
CONFIG_INPUT_MISC=y
CONFIG_INPUT_PCSPKR=m
@@ -197,7 +194,6 @@ CONFIG_INPUT_PCSPKR=m
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_ICOM=m
-CONFIG_SERIAL_TXX9_CONSOLE=y
CONFIG_SERIAL_JSM=m
CONFIG_HVC_CONSOLE=y
CONFIG_HVC_RTAS=y
@@ -250,6 +246,9 @@ CONFIG_USB_EHCI_HCD=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_STORAGE=m
CONFIG_USB_APPLEDISPLAY=m
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_POWERNV=m
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_MAD=m
CONFIG_INFINIBAND_USER_ACCESS=m
@@ -267,7 +266,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
-CONFIG_FS_DAX=y
+CONFIG_RAS=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
@@ -287,6 +286,7 @@ CONFIG_XFS_POSIX_ACL=y
CONFIG_BTRFS_FS=m
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
CONFIG_AUTOFS4_FS=m
CONFIG_FUSE_FS=m
CONFIG_OVERLAY_FS=m
@@ -324,12 +324,15 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_LATENCYTOP=y
+CONFIG_FTRACE=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_FUNCTION_GRAPH_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
CONFIG_CODE_PATCHING_SELFTEST=y
CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
@@ -342,6 +345,7 @@ CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_CRC32C_VPMSUM=m
CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA1_PPC=m
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
@@ -350,19 +354,14 @@ CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
-CONFIG_CRYPTO_SHA1_PPC=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DEV_NX=y
CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
CONFIG_CRYPTO_DEV_VMX=y
-CONFIG_CRYPTO_DEV_VMX_ENCRYPT=m
CONFIG_VIRTUALIZATION=y
CONFIG_KVM_BOOK3S_64=m
CONFIG_KVM_BOOK3S_64_HV=m
-CONFIG_NEW_LEDS=y
-CONFIG_LEDS_CLASS=m
-CONFIG_LEDS_POWERNV=m
+CONFIG_VHOST_NET=m
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig
index 6340e6c53c54..d0fe0f8f77c2 100644
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -3,7 +3,6 @@ CONFIG_PPC_BOOK3E_64=y
CONFIG_SMP=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_TASKSTATS=y
@@ -30,8 +29,8 @@ CONFIG_BINFMT_MISC=m
CONFIG_IRQ_ALL_CPUS=y
CONFIG_SPARSEMEM_MANUAL=y
CONFIG_PCI_MSI=y
-CONFIG_PCCARD=y
CONFIG_HOTPLUG_PCI=y
+CONFIG_PCCARD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -108,15 +107,14 @@ CONFIG_E100=y
CONFIG_E1000=y
CONFIG_IXGB=m
CONFIG_SUNGEM=y
-CONFIG_MARVELL_PHY=y
CONFIG_BROADCOM_PHY=m
+CONFIG_MARVELL_PHY=y
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
CONFIG_PPPOE=m
CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=m
CONFIG_INPUT_MISC=y
# CONFIG_SERIO_SERPORT is not set
@@ -172,10 +170,8 @@ CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_MTHCA=m
CONFIG_INFINIBAND_IPOIB=m
CONFIG_INFINIBAND_ISER=m
-CONFIG_EDAC=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
-CONFIG_FS_DAX=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
@@ -192,6 +188,7 @@ CONFIG_JFS_POSIX_ACL=y
CONFIG_JFS_SECURITY=y
CONFIG_XFS_FS=m
CONFIG_XFS_POSIX_ACL=y
+CONFIG_FS_DAX=y
CONFIG_AUTOFS4_FS=m
CONFIG_ISO9660_FS=y
CONFIG_UDF_FS=m
@@ -251,5 +248,4 @@ CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 18d0d60dadbf..ae6eba482d75 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -11,10 +11,10 @@ CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_CGROUPS=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_SCHED=y
CONFIG_RT_GROUP_SCHED=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
CONFIG_USER_NS=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_COMPAT_BRK is not set
@@ -61,7 +61,7 @@ CONFIG_SBC8641D=y
CONFIG_MPC8610_HPCD=y
CONFIG_GEF_SBC610=y
CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_STAT=m
+CONFIG_CPU_FREQ_STAT=y
CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=m
@@ -70,7 +70,6 @@ CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
CONFIG_CPU_FREQ_PMAC=y
CONFIG_TAU=y
CONFIG_TAU_AVERAGE=y
-CONFIG_QUICC_ENGINE=y
CONFIG_QE_GPIO=y
CONFIG_MCU_MPC8349EMITX=y
CONFIG_HIGHMEM=y
@@ -141,7 +140,6 @@ CONFIG_NETFILTER=y
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
CONFIG_NF_CONNTRACK_EVENTS=y
-CONFIG_NF_CT_PROTO_UDPLITE=m
CONFIG_NF_CONNTRACK_AMANDA=m
CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_H323=m
@@ -187,7 +185,6 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
CONFIG_NETFILTER_XT_MATCH_RATEEST=m
CONFIG_NETFILTER_XT_MATCH_REALM=m
CONFIG_NETFILTER_XT_MATCH_RECENT=m
-CONFIG_NETFILTER_XT_MATCH_SOCKET=m
CONFIG_NETFILTER_XT_MATCH_STATE=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -195,7 +192,6 @@ CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
CONFIG_NETFILTER_XT_MATCH_TIME=m
CONFIG_NETFILTER_XT_MATCH_U32=m
CONFIG_NF_CONNTRACK_IPV4=m
-# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -334,9 +330,7 @@ CONFIG_BT_BNEP_MC_FILTER=y
CONFIG_BT_BNEP_PROTO_FILTER=y
CONFIG_BT_HIDP=m
CONFIG_BT_HCIUART=m
-CONFIG_BT_HCIUART_H4=y
CONFIG_BT_HCIUART_BCSP=y
-CONFIG_BT_HCIUART_LL=y
CONFIG_BT_HCIBCM203X=m
CONFIG_BT_HCIBPA10X=m
CONFIG_BT_HCIBFUSB=m
@@ -370,7 +364,6 @@ CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_CDROM_PKTCDVD=m
CONFIG_VIRTIO_BLK=m
-CONFIG_BLK_DEV_HD=y
CONFIG_ENCLOSURE_SERVICES=m
CONFIG_SENSORS_TSL2550=m
CONFIG_EEPROM_AT24=m
@@ -548,16 +541,16 @@ CONFIG_PCMCIA_XIRC2PS=m
CONFIG_FDDI=y
CONFIG_SKFP=m
CONFIG_NET_SB1000=m
-CONFIG_MARVELL_PHY=m
-CONFIG_DAVICOM_PHY=m
-CONFIG_QSEMI_PHY=m
-CONFIG_LXT_PHY=m
-CONFIG_CICADA_PHY=m
-CONFIG_VITESSE_PHY=m
-CONFIG_SMSC_PHY=m
CONFIG_BROADCOM_PHY=m
+CONFIG_CICADA_PHY=m
+CONFIG_DAVICOM_PHY=m
CONFIG_ICPLUS_PHY=m
+CONFIG_LXT_PHY=m
+CONFIG_MARVELL_PHY=m
+CONFIG_QSEMI_PHY=m
CONFIG_REALTEK_PHY=m
+CONFIG_SMSC_PHY=m
+CONFIG_VITESSE_PHY=m
CONFIG_PLIP=m
CONFIG_PPP_DEFLATE=m
CONFIG_PPP_FILTER=y
@@ -585,7 +578,6 @@ CONFIG_USB_ALI_M5632=y
CONFIG_USB_AN2720=y
CONFIG_USB_EPSON2888=y
CONFIG_USB_KC2190=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_JOYDEV=m
CONFIG_INPUT_EVDEV=y
CONFIG_MOUSE_SERIAL=m
@@ -647,7 +639,6 @@ CONFIG_SYNCLINKMP=m
CONFIG_SYNCLINK_GT=m
CONFIG_NOZOMI=m
CONFIG_N_HDLC=m
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_CS=m
@@ -657,13 +648,13 @@ CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_8250_DETECT_IRQ=y
CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_UARTLITE=m
CONFIG_SERIAL_PMACZILOG=m
CONFIG_SERIAL_MPC52xx=y
CONFIG_SERIAL_MPC52xx_CONSOLE=y
CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
CONFIG_SERIAL_JSM=m
-CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_PRINTER=m
CONFIG_LP_CONSOLE=y
CONFIG_PPDEV=m
@@ -748,9 +739,10 @@ CONFIG_MFD_SM501_GPIO=y
CONFIG_AGP=y
CONFIG_AGP_UNINORTH=y
CONFIG_DRM=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_LEGACY=y
CONFIG_DRM_TDFX=m
CONFIG_DRM_R128=m
-CONFIG_DRM_RADEON=m
CONFIG_DRM_MGA=m
CONFIG_DRM_SIS=m
CONFIG_DRM_VIA=m
@@ -899,7 +891,7 @@ CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=m
-CONFIG_USB_EHCI_FSL=y
+CONFIG_USB_EHCI_FSL=m
CONFIG_USB_OHCI_HCD=m
CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
@@ -967,7 +959,6 @@ CONFIG_USB_ADUTUX=m
CONFIG_USB_SEVSEG=m
CONFIG_USB_LEGOTOWER=m
CONFIG_USB_LCD=m
-CONFIG_USB_LED=m
CONFIG_USB_IDMOUSE=m
CONFIG_USB_FTDI_ELAN=m
CONFIG_USB_APPLEDISPLAY=m
@@ -1020,15 +1011,14 @@ CONFIG_UIO_CIF=m
CONFIG_UIO_PDRV_GENIRQ=m
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
-CONFIG_FS_DAX=y
+CONFIG_QUICC_ENGINE=y
CONFIG_EXT2_FS=m
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT4_FS=m
+CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
-CONFIG_EXT4_FS=y
CONFIG_JBD2_DEBUG=y
CONFIG_REISERFS_FS=m
CONFIG_REISERFS_PROC_INFO=y
@@ -1042,6 +1032,7 @@ CONFIG_XFS_FS=m
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_GFS2_FS=m
+CONFIG_FS_DAX=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_AUTOFS4_FS=m
CONFIG_FUSE_FS=m
@@ -1146,7 +1137,6 @@ CONFIG_DEBUG_VM=y
CONFIG_DEBUG_HIGHMEM=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DEBUG_SHIRQ=y
-CONFIG_TIMER_STATS=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
@@ -1173,7 +1163,6 @@ CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_GCM=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
@@ -1201,7 +1190,6 @@ CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DEV_HIFN_795X=m
CONFIG_CRYPTO_DEV_HIFN_795X_RNG=y
CONFIG_CRYPTO_DEV_TALITOS=m
diff --git a/arch/powerpc/configs/pq2fads_defconfig b/arch/powerpc/configs/pq2fads_defconfig
index 50b2bad51d0a..0ededa8c837d 100644
--- a/arch/powerpc/configs/pq2fads_defconfig
+++ b/arch/powerpc/configs/pq2fads_defconfig
@@ -79,4 +79,3 @@ CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index ee0ec5a682fc..2efa025bf483 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -5,7 +5,6 @@ CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
@@ -94,7 +93,6 @@ CONFIG_USB_USBNET=m
# CONFIG_USB_NET_CDC_SUBSET is not set
# CONFIG_USB_NET_ZAURUS is not set
CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_JOYDEV=m
CONFIG_INPUT_EVDEV=m
# CONFIG_INPUT_KEYBOARD is not set
@@ -161,7 +159,6 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_CRC_CCITT=m
CONFIG_CRC_T10DIF=y
CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_STACKOVERFLOW=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 1a61aa20dfba..3d935969e5a2 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -1,11 +1,8 @@
CONFIG_PPC64=y
-CONFIG_SMP=y
CONFIG_NR_CPUS=2048
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
CONFIG_AUDIT=y
-CONFIG_AUDITSYSCALL=y
CONFIG_IRQ_DOMAIN_DEBUG=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
@@ -18,17 +15,16 @@ CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=18
CONFIG_LOG_CPU_MAX_BUF_SHIFT=13
CONFIG_NUMA_BALANCING=y
-CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y
CONFIG_CGROUPS=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_CGROUP_SCHED=y
CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
CONFIG_CGROUP_CPUACCT=y
-CONFIG_CGROUP_BPF=y
-CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
CONFIG_CGROUP_PERF=y
-CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUP_BPF=y
CONFIG_USER_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_BPF_SYSCALL=y
@@ -43,12 +39,12 @@ CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PPC_SPLPAR=y
+CONFIG_DTL=y
CONFIG_SCANLOG=m
CONFIG_PPC_SMLPAR=y
-CONFIG_DTL=y
+CONFIG_IBMEBUS=y
# CONFIG_PPC_PMAC is not set
CONFIG_RTAS_FLASH=m
-CONFIG_IBMEBUS=y
CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
CONFIG_HZ_100=y
CONFIG_BINFMT_MISC=m
@@ -155,7 +151,6 @@ CONFIG_NETCONSOLE=y
CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
-CONFIG_VHOST_NET=m
CONFIG_VORTEX=m
CONFIG_ACENIC=m
CONFIG_ACENIC_OMIT_TIGON_I=y
@@ -183,12 +178,10 @@ CONFIG_PPP_DEFLATE=m
CONFIG_PPPOE=m
CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=m
CONFIG_INPUT_MISC=y
CONFIG_INPUT_PCSPKR=m
# CONFIG_SERIO_SERPORT is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_ICOM=m
@@ -198,8 +191,6 @@ CONFIG_HVC_RTAS=y
CONFIG_HVCS=m
CONFIG_VIRTIO_CONSOLE=m
CONFIG_IBM_BSR=m
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_GENERIC=y
CONFIG_RAW_DRIVER=y
CONFIG_MAX_RAW_DEVS=1024
CONFIG_FB=y
@@ -227,6 +218,9 @@ CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_HCD_PPC_OF is not set
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_STORAGE=m
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_POWERNV=m
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_MAD=m
CONFIG_INFINIBAND_USER_ACCESS=m
@@ -238,9 +232,10 @@ CONFIG_INFINIBAND_IPOIB=m
CONFIG_INFINIBAND_IPOIB_CM=y
CONFIG_INFINIBAND_SRP=m
CONFIG_INFINIBAND_ISER=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GENERIC=y
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
-CONFIG_FS_DAX=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
@@ -256,6 +251,7 @@ CONFIG_XFS_POSIX_ACL=y
CONFIG_BTRFS_FS=m
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
CONFIG_AUTOFS4_FS=m
CONFIG_FUSE_FS=m
CONFIG_OVERLAY_FS=m
@@ -291,11 +287,14 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_LATENCYTOP=y
+CONFIG_FTRACE=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_FUNCTION_GRAPH_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
CONFIG_CODE_PATCHING_SELFTEST=y
CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
@@ -306,6 +305,7 @@ CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_CRC32C_VPMSUM=m
CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA1_PPC=m
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
@@ -314,19 +314,14 @@ CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
-CONFIG_CRYPTO_SHA1_PPC=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DEV_NX=y
CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
CONFIG_CRYPTO_DEV_VMX=y
-CONFIG_CRYPTO_DEV_VMX_ENCRYPT=m
CONFIG_VIRTUALIZATION=y
CONFIG_KVM_BOOK3S_64=m
CONFIG_KVM_BOOK3S_64_HV=m
-CONFIG_NEW_LEDS=y
-CONFIG_LEDS_CLASS=m
-CONFIG_LEDS_POWERNV=m
+CONFIG_VHOST_NET=m
diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig
index 78fddf24b5d3..cd72193fac0a 100644
--- a/arch/powerpc/configs/tqm8xx_defconfig
+++ b/arch/powerpc/configs/tqm8xx_defconfig
@@ -18,6 +18,7 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_TQM8XX=y
CONFIG_8xx_COPYBACK=y
# CONFIG_8xx_CPU15 is not set
+CONFIG_GEN_RTC=y
CONFIG_HZ_100=y
# CONFIG_SECCOMP is not set
CONFIG_NET=y
@@ -44,7 +45,6 @@ CONFIG_MTD_PHYSMAP_OF=y
CONFIG_NETDEVICES=y
CONFIG_FS_ENET=y
CONFIG_DAVICOM_PHY=y
-CONFIG_FIXED_PHY=y
# CONFIG_WLAN is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
@@ -53,7 +53,6 @@ CONFIG_FIXED_PHY=y
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
CONFIG_HW_RANDOM=y
-CONFIG_GEN_RTC=y
# CONFIG_HWMON is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_DNOTIFY is not set
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index dcdd51b57783..aef41b17a8bc 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -55,9 +55,6 @@ CONFIG_B43_SDIO=y
# CONFIG_B43_PHY_LP is not set
CONFIG_B43_DEBUG=y
CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=640
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=480
CONFIG_INPUT_JOYDEV=y
CONFIG_INPUT_EVDEV=y
# CONFIG_KEYBOARD_ATKBD is not set
@@ -68,7 +65,6 @@ CONFIG_INPUT_UINPUT=y
# CONFIG_SERIO_I8042 is not set
# CONFIG_SERIO_SERPORT is not set
CONFIG_LEGACY_PTY_COUNT=64
-# CONFIG_DEVKMEM is not set
# CONFIG_HW_RANDOM is not set
CONFIG_NVRAM=y
CONFIG_I2C=y
@@ -119,5 +115,4 @@ CONFIG_SCHED_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_DMA_API_DEBUG=y
CONFIG_PPC_EARLY_DEBUG=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 5c4fbc80dc6c..2542ea15d338 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -8,3 +8,4 @@ generic-y += mcs_spinlock.h
generic-y += preempt.h
generic-y += rwsem.h
generic-y += vtime.h
+generic-y += msi.h
diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h
index cee3aa087653..7f2a7702596c 100644
--- a/arch/powerpc/include/asm/asm-compat.h
+++ b/arch/powerpc/include/asm/asm-compat.h
@@ -25,7 +25,7 @@
#define PPC_LCMPI stringify_in_c(cmpdi)
#define PPC_LCMPLI stringify_in_c(cmpldi)
#define PPC_LCMP stringify_in_c(cmpd)
-#define PPC_LONG stringify_in_c(.llong)
+#define PPC_LONG stringify_in_c(.8byte)
#define PPC_LONG_ALIGN stringify_in_c(.balign 8)
#define PPC_TLNEI stringify_in_c(tdnei)
#define PPC_LLARX(t, a, b, eh) PPC_LDARX(t, a, b, eh)
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index 25d42bd3f114..9c601adfc500 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -74,13 +74,6 @@ do { \
___p1; \
})
-/*
- * This must resolve to hwsync on SMP for the context switch path.
- * See _switch, and core scheduler context switch memory ordering
- * comments.
- */
-#define smp_mb__before_spinlock() smp_mb()
-
#include <asm-generic/barrier.h>
#endif /* _ASM_POWERPC_BARRIER_H */
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 7fb755880409..4d453f979553 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -294,13 +294,11 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
-extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
- pmd_t **pmdp);
-
int map_kernel_page(unsigned long va, phys_addr_t pa, int flags);
/* Generic accessors to PTE bits */
static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);}
+static inline int pte_read(pte_t pte) { return 1; }
static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); }
static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); }
static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); }
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 36fc7bfe9e11..f88452019114 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -40,7 +40,7 @@
* Define the address range of the kernel non-linear virtual area
*/
#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000)
-#define H_KERN_VIRT_SIZE ASM_CONST(0x0000100000000000)
+#define H_KERN_VIRT_SIZE ASM_CONST(0x0000400000000000) /* 64T */
/*
* The vmalloc space starts at the beginning of that region, and
@@ -48,9 +48,11 @@
* (we keep a quarter for the virtual memmap)
*/
#define H_VMALLOC_START H_KERN_VIRT_START
-#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE >> 1)
+#define H_VMALLOC_SIZE ASM_CONST(0x380000000000) /* 56T */
#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE)
+#define H_KERN_IO_START H_VMALLOC_END
+
/*
* Region IDs
*/
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index 5c28bd6f2ae1..2d1ca488ca44 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -54,9 +54,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
static inline bool gigantic_page_supported(void)
{
- if (radix_enabled())
- return true;
- return false;
+ return true;
}
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 6981a52b3887..508275bb05d5 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -104,6 +104,7 @@
#define HPTE_R_C ASM_CONST(0x0000000000000080)
#define HPTE_R_R ASM_CONST(0x0000000000000100)
#define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00)
+#define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI)
#define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000)
#define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000)
@@ -468,7 +469,7 @@ extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
int psize, int ssize);
int htab_remove_mapping(unsigned long vstart, unsigned long vend,
int psize, int ssize);
-extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
+extern void pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
#ifdef CONFIG_PPC_PSERIES
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 5b4023c616f7..c3b00e8ff791 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -83,6 +83,9 @@ typedef struct {
mm_context_id_t id;
u16 user_psize; /* page size index */
+ /* Number of bits in the mm_cpumask */
+ atomic_t active_cpus;
+
/* NPU NMMU context */
struct npu_context *npu_context;
@@ -97,11 +100,6 @@ typedef struct {
#ifdef CONFIG_PPC_SUBPAGE_PROT
struct subpage_prot_table spt;
#endif /* CONFIG_PPC_SUBPAGE_PROT */
-#ifdef CONFIG_PPC_ICSWX
- struct spinlock *cop_lockp; /* guard acop and cop_pid */
- unsigned long acop; /* mask of enabled coprocessor types */
- unsigned int cop_pid; /* pid value used with coprocessors */
-#endif /* CONFIG_PPC_ICSWX */
#ifdef CONFIG_PPC_64K_PAGES
/* for 4K PTE fragment support */
void *pte_frag;
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index e2329db9d6f4..1fcfa425cefa 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -41,8 +41,6 @@ extern struct kmem_cache *pgtable_cache[];
pgtable_cache[(shift) - 1]; \
})
-#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO
-
extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
extern void pte_fragment_free(unsigned long *, int);
extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 818a58fc3f4f..b9aff515b4de 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -272,8 +272,10 @@ extern unsigned long __vmalloc_end;
extern unsigned long __kernel_virt_start;
extern unsigned long __kernel_virt_size;
+extern unsigned long __kernel_io_start;
#define KERN_VIRT_START __kernel_virt_start
#define KERN_VIRT_SIZE __kernel_virt_size
+#define KERN_IO_START __kernel_io_start
extern struct page *vmemmap;
extern unsigned long ioremap_bot;
extern unsigned long pci_io_base;
@@ -298,7 +300,6 @@ extern unsigned long pci_io_base;
* PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
* IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
*/
-#define KERN_IO_START (KERN_VIRT_START + (KERN_VIRT_SIZE >> 1))
#define FULL_IO_SIZE 0x80000000ul
#define ISA_IO_BASE (KERN_IO_START)
#define ISA_IO_END (KERN_IO_START + 0x10000ul)
@@ -409,6 +410,11 @@ static inline int pte_write(pte_t pte)
return __pte_write(pte) || pte_savedwrite(pte);
}
+static inline int pte_read(pte_t pte)
+{
+ return !!(pte_raw(pte) & cpu_to_be64(_PAGE_READ));
+}
+
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
@@ -1167,6 +1173,7 @@ static inline bool arch_needs_pgtable_deposit(void)
return false;
return true;
}
+extern void serialize_against_pte_lookup(struct mm_struct *mm);
static inline pmd_t pmd_mkdevmap(pmd_t pmd)
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 544440b5aff3..1e5ba94e62ef 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -110,6 +110,8 @@
*/
#define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END)
+#define RADIX_KERN_IO_START (RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1))
+
#ifndef __ASSEMBLY__
#define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE)
#define RADIX_PMD_TABLE_SIZE (sizeof(pmd_t) << RADIX_PMD_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index cc7fbde4f53c..9b433a624bf3 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -22,22 +22,21 @@ extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end
extern void radix__local_flush_tlb_mm(struct mm_struct *mm);
extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
int psize);
extern void radix__tlb_flush(struct mmu_gather *tlb);
#ifdef CONFIG_SMP
extern void radix__flush_tlb_mm(struct mm_struct *mm);
extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
int psize);
#else
#define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm)
#define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr)
#define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p)
-#define radix__flush_tlb_pwc(tlb, addr) radix__local_flush_tlb_pwc(tlb, addr)
#endif
+extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
+extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr);
extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
unsigned long page_size);
extern void radix__flush_tlb_lpid(unsigned long lpid);
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 87fcc1948817..7ee763d3bea9 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -133,6 +133,7 @@ extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
extern void bad_page_fault(struct pt_regs *, unsigned long, int);
extern void _exception(int, struct pt_regs *, int, unsigned long);
extern void die(const char *, struct pt_regs *, long);
+extern bool die_will_crash(void);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 5a90292afbad..d122f7f957ce 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -5,7 +5,7 @@
/* bytes per L1 cache line */
-#if defined(CONFIG_8xx) || defined(CONFIG_403GCX)
+#if defined(CONFIG_PPC_8xx) || defined(CONFIG_403GCX)
#define L1_CACHE_SHIFT 4
#define MAX_COPY_PREFETCH 1
#elif defined(CONFIG_PPC_E500MC)
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 52586f9956bb..eb43b5c3a7b5 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -67,6 +67,17 @@
#define ERR_DEEP_STATE_ESL_MISMATCH -2
#ifndef __ASSEMBLY__
+/* Additional SPRs that need to be saved/restored during stop */
+struct stop_sprs {
+ u64 pid;
+ u64 ldbar;
+ u64 fscr;
+ u64 hfscr;
+ u64 mmcr1;
+ u64 mmcr2;
+ u64 mmcra;
+};
+
extern u32 pnv_fastsleep_workaround_at_entry[];
extern u32 pnv_fastsleep_workaround_at_exit[];
@@ -90,20 +101,4 @@ static inline void report_invalid_psscr_val(u64 psscr_val, int err)
#endif
-/* Idle state entry routines */
-#ifdef CONFIG_PPC_P7_NAP
-#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
- /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
- std r0,0(r1); \
- ptesync; \
- ld r0,0(r1); \
-236: cmpd cr0,r0,r0; \
- bne 236b; \
- IDLE_INST; \
-
-#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \
- IDLE_STATE_ENTER_SEQ(IDLE_INST) \
- b .
-#endif /* CONFIG_PPC_P7_NAP */
-
#endif
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index d02ad93bf708..a9bf921f4efc 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -513,7 +513,7 @@ enum {
#else
CPU_FTRS_GENERIC_32 |
#endif
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
CPU_FTRS_8XX |
#endif
#ifdef CONFIG_40x
@@ -565,7 +565,7 @@ enum {
#else
CPU_FTRS_GENERIC_32 &
#endif
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
CPU_FTRS_8XX &
#endif
#ifdef CONFIG_40x
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 8e37b71674f4..9847ae3a12d1 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -131,7 +131,6 @@ static inline bool eeh_pe_passed(struct eeh_pe *pe)
struct eeh_dev {
int mode; /* EEH mode */
int class_code; /* Class code of the device */
- int config_addr; /* Config address */
int pe_config_addr; /* PE config address */
u32 config_space[16]; /* Saved PCI config space */
int pcix_cap; /* Saved PCIx capability */
@@ -141,7 +140,6 @@ struct eeh_dev {
struct eeh_pe *pe; /* Associated PE */
struct list_head list; /* Form link list in the PE */
struct list_head rmv_list; /* Record the removed edevs */
- struct pci_controller *phb; /* Associated PHB */
struct pci_dn *pdn; /* Associated PCI device node */
struct pci_dev *pdev; /* Associated PCI device */
bool in_error; /* Error flag for edev */
@@ -262,7 +260,8 @@ typedef void *(*eeh_traverse_func)(void *data, void *flag);
void eeh_set_pe_aux_size(int size);
int eeh_phb_pe_create(struct pci_controller *phb);
struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
-struct eeh_pe *eeh_pe_get(struct eeh_dev *edev);
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
+ int pe_no, int config_addr);
int eeh_add_to_parent_pe(struct eeh_dev *edev);
int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
void eeh_pe_update_time_stamp(struct eeh_pe *pe);
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index ce88bbe1d809..5a23010af600 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -209,11 +209,13 @@ extern int early_init_dt_scan_fw_dump(unsigned long node,
extern int fadump_reserve_mem(void);
extern int setup_fadump(void);
extern int is_fadump_active(void);
+extern int should_fadump_crash(void);
extern void crash_fadump(struct pt_regs *, const char *);
extern void fadump_cleanup(void);
#else /* CONFIG_FA_DUMP */
static inline int is_fadump_active(void) { return 0; }
+static inline int should_fadump_crash(void) { return 0; }
static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
#endif
#endif
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 2de2319b99e2..8f88f771cc55 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -19,11 +19,11 @@
*/
#if defined(CONFIG_PPC64) && !defined(__powerpc64__)
/* 64 bits kernel, 32 bits code (ie. vdso32) */
-#define FTR_ENTRY_LONG .llong
+#define FTR_ENTRY_LONG .8byte
#define FTR_ENTRY_OFFSET .long 0xffffffff; .long
#elif defined(CONFIG_PPC64)
-#define FTR_ENTRY_LONG .llong
-#define FTR_ENTRY_OFFSET .llong
+#define FTR_ENTRY_LONG .8byte
+#define FTR_ENTRY_OFFSET .8byte
#else
#define FTR_ENTRY_LONG .long
#define FTR_ENTRY_OFFSET .long
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index 4508b322f2cd..6c40dfda5912 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -17,6 +17,7 @@
#ifndef __ASSEMBLY__
#include <linux/kernel.h>
#include <asm/page.h>
+#include <asm/pgtable.h>
#ifdef CONFIG_HIGHMEM
#include <linux/threads.h>
#include <asm/kmap_types.h>
@@ -62,9 +63,6 @@ enum fixed_addresses {
__end_of_fixed_addresses
};
-extern void __set_fixmap (enum fixed_addresses idx,
- phys_addr_t phys, pgprot_t flags);
-
#define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE)
@@ -72,5 +70,11 @@ extern void __set_fixmap (enum fixed_addresses idx,
#include <asm-generic/fixmap.h>
+static inline void __set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags)
+{
+ map_kernel_page(fix_to_virt(idx), phys, pgprot_val(flags));
+}
+
#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h
index f79d6c74eb2a..8def56ec05c6 100644
--- a/arch/powerpc/include/asm/fs_pd.h
+++ b/arch/powerpc/include/asm/fs_pd.h
@@ -26,7 +26,7 @@
#define cpm2_unmap(addr) do {} while(0)
#endif
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
#include <asm/8xx_immap.h>
extern immap_t __iomem *mpc8xx_immr;
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index eaada6c92344..719ed9b61ea7 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -29,18 +29,10 @@
: "b" (uaddr), "i" (-EFAULT), "r" (oparg) \
: "cr0", "memory")
-static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+ u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
- return -EFAULT;
pagefault_disable();
@@ -66,17 +58,9 @@ static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
pagefault_enable();
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = oldval;
+
return ret;
}
diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h
index 8add8b861e8d..c97603d617e3 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -12,6 +12,10 @@ typedef struct {
unsigned int mce_exceptions;
unsigned int spurious_irqs;
unsigned int hmi_exceptions;
+ unsigned int sreset_irqs;
+#ifdef CONFIG_PPC_WATCHDOG
+ unsigned int soft_nmi_irqs;
+#endif
#ifdef CONFIG_PPC_DOORBELL
unsigned int doorbell_irqs;
#endif
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 7f4025a6c69e..b8a0fb442c64 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -218,18 +218,4 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
}
#endif /* CONFIG_HUGETLB_PAGE */
-/*
- * FSL Book3E platforms require special gpage handling - the gpages
- * are reserved early in the boot process by memblock instead of via
- * the .dts as on IBM platforms.
- */
-#if defined(CONFIG_HUGETLB_PAGE) && (defined(CONFIG_PPC_FSL_BOOK3E) || \
- defined(CONFIG_PPC_8xx))
-extern void __init reserve_hugetlb_gpages(void);
-#else
-static inline void reserve_hugetlb_gpages(void)
-{
-}
-#endif
-
#endif /* _ASM_POWERPC_HUGETLB_H */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 57d38b504ff7..3d34dc0869f6 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -280,7 +280,18 @@
#define H_RESIZE_HPT_COMMIT 0x370
#define H_REGISTER_PROC_TBL 0x37C
#define H_SIGNAL_SYS_RESET 0x380
-#define MAX_HCALL_OPCODE H_SIGNAL_SYS_RESET
+#define H_INT_GET_SOURCE_INFO 0x3A8
+#define H_INT_SET_SOURCE_CONFIG 0x3AC
+#define H_INT_GET_SOURCE_CONFIG 0x3B0
+#define H_INT_GET_QUEUE_INFO 0x3B4
+#define H_INT_SET_QUEUE_CONFIG 0x3B8
+#define H_INT_GET_QUEUE_CONFIG 0x3BC
+#define H_INT_SET_OS_REPORTING_LINE 0x3C0
+#define H_INT_GET_OS_REPORTING_LINE 0x3C4
+#define H_INT_ESB 0x3C8
+#define H_INT_SYNC 0x3CC
+#define H_INT_RESET 0x3D0
+#define MAX_HCALL_OPCODE H_INT_RESET
/* H_VIOCTL functions */
#define H_GET_VIOA_DUMP_SIZE 0x01
diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h
index 27e588f6c72e..6a2c87577541 100644
--- a/arch/powerpc/include/asm/icswx.h
+++ b/arch/powerpc/include/asm/icswx.h
@@ -69,7 +69,10 @@ struct coprocessor_completion_block {
#define CSB_CC_WR_PROTECTION (16)
#define CSB_CC_UNKNOWN_CODE (17)
#define CSB_CC_ABORT (18)
+#define CSB_CC_EXCEED_BYTE_COUNT (19) /* P9 or later */
#define CSB_CC_TRANSPORT (20)
+#define CSB_CC_INVALID_CRB (21) /* P9 or later */
+#define CSB_CC_INVALID_DDE (30) /* P9 or later */
#define CSB_CC_SEGMENTED_DDL (31)
#define CSB_CC_PROGRESS_POINT (32)
#define CSB_CC_DDE_OVERFLOW (33)
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
new file mode 100644
index 000000000000..7f74c282710f
--- /dev/null
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -0,0 +1,128 @@
+#ifndef __ASM_POWERPC_IMC_PMU_H
+#define __ASM_POWERPC_IMC_PMU_H
+
+/*
+ * IMC Nest Performance Monitor counter support.
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ * (C) 2017 Anju T Sudhakar, IBM Corporation.
+ * (C) 2017 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or later version.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/io.h>
+#include <asm/opal.h>
+
+/*
+ * For static allocation of some of the structures.
+ */
+#define IMC_MAX_PMUS 32
+
+/*
+ * Compatibility macros for IMC devices
+ */
+#define IMC_DTB_COMPAT "ibm,opal-in-memory-counters"
+#define IMC_DTB_UNIT_COMPAT "ibm,imc-counters"
+
+
+/*
+ * LDBAR: Counter address and Enable/Disable macro.
+ * perf/imc-pmu.c has the LDBAR layout information.
+ */
+#define THREAD_IMC_LDBAR_MASK 0x0003ffffffffe000ULL
+#define THREAD_IMC_ENABLE 0x8000000000000000ULL
+
+/*
+ * Structure to hold memory address information for imc units.
+ */
+struct imc_mem_info {
+ u64 *vbase;
+ u32 id;
+};
+
+/*
+ * Place holder for nest pmu events and values.
+ */
+struct imc_events {
+ u32 value;
+ char *name;
+ char *unit;
+ char *scale;
+};
+
+/* Event attribute array index */
+#define IMC_FORMAT_ATTR 0
+#define IMC_EVENT_ATTR 1
+#define IMC_CPUMASK_ATTR 2
+#define IMC_NULL_ATTR 3
+
+/* PMU Format attribute macros */
+#define IMC_EVENT_OFFSET_MASK 0xffffffffULL
+
+/*
+ * Device tree parser code detects IMC pmu support and
+ * registers new IMC pmus. This structure will hold the
+ * pmu functions, events, counter memory information
+ * and attrs for each imc pmu and will be referenced at
+ * the time of pmu registration.
+ */
+struct imc_pmu {
+ struct pmu pmu;
+ struct imc_mem_info *mem_info;
+ struct imc_events **events;
+ /*
+ * Attribute groups for the PMU. Slot 0 is used for the
+ * format attribute, slot 1 for the event attribute and
+ * slot 2 for the cpumask attribute. Slot 3 is kept as
+ * NULL.
+ */
+ const struct attribute_group *attr_groups[4];
+ u32 counter_mem_size;
+ int domain;
+ /*
+ * Flag indicating whether the counter memory is mmapped
+ * or allocated by the kernel.
+ */
+ bool imc_counter_mmaped;
+};
+
+/*
+ * Structure to hold the id, lock and reference count for the imc events
+ * which have been initialized.
+ */
+struct imc_pmu_ref {
+ struct mutex lock;
+ unsigned int id;
+ int refc;
+};
+
+/*
+ * In-Memory Collection Counters type.
+ * Data comes from Device tree.
+ * Three device types are supported.
+ */
+
+enum {
+ IMC_TYPE_THREAD = 0x1,
+ IMC_TYPE_CORE = 0x4,
+ IMC_TYPE_CHIP = 0x10,
+};
+
+/*
+ * Domains for IMC PMUs
+ */
+#define IMC_DOMAIN_NEST 1
+#define IMC_DOMAIN_CORE 2
+#define IMC_DOMAIN_THREAD 3
+
+extern int init_imc_pmu(struct device_node *parent,
+ struct imc_pmu *pmu_ptr, int pmu_id);
+extern void thread_imc_disable(void);
+#endif /* __ASM_POWERPC_IMC_PMU_H */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 7cea76f11c26..83596f32f50b 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -104,6 +104,10 @@ struct kvmppc_host_state {
u8 napping;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /*
+ * hwthread_req/hwthread_state pair is used to pull sibling threads
+ * out of guest on pre-ISAv3.0B CPUs where threads share MMU.
+ */
u8 hwthread_req;
u8 hwthread_state;
u8 host_ipi;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 8b3f1238d07f..e372ed871c51 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -67,11 +67,6 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
-static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
- unsigned long address)
-{
-}
-
#define HPTEG_CACHE_NUM (1 << 15)
#define HPTEG_HASH_BITS_PTE 13
#define HPTEG_HASH_BITS_PTE_LONG 12
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index cd2fc1cc1cc7..73b92017b6d7 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -76,7 +76,6 @@ struct machdep_calls {
void __noreturn (*restart)(char *cmd);
void __noreturn (*halt)(void);
- void (*panic)(char *str);
void (*cpu_die)(void);
long (*time_init)(void); /* Optional, may be NULL */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 0c76675394c5..309592589e30 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -77,58 +77,8 @@ extern void switch_cop(struct mm_struct *next);
extern int use_cop(unsigned long acop, struct mm_struct *mm);
extern void drop_cop(unsigned long acop, struct mm_struct *mm);
-/*
- * switch_mm is the entry point called from the architecture independent
- * code in kernel/sched/core.c
- */
-static inline void switch_mm_irqs_off(struct mm_struct *prev,
- struct mm_struct *next,
- struct task_struct *tsk)
-{
- bool new_on_cpu = false;
-
- /* Mark this context has been used on the new CPU */
- if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
- new_on_cpu = true;
- }
-
- /* 32-bit keeps track of the current PGDIR in the thread struct */
-#ifdef CONFIG_PPC32
- tsk->thread.pgdir = next->pgd;
-#endif /* CONFIG_PPC32 */
-
- /* 64-bit Book3E keeps track of current PGD in the PACA */
-#ifdef CONFIG_PPC_BOOK3E_64
- get_paca()->pgd = next->pgd;
-#endif
- /* Nothing else to do if we aren't actually switching */
- if (prev == next)
- return;
-
-#ifdef CONFIG_PPC_ICSWX
- /* Switch coprocessor context only if prev or next uses a coprocessor */
- if (prev->context.acop || next->context.acop)
- switch_cop(next);
-#endif /* CONFIG_PPC_ICSWX */
-
- /* We must stop all altivec streams before changing the HW
- * context
- */
-#ifdef CONFIG_ALTIVEC
- if (cpu_has_feature(CPU_FTR_ALTIVEC))
- asm volatile ("dssall");
-#endif /* CONFIG_ALTIVEC */
-
- if (new_on_cpu)
- radix_kvm_prefetch_workaround(next);
-
- /*
- * The actual HW switching method differs between the various
- * sub architectures. Out of line for now
- */
- switch_mmu_context(prev, next, tsk);
-}
+extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk);
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
@@ -150,11 +100,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
*/
static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
{
- unsigned long flags;
-
- local_irq_save(flags);
switch_mm(prev, next, current);
- local_irq_restore(flags);
}
/* We don't currently use enter_lazy_tlb() for anything */
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index 6f8e79cd35d8..3760150a0ff0 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -1,9 +1,8 @@
#ifndef _ASM_NMI_H
#define _ASM_NMI_H
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_PPC_WATCHDOG
extern void arch_touch_nmi_watchdog(void);
-
extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
bool exclude_self);
#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 91314268f04f..185c6a47f9ba 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -121,7 +121,7 @@ extern int icache_44x_need_flush;
#include <asm/nohash/pte-book3e.h>
#elif defined(CONFIG_FSL_BOOKE)
#include <asm/nohash/32/pte-fsl-booke.h>
-#elif defined(CONFIG_8xx)
+#elif defined(CONFIG_PPC_8xx)
#include <asm/nohash/32/pte-8xx.h>
#endif
@@ -337,9 +337,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
-extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
- pmd_t **pmdp);
-
int map_kernel_page(unsigned long va, phys_addr_t pa, int flags);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index e5805ad78e12..17989c3d9a24 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -14,6 +14,7 @@ static inline int pte_write(pte_t pte)
{
return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO;
}
+static inline int pte_read(pte_t pte) { return 1; }
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; }
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 3130a73652c7..450a60b81d2a 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -42,6 +42,7 @@
#define OPAL_I2C_STOP_ERR -24
#define OPAL_XIVE_PROVISIONING -31
#define OPAL_XIVE_FREE_ACTIVE -32
+#define OPAL_TIMEOUT -33
/* API Tokens (in r0) */
#define OPAL_INVALID_CALL -1
@@ -190,7 +191,16 @@
#define OPAL_NPU_INIT_CONTEXT 146
#define OPAL_NPU_DESTROY_CONTEXT 147
#define OPAL_NPU_MAP_LPAR 148
-#define OPAL_LAST 148
+#define OPAL_IMC_COUNTERS_INIT 149
+#define OPAL_IMC_COUNTERS_START 150
+#define OPAL_IMC_COUNTERS_STOP 151
+#define OPAL_GET_POWERCAP 152
+#define OPAL_SET_POWERCAP 153
+#define OPAL_GET_POWER_SHIFT_RATIO 154
+#define OPAL_SET_POWER_SHIFT_RATIO 155
+#define OPAL_SENSOR_GROUP_CLEAR 156
+#define OPAL_PCI_SET_P2P 157
+#define OPAL_LAST 157
/* Device tree flags */
@@ -1084,6 +1094,18 @@ enum {
XIVE_DUMP_EMU_STATE = 5,
};
+/* "type" argument options for OPAL_IMC_COUNTERS_* calls */
+enum {
+ OPAL_IMC_COUNTERS_NEST = 1,
+ OPAL_IMC_COUNTERS_CORE = 2,
+};
+
+
+/* PCI p2p descriptor */
+#define OPAL_PCI_P2P_ENABLE 0x1
+#define OPAL_PCI_P2P_LOAD 0x2
+#define OPAL_PCI_P2P_STORE 0x4
+
#endif /* __ASSEMBLY__ */
#endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 588fb1c23af9..726c23304a57 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -50,7 +50,7 @@ int64_t opal_tpo_write(uint64_t token, uint32_t year_mon_day,
uint32_t hour_min);
int64_t opal_cec_power_down(uint64_t request);
int64_t opal_cec_reboot(void);
-int64_t opal_cec_reboot2(uint32_t reboot_type, char *diag);
+int64_t opal_cec_reboot2(uint32_t reboot_type, const char *diag);
int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
int64_t opal_handle_interrupt(uint64_t isn, __be64 *outstanding_event_mask);
@@ -267,6 +267,19 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id);
int64_t opal_xive_free_irq(uint32_t girq);
int64_t opal_xive_sync(uint32_t type, uint32_t id);
int64_t opal_xive_dump(uint32_t type, uint32_t id);
+int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target,
+ uint64_t desc, uint16_t pe_number);
+
+int64_t opal_imc_counters_init(uint32_t type, uint64_t address,
+ uint64_t cpu_pir);
+int64_t opal_imc_counters_start(uint32_t type, uint64_t cpu_pir);
+int64_t opal_imc_counters_stop(uint32_t type, uint64_t cpu_pir);
+
+int opal_get_powercap(u32 handle, int token, u32 *pcap);
+int opal_set_powercap(u32 handle, int token, u32 pcap);
+int opal_get_power_shift_ratio(u32 handle, int token, u32 *psr);
+int opal_set_power_shift_ratio(u32 handle, int token, u32 psr);
+int opal_sensor_group_clear(u32 group_hndl, int token);
/* Internal functions */
extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
@@ -345,6 +358,10 @@ static inline int opal_get_async_rc(struct opal_msg msg)
void opal_wake_poller(void);
+void opal_powercap_init(void);
+void opal_psr_init(void);
+void opal_sensor_groups_init(void);
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_OPAL_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index dc88a31cc79a..04b60af027ae 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -31,6 +31,7 @@
#endif
#include <asm/accounting.h>
#include <asm/hmi.h>
+#include <asm/cpuidle.h>
register struct paca_struct *local_paca asm("r13");
@@ -183,6 +184,12 @@ struct paca_struct {
struct paca_struct **thread_sibling_pacas;
/* The PSSCR value that the kernel requested before going to stop */
u64 requested_psscr;
+
+ /*
+ * Save area for additional SPRs that need to be
+ * saved/restored during cpuidle stop.
+ */
+ struct stop_sprs stop_sprs;
#endif
#ifdef CONFIG_PPC_STD_MMU_64
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 56c67d3f0108..0b8aa1fe2d5f 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -195,7 +195,6 @@ struct pci_dn {
struct pci_dn *parent;
struct pci_controller *phb; /* for pci devices */
struct iommu_table_group *table_group; /* for phb's or bridges */
- struct device_node *node; /* back-pointer to the device_node */
int pci_ext_config_space; /* for pci devices */
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index d795c5d5789c..45ae1212ab8a 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -17,6 +17,8 @@ static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
}
#endif /* MODULE */
+#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
+
#ifdef CONFIG_PPC_BOOK3S
#include <asm/book3s/pgalloc.h>
#else
diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h
index 9c0f5db5cf46..67e7e3d990f4 100644
--- a/arch/powerpc/include/asm/pgtable-be-types.h
+++ b/arch/powerpc/include/asm/pgtable-be-types.h
@@ -87,6 +87,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
unsigned long *p = (unsigned long *)ptep;
__be64 prev;
+ /* See comment in switch_mm_irqs_off() */
prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pte_raw(old),
(__force unsigned long)pte_raw(new));
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
index 8bd3b13fe2fb..369a164b545c 100644
--- a/arch/powerpc/include/asm/pgtable-types.h
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -62,6 +62,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
{
unsigned long *p = (unsigned long *)ptep;
+ /* See comment in switch_mm_irqs_off() */
return pte_val(old) == __cmpxchg_u64(p, pte_val(old), pte_val(new));
}
#endif
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index afae9a336136..7d0d38f58243 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -66,22 +66,14 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_large(pmd) 0
#endif
-pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
- bool *is_thp, unsigned *shift);
-static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
- bool *is_thp, unsigned *shift)
-{
- VM_WARN(!arch_irqs_disabled(),
- "%s called with irq enabled\n", __func__);
- return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift);
-}
+/* can we use this in kvm */
unsigned long vmalloc_to_phys(void *vmalloc_addr);
void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
void pgtable_cache_init(void);
-#ifdef CONFIG_STRICT_KERNEL_RWX
+#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32)
void mark_initmem_nx(void);
#else
static inline void mark_initmem_nx(void) { }
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index de9681034353..3e5cf251ad9a 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -26,6 +26,8 @@ extern int pnv_pci_get_presence_state(uint64_t id, uint8_t *state);
extern int pnv_pci_get_power_state(uint64_t id, uint8_t *state);
extern int pnv_pci_set_power_state(uint64_t id, uint8_t state,
struct opal_msg *msg);
+extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
+ u64 desc);
int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index fa9ebaead91e..ce0930d68857 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -193,6 +193,7 @@
#define PPC_INST_CLRBHRB 0x7c00035c
#define PPC_INST_COPY 0x7c20060c
#define PPC_INST_CP_ABORT 0x7c00068c
+#define PPC_INST_DARN 0x7c0005e6
#define PPC_INST_DCBA 0x7c0005ec
#define PPC_INST_DCBA_MASK 0xfc0007fe
#define PPC_INST_DCBAL 0x7c2005ec
@@ -204,6 +205,8 @@
#define PPC_INST_ISEL_MASK 0xfc00003e
#define PPC_INST_LDARX 0x7c0000a8
#define PPC_INST_STDCX 0x7c0001ad
+#define PPC_INST_LQARX 0x7c000228
+#define PPC_INST_STQCX 0x7c00016d
#define PPC_INST_LSWI 0x7c0004aa
#define PPC_INST_LSWX 0x7c00042a
#define PPC_INST_LWARX 0x7c000028
@@ -261,7 +264,7 @@
#define PPC_INST_TLBSRX_DOT 0x7c0006a5
#define PPC_INST_VPMSUMW 0x10000488
#define PPC_INST_VPMSUMD 0x100004c8
-#define PPC_INST_XXLOR 0xf0000510
+#define PPC_INST_XXLOR 0xf0000490
#define PPC_INST_XXSWAPD 0xf0000250
#define PPC_INST_XVCPSGNDP 0xf0000780
#define PPC_INST_TRECHKPT 0x7c0007dd
@@ -395,16 +398,25 @@
#define PPC_CP_ABORT stringify_in_c(.long PPC_INST_CP_ABORT)
#define PPC_COPY(a, b) stringify_in_c(.long PPC_INST_COPY | \
___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_DARN(t, l) stringify_in_c(.long PPC_INST_DARN | \
+ ___PPC_RT(t) | \
+ (((l) & 0x3) << 16))
#define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \
__PPC_RA(a) | __PPC_RB(b))
#define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \
__PPC_RA(a) | __PPC_RB(b))
+#define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LQARX | \
+ ___PPC_RT(t) | ___PPC_RA(a) | \
+ ___PPC_RB(b) | __PPC_EH(eh))
#define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LDARX | \
___PPC_RT(t) | ___PPC_RA(a) | \
___PPC_RB(b) | __PPC_EH(eh))
#define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LWARX | \
___PPC_RT(t) | ___PPC_RA(a) | \
___PPC_RB(b) | __PPC_EH(eh))
+#define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_INST_STQCX | \
+ ___PPC_RT(t) | ___PPC_RA(a) | \
+ ___PPC_RB(b))
#define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \
___PPC_RB(b))
#define PPC_MSGSYNC stringify_in_c(.long PPC_INST_MSGSYNC)
@@ -414,6 +426,8 @@
___PPC_RB(b))
#define PPC_MSGCLRP(b) stringify_in_c(.long PPC_INST_MSGCLRP | \
___PPC_RB(b))
+#define PPC_PASTE(a, b) stringify_in_c(.long PPC_INST_PASTE | \
+ ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \
__PPC_RA(a) | __PPC_RS(s))
#define PPC_POPCNTD(a, s) stringify_in_c(.long PPC_INST_POPCNTD | \
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 6baeeb9acd0d..36f3e41c9fbe 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -378,10 +378,16 @@ BEGIN_FTR_SECTION_NESTED(96); \
cmpwi dest,0; \
beq- 90b; \
END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
-#elif defined(CONFIG_8xx)
-#define MFTB(dest) mftb dest
#else
-#define MFTB(dest) mfspr dest, SPRN_TBRL
+#define MFTB(dest) MFTBL(dest)
+#endif
+
+#ifdef CONFIG_PPC_8xx
+#define MFTBL(dest) mftb dest
+#define MFTBU(dest) mftbu dest
+#else
+#define MFTBL(dest) mfspr dest, SPRN_TBRL
+#define MFTBU(dest) mfspr dest, SPRN_TBRU
#endif
#ifndef CONFIG_SMP
@@ -411,7 +417,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
* and they must be used.
*/
-#if !defined(CONFIG_4xx) && !defined(CONFIG_8xx)
+#if !defined(CONFIG_4xx) && !defined(CONFIG_PPC_8xx)
#define tlbia \
li r4,1024; \
mtctr r4; \
@@ -439,7 +445,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
.machine push ; \
.machine "power4" ; \
lis scratch,0x60000000@h; \
- dcbt r0,scratch,0b01010; \
+ dcbt 0,scratch,0b01010; \
.machine pop
/*
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 35c00d7a0cf8..825bd5998701 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -159,7 +159,10 @@ struct of_drconf_cell {
#define OV5_PFO_HW_842 0x1140 /* PFO Compression Accelerator */
#define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */
#define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */
-#define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */
+#define OV5_XIVE_SUPPORT 0x17C0 /* XIVE Exploitation Support Mask */
+#define OV5_XIVE_LEGACY 0x1700 /* XIVE legacy mode Only */
+#define OV5_XIVE_EXPLOIT 0x1740 /* XIVE exploitation mode Only */
+#define OV5_XIVE_EITHER 0x1780 /* XIVE legacy or exploitation mode */
/* MMU Base Architecture */
#define OV5_MMU_SUPPORT 0x18C0 /* MMU Mode Support Mask */
#define OV5_MMU_HASH 0x1800 /* Hash MMU Only */
diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h
new file mode 100644
index 000000000000..2d633e9d686c
--- /dev/null
+++ b/arch/powerpc/include/asm/pte-walk.h
@@ -0,0 +1,35 @@
+#ifndef _ASM_POWERPC_PTE_WALK_H
+#define _ASM_POWERPC_PTE_WALK_H
+
+#include <linux/sched.h>
+
+/* Don't use this directly */
+extern pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
+ bool *is_thp, unsigned *hshift);
+
+static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea,
+ bool *is_thp, unsigned *hshift)
+{
+ VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
+ return __find_linux_pte(pgdir, ea, is_thp, hshift);
+}
+
+static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift)
+{
+ pgd_t *pgdir = init_mm.pgd;
+ return __find_linux_pte(pgdir, ea, NULL, hshift);
+}
+/*
+ * This is what we should always use. Any other lockless page table lookup needs
+ * careful audit against THP split.
+ */
+static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea,
+ bool *is_thp, unsigned *hshift)
+{
+ VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
+ VM_WARN(pgdir != current->mm->pgd,
+ "%s lock less page table lookup called on wrong mm\n", __func__);
+ return __find_linux_pte(pgdir, ea, is_thp, hshift);
+}
+
+#endif /* _ASM_POWERPC_PTE_WALK_H */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index a3b6575c7842..f92eaf7a4c0d 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -22,9 +22,9 @@
#include <asm/reg_fsl_emb.h>
#endif
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
#include <asm/reg_8xx.h>
-#endif /* CONFIG_8xx */
+#endif /* CONFIG_PPC_8xx */
#define MSR_SF_LG 63 /* Enable 64 bit mode */
#define MSR_ISF_LG 61 /* Interrupt 64b mode valid on 630 */
@@ -135,7 +135,7 @@
#define MSR_KERNEL (MSR_ | MSR_64BIT)
#define MSR_USER32 (MSR_ | MSR_PR | MSR_EE)
#define MSR_USER64 (MSR_USER32 | MSR_64BIT)
-#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_8xx)
+#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx)
/* Default MSR for kernel mode. */
#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR)
#define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE)
@@ -272,16 +272,65 @@
#define SPRN_DAR 0x013 /* Data Address Register */
#define SPRN_DBCR 0x136 /* e300 Data Breakpoint Control Reg */
#define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */
-#define DSISR_NOHPTE 0x40000000 /* no translation found */
-#define DSISR_PROTFAULT 0x08000000 /* protection fault */
-#define DSISR_BADACCESS 0x04000000 /* bad access to CI or G */
-#define DSISR_ISSTORE 0x02000000 /* access was a store */
-#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */
-#define DSISR_NOSEGMENT 0x00200000 /* SLB miss */
-#define DSISR_KEYFAULT 0x00200000 /* Key fault */
-#define DSISR_UNSUPP_MMU 0x00080000 /* Unsupported MMU config */
-#define DSISR_SET_RC 0x00040000 /* Failed setting of R/C bits */
-#define DSISR_PGDIRFAULT 0x00020000 /* Fault on page directory */
+#define DSISR_BAD_DIRECT_ST 0x80000000 /* Obsolete: Direct store error */
+#define DSISR_NOHPTE 0x40000000 /* no translation found */
+#define DSISR_ATTR_CONFLICT 0x20000000 /* P9: Process vs. Partition attr */
+#define DSISR_NOEXEC_OR_G 0x10000000 /* Alias of SRR1 bit, see below */
+#define DSISR_PROTFAULT 0x08000000 /* protection fault */
+#define DSISR_BADACCESS 0x04000000 /* bad access to CI or G */
+#define DSISR_ISSTORE 0x02000000 /* access was a store */
+#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */
+#define DSISR_NOSEGMENT 0x00200000 /* STAB miss (unsupported) */
+#define DSISR_KEYFAULT 0x00200000 /* Storage Key fault */
+#define DSISR_BAD_EXT_CTRL 0x00100000 /* Obsolete: External ctrl error */
+#define DSISR_UNSUPP_MMU 0x00080000 /* P9: Unsupported MMU config */
+#define DSISR_SET_RC 0x00040000 /* P9: Failed setting of R/C bits */
+#define DSISR_PRTABLE_FAULT 0x00020000 /* P9: Fault on process table */
+#define DSISR_ICSWX_NO_CT 0x00004000 /* P7: icswx unavailable cp type */
+#define DSISR_BAD_COPYPASTE 0x00000008 /* P9: Copy/Paste on wrong memtype */
+#define DSISR_BAD_AMO 0x00000004 /* P9: Incorrect AMO opcode */
+#define DSISR_BAD_CI_LDST 0x00000002 /* P8: Bad HV CI load/store */
+
+/*
+ * DSISR_NOEXEC_OR_G doesn't actually exist. This bit is always
+ * 0 on DSIs. However, on ISIs, the corresponding bit in SRR1
+ * indicates an attempt at executing from a no-execute PTE
+ * or segment or from a guarded page.
+ *
+ * We add a definition here for completeness as we alias
+ * DSISR and SRR1 in do_page_fault.
+ */
+
+/*
+ * DSISR bits that are treated as a fault. Any bit set
+ * here will skip hash_page, and cause do_page_fault to
+ * trigger a SIGBUS or SIGSEGV:
+ */
+#define DSISR_BAD_FAULT_32S (DSISR_BAD_DIRECT_ST | \
+ DSISR_BADACCESS | \
+ DSISR_BAD_EXT_CTRL)
+#define DSISR_BAD_FAULT_64S (DSISR_BAD_FAULT_32S | \
+ DSISR_ATTR_CONFLICT | \
+ DSISR_KEYFAULT | \
+ DSISR_UNSUPP_MMU | \
+ DSISR_PRTABLE_FAULT | \
+ DSISR_ICSWX_NO_CT | \
+ DSISR_BAD_COPYPASTE | \
+ DSISR_BAD_AMO | \
+ DSISR_BAD_CI_LDST)
+/*
+ * These bits are equivalent in SRR1 and DSISR for 0x400
+ * instruction access interrupts on Book3S
+ */
+#define DSISR_SRR1_MATCH_32S (DSISR_NOHPTE | \
+ DSISR_NOEXEC_OR_G | \
+ DSISR_PROTFAULT)
+#define DSISR_SRR1_MATCH_64S (DSISR_SRR1_MATCH_32S | \
+ DSISR_KEYFAULT | \
+ DSISR_UNSUPP_MMU | \
+ DSISR_SET_RC | \
+ DSISR_PRTABLE_FAULT)
+
#define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */
#define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */
#define SPRN_CIR 0x11B /* Chip Information Register (hyper, R/0) */
@@ -307,6 +356,7 @@
#define SPRN_PMSR 0x355 /* Power Management Status Reg */
#define SPRN_PMMAR 0x356 /* Power Management Memory Activity Register */
#define SPRN_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */
+#define SPRN_PSSCR_PR 0x337 /* PSSCR ISA 3.0, privileged mode access */
#define SPRN_PMCR 0x374 /* Power Management Control Register */
/* HFSCR and FSCR bit numbers are the same */
@@ -675,6 +725,7 @@
* may not be recoverable */
#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */
#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */
+#define SRR1_PROGTM 0x00200000 /* TM Bad Thing */
#define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */
#define SRR1_PROGILL 0x00080000 /* Illegal instruction */
#define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */
@@ -1114,7 +1165,7 @@
#endif
#endif
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
#define SPRN_SPRG_SCRATCH0 SPRN_SPRG0
#define SPRN_SPRG_SCRATCH1 SPRN_SPRG1
#define SPRN_SPRG_SCRATCH2 SPRN_SPRG2
@@ -1197,10 +1248,8 @@
* differentiated by the version number in the Communication Processor
* Module (CPM).
*/
-#define PVR_821 0x00500000
-#define PVR_823 PVR_821
-#define PVR_850 PVR_821
-#define PVR_860 PVR_821
+#define PVR_8xx 0x00500000
+
#define PVR_8240 0x00810100
#define PVR_8245 0x80811014
#define PVR_8260 PVR_8240
@@ -1295,12 +1344,12 @@ static inline void msr_check_and_clear(unsigned long bits)
".section __ftr_fixup,\"a\"\n" \
".align 3\n" \
"98:\n" \
- " .llong %1\n" \
- " .llong %1\n" \
- " .llong 97b-98b\n" \
- " .llong 99b-98b\n" \
- " .llong 0\n" \
- " .llong 0\n" \
+ " .8byte %1\n" \
+ " .8byte %1\n" \
+ " .8byte 97b-98b\n" \
+ " .8byte 99b-98b\n" \
+ " .8byte 0\n" \
+ " .8byte 0\n" \
".previous" \
: "=r" (rval) \
: "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \
@@ -1313,7 +1362,7 @@ static inline void msr_check_and_clear(unsigned long bits)
#else /* __powerpc64__ */
-#if defined(CONFIG_8xx)
+#if defined(CONFIG_PPC_8xx)
#define mftbl() ({unsigned long rval; \
asm volatile("mftbl %0" : "=r" (rval)); rval;})
#define mftbu() ({unsigned long rval; \
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 737e012ef56e..eb2a33d5df26 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -221,10 +221,7 @@
#define SPRN_CSRR0 SPRN_SRR2 /* Critical Save and Restore Register 0 */
#define SPRN_CSRR1 SPRN_SRR3 /* Critical Save and Restore Register 1 */
#endif
-
-#ifdef CONFIG_PPC_ICSWX
#define SPRN_HACOP 0x15F /* Hypervisor Available Coprocessor Register */
-#endif
/* Bit definitions for CCR1. */
#define CCR1_DPC 0x00000100 /* Disable L1 I-Cache/D-Cache parity checking */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 654d64c9f3ac..3a3fb0ca68f5 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -23,7 +23,6 @@ extern void reloc_got2(unsigned long);
void check_for_initrd(void);
void initmem_init(void);
-void setup_panic(void);
#define ARCH_PANIC_TIMEOUT 180
#ifdef CONFIG_PPC_PSERIES
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 8ea98504f900..fac963e10d39 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -97,6 +97,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys)
#endif
DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DECLARE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
static inline struct cpumask *cpu_sibling_mask(int cpu)
@@ -109,6 +110,11 @@ static inline struct cpumask *cpu_core_mask(int cpu)
return per_cpu(cpu_core_map, cpu);
}
+static inline struct cpumask *cpu_l2_cache_mask(int cpu)
+{
+ return per_cpu(cpu_l2_cache_map, cpu);
+}
+
extern int cpu_to_core_id(int cpu);
/* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 8c1b913de6d7..edbe571bcc54 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -170,39 +170,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
lock->slock = 0;
}
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- arch_spinlock_t lock_val;
-
- smp_mb();
-
- /*
- * Atomically load and store back the lock value (unchanged). This
- * ensures that our observation of the lock value is ordered with
- * respect to other lock operations.
- */
- __asm__ __volatile__(
-"1: " PPC_LWARX(%0, 0, %2, 0) "\n"
-" stwcx. %0, 0, %2\n"
-" bne- 1b\n"
- : "=&r" (lock_val), "+m" (*lock)
- : "r" (lock)
- : "cr0", "xer");
-
- if (arch_spin_value_unlocked(lock_val))
- goto out;
-
- while (lock->slock) {
- HMT_low();
- if (SHARED_PROCESSOR)
- __spin_yield(lock);
- }
- HMT_medium();
-
-out:
- smp_mb();
-}
-
/*
* Read-write spinlocks, allowing multiple readers
* but only one writer.
@@ -342,5 +309,8 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
#define arch_read_relax(lock) __rw_yield(lock)
#define arch_write_relax(lock) __rw_yield(lock)
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock() smp_mb()
+
#endif /* __KERNEL__ */
#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index d3a42cc45a82..ab9d849644d0 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -23,12 +23,9 @@ struct pt_regs;
#define IS_RFID(instr) (((instr) & 0xfc0007fe) == 0x4c000024)
#define IS_RFI(instr) (((instr) & 0xfc0007fe) == 0x4c000064)
-/* Emulate instructions that cause a transfer of control. */
-extern int emulate_step(struct pt_regs *regs, unsigned int instr);
-
enum instruction_type {
COMPUTE, /* arith/logical/CR op, etc. */
- LOAD,
+ LOAD, /* load and store types need to be contiguous */
LOAD_MULTI,
LOAD_FP,
LOAD_VMX,
@@ -55,10 +52,31 @@ enum instruction_type {
#define INSTR_TYPE_MASK 0x1f
+#define OP_IS_LOAD_STORE(type) (LOAD <= (type) && (type) <= STCX)
+
+/* Compute flags, ORed in with type */
+#define SETREG 0x20
+#define SETCC 0x40
+#define SETXER 0x80
+
+/* Branch flags, ORed in with type */
+#define SETLK 0x20
+#define BRTAKEN 0x40
+#define DECCTR 0x80
+
/* Load/store flags, ORed in with type */
#define SIGNEXT 0x20
#define UPDATE 0x40 /* matches bit in opcode 31 instructions */
#define BYTEREV 0x80
+#define FPCONV 0x100
+
+/* Barrier type field, ORed in with type */
+#define BARRIER_MASK 0xe0
+#define BARRIER_SYNC 0x00
+#define BARRIER_ISYNC 0x20
+#define BARRIER_EIEIO 0x40
+#define BARRIER_LWSYNC 0x60
+#define BARRIER_PTESYNC 0x80
/* Cacheop values, ORed in with type */
#define CACHEOP_MASK 0x700
@@ -67,10 +85,17 @@ enum instruction_type {
#define DCBTST 0x200
#define DCBT 0x300
#define ICBI 0x400
+#define DCBZ 0x500
+
+/* VSX flags values */
+#define VSX_FPCONV 1 /* do floating point SP/DP conversion */
+#define VSX_SPLAT 2 /* store loaded value into all elements */
+#define VSX_LDLEFT 4 /* load VSX register from left */
+#define VSX_CHECK_VEC 8 /* check MSR_VEC not MSR_VSX for reg >= 32 */
/* Size field in type word */
-#define SIZE(n) ((n) << 8)
-#define GETSIZE(w) ((w) >> 8)
+#define SIZE(n) ((n) << 12)
+#define GETSIZE(w) ((w) >> 12)
#define MKOP(t, f, s) ((t) | (f) | SIZE(s))
@@ -83,7 +108,63 @@ struct instruction_op {
int update_reg;
/* For MFSPR */
int spr;
+ u32 ccval;
+ u32 xerval;
+ u8 element_size; /* for VSX/VMX loads/stores */
+ u8 vsx_flags;
+};
+
+union vsx_reg {
+ u8 b[16];
+ u16 h[8];
+ u32 w[4];
+ unsigned long d[2];
+ float fp[4];
+ double dp[2];
+ __vector128 v;
};
-extern int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
+/*
+ * Decode an instruction, and return information about it in *op
+ * without changing *regs.
+ *
+ * Return value is 1 if the instruction can be emulated just by
+ * updating *regs with the information in *op, -1 if we need the
+ * GPRs but *regs doesn't contain the full register set, or 0
+ * otherwise.
+ */
+extern int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
unsigned int instr);
+
+/*
+ * Emulate an instruction that can be executed just by updating
+ * fields in *regs.
+ */
+void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op);
+
+/*
+ * Emulate instructions that cause a transfer of control,
+ * arithmetic/logical instructions, loads and stores,
+ * cache operations and barriers.
+ *
+ * Returns 1 if the instruction was emulated successfully,
+ * 0 if it could not be emulated, or -1 for an instruction that
+ * should not be emulated (rfid, mtmsrd clearing MSR_RI, etc.).
+ */
+extern int emulate_step(struct pt_regs *regs, unsigned int instr);
+
+/*
+ * Emulate a load or store instruction by reading/writing the
+ * memory of the current process. FP/VMX/VSX registers are assumed
+ * to hold live values if the appropriate enable bit in regs->msr is
+ * set; otherwise this will use the saved values in the thread struct
+ * for user-mode accesses.
+ */
+extern int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op);
+
+extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
+ const void *mem, bool cross_endian);
+extern void emulate_vsx_store(struct instruction_op *op,
+ const union vsx_reg *reg, void *mem,
+ bool cross_endian);
+extern int emulate_dcbz(unsigned long ea, struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index da3cdffca440..cc9addefb51c 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -10,6 +10,7 @@
#define __HAVE_ARCH_MEMMOVE
#define __HAVE_ARCH_MEMCMP
#define __HAVE_ARCH_MEMCHR
+#define __HAVE_ARCH_MEMSET16
extern char * strcpy(char *,const char *);
extern char * strncpy(char *,const char *, __kernel_size_t);
@@ -23,6 +24,31 @@ extern void * memmove(void *,const void *,__kernel_size_t);
extern int memcmp(const void *,const void *,__kernel_size_t);
extern void * memchr(const void *,int,__kernel_size_t);
+#ifdef CONFIG_PPC64
+#define __HAVE_ARCH_MEMSET32
+#define __HAVE_ARCH_MEMSET64
+
+extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
+extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
+extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
+
+static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n)
+{
+ return __memset16(p, v, n * 2);
+}
+
+static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n)
+{
+ return __memset32(p, v, n * 4);
+}
+
+static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
+{
+ return __memset64(p, v, n * 8);
+}
+#else
+extern void *memset16(uint16_t *, uint16_t, __kernel_size_t);
+#endif
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_STRING_H */
diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h
index 2cf846edb3fc..cb61eae5b7ed 100644
--- a/arch/powerpc/include/asm/timex.h
+++ b/arch/powerpc/include/asm/timex.h
@@ -29,7 +29,7 @@ static inline cycles_t get_cycles(void)
ret = 0;
__asm__ __volatile__(
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
"97: mftb %0\n"
#else
"97: mfspr %0, %2\n"
@@ -45,11 +45,7 @@ static inline cycles_t get_cycles(void)
" .long 0\n"
" .long 0\n"
".previous"
-#ifdef CONFIG_8xx
- : "=r" (ret) : "i" (CPU_FTR_601));
-#else
: "=r" (ret) : "i" (CPU_FTR_601), "i" (SPRN_TBRL));
-#endif
return ret;
#endif
}
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index 609557569f65..a7eabff27a0f 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -69,13 +69,22 @@ static inline int mm_is_core_local(struct mm_struct *mm)
topology_sibling_cpumask(smp_processor_id()));
}
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline int mm_is_thread_local(struct mm_struct *mm)
+{
+ if (atomic_read(&mm->context.active_cpus) > 1)
+ return false;
+ return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm));
+}
+#else /* CONFIG_PPC_BOOK3S_64 */
static inline int mm_is_thread_local(struct mm_struct *mm)
{
return cpumask_equal(mm_cpumask(mm),
cpumask_of(smp_processor_id()));
}
+#endif /* !CONFIG_PPC_BOOK3S_64 */
-#else
+#else /* CONFIG_SMP */
static inline int mm_is_core_local(struct mm_struct *mm)
{
return 1;
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index dc4e15937ccf..2d84bca8d053 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -16,8 +16,6 @@ struct device_node;
#include <asm/mmzone.h>
-#define parent_node(node) (node)
-
#define cpumask_of_node(node) ((node) == -1 ? \
cpu_all_mask : \
node_to_cpumask_map[node])
diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
new file mode 100644
index 000000000000..fd5963acd658
--- /dev/null
+++ b/arch/powerpc/include/asm/vas.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_POWERPC_VAS_H
+#define _ASM_POWERPC_VAS_H
+
+/*
+ * Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25
+ * (Local FIFO Size Register) of the VAS workbook.
+ */
+#define VAS_RX_FIFO_SIZE_MIN (1 << 10) /* 1KB */
+#define VAS_RX_FIFO_SIZE_MAX (8 << 20) /* 8MB */
+
+/*
+ * Threshold Control Mode: Have paste operation fail if the number of
+ * requests in receive FIFO exceeds a threshold.
+ *
+ * NOTE: No special error code yet if paste is rejected because of these
+ * limits. So users can't distinguish between this and other errors.
+ */
+#define VAS_THRESH_DISABLED 0
+#define VAS_THRESH_FIFO_GT_HALF_FULL 1
+#define VAS_THRESH_FIFO_GT_QTR_FULL 2
+#define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3
+
+/*
+ * Get/Set bit fields
+ */
+#define GET_FIELD(m, v) (((v) & (m)) >> MASK_LSH(m))
+#define MASK_LSH(m) (__builtin_ffsl(m) - 1)
+#define SET_FIELD(m, v, val) \
+ (((v) & ~(m)) | ((((typeof(v))(val)) << MASK_LSH(m)) & (m)))
+
+/*
+ * Co-processor Engine type.
+ */
+enum vas_cop_type {
+ VAS_COP_TYPE_FAULT,
+ VAS_COP_TYPE_842,
+ VAS_COP_TYPE_842_HIPRI,
+ VAS_COP_TYPE_GZIP,
+ VAS_COP_TYPE_GZIP_HIPRI,
+ VAS_COP_TYPE_FTW,
+ VAS_COP_TYPE_MAX,
+};
+
+/*
+ * Receive window attributes specified by the (in-kernel) owner of window.
+ */
+struct vas_rx_win_attr {
+ void *rx_fifo;
+ int rx_fifo_size;
+ int wcreds_max;
+
+ bool pin_win;
+ bool rej_no_credit;
+ bool tx_wcred_mode;
+ bool rx_wcred_mode;
+ bool tx_win_ord_mode;
+ bool rx_win_ord_mode;
+ bool data_stamp;
+ bool nx_win;
+ bool fault_win;
+ bool user_win;
+ bool notify_disable;
+ bool intr_disable;
+ bool notify_early;
+
+ int lnotify_lpid;
+ int lnotify_pid;
+ int lnotify_tid;
+ u32 pswid;
+
+ int tc_mode;
+};
+
+/*
+ * Window attributes specified by the in-kernel owner of a send window.
+ */
+struct vas_tx_win_attr {
+ enum vas_cop_type cop;
+ int wcreds_max;
+ int lpid;
+ int pidr; /* hardware PID (from SPRN_PID) */
+ int pid; /* linux process id */
+ int pswid;
+ int rsvd_txbuf_count;
+ int tc_mode;
+
+ bool user_win;
+ bool pin_win;
+ bool rej_no_credit;
+ bool rsvd_txbuf_enable;
+ bool tx_wcred_mode;
+ bool rx_wcred_mode;
+ bool tx_win_ord_mode;
+ bool rx_win_ord_mode;
+};
+
+/*
+ * Helper to initialize receive window attributes to defaults for an
+ * NX window.
+ */
+void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop);
+
+/*
+ * Open a VAS receive window for the instance of VAS identified by @vasid
+ * Use @attr to initialize the attributes of the window.
+ *
+ * Return a handle to the window or ERR_PTR() on error.
+ */
+struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
+ struct vas_rx_win_attr *attr);
+
+/*
+ * Helper to initialize send window attributes to defaults for an NX window.
+ */
+extern void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr,
+ enum vas_cop_type cop);
+
+/*
+ * Open a VAS send window for the instance of VAS identified by @vasid
+ * and the co-processor type @cop. Use @attr to initialize attributes
+ * of the window.
+ *
+ * Note: The instance of VAS must already have an open receive window for
+ * the coprocessor type @cop.
+ *
+ * Return a handle to the send window or ERR_PTR() on error.
+ */
+struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
+ struct vas_tx_win_attr *attr);
+
+/*
+ * Close the send or receive window identified by @win. For receive windows
+ * return -EAGAIN if there are active send windows attached to this receive
+ * window.
+ */
+int vas_win_close(struct vas_window *win);
+
+/*
+ * Copy the co-processor request block (CRB) @crb into the local L2 cache.
+ */
+int vas_copy_crb(void *crb, int offset);
+
+/*
+ * Paste a previously copied CRB (see vas_copy_crb()) from the L2 cache to
+ * the hardware address associated with the window @win. @re is expected/
+ * assumed to be true for NX windows.
+ */
+int vas_paste_crb(struct vas_window *win, int offset, bool re);
+
+#endif /* _ASM_POWERPC_VAS_H */
diff --git a/arch/powerpc/include/asm/vga.h b/arch/powerpc/include/asm/vga.h
index ab3acd2f2786..7a7b541b7493 100644
--- a/arch/powerpc/include/asm/vga.h
+++ b/arch/powerpc/include/asm/vga.h
@@ -33,8 +33,16 @@ static inline u16 scr_readw(volatile const u16 *addr)
return le16_to_cpu(*addr);
}
+#define VT_BUF_HAVE_MEMSETW
+static inline void scr_memsetw(u16 *s, u16 v, unsigned int n)
+{
+ memset16(s, cpu_to_le16(v), n / 2);
+}
+
#define VT_BUF_HAVE_MEMCPYW
+#define VT_BUF_HAVE_MEMMOVEW
#define scr_memcpyw memcpy
+#define scr_memmovew memmove
#endif /* !CONFIG_VGA_CONSOLE && !CONFIG_MDA_CONSOLE */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index c23ff4389ca2..371fbebf1ec9 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -45,6 +45,7 @@ struct xive_irq_data {
void __iomem *trig_mmio;
u32 esb_shift;
int src_chip;
+ u32 hw_irq;
/* Setup/used by frontend */
int target;
@@ -55,6 +56,7 @@ struct xive_irq_data {
#define XIVE_IRQ_FLAG_SHIFT_BUG 0x04
#define XIVE_IRQ_FLAG_MASK_FW 0x08
#define XIVE_IRQ_FLAG_EOI_FW 0x10
+#define XIVE_IRQ_FLAG_H_INT_ESB 0x20
#define XIVE_INVALID_CHIP_ID -1
@@ -110,11 +112,13 @@ extern bool __xive_enabled;
static inline bool xive_enabled(void) { return __xive_enabled; }
+extern bool xive_spapr_init(void);
extern bool xive_native_init(void);
extern void xive_smp_probe(void);
extern int xive_smp_prepare_cpu(unsigned int cpu);
extern void xive_smp_setup_cpu(void);
extern void xive_smp_disable_cpu(void);
+extern void xive_teardown_cpu(void);
extern void xive_kexec_teardown_cpu(int secondary);
extern void xive_shutdown(void);
extern void xive_flush_interrupt(void);
@@ -147,6 +151,7 @@ extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
static inline bool xive_enabled(void) { return false; }
+static inline bool xive_spapr_init(void) { return false; }
static inline bool xive_native_init(void) { return false; }
static inline void xive_smp_probe(void) { }
extern inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
index ab45cc2f3101..03c06ba7464f 100644
--- a/arch/powerpc/include/uapi/asm/mman.h
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -29,20 +29,4 @@
#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
-/*
- * When MAP_HUGETLB is set, bits [26:31] of the flags argument to mmap(2),
- * encode the log2 of the huge page size. A value of zero indicates that the
- * default huge page size should be used. To use a non-default huge page size,
- * one of these defines can be used, or the size can be encoded by hand. Note
- * that on most systems only a subset, or possibly none, of these sizes will be
- * available.
- */
-#define MAP_HUGE_512KB (19 << MAP_HUGE_SHIFT) /* 512KB HugeTLB Page */
-#define MAP_HUGE_1MB (20 << MAP_HUGE_SHIFT) /* 1MB HugeTLB Page */
-#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) /* 2MB HugeTLB Page */
-#define MAP_HUGE_8MB (23 << MAP_HUGE_SHIFT) /* 8MB HugeTLB Page */
-#define MAP_HUGE_16MB (24 << MAP_HUGE_SHIFT) /* 16MB HugeTLB Page */
-#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) /* 1GB HugeTLB Page */
-#define MAP_HUGE_16GB (34 << MAP_HUGE_SHIFT) /* 16GB HugeTLB Page */
-
#endif /* _UAPI_ASM_POWERPC_MMAN_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 4aa7c147e447..91960f83039c 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -38,7 +38,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
paca.o nvram_64.o firmware.o
obj-$(CONFIG_VDSO32) += vdso32/
-obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog.o
+obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
@@ -83,7 +83,7 @@ extra-y := head_$(BITS).o
extra-$(CONFIG_40x) := head_40x.o
extra-$(CONFIG_44x) := head_44x.o
extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
-extra-$(CONFIG_8xx) := head_8xx.o
+extra-$(CONFIG_PPC_8xx) := head_8xx.o
extra-y += vmlinux.lds
obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index ec7a8b099dd9..26b9994d27ee 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -27,6 +27,7 @@
#include <asm/switch_to.h>
#include <asm/disassemble.h>
#include <asm/cpu_has_feature.h>
+#include <asm/sstep.h>
struct aligninfo {
unsigned char len;
@@ -40,364 +41,9 @@ struct aligninfo {
#define LD 0 /* load */
#define ST 1 /* store */
#define SE 2 /* sign-extend value, or FP ld/st as word */
-#define F 4 /* to/from fp regs */
-#define U 8 /* update index register */
-#define M 0x10 /* multiple load/store */
#define SW 0x20 /* byte swap */
-#define S 0x40 /* single-precision fp or... */
-#define SX 0x40 /* ... byte count in XER */
-#define HARD 0x80 /* string, stwcx. */
#define E4 0x40 /* SPE endianness is word */
#define E8 0x80 /* SPE endianness is double word */
-#define SPLT 0x80 /* VSX SPLAT load */
-
-/* DSISR bits reported for a DCBZ instruction: */
-#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */
-
-/*
- * The PowerPC stores certain bits of the instruction that caused the
- * alignment exception in the DSISR register. This array maps those
- * bits to information about the operand length and what the
- * instruction would do.
- */
-static struct aligninfo aligninfo[128] = {
- { 4, LD }, /* 00 0 0000: lwz / lwarx */
- INVALID, /* 00 0 0001 */
- { 4, ST }, /* 00 0 0010: stw */
- INVALID, /* 00 0 0011 */
- { 2, LD }, /* 00 0 0100: lhz */
- { 2, LD+SE }, /* 00 0 0101: lha */
- { 2, ST }, /* 00 0 0110: sth */
- { 4, LD+M }, /* 00 0 0111: lmw */
- { 4, LD+F+S }, /* 00 0 1000: lfs */
- { 8, LD+F }, /* 00 0 1001: lfd */
- { 4, ST+F+S }, /* 00 0 1010: stfs */
- { 8, ST+F }, /* 00 0 1011: stfd */
- { 16, LD }, /* 00 0 1100: lq */
- { 8, LD }, /* 00 0 1101: ld/ldu/lwa */
- INVALID, /* 00 0 1110 */
- { 8, ST }, /* 00 0 1111: std/stdu */
- { 4, LD+U }, /* 00 1 0000: lwzu */
- INVALID, /* 00 1 0001 */
- { 4, ST+U }, /* 00 1 0010: stwu */
- INVALID, /* 00 1 0011 */
- { 2, LD+U }, /* 00 1 0100: lhzu */
- { 2, LD+SE+U }, /* 00 1 0101: lhau */
- { 2, ST+U }, /* 00 1 0110: sthu */
- { 4, ST+M }, /* 00 1 0111: stmw */
- { 4, LD+F+S+U }, /* 00 1 1000: lfsu */
- { 8, LD+F+U }, /* 00 1 1001: lfdu */
- { 4, ST+F+S+U }, /* 00 1 1010: stfsu */
- { 8, ST+F+U }, /* 00 1 1011: stfdu */
- { 16, LD+F }, /* 00 1 1100: lfdp */
- INVALID, /* 00 1 1101 */
- { 16, ST+F }, /* 00 1 1110: stfdp */
- INVALID, /* 00 1 1111 */
- { 8, LD }, /* 01 0 0000: ldx */
- INVALID, /* 01 0 0001 */
- { 8, ST }, /* 01 0 0010: stdx */
- INVALID, /* 01 0 0011 */
- INVALID, /* 01 0 0100 */
- { 4, LD+SE }, /* 01 0 0101: lwax */
- INVALID, /* 01 0 0110 */
- INVALID, /* 01 0 0111 */
- { 4, LD+M+HARD+SX }, /* 01 0 1000: lswx */
- { 4, LD+M+HARD }, /* 01 0 1001: lswi */
- { 4, ST+M+HARD+SX }, /* 01 0 1010: stswx */
- { 4, ST+M+HARD }, /* 01 0 1011: stswi */
- INVALID, /* 01 0 1100 */
- { 8, LD+U }, /* 01 0 1101: ldu */
- INVALID, /* 01 0 1110 */
- { 8, ST+U }, /* 01 0 1111: stdu */
- { 8, LD+U }, /* 01 1 0000: ldux */
- INVALID, /* 01 1 0001 */
- { 8, ST+U }, /* 01 1 0010: stdux */
- INVALID, /* 01 1 0011 */
- INVALID, /* 01 1 0100 */
- { 4, LD+SE+U }, /* 01 1 0101: lwaux */
- INVALID, /* 01 1 0110 */
- INVALID, /* 01 1 0111 */
- INVALID, /* 01 1 1000 */
- INVALID, /* 01 1 1001 */
- INVALID, /* 01 1 1010 */
- INVALID, /* 01 1 1011 */
- INVALID, /* 01 1 1100 */
- INVALID, /* 01 1 1101 */
- INVALID, /* 01 1 1110 */
- INVALID, /* 01 1 1111 */
- INVALID, /* 10 0 0000 */
- INVALID, /* 10 0 0001 */
- INVALID, /* 10 0 0010: stwcx. */
- INVALID, /* 10 0 0011 */
- INVALID, /* 10 0 0100 */
- INVALID, /* 10 0 0101 */
- INVALID, /* 10 0 0110 */
- INVALID, /* 10 0 0111 */
- { 4, LD+SW }, /* 10 0 1000: lwbrx */
- INVALID, /* 10 0 1001 */
- { 4, ST+SW }, /* 10 0 1010: stwbrx */
- INVALID, /* 10 0 1011 */
- { 2, LD+SW }, /* 10 0 1100: lhbrx */
- { 4, LD+SE }, /* 10 0 1101 lwa */
- { 2, ST+SW }, /* 10 0 1110: sthbrx */
- { 16, ST }, /* 10 0 1111: stq */
- INVALID, /* 10 1 0000 */
- INVALID, /* 10 1 0001 */
- INVALID, /* 10 1 0010 */
- INVALID, /* 10 1 0011 */
- INVALID, /* 10 1 0100 */
- INVALID, /* 10 1 0101 */
- INVALID, /* 10 1 0110 */
- INVALID, /* 10 1 0111 */
- INVALID, /* 10 1 1000 */
- INVALID, /* 10 1 1001 */
- INVALID, /* 10 1 1010 */
- INVALID, /* 10 1 1011 */
- INVALID, /* 10 1 1100 */
- INVALID, /* 10 1 1101 */
- INVALID, /* 10 1 1110 */
- { 0, ST+HARD }, /* 10 1 1111: dcbz */
- { 4, LD }, /* 11 0 0000: lwzx */
- INVALID, /* 11 0 0001 */
- { 4, ST }, /* 11 0 0010: stwx */
- INVALID, /* 11 0 0011 */
- { 2, LD }, /* 11 0 0100: lhzx */
- { 2, LD+SE }, /* 11 0 0101: lhax */
- { 2, ST }, /* 11 0 0110: sthx */
- INVALID, /* 11 0 0111 */
- { 4, LD+F+S }, /* 11 0 1000: lfsx */
- { 8, LD+F }, /* 11 0 1001: lfdx */
- { 4, ST+F+S }, /* 11 0 1010: stfsx */
- { 8, ST+F }, /* 11 0 1011: stfdx */
- { 16, LD+F }, /* 11 0 1100: lfdpx */
- { 4, LD+F+SE }, /* 11 0 1101: lfiwax */
- { 16, ST+F }, /* 11 0 1110: stfdpx */
- { 4, ST+F }, /* 11 0 1111: stfiwx */
- { 4, LD+U }, /* 11 1 0000: lwzux */
- INVALID, /* 11 1 0001 */
- { 4, ST+U }, /* 11 1 0010: stwux */
- INVALID, /* 11 1 0011 */
- { 2, LD+U }, /* 11 1 0100: lhzux */
- { 2, LD+SE+U }, /* 11 1 0101: lhaux */
- { 2, ST+U }, /* 11 1 0110: sthux */
- INVALID, /* 11 1 0111 */
- { 4, LD+F+S+U }, /* 11 1 1000: lfsux */
- { 8, LD+F+U }, /* 11 1 1001: lfdux */
- { 4, ST+F+S+U }, /* 11 1 1010: stfsux */
- { 8, ST+F+U }, /* 11 1 1011: stfdux */
- INVALID, /* 11 1 1100 */
- { 4, LD+F }, /* 11 1 1101: lfiwzx */
- INVALID, /* 11 1 1110 */
- INVALID, /* 11 1 1111 */
-};
-
-/*
- * The dcbz (data cache block zero) instruction
- * gives an alignment fault if used on non-cacheable
- * memory. We handle the fault mainly for the
- * case when we are running with the cache disabled
- * for debugging.
- */
-static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
-{
- long __user *p;
- int i, size;
-
-#ifdef __powerpc64__
- size = ppc64_caches.l1d.block_size;
-#else
- size = L1_CACHE_BYTES;
-#endif
- p = (long __user *) (regs->dar & -size);
- if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size))
- return -EFAULT;
- for (i = 0; i < size / sizeof(long); ++i)
- if (__put_user_inatomic(0, p+i))
- return -EFAULT;
- return 1;
-}
-
-/*
- * Emulate load & store multiple instructions
- * On 64-bit machines, these instructions only affect/use the
- * bottom 4 bytes of each register, and the loads clear the
- * top 4 bytes of the affected register.
- */
-#ifdef __BIG_ENDIAN__
-#ifdef CONFIG_PPC64
-#define REG_BYTE(rp, i) *((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4)
-#else
-#define REG_BYTE(rp, i) *((u8 *)(rp) + (i))
-#endif
-#else
-#define REG_BYTE(rp, i) (*(((u8 *)((rp) + ((i)>>2)) + ((i)&3))))
-#endif
-
-#define SWIZ_PTR(p) ((unsigned char __user *)((p) ^ swiz))
-
-static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
- unsigned int reg, unsigned int nb,
- unsigned int flags, unsigned int instr,
- unsigned long swiz)
-{
- unsigned long *rptr;
- unsigned int nb0, i, bswiz;
- unsigned long p;
-
- /*
- * We do not try to emulate 8 bytes multiple as they aren't really
- * available in our operating environments and we don't try to
- * emulate multiples operations in kernel land as they should never
- * be used/generated there at least not on unaligned boundaries
- */
- if (unlikely((nb > 4) || !user_mode(regs)))
- return 0;
-
- /* lmw, stmw, lswi/x, stswi/x */
- nb0 = 0;
- if (flags & HARD) {
- if (flags & SX) {
- nb = regs->xer & 127;
- if (nb == 0)
- return 1;
- } else {
- unsigned long pc = regs->nip ^ (swiz & 4);
-
- if (__get_user_inatomic(instr,
- (unsigned int __user *)pc))
- return -EFAULT;
- if (swiz == 0 && (flags & SW))
- instr = cpu_to_le32(instr);
- nb = (instr >> 11) & 0x1f;
- if (nb == 0)
- nb = 32;
- }
- if (nb + reg * 4 > 128) {
- nb0 = nb + reg * 4 - 128;
- nb = 128 - reg * 4;
- }
-#ifdef __LITTLE_ENDIAN__
- /*
- * String instructions are endian neutral but the code
- * below is not. Force byte swapping on so that the
- * effects of swizzling are undone in the load/store
- * loops below.
- */
- flags ^= SW;
-#endif
- } else {
- /* lwm, stmw */
- nb = (32 - reg) * 4;
- }
-
- if (!access_ok((flags & ST ? VERIFY_WRITE: VERIFY_READ), addr, nb+nb0))
- return -EFAULT; /* bad address */
-
- rptr = &regs->gpr[reg];
- p = (unsigned long) addr;
- bswiz = (flags & SW)? 3: 0;
-
- if (!(flags & ST)) {
- /*
- * This zeroes the top 4 bytes of the affected registers
- * in 64-bit mode, and also zeroes out any remaining
- * bytes of the last register for lsw*.
- */
- memset(rptr, 0, ((nb + 3) / 4) * sizeof(unsigned long));
- if (nb0 > 0)
- memset(&regs->gpr[0], 0,
- ((nb0 + 3) / 4) * sizeof(unsigned long));
-
- for (i = 0; i < nb; ++i, ++p)
- if (__get_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- if (nb0 > 0) {
- rptr = &regs->gpr[0];
- addr += nb;
- for (i = 0; i < nb0; ++i, ++p)
- if (__get_user_inatomic(REG_BYTE(rptr,
- i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- }
-
- } else {
- for (i = 0; i < nb; ++i, ++p)
- if (__put_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- if (nb0 > 0) {
- rptr = &regs->gpr[0];
- addr += nb;
- for (i = 0; i < nb0; ++i, ++p)
- if (__put_user_inatomic(REG_BYTE(rptr,
- i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- }
- }
- return 1;
-}
-
-/*
- * Emulate floating-point pair loads and stores.
- * Only POWER6 has these instructions, and it does true little-endian,
- * so we don't need the address swizzling.
- */
-static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
- unsigned int flags)
-{
- char *ptr0 = (char *) &current->thread.TS_FPR(reg);
- char *ptr1 = (char *) &current->thread.TS_FPR(reg+1);
- int i, ret, sw = 0;
-
- if (reg & 1)
- return 0; /* invalid form: FRS/FRT must be even */
- if (flags & SW)
- sw = 7;
- ret = 0;
- for (i = 0; i < 8; ++i) {
- if (!(flags & ST)) {
- ret |= __get_user(ptr0[i^sw], addr + i);
- ret |= __get_user(ptr1[i^sw], addr + i + 8);
- } else {
- ret |= __put_user(ptr0[i^sw], addr + i);
- ret |= __put_user(ptr1[i^sw], addr + i + 8);
- }
- }
- if (ret)
- return -EFAULT;
- return 1; /* exception handled and fixed up */
-}
-
-#ifdef CONFIG_PPC64
-static int emulate_lq_stq(struct pt_regs *regs, unsigned char __user *addr,
- unsigned int reg, unsigned int flags)
-{
- char *ptr0 = (char *)&regs->gpr[reg];
- char *ptr1 = (char *)&regs->gpr[reg+1];
- int i, ret, sw = 0;
-
- if (reg & 1)
- return 0; /* invalid form: GPR must be even */
- if (flags & SW)
- sw = 7;
- ret = 0;
- for (i = 0; i < 8; ++i) {
- if (!(flags & ST)) {
- ret |= __get_user(ptr0[i^sw], addr + i);
- ret |= __get_user(ptr1[i^sw], addr + i + 8);
- } else {
- ret |= __put_user(ptr0[i^sw], addr + i);
- ret |= __put_user(ptr1[i^sw], addr + i + 8);
- }
- }
- if (ret)
- return -EFAULT;
- return 1; /* exception handled and fixed up */
-}
-#endif /* CONFIG_PPC64 */
#ifdef CONFIG_SPE
@@ -636,133 +282,21 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
}
#endif /* CONFIG_SPE */
-#ifdef CONFIG_VSX
-/*
- * Emulate VSX instructions...
- */
-static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
- unsigned int areg, struct pt_regs *regs,
- unsigned int flags, unsigned int length,
- unsigned int elsize)
-{
- char *ptr;
- unsigned long *lptr;
- int ret = 0;
- int sw = 0;
- int i, j;
-
- /* userland only */
- if (unlikely(!user_mode(regs)))
- return 0;
-
- flush_vsx_to_thread(current);
-
- if (reg < 32)
- ptr = (char *) &current->thread.fp_state.fpr[reg][0];
- else
- ptr = (char *) &current->thread.vr_state.vr[reg - 32];
-
- lptr = (unsigned long *) ptr;
-
-#ifdef __LITTLE_ENDIAN__
- if (flags & SW) {
- elsize = length;
- sw = length-1;
- } else {
- /*
- * The elements are BE ordered, even in LE mode, so process
- * them in reverse order.
- */
- addr += length - elsize;
-
- /* 8 byte memory accesses go in the top 8 bytes of the VR */
- if (length == 8)
- ptr += 8;
- }
-#else
- if (flags & SW)
- sw = elsize-1;
-#endif
-
- for (j = 0; j < length; j += elsize) {
- for (i = 0; i < elsize; ++i) {
- if (flags & ST)
- ret |= __put_user(ptr[i^sw], addr + i);
- else
- ret |= __get_user(ptr[i^sw], addr + i);
- }
- ptr += elsize;
-#ifdef __LITTLE_ENDIAN__
- addr -= elsize;
-#else
- addr += elsize;
-#endif
- }
-
-#ifdef __BIG_ENDIAN__
-#define VSX_HI 0
-#define VSX_LO 1
-#else
-#define VSX_HI 1
-#define VSX_LO 0
-#endif
-
- if (!ret) {
- if (flags & U)
- regs->gpr[areg] = regs->dar;
-
- /* Splat load copies the same data to top and bottom 8 bytes */
- if (flags & SPLT)
- lptr[VSX_LO] = lptr[VSX_HI];
- /* For 8 byte loads, zero the low 8 bytes */
- else if (!(flags & ST) && (8 == length))
- lptr[VSX_LO] = 0;
- } else
- return -EFAULT;
-
- return 1;
-}
-#endif
-
/*
* Called on alignment exception. Attempts to fixup
*
* Return 1 on success
* Return 0 if unable to handle the interrupt
* Return -EFAULT if data address is bad
+ * Other negative return values indicate that the instruction can't
+ * be emulated, and the process should be given a SIGBUS.
*/
int fix_alignment(struct pt_regs *regs)
{
- unsigned int instr, nb, flags, instruction = 0;
- unsigned int reg, areg;
- unsigned int dsisr;
- unsigned char __user *addr;
- unsigned long p, swiz;
- int ret, i;
- union data {
- u64 ll;
- double dd;
- unsigned char v[8];
- struct {
-#ifdef __LITTLE_ENDIAN__
- int low32;
- unsigned hi32;
-#else
- unsigned hi32;
- int low32;
-#endif
- } x32;
- struct {
-#ifdef __LITTLE_ENDIAN__
- short low16;
- unsigned char hi48[6];
-#else
- unsigned char hi48[6];
- short low16;
-#endif
- } x16;
- } data;
+ unsigned int instr;
+ struct instruction_op op;
+ int r, type;
/*
* We require a complete register set, if not, then our assembly
@@ -770,121 +304,23 @@ int fix_alignment(struct pt_regs *regs)
*/
CHECK_FULL_REGS(regs);
- dsisr = regs->dsisr;
-
- /* Some processors don't provide us with a DSISR we can use here,
- * let's make one up from the instruction
- */
- if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) {
- unsigned long pc = regs->nip;
-
- if (cpu_has_feature(CPU_FTR_PPC_LE) && (regs->msr & MSR_LE))
- pc ^= 4;
- if (unlikely(__get_user_inatomic(instr,
- (unsigned int __user *)pc)))
- return -EFAULT;
- if (cpu_has_feature(CPU_FTR_REAL_LE) && (regs->msr & MSR_LE))
- instr = cpu_to_le32(instr);
- dsisr = make_dsisr(instr);
- instruction = instr;
+ if (unlikely(__get_user(instr, (unsigned int __user *)regs->nip)))
+ return -EFAULT;
+ if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
+ /* We don't handle PPC little-endian any more... */
+ if (cpu_has_feature(CPU_FTR_PPC_LE))
+ return -EIO;
+ instr = swab32(instr);
}
- /* extract the operation and registers from the dsisr */
- reg = (dsisr >> 5) & 0x1f; /* source/dest register */
- areg = dsisr & 0x1f; /* register to update */
-
#ifdef CONFIG_SPE
if ((instr >> 26) == 0x4) {
+ int reg = (instr >> 21) & 0x1f;
PPC_WARN_ALIGNMENT(spe, regs);
return emulate_spe(regs, reg, instr);
}
#endif
- instr = (dsisr >> 10) & 0x7f;
- instr |= (dsisr >> 13) & 0x60;
-
- /* Lookup the operation in our table */
- nb = aligninfo[instr].len;
- flags = aligninfo[instr].flags;
-
- /*
- * Handle some cases which give overlaps in the DSISR values.
- */
- if (IS_XFORM(instruction)) {
- switch (get_xop(instruction)) {
- case 532: /* ldbrx */
- nb = 8;
- flags = LD+SW;
- break;
- case 660: /* stdbrx */
- nb = 8;
- flags = ST+SW;
- break;
- case 20: /* lwarx */
- case 84: /* ldarx */
- case 116: /* lharx */
- case 276: /* lqarx */
- return 0; /* not emulated ever */
- }
- }
-
- /* Byteswap little endian loads and stores */
- swiz = 0;
- if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
- flags ^= SW;
-#ifdef __BIG_ENDIAN__
- /*
- * So-called "PowerPC little endian" mode works by
- * swizzling addresses rather than by actually doing
- * any byte-swapping. To emulate this, we XOR each
- * byte address with 7. We also byte-swap, because
- * the processor's address swizzling depends on the
- * operand size (it xors the address with 7 for bytes,
- * 6 for halfwords, 4 for words, 0 for doublewords) but
- * we will xor with 7 and load/store each byte separately.
- */
- if (cpu_has_feature(CPU_FTR_PPC_LE))
- swiz = 7;
-#endif
- }
-
- /* DAR has the operand effective address */
- addr = (unsigned char __user *)regs->dar;
-
-#ifdef CONFIG_VSX
- if ((instruction & 0xfc00003e) == 0x7c000018) {
- unsigned int elsize;
-
- /* Additional register addressing bit (64 VSX vs 32 FPR/GPR) */
- reg |= (instruction & 0x1) << 5;
- /* Simple inline decoder instead of a table */
- /* VSX has only 8 and 16 byte memory accesses */
- nb = 8;
- if (instruction & 0x200)
- nb = 16;
-
- /* Vector stores in little-endian mode swap individual
- elements, so process them separately */
- elsize = 4;
- if (instruction & 0x80)
- elsize = 8;
-
- flags = 0;
- if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE))
- flags |= SW;
- if (instruction & 0x100)
- flags |= ST;
- if (instruction & 0x040)
- flags |= U;
- /* splat load needs a special decoder */
- if ((instruction & 0x400) == 0){
- flags |= SPLT;
- nb = 8;
- }
- PPC_WARN_ALIGNMENT(vsx, regs);
- return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize);
- }
-#endif
/*
* ISA 3.0 (such as P9) copy, copy_first, paste and paste_last alignment
@@ -896,173 +332,27 @@ int fix_alignment(struct pt_regs *regs)
* when pasting to a co-processor. Furthermore, paste_last is the
* synchronisation point for preceding copy/paste sequences.
*/
- if ((instruction & 0xfc0006fe) == PPC_INST_COPY)
+ if ((instr & 0xfc0006fe) == PPC_INST_COPY)
return -EIO;
- /* A size of 0 indicates an instruction we don't support, with
- * the exception of DCBZ which is handled as a special case here
- */
- if (instr == DCBZ) {
- PPC_WARN_ALIGNMENT(dcbz, regs);
- return emulate_dcbz(regs, addr);
- }
- if (unlikely(nb == 0))
- return 0;
-
- /* Load/Store Multiple instructions are handled in their own
- * function
- */
- if (flags & M) {
- PPC_WARN_ALIGNMENT(multiple, regs);
- return emulate_multiple(regs, addr, reg, nb,
- flags, instr, swiz);
- }
-
- /* Verify the address of the operand */
- if (unlikely(user_mode(regs) &&
- !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
- addr, nb)))
- return -EFAULT;
-
- /* Force the fprs into the save area so we can reference them */
- if (flags & F) {
- /* userland only */
- if (unlikely(!user_mode(regs)))
- return 0;
- flush_fp_to_thread(current);
- }
+ r = analyse_instr(&op, regs, instr);
+ if (r < 0)
+ return -EINVAL;
- if (nb == 16) {
- if (flags & F) {
- /* Special case for 16-byte FP loads and stores */
- PPC_WARN_ALIGNMENT(fp_pair, regs);
- return emulate_fp_pair(addr, reg, flags);
- } else {
-#ifdef CONFIG_PPC64
- /* Special case for 16-byte loads and stores */
- PPC_WARN_ALIGNMENT(lq_stq, regs);
- return emulate_lq_stq(regs, addr, reg, flags);
-#else
- return 0;
-#endif
- }
- }
-
- PPC_WARN_ALIGNMENT(unaligned, regs);
-
- /* If we are loading, get the data from user space, else
- * get it from register values
- */
- if (!(flags & ST)) {
- unsigned int start = 0;
-
- switch (nb) {
- case 4:
- start = offsetof(union data, x32.low32);
- break;
- case 2:
- start = offsetof(union data, x16.low16);
- break;
- }
-
- data.ll = 0;
- ret = 0;
- p = (unsigned long)addr;
-
- for (i = 0; i < nb; i++)
- ret |= __get_user_inatomic(data.v[start + i],
- SWIZ_PTR(p++));
-
- if (unlikely(ret))
- return -EFAULT;
-
- } else if (flags & F) {
- data.ll = current->thread.TS_FPR(reg);
- if (flags & S) {
- /* Single-precision FP store requires conversion... */
-#ifdef CONFIG_PPC_FPU
- preempt_disable();
- enable_kernel_fp();
- cvt_df(&data.dd, (float *)&data.x32.low32);
- disable_kernel_fp();
- preempt_enable();
-#else
- return 0;
-#endif
- }
- } else
- data.ll = regs->gpr[reg];
-
- if (flags & SW) {
- switch (nb) {
- case 8:
- data.ll = swab64(data.ll);
- break;
- case 4:
- data.x32.low32 = swab32(data.x32.low32);
- break;
- case 2:
- data.x16.low16 = swab16(data.x16.low16);
- break;
- }
- }
-
- /* Perform other misc operations like sign extension
- * or floating point single precision conversion
- */
- switch (flags & ~(U|SW)) {
- case LD+SE: /* sign extending integer loads */
- case LD+F+SE: /* sign extend for lfiwax */
- if ( nb == 2 )
- data.ll = data.x16.low16;
- else /* nb must be 4 */
- data.ll = data.x32.low32;
- break;
-
- /* Single-precision FP load requires conversion... */
- case LD+F+S:
-#ifdef CONFIG_PPC_FPU
- preempt_disable();
- enable_kernel_fp();
- cvt_fd((float *)&data.x32.low32, &data.dd);
- disable_kernel_fp();
- preempt_enable();
-#else
- return 0;
-#endif
- break;
+ type = op.type & INSTR_TYPE_MASK;
+ if (!OP_IS_LOAD_STORE(type)) {
+ if (type != CACHEOP + DCBZ)
+ return -EINVAL;
+ PPC_WARN_ALIGNMENT(dcbz, regs);
+ r = emulate_dcbz(op.ea, regs);
+ } else {
+ if (type == LARX || type == STCX)
+ return -EIO;
+ PPC_WARN_ALIGNMENT(unaligned, regs);
+ r = emulate_loadstore(regs, &op);
}
- /* Store result to memory or update registers */
- if (flags & ST) {
- unsigned int start = 0;
-
- switch (nb) {
- case 4:
- start = offsetof(union data, x32.low32);
- break;
- case 2:
- start = offsetof(union data, x16.low16);
- break;
- }
-
- ret = 0;
- p = (unsigned long)addr;
-
- for (i = 0; i < nb; i++)
- ret |= __put_user_inatomic(data.v[start + i],
- SWIZ_PTR(p++));
-
- if (unlikely(ret))
- return -EFAULT;
- } else if (flags & F)
- current->thread.TS_FPR(reg) = data.ll;
- else
- regs->gpr[reg] = data.ll;
-
- /* Update RA as needed */
- if (flags & U)
- regs->gpr[areg] = regs->dar;
-
- return 1;
+ if (!r)
+ return 1;
+ return r;
}
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6e95c2c19a7e..8cfb20e38cfe 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -746,6 +746,14 @@ int main(void)
OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
+#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f)
+ STOP_SPR(STOP_PID, pid);
+ STOP_SPR(STOP_LDBAR, ldbar);
+ STOP_SPR(STOP_FSCR, fscr);
+ STOP_SPR(STOP_HFSCR, hfscr);
+ STOP_SPR(STOP_MMCR1, mmcr1);
+ STOP_SPR(STOP_MMCR2, mmcr2);
+ STOP_SPR(STOP_MMCRA, mmcra);
#endif
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 8275858a434d..3f46ca1c59f9 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -253,7 +253,7 @@ int __init btext_find_display(int allow_nonstdout)
for_each_node_by_type(np, "display") {
if (of_get_property(np, "linux,opened", NULL)) {
- printk("trying %s ...\n", np->full_name);
+ printk("trying %pOF ...\n", np);
rc = btext_initialize(np);
printk("result: %d\n", rc);
}
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index c641983bbdd6..a8f20e5928e1 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -167,10 +167,10 @@ static void release_cache_debugcheck(struct cache *cache)
list_for_each_entry(iter, &cache_list, list)
WARN_ONCE(iter->next_local == cache,
- "cache for %s(%s) refers to cache for %s(%s)\n",
- iter->ofnode->full_name,
+ "cache for %pOF(%s) refers to cache for %pOF(%s)\n",
+ iter->ofnode,
cache_type_string(iter),
- cache->ofnode->full_name,
+ cache->ofnode,
cache_type_string(cache));
}
@@ -179,8 +179,8 @@ static void release_cache(struct cache *cache)
if (!cache)
return;
- pr_debug("freeing L%d %s cache for %s\n", cache->level,
- cache_type_string(cache), cache->ofnode->full_name);
+ pr_debug("freeing L%d %s cache for %pOF\n", cache->level,
+ cache_type_string(cache), cache->ofnode);
release_cache_debugcheck(cache);
list_del(&cache->list);
@@ -194,8 +194,8 @@ static void cache_cpu_set(struct cache *cache, int cpu)
while (next) {
WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map),
- "CPU %i already accounted in %s(%s)\n",
- cpu, next->ofnode->full_name,
+ "CPU %i already accounted in %pOF(%s)\n",
+ cpu, next->ofnode,
cache_type_string(next));
cpumask_set_cpu(cpu, &next->shared_cpu_map);
next = next->next_local;
@@ -355,7 +355,7 @@ static int cache_is_unified_d(const struct device_node *np)
*/
static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level)
{
- pr_debug("creating L%d ucache for %s\n", level, node->full_name);
+ pr_debug("creating L%d ucache for %pOF\n", level, node);
return new_cache(cache_is_unified_d(node), level, node);
}
@@ -365,8 +365,8 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node,
{
struct cache *dcache, *icache;
- pr_debug("creating L%d dcache and icache for %s\n", level,
- node->full_name);
+ pr_debug("creating L%d dcache and icache for %pOF\n", level,
+ node);
dcache = new_cache(CACHE_TYPE_DATA, level, node);
icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
@@ -679,7 +679,6 @@ static struct kobj_type cache_index_type = {
static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
{
- const char *cache_name;
const char *cache_type;
struct cache *cache;
char *buf;
@@ -690,7 +689,6 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
return;
cache = dir->cache;
- cache_name = cache->ofnode->full_name;
cache_type = cache_type_string(cache);
/* We don't want to create an attribute that can't provide a
@@ -707,14 +705,14 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
rc = attr->show(&dir->kobj, attr, buf);
if (rc <= 0) {
pr_debug("not creating %s attribute for "
- "%s(%s) (rc = %zd)\n",
- attr->attr.name, cache_name,
+ "%pOF(%s) (rc = %zd)\n",
+ attr->attr.name, cache->ofnode,
cache_type, rc);
continue;
}
if (sysfs_create_file(&dir->kobj, &attr->attr))
- pr_debug("could not create %s attribute for %s(%s)\n",
- attr->attr.name, cache_name, cache_type);
+ pr_debug("could not create %s attribute for %pOF(%s)\n",
+ attr->attr.name, cache->ofnode, cache_type);
}
kfree(buf);
@@ -831,8 +829,8 @@ static void cache_cpu_clear(struct cache *cache, int cpu)
struct cache *next = cache->next_local;
WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map),
- "CPU %i not accounted in %s(%s)\n",
- cpu, cache->ofnode->full_name,
+ "CPU %i not accounted in %pOF(%s)\n",
+ cpu, cache->ofnode,
cache_type_string(cache));
cpumask_clear_cpu(cpu, &cache->shared_cpu_map);
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 6f849832a669..760872916013 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1259,10 +1259,10 @@ static struct cpu_spec __initdata cpu_specs[] = {
.platform = "ppc603",
},
#endif /* CONFIG_PPC_BOOK3S_32 */
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
{ /* 8xx */
.pvr_mask = 0xffff0000,
- .pvr_value = 0x00500000,
+ .pvr_value = PVR_8xx,
.cpu_name = "8xx",
/* CPU_FTR_MAYBE_CAN_DOZE is possible,
* if the 8xx code is there.... */
@@ -1274,7 +1274,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_8xx,
.platform = "ppc823",
},
-#endif /* CONFIG_8xx */
+#endif /* CONFIG_PPC_8xx */
#ifdef CONFIG_40x
{ /* 403GC */
.pvr_mask = 0xffffff00,
@@ -1936,6 +1936,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_440A,
.platform = "ppc440",
},
+#ifdef CONFIG_PPC_47x
{ /* 476 DD2 core */
.pvr_mask = 0xffffffff,
.pvr_value = 0x11a52080,
@@ -1992,6 +1993,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_47x,
.platform = "ppc470",
},
+#endif /* CONFIG_PPC_47x */
{ /* default match */
.pvr_mask = 0x00000000,
.pvr_value = 0x00000000,
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 63992b2d8e15..9e816787c0d4 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -44,6 +44,7 @@
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
+#include <asm/pte-walk.h>
/** Overview:
@@ -169,10 +170,10 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
char buffer[128];
n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
- edev->phb->global_number, pdn->busno,
+ pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
- edev->phb->global_number, pdn->busno,
+ pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
@@ -352,8 +353,7 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
* worried about _PAGE_SPLITTING/collapse. Also we will not hit
* page table free, because of init_mm.
*/
- ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token,
- NULL, &hugepage_shift);
+ ptep = find_init_mm_pte(token, &hugepage_shift);
if (!ptep)
return token;
WARN_ON(hugepage_shift);
@@ -435,7 +435,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
int ret;
int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
unsigned long flags;
- struct pci_dn *pdn;
+ struct device_node *dn;
struct pci_dev *dev;
struct eeh_pe *pe, *parent_pe, *phb_pe;
int rc = 0;
@@ -493,9 +493,10 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
if (pe->state & EEH_PE_ISOLATED) {
pe->check_count++;
if (pe->check_count % EEH_MAX_FAILS == 0) {
- pdn = eeh_dev_to_pdn(edev);
- if (pdn->node)
- location = of_get_property(pdn->node, "ibm,loc-code", NULL);
+ dn = pci_device_to_OF_node(dev);
+ if (dn)
+ location = of_get_property(dn, "ibm,loc-code",
+ NULL);
printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
"location=%s driver=%s pci addr=%s\n",
pe->check_count,
@@ -1064,7 +1065,7 @@ core_initcall_sync(eeh_init);
*/
void eeh_add_device_early(struct pci_dn *pdn)
{
- struct pci_controller *phb;
+ struct pci_controller *phb = pdn ? pdn->phb : NULL;
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
if (!edev)
@@ -1074,7 +1075,6 @@ void eeh_add_device_early(struct pci_dn *pdn)
return;
/* USB Bus children of PCI devices will not have BUID's */
- phb = edev->phb;
if (NULL == phb ||
(eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
return;
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index d6b2ca70d14d..ad04ecd63c20 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -50,21 +50,16 @@
*/
struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
{
- struct pci_controller *phb = pdn->phb;
struct eeh_dev *edev;
/* Allocate EEH device */
edev = kzalloc(sizeof(*edev), GFP_KERNEL);
- if (!edev) {
- pr_warn("%s: out of memory\n",
- __func__);
+ if (!edev)
return NULL;
- }
/* Associate EEH device with OF node */
pdn->edev = edev;
edev->pdn = pdn;
- edev->phb = phb;
INIT_LIST_HEAD(&edev->list);
INIT_LIST_HEAD(&edev->rmv_list);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index c405c79e50cd..8b840191df59 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -428,7 +428,7 @@ static void *eeh_add_virt_device(void *data, void *userdata)
if (!(edev->physfn)) {
pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
- __func__, edev->phb->global_number, pdn->busno,
+ __func__, pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
return NULL;
}
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index cc4b206f77e4..2e8d1b2b5af4 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -230,10 +230,15 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
* Bus/Device/Function number. The extra data referred by flag
* indicates which type of address should be used.
*/
+struct eeh_pe_get_flag {
+ int pe_no;
+ int config_addr;
+};
+
static void *__eeh_pe_get(void *data, void *flag)
{
struct eeh_pe *pe = (struct eeh_pe *)data;
- struct eeh_dev *edev = (struct eeh_dev *)flag;
+ struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag;
/* Unexpected PHB PE */
if (pe->type & EEH_PE_PHB)
@@ -244,17 +249,17 @@ static void *__eeh_pe_get(void *data, void *flag)
* have non-zero PE address
*/
if (eeh_has_flag(EEH_VALID_PE_ZERO)) {
- if (edev->pe_config_addr == pe->addr)
+ if (tmp->pe_no == pe->addr)
return pe;
} else {
- if (edev->pe_config_addr &&
- (edev->pe_config_addr == pe->addr))
+ if (tmp->pe_no &&
+ (tmp->pe_no == pe->addr))
return pe;
}
/* Try BDF address */
- if (edev->config_addr &&
- (edev->config_addr == pe->config_addr))
+ if (tmp->config_addr &&
+ (tmp->config_addr == pe->config_addr))
return pe;
return NULL;
@@ -262,7 +267,9 @@ static void *__eeh_pe_get(void *data, void *flag)
/**
* eeh_pe_get - Search PE based on the given address
- * @edev: EEH device
+ * @phb: PCI controller
+ * @pe_no: PE number
+ * @config_addr: Config address
*
* Search the corresponding PE based on the specified address which
* is included in the eeh device. The function is used to check if
@@ -271,12 +278,14 @@ static void *__eeh_pe_get(void *data, void *flag)
* which is composed of PCI bus/device/function number, or unified
* PE address.
*/
-struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
+ int pe_no, int config_addr)
{
- struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
+ struct eeh_pe *root = eeh_phb_pe_get(phb);
+ struct eeh_pe_get_flag tmp = { pe_no, config_addr };
struct eeh_pe *pe;
- pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
+ pe = eeh_pe_traverse(root, __eeh_pe_get, &tmp);
return pe;
}
@@ -330,11 +339,13 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
int eeh_add_to_parent_pe(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent;
+ struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+ int config_addr = (pdn->busno << 8) | (pdn->devfn);
/* Check if the PE number is valid */
if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n",
- __func__, edev->config_addr, edev->phb->global_number);
+ __func__, config_addr, pdn->phb->global_number);
return -EINVAL;
}
@@ -344,7 +355,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* PE should be composed of PCI bus and its subordinate
* components.
*/
- pe = eeh_pe_get(edev);
+ pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr);
if (pe && !(pe->type & EEH_PE_INVALID)) {
/* Mark the PE as type of PCI bus */
pe->type = EEH_PE_BUS;
@@ -353,11 +364,11 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
/* Put the edev to PE */
list_add_tail(&edev->list, &pe->edevs);
pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
- edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF),
- pe->addr);
+ pdn->phb->global_number,
+ pdn->busno,
+ PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn),
+ pe->addr);
return 0;
} else if (pe && (pe->type & EEH_PE_INVALID)) {
list_add_tail(&edev->list, &pe->edevs);
@@ -376,25 +387,25 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device "
"PE#%x, Parent PE#%x\n",
- edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF),
- pe->addr, pe->parent->addr);
+ pdn->phb->global_number,
+ pdn->busno,
+ PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn),
+ pe->addr, pe->parent->addr);
return 0;
}
/* Create a new EEH PE */
if (edev->physfn)
- pe = eeh_pe_alloc(edev->phb, EEH_PE_VF);
+ pe = eeh_pe_alloc(pdn->phb, EEH_PE_VF);
else
- pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
+ pe = eeh_pe_alloc(pdn->phb, EEH_PE_DEVICE);
if (!pe) {
pr_err("%s: out of memory!\n", __func__);
return -ENOMEM;
}
pe->addr = edev->pe_config_addr;
- pe->config_addr = edev->config_addr;
+ pe->config_addr = config_addr;
/*
* Put the new EEH PE into hierarchy tree. If the parent
@@ -404,10 +415,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
*/
parent = eeh_pe_get_parent(edev);
if (!parent) {
- parent = eeh_phb_pe_get(edev->phb);
+ parent = eeh_phb_pe_get(pdn->phb);
if (!parent) {
pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
- __func__, edev->phb->global_number);
+ __func__, pdn->phb->global_number);
edev->pe = NULL;
kfree(pe);
return -EEXIST;
@@ -424,10 +435,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
edev->pe = pe;
pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
"Device PE#%x, Parent PE#%x\n",
- edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF),
+ pdn->phb->global_number,
+ pdn->busno,
+ PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn),
pe->addr, pe->parent->addr);
return 0;
@@ -446,13 +457,14 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent, *child;
int cnt;
+ struct pci_dn *pdn = eeh_dev_to_pdn(edev);
if (!edev->pe) {
pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
- __func__, edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF));
+ __func__, pdn->phb->global_number,
+ pdn->busno,
+ PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn));
return -EEXIST;
}
@@ -712,10 +724,10 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
return;
pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
- __func__, edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF));
+ __func__, pdn->phb->global_number,
+ pdn->busno,
+ PCI_SLOT(pdn->devfn),
+ PCI_FUNC(pdn->devfn));
/* Check slot status */
cap = edev->pcie_cap;
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index 1ceecdda810b..797549289798 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -51,7 +51,6 @@ static ssize_t eeh_show_##_name(struct device *dev, \
static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
EEH_SHOW_ATTR(eeh_mode, mode, "0x%x");
-EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x");
EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
static ssize_t eeh_pe_state_show(struct device *dev,
@@ -103,7 +102,6 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)
return;
rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
- rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state);
@@ -128,7 +126,6 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
}
device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
- device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 8587059ad848..e780e1fbf6c2 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -43,6 +43,13 @@
#define LOAD_MSR_KERNEL(r, x) li r,(x)
#endif
+/*
+ * Align to 4k in order to ensure that all functions modifying srr0/srr1
+ * fit into one page in order to not encounter a TLB miss between the
+ * modification of srr0/srr1 and the associated rfi.
+ */
+ .align 12
+
#ifdef CONFIG_BOOKE
.globl mcheck_transfer_to_handler
mcheck_transfer_to_handler:
@@ -586,6 +593,10 @@ ppc_swapcontext:
handle_page_fault:
stw r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_6xx
+ andis. r0,r5,DSISR_DABRMATCH@h
+ bne- handle_dabr_fault
+#endif
bl do_page_fault
cmpwi r3,0
beq+ ret_from_except
@@ -599,6 +610,17 @@ handle_page_fault:
bl bad_page_fault
b ret_from_except_full
+#ifdef CONFIG_6xx
+ /* We have a data breakpoint exception - handle it */
+handle_dabr_fault:
+ SAVE_NVGPRS(r1)
+ lwz r0,_TRAP(r1)
+ clrrwi r0,r0,1
+ stw r0,_TRAP(r1)
+ bl do_break
+ b ret_from_except_full
+#endif
+
/*
* This routine switches between two different tasks. The process
* state of one is saved on its kernel stack. Then the state
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 49d8422767b4..4a0fd4f40245 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -223,17 +223,27 @@ system_call_exit:
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
bne- .Lsyscall_exit_work
- /* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */
- li r7,MSR_FP
+ andi. r0,r8,MSR_FP
+ beq 2f
#ifdef CONFIG_ALTIVEC
- oris r7,r7,MSR_VEC@h
+ andis. r0,r8,MSR_VEC@h
+ bne 3f
#endif
- and r0,r8,r7
- cmpd r0,r7
- bne .Lsyscall_restore_math
-.Lsyscall_restore_math_cont:
+2: addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_BOOK3S
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Restore RI */
+#endif
+ bl restore_math
+#ifdef CONFIG_PPC_BOOK3S
+ li r11,0
+ mtmsrd r11,1
+#endif
+ ld r8,_MSR(r1)
+ ld r3,RESULT(r1)
+ li r11,-MAX_ERRNO
- cmpld r3,r11
+3: cmpld r3,r11
ld r5,_CCR(r1)
bge- .Lsyscall_error
.Lsyscall_error_cont:
@@ -267,40 +277,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r5,_CCR(r1)
b .Lsyscall_error_cont
-.Lsyscall_restore_math:
- /*
- * Some initial tests from restore_math to avoid the heavyweight
- * C code entry and MSR manipulations.
- */
- LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK)
- and. r0,r0,r8
- bne 1f
-
- ld r7,PACACURRENT(r13)
- lbz r0,THREAD+THREAD_LOAD_FP(r7)
-#ifdef CONFIG_ALTIVEC
- lbz r6,THREAD+THREAD_LOAD_VEC(r7)
- add r0,r0,r6
-#endif
- cmpdi r0,0
- beq .Lsyscall_restore_math_cont
-
-1: addi r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_BOOK3S
- li r10,MSR_RI
- mtmsrd r10,1 /* Restore RI */
-#endif
- bl restore_math
-#ifdef CONFIG_PPC_BOOK3S
- li r11,0
- mtmsrd r11,1
-#endif
- /* Restore volatiles, reload MSR from updated one */
- ld r8,_MSR(r1)
- ld r3,RESULT(r1)
- li r11,-MAX_ERRNO
- b .Lsyscall_restore_math_cont
-
/* Traced system call support */
.Lsyscall_dotrace:
bl save_nvgprs
@@ -990,16 +966,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
#ifdef CONFIG_PPC_BOOK3E
cmpwi cr0,r3,0x280
#else
- BEGIN_FTR_SECTION
- cmpwi cr0,r3,0xe80
- FTR_SECTION_ELSE
- cmpwi cr0,r3,0xa00
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+ cmpwi cr0,r3,0xa00
#endif /* CONFIG_PPC_BOOK3E */
bne 1f
addi r3,r1,STACK_FRAME_OVERHEAD;
bl doorbell_exception
- b ret_from_except
#endif /* CONFIG_PPC_DOORBELL */
1: b ret_from_except /* What else to do here ? */
@@ -1133,7 +1104,7 @@ _ASM_NOKPROBE_SYMBOL(__enter_rtas)
_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
.align 3
-1: .llong rtas_restore_regs
+1: .8byte rtas_restore_regs
rtas_restore_regs:
/* relocation is on at this point */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f14f3c04ec7e..48da0f5d2f7f 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -541,7 +541,7 @@ EXC_COMMON_BEGIN(instruction_access_common)
RECONCILE_IRQ_STATE(r10, r11)
ld r12,_MSR(r1)
ld r3,_NIP(r1)
- andis. r4,r12,0x5820
+ andis. r4,r12,DSISR_BAD_FAULT_64S@h
li r5,0x400
std r3,_DAR(r1)
std r4,_DSISR(r1)
@@ -1314,7 +1314,7 @@ EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
#endif
-#if defined(CONFIG_HARDLOCKUP_DETECTOR) && defined(CONFIG_HAVE_HARDLOCKUP_DETECTOR_ARCH)
+#ifdef CONFIG_PPC_WATCHDOG
#define MASKED_DEC_HANDLER_LABEL 3f
@@ -1343,10 +1343,10 @@ EXC_COMMON_BEGIN(soft_nmi_common)
ADD_NVGPRS;ADD_RECONCILE)
b ret_from_except
-#else
+#else /* CONFIG_PPC_WATCHDOG */
#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */
#define MASKED_DEC_HANDLER(_H)
-#endif
+#endif /* CONFIG_PPC_WATCHDOG */
/*
* An interrupt came in while soft-disabled. We set paca->irq_happened, then:
@@ -1370,19 +1370,16 @@ masked_##_H##interrupt: \
ori r10,r10,0xffff; \
mtspr SPRN_DEC,r10; \
b MASKED_DEC_HANDLER_LABEL; \
-1: cmpwi r10,PACA_IRQ_DBELL; \
- beq 2f; \
- cmpwi r10,PACA_IRQ_HMI; \
- beq 2f; \
+1: andi. r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI); \
+ bne 2f; \
mfspr r10,SPRN_##_H##SRR1; \
- rldicl r10,r10,48,1; /* clear MSR_EE */ \
- rotldi r10,r10,16; \
+ xori r10,r10,MSR_EE; /* clear MSR_EE */ \
mtspr SPRN_##_H##SRR1,r10; \
2: mtcrf 0x80,r9; \
ld r9,PACA_EXGEN+EX_R9(r13); \
ld r10,PACA_EXGEN+EX_R10(r13); \
ld r11,PACA_EXGEN+EX_R11(r13); \
- GET_SCRATCH0(r13); \
+ /* returns to kernel where r13 must be set up, so don't restore it */ \
##_H##rfid; \
b .; \
MASKED_DEC_HANDLER(_H)
@@ -1485,8 +1482,10 @@ USE_TEXT_SECTION()
*/
.balign IFETCH_ALIGN_BYTES
do_hash_page:
-#ifdef CONFIG_PPC_STD_MMU_64
- andis. r0,r4,0xa450 /* weird error? */
+ #ifdef CONFIG_PPC_STD_MMU_64
+ lis r0,DSISR_BAD_FAULT_64S@h
+ ori r0,r0,DSISR_BAD_FAULT_64S@l
+ and. r0,r4,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
CURRENT_THREAD_INFO(r11, r1)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
@@ -1669,25 +1668,27 @@ _GLOBAL(__replay_interrupt)
* we don't give a damn about, so we don't bother storing them.
*/
mfmsr r12
- LOAD_REG_ADDR(r11, 1f)
+ LOAD_REG_ADDR(r11, replay_interrupt_return)
mfcr r9
ori r12,r12,MSR_EE
cmpwi r3,0x900
beq decrementer_common
cmpwi r3,0x500
+BEGIN_FTR_SECTION
+ beq h_virt_irq_common
+FTR_SECTION_ELSE
beq hardware_interrupt_common
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
- cmpwi r3,0xe80
+ cmpwi r3,0xa00
beq h_doorbell_common_msgclr
- cmpwi r3,0xea0
- beq h_virt_irq_common
cmpwi r3,0xe60
beq hmi_exception_common
FTR_SECTION_ELSE
cmpwi r3,0xa00
beq doorbell_super_common_msgclr
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-1:
+replay_interrupt_return:
blr
_ASM_NOKPROBE_SYMBOL(__replay_interrupt)
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index dc0c49cfd90a..e1431800bfb9 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -125,6 +125,13 @@ int is_fadump_boot_memory_area(u64 addr, ulong size)
return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
}
+int should_fadump_crash(void)
+{
+ if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
+ return 0;
+ return 1;
+}
+
int is_fadump_active(void)
{
return fw_dump.dump_active;
@@ -518,7 +525,7 @@ void crash_fadump(struct pt_regs *regs, const char *str)
struct fadump_crash_info_header *fdh = NULL;
int old_cpu, this_cpu;
- if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
+ if (!should_fadump_crash())
return;
/*
@@ -1446,6 +1453,25 @@ static void fadump_init_files(void)
return;
}
+static int fadump_panic_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ /*
+ * If firmware-assisted dump has been registered then trigger
+ * firmware-assisted dump and let firmware handle everything
+ * else. If this returns, then fadump was not registered, so
+ * go through the rest of the panic path.
+ */
+ crash_fadump(NULL, ptr);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block fadump_panic_block = {
+ .notifier_call = fadump_panic_event,
+ .priority = INT_MIN /* may not return; must be done last */
+};
+
/*
* Prepare for firmware-assisted dump.
*/
@@ -1478,6 +1504,9 @@ int __init setup_fadump(void)
init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
fadump_init_files();
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &fadump_panic_block);
+
return 1;
}
subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index e22734278458..8c54166491e7 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -388,7 +388,7 @@ DataAccess:
EXCEPTION_PROLOG
mfspr r10,SPRN_DSISR
stw r10,_DSISR(r11)
- andis. r0,r10,0xa470 /* weird error? */
+ andis. r0,r10,DSISR_BAD_FAULT_32S@h
bne 1f /* if not, try to put a PTE */
mfspr r4,SPRN_DAR /* into the hash table */
rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
@@ -403,13 +403,13 @@ DataAccess:
DO_KVM 0x400
InstructionAccess:
EXCEPTION_PROLOG
- andis. r0,r9,0x4000 /* no pte found? */
+ andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */
beq 1f /* if so, try to put a PTE */
li r3,0 /* into the hash table */
mr r4,r12 /* SRR0 is fault address */
bl hash_page
1: mr r4,r12
- mr r5,r9
+ andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
EXC_XFER_LITE(0x400, handle_page_fault)
/* External interrupt */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 0ddc602b33a4..ff8511d6d8ea 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -92,13 +92,13 @@ END_FTR_SECTION(0, 1)
.balign 8
.globl __secondary_hold_spinloop
__secondary_hold_spinloop:
- .llong 0x0
+ .8byte 0x0
/* Secondary processors write this value with their cpu # */
/* after they enter the spin loop immediately below. */
.globl __secondary_hold_acknowledge
__secondary_hold_acknowledge:
- .llong 0x0
+ .8byte 0x0
#ifdef CONFIG_RELOCATABLE
/* This flag is set to 1 by a loader if the kernel should run
@@ -650,7 +650,7 @@ __after_prom_start:
bctr
.balign 8
-p_end: .llong _end - copy_to_here
+p_end: .8byte _end - copy_to_here
4:
/*
@@ -892,7 +892,7 @@ _GLOBAL(relative_toc)
blr
.balign 8
-p_toc: .llong __toc_start + 0x8000 - 0b
+p_toc: .8byte __toc_start + 0x8000 - 0b
/*
* This is where the main kernel code starts.
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index c032fe8c2d26..4fee00d414e8 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -50,18 +50,20 @@
mtspr spr, reg
#endif
-/* Macro to test if an address is a kernel address */
#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
-#define IS_KERNEL(tmp, addr) \
- andis. tmp, addr, 0x8000 /* Address >= 0x80000000 */
-#define BRANCH_UNLESS_KERNEL(label) beq label
-#else
-#define IS_KERNEL(tmp, addr) \
- rlwinm tmp, addr, 16, 16, 31; \
- cmpli cr0, tmp, PAGE_OFFSET >> 16
-#define BRANCH_UNLESS_KERNEL(label) blt label
+/* By simply checking Address >= 0x80000000, we know if it's a kernel address */
+#define SIMPLE_KERNEL_ADDRESS 1
#endif
+/*
+ * We need an ITLB miss handler for kernel addresses if:
+ * - Either we have modules
+ * - Or we have not pinned the first 8M
+ */
+#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) || \
+ defined(CONFIG_DEBUG_PAGEALLOC)
+#define ITLB_MISS_KERNEL 1
+#endif
/*
* Value for the bits that have fixed value in RPN entries.
@@ -123,7 +125,6 @@ turn_on_mmu:
lis r0,start_here@h
ori r0,r0,start_here@l
mtspr SPRN_SRR0,r0
- SYNC
rfi /* enables MMU */
/*
@@ -170,7 +171,7 @@ turn_on_mmu:
stw r1,0(r11); \
tovirt(r1,r11); /* set new kernel sp */ \
li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
- MTMSRD(r10); /* (except for mach check in rtas) */ \
+ mtmsr r10; \
stw r0,GPR0(r11); \
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
@@ -300,7 +301,7 @@ SystemCall:
/* On the MPC8xx, this is a software emulation interrupt. It occurs
* for all unimplemented and illegal instructions.
*/
- EXCEPTION(0x1000, SoftEmu, SoftwareEmulation, EXC_XFER_STD)
+ EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD)
. = 0x1100
/*
@@ -325,7 +326,7 @@ SystemCall:
#endif
InstructionTLBMiss:
-#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mtspr SPRN_SPRG_SCRATCH2, r3
#endif
EXCEPTION_PROLOG_0
@@ -343,15 +344,32 @@ InstructionTLBMiss:
INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
/* Only modules will cause ITLB Misses as we always
* pin the first 8MB of kernel memory */
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mfcr r3
#endif
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
- IS_KERNEL(r11, r10)
+#ifdef ITLB_MISS_KERNEL
+#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
+ andis. r11, r10, 0x8000 /* Address >= 0x80000000 */
+#else
+ rlwinm r11, r10, 16, 0xfff8
+ cmpli cr0, r11, PAGE_OFFSET@h
+#ifndef CONFIG_PIN_TLB_TEXT
+ /* It is assumed that kernel code fits into the first 8M page */
+_ENTRY(ITLBMiss_cmp)
+ cmpli cr7, r11, (PAGE_OFFSET + 0x0800000)@h
+#endif
+#endif
#endif
mfspr r11, SPRN_M_TW /* Get level 1 table */
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
- BRANCH_UNLESS_KERNEL(3f)
+#ifdef ITLB_MISS_KERNEL
+#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT)
+ beq+ 3f
+#else
+ blt+ 3f
+#endif
+#ifndef CONFIG_PIN_TLB_TEXT
+ blt cr7, ITLBMissLinear
+#endif
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
3:
#endif
@@ -369,7 +387,7 @@ InstructionTLBMiss:
rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
lwz r10, 0(r10) /* Get the pte */
4:
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mtcr r3
#endif
/* Insert the APG into the TWC from the Linux PTE. */
@@ -400,7 +418,7 @@ InstructionTLBMiss:
MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */
/* Restore registers */
-#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mfspr r3, SPRN_SPRG_SCRATCH2
#endif
EXCEPTION_EPILOG_0
@@ -447,23 +465,23 @@ DataStoreTLBMiss:
* kernel page tables.
*/
mfspr r10, SPRN_MD_EPN
- rlwinm r10, r10, 16, 0xfff8
- cmpli cr0, r10, PAGE_OFFSET@h
+ rlwinm r11, r10, 16, 0xfff8
+ cmpli cr0, r11, PAGE_OFFSET@h
mfspr r11, SPRN_M_TW /* Get level 1 table */
blt+ 3f
+ rlwinm r11, r10, 16, 0xfff8
#ifndef CONFIG_PIN_TLB_IMMR
- cmpli cr0, r10, VIRT_IMMR_BASE@h
+ cmpli cr0, r11, VIRT_IMMR_BASE@h
#endif
_ENTRY(DTLBMiss_cmp)
- cmpli cr7, r10, (PAGE_OFFSET + 0x1800000)@h
- lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
+ cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
#ifndef CONFIG_PIN_TLB_IMMR
_ENTRY(DTLBMiss_jmp)
beq- DTLBMissIMMR
#endif
blt cr7, DTLBMissLinear
+ lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
3:
- mfspr r10, SPRN_MD_EPN
/* Insert level 1 index */
rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
@@ -569,8 +587,8 @@ _ENTRY(DTLBMiss_jmp)
InstructionTLBError:
EXCEPTION_PROLOG
mr r4,r12
- mr r5,r9
- andis. r10,r5,0x4000
+ andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
+ andis. r10,r9,SRR1_ISI_NOPT@h
beq+ 1f
tlbie r4
itlbie:
@@ -595,7 +613,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */
mfspr r5,SPRN_DSISR
stw r5,_DSISR(r11)
mfspr r4,SPRN_DAR
- andis. r10,r5,0x4000
+ andis. r10,r5,DSISR_NOHPTE@h
beq+ 1f
tlbie r4
dtlbie:
@@ -684,7 +702,7 @@ DTLBMissLinear:
/* Set 8M byte page and mark it valid */
li r11, MD_PS8MEG | MD_SVALID
MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
- rlwinm r10, r10, 16, 0x0f800000 /* 8xx supports max 256Mb RAM */
+ rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
_PAGE_PRESENT
MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */
@@ -695,6 +713,22 @@ DTLBMissLinear:
EXCEPTION_EPILOG_0
rfi
+#ifndef CONFIG_PIN_TLB_TEXT
+ITLBMissLinear:
+ mtcr r3
+ /* Set 8M byte page and mark it valid */
+ li r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC
+ MTSPR_CPU6(SPRN_MI_TWC, r11, r3)
+ rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
+ ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+ _PAGE_PRESENT
+ MTSPR_CPU6(SPRN_MI_RPN, r10, r11) /* Update TLB entry */
+
+ mfspr r3, SPRN_SPRG_SCRATCH2
+ EXCEPTION_EPILOG_0
+ rfi
+#endif
+
/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
* by decoding the registers used by the dcbx instruction and adding them.
* DAR is set to the calculated address.
@@ -705,9 +739,10 @@ FixupDAR:/* Entry point for dcbx workaround. */
mtspr SPRN_SPRG_SCRATCH2, r10
/* fetch instruction from memory. */
mfspr r10, SPRN_SRR0
- IS_KERNEL(r11, r10)
+ rlwinm r11, r10, 16, 0xfff8
+ cmpli cr0, r11, PAGE_OFFSET@h
mfspr r11, SPRN_M_TW /* Get level 1 table */
- BRANCH_UNLESS_KERNEL(3f)
+ blt+ 3f
rlwinm r11, r10, 16, 0xfff8
_ENTRY(FixupDAR_cmp)
cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
@@ -915,10 +950,8 @@ start_here:
rfi
/* Load up the kernel context */
2:
- SYNC /* Force all PTE updates to finish */
tlbia /* Clear all TLB entries */
sync /* wait for tlbia/tlbie to finish */
- TLBSYNC /* ... on all CPUs */
/* set up the PTE pointers for the Abatron bdiGDB.
*/
@@ -955,15 +988,14 @@ initial_mmu:
mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */
tlbia /* Invalidate all TLB entries */
-/* Always pin the first 8 MB ITLB to prevent ITLB
- misses while mucking around with SRR0/SRR1 in asm
-*/
+#ifdef CONFIG_PIN_TLB_TEXT
lis r8, MI_RSV4I@h
ori r8, r8, 0x1c00
mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
+#endif
-#ifdef CONFIG_PIN_TLB
+#ifdef CONFIG_PIN_TLB_DATA
oris r10, r10, MD_RSV4I@h
mtspr SPRN_MD_CTR, r10 /* Set data TLB control */
#endif
@@ -989,6 +1021,7 @@ initial_mmu:
* internal registers (among other things).
*/
#ifdef CONFIG_PIN_TLB_IMMR
+ oris r10, r10, MD_RSV4I@h
ori r10, r10, 0x1c00
mtspr SPRN_MD_CTR, r10
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index e6252c5a57a4..1125c9be9e06 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -85,7 +85,61 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
std r3,_WORT(r1)
mfspr r3,SPRN_WORC
std r3,_WORC(r1)
+/*
+ * On POWER9, there are idle states such as stop4, invoked via cpuidle,
+ * that lose hypervisor resources. In such cases, we need to save
+ * additional SPRs before entering those idle states so that they can
+ * be restored to their older values on wakeup from the idle state.
+ *
+ * On POWER8, the only such deep idle state is winkle which is used
+ * only in the context of CPU-Hotplug, where these additional SPRs are
+ * reinitialized to a sane value. Hence there is no need to save/restore
+ * these SPRs.
+ */
+BEGIN_FTR_SECTION
+ blr
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
+power9_save_additional_sprs:
+ mfspr r3, SPRN_PID
+ mfspr r4, SPRN_LDBAR
+ std r3, STOP_PID(r13)
+ std r4, STOP_LDBAR(r13)
+ mfspr r3, SPRN_FSCR
+ mfspr r4, SPRN_HFSCR
+ std r3, STOP_FSCR(r13)
+ std r4, STOP_HFSCR(r13)
+
+ mfspr r3, SPRN_MMCRA
+ mfspr r4, SPRN_MMCR1
+ std r3, STOP_MMCRA(r13)
+ std r4, STOP_MMCR1(r13)
+
+ mfspr r3, SPRN_MMCR2
+ std r3, STOP_MMCR2(r13)
+ blr
+
+power9_restore_additional_sprs:
+ ld r3,_LPCR(r1)
+ ld r4, STOP_PID(r13)
+ mtspr SPRN_LPCR,r3
+ mtspr SPRN_PID, r4
+
+ ld r3, STOP_LDBAR(r13)
+ ld r4, STOP_FSCR(r13)
+ mtspr SPRN_LDBAR, r3
+ mtspr SPRN_FSCR, r4
+
+ ld r3, STOP_HFSCR(r13)
+ ld r4, STOP_MMCRA(r13)
+ mtspr SPRN_HFSCR, r3
+ mtspr SPRN_MMCRA, r4
+ /* We have already restored PACA_MMCR0 */
+ ld r3, STOP_MMCR1(r13)
+ ld r4, STOP_MMCR2(r13)
+ mtspr SPRN_MMCR1, r3
+ mtspr SPRN_MMCR2, r4
blr
/*
@@ -141,7 +195,16 @@ pnv_powersave_common:
std r5,_CCR(r1)
std r1,PACAR1(r13)
+BEGIN_FTR_SECTION
+ /*
+ * POWER9 does not require real mode to stop, and presently does not
+ * set hwthread_state for KVM (threads don't share MMU context), so
+ * we can remain in virtual mode for this.
+ */
+ bctr
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
/*
+ * POWER8
* Go to real mode to do the nap, as required by the architecture.
* Also, we need to be in real mode before setting hwthread_state,
* because as soon as we do that, another thread can switch
@@ -151,6 +214,20 @@ pnv_powersave_common:
mtmsrd r7,0
bctr
+/*
+ * This is the sequence required to execute idle instructions, as
+ * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
+ */
+#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \
+ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
+ std r0,0(r1); \
+ ptesync; \
+ ld r0,0(r1); \
+236: cmpd cr0,r0,r0; \
+ bne 236b; \
+ IDLE_INST;
+
+
.globl pnv_enter_arch207_idle_mode
pnv_enter_arch207_idle_mode:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -242,20 +319,27 @@ enter_winkle:
/*
* r3 - PSSCR value corresponding to the requested stop state.
*/
-power_enter_stop:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /* Tell KVM we're entering idle */
+power_enter_stop_kvm_rm:
+ /*
+ * This is currently unused because POWER9 KVM does not have to
+ * gather secondary threads into sibling mode, but the code is
+ * here in case that function is required.
+ *
+ * Tell KVM we're entering idle.
+ */
li r4,KVM_HWTHREAD_IN_IDLE
/* DO THIS IN REAL MODE! See comment above. */
stb r4,HSTATE_HWTHREAD_STATE(r13)
#endif
+power_enter_stop:
/*
* Check if we are executing the lite variant with ESL=EC=0
*/
andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
bne .Lhandle_esl_ec_set
- IDLE_STATE_ENTER_SEQ(PPC_STOP)
+ PPC_STOP
li r3,0 /* Since we didn't lose state, return 0 */
/*
@@ -288,7 +372,8 @@ power_enter_stop:
ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
cmpd r3,r4
bge .Lhandle_deep_stop
- IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
+ PPC_STOP /* Does not return (system reset interrupt) */
+
.Lhandle_deep_stop:
/*
* Entering deep idle state.
@@ -310,7 +395,7 @@ lwarx_loop_stop:
bl save_sprs_to_stack
- IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
+ PPC_STOP /* Does not return (system reset interrupt) */
/*
* Entered with MSR[EE]=0 and no soft-masked interrupts pending.
@@ -411,6 +496,18 @@ pnv_powersave_wakeup_mce:
b pnv_powersave_wakeup
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+kvm_start_guest_check:
+ li r0,KVM_HWTHREAD_IN_KERNEL
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ sync
+ lbz r0,HSTATE_HWTHREAD_REQ(r13)
+ cmpwi r0,0
+ beqlr
+ b kvm_start_guest
+#endif
+
/*
* Called from reset vector for powersave wakeups.
* cr3 - set to gt if waking up with partial/complete hypervisor state loss
@@ -435,15 +532,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mr r3,r12
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- li r0,KVM_HWTHREAD_IN_KERNEL
- stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* Order setting hwthread_state vs. testing hwthread_req */
- sync
- lbz r0,HSTATE_HWTHREAD_REQ(r13)
- cmpwi r0,0
- beq 1f
- b kvm_start_guest
-1:
+BEGIN_FTR_SECTION
+ bl kvm_start_guest_check
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#endif
/* Return SRR1 from power7_nap() */
@@ -809,9 +900,16 @@ no_segments:
mtctr r12
bctrl
+/*
+ * On POWER9, we can come here on wakeup from a cpuidle stop state.
+ * Hence restore the additional SPRs to the saved value.
+ *
+ * On POWER8, we come here only on winkle. Since winkle is used
+ * only in the case of CPU-Hotplug, we don't need to restore
+ * the additional SPRs.
+ */
BEGIN_FTR_SECTION
- ld r4,_LPCR(r1)
- mtspr SPRN_LPCR,r4
+ bl power9_restore_additional_sprs
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
hypervisor_state_restored:
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index a582e0d42525..aa9f1b8261db 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -19,6 +19,8 @@
#include <asm/pgtable.h>
#include <asm/ppc-pci.h>
#include <asm/io-workarounds.h>
+#include <asm/pte-walk.h>
+
#define IOWA_MAX_BUS 8
@@ -75,8 +77,7 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
* We won't find huge pages here (iomem). Also can't hit
* a page table free due to init_mm
*/
- ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
- NULL, &hugepage_shift);
+ ptep = find_init_mm_pte(vaddr, &hugepage_shift);
if (ptep == NULL)
paddr = 0;
else {
@@ -192,7 +193,7 @@ void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops,
if (iowa_bus_count >= IOWA_MAX_BUS) {
pr_err("IOWA:Too many pci bridges, "
- "workarounds disabled for %s\n", np->full_name);
+ "workarounds disabled for %pOF\n", np);
return;
}
@@ -207,6 +208,6 @@ void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops,
iowa_bus_count++;
- pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
+ pr_debug("IOWA:[%d]Add bus, %pOF.\n", iowa_bus_count-1, np);
}
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 233ca3fe4754..af7a20dc6e09 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -127,8 +127,7 @@ static ssize_t fail_iommu_store(struct device *dev,
return count;
}
-static DEVICE_ATTR(fail_iommu, S_IRUGO|S_IWUSR, fail_iommu_show,
- fail_iommu_store);
+static DEVICE_ATTR_RW(fail_iommu);
static int fail_iommu_bus_notify(struct notifier_block *nb,
unsigned long action, void *data)
@@ -190,7 +189,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
unsigned int pool_nr;
struct iommu_pool *pool;
- align_mask = 0xffffffffffffffffl >> (64 - align_order);
+ align_mask = (1ull << align_order) - 1;
/* This allocator was derived from x86_64's bit string search */
@@ -208,7 +207,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
* We don't need to disable preemption here because any CPU can
* safely use any IOMMU pool.
*/
- pool_nr = __this_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);
+ pool_nr = raw_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);
if (largealloc)
pool = &(tbl->large_pool);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index f291f7826abc..4e65bf82f5e0 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -24,7 +24,7 @@
* mask register (of which only 16 are defined), hence the weird shifting
* and complement of the cached_irq_mask. I want to be able to stuff
* this right into the SIU SMASK register.
- * Many of the prep/chrp functions are conditional compiled on CONFIG_8xx
+ * Many of the prep/chrp functions are conditionally compiled on CONFIG_PPC_8xx
* to reduce code space and undefined function references.
*/
@@ -143,9 +143,10 @@ notrace unsigned int __check_irq_replay(void)
*/
unsigned char happened = local_paca->irq_happened;
- /* Clear bit 0 which we wouldn't clear otherwise */
- local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
if (happened & PACA_IRQ_HARD_DIS) {
+ /* Clear bit 0 which we wouldn't clear otherwise */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
/*
* We may have missed a decrementer interrupt if hard disabled.
* Check the decrementer register in case we had a rollover
@@ -173,41 +174,39 @@ notrace unsigned int __check_irq_replay(void)
* This is a higher priority interrupt than the others, so
* replay it first.
*/
- local_paca->irq_happened &= ~PACA_IRQ_HMI;
- if (happened & PACA_IRQ_HMI)
+ if (happened & PACA_IRQ_HMI) {
+ local_paca->irq_happened &= ~PACA_IRQ_HMI;
return 0xe60;
+ }
- /*
- * We may have missed a decrementer interrupt. We check the
- * decrementer itself rather than the paca irq_happened field
- * in case we also had a rollover while hard disabled
- */
- local_paca->irq_happened &= ~PACA_IRQ_DEC;
- if (happened & PACA_IRQ_DEC)
+ if (happened & PACA_IRQ_DEC) {
+ local_paca->irq_happened &= ~PACA_IRQ_DEC;
return 0x900;
+ }
- /* Finally check if an external interrupt happened */
- local_paca->irq_happened &= ~PACA_IRQ_EE;
- if (happened & PACA_IRQ_EE)
+ if (happened & PACA_IRQ_EE) {
+ local_paca->irq_happened &= ~PACA_IRQ_EE;
return 0x500;
+ }
#ifdef CONFIG_PPC_BOOK3E
- /* Finally check if an EPR external interrupt happened
- * this bit is typically set if we need to handle another
- * "edge" interrupt from within the MPIC "EPR" handler
+ /*
+ * Check if an EPR external interrupt happened. This bit is typically
+ * set if we need to handle another "edge" interrupt from within the
+ * MPIC "EPR" handler.
*/
- local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
- if (happened & PACA_IRQ_EE_EDGE)
+ if (happened & PACA_IRQ_EE_EDGE) {
+ local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
return 0x500;
+ }
- local_paca->irq_happened &= ~PACA_IRQ_DBELL;
- if (happened & PACA_IRQ_DBELL)
+ if (happened & PACA_IRQ_DBELL) {
+ local_paca->irq_happened &= ~PACA_IRQ_DBELL;
return 0x280;
+ }
#else
- local_paca->irq_happened &= ~PACA_IRQ_DBELL;
if (happened & PACA_IRQ_DBELL) {
- if (cpu_has_feature(CPU_FTR_HVMODE))
- return 0xe80;
+ local_paca->irq_happened &= ~PACA_IRQ_DBELL;
return 0xa00;
}
#endif /* CONFIG_PPC_BOOK3E */
@@ -483,6 +482,18 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, " Hypervisor Maintenance Interrupts\n");
}
+ seq_printf(p, "%*s: ", prec, "NMI");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).sreset_irqs);
+ seq_printf(p, " System Reset interrupts\n");
+
+#ifdef CONFIG_PPC_WATCHDOG
+ seq_printf(p, "%*s: ", prec, "WDG");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).soft_nmi_irqs);
+ seq_printf(p, " Watchdog soft-NMI interrupts\n");
+#endif
+
#ifdef CONFIG_PPC_DOORBELL
if (cpu_has_feature(CPU_FTR_DBELL)) {
seq_printf(p, "%*s: ", prec, "DBL");
@@ -507,6 +518,10 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += per_cpu(irq_stat, cpu).spurious_irqs;
sum += per_cpu(irq_stat, cpu).timer_irqs_others;
sum += per_cpu(irq_stat, cpu).hmi_exceptions;
+ sum += per_cpu(irq_stat, cpu).sreset_irqs;
+#ifdef CONFIG_PPC_WATCHDOG
+ sum += per_cpu(irq_stat, cpu).soft_nmi_irqs;
+#endif
#ifdef CONFIG_PPC_DOORBELL
sum += per_cpu(irq_stat, cpu).doorbell_irqs;
#endif
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index bb6f8993412e..1df6c74aa731 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -164,7 +164,7 @@ void __init isa_bridge_find_early(struct pci_controller *hose)
/* Set the global ISA io base to indicate we have an ISA bridge */
isa_io_base = ISA_IO_BASE;
- pr_debug("ISA bridge (early) is %s\n", np->full_name);
+ pr_debug("ISA bridge (early) is %pOF\n", np);
}
/**
@@ -187,15 +187,15 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
pna = of_n_addr_cells(np);
if (of_property_read_u32(np, "#address-cells", &na) ||
of_property_read_u32(np, "#size-cells", &ns)) {
- pr_warn("ISA: Non-PCI bridge %s is missing address format\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF is missing address format\n",
+ np);
return;
}
/* Check it's a supported address format */
if (na != 2 || ns != 1) {
- pr_warn("ISA: Non-PCI bridge %s has unsupported address format\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF has unsupported address format\n",
+ np);
return;
}
rs = na + ns + pna;
@@ -203,8 +203,8 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
/* Grab the ranges property */
ranges = of_get_property(np, "ranges", &rlen);
if (ranges == NULL || rlen < rs) {
- pr_warn("ISA: Non-PCI bridge %s has absent or invalid ranges\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF has absent or invalid ranges\n",
+ np);
return;
}
@@ -220,8 +220,8 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
/* Got something ? */
if (!size || !pbasep) {
- pr_warn("ISA: Non-PCI bridge %s has no usable IO range\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF has no usable IO range\n",
+ np);
return;
}
@@ -233,15 +233,15 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
/* Map pbase */
pbase = of_translate_address(np, pbasep);
if (pbase == OF_BAD_ADDR) {
- pr_warn("ISA: Non-PCI bridge %s failed to translate IO base\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF failed to translate IO base\n",
+ np);
return;
}
/* We need page alignment */
if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) {
- pr_warn("ISA: Non-PCI bridge %s has non aligned IO range\n",
- np->full_name);
+ pr_warn("ISA: Non-PCI bridge %pOF has non aligned IO range\n",
+ np);
return;
}
@@ -255,7 +255,7 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
__ioremap_at(pbase, (void *)ISA_IO_BASE,
size, pgprot_val(pgprot_noncached(__pgprot(0))));
- pr_debug("ISA: Non-PCI bridge is %s\n", np->full_name);
+ pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
}
/**
@@ -277,8 +277,8 @@ static void isa_bridge_find_late(struct pci_dev *pdev,
/* Set the global ISA io base to indicate we have an ISA bridge */
isa_io_base = ISA_IO_BASE;
- pr_debug("ISA bridge (late) is %s on %s\n",
- devnode->full_name, pci_name(pdev));
+ pr_debug("ISA bridge (late) is %pOF on %s\n",
+ devnode, pci_name(pdev));
}
/**
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index dbf098121ce6..35e240a0a408 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -67,9 +67,9 @@ static struct hard_trap_info
#endif
#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */
{ 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */
-#if defined(CONFIG_8xx)
+#if defined(CONFIG_PPC_8xx)
{ 0x1000, 0x04 /* SIGILL */ }, /* software emulation */
-#else /* ! CONFIG_8xx */
+#else /* ! CONFIG_PPC_8xx */
{ 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */
{ 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */
{ 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 1086ea37c832..9ad37f827a97 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -25,7 +25,6 @@
#include <linux/kvm_para.h>
#include <linux/slab.h>
#include <linux/of.h>
-#include <linux/nmi.h> /* hardlockup_detector_disable() */
#include <asm/reg.h>
#include <asm/sections.h>
@@ -719,12 +718,6 @@ static __init void kvm_free_tmp(void)
static int __init kvm_guest_init(void)
{
- /*
- * The hardlockup detector is likely to get false positives in
- * KVM guests, so disable it by default.
- */
- hardlockup_detector_disable();
-
if (!kvm_para_available())
goto free_tmp;
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 97ec8557f974..6408f09dbbd9 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -181,7 +181,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
mtctr r4
li r4,0
1:
- lwzx r0,r0,r4
+ lwzx r0,0,r4
addi r4,r4,32 /* Go to start of next cache line */
bdnz 1b
isync
@@ -328,7 +328,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
mtctr r4
li r4,0
1:
- lwzx r0,r0,r4
+ lwzx r0,0,r4
dcbf 0,r4
addi r4,r4,32 /* Go to start of next cache line */
bdnz 1b
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 0694d20f85b6..5e5a64a8b4e4 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -147,8 +147,8 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
legacy_serial_ports[index].serial_out = tsi_serial_out;
}
- printk(KERN_DEBUG "Found legacy serial port %d for %s\n",
- index, np->full_name);
+ printk(KERN_DEBUG "Found legacy serial port %d for %pOF\n",
+ index, np);
printk(KERN_DEBUG " %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n",
(iotype == UPIO_PORT) ? "port" : "mem",
(unsigned long long)base, (unsigned long long)taddr, irq,
@@ -207,7 +207,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
int index = -1;
u64 taddr;
- DBG(" -> add_legacy_isa_port(%s)\n", np->full_name);
+ DBG(" -> add_legacy_isa_port(%pOF)\n", np);
/* Get the ISA port number */
reg = of_get_property(np, "reg", NULL);
@@ -256,7 +256,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
unsigned int flags;
int iotype, index = -1, lindex = 0;
- DBG(" -> add_legacy_pci_port(%s)\n", np->full_name);
+ DBG(" -> add_legacy_pci_port(%pOF)\n", np);
/* We only support ports that have a clock frequency properly
* encoded in the device-tree (that is have an fcode). Anything
@@ -374,7 +374,7 @@ void __init find_legacy_serial_ports(void)
if (path != NULL) {
stdout = of_find_node_by_path(path);
if (stdout)
- DBG("stdout is %s\n", stdout->full_name);
+ DBG("stdout is %pOF\n", stdout);
} else {
DBG(" no linux,stdout-path !\n");
}
@@ -603,7 +603,7 @@ static int __init check_legacy_serial_console(void)
DBG(" can't find stdout package %s !\n", name);
return -ENODEV;
}
- DBG("stdout is %s\n", prom_stdout->full_name);
+ DBG("stdout is %pOF\n", prom_stdout);
name = of_get_property(prom_stdout, "name", NULL);
if (!name) {
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index e0e131e662ed..9b2ea7e71c06 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -22,11 +22,14 @@
#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt
+#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
+
+#include <asm/machdep.h>
#include <asm/mce.h>
static DEFINE_PER_CPU(int, mce_nest_count);
@@ -446,3 +449,33 @@ uint64_t get_mce_fault_addr(struct machine_check_event *evt)
return 0;
}
EXPORT_SYMBOL(get_mce_fault_addr);
+
+/*
+ * This function is called in real mode. Strictly no printk's please.
+ *
+ * regs->nip and regs->msr contain srr0 and srr1.
+ */
+long machine_check_early(struct pt_regs *regs)
+{
+ long handled = 0;
+
+ __this_cpu_inc(irq_stat.mce_exceptions);
+
+ if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+ handled = cur_cpu_spec->machine_check_early(regs);
+ return handled;
+}
+
+long hmi_exception_realmode(struct pt_regs *regs)
+{
+ __this_cpu_inc(irq_stat.hmi_exceptions);
+
+ wait_for_subcore_guest_exit();
+
+ if (ppc_md.hmi_exception_early)
+ ppc_md.hmi_exception_early(regs);
+
+ wait_for_tb_resync();
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 34aeac54f120..becaec990140 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -45,7 +45,7 @@ static int of_pci_phb_probe(struct platform_device *dev)
if (ppc_md.pci_setup_phb == NULL)
return -ENODEV;
- pr_info("Setting up PCI bus %s\n", dev->dev.of_node->full_name);
+ pr_info("Setting up PCI bus %pOF\n", dev->dev.of_node);
/* Alloc and setup PHB data structure */
phb = pcibios_alloc_controller(dev->dev.of_node);
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index 4937bef7652f..52fc864cdec4 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -60,10 +60,6 @@ optprobe_template_entry:
std r5,_CCR(r1)
lbz r5,PACASOFTIRQEN(r13)
std r5,SOFTE(r1)
- mfdar r5
- std r5,_DAR(r1)
- mfdsisr r5
- std r5,_DSISR(r1)
/*
* We may get here from a module, so load the kernel TOC in r2.
@@ -122,10 +118,6 @@ optprobe_template_call_emulate:
mtxer r5
ld r5,_CCR(r1)
mtcr r5
- ld r5,_DAR(r1)
- mtdar r5
- ld r5,_DSISR(r1)
- mtdsisr r5
REST_GPR(0,r1)
REST_10GPRS(2,r1)
REST_10GPRS(12,r1)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 8d63627e067f..2ff2b8a19f71 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -99,18 +99,27 @@ static inline void free_lppacas(void) { }
* If you make the number of persistent SLB entries dynamic, please also
* update PR KVM to flush and restore them accordingly.
*/
-static struct slb_shadow *slb_shadow;
+static struct slb_shadow * __initdata slb_shadow;
static void __init allocate_slb_shadows(int nr_cpus, int limit)
{
int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus);
+
+ if (early_radix_enabled())
+ return;
+
slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit));
memset(slb_shadow, 0, size);
}
static struct slb_shadow * __init init_slb_shadow(int cpu)
{
- struct slb_shadow *s = &slb_shadow[cpu];
+ struct slb_shadow *s;
+
+ if (early_radix_enabled())
+ return NULL;
+
+ s = &slb_shadow[cpu];
/*
* When we come through here to initialise boot_paca, the slb_shadow
@@ -215,7 +224,7 @@ void __init allocate_pacas(void)
paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
memset(paca, 0, paca_size);
- printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n",
+ printk(KERN_DEBUG "Allocated %u bytes for %u pacas at %p\n",
paca_size, nr_cpu_ids, paca);
allocate_lppacas(nr_cpu_ids, limit);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 341a7469cab8..02831a396419 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -373,9 +373,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
if (virq)
irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
} else {
- pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n",
- oirq.args_count, oirq.args[0], oirq.args[1],
- of_node_full_name(oirq.np));
+ pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %pOF\n",
+ oirq.args_count, oirq.args[0], oirq.args[1], oirq.np);
virq = irq_create_of_mapping(&oirq);
}
@@ -741,8 +740,8 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
struct of_pci_range range;
struct of_pci_range_parser parser;
- printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
- dev->full_name, primary ? "(primary)" : "");
+ printk(KERN_INFO "PCI host bridge %pOF %s ranges:\n",
+ dev, primary ? "(primary)" : "");
/* Check for ranges property */
if (of_pci_range_parser_init(&parser, dev))
@@ -1556,8 +1555,8 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose,
if (!res->flags) {
pr_debug("PCI: I/O resource not set for host"
- " bridge %s (domain %d)\n",
- hose->dn->full_name, hose->global_number);
+ " bridge %pOF (domain %d)\n",
+ hose->dn, hose->global_number);
} else {
offset = pcibios_io_space_offset(hose);
@@ -1668,7 +1667,7 @@ void pcibios_scan_phb(struct pci_controller *hose)
struct device_node *node = hose->dn;
int mode;
- pr_debug("PCI: Scanning PHB %s\n", of_node_full_name(node));
+ pr_debug("PCI: Scanning PHB %pOF\n", node);
/* Get some IO space for the new PHB */
pcibios_setup_phb_io_space(hose);
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 41c86c6b6e4d..1d817f4d97d9 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -79,8 +79,8 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
return;
bus_range = of_get_property(node, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int)) {
- printk(KERN_WARNING "Can't get bus-range for %s, "
- "assuming it starts at 0\n", node->full_name);
+ printk(KERN_WARNING "Can't get bus-range for %pOF, "
+ "assuming it starts at 0\n", node);
pci_to_OF_bus_map[pci_bus] = 0;
} else
pci_to_OF_bus_map[pci_bus] = bus_range[0];
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index ed5e9ff61a68..932b9741aa8f 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -111,7 +111,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
if (hose->io_base_alloc == NULL)
return 0;
- pr_debug("IO unmapping for PHB %s\n", hose->dn->full_name);
+ pr_debug("IO unmapping for PHB %pOF\n", hose->dn);
pr_debug(" alloc=0x%p\n", hose->io_base_alloc);
/* This is a PHB, we fully unmap the IO area */
@@ -151,7 +151,7 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
hose->io_base_virt = (void __iomem *)(area->addr +
hose->io_base_phys - phys_page);
- pr_debug("IO mapping for PHB %s\n", hose->dn->full_name);
+ pr_debug("IO mapping for PHB %pOF\n", hose->dn);
pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n",
hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc);
pr_debug(" size=0x%016llx (alloc=0x%016lx)\n",
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 592693437070..0e395afbf0f4 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -139,7 +139,6 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
#ifdef CONFIG_PCI_IOV
static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
- struct pci_dev *pdev,
int vf_index,
int busno, int devfn)
{
@@ -150,10 +149,8 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
return NULL;
pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
- if (!pdn) {
- dev_warn(&pdev->dev, "%s: Out of memory!\n", __func__);
+ if (!pdn)
return NULL;
- }
pdn->phb = parent->phb;
pdn->parent = parent;
@@ -167,13 +164,6 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
INIT_LIST_HEAD(&pdn->list);
list_add_tail(&pdn->list, &parent->child_list);
- /*
- * If we already have PCI device instance, lets
- * bind them.
- */
- if (pdev)
- pdev->dev.archdata.pci_data = pdn;
-
return pdn;
}
#endif
@@ -201,7 +191,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
struct eeh_dev *edev __maybe_unused;
- pdn = add_one_dev_pci_data(parent, NULL, i,
+ pdn = add_one_dev_pci_data(parent, i,
pci_iov_virtfn_bus(pdev, i),
pci_iov_virtfn_devfn(pdev, i));
if (!pdn) {
@@ -303,7 +293,6 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
if (pdn == NULL)
return NULL;
dn->data = pdn;
- pdn->node = dn;
pdn->phb = hose;
#ifdef CONFIG_PPC_POWERNV
pdn->pe_number = IODA_INVALID_PE;
@@ -352,6 +341,7 @@ EXPORT_SYMBOL_GPL(pci_add_device_node_info);
void pci_remove_device_node_info(struct device_node *dn)
{
struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
+ struct device_node *parent;
#ifdef CONFIG_EEH
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -364,8 +354,10 @@ void pci_remove_device_node_info(struct device_node *dn)
WARN_ON(!list_empty(&pdn->child_list));
list_del(&pdn->list);
- if (pdn->parent)
- of_node_put(pdn->parent->node);
+
+ parent = of_get_parent(dn);
+ if (parent)
+ of_node_put(parent);
dn->data = NULL;
kfree(pdn);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index ea3d98115b88..0d790f8432d2 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -211,19 +211,19 @@ void of_scan_pci_bridge(struct pci_dev *dev)
unsigned int flags;
u64 size;
- pr_debug("of_scan_pci_bridge(%s)\n", node->full_name);
+ pr_debug("of_scan_pci_bridge(%pOF)\n", node);
/* parse bus-range property */
busrange = of_get_property(node, "bus-range", &len);
if (busrange == NULL || len != 8) {
- printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n",
- node->full_name);
+ printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %pOF\n",
+ node);
return;
}
ranges = of_get_property(node, "ranges", &len);
if (ranges == NULL) {
- printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n",
- node->full_name);
+ printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %pOF\n",
+ node);
return;
}
@@ -233,8 +233,8 @@ void of_scan_pci_bridge(struct pci_dev *dev)
bus = pci_add_new_bus(dev->bus, dev,
of_read_number(busrange, 1));
if (!bus) {
- printk(KERN_ERR "Failed to create pci bus for %s\n",
- node->full_name);
+ printk(KERN_ERR "Failed to create pci bus for %pOF\n",
+ node);
return;
}
}
@@ -262,13 +262,13 @@ void of_scan_pci_bridge(struct pci_dev *dev)
res = bus->resource[0];
if (res->flags) {
printk(KERN_ERR "PCI: ignoring extra I/O range"
- " for bridge %s\n", node->full_name);
+ " for bridge %pOF\n", node);
continue;
}
} else {
if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
printk(KERN_ERR "PCI: too many memory ranges"
- " for bridge %s\n", node->full_name);
+ " for bridge %pOF\n", node);
continue;
}
res = bus->resource[i];
@@ -307,7 +307,7 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
struct eeh_dev *edev = pdn_to_eeh_dev(PCI_DN(dn));
#endif
- pr_debug(" * %s\n", dn->full_name);
+ pr_debug(" * %pOF\n", dn);
if (!of_device_is_available(dn))
return NULL;
@@ -350,8 +350,8 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
struct device_node *child;
struct pci_dev *dev;
- pr_debug("of_scan_bus(%s) bus no %d...\n",
- node->full_name, bus->number);
+ pr_debug("of_scan_bus(%pOF) bus no %d...\n",
+ node, bus->number);
/* Scan direct children */
for_each_child_of_node(node, child) {
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9f3e2c932dcc..a0c74bbf3454 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -230,7 +230,8 @@ void enable_kernel_fp(void)
}
EXPORT_SYMBOL(enable_kernel_fp);
-static int restore_fp(struct task_struct *tsk) {
+static int restore_fp(struct task_struct *tsk)
+{
if (tsk->thread.load_fp || msr_tm_active(tsk->thread.regs->msr)) {
load_fp_state(&current->thread.fp_state);
current->thread.load_fp++;
@@ -330,11 +331,19 @@ static inline int restore_altivec(struct task_struct *tsk) { return 0; }
#ifdef CONFIG_VSX
static void __giveup_vsx(struct task_struct *tsk)
{
- if (tsk->thread.regs->msr & MSR_FP)
+ unsigned long msr = tsk->thread.regs->msr;
+
+ /*
+ * We should never be setting MSR_VSX without also setting
+ * MSR_FP and MSR_VEC
+ */
+ WARN_ON((msr & MSR_VSX) && !((msr & MSR_FP) && (msr & MSR_VEC)));
+
+ /* __giveup_fpu will clear MSR_VSX */
+ if (msr & MSR_FP)
__giveup_fpu(tsk);
- if (tsk->thread.regs->msr & MSR_VEC)
+ if (msr & MSR_VEC)
__giveup_altivec(tsk);
- tsk->thread.regs->msr &= ~MSR_VSX;
}
static void giveup_vsx(struct task_struct *tsk)
@@ -346,14 +355,6 @@ static void giveup_vsx(struct task_struct *tsk)
msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
}
-static void save_vsx(struct task_struct *tsk)
-{
- if (tsk->thread.regs->msr & MSR_FP)
- save_fpu(tsk);
- if (tsk->thread.regs->msr & MSR_VEC)
- save_altivec(tsk);
-}
-
void enable_kernel_vsx(void)
{
unsigned long cpumsr;
@@ -362,7 +363,8 @@ void enable_kernel_vsx(void)
cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
- if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) {
+ if (current->thread.regs &&
+ (current->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP))) {
check_if_tm_restore_required(current);
/*
* If a thread has already been reclaimed then the
@@ -373,10 +375,6 @@ void enable_kernel_vsx(void)
*/
if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
return;
- if (current->thread.regs->msr & MSR_FP)
- __giveup_fpu(current);
- if (current->thread.regs->msr & MSR_VEC)
- __giveup_altivec(current);
__giveup_vsx(current);
}
}
@@ -386,7 +384,7 @@ void flush_vsx_to_thread(struct task_struct *tsk)
{
if (tsk->thread.regs) {
preempt_disable();
- if (tsk->thread.regs->msr & MSR_VSX) {
+ if (tsk->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP)) {
BUG_ON(tsk != current);
giveup_vsx(tsk);
}
@@ -406,7 +404,6 @@ static int restore_vsx(struct task_struct *tsk)
}
#else
static inline int restore_vsx(struct task_struct *tsk) { return 0; }
-static inline void save_vsx(struct task_struct *tsk) { }
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
@@ -486,6 +483,8 @@ void giveup_all(struct task_struct *tsk)
msr_check_and_set(msr_all_available);
check_if_tm_restore_required(tsk);
+ WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
+
#ifdef CONFIG_PPC_FPU
if (usermsr & MSR_FP)
__giveup_fpu(tsk);
@@ -494,10 +493,6 @@ void giveup_all(struct task_struct *tsk)
if (usermsr & MSR_VEC)
__giveup_altivec(tsk);
#endif
-#ifdef CONFIG_VSX
- if (usermsr & MSR_VSX)
- __giveup_vsx(tsk);
-#endif
#ifdef CONFIG_SPE
if (usermsr & MSR_SPE)
__giveup_spe(tsk);
@@ -511,10 +506,6 @@ void restore_math(struct pt_regs *regs)
{
unsigned long msr;
- /*
- * Syscall exit makes a similar initial check before branching
- * to restore_math. Keep them in synch.
- */
if (!msr_tm_active(regs->msr) &&
!current->thread.load_fp && !loadvec(current->thread))
return;
@@ -556,19 +547,13 @@ void save_all(struct task_struct *tsk)
msr_check_and_set(msr_all_available);
- /*
- * Saving the way the register space is in hardware, save_vsx boils
- * down to a save_fpu() and save_altivec()
- */
- if (usermsr & MSR_VSX) {
- save_vsx(tsk);
- } else {
- if (usermsr & MSR_FP)
- save_fpu(tsk);
+ WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
- if (usermsr & MSR_VEC)
- save_altivec(tsk);
- }
+ if (usermsr & MSR_FP)
+ save_fpu(tsk);
+
+ if (usermsr & MSR_VEC)
+ save_altivec(tsk);
if (usermsr & MSR_SPE)
__giveup_spe(tsk);
@@ -1395,13 +1380,13 @@ void show_regs(struct pt_regs * regs)
show_regs_print_info(KERN_DEFAULT);
- printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
+ printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
regs->nip, regs->link, regs->ctr);
printk("REGS: %p TRAP: %04lx %s (%s)\n",
regs, regs->trap, print_tainted(), init_utsname()->release);
- printk("MSR: "REG" ", regs->msr);
+ printk("MSR: "REG" ", regs->msr);
print_msr_bits(regs->msr);
- printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
+ pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
trap = TRAP(regs);
if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
pr_cont("CFAR: "REG" ", regs->orig_gpr3);
@@ -1994,11 +1979,25 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
void notrace __ppc64_runlatch_on(void)
{
struct thread_info *ti = current_thread_info();
- unsigned long ctrl;
- ctrl = mfspr(SPRN_CTRLF);
- ctrl |= CTRL_RUNLATCH;
- mtspr(SPRN_CTRLT, ctrl);
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ /*
+ * Least significant bit (RUN) is the only writable bit of
+ * the CTRL register, so we can avoid mfspr. 2.06 is not the
+ * earliest ISA where this is the case, but it's convenient.
+ */
+ mtspr(SPRN_CTRLT, CTRL_RUNLATCH);
+ } else {
+ unsigned long ctrl;
+
+ /*
+ * Some architectures (e.g., Cell) have writable fields other
+ * than RUN, so do the read-modify-write.
+ */
+ ctrl = mfspr(SPRN_CTRLF);
+ ctrl |= CTRL_RUNLATCH;
+ mtspr(SPRN_CTRLT, ctrl);
+ }
ti->local_flags |= _TLF_RUNLATCH;
}
@@ -2007,13 +2006,18 @@ void notrace __ppc64_runlatch_on(void)
void notrace __ppc64_runlatch_off(void)
{
struct thread_info *ti = current_thread_info();
- unsigned long ctrl;
ti->local_flags &= ~_TLF_RUNLATCH;
- ctrl = mfspr(SPRN_CTRLF);
- ctrl &= ~CTRL_RUNLATCH;
- mtspr(SPRN_CTRLT, ctrl);
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ mtspr(SPRN_CTRLT, 0);
+ } else {
+ unsigned long ctrl;
+
+ ctrl = mfspr(SPRN_CTRLF);
+ ctrl &= ~CTRL_RUNLATCH;
+ mtspr(SPRN_CTRLT, ctrl);
+ }
}
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 613f79f03877..02190e90c7ae 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -177,6 +177,7 @@ struct platform_support {
bool hash_mmu;
bool radix_mmu;
bool radix_gtse;
+ bool xive;
};
/* Platforms codes are now obsolete in the kernel. Now only used within this
@@ -1041,6 +1042,27 @@ static void __init prom_parse_mmu_model(u8 val,
}
}
+static void __init prom_parse_xive_model(u8 val,
+ struct platform_support *support)
+{
+ switch (val) {
+ case OV5_FEAT(OV5_XIVE_EITHER): /* Either Available */
+ prom_debug("XIVE - either mode supported\n");
+ support->xive = true;
+ break;
+ case OV5_FEAT(OV5_XIVE_EXPLOIT): /* Only Exploitation mode */
+ prom_debug("XIVE - exploitation mode supported\n");
+ support->xive = true;
+ break;
+ case OV5_FEAT(OV5_XIVE_LEGACY): /* Only Legacy mode */
+ prom_debug("XIVE - legacy mode supported\n");
+ break;
+ default:
+ prom_debug("Unknown xive support option: 0x%x\n", val);
+ break;
+ }
+}
+
static void __init prom_parse_platform_support(u8 index, u8 val,
struct platform_support *support)
{
@@ -1054,6 +1076,10 @@ static void __init prom_parse_platform_support(u8 index, u8 val,
support->radix_gtse = true;
}
break;
+ case OV5_INDX(OV5_XIVE_SUPPORT): /* Interrupt mode */
+ prom_parse_xive_model(val & OV5_FEAT(OV5_XIVE_SUPPORT),
+ support);
+ break;
}
}
@@ -1062,7 +1088,8 @@ static void __init prom_check_platform_support(void)
struct platform_support supported = {
.hash_mmu = false,
.radix_mmu = false,
- .radix_gtse = false
+ .radix_gtse = false,
+ .xive = false
};
int prop_len = prom_getproplen(prom.chosen,
"ibm,arch-vec-5-platform-support");
@@ -1095,6 +1122,11 @@ static void __init prom_check_platform_support(void)
/* We're probably on a legacy hypervisor */
prom_debug("Assuming legacy hash support\n");
}
+
+ if (supported.xive) {
+ prom_debug("Asking for XIVE\n");
+ ibm_architecture_vec.vec5.intarch = OV5_FEAT(OV5_XIVE_EXPLOIT);
+ }
}
static void __init prom_send_capabilities(void)
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 660ed39e9c9a..07cd22e35405 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1594,11 +1594,8 @@ static int ppr_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ppr, 0, sizeof(u64));
- return ret;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ppr, 0, sizeof(u64));
}
static int ppr_set(struct task_struct *target,
@@ -1606,11 +1603,8 @@ static int ppr_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ppr, 0, sizeof(u64));
- return ret;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ppr, 0, sizeof(u64));
}
static int dscr_get(struct task_struct *target,
@@ -1618,22 +1612,16 @@ static int dscr_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.dscr, 0, sizeof(u64));
- return ret;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.dscr, 0, sizeof(u64));
}
static int dscr_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.dscr, 0, sizeof(u64));
- return ret;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.dscr, 0, sizeof(u64));
}
#endif
#ifdef CONFIG_PPC_BOOK3S_64
@@ -1642,22 +1630,16 @@ static int tar_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.tar, 0, sizeof(u64));
- return ret;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tar, 0, sizeof(u64));
}
static int tar_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- int ret;
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.tar, 0, sizeof(u64));
- return ret;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.tar, 0, sizeof(u64));
}
static int ebb_active(struct task_struct *target,
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
index d88736fbece6..e8cfc69f59ae 100644
--- a/arch/powerpc/kernel/reloc_64.S
+++ b/arch/powerpc/kernel/reloc_64.S
@@ -82,7 +82,7 @@ _GLOBAL(relocate)
6: blr
.balign 8
-p_dyn: .llong __dynamic_start - 0b
-p_rela: .llong __rela_dyn_start - 0b
-p_st: .llong _stext - 0b
+p_dyn: .8byte __dynamic_start - 0b
+p_rela: .8byte __rela_dyn_start - 0b
+p_st: .8byte _stext - 0b
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 73f1934582c2..c2b148b1634a 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -91,26 +91,14 @@ static int rtas_pci_read_config(struct pci_bus *bus,
unsigned int devfn,
int where, int size, u32 *val)
{
- struct device_node *busdn, *dn;
struct pci_dn *pdn;
- bool found = false;
int ret;
- /* Search only direct children of the bus */
*val = 0xFFFFFFFF;
- busdn = pci_bus_to_OF_node(bus);
- for (dn = busdn->child; dn; dn = dn->sibling) {
- pdn = PCI_DN(dn);
- if (pdn && pdn->devfn == devfn
- && of_device_is_available(dn)) {
- found = true;
- break;
- }
- }
- if (!found)
- return PCIBIOS_DEVICE_NOT_FOUND;
+ pdn = pci_get_pdn_by_devfn(bus, devfn);
+ /* Validity of pdn is checked in here */
ret = rtas_read_config(pdn, where, size, val);
if (*val == EEH_IO_ERROR_VALUE(size) &&
eeh_dev_check_failure(pdn_to_eeh_dev(pdn)))
@@ -153,24 +141,11 @@ static int rtas_pci_write_config(struct pci_bus *bus,
unsigned int devfn,
int where, int size, u32 val)
{
- struct device_node *busdn, *dn;
struct pci_dn *pdn;
- bool found = false;
-
- /* Search only direct children of the bus */
- busdn = pci_bus_to_OF_node(bus);
- for (dn = busdn->child; dn; dn = dn->sibling) {
- pdn = PCI_DN(dn);
- if (pdn && pdn->devfn == devfn
- && of_device_is_available(dn)) {
- found = true;
- break;
- }
- }
- if (!found)
- return PCIBIOS_DEVICE_NOT_FOUND;
+ pdn = pci_get_pdn_by_devfn(bus, devfn);
+ /* Validity of pdn is checked in here. */
return rtas_write_config(pdn, where, size, val);
}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 94a948207cd2..0ac741fae90e 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -481,7 +481,7 @@ void __init smp_setup_cpu_maps(void)
__be32 cpu_be;
int j, len;
- DBG(" * %s...\n", dn->full_name);
+ DBG(" * %pOF...\n", dn);
intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
&len);
@@ -551,7 +551,7 @@ void __init smp_setup_cpu_maps(void)
if (maxcpus > nr_cpu_ids) {
printk(KERN_WARNING
"Partition configured for %d cpus, "
- "operating system maximum is %d.\n",
+ "operating system maximum is %u.\n",
maxcpus, nr_cpu_ids);
maxcpus = nr_cpu_ids;
} else
@@ -704,30 +704,6 @@ int check_legacy_ioport(unsigned long base_port)
}
EXPORT_SYMBOL(check_legacy_ioport);
-static int ppc_panic_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- /*
- * If firmware-assisted dump has been registered then trigger
- * firmware-assisted dump and let firmware handle everything else.
- */
- crash_fadump(NULL, ptr);
- ppc_md.panic(ptr); /* May not return */
- return NOTIFY_DONE;
-}
-
-static struct notifier_block ppc_panic_block = {
- .notifier_call = ppc_panic_event,
- .priority = INT_MIN /* may not return; must be done last */
-};
-
-void __init setup_panic(void)
-{
- if (!ppc_md.panic)
- return;
- atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
-}
-
#ifdef CONFIG_CHECK_CACHE_COHERENCY
/*
* For platforms that have configurable cache-coherency. This function
@@ -872,9 +848,6 @@ void __init setup_arch(char **cmdline_p)
/* Probe the machine type, establish ppc_md. */
probe_machine();
- /* Setup panic notifier if requested by the platform. */
- setup_panic();
-
/*
* Configure ppc_md.power_save (ppc32 only, 64-bit machines do
* it from their respective probe() function.
@@ -916,13 +889,6 @@ void __init setup_arch(char **cmdline_p)
/* Reserve large chunks of memory for use by CMA for KVM. */
kvm_cma_reserve();
- /*
- * Reserve any gigantic pages requested on the command line.
- * memblock needs to have been initialized by the time this is
- * called since this will reserve memory.
- */
- reserve_hugetlb_gpages();
-
klp_init_thread_info(&init_thread_info);
init_mm.start_code = (unsigned long)_stext;
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 2f88f6cf1a42..51ebc01fff52 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -98,6 +98,9 @@ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
notrace void __init machine_init(u64 dt_ptr)
{
+ unsigned int *addr = &memset_nocache_branch;
+ unsigned long insn;
+
/* Configure static keys first, now that we're relocated. */
setup_feature_keys();
@@ -105,7 +108,9 @@ notrace void __init machine_init(u64 dt_ptr)
udbg_early_init();
patch_instruction((unsigned int *)&memcpy, PPC_INST_NOP);
- patch_instruction(&memset_nocache_branch, PPC_INST_NOP);
+
+ insn = create_cond_branch(addr, branch_target(addr), 0x820000);
+ patch_instruction(addr, insn); /* replace b by bne cr0 */
/* Do some early initialization based on the flat device tree */
early_init_devtree(__va(dt_ptr));
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index af23d4b576ec..b89c6aac48c9 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -564,6 +564,9 @@ static __init u64 safe_stack_limit(void)
/* Other BookE, we assume the first GB is bolted */
return 1ul << 30;
#else
+ if (early_radix_enabled())
+ return ULONG_MAX;
+
/* BookS, the first segment is bolted */
if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
return 1UL << SID_SHIFT_1T;
@@ -578,7 +581,8 @@ void __init irqstack_early_init(void)
/*
* Interrupt stacks must be in the first segment since we
- * cannot afford to take SLB misses on them.
+ * cannot afford to take SLB misses on them. They are not
+ * accessed in realmode.
*/
for_each_possible_cpu(i) {
softirq_ctx[i] = (struct thread_info *)
@@ -649,8 +653,9 @@ void __init emergency_stack_init(void)
* aligned.
*
* Since we use these as temporary stacks during secondary CPU
- * bringup, we need to get at them in real mode. This means they
- * must also be within the RMO region.
+ * bringup, machine check, system reset, and HMI, we need to get
+ * at them in real mode. This means they must also be within the RMO
+ * region.
*
* The IRQ stacks allocated elsewhere in this file are zeroed and
* initialized in kernel/irq.c. These are initialized here in order
@@ -751,3 +756,31 @@ unsigned long memory_block_size_bytes(void)
struct ppc_pci_io ppc_pci_io;
EXPORT_SYMBOL(ppc_pci_io);
#endif
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+u64 hw_nmi_get_sample_period(int watchdog_thresh)
+{
+ return ppc_proc_freq * watchdog_thresh;
+}
+#endif
+
+/*
+ * The perf based hardlockup detector breaks PMU event based branches, so
+ * disable it by default. Book3S has a soft-nmi hardlockup detector based
+ * on the decrementer interrupt, so it does not suffer from this problem.
+ *
+ * It is likely to get false positives in VM guests, so disable it there
+ * by default too.
+ */
+static int __init disable_hardlockup_detector(void)
+{
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+ hardlockup_detector_disable();
+#else
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ hardlockup_detector_disable();
+#endif
+
+ return 0;
+}
+early_initcall(disable_hardlockup_detector);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index cf0e1245b8cc..e0a4c1f82e25 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -75,9 +75,11 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
struct thread_info *secondary_ti;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
/* SMP operations for this machine */
@@ -351,7 +353,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
hard_irq_disable();
while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
raw_local_irq_restore(*flags);
- cpu_relax();
+ spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
raw_local_irq_save(*flags);
hard_irq_disable();
}
@@ -360,7 +362,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
static void nmi_ipi_lock(void)
{
while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
- cpu_relax();
+ spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
}
static void nmi_ipi_unlock(void)
@@ -475,7 +477,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
nmi_ipi_lock_start(&flags);
while (nmi_ipi_busy_count) {
nmi_ipi_unlock_end(&flags);
- cpu_relax();
+ spin_until_cond(nmi_ipi_busy_count == 0);
nmi_ipi_lock_start(&flags);
}
@@ -571,6 +573,26 @@ static void smp_store_cpu_info(int id)
#endif
}
+/*
+ * Relationships between CPUs are maintained in a set of per-cpu cpumasks so
+ * rather than just passing around the cpumask we pass around a function that
+ * returns that cpumask for the given CPU.
+ */
+static void set_cpus_related(int i, int j, struct cpumask *(*get_cpumask)(int))
+{
+ cpumask_set_cpu(i, get_cpumask(j));
+ cpumask_set_cpu(j, get_cpumask(i));
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void set_cpus_unrelated(int i, int j,
+ struct cpumask *(*get_cpumask)(int))
+{
+ cpumask_clear_cpu(i, get_cpumask(j));
+ cpumask_clear_cpu(j, get_cpumask(i));
+}
+#endif
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
unsigned int cpu;
@@ -590,6 +612,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
for_each_possible_cpu(cpu) {
zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
+ zalloc_cpumask_var_node(&per_cpu(cpu_l2_cache_map, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
/*
@@ -602,7 +626,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
}
}
+ /* Init the cpumasks so the boot CPU is related to itself */
cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
+ cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
if (smp_ops && smp_ops->probe)
@@ -828,33 +854,6 @@ int cpu_first_thread_of_core(int core)
}
EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
-static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
-{
- const struct cpumask *mask;
- struct device_node *np;
- int i, plen;
- const __be32 *prop;
-
- mask = add ? cpu_online_mask : cpu_present_mask;
- for_each_cpu(i, mask) {
- np = of_get_cpu_node(i, NULL);
- if (!np)
- continue;
- prop = of_get_property(np, "ibm,chip-id", &plen);
- if (prop && plen == sizeof(int) &&
- of_read_number(prop, 1) == chipid) {
- if (add) {
- cpumask_set_cpu(cpu, cpu_core_mask(i));
- cpumask_set_cpu(i, cpu_core_mask(cpu));
- } else {
- cpumask_clear_cpu(cpu, cpu_core_mask(i));
- cpumask_clear_cpu(i, cpu_core_mask(cpu));
- }
- }
- of_node_put(np);
- }
-}
-
/* Must be called when no change can occur to cpu_present_mask,
* i.e. during cpu online or offline.
*/
@@ -877,52 +876,93 @@ static struct device_node *cpu_to_l2cache(int cpu)
return cache;
}
-static void traverse_core_siblings(int cpu, bool add)
+static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int))
{
struct device_node *l2_cache, *np;
- const struct cpumask *mask;
- int i, chip, plen;
- const __be32 *prop;
-
- /* First see if we have ibm,chip-id properties in cpu nodes */
- np = of_get_cpu_node(cpu, NULL);
- if (np) {
- chip = -1;
- prop = of_get_property(np, "ibm,chip-id", &plen);
- if (prop && plen == sizeof(int))
- chip = of_read_number(prop, 1);
- of_node_put(np);
- if (chip >= 0) {
- traverse_siblings_chip_id(cpu, add, chip);
- return;
- }
- }
+ int i;
l2_cache = cpu_to_l2cache(cpu);
- mask = add ? cpu_online_mask : cpu_present_mask;
- for_each_cpu(i, mask) {
+ if (!l2_cache)
+ return false;
+
+ for_each_cpu(i, cpu_online_mask) {
+ /*
+ * when updating the masks the current CPU has not been marked
+ * online, but we need to update the cache masks
+ */
np = cpu_to_l2cache(i);
if (!np)
continue;
- if (np == l2_cache) {
- if (add) {
- cpumask_set_cpu(cpu, cpu_core_mask(i));
- cpumask_set_cpu(i, cpu_core_mask(cpu));
- } else {
- cpumask_clear_cpu(cpu, cpu_core_mask(i));
- cpumask_clear_cpu(i, cpu_core_mask(cpu));
- }
- }
+
+ if (np == l2_cache)
+ set_cpus_related(cpu, i, mask_fn);
+
of_node_put(np);
}
of_node_put(l2_cache);
+
+ return true;
}
+#ifdef CONFIG_HOTPLUG_CPU
+static void remove_cpu_from_masks(int cpu)
+{
+ int i;
+
+ /* NB: cpu_core_mask is a superset of the others */
+ for_each_cpu(i, cpu_core_mask(cpu)) {
+ set_cpus_unrelated(cpu, i, cpu_core_mask);
+ set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
+ set_cpus_unrelated(cpu, i, cpu_sibling_mask);
+ }
+}
+#endif
+
+static void add_cpu_to_masks(int cpu)
+{
+ int first_thread = cpu_first_thread_sibling(cpu);
+ int chipid = cpu_to_chip_id(cpu);
+ int i;
+
+ /*
+ * This CPU will not be in the online mask yet so we need to manually
+ * add it to its own thread sibling mask.
+ */
+ cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
+
+ for (i = first_thread; i < first_thread + threads_per_core; i++)
+ if (cpu_online(i))
+ set_cpus_related(i, cpu, cpu_sibling_mask);
+
+ /*
+ * Copy the thread sibling mask into the cache sibling mask
+ * and mark any CPUs that share an L2 with this CPU.
+ */
+ for_each_cpu(i, cpu_sibling_mask(cpu))
+ set_cpus_related(cpu, i, cpu_l2_cache_mask);
+ update_mask_by_l2(cpu, cpu_l2_cache_mask);
+
+ /*
+ * Copy the cache sibling mask into core sibling mask and mark
+ * any CPUs on the same chip as this CPU.
+ */
+ for_each_cpu(i, cpu_l2_cache_mask(cpu))
+ set_cpus_related(cpu, i, cpu_core_mask);
+
+ if (chipid == -1)
+ return;
+
+ for_each_cpu(i, cpu_online_mask)
+ if (cpu_to_chip_id(i) == chipid)
+ set_cpus_related(cpu, i, cpu_core_mask);
+}
+
+static bool shared_caches;
+
/* Activate a secondary processor. */
void start_secondary(void *unused)
{
unsigned int cpu = smp_processor_id();
- int i, base;
mmgrab(&init_mm);
current->active_mm = &init_mm;
@@ -945,22 +985,15 @@ void start_secondary(void *unused)
vdso_getcpu_init();
#endif
- /* Update sibling maps */
- base = cpu_first_thread_sibling(cpu);
- for (i = 0; i < threads_per_core; i++) {
- if (cpu_is_offline(base + i) && (cpu != base + i))
- continue;
- cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
- cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
+ /* Update topology CPU masks */
+ add_cpu_to_masks(cpu);
- /* cpu_core_map should be a superset of
- * cpu_sibling_map even if we don't have cache
- * information, so update the former here, too.
- */
- cpumask_set_cpu(cpu, cpu_core_mask(base + i));
- cpumask_set_cpu(base + i, cpu_core_mask(cpu));
- }
- traverse_core_siblings(cpu, true);
+ /*
+ * Check for any shared caches. Note that this must be done on a
+ * per-core basis because one core in the pair might be disabled.
+ */
+ if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu)))
+ shared_caches = true;
set_numa_node(numa_cpu_lookup_table[cpu]);
set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
@@ -1003,6 +1036,35 @@ static struct sched_domain_topology_level powerpc_topology[] = {
{ NULL, },
};
+/*
+ * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
+ * This topology makes it *much* cheaper to migrate tasks between adjacent cores
+ * since the migrated task remains cache hot. We want to take advantage of this
+ * at the scheduler level so an extra topology level is required.
+ */
+static int powerpc_shared_cache_flags(void)
+{
+ return SD_SHARE_PKG_RESOURCES;
+}
+
+/*
+ * We can't just pass cpu_l2_cache_mask() directly because
+ * it returns a non-const pointer and the compiler barfs on that.
+ */
+static const struct cpumask *shared_cache_mask(int cpu)
+{
+ return cpu_l2_cache_mask(cpu);
+}
+
+static struct sched_domain_topology_level power9_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+ { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+ { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
void __init smp_cpus_done(unsigned int max_cpus)
{
/*
@@ -1016,14 +1078,23 @@ void __init smp_cpus_done(unsigned int max_cpus)
dump_numa_cpu_topology();
- set_sched_topology(powerpc_topology);
+ /*
+ * If any CPU detects that it's sharing a cache with another CPU then
+ * use the deeper topology that is aware of this sharing.
+ */
+ if (shared_caches) {
+ pr_info("Using shared cache scheduler topology\n");
+ set_sched_topology(power9_topology);
+ } else {
+ pr_info("Using standard scheduler topology\n");
+ set_sched_topology(powerpc_topology);
+ }
}
#ifdef CONFIG_HOTPLUG_CPU
int __cpu_disable(void)
{
int cpu = smp_processor_id();
- int base, i;
int err;
if (!smp_ops->cpu_disable)
@@ -1034,14 +1105,7 @@ int __cpu_disable(void)
return err;
/* Update sibling maps */
- base = cpu_first_thread_sibling(cpu);
- for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) {
- cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
- cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
- cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
- cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
- }
- traverse_core_siblings(cpu, false);
+ remove_cpu_from_masks(cpu);
return 0;
}
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index 988f38dced0f..82d8aae81c6a 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -179,7 +179,7 @@ nothing_to_copy:
sld r3, r3, r0
li r0, 0
1:
- dcbf r0,r3
+ dcbf 0,r3
addi r3,r3,0x20
bdnz 1b
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 4d6b1d3a747f..7ccb7f81f8db 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -17,13 +17,13 @@
#include <asm/ppc_asm.h>
#ifdef CONFIG_PPC64
-#define SYSCALL(func) .llong DOTSYM(sys_##func),DOTSYM(sys_##func)
-#define COMPAT_SYS(func) .llong DOTSYM(sys_##func),DOTSYM(compat_sys_##func)
-#define PPC_SYS(func) .llong DOTSYM(ppc_##func),DOTSYM(ppc_##func)
-#define OLDSYS(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
-#define SYS32ONLY(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
-#define PPC64ONLY(func) .llong DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall)
-#define SYSX(f, f3264, f32) .llong DOTSYM(f),DOTSYM(f3264)
+#define SYSCALL(func) .8byte DOTSYM(sys_##func),DOTSYM(sys_##func)
+#define COMPAT_SYS(func) .8byte DOTSYM(sys_##func),DOTSYM(compat_sys_##func)
+#define PPC_SYS(func) .8byte DOTSYM(ppc_##func),DOTSYM(ppc_##func)
+#define OLDSYS(func) .8byte DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
+#define SYS32ONLY(func) .8byte DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
+#define PPC64ONLY(func) .8byte DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall)
+#define SYSX(f, f3264, f32) .8byte DOTSYM(f),DOTSYM(f3264)
#else
#define SYSCALL(func) .long sys_##func
#define COMPAT_SYS(func) .long sys_##func
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index bfcfd9ef09f2..ec74e203ee04 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -114,6 +114,28 @@ static void pmac_backlight_unblank(void)
static inline void pmac_backlight_unblank(void) { }
#endif
+/*
+ * If oops/die is expected to crash the machine, return true here.
+ *
+ * This should not be expected to be 100% accurate, there may be
+ * notifiers registered or other unexpected conditions that may bring
+ * down the kernel. Or if the current process in the kernel is holding
+ * locks or has other critical state, the kernel may become effectively
+ * unusable anyway.
+ */
+bool die_will_crash(void)
+{
+ if (should_fadump_crash())
+ return true;
+ if (kexec_should_crash(current))
+ return true;
+ if (in_interrupt() || panic_on_oops ||
+ !current->pid || is_global_init(current))
+ return true;
+
+ return false;
+}
+
static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;
@@ -162,21 +184,9 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
crash_fadump(regs, "die oops");
- /*
- * A system reset (0x100) is a request to dump, so we always send
- * it through the crashdump code.
- */
- if (kexec_should_crash(current) || (TRAP(regs) == 0x100)) {
+ if (kexec_should_crash(current))
crash_kexec(regs);
- /*
- * We aren't the primary crash CPU. We need to send it
- * to a holding pattern to avoid it ending up in the panic
- * code.
- */
- crash_kexec_secondary(regs);
- }
-
if (!signr)
return;
@@ -202,18 +212,25 @@ NOKPROBE_SYMBOL(oops_end);
static int __die(const char *str, struct pt_regs *regs, long err)
{
printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP NR_CPUS=%d ", NR_CPUS);
-#endif
+
+ if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ printk("LE ");
+ else
+ printk("BE ");
+
+ if (IS_ENABLED(CONFIG_PREEMPT))
+ pr_cont("PREEMPT ");
+
+ if (IS_ENABLED(CONFIG_SMP))
+ pr_cont("SMP NR_CPUS=%d ", NR_CPUS);
+
if (debug_pagealloc_enabled())
- printk("DEBUG_PAGEALLOC ");
-#ifdef CONFIG_NUMA
- printk("NUMA ");
-#endif
- printk("%s\n", ppc_md.name ? ppc_md.name : "");
+ pr_cont("DEBUG_PAGEALLOC ");
+
+ if (IS_ENABLED(CONFIG_NUMA))
+ pr_cont("NUMA ");
+
+ pr_cont("%s\n", ppc_md.name ? ppc_md.name : "");
if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
return 1;
@@ -288,23 +305,52 @@ void system_reset_exception(struct pt_regs *regs)
if (!nested)
nmi_enter();
+ __this_cpu_inc(irq_stat.sreset_irqs);
+
/* See if any machine dependent calls */
if (ppc_md.system_reset_exception) {
if (ppc_md.system_reset_exception(regs))
goto out;
}
- die("System Reset", regs, SIGABRT);
+ if (debugger(regs))
+ goto out;
+
+ /*
+ * A system reset is a request to dump, so we always send
+ * it through the crashdump code (if fadump or kdump are
+ * registered).
+ */
+ crash_fadump(regs, "System Reset");
+
+ crash_kexec(regs);
+
+ /*
+ * We aren't the primary crash CPU. We need to send it
+ * to a holding pattern to avoid it ending up in the panic
+ * code.
+ */
+ crash_kexec_secondary(regs);
+
+ /*
+ * No debugger or crash dump registered, print logs then
+ * panic.
+ */
+ __die("System Reset", regs, SIGABRT);
+
+ mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+ nmi_panic(regs, "System Reset");
out:
#ifdef CONFIG_PPC_BOOK3S_64
BUG_ON(get_paca()->in_nmi == 0);
if (get_paca()->in_nmi > 1)
- panic("Unrecoverable nested System Reset");
+ nmi_panic(regs, "Unrecoverable nested System Reset");
#endif
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
- panic("Unrecoverable System Reset");
+ nmi_panic(regs, "Unrecoverable System Reset");
if (!nested)
nmi_exit();
@@ -312,39 +358,6 @@ out:
/* What should we do here? We could issue a shutdown or hard reset. */
}
-#ifdef CONFIG_PPC64
-/*
- * This function is called in real mode. Strictly no printk's please.
- *
- * regs->nip and regs->msr contains srr0 and ssr1.
- */
-long machine_check_early(struct pt_regs *regs)
-{
- long handled = 0;
-
- __this_cpu_inc(irq_stat.mce_exceptions);
-
- if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
- handled = cur_cpu_spec->machine_check_early(regs);
- return handled;
-}
-
-long hmi_exception_realmode(struct pt_regs *regs)
-{
- __this_cpu_inc(irq_stat.hmi_exceptions);
-
- wait_for_subcore_guest_exit();
-
- if (ppc_md.hmi_exception_early)
- ppc_md.hmi_exception_early(regs);
-
- wait_for_tb_resync();
-
- return 0;
-}
-
-#endif
-
/*
* I/O accesses can cause machine checks on powermacs.
* Check if the NIP corresponds to the address of a sync
@@ -397,11 +410,6 @@ static inline int check_io_access(struct pt_regs *regs)
/* On 4xx, the reason for the machine check or program exception
is in the ESR. */
#define get_reason(regs) ((regs)->dsisr)
-#ifndef CONFIG_FSL_BOOKE
-#define get_mc_reason(regs) ((regs)->dsisr)
-#else
-#define get_mc_reason(regs) (mfspr(SPRN_MCSR))
-#endif
#define REASON_FP ESR_FP
#define REASON_ILLEGAL (ESR_PIL | ESR_PUO)
#define REASON_PRIVILEGED ESR_PPR
@@ -415,108 +423,17 @@ static inline int check_io_access(struct pt_regs *regs)
/* On non-4xx, the reason for the machine check or program
exception is in the MSR. */
#define get_reason(regs) ((regs)->msr)
-#define get_mc_reason(regs) ((regs)->msr)
-#define REASON_TM 0x200000
-#define REASON_FP 0x100000
-#define REASON_ILLEGAL 0x80000
-#define REASON_PRIVILEGED 0x40000
-#define REASON_TRAP 0x20000
+#define REASON_TM SRR1_PROGTM
+#define REASON_FP SRR1_PROGFPE
+#define REASON_ILLEGAL SRR1_PROGILL
+#define REASON_PRIVILEGED SRR1_PROGPRIV
+#define REASON_TRAP SRR1_PROGTRAP
#define single_stepping(regs) ((regs)->msr & MSR_SE)
#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE)
#endif
-#if defined(CONFIG_4xx)
-int machine_check_4xx(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
-
- if (reason & ESR_IMCP) {
- printk("Instruction");
- mtspr(SPRN_ESR, reason & ~ESR_IMCP);
- } else
- printk("Data");
- printk(" machine check in kernel mode.\n");
-
- return 0;
-}
-
-int machine_check_440A(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
-
- printk("Machine check in kernel mode.\n");
- if (reason & ESR_IMCP){
- printk("Instruction Synchronous Machine Check exception\n");
- mtspr(SPRN_ESR, reason & ~ESR_IMCP);
- }
- else {
- u32 mcsr = mfspr(SPRN_MCSR);
- if (mcsr & MCSR_IB)
- printk("Instruction Read PLB Error\n");
- if (mcsr & MCSR_DRB)
- printk("Data Read PLB Error\n");
- if (mcsr & MCSR_DWB)
- printk("Data Write PLB Error\n");
- if (mcsr & MCSR_TLBP)
- printk("TLB Parity Error\n");
- if (mcsr & MCSR_ICP){
- flush_instruction_cache();
- printk("I-Cache Parity Error\n");
- }
- if (mcsr & MCSR_DCSP)
- printk("D-Cache Search Parity Error\n");
- if (mcsr & MCSR_DCFP)
- printk("D-Cache Flush Parity Error\n");
- if (mcsr & MCSR_IMPE)
- printk("Machine Check exception is imprecise\n");
-
- /* Clear MCSR */
- mtspr(SPRN_MCSR, mcsr);
- }
- return 0;
-}
-
-int machine_check_47x(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
- u32 mcsr;
-
- printk(KERN_ERR "Machine check in kernel mode.\n");
- if (reason & ESR_IMCP) {
- printk(KERN_ERR
- "Instruction Synchronous Machine Check exception\n");
- mtspr(SPRN_ESR, reason & ~ESR_IMCP);
- return 0;
- }
- mcsr = mfspr(SPRN_MCSR);
- if (mcsr & MCSR_IB)
- printk(KERN_ERR "Instruction Read PLB Error\n");
- if (mcsr & MCSR_DRB)
- printk(KERN_ERR "Data Read PLB Error\n");
- if (mcsr & MCSR_DWB)
- printk(KERN_ERR "Data Write PLB Error\n");
- if (mcsr & MCSR_TLBP)
- printk(KERN_ERR "TLB Parity Error\n");
- if (mcsr & MCSR_ICP) {
- flush_instruction_cache();
- printk(KERN_ERR "I-Cache Parity Error\n");
- }
- if (mcsr & MCSR_DCSP)
- printk(KERN_ERR "D-Cache Search Parity Error\n");
- if (mcsr & PPC47x_MCSR_GPR)
- printk(KERN_ERR "GPR Parity Error\n");
- if (mcsr & PPC47x_MCSR_FPR)
- printk(KERN_ERR "FPR Parity Error\n");
- if (mcsr & PPC47x_MCSR_IPR)
- printk(KERN_ERR "Machine Check exception is imprecise\n");
-
- /* Clear MCSR */
- mtspr(SPRN_MCSR, mcsr);
-
- return 0;
-}
-#elif defined(CONFIG_E500)
+#if defined(CONFIG_E500)
int machine_check_e500mc(struct pt_regs *regs)
{
unsigned long mcsr = mfspr(SPRN_MCSR);
@@ -618,7 +535,7 @@ silent_out:
int machine_check_e500(struct pt_regs *regs)
{
- unsigned long reason = get_mc_reason(regs);
+ unsigned long reason = mfspr(SPRN_MCSR);
if (reason & MCSR_BUS_RBERR) {
if (fsl_rio_mcheck_exception(regs))
@@ -665,7 +582,7 @@ int machine_check_generic(struct pt_regs *regs)
#elif defined(CONFIG_E200)
int machine_check_e200(struct pt_regs *regs)
{
- unsigned long reason = get_mc_reason(regs);
+ unsigned long reason = mfspr(SPRN_MCSR);
printk("Machine check in kernel mode.\n");
printk("Caused by (from MCSR=%lx): ", reason);
@@ -687,35 +604,10 @@ int machine_check_e200(struct pt_regs *regs)
return 0;
}
-#elif defined(CONFIG_PPC_8xx)
-int machine_check_8xx(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
-
- pr_err("Machine check in kernel mode.\n");
- pr_err("Caused by (from SRR1=%lx): ", reason);
- if (reason & 0x40000000)
- pr_err("Fetch error at address %lx\n", regs->nip);
- else
- pr_err("Data access error at address %lx\n", regs->dar);
-
-#ifdef CONFIG_PCI
- /* the qspan pci read routines can cause machine checks -- Cort
- *
- * yuck !!! that totally needs to go away ! There are better ways
- * to deal with that than having a wart in the mcheck handler.
- * -- BenH
- */
- bad_page_fault(regs, regs->dar, SIGBUS);
- return 1;
-#else
- return 0;
-#endif
-}
-#else
+#elif defined(CONFIG_PPC32)
int machine_check_generic(struct pt_regs *regs)
{
- unsigned long reason = get_mc_reason(regs);
+ unsigned long reason = regs->msr;
printk("Machine check in kernel mode.\n");
printk("Caused by (from SRR1=%lx): ", reason);
@@ -752,10 +644,14 @@ int machine_check_generic(struct pt_regs *regs)
void machine_check_exception(struct pt_regs *regs)
{
- enum ctx_state prev_state = exception_enter();
int recover = 0;
+ bool nested = in_nmi();
+ if (!nested)
+ nmi_enter();
- __this_cpu_inc(irq_stat.mce_exceptions);
+ /* 64s accounts the mce in machine_check_early when in HVMODE */
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !cpu_has_feature(CPU_FTR_HVMODE))
+ __this_cpu_inc(irq_stat.mce_exceptions);
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -783,10 +679,11 @@ void machine_check_exception(struct pt_regs *regs)
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
- panic("Unrecoverable Machine check");
+ nmi_panic(regs, "Unrecoverable Machine check");
bail:
- exception_exit(prev_state);
+ if (!nested)
+ nmi_exit();
}
void SMIException(struct pt_regs *regs)
@@ -1672,24 +1569,6 @@ void performance_monitor_exception(struct pt_regs *regs)
perf_irq(regs);
}
-#ifdef CONFIG_8xx
-void SoftwareEmulation(struct pt_regs *regs)
-{
- CHECK_FULL_REGS(regs);
-
- if (!user_mode(regs)) {
- debugger(regs);
- die("Kernel Mode Unimplemented Instruction or SW FPU Emulation",
- regs, SIGFPE);
- }
-
- if (!emulate_math(regs))
- return;
-
- _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-}
-#endif /* CONFIG_8xx */
-
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
{
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index 003b20964ea0..5d105b8eeece 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -205,3 +205,12 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
return orig_ret_vaddr;
}
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+ struct pt_regs *regs)
+{
+ if (ctx == RP_CHECK_CHAIN_CALL)
+ return regs->gpr[1] <= ret->stack;
+ else
+ return regs->gpr[1] < ret->stack;
+}
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index 6b2b69616e77..769c2624e0a6 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -232,15 +232,9 @@ __do_get_tspec:
lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
/* Get a stable TB value */
-#ifdef CONFIG_8xx
-2: mftbu r3
- mftbl r4
- mftbu r0
-#else
-2: mfspr r3, SPRN_TBRU
- mfspr r4, SPRN_TBRL
- mfspr r0, SPRN_TBRU
-#endif
+2: MFTBU(r3)
+ MFTBL(r4)
+ MFTBU(r0)
cmplw cr0,r3,r0
bne- 2b
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index b1a250560198..882628fa6987 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -8,7 +8,7 @@
#include <asm/cache.h>
#include <asm/thread_info.h>
-#ifdef CONFIG_STRICT_KERNEL_RWX
+#if defined(CONFIG_STRICT_KERNEL_RWX) && !defined(CONFIG_PPC32)
#define STRICT_ALIGN_SIZE (1 << 24)
#else
#define STRICT_ALIGN_SIZE PAGE_SIZE
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index b67f8b03a32d..2f6eadd9408d 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -71,15 +71,20 @@ static inline void wd_smp_lock(unsigned long *flags)
* This may be called from low level interrupt handlers at some
* point in future.
*/
- local_irq_save(*flags);
- while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
- cpu_relax();
+ raw_local_irq_save(*flags);
+ hard_irq_disable(); /* Make it soft-NMI safe */
+ while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
+ raw_local_irq_restore(*flags);
+ spin_until_cond(!test_bit(0, &__wd_smp_lock));
+ raw_local_irq_save(*flags);
+ hard_irq_disable();
+ }
}
static inline void wd_smp_unlock(unsigned long *flags)
{
clear_bit_unlock(0, &__wd_smp_lock);
- local_irq_restore(*flags);
+ raw_local_irq_restore(*flags);
}
static void wd_lockup_ipi(struct pt_regs *regs)
@@ -96,10 +101,10 @@ static void wd_lockup_ipi(struct pt_regs *regs)
nmi_panic(regs, "Hard LOCKUP");
}
-static void set_cpu_stuck(int cpu, u64 tb)
+static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
{
- cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
- cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+ cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
+ cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
if (cpumask_empty(&wd_smp_cpus_pending)) {
wd_smp_last_reset_tb = tb;
cpumask_andnot(&wd_smp_cpus_pending,
@@ -107,6 +112,10 @@ static void set_cpu_stuck(int cpu, u64 tb)
&wd_smp_cpus_stuck);
}
}
+static void set_cpu_stuck(int cpu, u64 tb)
+{
+ set_cpumask_stuck(cpumask_of(cpu), tb);
+}
static void watchdog_smp_panic(int cpu, u64 tb)
{
@@ -135,11 +144,9 @@ static void watchdog_smp_panic(int cpu, u64 tb)
}
smp_flush_nmi_ipi(1000000);
- /* Take the stuck CPU out of the watch group */
- for_each_cpu(c, &wd_smp_cpus_pending)
- set_cpu_stuck(c, tb);
+ /* Take the stuck CPUs out of the watch group */
+ set_cpumask_stuck(&wd_smp_cpus_pending, tb);
-out:
wd_smp_unlock(&flags);
printk_safe_flush();
@@ -152,6 +159,11 @@ out:
if (hardlockup_panic)
nmi_panic(NULL, "Hard LOCKUP");
+
+ return;
+
+out:
+ wd_smp_unlock(&flags);
}
static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
@@ -204,6 +216,9 @@ void soft_nmi_interrupt(struct pt_regs *regs)
return;
nmi_enter();
+
+ __this_cpu_inc(irq_stat.soft_nmi_irqs);
+
tb = get_tb();
if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
per_cpu(wd_timer_tb, cpu) = tb;
@@ -258,9 +273,11 @@ static void wd_timer_fn(unsigned long data)
void arch_touch_nmi_watchdog(void)
{
+ unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
int cpu = smp_processor_id();
- watchdog_timer_interrupt(cpu);
+ if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
+ watchdog_timer_interrupt(cpu);
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
@@ -283,6 +300,8 @@ static void stop_watchdog_timer_on(unsigned int cpu)
static int start_wd_on_cpu(unsigned int cpu)
{
+ unsigned long flags;
+
if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
WARN_ON(1);
return 0;
@@ -297,12 +316,14 @@ static int start_wd_on_cpu(unsigned int cpu)
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
return 0;
+ wd_smp_lock(&flags);
cpumask_set_cpu(cpu, &wd_cpus_enabled);
if (cpumask_weight(&wd_cpus_enabled) == 1) {
cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
wd_smp_last_reset_tb = get_tb();
}
- smp_wmb();
+ wd_smp_unlock(&flags);
+
start_watchdog_timer_on(cpu);
return 0;
@@ -310,12 +331,17 @@ static int start_wd_on_cpu(unsigned int cpu)
static int stop_wd_on_cpu(unsigned int cpu)
{
+ unsigned long flags;
+
if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
return 0; /* Can happen in CPU unplug case */
stop_watchdog_timer_on(cpu);
+ wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_cpus_enabled);
+ wd_smp_unlock(&flags);
+
wd_smp_clear_cpu_pending(cpu, get_tb());
return 0;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b42812e014c0..7c62967d672c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,6 +37,7 @@
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
+#include <asm/pte-walk.h>
#include "trace_hv.h"
@@ -599,8 +600,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
* hugepage split and collapse.
*/
local_irq_save(flags);
- ptep = find_linux_pte_or_hugepte(current->mm->pgd,
- hva, NULL, NULL);
+ ptep = find_current_mm_pte(current->mm->pgd,
+ hva, NULL, NULL);
if (ptep) {
pte = kvmppc_read_update_linux_pte(ptep, 1);
if (__pte_write(pte))
@@ -1940,6 +1941,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
if (ret < 0) {
+ kfree(ctx);
kvm_put_kvm(kvm);
return ret;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index f6b3e67c5762..c5d7435455f1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -17,6 +17,7 @@
#include <asm/mmu.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
+#include <asm/pte-walk.h>
/*
* Supported radix tree geometry.
@@ -322,13 +323,13 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
gpa = vcpu->arch.fault_gpa & ~0xfffUL;
gpa &= ~0xF000000000000000ul;
gfn = gpa >> PAGE_SHIFT;
- if (!(dsisr & DSISR_PGDIRFAULT))
+ if (!(dsisr & DSISR_PRTABLE_FAULT))
gpa |= ea & 0xfff;
memslot = gfn_to_memslot(kvm, gfn);
/* No memslot means it's an emulated MMIO region */
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
- if (dsisr & (DSISR_PGDIRFAULT | DSISR_BADACCESS |
+ if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS |
DSISR_SET_RC)) {
/*
* Bad address in guest page table tree, or other
@@ -359,8 +360,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (writing)
pgflags |= _PAGE_DIRTY;
local_irq_save(flags);
- ptep = __find_linux_pte_or_hugepte(current->mm->pgd, hva,
- NULL, NULL);
+ ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL);
if (ptep) {
pte = READ_ONCE(*ptep);
if (pte_present(pte) &&
@@ -374,8 +374,12 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
spin_unlock(&kvm->mmu_lock);
return RESUME_GUEST;
}
- ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable,
- gpa, NULL, &shift);
+ /*
+ * We are walking the secondary page table here. We can do this
+ * without disabling irq.
+ */
+ ptep = __find_linux_pte(kvm->arch.pgtable,
+ gpa, NULL, &shift);
if (ptep && pte_present(*ptep)) {
kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
gpa, shift);
@@ -427,8 +431,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
pgflags |= _PAGE_WRITE;
} else {
local_irq_save(flags);
- ptep = __find_linux_pte_or_hugepte(current->mm->pgd,
- hva, NULL, NULL);
+ ptep = find_current_mm_pte(current->mm->pgd,
+ hva, NULL, NULL);
if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
pgflags |= _PAGE_WRITE;
local_irq_restore(flags);
@@ -499,8 +503,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned int shift;
unsigned long old;
- ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
- NULL, &shift);
+ ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep)) {
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
gpa, shift);
@@ -525,8 +528,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned int shift;
int ref = 0;
- ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
- NULL, &shift);
+ ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
gpa, shift);
@@ -545,8 +547,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned int shift;
int ref = 0;
- ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
- NULL, &shift);
+ ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep))
ref = 1;
return ref;
@@ -562,8 +563,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
unsigned int shift;
int ret = 0;
- ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
- NULL, &shift);
+ ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
ret = 1;
if (shift)
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index a160c14304eb..8f2da8bba737 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -265,8 +265,11 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
struct kvmppc_spapr_tce_table *stt = filp->private_data;
struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
+ struct kvm *kvm = stt->kvm;
+ mutex_lock(&kvm->lock);
list_del_rcu(&stt->list);
+ mutex_unlock(&kvm->lock);
list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
WARN_ON(!kref_read(&stit->kref));
@@ -294,6 +297,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce_64 *args)
{
struct kvmppc_spapr_tce_table *stt = NULL;
+ struct kvmppc_spapr_tce_table *siter;
unsigned long npages, size;
int ret = -ENOMEM;
int i;
@@ -301,25 +305,17 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
if (!args->size)
return -EINVAL;
- /* Check this LIOBN hasn't been previously allocated */
- list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
- if (stt->liobn == args->liobn)
- return -EBUSY;
- }
-
size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
npages = kvmppc_tce_pages(size);
ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
- if (ret) {
- stt = NULL;
- goto fail;
- }
+ if (ret)
+ return ret;
ret = -ENOMEM;
stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
GFP_KERNEL);
if (!stt)
- goto fail;
+ goto fail_acct;
stt->liobn = args->liobn;
stt->page_shift = args->page_shift;
@@ -334,24 +330,39 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
goto fail;
}
- kvm_get_kvm(kvm);
-
mutex_lock(&kvm->lock);
- list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
+
+ /* Check this LIOBN hasn't been previously allocated */
+ ret = 0;
+ list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
+ if (siter->liobn == args->liobn) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+
+ if (!ret)
+ ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+ stt, O_RDWR | O_CLOEXEC);
+
+ if (ret >= 0) {
+ list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
+ kvm_get_kvm(kvm);
+ }
mutex_unlock(&kvm->lock);
- return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
- stt, O_RDWR | O_CLOEXEC);
+ if (ret >= 0)
+ return ret;
-fail:
- if (stt) {
- for (i = 0; i < npages; i++)
- if (stt->pages[i])
- __free_page(stt->pages[i]);
+ fail:
+ for (i = 0; i < npages; i++)
+ if (stt->pages[i])
+ __free_page(stt->pages[i]);
- kfree(stt);
- }
+ kfree(stt);
+ fail_acct:
+ kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
return ret;
}
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 3adfd2f5301c..c32e9bfe75b1 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -39,6 +39,7 @@
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
+#include <asm/pte-walk.h>
#ifdef CONFIG_BUG
@@ -353,7 +354,16 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
pte_t *ptep, pte;
unsigned shift = 0;
- ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift);
+ /*
+ * Called in real mode with MSR_EE = 0. We are safe here.
+ * It is ok to do the lookup with arch.pgdir here, because
+ * we are doing this on secondary cpus and current task there
+ * is not the hypervisor. Also this is safe against THP in the
+ * host, because an IPI to primary thread will wait for the secondary
+ * to exit which will again result in the below page table walk
+ * to finish.
+ */
+ ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift);
if (!ptep || !pte_present(*ptep))
return -ENXIO;
pte = *ptep;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 359c79cdf0cc..18e974a34fce 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -485,7 +485,13 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
switch (subfunc) {
case H_VPA_REG_VPA: /* register VPA */
- if (len < sizeof(struct lppaca))
+ /*
+ * The size of our lppaca is 1kB because of the way we align
+ * it for the guest to avoid crossing a 4kB boundary. We only
+ * use 640 bytes of the structure though, so we should accept
+ * clients that set a size of 640.
+ */
+ if (len < 640)
break;
vpap = &tvcpu->arch.vpa;
err = 0;
@@ -2111,6 +2117,15 @@ static int kvmppc_grab_hwthread(int cpu)
struct paca_struct *tpaca;
long timeout = 10000;
+ /*
+ * ISA v3.0 idle routines do not set hwthread_state or test
+ * hwthread_req, so they can not grab idle threads.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ WARN(1, "KVM: can not control sibling threads\n");
+ return -EBUSY;
+ }
+
tpaca = &paca[cpu];
/* Ensure the thread won't go into the kernel if it wakes */
@@ -2145,10 +2160,12 @@ static void kvmppc_release_hwthread(int cpu)
struct paca_struct *tpaca;
tpaca = &paca[cpu];
- tpaca->kvm_hstate.hwthread_req = 0;
tpaca->kvm_hstate.kvm_vcpu = NULL;
tpaca->kvm_hstate.kvm_vcore = NULL;
tpaca->kvm_hstate.kvm_split_mode = NULL;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ tpaca->kvm_hstate.hwthread_req = 0;
+
}
static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
@@ -3325,6 +3342,14 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
if (radix_enabled())
return -EINVAL;
+ /*
+ * POWER7, POWER8 and POWER9 all support 32 storage keys for data.
+ * POWER7 doesn't support keys for instruction accesses,
+ * POWER8 and POWER9 do.
+ */
+ info->data_keys = 32;
+ info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0;
+
info->flags = KVM_PPC_PAGE_SIZES_REAL;
if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
info->flags |= KVM_PPC_1T_SEGMENTS;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 584c74c8119f..4efe364f1188 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -22,6 +22,7 @@
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
+#include <asm/pte-walk.h>
/* Translate address of a vmalloc'd thing to a linear map address */
static void *real_vmalloc_addr(void *x)
@@ -31,9 +32,9 @@ static void *real_vmalloc_addr(void *x)
/*
* assume we don't have huge pages in vmalloc space...
* So don't worry about THP collapse/split. Called
- * Only in realmode, hence won't need irq_save/restore.
+ * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore.
*/
- p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL, NULL);
+ p = find_init_mm_pte(addr, NULL);
if (!p || !pte_present(*p))
return NULL;
addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
@@ -230,14 +231,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
* If we had a page table table change after lookup, we would
* retry via mmu_notifier_retry.
*/
- if (realmode)
- ptep = __find_linux_pte_or_hugepte(pgdir, hva, NULL,
- &hpage_shift);
- else {
+ if (!realmode)
local_irq_save(irq_flags);
- ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL,
- &hpage_shift);
- }
+ /*
+ * If called in real mode we have MSR_EE = 0. Otherwise
+ * we disable irq above.
+ */
+ ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift);
if (ptep) {
pte_t pte;
unsigned int host_pte_size;
@@ -269,7 +269,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
if (!realmode)
local_irq_restore(irq_flags);
- ptel &= ~(HPTE_R_PP0 - psize);
+ ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1);
ptel |= pa;
if (pa)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index c52184a8efdf..663a4a861e7f 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -149,9 +149,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
subf r4, r4, r3
mtspr SPRN_DEC, r4
+BEGIN_FTR_SECTION
/* hwthread_req may have got set by cede or no vcpu, so clear it */
li r0, 0
stb r0, HSTATE_HWTHREAD_REQ(r13)
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/*
* For external interrupts we need to call the Linux
@@ -314,6 +316,7 @@ kvm_novcpu_exit:
* Relocation is off and most register values are lost.
* r13 points to the PACA.
* r3 contains the SRR1 wakeup value, SRR1 is trashed.
+ * This is not used by ISAv3.0B processors.
*/
.globl kvm_start_guest
kvm_start_guest:
@@ -432,6 +435,9 @@ kvm_secondary_got_guest:
* While waiting we also need to check if we get given a vcpu to run.
*/
kvm_no_guest:
+BEGIN_FTR_SECTION
+ twi 31,0,0
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r3, HSTATE_HWTHREAD_REQ(r13)
cmpwi r3, 0
bne 53f
@@ -976,7 +982,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
#ifdef CONFIG_KVM_XICS
/* We are entering the guest on that thread, push VCPU to XIVE */
ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
- cmpldi cr0, r10, r0
+ cmpldi cr0, r10, 0
beq no_xive
ld r11, VCPU_XIVE_SAVED_STATE(r4)
li r9, TM_QW1_OS
@@ -1280,7 +1286,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
bne 2f
mfspr r3,SPRN_HDEC
- cmpwi r3,0
+ EXTEND_HDEC(r3)
+ cmpdi r3,0
mr r4,r9
bge fast_guest_return
2:
@@ -1291,6 +1298,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Hypervisor doorbell - exit only if host IPI flag set */
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
bne 3f
+BEGIN_FTR_SECTION
+ PPC_MSGSYNC
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
beq 4f
@@ -2509,8 +2519,10 @@ kvm_do_nap:
clrrdi r0, r0, 1
mtspr SPRN_CTRLT, r0
+BEGIN_FTR_SECTION
li r0,1
stb r0,HSTATE_HWTHREAD_REQ(r13)
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mfspr r5,SPRN_LPCR
ori r5,r5,LPCR_PECE0 | LPCR_PECE1
BEGIN_FTR_SECTION
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 4636ca6e7d38..d1ed2c41b5d2 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -16,7 +16,22 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
u8 cppr;
u16 ack;
- /* XXX DD1 bug workaround: Check PIPR vs. CPPR first ! */
+ /*
+ * Ensure any previous store to CPPR is ordered vs.
+ * the subsequent loads from PIPR or ACK.
+ */
+ eieio();
+
+ /*
+ * DD1 bug workaround: If PIPR is less favored than CPPR
+ * ignore the interrupt or we might incorrectly lose an IPB
+ * bit.
+ */
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+ u8 pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR);
+ if (pipr >= xc->hw_cppr)
+ return;
+ }
/* Perform the acknowledge OS to register cycle. */
ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
@@ -235,6 +250,11 @@ skip_ipi:
/*
* If we found an interrupt, adjust what the guest CPPR should
* be as if we had just fetched that interrupt from HW.
+ *
+ * Note: This can only make xc->cppr smaller as the previous
+ * loop will only exit with hirq != 0 if prio is lower than
+ * the current xc->cppr. Thus we don't need to re-check xc->mfrr
+ * for pending IPIs.
*/
if (hirq)
xc->cppr = prio;
@@ -381,6 +401,12 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
xc->cppr = cppr;
/*
+ * Order the above update of xc->cppr with the subsequent
+ * read of xc->mfrr inside push_pending_to_hw()
+ */
+ smp_mb();
+
+ /*
* We are masking less, we need to look for pending things
* to deliver and set VP pending bits accordingly to trigger
* a new interrupt otherwise we might miss MFRR changes for
@@ -420,21 +446,37 @@ X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
* used to signal MFRR changes is EOId when fetched from
* the queue.
*/
- if (irq == XICS_IPI || irq == 0)
+ if (irq == XICS_IPI || irq == 0) {
+ /*
+ * This barrier orders the setting of xc->cppr vs.
+ * subsequent test of xc->mfrr done inside
+ * scan_interrupts and push_pending_to_hw
+ */
+ smp_mb();
goto bail;
+ }
/* Find interrupt source */
sb = kvmppc_xive_find_source(xive, irq, &src);
if (!sb) {
pr_devel(" source not found !\n");
rc = H_PARAMETER;
+ /* Same as above */
+ smp_mb();
goto bail;
}
state = &sb->irq_state[src];
kvmppc_xive_select_irq(state, &hw_num, &xd);
state->in_eoi = true;
- mb();
+
+ /*
+ * This barrier orders both setting of in_eoi above vs.
+ * subsequent test of guest_priority, and the setting
+ * of xc->cppr vs. subsequent test of xc->mfrr done inside
+ * scan_interrupts and push_pending_to_hw
+ */
+ smp_mb();
again:
if (state->guest_priority == MASKED) {
@@ -461,6 +503,14 @@ again:
}
+ /*
+ * This barrier orders the above guest_priority check
+ * and spin_lock/unlock with clearing in_eoi below.
+ *
+ * It also has to be a full mb() as it must ensure
+ * the MMIOs done in source_eoi() are completed before
+ * state->in_eoi is visible.
+ */
mb();
state->in_eoi = false;
bail:
@@ -495,6 +545,18 @@ X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
/* Locklessly write over MFRR */
xc->mfrr = mfrr;
+ /*
+ * The load of xc->cppr below and the subsequent MMIO store
+ * to the IPI must happen after the above mfrr update is
+ * globally visible so that:
+ *
+ * - Synchronize with another CPU doing an H_EOI or a H_CPPR
+ * updating xc->cppr then reading xc->mfrr.
+ *
+ * - The target of the IPI sees the xc->mfrr update
+ */
+ mb();
+
/* Shoot the IPI if most favored than target cppr */
if (mfrr < xc->cppr)
__x_writeq(0, __x_trig_page(&xc->vp_ipi_data));
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 32fdab57d604..f9f6468f4171 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -455,16 +455,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
if (err)
goto free_vcpu;
- if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
+ if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) {
+ err = -ENOMEM;
goto uninit_vcpu;
+ }
err = kvmppc_e500_tlb_init(vcpu_e500);
if (err)
goto uninit_id;
vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
- if (!vcpu->arch.shared)
+ if (!vcpu->arch.shared) {
+ err = -ENOMEM;
goto uninit_tlb;
+ }
return vcpu;
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 77fd043b3ecc..c6c734424c70 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -30,6 +30,7 @@
#include <linux/vmalloc.h>
#include <linux/hugetlb.h>
#include <asm/kvm_ppc.h>
+#include <asm/pte-walk.h>
#include "e500.h"
#include "timing.h"
@@ -476,7 +477,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
* can't run hence pfn won't change.
*/
local_irq_save(flags);
- ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL, NULL);
+ ptep = find_linux_pte(pgdir, hva, NULL, NULL);
if (ptep) {
pte_t pte = READ_ONCE(*ptep);
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index f48a0c22e8f9..d0b6b5788afc 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -331,8 +331,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
goto uninit_vcpu;
vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
- if (!vcpu->arch.shared)
+ if (!vcpu->arch.shared) {
+ err = -ENOMEM;
goto uninit_tlb;
+ }
return vcpu;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 1a75c0b5f4ca..3480faaf1ef8 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -58,6 +58,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
return !!(v->arch.pending_exceptions) || kvm_request_pending(v);
}
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
return 1;
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 3c3146ba62da..50d5bf954cff 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -31,7 +31,8 @@ obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
obj-y += checksum_$(BITS).o checksum_wrappers.o
-obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o
+obj-y += sstep.o ldstfp.o quad.o
+obj64-y += quad.o
obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 8aedbb5f4b86..da425bb6b369 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -67,6 +67,20 @@ CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)
+_GLOBAL(memset16)
+ rlwinm. r0 ,r5, 31, 1, 31
+ addi r6, r3, -4
+ beq- 2f
+ rlwimi r4 ,r4 ,16 ,0 ,15
+ mtctr r0
+1: stwu r4, 4(r6)
+ bdnz 1b
+2: andi. r0, r5, 1
+ beqlr
+ sth r4, 4(r6)
+ blr
+EXPORT_SYMBOL(memset16)
+
/*
* Use dcbz on the complete cache lines in the destination
* to set them to zero. This requires that the destination
@@ -77,22 +91,24 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1)
* replaced by a nop once cache is active. This is done in machine_init()
*/
_GLOBAL(memset)
+ cmplwi 0,r5,4
+ blt 7f
+
rlwimi r4,r4,8,16,23
rlwimi r4,r4,16,0,15
- addi r6,r3,-4
- cmplwi 0,r5,4
- blt 7f
- stwu r4,4(r6)
+ stw r4,0(r3)
beqlr
- andi. r0,r6,3
+ andi. r0,r3,3
add r5,r0,r5
- subf r6,r0,r6
+ subf r6,r0,r3
cmplwi 0,r4,0
- bne 2f /* Use normal procedure if r4 is not zero */
-EXPORT_SYMBOL(memset)
+ /*
+ * Skip optimised block until cache is enabled. Will be replaced
+ * by 'bne' during boot to use normal procedure if r4 is not zero
+ */
_GLOBAL(memset_nocache_branch)
- b 2f /* Skip optimised bloc until cache is enabled */
+ b 2f
clrlwi r7,r6,32-LG_CACHELINE_BYTES
add r8,r7,r5
@@ -119,7 +135,6 @@ _GLOBAL(memset_nocache_branch)
1: stwu r4,4(r6)
bdnz 1b
6: andi. r5,r5,3
-7: cmpwi 0,r5,0
beqlr
mtctr r5
addi r6,r6,3
@@ -127,6 +142,15 @@ _GLOBAL(memset_nocache_branch)
bdnz 8b
blr
+7: cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+ addi r6,r3,-1
+9: stbu r4,1(r6)
+ bdnz 9b
+ blr
+EXPORT_SYMBOL(memset)
+
/*
* This version uses dcbz on the complete cache lines in the
* destination area to reduce memory traffic. This requires that
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
index a84d333ecb09..ca5fc8fa7efc 100644
--- a/arch/powerpc/lib/copypage_power7.S
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -45,13 +45,13 @@ _GLOBAL(copypage_power7)
.machine push
.machine "power4"
/* setup read stream 0 */
- dcbt r0,r4,0b01000 /* addr from */
- dcbt r0,r7,0b01010 /* length and depth from */
+ dcbt 0,r4,0b01000 /* addr from */
+ dcbt 0,r7,0b01010 /* length and depth from */
/* setup write stream 1 */
- dcbtst r0,r9,0b01000 /* addr to */
- dcbtst r0,r10,0b01010 /* length and depth to */
+ dcbtst 0,r9,0b01000 /* addr to */
+ dcbtst 0,r10,0b01010 /* length and depth to */
eieio
- dcbt r0,r8,0b01010 /* all streams GO */
+ dcbt 0,r8,0b01010 /* all streams GO */
.machine pop
#ifdef CONFIG_ALTIVEC
@@ -83,7 +83,7 @@ _GLOBAL(copypage_power7)
li r12,112
.align 5
-1: lvx v7,r0,r4
+1: lvx v7,0,r4
lvx v6,r4,r6
lvx v5,r4,r7
lvx v4,r4,r8
@@ -92,7 +92,7 @@ _GLOBAL(copypage_power7)
lvx v1,r4,r11
lvx v0,r4,r12
addi r4,r4,128
- stvx v7,r0,r3
+ stvx v7,0,r3
stvx v6,r3,r6
stvx v5,r3,r7
stvx v4,r3,r8
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index 706b7cc19846..d416a4a66578 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -315,13 +315,13 @@ err1; stb r0,0(r3)
.machine push
.machine "power4"
/* setup read stream 0 */
- dcbt r0,r6,0b01000 /* addr from */
- dcbt r0,r7,0b01010 /* length and depth from */
+ dcbt 0,r6,0b01000 /* addr from */
+ dcbt 0,r7,0b01010 /* length and depth from */
/* setup write stream 1 */
- dcbtst r0,r9,0b01000 /* addr to */
- dcbtst r0,r10,0b01010 /* length and depth to */
+ dcbtst 0,r9,0b01000 /* addr to */
+ dcbtst 0,r10,0b01010 /* length and depth to */
eieio
- dcbt r0,r8,0b01010 /* all streams GO */
+ dcbt 0,r8,0b01010 /* all streams GO */
.machine pop
beq cr1,.Lunwind_stack_nonvmx_copy
@@ -376,26 +376,26 @@ err3; std r0,0(r3)
li r11,48
bf cr7*4+3,5f
-err3; lvx v1,r0,r4
+err3; lvx v1,0,r4
addi r4,r4,16
-err3; stvx v1,r0,r3
+err3; stvx v1,0,r3
addi r3,r3,16
5: bf cr7*4+2,6f
-err3; lvx v1,r0,r4
+err3; lvx v1,0,r4
err3; lvx v0,r4,r9
addi r4,r4,32
-err3; stvx v1,r0,r3
+err3; stvx v1,0,r3
err3; stvx v0,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
-err3; lvx v3,r0,r4
+err3; lvx v3,0,r4
err3; lvx v2,r4,r9
err3; lvx v1,r4,r10
err3; lvx v0,r4,r11
addi r4,r4,64
-err3; stvx v3,r0,r3
+err3; stvx v3,0,r3
err3; stvx v2,r3,r9
err3; stvx v1,r3,r10
err3; stvx v0,r3,r11
@@ -421,7 +421,7 @@ err3; stvx v0,r3,r11
*/
.align 5
8:
-err4; lvx v7,r0,r4
+err4; lvx v7,0,r4
err4; lvx v6,r4,r9
err4; lvx v5,r4,r10
err4; lvx v4,r4,r11
@@ -430,7 +430,7 @@ err4; lvx v2,r4,r14
err4; lvx v1,r4,r15
err4; lvx v0,r4,r16
addi r4,r4,128
-err4; stvx v7,r0,r3
+err4; stvx v7,0,r3
err4; stvx v6,r3,r9
err4; stvx v5,r3,r10
err4; stvx v4,r3,r11
@@ -451,29 +451,29 @@ err4; stvx v0,r3,r16
mtocrf 0x01,r6
bf cr7*4+1,9f
-err3; lvx v3,r0,r4
+err3; lvx v3,0,r4
err3; lvx v2,r4,r9
err3; lvx v1,r4,r10
err3; lvx v0,r4,r11
addi r4,r4,64
-err3; stvx v3,r0,r3
+err3; stvx v3,0,r3
err3; stvx v2,r3,r9
err3; stvx v1,r3,r10
err3; stvx v0,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
-err3; lvx v1,r0,r4
+err3; lvx v1,0,r4
err3; lvx v0,r4,r9
addi r4,r4,32
-err3; stvx v1,r0,r3
+err3; stvx v1,0,r3
err3; stvx v0,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
-err3; lvx v1,r0,r4
+err3; lvx v1,0,r4
addi r4,r4,16
-err3; stvx v1,r0,r3
+err3; stvx v1,0,r3
addi r3,r3,16
/* Up to 15B to go */
@@ -553,25 +553,25 @@ err3; lvx v0,0,r4
addi r4,r4,16
bf cr7*4+3,5f
-err3; lvx v1,r0,r4
+err3; lvx v1,0,r4
VPERM(v8,v0,v1,v16)
addi r4,r4,16
-err3; stvx v8,r0,r3
+err3; stvx v8,0,r3
addi r3,r3,16
vor v0,v1,v1
5: bf cr7*4+2,6f
-err3; lvx v1,r0,r4
+err3; lvx v1,0,r4
VPERM(v8,v0,v1,v16)
err3; lvx v0,r4,r9
VPERM(v9,v1,v0,v16)
addi r4,r4,32
-err3; stvx v8,r0,r3
+err3; stvx v8,0,r3
err3; stvx v9,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
-err3; lvx v3,r0,r4
+err3; lvx v3,0,r4
VPERM(v8,v0,v3,v16)
err3; lvx v2,r4,r9
VPERM(v9,v3,v2,v16)
@@ -580,7 +580,7 @@ err3; lvx v1,r4,r10
err3; lvx v0,r4,r11
VPERM(v11,v1,v0,v16)
addi r4,r4,64
-err3; stvx v8,r0,r3
+err3; stvx v8,0,r3
err3; stvx v9,r3,r9
err3; stvx v10,r3,r10
err3; stvx v11,r3,r11
@@ -606,7 +606,7 @@ err3; stvx v11,r3,r11
*/
.align 5
8:
-err4; lvx v7,r0,r4
+err4; lvx v7,0,r4
VPERM(v8,v0,v7,v16)
err4; lvx v6,r4,r9
VPERM(v9,v7,v6,v16)
@@ -623,7 +623,7 @@ err4; lvx v1,r4,r15
err4; lvx v0,r4,r16
VPERM(v15,v1,v0,v16)
addi r4,r4,128
-err4; stvx v8,r0,r3
+err4; stvx v8,0,r3
err4; stvx v9,r3,r9
err4; stvx v10,r3,r10
err4; stvx v11,r3,r11
@@ -644,7 +644,7 @@ err4; stvx v15,r3,r16
mtocrf 0x01,r6
bf cr7*4+1,9f
-err3; lvx v3,r0,r4
+err3; lvx v3,0,r4
VPERM(v8,v0,v3,v16)
err3; lvx v2,r4,r9
VPERM(v9,v3,v2,v16)
@@ -653,27 +653,27 @@ err3; lvx v1,r4,r10
err3; lvx v0,r4,r11
VPERM(v11,v1,v0,v16)
addi r4,r4,64
-err3; stvx v8,r0,r3
+err3; stvx v8,0,r3
err3; stvx v9,r3,r9
err3; stvx v10,r3,r10
err3; stvx v11,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
-err3; lvx v1,r0,r4
+err3; lvx v1,0,r4
VPERM(v8,v0,v1,v16)
err3; lvx v0,r4,r9
VPERM(v9,v1,v0,v16)
addi r4,r4,32
-err3; stvx v8,r0,r3
+err3; stvx v8,0,r3
err3; stvx v9,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
-err3; lvx v1,r0,r4
+err3; lvx v1,0,r4
VPERM(v8,v0,v1,v16)
addi r4,r4,16
-err3; stvx v8,r0,r3
+err3; stvx v8,0,r3
addi r3,r3,16
/* Up to 15B to go */
diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S
index a58777c1b2cb..ae15eba49c1f 100644
--- a/arch/powerpc/lib/ldstfp.S
+++ b/arch/powerpc/lib/ldstfp.S
@@ -21,27 +21,19 @@
#define STKFRM (PPC_MIN_STKFRM + 16)
- .macro inst32 op
-reg = 0
- .rept 32
-20: \op reg,0,r4
- b 3f
- EX_TABLE(20b,99f)
-reg = reg + 1
- .endr
- .endm
-
-/* Get the contents of frN into fr0; N is in r3. */
+/* Get the contents of frN into *p; N is in r3 and p is in r4. */
_GLOBAL(get_fpr)
mflr r0
+ mfmsr r6
+ ori r7, r6, MSR_FP
+ MTMSRD(r7)
+ isync
rlwinm r3,r3,3,0xf8
bcl 20,31,1f
- blr /* fr0 is already in fr0 */
- nop
-reg = 1
- .rept 31
- fmr fr0,reg
- blr
+reg = 0
+ .rept 32
+ stfd reg, 0(r4)
+ b 2f
reg = reg + 1
.endr
1: mflr r5
@@ -49,18 +41,23 @@ reg = reg + 1
mtctr r5
mtlr r0
bctr
+2: MTMSRD(r6)
+ isync
+ blr
-/* Put the contents of fr0 into frN; N is in r3. */
+/* Put the contents of *p into frN; N is in r3 and p is in r4. */
_GLOBAL(put_fpr)
mflr r0
+ mfmsr r6
+ ori r7, r6, MSR_FP
+ MTMSRD(r7)
+ isync
rlwinm r3,r3,3,0xf8
bcl 20,31,1f
- blr /* fr0 is already in fr0 */
- nop
-reg = 1
- .rept 31
- fmr reg,fr0
- blr
+reg = 0
+ .rept 32
+ lfd reg, 0(r4)
+ b 2f
reg = reg + 1
.endr
1: mflr r5
@@ -68,127 +65,24 @@ reg = reg + 1
mtctr r5
mtlr r0
bctr
-
-/* Load FP reg N from float at *p. N is in r3, p in r4. */
-_GLOBAL(do_lfs)
- PPC_STLU r1,-STKFRM(r1)
- mflr r0
- PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
- mfmsr r6
- ori r7,r6,MSR_FP
- cmpwi cr7,r3,0
- MTMSRD(r7)
- isync
- beq cr7,1f
- stfd fr0,STKFRM-16(r1)
-1: li r9,-EFAULT
-2: lfs fr0,0(r4)
- li r9,0
-3: bl put_fpr
- beq cr7,4f
- lfd fr0,STKFRM-16(r1)
-4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
- mtlr r0
- MTMSRD(r6)
- isync
- mr r3,r9
- addi r1,r1,STKFRM
- blr
- EX_TABLE(2b,3b)
-
-/* Load FP reg N from double at *p. N is in r3, p in r4. */
-_GLOBAL(do_lfd)
- PPC_STLU r1,-STKFRM(r1)
- mflr r0
- PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
- mfmsr r6
- ori r7,r6,MSR_FP
- cmpwi cr7,r3,0
- MTMSRD(r7)
- isync
- beq cr7,1f
- stfd fr0,STKFRM-16(r1)
-1: li r9,-EFAULT
-2: lfd fr0,0(r4)
- li r9,0
-3: beq cr7,4f
- bl put_fpr
- lfd fr0,STKFRM-16(r1)
-4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
- mtlr r0
- MTMSRD(r6)
+2: MTMSRD(r6)
isync
- mr r3,r9
- addi r1,r1,STKFRM
blr
- EX_TABLE(2b,3b)
-/* Store FP reg N to float at *p. N is in r3, p in r4. */
-_GLOBAL(do_stfs)
- PPC_STLU r1,-STKFRM(r1)
- mflr r0
- PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
- mfmsr r6
- ori r7,r6,MSR_FP
- cmpwi cr7,r3,0
- MTMSRD(r7)
- isync
- beq cr7,1f
- stfd fr0,STKFRM-16(r1)
- bl get_fpr
-1: li r9,-EFAULT
-2: stfs fr0,0(r4)
- li r9,0
-3: beq cr7,4f
- lfd fr0,STKFRM-16(r1)
-4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
- mtlr r0
- MTMSRD(r6)
- isync
- mr r3,r9
- addi r1,r1,STKFRM
- blr
- EX_TABLE(2b,3b)
-
-/* Store FP reg N to double at *p. N is in r3, p in r4. */
-_GLOBAL(do_stfd)
- PPC_STLU r1,-STKFRM(r1)
+#ifdef CONFIG_ALTIVEC
+/* Get the contents of vrN into *p; N is in r3 and p is in r4. */
+_GLOBAL(get_vr)
mflr r0
- PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
mfmsr r6
- ori r7,r6,MSR_FP
- cmpwi cr7,r3,0
+ oris r7, r6, MSR_VEC@h
MTMSRD(r7)
isync
- beq cr7,1f
- stfd fr0,STKFRM-16(r1)
- bl get_fpr
-1: li r9,-EFAULT
-2: stfd fr0,0(r4)
- li r9,0
-3: beq cr7,4f
- lfd fr0,STKFRM-16(r1)
-4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
- mtlr r0
- MTMSRD(r6)
- isync
- mr r3,r9
- addi r1,r1,STKFRM
- blr
- EX_TABLE(2b,3b)
-
-#ifdef CONFIG_ALTIVEC
-/* Get the contents of vrN into v0; N is in r3. */
-_GLOBAL(get_vr)
- mflr r0
rlwinm r3,r3,3,0xf8
bcl 20,31,1f
- blr /* v0 is already in v0 */
- nop
-reg = 1
- .rept 31
- vor v0,reg,reg /* assembler doesn't know vmr? */
- blr
+reg = 0
+ .rept 32
+ stvx reg, 0, r4
+ b 2f
reg = reg + 1
.endr
1: mflr r5
@@ -196,18 +90,23 @@ reg = reg + 1
mtctr r5
mtlr r0
bctr
+2: MTMSRD(r6)
+ isync
+ blr
-/* Put the contents of v0 into vrN; N is in r3. */
+/* Put the contents of *p into vrN; N is in r3 and p is in r4. */
_GLOBAL(put_vr)
mflr r0
+ mfmsr r6
+ oris r7, r6, MSR_VEC@h
+ MTMSRD(r7)
+ isync
rlwinm r3,r3,3,0xf8
bcl 20,31,1f
- blr /* v0 is already in v0 */
- nop
-reg = 1
- .rept 31
- vor reg,v0,v0
- blr
+reg = 0
+ .rept 32
+ lvx reg, 0, r4
+ b 2f
reg = reg + 1
.endr
1: mflr r5
@@ -215,62 +114,9 @@ reg = reg + 1
mtctr r5
mtlr r0
bctr
-
-/* Load vector reg N from *p. N is in r3, p in r4. */
-_GLOBAL(do_lvx)
- PPC_STLU r1,-STKFRM(r1)
- mflr r0
- PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
- mfmsr r6
- oris r7,r6,MSR_VEC@h
- cmpwi cr7,r3,0
- li r8,STKFRM-16
- MTMSRD(r7)
- isync
- beq cr7,1f
- stvx v0,r1,r8
-1: li r9,-EFAULT
-2: lvx v0,0,r4
- li r9,0
-3: beq cr7,4f
- bl put_vr
- lvx v0,r1,r8
-4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
- mtlr r0
- MTMSRD(r6)
+2: MTMSRD(r6)
isync
- mr r3,r9
- addi r1,r1,STKFRM
- blr
- EX_TABLE(2b,3b)
-
-/* Store vector reg N to *p. N is in r3, p in r4. */
-_GLOBAL(do_stvx)
- PPC_STLU r1,-STKFRM(r1)
- mflr r0
- PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
- mfmsr r6
- oris r7,r6,MSR_VEC@h
- cmpwi cr7,r3,0
- li r8,STKFRM-16
- MTMSRD(r7)
- isync
- beq cr7,1f
- stvx v0,r1,r8
- bl get_vr
-1: li r9,-EFAULT
-2: stvx v0,0,r4
- li r9,0
-3: beq cr7,4f
- lvx v0,r1,r8
-4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
- mtlr r0
- MTMSRD(r6)
- isync
- mr r3,r9
- addi r1,r1,STKFRM
blr
- EX_TABLE(2b,3b)
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
@@ -313,7 +159,7 @@ reg = reg + 1
bctr
/* Load VSX reg N from vector doubleword *p. N is in r3, p in r4. */
-_GLOBAL(do_lxvd2x)
+_GLOBAL(load_vsrn)
PPC_STLU r1,-STKFRM(r1)
mflr r0
PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
@@ -325,49 +171,74 @@ _GLOBAL(do_lxvd2x)
isync
beq cr7,1f
STXVD2X(0,R1,R8)
-1: li r9,-EFAULT
-2: LXVD2X(0,R0,R4)
- li r9,0
-3: beq cr7,4f
+1: LXVD2X(0,R0,R4)
+#ifdef __LITTLE_ENDIAN__
+ XXSWAPD(0,0)
+#endif
+ beq cr7,4f
bl put_vsr
LXVD2X(0,R1,R8)
4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
mtlr r0
MTMSRD(r6)
isync
- mr r3,r9
addi r1,r1,STKFRM
blr
- EX_TABLE(2b,3b)
/* Store VSX reg N to vector doubleword *p. N is in r3, p in r4. */
-_GLOBAL(do_stxvd2x)
+_GLOBAL(store_vsrn)
PPC_STLU r1,-STKFRM(r1)
mflr r0
PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
mfmsr r6
oris r7,r6,MSR_VSX@h
- cmpwi cr7,r3,0
li r8,STKFRM-16
MTMSRD(r7)
isync
- beq cr7,1f
STXVD2X(0,R1,R8)
bl get_vsr
-1: li r9,-EFAULT
-2: STXVD2X(0,R0,R4)
- li r9,0
-3: beq cr7,4f
+#ifdef __LITTLE_ENDIAN__
+ XXSWAPD(0,0)
+#endif
+ STXVD2X(0,R0,R4)
LXVD2X(0,R1,R8)
-4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
+ PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
mtlr r0
MTMSRD(r6)
isync
mr r3,r9
addi r1,r1,STKFRM
blr
- EX_TABLE(2b,3b)
-
#endif /* CONFIG_VSX */
+/* Convert single-precision to double, without disturbing FPRs. */
+/* conv_sp_to_dp(float *sp, double *dp) */
+_GLOBAL(conv_sp_to_dp)
+ mfmsr r6
+ ori r7, r6, MSR_FP
+ MTMSRD(r7)
+ isync
+ stfd fr0, -16(r1)
+ lfs fr0, 0(r3)
+ stfd fr0, 0(r4)
+ lfd fr0, -16(r1)
+ MTMSRD(r6)
+ isync
+ blr
+
+/* Convert double-precision to single, without disturbing FPRs. */
+/* conv_dp_to_sp(double *dp, float *sp) */
+_GLOBAL(conv_dp_to_sp)
+ mfmsr r6
+ ori r7, r6, MSR_FP
+ MTMSRD(r7)
+ isync
+ stfd fr0, -16(r1)
+ lfd fr0, 0(r3)
+ stfs fr0, 0(r4)
+ lfd fr0, -16(r1)
+ MTMSRD(r6)
+ isync
+ blr
+
#endif /* CONFIG_PPC_FPU */
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 85fa9869aec5..ec531de99996 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -13,6 +13,23 @@
#include <asm/ppc_asm.h>
#include <asm/export.h>
+_GLOBAL(__memset16)
+ rlwimi r4,r4,16,0,15
+ /* fall through */
+
+_GLOBAL(__memset32)
+ rldimi r4,r4,32,0
+ /* fall through */
+
+_GLOBAL(__memset64)
+ neg r0,r3
+ andi. r0,r0,7
+ cmplw cr1,r5,r0
+ b .Lms
+EXPORT_SYMBOL(__memset16)
+EXPORT_SYMBOL(__memset32)
+EXPORT_SYMBOL(__memset64)
+
_GLOBAL(memset)
neg r0,r3
rlwimi r4,r4,8,16,23
@@ -20,7 +37,7 @@ _GLOBAL(memset)
rlwimi r4,r4,16,0,15
cmplw cr1,r5,r0 /* do we get that far? */
rldimi r4,r4,32,0
- PPC_MTOCRF(1,r0)
+.Lms: PPC_MTOCRF(1,r0)
mr r6,r3
blt cr1,8f
beq+ 3f /* if already 8-byte aligned */
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
index 786234fd4e91..193909abd18b 100644
--- a/arch/powerpc/lib/memcpy_power7.S
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -261,12 +261,12 @@ _GLOBAL(memcpy_power7)
.machine push
.machine "power4"
- dcbt r0,r6,0b01000
- dcbt r0,r7,0b01010
- dcbtst r0,r9,0b01000
- dcbtst r0,r10,0b01010
+ dcbt 0,r6,0b01000
+ dcbt 0,r7,0b01010
+ dcbtst 0,r9,0b01000
+ dcbtst 0,r10,0b01010
eieio
- dcbt r0,r8,0b01010 /* GO */
+ dcbt 0,r8,0b01010 /* GO */
.machine pop
beq cr1,.Lunwind_stack_nonvmx_copy
@@ -321,26 +321,26 @@ _GLOBAL(memcpy_power7)
li r11,48
bf cr7*4+3,5f
- lvx v1,r0,r4
+ lvx v1,0,r4
addi r4,r4,16
- stvx v1,r0,r3
+ stvx v1,0,r3
addi r3,r3,16
5: bf cr7*4+2,6f
- lvx v1,r0,r4
+ lvx v1,0,r4
lvx v0,r4,r9
addi r4,r4,32
- stvx v1,r0,r3
+ stvx v1,0,r3
stvx v0,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
- lvx v3,r0,r4
+ lvx v3,0,r4
lvx v2,r4,r9
lvx v1,r4,r10
lvx v0,r4,r11
addi r4,r4,64
- stvx v3,r0,r3
+ stvx v3,0,r3
stvx v2,r3,r9
stvx v1,r3,r10
stvx v0,r3,r11
@@ -366,7 +366,7 @@ _GLOBAL(memcpy_power7)
*/
.align 5
8:
- lvx v7,r0,r4
+ lvx v7,0,r4
lvx v6,r4,r9
lvx v5,r4,r10
lvx v4,r4,r11
@@ -375,7 +375,7 @@ _GLOBAL(memcpy_power7)
lvx v1,r4,r15
lvx v0,r4,r16
addi r4,r4,128
- stvx v7,r0,r3
+ stvx v7,0,r3
stvx v6,r3,r9
stvx v5,r3,r10
stvx v4,r3,r11
@@ -396,29 +396,29 @@ _GLOBAL(memcpy_power7)
mtocrf 0x01,r6
bf cr7*4+1,9f
- lvx v3,r0,r4
+ lvx v3,0,r4
lvx v2,r4,r9
lvx v1,r4,r10
lvx v0,r4,r11
addi r4,r4,64
- stvx v3,r0,r3
+ stvx v3,0,r3
stvx v2,r3,r9
stvx v1,r3,r10
stvx v0,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
- lvx v1,r0,r4
+ lvx v1,0,r4
lvx v0,r4,r9
addi r4,r4,32
- stvx v1,r0,r3
+ stvx v1,0,r3
stvx v0,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
- lvx v1,r0,r4
+ lvx v1,0,r4
addi r4,r4,16
- stvx v1,r0,r3
+ stvx v1,0,r3
addi r3,r3,16
/* Up to 15B to go */
@@ -499,25 +499,25 @@ _GLOBAL(memcpy_power7)
addi r4,r4,16
bf cr7*4+3,5f
- lvx v1,r0,r4
+ lvx v1,0,r4
VPERM(v8,v0,v1,v16)
addi r4,r4,16
- stvx v8,r0,r3
+ stvx v8,0,r3
addi r3,r3,16
vor v0,v1,v1
5: bf cr7*4+2,6f
- lvx v1,r0,r4
+ lvx v1,0,r4
VPERM(v8,v0,v1,v16)
lvx v0,r4,r9
VPERM(v9,v1,v0,v16)
addi r4,r4,32
- stvx v8,r0,r3
+ stvx v8,0,r3
stvx v9,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
- lvx v3,r0,r4
+ lvx v3,0,r4
VPERM(v8,v0,v3,v16)
lvx v2,r4,r9
VPERM(v9,v3,v2,v16)
@@ -526,7 +526,7 @@ _GLOBAL(memcpy_power7)
lvx v0,r4,r11
VPERM(v11,v1,v0,v16)
addi r4,r4,64
- stvx v8,r0,r3
+ stvx v8,0,r3
stvx v9,r3,r9
stvx v10,r3,r10
stvx v11,r3,r11
@@ -552,7 +552,7 @@ _GLOBAL(memcpy_power7)
*/
.align 5
8:
- lvx v7,r0,r4
+ lvx v7,0,r4
VPERM(v8,v0,v7,v16)
lvx v6,r4,r9
VPERM(v9,v7,v6,v16)
@@ -569,7 +569,7 @@ _GLOBAL(memcpy_power7)
lvx v0,r4,r16
VPERM(v15,v1,v0,v16)
addi r4,r4,128
- stvx v8,r0,r3
+ stvx v8,0,r3
stvx v9,r3,r9
stvx v10,r3,r10
stvx v11,r3,r11
@@ -590,7 +590,7 @@ _GLOBAL(memcpy_power7)
mtocrf 0x01,r6
bf cr7*4+1,9f
- lvx v3,r0,r4
+ lvx v3,0,r4
VPERM(v8,v0,v3,v16)
lvx v2,r4,r9
VPERM(v9,v3,v2,v16)
@@ -599,27 +599,27 @@ _GLOBAL(memcpy_power7)
lvx v0,r4,r11
VPERM(v11,v1,v0,v16)
addi r4,r4,64
- stvx v8,r0,r3
+ stvx v8,0,r3
stvx v9,r3,r9
stvx v10,r3,r10
stvx v11,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
- lvx v1,r0,r4
+ lvx v1,0,r4
VPERM(v8,v0,v1,v16)
lvx v0,r4,r9
VPERM(v9,v1,v0,v16)
addi r4,r4,32
- stvx v8,r0,r3
+ stvx v8,0,r3
stvx v9,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
- lvx v1,r0,r4
+ lvx v1,0,r4
VPERM(v8,v0,v1,v16)
addi r4,r4,16
- stvx v8,r0,r3
+ stvx v8,0,r3
addi r3,r3,16
/* Up to 15B to go */
diff --git a/arch/powerpc/lib/quad.S b/arch/powerpc/lib/quad.S
new file mode 100644
index 000000000000..c4d12fae8724
--- /dev/null
+++ b/arch/powerpc/lib/quad.S
@@ -0,0 +1,62 @@
+/*
+ * Quadword loads and stores
+ * for use in instruction emulation.
+ *
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+#include <linux/errno.h>
+
+/* do_lq(unsigned long ea, unsigned long *regs) */
+_GLOBAL(do_lq)
+1: lq r6, 0(r3)
+ std r6, 0(r4)
+ std r7, 8(r4)
+ li r3, 0
+ blr
+2: li r3, -EFAULT
+ blr
+ EX_TABLE(1b, 2b)
+
+/* do_stq(unsigned long ea, unsigned long val0, unsigned long val1) */
+_GLOBAL(do_stq)
+1: stq r4, 0(r3)
+ li r3, 0
+ blr
+2: li r3, -EFAULT
+ blr
+ EX_TABLE(1b, 2b)
+
+/* do_lqarx(unsigned long ea, unsigned long *regs) */
+_GLOBAL(do_lqarx)
+1: PPC_LQARX(6, 0, 3, 0)
+ std r6, 0(r4)
+ std r7, 8(r4)
+ li r3, 0
+ blr
+2: li r3, -EFAULT
+ blr
+ EX_TABLE(1b, 2b)
+
+/* do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1,
+ unsigned int *crp) */
+
+_GLOBAL(do_stqcx)
+1: PPC_STQCX(4, 0, 3)
+ mfcr r5
+ stw r5, 0(r6)
+ li r3, 0
+ blr
+2: li r3, -EFAULT
+ blr
+ EX_TABLE(1b, 2b)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index ee33327686ae..fb9f58b868e7 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -36,14 +36,33 @@ extern char system_call_common[];
/*
* Functions in ldstfp.S
*/
-extern int do_lfs(int rn, unsigned long ea);
-extern int do_lfd(int rn, unsigned long ea);
-extern int do_stfs(int rn, unsigned long ea);
-extern int do_stfd(int rn, unsigned long ea);
-extern int do_lvx(int rn, unsigned long ea);
-extern int do_stvx(int rn, unsigned long ea);
-extern int do_lxvd2x(int rn, unsigned long ea);
-extern int do_stxvd2x(int rn, unsigned long ea);
+extern void get_fpr(int rn, double *p);
+extern void put_fpr(int rn, const double *p);
+extern void get_vr(int rn, __vector128 *p);
+extern void put_vr(int rn, __vector128 *p);
+extern void load_vsrn(int vsr, const void *p);
+extern void store_vsrn(int vsr, void *p);
+extern void conv_sp_to_dp(const float *sp, double *dp);
+extern void conv_dp_to_sp(const double *dp, float *sp);
+#endif
+
+#ifdef __powerpc64__
+/*
+ * Functions in quad.S
+ */
+extern int do_lq(unsigned long ea, unsigned long *regs);
+extern int do_stq(unsigned long ea, unsigned long val0, unsigned long val1);
+extern int do_lqarx(unsigned long ea, unsigned long *regs);
+extern int do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1,
+ unsigned int *crp);
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define IS_LE 1
+#define IS_BE 0
+#else
+#define IS_LE 0
+#define IS_BE 1
#endif
/*
@@ -62,15 +81,17 @@ static nokprobe_inline unsigned long truncate_if_32bit(unsigned long msr,
/*
* Determine whether a conditional branch instruction would branch.
*/
-static nokprobe_inline int branch_taken(unsigned int instr, struct pt_regs *regs)
+static nokprobe_inline int branch_taken(unsigned int instr,
+ const struct pt_regs *regs,
+ struct instruction_op *op)
{
unsigned int bo = (instr >> 21) & 0x1f;
unsigned int bi;
if ((bo & 4) == 0) {
/* decrement counter */
- --regs->ctr;
- if (((bo >> 1) & 1) ^ (regs->ctr == 0))
+ op->type |= DECCTR;
+ if (((bo >> 1) & 1) ^ (regs->ctr == 1))
return 0;
}
if ((bo & 0x10) == 0) {
@@ -82,17 +103,26 @@ static nokprobe_inline int branch_taken(unsigned int instr, struct pt_regs *regs
return 1;
}
-static nokprobe_inline long address_ok(struct pt_regs *regs, unsigned long ea, int nb)
+static nokprobe_inline long address_ok(struct pt_regs *regs,
+ unsigned long ea, int nb)
{
if (!user_mode(regs))
return 1;
- return __access_ok(ea, nb, USER_DS);
+ if (__access_ok(ea, nb, USER_DS))
+ return 1;
+ if (__access_ok(ea, 1, USER_DS))
+ /* Access overlaps the end of the user region */
+ regs->dar = USER_DS.seg;
+ else
+ regs->dar = ea;
+ return 0;
}
/*
* Calculate effective address for a D-form instruction
*/
-static nokprobe_inline unsigned long dform_ea(unsigned int instr, struct pt_regs *regs)
+static nokprobe_inline unsigned long dform_ea(unsigned int instr,
+ const struct pt_regs *regs)
{
int ra;
unsigned long ea;
@@ -102,14 +132,15 @@ static nokprobe_inline unsigned long dform_ea(unsigned int instr, struct pt_regs
if (ra)
ea += regs->gpr[ra];
- return truncate_if_32bit(regs->msr, ea);
+ return ea;
}
#ifdef __powerpc64__
/*
* Calculate effective address for a DS-form instruction
*/
-static nokprobe_inline unsigned long dsform_ea(unsigned int instr, struct pt_regs *regs)
+static nokprobe_inline unsigned long dsform_ea(unsigned int instr,
+ const struct pt_regs *regs)
{
int ra;
unsigned long ea;
@@ -119,7 +150,24 @@ static nokprobe_inline unsigned long dsform_ea(unsigned int instr, struct pt_reg
if (ra)
ea += regs->gpr[ra];
- return truncate_if_32bit(regs->msr, ea);
+ return ea;
+}
+
+/*
+ * Calculate effective address for a DQ-form instruction
+ */
+static nokprobe_inline unsigned long dqform_ea(unsigned int instr,
+ const struct pt_regs *regs)
+{
+ int ra;
+ unsigned long ea;
+
+ ra = (instr >> 16) & 0x1f;
+ ea = (signed short) (instr & ~0xf); /* sign-extend */
+ if (ra)
+ ea += regs->gpr[ra];
+
+ return ea;
}
#endif /* __powerpc64 */
@@ -127,7 +175,7 @@ static nokprobe_inline unsigned long dsform_ea(unsigned int instr, struct pt_reg
* Calculate effective address for an X-form instruction
*/
static nokprobe_inline unsigned long xform_ea(unsigned int instr,
- struct pt_regs *regs)
+ const struct pt_regs *regs)
{
int ra, rb;
unsigned long ea;
@@ -138,7 +186,7 @@ static nokprobe_inline unsigned long xform_ea(unsigned int instr,
if (ra)
ea += regs->gpr[ra];
- return truncate_if_32bit(regs->msr, ea);
+ return ea;
}
/*
@@ -151,7 +199,6 @@ static nokprobe_inline unsigned long max_align(unsigned long x)
return x & -x; /* isolates rightmost bit */
}
-
static nokprobe_inline unsigned long byterev_2(unsigned long x)
{
return ((x >> 8) & 0xff) | ((x & 0xff) << 8);
@@ -170,8 +217,36 @@ static nokprobe_inline unsigned long byterev_8(unsigned long x)
}
#endif
+static nokprobe_inline void do_byte_reverse(void *ptr, int nb)
+{
+ switch (nb) {
+ case 2:
+ *(u16 *)ptr = byterev_2(*(u16 *)ptr);
+ break;
+ case 4:
+ *(u32 *)ptr = byterev_4(*(u32 *)ptr);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ *(unsigned long *)ptr = byterev_8(*(unsigned long *)ptr);
+ break;
+ case 16: {
+ unsigned long *up = (unsigned long *)ptr;
+ unsigned long tmp;
+ tmp = byterev_8(up[0]);
+ up[0] = byterev_8(up[1]);
+ up[1] = tmp;
+ break;
+ }
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+
static nokprobe_inline int read_mem_aligned(unsigned long *dest,
- unsigned long ea, int nb)
+ unsigned long ea, int nb,
+ struct pt_regs *regs)
{
int err = 0;
unsigned long x = 0;
@@ -194,59 +269,77 @@ static nokprobe_inline int read_mem_aligned(unsigned long *dest,
}
if (!err)
*dest = x;
+ else
+ regs->dar = ea;
return err;
}
-static nokprobe_inline int read_mem_unaligned(unsigned long *dest,
- unsigned long ea, int nb, struct pt_regs *regs)
+/*
+ * Copy from userspace to a buffer, using the largest possible
+ * aligned accesses, up to sizeof(long).
+ */
+static int nokprobe_inline copy_mem_in(u8 *dest, unsigned long ea, int nb,
+ struct pt_regs *regs)
{
- int err;
- unsigned long x, b, c;
-#ifdef __LITTLE_ENDIAN__
- int len = nb; /* save a copy of the length for byte reversal */
-#endif
+ int err = 0;
+ int c;
- /* unaligned, do this in pieces */
- x = 0;
for (; nb > 0; nb -= c) {
-#ifdef __LITTLE_ENDIAN__
- c = 1;
-#endif
-#ifdef __BIG_ENDIAN__
c = max_align(ea);
-#endif
if (c > nb)
c = max_align(nb);
- err = read_mem_aligned(&b, ea, c);
- if (err)
- return err;
- x = (x << (8 * c)) + b;
- ea += c;
- }
-#ifdef __LITTLE_ENDIAN__
- switch (len) {
- case 2:
- *dest = byterev_2(x);
- break;
- case 4:
- *dest = byterev_4(x);
- break;
+ switch (c) {
+ case 1:
+ err = __get_user(*dest, (unsigned char __user *) ea);
+ break;
+ case 2:
+ err = __get_user(*(u16 *)dest,
+ (unsigned short __user *) ea);
+ break;
+ case 4:
+ err = __get_user(*(u32 *)dest,
+ (unsigned int __user *) ea);
+ break;
#ifdef __powerpc64__
- case 8:
- *dest = byterev_8(x);
- break;
+ case 8:
+ err = __get_user(*(unsigned long *)dest,
+ (unsigned long __user *) ea);
+ break;
#endif
+ }
+ if (err) {
+ regs->dar = ea;
+ return err;
+ }
+ dest += c;
+ ea += c;
}
-#endif
-#ifdef __BIG_ENDIAN__
- *dest = x;
-#endif
return 0;
}
+static nokprobe_inline int read_mem_unaligned(unsigned long *dest,
+ unsigned long ea, int nb,
+ struct pt_regs *regs)
+{
+ union {
+ unsigned long ul;
+ u8 b[sizeof(unsigned long)];
+ } u;
+ int i;
+ int err;
+
+ u.ul = 0;
+ i = IS_BE ? sizeof(unsigned long) - nb : 0;
+ err = copy_mem_in(&u.b[i], ea, nb, regs);
+ if (!err)
+ *dest = u.ul;
+ return err;
+}
+
/*
* Read memory at address ea for nb bytes, return 0 for success
- * or -EFAULT if an error occurred.
+ * or -EFAULT if an error occurred. N.B. nb must be 1, 2, 4 or 8.
+ * If nb < sizeof(long), the result is right-justified on BE systems.
*/
static int read_mem(unsigned long *dest, unsigned long ea, int nb,
struct pt_regs *regs)
@@ -254,13 +347,14 @@ static int read_mem(unsigned long *dest, unsigned long ea, int nb,
if (!address_ok(regs, ea, nb))
return -EFAULT;
if ((ea & (nb - 1)) == 0)
- return read_mem_aligned(dest, ea, nb);
+ return read_mem_aligned(dest, ea, nb, regs);
return read_mem_unaligned(dest, ea, nb, regs);
}
NOKPROBE_SYMBOL(read_mem);
static nokprobe_inline int write_mem_aligned(unsigned long val,
- unsigned long ea, int nb)
+ unsigned long ea, int nb,
+ struct pt_regs *regs)
{
int err = 0;
@@ -280,51 +374,72 @@ static nokprobe_inline int write_mem_aligned(unsigned long val,
break;
#endif
}
+ if (err)
+ regs->dar = ea;
return err;
}
-static nokprobe_inline int write_mem_unaligned(unsigned long val,
- unsigned long ea, int nb, struct pt_regs *regs)
+/*
+ * Copy from a buffer to userspace, using the largest possible
+ * aligned accesses, up to sizeof(long).
+ */
+static int nokprobe_inline copy_mem_out(u8 *dest, unsigned long ea, int nb,
+ struct pt_regs *regs)
{
- int err;
- unsigned long c;
+ int err = 0;
+ int c;
-#ifdef __LITTLE_ENDIAN__
- switch (nb) {
- case 2:
- val = byterev_2(val);
- break;
- case 4:
- val = byterev_4(val);
- break;
-#ifdef __powerpc64__
- case 8:
- val = byterev_8(val);
- break;
-#endif
- }
-#endif
- /* unaligned or little-endian, do this in pieces */
for (; nb > 0; nb -= c) {
-#ifdef __LITTLE_ENDIAN__
- c = 1;
-#endif
-#ifdef __BIG_ENDIAN__
c = max_align(ea);
-#endif
if (c > nb)
c = max_align(nb);
- err = write_mem_aligned(val >> (nb - c) * 8, ea, c);
- if (err)
+ switch (c) {
+ case 1:
+ err = __put_user(*dest, (unsigned char __user *) ea);
+ break;
+ case 2:
+ err = __put_user(*(u16 *)dest,
+ (unsigned short __user *) ea);
+ break;
+ case 4:
+ err = __put_user(*(u32 *)dest,
+ (unsigned int __user *) ea);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ err = __put_user(*(unsigned long *)dest,
+ (unsigned long __user *) ea);
+ break;
+#endif
+ }
+ if (err) {
+ regs->dar = ea;
return err;
+ }
+ dest += c;
ea += c;
}
return 0;
}
+static nokprobe_inline int write_mem_unaligned(unsigned long val,
+ unsigned long ea, int nb,
+ struct pt_regs *regs)
+{
+ union {
+ unsigned long ul;
+ u8 b[sizeof(unsigned long)];
+ } u;
+ int i;
+
+ u.ul = val;
+ i = IS_BE ? sizeof(unsigned long) - nb : 0;
+ return copy_mem_out(&u.b[i], ea, nb, regs);
+}
+
/*
* Write memory at address ea for nb bytes, return 0 for success
- * or -EFAULT if an error occurred.
+ * or -EFAULT if an error occurred. N.B. nb must be 1, 2, 4 or 8.
*/
static int write_mem(unsigned long val, unsigned long ea, int nb,
struct pt_regs *regs)
@@ -332,163 +447,465 @@ static int write_mem(unsigned long val, unsigned long ea, int nb,
if (!address_ok(regs, ea, nb))
return -EFAULT;
if ((ea & (nb - 1)) == 0)
- return write_mem_aligned(val, ea, nb);
+ return write_mem_aligned(val, ea, nb, regs);
return write_mem_unaligned(val, ea, nb, regs);
}
NOKPROBE_SYMBOL(write_mem);
#ifdef CONFIG_PPC_FPU
/*
- * Check the address and alignment, and call func to do the actual
- * load or store.
+ * These access either the real FP register or the image in the
+ * thread_struct, depending on regs->msr & MSR_FP.
*/
-static int do_fp_load(int rn, int (*func)(int, unsigned long),
- unsigned long ea, int nb,
- struct pt_regs *regs)
+static int do_fp_load(struct instruction_op *op, unsigned long ea,
+ struct pt_regs *regs, bool cross_endian)
{
- int err;
+ int err, rn, nb;
union {
- double dbl;
- unsigned long ul[2];
- struct {
-#ifdef __BIG_ENDIAN__
- unsigned _pad_;
- unsigned word;
-#endif
-#ifdef __LITTLE_ENDIAN__
- unsigned word;
- unsigned _pad_;
-#endif
- } single;
- } data;
- unsigned long ptr;
-
+ int i;
+ unsigned int u;
+ float f;
+ double d[2];
+ unsigned long l[2];
+ u8 b[2 * sizeof(double)];
+ } u;
+
+ nb = GETSIZE(op->type);
if (!address_ok(regs, ea, nb))
return -EFAULT;
- if ((ea & 3) == 0)
- return (*func)(rn, ea);
- ptr = (unsigned long) &data.ul;
- if (sizeof(unsigned long) == 8 || nb == 4) {
- err = read_mem_unaligned(&data.ul[0], ea, nb, regs);
- if (nb == 4)
- ptr = (unsigned long)&(data.single.word);
- } else {
- /* reading a double on 32-bit */
- err = read_mem_unaligned(&data.ul[0], ea, 4, regs);
- if (!err)
- err = read_mem_unaligned(&data.ul[1], ea + 4, 4, regs);
- }
+ rn = op->reg;
+ err = copy_mem_in(u.b, ea, nb, regs);
if (err)
return err;
- return (*func)(rn, ptr);
+ if (unlikely(cross_endian)) {
+ do_byte_reverse(u.b, min(nb, 8));
+ if (nb == 16)
+ do_byte_reverse(&u.b[8], 8);
+ }
+ preempt_disable();
+ if (nb == 4) {
+ if (op->type & FPCONV)
+ conv_sp_to_dp(&u.f, &u.d[0]);
+ else if (op->type & SIGNEXT)
+ u.l[0] = u.i;
+ else
+ u.l[0] = u.u;
+ }
+ if (regs->msr & MSR_FP)
+ put_fpr(rn, &u.d[0]);
+ else
+ current->thread.TS_FPR(rn) = u.l[0];
+ if (nb == 16) {
+ /* lfdp */
+ rn |= 1;
+ if (regs->msr & MSR_FP)
+ put_fpr(rn, &u.d[1]);
+ else
+ current->thread.TS_FPR(rn) = u.l[1];
+ }
+ preempt_enable();
+ return 0;
}
NOKPROBE_SYMBOL(do_fp_load);
-static int do_fp_store(int rn, int (*func)(int, unsigned long),
- unsigned long ea, int nb,
- struct pt_regs *regs)
+static int do_fp_store(struct instruction_op *op, unsigned long ea,
+ struct pt_regs *regs, bool cross_endian)
{
- int err;
+ int rn, nb;
union {
- double dbl;
- unsigned long ul[2];
- struct {
-#ifdef __BIG_ENDIAN__
- unsigned _pad_;
- unsigned word;
-#endif
-#ifdef __LITTLE_ENDIAN__
- unsigned word;
- unsigned _pad_;
-#endif
- } single;
- } data;
- unsigned long ptr;
-
+ unsigned int u;
+ float f;
+ double d[2];
+ unsigned long l[2];
+ u8 b[2 * sizeof(double)];
+ } u;
+
+ nb = GETSIZE(op->type);
if (!address_ok(regs, ea, nb))
return -EFAULT;
- if ((ea & 3) == 0)
- return (*func)(rn, ea);
- ptr = (unsigned long) &data.ul[0];
- if (sizeof(unsigned long) == 8 || nb == 4) {
- if (nb == 4)
- ptr = (unsigned long)&(data.single.word);
- err = (*func)(rn, ptr);
- if (err)
- return err;
- err = write_mem_unaligned(data.ul[0], ea, nb, regs);
- } else {
- /* writing a double on 32-bit */
- err = (*func)(rn, ptr);
- if (err)
- return err;
- err = write_mem_unaligned(data.ul[0], ea, 4, regs);
- if (!err)
- err = write_mem_unaligned(data.ul[1], ea + 4, 4, regs);
+ rn = op->reg;
+ preempt_disable();
+ if (regs->msr & MSR_FP)
+ get_fpr(rn, &u.d[0]);
+ else
+ u.l[0] = current->thread.TS_FPR(rn);
+ if (nb == 4) {
+ if (op->type & FPCONV)
+ conv_dp_to_sp(&u.d[0], &u.f);
+ else
+ u.u = u.l[0];
}
- return err;
+ if (nb == 16) {
+ rn |= 1;
+ if (regs->msr & MSR_FP)
+ get_fpr(rn, &u.d[1]);
+ else
+ u.l[1] = current->thread.TS_FPR(rn);
+ }
+ preempt_enable();
+ if (unlikely(cross_endian)) {
+ do_byte_reverse(u.b, min(nb, 8));
+ if (nb == 16)
+ do_byte_reverse(&u.b[8], 8);
+ }
+ return copy_mem_out(u.b, ea, nb, regs);
}
NOKPROBE_SYMBOL(do_fp_store);
#endif
#ifdef CONFIG_ALTIVEC
/* For Altivec/VMX, no need to worry about alignment */
-static nokprobe_inline int do_vec_load(int rn, int (*func)(int, unsigned long),
- unsigned long ea, struct pt_regs *regs)
+static nokprobe_inline int do_vec_load(int rn, unsigned long ea,
+ int size, struct pt_regs *regs,
+ bool cross_endian)
{
+ int err;
+ union {
+ __vector128 v;
+ u8 b[sizeof(__vector128)];
+ } u = {};
+
if (!address_ok(regs, ea & ~0xfUL, 16))
return -EFAULT;
- return (*func)(rn, ea);
+ /* align to multiple of size */
+ ea &= ~(size - 1);
+ err = copy_mem_in(&u.b[ea & 0xf], ea, size, regs);
+ if (err)
+ return err;
+ if (unlikely(cross_endian))
+ do_byte_reverse(&u.b[ea & 0xf], size);
+ preempt_disable();
+ if (regs->msr & MSR_VEC)
+ put_vr(rn, &u.v);
+ else
+ current->thread.vr_state.vr[rn] = u.v;
+ preempt_enable();
+ return 0;
}
-static nokprobe_inline int do_vec_store(int rn, int (*func)(int, unsigned long),
- unsigned long ea, struct pt_regs *regs)
+static nokprobe_inline int do_vec_store(int rn, unsigned long ea,
+ int size, struct pt_regs *regs,
+ bool cross_endian)
{
+ union {
+ __vector128 v;
+ u8 b[sizeof(__vector128)];
+ } u;
+
if (!address_ok(regs, ea & ~0xfUL, 16))
return -EFAULT;
- return (*func)(rn, ea);
+ /* align to multiple of size */
+ ea &= ~(size - 1);
+
+ preempt_disable();
+ if (regs->msr & MSR_VEC)
+ get_vr(rn, &u.v);
+ else
+ u.v = current->thread.vr_state.vr[rn];
+ preempt_enable();
+ if (unlikely(cross_endian))
+ do_byte_reverse(&u.b[ea & 0xf], size);
+ return copy_mem_out(&u.b[ea & 0xf], ea, size, regs);
}
#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_VSX
-static nokprobe_inline int do_vsx_load(int rn, int (*func)(int, unsigned long),
- unsigned long ea, struct pt_regs *regs)
+#ifdef __powerpc64__
+static nokprobe_inline int emulate_lq(struct pt_regs *regs, unsigned long ea,
+ int reg, bool cross_endian)
{
int err;
- unsigned long val[2];
if (!address_ok(regs, ea, 16))
return -EFAULT;
- if ((ea & 3) == 0)
- return (*func)(rn, ea);
- err = read_mem_unaligned(&val[0], ea, 8, regs);
- if (!err)
- err = read_mem_unaligned(&val[1], ea + 8, 8, regs);
- if (!err)
- err = (*func)(rn, (unsigned long) &val[0]);
+ /* if aligned, should be atomic */
+ if ((ea & 0xf) == 0) {
+ err = do_lq(ea, &regs->gpr[reg]);
+ } else {
+ err = read_mem(&regs->gpr[reg + IS_LE], ea, 8, regs);
+ if (!err)
+ err = read_mem(&regs->gpr[reg + IS_BE], ea + 8, 8, regs);
+ }
+ if (!err && unlikely(cross_endian))
+ do_byte_reverse(&regs->gpr[reg], 16);
return err;
}
-static nokprobe_inline int do_vsx_store(int rn, int (*func)(int, unsigned long),
- unsigned long ea, struct pt_regs *regs)
+static nokprobe_inline int emulate_stq(struct pt_regs *regs, unsigned long ea,
+ int reg, bool cross_endian)
{
int err;
- unsigned long val[2];
+ unsigned long vals[2];
if (!address_ok(regs, ea, 16))
return -EFAULT;
- if ((ea & 3) == 0)
- return (*func)(rn, ea);
- err = (*func)(rn, (unsigned long) &val[0]);
- if (err)
- return err;
- err = write_mem_unaligned(val[0], ea, 8, regs);
+ vals[0] = regs->gpr[reg];
+ vals[1] = regs->gpr[reg + 1];
+ if (unlikely(cross_endian))
+ do_byte_reverse(vals, 16);
+
+ /* if aligned, should be atomic */
+ if ((ea & 0xf) == 0)
+ return do_stq(ea, vals[0], vals[1]);
+
+ err = write_mem(vals[IS_LE], ea, 8, regs);
if (!err)
- err = write_mem_unaligned(val[1], ea + 8, 8, regs);
+ err = write_mem(vals[IS_BE], ea + 8, 8, regs);
return err;
}
+#endif /* __powerpc64 */
+
+#ifdef CONFIG_VSX
+void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
+ const void *mem, bool rev)
+{
+ int size, read_size;
+ int i, j;
+ const unsigned int *wp;
+ const unsigned short *hp;
+ const unsigned char *bp;
+
+ size = GETSIZE(op->type);
+ reg->d[0] = reg->d[1] = 0;
+
+ switch (op->element_size) {
+ case 16:
+ /* whole vector; lxv[x] or lxvl[l] */
+ if (size == 0)
+ break;
+ memcpy(reg, mem, size);
+ if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
+ rev = !rev;
+ if (rev)
+ do_byte_reverse(reg, 16);
+ break;
+ case 8:
+ /* scalar loads, lxvd2x, lxvdsx */
+ read_size = (size >= 8) ? 8 : size;
+ i = IS_LE ? 8 : 8 - read_size;
+ memcpy(&reg->b[i], mem, read_size);
+ if (rev)
+ do_byte_reverse(&reg->b[i], 8);
+ if (size < 8) {
+ if (op->type & SIGNEXT) {
+ /* size == 4 is the only case here */
+ reg->d[IS_LE] = (signed int) reg->d[IS_LE];
+ } else if (op->vsx_flags & VSX_FPCONV) {
+ preempt_disable();
+ conv_sp_to_dp(&reg->fp[1 + IS_LE],
+ &reg->dp[IS_LE]);
+ preempt_enable();
+ }
+ } else {
+ if (size == 16) {
+ unsigned long v = *(unsigned long *)(mem + 8);
+ reg->d[IS_BE] = !rev ? v : byterev_8(v);
+ } else if (op->vsx_flags & VSX_SPLAT)
+ reg->d[IS_BE] = reg->d[IS_LE];
+ }
+ break;
+ case 4:
+ /* lxvw4x, lxvwsx */
+ wp = mem;
+ for (j = 0; j < size / 4; ++j) {
+ i = IS_LE ? 3 - j : j;
+ reg->w[i] = !rev ? *wp++ : byterev_4(*wp++);
+ }
+ if (op->vsx_flags & VSX_SPLAT) {
+ u32 val = reg->w[IS_LE ? 3 : 0];
+ for (; j < 4; ++j) {
+ i = IS_LE ? 3 - j : j;
+ reg->w[i] = val;
+ }
+ }
+ break;
+ case 2:
+ /* lxvh8x */
+ hp = mem;
+ for (j = 0; j < size / 2; ++j) {
+ i = IS_LE ? 7 - j : j;
+ reg->h[i] = !rev ? *hp++ : byterev_2(*hp++);
+ }
+ break;
+ case 1:
+ /* lxvb16x */
+ bp = mem;
+ for (j = 0; j < size; ++j) {
+ i = IS_LE ? 15 - j : j;
+ reg->b[i] = *bp++;
+ }
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(emulate_vsx_load);
+NOKPROBE_SYMBOL(emulate_vsx_load);
+
+void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
+ void *mem, bool rev)
+{
+ int size, write_size;
+ int i, j;
+ union vsx_reg buf;
+ unsigned int *wp;
+ unsigned short *hp;
+ unsigned char *bp;
+
+ size = GETSIZE(op->type);
+
+ switch (op->element_size) {
+ case 16:
+ /* stxv, stxvx, stxvl, stxvll */
+ if (size == 0)
+ break;
+ if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
+ rev = !rev;
+ if (rev) {
+ /* reverse 16 bytes */
+ buf.d[0] = byterev_8(reg->d[1]);
+ buf.d[1] = byterev_8(reg->d[0]);
+ reg = &buf;
+ }
+ memcpy(mem, reg, size);
+ break;
+ case 8:
+ /* scalar stores, stxvd2x */
+ write_size = (size >= 8) ? 8 : size;
+ i = IS_LE ? 8 : 8 - write_size;
+ if (size < 8 && op->vsx_flags & VSX_FPCONV) {
+ buf.d[0] = buf.d[1] = 0;
+ preempt_disable();
+ conv_dp_to_sp(&reg->dp[IS_LE], &buf.fp[1 + IS_LE]);
+ preempt_enable();
+ reg = &buf;
+ }
+ memcpy(mem, &reg->b[i], write_size);
+ if (size == 16)
+ memcpy(mem + 8, &reg->d[IS_BE], 8);
+ if (unlikely(rev)) {
+ do_byte_reverse(mem, write_size);
+ if (size == 16)
+ do_byte_reverse(mem + 8, 8);
+ }
+ break;
+ case 4:
+ /* stxvw4x */
+ wp = mem;
+ for (j = 0; j < size / 4; ++j) {
+ i = IS_LE ? 3 - j : j;
+ *wp++ = !rev ? reg->w[i] : byterev_4(reg->w[i]);
+ }
+ break;
+ case 2:
+ /* stxvh8x */
+ hp = mem;
+ for (j = 0; j < size / 2; ++j) {
+ i = IS_LE ? 7 - j : j;
+ *hp++ = !rev ? reg->h[i] : byterev_2(reg->h[i]);
+ }
+ break;
+ case 1:
+ /* stxvb16x */
+ bp = mem;
+ for (j = 0; j < size; ++j) {
+ i = IS_LE ? 15 - j : j;
+ *bp++ = reg->b[i];
+ }
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(emulate_vsx_store);
+NOKPROBE_SYMBOL(emulate_vsx_store);
+
+static nokprobe_inline int do_vsx_load(struct instruction_op *op,
+ unsigned long ea, struct pt_regs *regs,
+ bool cross_endian)
+{
+ int reg = op->reg;
+ u8 mem[16];
+ union vsx_reg buf;
+ int size = GETSIZE(op->type);
+
+ if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
+ return -EFAULT;
+
+ emulate_vsx_load(op, &buf, mem, cross_endian);
+ preempt_disable();
+ if (reg < 32) {
+ /* FP regs + extensions */
+ if (regs->msr & MSR_FP) {
+ load_vsrn(reg, &buf);
+ } else {
+ current->thread.fp_state.fpr[reg][0] = buf.d[0];
+ current->thread.fp_state.fpr[reg][1] = buf.d[1];
+ }
+ } else {
+ if (regs->msr & MSR_VEC)
+ load_vsrn(reg, &buf);
+ else
+ current->thread.vr_state.vr[reg - 32] = buf.v;
+ }
+ preempt_enable();
+ return 0;
+}
+
+static nokprobe_inline int do_vsx_store(struct instruction_op *op,
+ unsigned long ea, struct pt_regs *regs,
+ bool cross_endian)
+{
+ int reg = op->reg;
+ u8 mem[16];
+ union vsx_reg buf;
+ int size = GETSIZE(op->type);
+
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+
+ preempt_disable();
+ if (reg < 32) {
+ /* FP regs + extensions */
+ if (regs->msr & MSR_FP) {
+ store_vsrn(reg, &buf);
+ } else {
+ buf.d[0] = current->thread.fp_state.fpr[reg][0];
+ buf.d[1] = current->thread.fp_state.fpr[reg][1];
+ }
+ } else {
+ if (regs->msr & MSR_VEC)
+ store_vsrn(reg, &buf);
+ else
+ buf.v = current->thread.vr_state.vr[reg - 32];
+ }
+ preempt_enable();
+ emulate_vsx_store(op, &buf, mem, cross_endian);
+ return copy_mem_out(mem, ea, size, regs);
+}
#endif /* CONFIG_VSX */
+int emulate_dcbz(unsigned long ea, struct pt_regs *regs)
+{
+ int err;
+ unsigned long i, size;
+
+#ifdef __powerpc64__
+ size = ppc64_caches.l1d.block_size;
+ if (!(regs->msr & MSR_64BIT))
+ ea &= 0xffffffffUL;
+#else
+ size = L1_CACHE_BYTES;
+#endif
+ ea &= ~(size - 1);
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+ for (i = 0; i < size; i += sizeof(long)) {
+ err = __put_user(0, (unsigned long __user *) (ea + i));
+ if (err) {
+ regs->dar = ea;
+ return err;
+ }
+ }
+ return 0;
+}
+NOKPROBE_SYMBOL(emulate_dcbz);
+
#define __put_user_asmx(x, addr, err, op, cr) \
__asm__ __volatile__( \
"1: " op " %2,0,%3\n" \
@@ -526,24 +943,27 @@ static nokprobe_inline int do_vsx_store(int rn, int (*func)(int, unsigned long),
: "=r" (err) \
: "r" (addr), "i" (-EFAULT), "0" (err))
-static nokprobe_inline void set_cr0(struct pt_regs *regs, int rd)
+static nokprobe_inline void set_cr0(const struct pt_regs *regs,
+ struct instruction_op *op, int rd)
{
long val = regs->gpr[rd];
- regs->ccr = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
+ op->type |= SETCC;
+ op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
#ifdef __powerpc64__
if (!(regs->msr & MSR_64BIT))
val = (int) val;
#endif
if (val < 0)
- regs->ccr |= 0x80000000;
+ op->ccval |= 0x80000000;
else if (val > 0)
- regs->ccr |= 0x40000000;
+ op->ccval |= 0x40000000;
else
- regs->ccr |= 0x20000000;
+ op->ccval |= 0x20000000;
}
-static nokprobe_inline void add_with_carry(struct pt_regs *regs, int rd,
+static nokprobe_inline void add_with_carry(const struct pt_regs *regs,
+ struct instruction_op *op, int rd,
unsigned long val1, unsigned long val2,
unsigned long carry_in)
{
@@ -551,24 +971,29 @@ static nokprobe_inline void add_with_carry(struct pt_regs *regs, int rd,
if (carry_in)
++val;
- regs->gpr[rd] = val;
+ op->type = COMPUTE + SETREG + SETXER;
+ op->reg = rd;
+ op->val = val;
#ifdef __powerpc64__
if (!(regs->msr & MSR_64BIT)) {
val = (unsigned int) val;
val1 = (unsigned int) val1;
}
#endif
+ op->xerval = regs->xer;
if (val < val1 || (carry_in && val == val1))
- regs->xer |= XER_CA;
+ op->xerval |= XER_CA;
else
- regs->xer &= ~XER_CA;
+ op->xerval &= ~XER_CA;
}
-static nokprobe_inline void do_cmp_signed(struct pt_regs *regs, long v1, long v2,
- int crfld)
+static nokprobe_inline void do_cmp_signed(const struct pt_regs *regs,
+ struct instruction_op *op,
+ long v1, long v2, int crfld)
{
unsigned int crval, shift;
+ op->type = COMPUTE + SETCC;
crval = (regs->xer >> 31) & 1; /* get SO bit */
if (v1 < v2)
crval |= 8;
@@ -577,14 +1002,17 @@ static nokprobe_inline void do_cmp_signed(struct pt_regs *regs, long v1, long v2
else
crval |= 2;
shift = (7 - crfld) * 4;
- regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift);
+ op->ccval = (regs->ccr & ~(0xf << shift)) | (crval << shift);
}
-static nokprobe_inline void do_cmp_unsigned(struct pt_regs *regs, unsigned long v1,
- unsigned long v2, int crfld)
+static nokprobe_inline void do_cmp_unsigned(const struct pt_regs *regs,
+ struct instruction_op *op,
+ unsigned long v1,
+ unsigned long v2, int crfld)
{
unsigned int crval, shift;
+ op->type = COMPUTE + SETCC;
crval = (regs->xer >> 31) & 1; /* get SO bit */
if (v1 < v2)
crval |= 8;
@@ -593,7 +1021,90 @@ static nokprobe_inline void do_cmp_unsigned(struct pt_regs *regs, unsigned long
else
crval |= 2;
shift = (7 - crfld) * 4;
- regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift);
+ op->ccval = (regs->ccr & ~(0xf << shift)) | (crval << shift);
+}
+
+static nokprobe_inline void do_cmpb(const struct pt_regs *regs,
+ struct instruction_op *op,
+ unsigned long v1, unsigned long v2)
+{
+ unsigned long long out_val, mask;
+ int i;
+
+ out_val = 0;
+ for (i = 0; i < 8; i++) {
+ mask = 0xffUL << (i * 8);
+ if ((v1 & mask) == (v2 & mask))
+ out_val |= mask;
+ }
+ op->val = out_val;
+}
+
+/*
+ * The size parameter is used to adjust the equivalent popcnt instruction.
+ * popcntb = 8, popcntw = 32, popcntd = 64
+ */
+static nokprobe_inline void do_popcnt(const struct pt_regs *regs,
+ struct instruction_op *op,
+ unsigned long v1, int size)
+{
+ unsigned long long out = v1;
+
+ out -= (out >> 1) & 0x5555555555555555;
+ out = (0x3333333333333333 & out) + (0x3333333333333333 & (out >> 2));
+ out = (out + (out >> 4)) & 0x0f0f0f0f0f0f0f0f;
+
+ if (size == 8) { /* popcntb */
+ op->val = out;
+ return;
+ }
+ out += out >> 8;
+ out += out >> 16;
+ if (size == 32) { /* popcntw */
+ op->val = out & 0x0000003f0000003f;
+ return;
+ }
+
+ out = (out + (out >> 32)) & 0x7f;
+ op->val = out; /* popcntd */
+}
+
+#ifdef CONFIG_PPC64
+static nokprobe_inline void do_bpermd(const struct pt_regs *regs,
+ struct instruction_op *op,
+ unsigned long v1, unsigned long v2)
+{
+ unsigned char perm, idx;
+ unsigned int i;
+
+ perm = 0;
+ for (i = 0; i < 8; i++) {
+ idx = (v1 >> (i * 8)) & 0xff;
+ if (idx < 64)
+ if (v2 & PPC_BIT(idx))
+ perm |= 1 << i;
+ }
+ op->val = perm;
+}
+#endif /* CONFIG_PPC64 */
+/*
+ * The size parameter adjusts the equivalent prty instruction.
+ * prtyw = 32, prtyd = 64
+ */
+static nokprobe_inline void do_prty(const struct pt_regs *regs,
+ struct instruction_op *op,
+ unsigned long v, int size)
+{
+ unsigned long long res = v ^ (v >> 8);
+
+ res ^= res >> 16;
+ if (size == 32) { /* prtyw */
+ op->val = res & 0x0000000100000001;
+ return;
+ }
+
+ res ^= res >> 32;
+ op->val = res & 1; /* prtyd */
}
static nokprobe_inline int trap_compare(long v1, long v2)
@@ -629,14 +1140,18 @@ static nokprobe_inline int trap_compare(long v1, long v2)
#define ROTATE(x, n) ((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x))
/*
- * Decode an instruction, and execute it if that can be done just by
- * modifying *regs (i.e. integer arithmetic and logical instructions,
- * branches, and barrier instructions).
- * Returns 1 if the instruction has been executed, or 0 if not.
- * Sets *op to indicate what the instruction does.
+ * Decode an instruction, and return information about it in *op
+ * without changing *regs.
+ * Integer arithmetic and logical instructions, branches, and barrier
+ * instructions can be emulated just using the information in *op.
+ *
+ * Return value is 1 if the instruction can be emulated just by
+ * updating *regs with the information in *op, -1 if we need the
+ * GPRs but *regs doesn't contain the full register set, or 0
+ * otherwise.
*/
-int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
- unsigned int instr)
+int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
+ unsigned int instr)
{
unsigned int opcode, ra, rb, rd, spr, u;
unsigned long int imm;
@@ -653,12 +1168,11 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
imm = (signed short)(instr & 0xfffc);
if ((instr & 2) == 0)
imm += regs->nip;
- regs->nip += 4;
- regs->nip = truncate_if_32bit(regs->msr, regs->nip);
+ op->val = truncate_if_32bit(regs->msr, imm);
if (instr & 1)
- regs->link = regs->nip;
- if (branch_taken(instr, regs))
- regs->nip = truncate_if_32bit(regs->msr, imm);
+ op->type |= SETLK;
+ if (branch_taken(instr, regs, op))
+ op->type |= BRTAKEN;
return 1;
#ifdef CONFIG_PPC64
case 17: /* sc */
@@ -669,38 +1183,37 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
return 0;
#endif
case 18: /* b */
- op->type = BRANCH;
+ op->type = BRANCH | BRTAKEN;
imm = instr & 0x03fffffc;
if (imm & 0x02000000)
imm -= 0x04000000;
if ((instr & 2) == 0)
imm += regs->nip;
+ op->val = truncate_if_32bit(regs->msr, imm);
if (instr & 1)
- regs->link = truncate_if_32bit(regs->msr, regs->nip + 4);
- imm = truncate_if_32bit(regs->msr, imm);
- regs->nip = imm;
+ op->type |= SETLK;
return 1;
case 19:
switch ((instr >> 1) & 0x3ff) {
case 0: /* mcrf */
+ op->type = COMPUTE + SETCC;
rd = 7 - ((instr >> 23) & 0x7);
ra = 7 - ((instr >> 18) & 0x7);
rd *= 4;
ra *= 4;
val = (regs->ccr >> ra) & 0xf;
- regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd);
- goto instr_done;
+ op->ccval = (regs->ccr & ~(0xfUL << rd)) | (val << rd);
+ return 1;
case 16: /* bclr */
case 528: /* bcctr */
op->type = BRANCH;
imm = (instr & 0x400)? regs->ctr: regs->link;
- regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
- imm = truncate_if_32bit(regs->msr, imm);
+ op->val = truncate_if_32bit(regs->msr, imm);
if (instr & 1)
- regs->link = regs->nip;
- if (branch_taken(instr, regs))
- regs->nip = imm;
+ op->type |= SETLK;
+ if (branch_taken(instr, regs, op))
+ op->type |= BRTAKEN;
return 1;
case 18: /* rfid, scary */
@@ -710,9 +1223,8 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
return 0;
case 150: /* isync */
- op->type = BARRIER;
- isync();
- goto instr_done;
+ op->type = BARRIER | BARRIER_ISYNC;
+ return 1;
case 33: /* crnor */
case 129: /* crandc */
@@ -722,45 +1234,44 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
case 289: /* creqv */
case 417: /* crorc */
case 449: /* cror */
+ op->type = COMPUTE + SETCC;
ra = (instr >> 16) & 0x1f;
rb = (instr >> 11) & 0x1f;
rd = (instr >> 21) & 0x1f;
ra = (regs->ccr >> (31 - ra)) & 1;
rb = (regs->ccr >> (31 - rb)) & 1;
val = (instr >> (6 + ra * 2 + rb)) & 1;
- regs->ccr = (regs->ccr & ~(1UL << (31 - rd))) |
+ op->ccval = (regs->ccr & ~(1UL << (31 - rd))) |
(val << (31 - rd));
- goto instr_done;
+ return 1;
}
break;
case 31:
switch ((instr >> 1) & 0x3ff) {
case 598: /* sync */
- op->type = BARRIER;
+ op->type = BARRIER + BARRIER_SYNC;
#ifdef __powerpc64__
switch ((instr >> 21) & 3) {
case 1: /* lwsync */
- asm volatile("lwsync" : : : "memory");
- goto instr_done;
+ op->type = BARRIER + BARRIER_LWSYNC;
+ break;
case 2: /* ptesync */
- asm volatile("ptesync" : : : "memory");
- goto instr_done;
+ op->type = BARRIER + BARRIER_PTESYNC;
+ break;
}
#endif
- mb();
- goto instr_done;
+ return 1;
case 854: /* eieio */
- op->type = BARRIER;
- eieio();
- goto instr_done;
+ op->type = BARRIER + BARRIER_EIEIO;
+ return 1;
}
break;
}
/* Following cases refer to regs->gpr[], so we need all regs */
if (!FULL_REGS(regs))
- return 0;
+ return -1;
rd = (instr >> 21) & 0x1f;
ra = (instr >> 16) & 0x1f;
@@ -771,21 +1282,21 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
case 2: /* tdi */
if (rd & trap_compare(regs->gpr[ra], (short) instr))
goto trap;
- goto instr_done;
+ return 1;
#endif
case 3: /* twi */
if (rd & trap_compare((int)regs->gpr[ra], (short) instr))
goto trap;
- goto instr_done;
+ return 1;
case 7: /* mulli */
- regs->gpr[rd] = regs->gpr[ra] * (short) instr;
- goto instr_done;
+ op->val = regs->gpr[ra] * (short) instr;
+ goto compute_done;
case 8: /* subfic */
imm = (short) instr;
- add_with_carry(regs, rd, ~regs->gpr[ra], imm, 1);
- goto instr_done;
+ add_with_carry(regs, op, rd, ~regs->gpr[ra], imm, 1);
+ return 1;
case 10: /* cmpli */
imm = (unsigned short) instr;
@@ -794,8 +1305,8 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
if ((rd & 1) == 0)
val = (unsigned int) val;
#endif
- do_cmp_unsigned(regs, val, imm, rd >> 2);
- goto instr_done;
+ do_cmp_unsigned(regs, op, val, imm, rd >> 2);
+ return 1;
case 11: /* cmpi */
imm = (short) instr;
@@ -804,47 +1315,58 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
if ((rd & 1) == 0)
val = (int) val;
#endif
- do_cmp_signed(regs, val, imm, rd >> 2);
- goto instr_done;
+ do_cmp_signed(regs, op, val, imm, rd >> 2);
+ return 1;
case 12: /* addic */
imm = (short) instr;
- add_with_carry(regs, rd, regs->gpr[ra], imm, 0);
- goto instr_done;
+ add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0);
+ return 1;
case 13: /* addic. */
imm = (short) instr;
- add_with_carry(regs, rd, regs->gpr[ra], imm, 0);
- set_cr0(regs, rd);
- goto instr_done;
+ add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0);
+ set_cr0(regs, op, rd);
+ return 1;
case 14: /* addi */
imm = (short) instr;
if (ra)
imm += regs->gpr[ra];
- regs->gpr[rd] = imm;
- goto instr_done;
+ op->val = imm;
+ goto compute_done;
case 15: /* addis */
imm = ((short) instr) << 16;
if (ra)
imm += regs->gpr[ra];
- regs->gpr[rd] = imm;
- goto instr_done;
+ op->val = imm;
+ goto compute_done;
+
+ case 19:
+ if (((instr >> 1) & 0x1f) == 2) {
+ /* addpcis */
+ imm = (short) (instr & 0xffc1); /* d0 + d2 fields */
+ imm |= (instr >> 15) & 0x3e; /* d1 field */
+ op->val = regs->nip + (imm << 16) + 4;
+ goto compute_done;
+ }
+ op->type = UNKNOWN;
+ return 0;
case 20: /* rlwimi */
mb = (instr >> 6) & 0x1f;
me = (instr >> 1) & 0x1f;
val = DATA32(regs->gpr[rd]);
imm = MASK32(mb, me);
- regs->gpr[ra] = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm);
+ op->val = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm);
goto logical_done;
case 21: /* rlwinm */
mb = (instr >> 6) & 0x1f;
me = (instr >> 1) & 0x1f;
val = DATA32(regs->gpr[rd]);
- regs->gpr[ra] = ROTATE(val, rb) & MASK32(mb, me);
+ op->val = ROTATE(val, rb) & MASK32(mb, me);
goto logical_done;
case 23: /* rlwnm */
@@ -852,40 +1374,37 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
me = (instr >> 1) & 0x1f;
rb = regs->gpr[rb] & 0x1f;
val = DATA32(regs->gpr[rd]);
- regs->gpr[ra] = ROTATE(val, rb) & MASK32(mb, me);
+ op->val = ROTATE(val, rb) & MASK32(mb, me);
goto logical_done;
case 24: /* ori */
- imm = (unsigned short) instr;
- regs->gpr[ra] = regs->gpr[rd] | imm;
- goto instr_done;
+ op->val = regs->gpr[rd] | (unsigned short) instr;
+ goto logical_done_nocc;
case 25: /* oris */
imm = (unsigned short) instr;
- regs->gpr[ra] = regs->gpr[rd] | (imm << 16);
- goto instr_done;
+ op->val = regs->gpr[rd] | (imm << 16);
+ goto logical_done_nocc;
case 26: /* xori */
- imm = (unsigned short) instr;
- regs->gpr[ra] = regs->gpr[rd] ^ imm;
- goto instr_done;
+ op->val = regs->gpr[rd] ^ (unsigned short) instr;
+ goto logical_done_nocc;
case 27: /* xoris */
imm = (unsigned short) instr;
- regs->gpr[ra] = regs->gpr[rd] ^ (imm << 16);
- goto instr_done;
+ op->val = regs->gpr[rd] ^ (imm << 16);
+ goto logical_done_nocc;
case 28: /* andi. */
- imm = (unsigned short) instr;
- regs->gpr[ra] = regs->gpr[rd] & imm;
- set_cr0(regs, ra);
- goto instr_done;
+ op->val = regs->gpr[rd] & (unsigned short) instr;
+ set_cr0(regs, op, ra);
+ goto logical_done_nocc;
case 29: /* andis. */
imm = (unsigned short) instr;
- regs->gpr[ra] = regs->gpr[rd] & (imm << 16);
- set_cr0(regs, ra);
- goto instr_done;
+ op->val = regs->gpr[rd] & (imm << 16);
+ set_cr0(regs, op, ra);
+ goto logical_done_nocc;
#ifdef __powerpc64__
case 30: /* rld* */
@@ -896,48 +1415,60 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
val = ROTATE(val, sh);
switch ((instr >> 2) & 3) {
case 0: /* rldicl */
- regs->gpr[ra] = val & MASK64_L(mb);
- goto logical_done;
+ val &= MASK64_L(mb);
+ break;
case 1: /* rldicr */
- regs->gpr[ra] = val & MASK64_R(mb);
- goto logical_done;
+ val &= MASK64_R(mb);
+ break;
case 2: /* rldic */
- regs->gpr[ra] = val & MASK64(mb, 63 - sh);
- goto logical_done;
+ val &= MASK64(mb, 63 - sh);
+ break;
case 3: /* rldimi */
imm = MASK64(mb, 63 - sh);
- regs->gpr[ra] = (regs->gpr[ra] & ~imm) |
+ val = (regs->gpr[ra] & ~imm) |
(val & imm);
- goto logical_done;
}
+ op->val = val;
+ goto logical_done;
} else {
sh = regs->gpr[rb] & 0x3f;
val = ROTATE(val, sh);
switch ((instr >> 1) & 7) {
case 0: /* rldcl */
- regs->gpr[ra] = val & MASK64_L(mb);
+ op->val = val & MASK64_L(mb);
goto logical_done;
case 1: /* rldcr */
- regs->gpr[ra] = val & MASK64_R(mb);
+ op->val = val & MASK64_R(mb);
goto logical_done;
}
}
#endif
- break; /* illegal instruction */
+ op->type = UNKNOWN; /* illegal instruction */
+ return 0;
case 31:
+ /* isel occupies 32 minor opcodes */
+ if (((instr >> 1) & 0x1f) == 15) {
+ mb = (instr >> 6) & 0x1f; /* bc field */
+ val = (regs->ccr >> (31 - mb)) & 1;
+ val2 = (ra) ? regs->gpr[ra] : 0;
+
+ op->val = (val) ? val2 : regs->gpr[rb];
+ goto compute_done;
+ }
+
switch ((instr >> 1) & 0x3ff) {
case 4: /* tw */
if (rd == 0x1f ||
(rd & trap_compare((int)regs->gpr[ra],
(int)regs->gpr[rb])))
goto trap;
- goto instr_done;
+ return 1;
#ifdef __powerpc64__
case 68: /* td */
if (rd & trap_compare(regs->gpr[ra], regs->gpr[rb]))
goto trap;
- goto instr_done;
+ return 1;
#endif
case 83: /* mfmsr */
if (regs->msr & MSR_PR)
@@ -966,74 +1497,50 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
#endif
case 19: /* mfcr */
+ imm = 0xffffffffUL;
if ((instr >> 20) & 1) {
imm = 0xf0000000UL;
for (sh = 0; sh < 8; ++sh) {
- if (instr & (0x80000 >> sh)) {
- regs->gpr[rd] = regs->ccr & imm;
+ if (instr & (0x80000 >> sh))
break;
- }
imm >>= 4;
}
-
- goto instr_done;
}
-
- regs->gpr[rd] = regs->ccr;
- regs->gpr[rd] &= 0xffffffffUL;
- goto instr_done;
+ op->val = regs->ccr & imm;
+ goto compute_done;
case 144: /* mtcrf */
+ op->type = COMPUTE + SETCC;
imm = 0xf0000000UL;
val = regs->gpr[rd];
+ op->val = regs->ccr;
for (sh = 0; sh < 8; ++sh) {
if (instr & (0x80000 >> sh))
- regs->ccr = (regs->ccr & ~imm) |
+ op->val = (op->val & ~imm) |
(val & imm);
imm >>= 4;
}
- goto instr_done;
+ return 1;
case 339: /* mfspr */
spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0);
- switch (spr) {
- case SPRN_XER: /* mfxer */
- regs->gpr[rd] = regs->xer;
- regs->gpr[rd] &= 0xffffffffUL;
- goto instr_done;
- case SPRN_LR: /* mflr */
- regs->gpr[rd] = regs->link;
- goto instr_done;
- case SPRN_CTR: /* mfctr */
- regs->gpr[rd] = regs->ctr;
- goto instr_done;
- default:
- op->type = MFSPR;
- op->reg = rd;
- op->spr = spr;
- return 0;
- }
- break;
+ op->type = MFSPR;
+ op->reg = rd;
+ op->spr = spr;
+ if (spr == SPRN_XER || spr == SPRN_LR ||
+ spr == SPRN_CTR)
+ return 1;
+ return 0;
case 467: /* mtspr */
spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0);
- switch (spr) {
- case SPRN_XER: /* mtxer */
- regs->xer = (regs->gpr[rd] & 0xffffffffUL);
- goto instr_done;
- case SPRN_LR: /* mtlr */
- regs->link = regs->gpr[rd];
- goto instr_done;
- case SPRN_CTR: /* mtctr */
- regs->ctr = regs->gpr[rd];
- goto instr_done;
- default:
- op->type = MTSPR;
- op->val = regs->gpr[rd];
- op->spr = spr;
- return 0;
- }
- break;
+ op->type = MTSPR;
+ op->val = regs->gpr[rd];
+ op->spr = spr;
+ if (spr == SPRN_XER || spr == SPRN_LR ||
+ spr == SPRN_CTR)
+ return 1;
+ return 0;
/*
* Compare instructions
@@ -1048,8 +1555,8 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
val2 = (int) val2;
}
#endif
- do_cmp_signed(regs, val, val2, rd >> 2);
- goto instr_done;
+ do_cmp_signed(regs, op, val, val2, rd >> 2);
+ return 1;
case 32: /* cmpl */
val = regs->gpr[ra];
@@ -1061,109 +1568,113 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
val2 = (unsigned int) val2;
}
#endif
- do_cmp_unsigned(regs, val, val2, rd >> 2);
- goto instr_done;
+ do_cmp_unsigned(regs, op, val, val2, rd >> 2);
+ return 1;
+
+ case 508: /* cmpb */
+ do_cmpb(regs, op, regs->gpr[rd], regs->gpr[rb]);
+ goto logical_done_nocc;
/*
* Arithmetic instructions
*/
case 8: /* subfc */
- add_with_carry(regs, rd, ~regs->gpr[ra],
+ add_with_carry(regs, op, rd, ~regs->gpr[ra],
regs->gpr[rb], 1);
goto arith_done;
#ifdef __powerpc64__
case 9: /* mulhdu */
- asm("mulhdu %0,%1,%2" : "=r" (regs->gpr[rd]) :
+ asm("mulhdu %0,%1,%2" : "=r" (op->val) :
"r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
goto arith_done;
#endif
case 10: /* addc */
- add_with_carry(regs, rd, regs->gpr[ra],
+ add_with_carry(regs, op, rd, regs->gpr[ra],
regs->gpr[rb], 0);
goto arith_done;
case 11: /* mulhwu */
- asm("mulhwu %0,%1,%2" : "=r" (regs->gpr[rd]) :
+ asm("mulhwu %0,%1,%2" : "=r" (op->val) :
"r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
goto arith_done;
case 40: /* subf */
- regs->gpr[rd] = regs->gpr[rb] - regs->gpr[ra];
+ op->val = regs->gpr[rb] - regs->gpr[ra];
goto arith_done;
#ifdef __powerpc64__
case 73: /* mulhd */
- asm("mulhd %0,%1,%2" : "=r" (regs->gpr[rd]) :
+ asm("mulhd %0,%1,%2" : "=r" (op->val) :
"r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
goto arith_done;
#endif
case 75: /* mulhw */
- asm("mulhw %0,%1,%2" : "=r" (regs->gpr[rd]) :
+ asm("mulhw %0,%1,%2" : "=r" (op->val) :
"r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
goto arith_done;
case 104: /* neg */
- regs->gpr[rd] = -regs->gpr[ra];
+ op->val = -regs->gpr[ra];
goto arith_done;
case 136: /* subfe */
- add_with_carry(regs, rd, ~regs->gpr[ra], regs->gpr[rb],
- regs->xer & XER_CA);
+ add_with_carry(regs, op, rd, ~regs->gpr[ra],
+ regs->gpr[rb], regs->xer & XER_CA);
goto arith_done;
case 138: /* adde */
- add_with_carry(regs, rd, regs->gpr[ra], regs->gpr[rb],
- regs->xer & XER_CA);
+ add_with_carry(regs, op, rd, regs->gpr[ra],
+ regs->gpr[rb], regs->xer & XER_CA);
goto arith_done;
case 200: /* subfze */
- add_with_carry(regs, rd, ~regs->gpr[ra], 0L,
+ add_with_carry(regs, op, rd, ~regs->gpr[ra], 0L,
regs->xer & XER_CA);
goto arith_done;
case 202: /* addze */
- add_with_carry(regs, rd, regs->gpr[ra], 0L,
+ add_with_carry(regs, op, rd, regs->gpr[ra], 0L,
regs->xer & XER_CA);
goto arith_done;
case 232: /* subfme */
- add_with_carry(regs, rd, ~regs->gpr[ra], -1L,
+ add_with_carry(regs, op, rd, ~regs->gpr[ra], -1L,
regs->xer & XER_CA);
goto arith_done;
#ifdef __powerpc64__
case 233: /* mulld */
- regs->gpr[rd] = regs->gpr[ra] * regs->gpr[rb];
+ op->val = regs->gpr[ra] * regs->gpr[rb];
goto arith_done;
#endif
case 234: /* addme */
- add_with_carry(regs, rd, regs->gpr[ra], -1L,
+ add_with_carry(regs, op, rd, regs->gpr[ra], -1L,
regs->xer & XER_CA);
goto arith_done;
case 235: /* mullw */
- regs->gpr[rd] = (unsigned int) regs->gpr[ra] *
+ op->val = (unsigned int) regs->gpr[ra] *
(unsigned int) regs->gpr[rb];
goto arith_done;
case 266: /* add */
- regs->gpr[rd] = regs->gpr[ra] + regs->gpr[rb];
+ op->val = regs->gpr[ra] + regs->gpr[rb];
goto arith_done;
#ifdef __powerpc64__
case 457: /* divdu */
- regs->gpr[rd] = regs->gpr[ra] / regs->gpr[rb];
+ op->val = regs->gpr[ra] / regs->gpr[rb];
goto arith_done;
#endif
case 459: /* divwu */
- regs->gpr[rd] = (unsigned int) regs->gpr[ra] /
+ op->val = (unsigned int) regs->gpr[ra] /
(unsigned int) regs->gpr[rb];
goto arith_done;
#ifdef __powerpc64__
case 489: /* divd */
- regs->gpr[rd] = (long int) regs->gpr[ra] /
+ op->val = (long int) regs->gpr[ra] /
(long int) regs->gpr[rb];
goto arith_done;
#endif
case 491: /* divw */
- regs->gpr[rd] = (int) regs->gpr[ra] /
+ op->val = (int) regs->gpr[ra] /
(int) regs->gpr[rb];
goto arith_done;
@@ -1172,57 +1683,79 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
* Logical instructions
*/
case 26: /* cntlzw */
- asm("cntlzw %0,%1" : "=r" (regs->gpr[ra]) :
- "r" (regs->gpr[rd]));
+ op->val = __builtin_clz((unsigned int) regs->gpr[rd]);
goto logical_done;
#ifdef __powerpc64__
case 58: /* cntlzd */
- asm("cntlzd %0,%1" : "=r" (regs->gpr[ra]) :
- "r" (regs->gpr[rd]));
+ op->val = __builtin_clzl(regs->gpr[rd]);
goto logical_done;
#endif
case 28: /* and */
- regs->gpr[ra] = regs->gpr[rd] & regs->gpr[rb];
+ op->val = regs->gpr[rd] & regs->gpr[rb];
goto logical_done;
case 60: /* andc */
- regs->gpr[ra] = regs->gpr[rd] & ~regs->gpr[rb];
+ op->val = regs->gpr[rd] & ~regs->gpr[rb];
goto logical_done;
+ case 122: /* popcntb */
+ do_popcnt(regs, op, regs->gpr[rd], 8);
+ goto logical_done_nocc;
+
case 124: /* nor */
- regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]);
+ op->val = ~(regs->gpr[rd] | regs->gpr[rb]);
goto logical_done;
+ case 154: /* prtyw */
+ do_prty(regs, op, regs->gpr[rd], 32);
+ goto logical_done_nocc;
+
+ case 186: /* prtyd */
+ do_prty(regs, op, regs->gpr[rd], 64);
+ goto logical_done_nocc;
+#ifdef CONFIG_PPC64
+ case 252: /* bpermd */
+ do_bpermd(regs, op, regs->gpr[rd], regs->gpr[rb]);
+ goto logical_done_nocc;
+#endif
case 284: /* xor */
- regs->gpr[ra] = ~(regs->gpr[rd] ^ regs->gpr[rb]);
+ op->val = ~(regs->gpr[rd] ^ regs->gpr[rb]);
goto logical_done;
case 316: /* xor */
- regs->gpr[ra] = regs->gpr[rd] ^ regs->gpr[rb];
+ op->val = regs->gpr[rd] ^ regs->gpr[rb];
goto logical_done;
+ case 378: /* popcntw */
+ do_popcnt(regs, op, regs->gpr[rd], 32);
+ goto logical_done_nocc;
+
case 412: /* orc */
- regs->gpr[ra] = regs->gpr[rd] | ~regs->gpr[rb];
+ op->val = regs->gpr[rd] | ~regs->gpr[rb];
goto logical_done;
case 444: /* or */
- regs->gpr[ra] = regs->gpr[rd] | regs->gpr[rb];
+ op->val = regs->gpr[rd] | regs->gpr[rb];
goto logical_done;
case 476: /* nand */
- regs->gpr[ra] = ~(regs->gpr[rd] & regs->gpr[rb]);
+ op->val = ~(regs->gpr[rd] & regs->gpr[rb]);
goto logical_done;
-
+#ifdef CONFIG_PPC64
+ case 506: /* popcntd */
+ do_popcnt(regs, op, regs->gpr[rd], 64);
+ goto logical_done_nocc;
+#endif
case 922: /* extsh */
- regs->gpr[ra] = (signed short) regs->gpr[rd];
+ op->val = (signed short) regs->gpr[rd];
goto logical_done;
case 954: /* extsb */
- regs->gpr[ra] = (signed char) regs->gpr[rd];
+ op->val = (signed char) regs->gpr[rd];
goto logical_done;
#ifdef __powerpc64__
case 986: /* extsw */
- regs->gpr[ra] = (signed int) regs->gpr[rd];
+ op->val = (signed int) regs->gpr[rd];
goto logical_done;
#endif
@@ -1232,75 +1765,83 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
case 24: /* slw */
sh = regs->gpr[rb] & 0x3f;
if (sh < 32)
- regs->gpr[ra] = (regs->gpr[rd] << sh) & 0xffffffffUL;
+ op->val = (regs->gpr[rd] << sh) & 0xffffffffUL;
else
- regs->gpr[ra] = 0;
+ op->val = 0;
goto logical_done;
case 536: /* srw */
sh = regs->gpr[rb] & 0x3f;
if (sh < 32)
- regs->gpr[ra] = (regs->gpr[rd] & 0xffffffffUL) >> sh;
+ op->val = (regs->gpr[rd] & 0xffffffffUL) >> sh;
else
- regs->gpr[ra] = 0;
+ op->val = 0;
goto logical_done;
case 792: /* sraw */
+ op->type = COMPUTE + SETREG + SETXER;
sh = regs->gpr[rb] & 0x3f;
ival = (signed int) regs->gpr[rd];
- regs->gpr[ra] = ival >> (sh < 32 ? sh : 31);
+ op->val = ival >> (sh < 32 ? sh : 31);
+ op->xerval = regs->xer;
if (ival < 0 && (sh >= 32 || (ival & ((1ul << sh) - 1)) != 0))
- regs->xer |= XER_CA;
+ op->xerval |= XER_CA;
else
- regs->xer &= ~XER_CA;
+ op->xerval &= ~XER_CA;
goto logical_done;
case 824: /* srawi */
+ op->type = COMPUTE + SETREG + SETXER;
sh = rb;
ival = (signed int) regs->gpr[rd];
- regs->gpr[ra] = ival >> sh;
+ op->val = ival >> sh;
+ op->xerval = regs->xer;
if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
- regs->xer |= XER_CA;
+ op->xerval |= XER_CA;
else
- regs->xer &= ~XER_CA;
+ op->xerval &= ~XER_CA;
goto logical_done;
#ifdef __powerpc64__
case 27: /* sld */
sh = regs->gpr[rb] & 0x7f;
if (sh < 64)
- regs->gpr[ra] = regs->gpr[rd] << sh;
+ op->val = regs->gpr[rd] << sh;
else
- regs->gpr[ra] = 0;
+ op->val = 0;
goto logical_done;
case 539: /* srd */
sh = regs->gpr[rb] & 0x7f;
if (sh < 64)
- regs->gpr[ra] = regs->gpr[rd] >> sh;
+ op->val = regs->gpr[rd] >> sh;
else
- regs->gpr[ra] = 0;
+ op->val = 0;
goto logical_done;
case 794: /* srad */
+ op->type = COMPUTE + SETREG + SETXER;
sh = regs->gpr[rb] & 0x7f;
ival = (signed long int) regs->gpr[rd];
- regs->gpr[ra] = ival >> (sh < 64 ? sh : 63);
+ op->val = ival >> (sh < 64 ? sh : 63);
+ op->xerval = regs->xer;
if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0))
- regs->xer |= XER_CA;
+ op->xerval |= XER_CA;
else
- regs->xer &= ~XER_CA;
+ op->xerval &= ~XER_CA;
goto logical_done;
case 826: /* sradi with sh_5 = 0 */
case 827: /* sradi with sh_5 = 1 */
+ op->type = COMPUTE + SETREG + SETXER;
sh = rb | ((instr & 2) << 4);
ival = (signed long int) regs->gpr[rd];
- regs->gpr[ra] = ival >> sh;
+ op->val = ival >> sh;
+ op->xerval = regs->xer;
if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
- regs->xer |= XER_CA;
+ op->xerval |= XER_CA;
else
- regs->xer &= ~XER_CA;
+ op->xerval &= ~XER_CA;
goto logical_done;
#endif /* __powerpc64__ */
@@ -1333,18 +1874,24 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
op->type = MKOP(CACHEOP, ICBI, 0);
op->ea = xform_ea(instr, regs);
return 0;
+
+ case 1014: /* dcbz */
+ op->type = MKOP(CACHEOP, DCBZ, 0);
+ op->ea = xform_ea(instr, regs);
+ return 0;
}
break;
}
- /*
- * Loads and stores.
- */
+/*
+ * Loads and stores.
+ */
op->type = UNKNOWN;
op->update_reg = ra;
op->reg = rd;
op->val = regs->gpr[rd];
u = (instr >> 20) & UPDATE;
+ op->vsx_flags = 0;
switch (opcode) {
case 31:
@@ -1368,9 +1915,30 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
op->type = MKOP(STCX, 0, 8);
break;
- case 21: /* ldx */
- case 53: /* ldux */
- op->type = MKOP(LOAD, u, 8);
+ case 52: /* lbarx */
+ op->type = MKOP(LARX, 0, 1);
+ break;
+
+ case 694: /* stbcx. */
+ op->type = MKOP(STCX, 0, 1);
+ break;
+
+ case 116: /* lharx */
+ op->type = MKOP(LARX, 0, 2);
+ break;
+
+ case 726: /* sthcx. */
+ op->type = MKOP(STCX, 0, 2);
+ break;
+
+ case 276: /* lqarx */
+ if (!((rd & 1) || rd == ra || rd == rb))
+ op->type = MKOP(LARX, 0, 16);
+ break;
+
+ case 182: /* stqcx. */
+ if (!(rd & 1))
+ op->type = MKOP(STCX, 0, 16);
break;
#endif
@@ -1385,22 +1953,58 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
break;
#ifdef CONFIG_ALTIVEC
+ /*
+ * Note: for the load/store vector element instructions,
+ * bits of the EA say which field of the VMX register to use.
+ */
+ case 7: /* lvebx */
+ op->type = MKOP(LOAD_VMX, 0, 1);
+ op->element_size = 1;
+ break;
+
+ case 39: /* lvehx */
+ op->type = MKOP(LOAD_VMX, 0, 2);
+ op->element_size = 2;
+ break;
+
+ case 71: /* lvewx */
+ op->type = MKOP(LOAD_VMX, 0, 4);
+ op->element_size = 4;
+ break;
+
case 103: /* lvx */
case 359: /* lvxl */
- if (!(regs->msr & MSR_VEC))
- goto vecunavail;
op->type = MKOP(LOAD_VMX, 0, 16);
+ op->element_size = 16;
+ break;
+
+ case 135: /* stvebx */
+ op->type = MKOP(STORE_VMX, 0, 1);
+ op->element_size = 1;
+ break;
+
+ case 167: /* stvehx */
+ op->type = MKOP(STORE_VMX, 0, 2);
+ op->element_size = 2;
+ break;
+
+ case 199: /* stvewx */
+ op->type = MKOP(STORE_VMX, 0, 4);
+ op->element_size = 4;
break;
case 231: /* stvx */
case 487: /* stvxl */
- if (!(regs->msr & MSR_VEC))
- goto vecunavail;
op->type = MKOP(STORE_VMX, 0, 16);
break;
#endif /* CONFIG_ALTIVEC */
#ifdef __powerpc64__
+ case 21: /* ldx */
+ case 53: /* ldux */
+ op->type = MKOP(LOAD, u, 8);
+ break;
+
case 149: /* stdx */
case 181: /* stdux */
op->type = MKOP(STORE, u, 8);
@@ -1457,41 +2061,52 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
if (rb == 0)
rb = 32; /* # bytes to load */
op->type = MKOP(LOAD_MULTI, 0, rb);
- op->ea = 0;
- if (ra)
- op->ea = truncate_if_32bit(regs->msr,
- regs->gpr[ra]);
+ op->ea = ra ? regs->gpr[ra] : 0;
break;
#ifdef CONFIG_PPC_FPU
case 535: /* lfsx */
case 567: /* lfsux */
- if (!(regs->msr & MSR_FP))
- goto fpunavail;
- op->type = MKOP(LOAD_FP, u, 4);
+ op->type = MKOP(LOAD_FP, u | FPCONV, 4);
break;
case 599: /* lfdx */
case 631: /* lfdux */
- if (!(regs->msr & MSR_FP))
- goto fpunavail;
op->type = MKOP(LOAD_FP, u, 8);
break;
case 663: /* stfsx */
case 695: /* stfsux */
- if (!(regs->msr & MSR_FP))
- goto fpunavail;
- op->type = MKOP(STORE_FP, u, 4);
+ op->type = MKOP(STORE_FP, u | FPCONV, 4);
break;
case 727: /* stfdx */
case 759: /* stfdux */
- if (!(regs->msr & MSR_FP))
- goto fpunavail;
op->type = MKOP(STORE_FP, u, 8);
break;
-#endif
+
+#ifdef __powerpc64__
+ case 791: /* lfdpx */
+ op->type = MKOP(LOAD_FP, 0, 16);
+ break;
+
+ case 855: /* lfiwax */
+ op->type = MKOP(LOAD_FP, SIGNEXT, 4);
+ break;
+
+ case 887: /* lfiwzx */
+ op->type = MKOP(LOAD_FP, 0, 4);
+ break;
+
+ case 919: /* stfdpx */
+ op->type = MKOP(STORE_FP, 0, 16);
+ break;
+
+ case 983: /* stfiwx */
+ op->type = MKOP(STORE_FP, 0, 4);
+ break;
+#endif /* __powerpc64 */
+#endif /* CONFIG_PPC_FPU */
#ifdef __powerpc64__
case 660: /* stdbrx */
@@ -1509,14 +2124,11 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
op->val = byterev_4(regs->gpr[rd]);
break;
- case 725:
+ case 725: /* stswi */
if (rb == 0)
rb = 32; /* # bytes to store */
op->type = MKOP(STORE_MULTI, 0, rb);
- op->ea = 0;
- if (ra)
- op->ea = truncate_if_32bit(regs->msr,
- regs->gpr[ra]);
+ op->ea = ra ? regs->gpr[ra] : 0;
break;
case 790: /* lhbrx */
@@ -1529,20 +2141,184 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
break;
#ifdef CONFIG_VSX
+ case 12: /* lxsiwzx */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 4);
+ op->element_size = 8;
+ break;
+
+ case 76: /* lxsiwax */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(LOAD_VSX, SIGNEXT, 4);
+ op->element_size = 8;
+ break;
+
+ case 140: /* stxsiwx */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 4);
+ op->element_size = 8;
+ break;
+
+ case 268: /* lxvx */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 269: /* lxvl */
+ case 301: { /* lxvll */
+ int nb;
+ op->reg = rd | ((instr & 1) << 5);
+ op->ea = ra ? regs->gpr[ra] : 0;
+ nb = regs->gpr[rb] & 0xff;
+ if (nb > 16)
+ nb = 16;
+ op->type = MKOP(LOAD_VSX, 0, nb);
+ op->element_size = 16;
+ op->vsx_flags = ((instr & 0x20) ? VSX_LDLEFT : 0) |
+ VSX_CHECK_VEC;
+ break;
+ }
+ case 332: /* lxvdsx */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 8);
+ op->element_size = 8;
+ op->vsx_flags = VSX_SPLAT;
+ break;
+
+ case 364: /* lxvwsx */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 4);
+ op->element_size = 4;
+ op->vsx_flags = VSX_SPLAT | VSX_CHECK_VEC;
+ break;
+
+ case 396: /* stxvx */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 397: /* stxvl */
+ case 429: { /* stxvll */
+ int nb;
+ op->reg = rd | ((instr & 1) << 5);
+ op->ea = ra ? regs->gpr[ra] : 0;
+ nb = regs->gpr[rb] & 0xff;
+ if (nb > 16)
+ nb = 16;
+ op->type = MKOP(STORE_VSX, 0, nb);
+ op->element_size = 16;
+ op->vsx_flags = ((instr & 0x20) ? VSX_LDLEFT : 0) |
+ VSX_CHECK_VEC;
+ break;
+ }
+ case 524: /* lxsspx */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV;
+ break;
+
+ case 588: /* lxsdx */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 8);
+ op->element_size = 8;
+ break;
+
+ case 652: /* stxsspx */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV;
+ break;
+
+ case 716: /* stxsdx */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 8);
+ op->element_size = 8;
+ break;
+
+ case 780: /* lxvw4x */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 4;
+ break;
+
+ case 781: /* lxsibzx */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 1);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 812: /* lxvh8x */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 2;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 813: /* lxsihzx */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 2);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
case 844: /* lxvd2x */
- case 876: /* lxvd2ux */
- if (!(regs->msr & MSR_VSX))
- goto vsxunavail;
op->reg = rd | ((instr & 1) << 5);
- op->type = MKOP(LOAD_VSX, u, 16);
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 8;
+ break;
+
+ case 876: /* lxvb16x */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 1;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 908: /* stxvw4x */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 4;
+ break;
+
+ case 909: /* stxsibx */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 1);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 940: /* stxvh8x */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 2;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 941: /* stxsihx */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 2);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
break;
case 972: /* stxvd2x */
- case 1004: /* stxvd2ux */
- if (!(regs->msr & MSR_VSX))
- goto vsxunavail;
op->reg = rd | ((instr & 1) << 5);
- op->type = MKOP(STORE_VSX, u, 16);
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 8;
+ break;
+
+ case 1004: /* stxvb16x */
+ op->reg = rd | ((instr & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 1;
+ op->vsx_flags = VSX_CHECK_VEC;
break;
#endif /* CONFIG_VSX */
@@ -1606,38 +2382,63 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
#ifdef CONFIG_PPC_FPU
case 48: /* lfs */
case 49: /* lfsu */
- if (!(regs->msr & MSR_FP))
- goto fpunavail;
- op->type = MKOP(LOAD_FP, u, 4);
+ op->type = MKOP(LOAD_FP, u | FPCONV, 4);
op->ea = dform_ea(instr, regs);
break;
case 50: /* lfd */
case 51: /* lfdu */
- if (!(regs->msr & MSR_FP))
- goto fpunavail;
op->type = MKOP(LOAD_FP, u, 8);
op->ea = dform_ea(instr, regs);
break;
case 52: /* stfs */
case 53: /* stfsu */
- if (!(regs->msr & MSR_FP))
- goto fpunavail;
- op->type = MKOP(STORE_FP, u, 4);
+ op->type = MKOP(STORE_FP, u | FPCONV, 4);
op->ea = dform_ea(instr, regs);
break;
case 54: /* stfd */
case 55: /* stfdu */
- if (!(regs->msr & MSR_FP))
- goto fpunavail;
op->type = MKOP(STORE_FP, u, 8);
op->ea = dform_ea(instr, regs);
break;
#endif
#ifdef __powerpc64__
+ case 56: /* lq */
+ if (!((rd & 1) || (rd == ra)))
+ op->type = MKOP(LOAD, 0, 16);
+ op->ea = dqform_ea(instr, regs);
+ break;
+#endif
+
+#ifdef CONFIG_VSX
+ case 57: /* lfdp, lxsd, lxssp */
+ op->ea = dsform_ea(instr, regs);
+ switch (instr & 3) {
+ case 0: /* lfdp */
+ if (rd & 1)
+ break; /* reg must be even */
+ op->type = MKOP(LOAD_FP, 0, 16);
+ break;
+ case 2: /* lxsd */
+ op->reg = rd + 32;
+ op->type = MKOP(LOAD_VSX, 0, 8);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+ case 3: /* lxssp */
+ op->reg = rd + 32;
+ op->type = MKOP(LOAD_VSX, 0, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+ break;
+ }
+ break;
+#endif /* CONFIG_VSX */
+
+#ifdef __powerpc64__
case 58: /* ld[u], lwa */
op->ea = dsform_ea(instr, regs);
switch (instr & 3) {
@@ -1652,7 +2453,57 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
break;
}
break;
+#endif
+#ifdef CONFIG_VSX
+ case 61: /* stfdp, lxv, stxsd, stxssp, stxv */
+ switch (instr & 7) {
+ case 0: /* stfdp with LSB of DS field = 0 */
+ case 4: /* stfdp with LSB of DS field = 1 */
+ op->ea = dsform_ea(instr, regs);
+ op->type = MKOP(STORE_FP, 0, 16);
+ break;
+
+ case 1: /* lxv */
+ op->ea = dqform_ea(instr, regs);
+ if (instr & 8)
+ op->reg = rd + 32;
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 2: /* stxsd with LSB of DS field = 0 */
+ case 6: /* stxsd with LSB of DS field = 1 */
+ op->ea = dsform_ea(instr, regs);
+ op->reg = rd + 32;
+ op->type = MKOP(STORE_VSX, 0, 8);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 3: /* stxssp with LSB of DS field = 0 */
+ case 7: /* stxssp with LSB of DS field = 1 */
+ op->ea = dsform_ea(instr, regs);
+ op->reg = rd + 32;
+ op->type = MKOP(STORE_VSX, 0, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+ break;
+
+ case 5: /* stxv */
+ op->ea = dqform_ea(instr, regs);
+ if (instr & 8)
+ op->reg = rd + 32;
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+ }
+ break;
+#endif /* CONFIG_VSX */
+
+#ifdef __powerpc64__
case 62: /* std[u] */
op->ea = dsform_ea(instr, regs);
switch (instr & 3) {
@@ -1662,6 +2513,10 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
case 1: /* stdu */
op->type = MKOP(STORE, UPDATE, 8);
break;
+ case 2: /* stq */
+ if (!(rd & 1))
+ op->type = MKOP(STORE, 0, 16);
+ break;
}
break;
#endif /* __powerpc64__ */
@@ -1671,15 +2526,18 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
logical_done:
if (instr & 1)
- set_cr0(regs, ra);
- goto instr_done;
+ set_cr0(regs, op, ra);
+ logical_done_nocc:
+ op->reg = ra;
+ op->type |= SETREG;
+ return 1;
arith_done:
if (instr & 1)
- set_cr0(regs, rd);
-
- instr_done:
- regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
+ set_cr0(regs, op, rd);
+ compute_done:
+ op->reg = rd;
+ op->type |= SETREG;
return 1;
priv:
@@ -1691,24 +2549,6 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
op->type = INTERRUPT | 0x700;
op->val = SRR1_PROGTRAP;
return 0;
-
-#ifdef CONFIG_PPC_FPU
- fpunavail:
- op->type = INTERRUPT | 0x800;
- return 0;
-#endif
-
-#ifdef CONFIG_ALTIVEC
- vecunavail:
- op->type = INTERRUPT | 0xf20;
- return 0;
-#endif
-
-#ifdef CONFIG_VSX
- vsxunavail:
- op->type = INTERRUPT | 0xf40;
- return 0;
-#endif
}
EXPORT_SYMBOL_GPL(analyse_instr);
NOKPROBE_SYMBOL(analyse_instr);
@@ -1771,190 +2611,412 @@ static nokprobe_inline void do_byterev(unsigned long *valp, int size)
}
/*
- * Emulate instructions that cause a transfer of control,
- * loads and stores, and a few other instructions.
- * Returns 1 if the step was emulated, 0 if not,
- * or -1 if the instruction is one that should not be stepped,
- * such as an rfid, or a mtmsrd that would clear MSR_RI.
+ * Emulate an instruction that can be executed just by updating
+ * fields in *regs.
*/
-int emulate_step(struct pt_regs *regs, unsigned int instr)
+void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
{
- struct instruction_op op;
- int r, err, size;
- unsigned long val;
- unsigned int cr;
- int i, rd, nb;
+ unsigned long next_pc;
+
+ next_pc = truncate_if_32bit(regs->msr, regs->nip + 4);
+ switch (op->type & INSTR_TYPE_MASK) {
+ case COMPUTE:
+ if (op->type & SETREG)
+ regs->gpr[op->reg] = op->val;
+ if (op->type & SETCC)
+ regs->ccr = op->ccval;
+ if (op->type & SETXER)
+ regs->xer = op->xerval;
+ break;
- r = analyse_instr(&op, regs, instr);
- if (r != 0)
- return r;
+ case BRANCH:
+ if (op->type & SETLK)
+ regs->link = next_pc;
+ if (op->type & BRTAKEN)
+ next_pc = op->val;
+ if (op->type & DECCTR)
+ --regs->ctr;
+ break;
- err = 0;
- size = GETSIZE(op.type);
- switch (op.type & INSTR_TYPE_MASK) {
- case CACHEOP:
- if (!address_ok(regs, op.ea, 8))
- return 0;
- switch (op.type & CACHEOP_MASK) {
- case DCBST:
- __cacheop_user_asmx(op.ea, err, "dcbst");
+ case BARRIER:
+ switch (op->type & BARRIER_MASK) {
+ case BARRIER_SYNC:
+ mb();
break;
- case DCBF:
- __cacheop_user_asmx(op.ea, err, "dcbf");
+ case BARRIER_ISYNC:
+ isync();
break;
- case DCBTST:
- if (op.reg == 0)
- prefetchw((void *) op.ea);
+ case BARRIER_EIEIO:
+ eieio();
break;
- case DCBT:
- if (op.reg == 0)
- prefetch((void *) op.ea);
+ case BARRIER_LWSYNC:
+ asm volatile("lwsync" : : : "memory");
break;
- case ICBI:
- __cacheop_user_asmx(op.ea, err, "icbi");
+ case BARRIER_PTESYNC:
+ asm volatile("ptesync" : : : "memory");
break;
}
- if (err)
- return 0;
- goto instr_done;
+ break;
+
+ case MFSPR:
+ switch (op->spr) {
+ case SPRN_XER:
+ regs->gpr[op->reg] = regs->xer & 0xffffffffUL;
+ break;
+ case SPRN_LR:
+ regs->gpr[op->reg] = regs->link;
+ break;
+ case SPRN_CTR:
+ regs->gpr[op->reg] = regs->ctr;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+ break;
+
+ case MTSPR:
+ switch (op->spr) {
+ case SPRN_XER:
+ regs->xer = op->val & 0xffffffffUL;
+ break;
+ case SPRN_LR:
+ regs->link = op->val;
+ break;
+ case SPRN_CTR:
+ regs->ctr = op->val;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+ break;
+
+ default:
+ WARN_ON_ONCE(1);
+ }
+ regs->nip = next_pc;
+}
+
+/*
+ * Emulate a previously-analysed load or store instruction.
+ * Return values are:
+ * 0 = instruction emulated successfully
+ * -EFAULT = address out of range or access faulted (regs->dar
+ * contains the faulting address)
+ * -EACCES = misaligned access, instruction requires alignment
+ * -EINVAL = unknown operation in *op
+ */
+int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
+{
+ int err, size, type;
+ int i, rd, nb;
+ unsigned int cr;
+ unsigned long val;
+ unsigned long ea;
+ bool cross_endian;
+
+ err = 0;
+ size = GETSIZE(op->type);
+ type = op->type & INSTR_TYPE_MASK;
+ cross_endian = (regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
+ ea = truncate_if_32bit(regs->msr, op->ea);
+ switch (type) {
case LARX:
- if (op.ea & (size - 1))
- break; /* can't handle misaligned */
- if (!address_ok(regs, op.ea, size))
- return 0;
+ if (ea & (size - 1))
+ return -EACCES; /* can't handle misaligned */
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
err = 0;
+ val = 0;
switch (size) {
+#ifdef __powerpc64__
+ case 1:
+ __get_user_asmx(val, ea, err, "lbarx");
+ break;
+ case 2:
+ __get_user_asmx(val, ea, err, "lharx");
+ break;
+#endif
case 4:
- __get_user_asmx(val, op.ea, err, "lwarx");
+ __get_user_asmx(val, ea, err, "lwarx");
break;
#ifdef __powerpc64__
case 8:
- __get_user_asmx(val, op.ea, err, "ldarx");
+ __get_user_asmx(val, ea, err, "ldarx");
+ break;
+ case 16:
+ err = do_lqarx(ea, &regs->gpr[op->reg]);
break;
#endif
default:
- return 0;
+ return -EINVAL;
}
- if (!err)
- regs->gpr[op.reg] = val;
- goto ldst_done;
+ if (err) {
+ regs->dar = ea;
+ break;
+ }
+ if (size < 16)
+ regs->gpr[op->reg] = val;
+ break;
case STCX:
- if (op.ea & (size - 1))
- break; /* can't handle misaligned */
- if (!address_ok(regs, op.ea, size))
- return 0;
+ if (ea & (size - 1))
+ return -EACCES; /* can't handle misaligned */
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
err = 0;
switch (size) {
+#ifdef __powerpc64__
+ case 1:
+ __put_user_asmx(op->val, ea, err, "stbcx.", cr);
+ break;
+ case 2: /* halfword store-conditional; pairs with the lharx used for LARX */
+ __put_user_asmx(op->val, ea, err, "sthcx.", cr);
+ break;
+#endif
case 4:
- __put_user_asmx(op.val, op.ea, err, "stwcx.", cr);
+ __put_user_asmx(op->val, ea, err, "stwcx.", cr);
break;
#ifdef __powerpc64__
case 8:
- __put_user_asmx(op.val, op.ea, err, "stdcx.", cr);
+ __put_user_asmx(op->val, ea, err, "stdcx.", cr);
+ break;
+ case 16:
+ err = do_stqcx(ea, regs->gpr[op->reg],
+ regs->gpr[op->reg + 1], &cr);
break;
#endif
default:
- return 0;
+ return -EINVAL;
}
if (!err)
regs->ccr = (regs->ccr & 0x0fffffff) |
(cr & 0xe0000000) |
((regs->xer >> 3) & 0x10000000);
- goto ldst_done;
+ else
+ regs->dar = ea;
+ break;
case LOAD:
- err = read_mem(&regs->gpr[op.reg], op.ea, size, regs);
+#ifdef __powerpc64__
+ if (size == 16) {
+ err = emulate_lq(regs, ea, op->reg, cross_endian);
+ break;
+ }
+#endif
+ err = read_mem(&regs->gpr[op->reg], ea, size, regs);
if (!err) {
- if (op.type & SIGNEXT)
- do_signext(&regs->gpr[op.reg], size);
- if (op.type & BYTEREV)
- do_byterev(&regs->gpr[op.reg], size);
+ if (op->type & SIGNEXT)
+ do_signext(&regs->gpr[op->reg], size);
+ if ((op->type & BYTEREV) == (cross_endian ? 0 : BYTEREV))
+ do_byterev(&regs->gpr[op->reg], size);
}
- goto ldst_done;
+ break;
#ifdef CONFIG_PPC_FPU
case LOAD_FP:
- if (size == 4)
- err = do_fp_load(op.reg, do_lfs, op.ea, size, regs);
- else
- err = do_fp_load(op.reg, do_lfd, op.ea, size, regs);
- goto ldst_done;
+ /*
+ * If the instruction is in userspace, we can emulate it even
+ * if the FP state is not live, because we have the state
+ * stored in the thread_struct. If the instruction is in
+ * the kernel, we must not touch the state in the thread_struct.
+ */
+ if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP))
+ return 0;
+ err = do_fp_load(op, ea, regs, cross_endian);
+ break;
#endif
#ifdef CONFIG_ALTIVEC
case LOAD_VMX:
- err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs);
- goto ldst_done;
+ if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
+ return 0;
+ err = do_vec_load(op->reg, ea, size, regs, cross_endian);
+ break;
#endif
#ifdef CONFIG_VSX
- case LOAD_VSX:
- err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs);
- goto ldst_done;
+ case LOAD_VSX: {
+ unsigned long msrbit = MSR_VSX;
+
+ /*
+ * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX
+ * when the target of the instruction is a vector register.
+ */
+ if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC))
+ msrbit = MSR_VEC;
+ if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit))
+ return 0;
+ err = do_vsx_load(op, ea, regs, cross_endian);
+ break;
+ }
#endif
case LOAD_MULTI:
- if (regs->msr & MSR_LE)
- return 0;
- rd = op.reg;
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+ rd = op->reg;
for (i = 0; i < size; i += 4) {
+ unsigned int v32 = 0;
+
nb = size - i;
if (nb > 4)
nb = 4;
- err = read_mem(&regs->gpr[rd], op.ea, nb, regs);
+ err = copy_mem_in((u8 *) &v32, ea, nb, regs);
if (err)
- return 0;
- if (nb < 4) /* left-justify last bytes */
- regs->gpr[rd] <<= 32 - 8 * nb;
- op.ea += 4;
- ++rd;
+ break;
+ if (unlikely(cross_endian))
+ v32 = byterev_4(v32);
+ regs->gpr[rd] = v32;
+ ea += 4;
+ /* reg number wraps from 31 to 0 for lsw[ix] */
+ rd = (rd + 1) & 0x1f;
}
- goto instr_done;
+ break;
case STORE:
- if ((op.type & UPDATE) && size == sizeof(long) &&
- op.reg == 1 && op.update_reg == 1 &&
+#ifdef __powerpc64__
+ if (size == 16) {
+ err = emulate_stq(regs, ea, op->reg, cross_endian);
+ break;
+ }
+#endif
+ if ((op->type & UPDATE) && size == sizeof(long) &&
+ op->reg == 1 && op->update_reg == 1 &&
!(regs->msr & MSR_PR) &&
- op.ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) {
- err = handle_stack_update(op.ea, regs);
- goto ldst_done;
+ ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) {
+ err = handle_stack_update(ea, regs);
+ break;
}
- err = write_mem(op.val, op.ea, size, regs);
- goto ldst_done;
+ if (unlikely(cross_endian))
+ do_byterev(&op->val, size);
+ err = write_mem(op->val, ea, size, regs);
+ break;
#ifdef CONFIG_PPC_FPU
case STORE_FP:
- if (size == 4)
- err = do_fp_store(op.reg, do_stfs, op.ea, size, regs);
- else
- err = do_fp_store(op.reg, do_stfd, op.ea, size, regs);
- goto ldst_done;
+ if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP))
+ return 0;
+ err = do_fp_store(op, ea, regs, cross_endian);
+ break;
#endif
#ifdef CONFIG_ALTIVEC
case STORE_VMX:
- err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs);
- goto ldst_done;
+ if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
+ return 0;
+ err = do_vec_store(op->reg, ea, size, regs, cross_endian);
+ break;
#endif
#ifdef CONFIG_VSX
- case STORE_VSX:
- err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs);
- goto ldst_done;
+ case STORE_VSX: {
+ unsigned long msrbit = MSR_VSX;
+
+ /*
+ * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX
+ * when the target of the instruction is a vector register.
+ */
+ if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC))
+ msrbit = MSR_VEC;
+ if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit))
+ return 0;
+ err = do_vsx_store(op, ea, regs, cross_endian);
+ break;
+ }
#endif
case STORE_MULTI:
- if (regs->msr & MSR_LE)
- return 0;
- rd = op.reg;
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+ rd = op->reg;
for (i = 0; i < size; i += 4) {
- val = regs->gpr[rd];
+ unsigned int v32 = regs->gpr[rd];
+
nb = size - i;
if (nb > 4)
nb = 4;
- else
- val >>= 32 - 8 * nb;
- err = write_mem(val, op.ea, nb, regs);
+ if (unlikely(cross_endian))
+ v32 = byterev_4(v32);
+ err = copy_mem_out((u8 *) &v32, ea, nb, regs);
if (err)
- return 0;
- op.ea += 4;
- ++rd;
+ break;
+ ea += 4;
+ /* reg number wraps from 31 to 0 for stsw[ix] */
+ rd = (rd + 1) & 0x1f;
+ }
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if (err)
+ return err;
+
+ if (op->type & UPDATE)
+ regs->gpr[op->update_reg] = op->ea;
+
+ return 0;
+}
+NOKPROBE_SYMBOL(emulate_loadstore);
+
+/*
+ * Emulate instructions that cause a transfer of control,
+ * loads and stores, and a few other instructions.
+ * Returns 1 if the step was emulated, 0 if not,
+ * or -1 if the instruction is one that should not be stepped,
+ * such as an rfid, or a mtmsrd that would clear MSR_RI.
+ */
+int emulate_step(struct pt_regs *regs, unsigned int instr)
+{
+ struct instruction_op op;
+ int r, err, type;
+ unsigned long val;
+ unsigned long ea;
+
+ r = analyse_instr(&op, regs, instr);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ emulate_update_regs(regs, &op);
+ return 1;
+ }
+
+ err = 0;
+ type = op.type & INSTR_TYPE_MASK;
+
+ if (OP_IS_LOAD_STORE(type)) {
+ err = emulate_loadstore(regs, &op);
+ if (err)
+ return 0;
+ goto instr_done;
+ }
+
+ switch (type) {
+ case CACHEOP:
+ ea = truncate_if_32bit(regs->msr, op.ea);
+ if (!address_ok(regs, ea, 8))
+ return 0;
+ switch (op.type & CACHEOP_MASK) {
+ case DCBST:
+ __cacheop_user_asmx(ea, err, "dcbst");
+ break;
+ case DCBF:
+ __cacheop_user_asmx(ea, err, "dcbf");
+ break;
+ case DCBTST:
+ if (op.reg == 0)
+ prefetchw((void *) ea);
+ break;
+ case DCBT:
+ if (op.reg == 0)
+ prefetch((void *) ea);
+ break;
+ case ICBI:
+ __cacheop_user_asmx(ea, err, "icbi");
+ break;
+ case DCBZ:
+ err = emulate_dcbz(ea, regs);
+ break;
+ }
+ if (err) {
+ regs->dar = ea;
+ return 0;
}
goto instr_done;
@@ -1998,12 +3060,6 @@ int emulate_step(struct pt_regs *regs, unsigned int instr)
}
return 0;
- ldst_done:
- if (err)
- return 0;
- if (op.type & UPDATE)
- regs->gpr[op.update_reg] = op.ea;
-
instr_done:
regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
return 1;
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
index d5b4d9498c54..56aac4c22025 100644
--- a/arch/powerpc/lib/string_64.S
+++ b/arch/powerpc/lib/string_64.S
@@ -184,7 +184,7 @@ err1; std r0,8(r3)
mtctr r6
mr r8,r3
14:
-err1; dcbz r0,r3
+err1; dcbz 0,r3
add r3,r3,r9
bdnz 14b
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index f4c6472f2fc4..f29212e40f40 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -22,8 +22,11 @@
extern int __map_without_ltlbs;
+static unsigned long block_mapped_ram;
+
/*
- * Return PA for this VA if it is in IMMR area, or 0
+ * Return PA for this VA if it is in an area mapped with LTLBs.
+ * Otherwise, returns 0
*/
phys_addr_t v_block_mapped(unsigned long va)
{
@@ -33,11 +36,13 @@ phys_addr_t v_block_mapped(unsigned long va)
return 0;
if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE)
return p + va - VIRT_IMMR_BASE;
+ if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram)
+ return __pa(va);
return 0;
}
/*
- * Return VA for a given PA or 0 if not mapped
+ * Return VA for a given PA mapped with LTLBs or 0 if not mapped
*/
unsigned long p_block_mapped(phys_addr_t pa)
{
@@ -47,6 +52,8 @@ unsigned long p_block_mapped(phys_addr_t pa)
return 0;
if (pa >= p && pa < p + IMMR_SIZE)
return VIRT_IMMR_BASE + pa - p;
+ if (pa < block_mapped_ram)
+ return (unsigned long)__va(pa);
return 0;
}
@@ -58,7 +65,7 @@ unsigned long p_block_mapped(phys_addr_t pa)
void __init MMU_init_hw(void)
{
/* PIN up to the 3 first 8Mb after IMMR in DTLB table */
-#ifdef CONFIG_PIN_TLB
+#ifdef CONFIG_PIN_TLB_DATA
unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY;
#ifdef CONFIG_PIN_TLB_IMMR
@@ -80,7 +87,7 @@ void __init MMU_init_hw(void)
#endif
}
-static void mmu_mapin_immr(void)
+static void __init mmu_mapin_immr(void)
{
unsigned long p = PHYS_IMMR_BASE;
unsigned long v = VIRT_IMMR_BASE;
@@ -96,8 +103,11 @@ static void mmu_mapin_immr(void)
extern unsigned int DTLBMiss_jmp;
#endif
extern unsigned int DTLBMiss_cmp, FixupDAR_cmp;
+#ifndef CONFIG_PIN_TLB_TEXT
+extern unsigned int ITLBMiss_cmp;
+#endif
-void mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped)
+static void __init mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped)
{
unsigned int instr = *addr;
@@ -116,6 +126,9 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
#ifndef CONFIG_PIN_TLB_IMMR
patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP);
#endif
+#ifndef CONFIG_PIN_TLB_TEXT
+ mmu_patch_cmp_limit(&ITLBMiss_cmp, 0);
+#endif
} else {
mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
}
@@ -133,11 +146,13 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
if (mapped)
memblock_set_current_limit(mapped);
+ block_mapped_ram = mapped;
+
return mapped;
}
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
- phys_addr_t first_memblock_size)
+void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
+ phys_addr_t first_memblock_size)
{
/* We don't currently support the first MEMBLOCK not mapping 0
* physical on those processors
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 7414034df1c3..fb844d2f266e 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -8,7 +8,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-y := fault.o mem.o pgtable.o mmap.o \
init_$(BITS).o pgtable_$(BITS).o \
- init-common.o
+ init-common.o mmu_context.o
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
tlb_nohash_low.o
obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
@@ -22,8 +22,6 @@ ifeq ($(CONFIG_PPC_STD_MMU_64),y)
obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o
endif
-obj-$(CONFIG_PPC_ICSWX) += icswx.o
-obj-$(CONFIG_PPC_ICSWX_PID) += icswx_pid.o
obj-$(CONFIG_40x) += 40x_mmu.o
obj-$(CONFIG_44x) += 44x_mmu.o
obj-$(CONFIG_PPC_8xx) += 8xx_mmu.o
diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c
index b1c144b03fcf..5c4c93dcff19 100644
--- a/arch/powerpc/mm/dump_hashpagetable.c
+++ b/arch/powerpc/mm/dump_hashpagetable.c
@@ -205,7 +205,7 @@ static void dump_hpte_info(struct pg_state *st, unsigned long ea, u64 v, u64 r,
aps_index = calculate_pagesize(st, aps, "actual");
if (aps_index != 2)
seq_printf(st->seq, "LP enc: %lx", lp);
- seq_puts(st->seq, "\n");
+ seq_putc(st->seq, '\n');
}
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c
index 44fe4833910f..c9282d27b203 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -350,7 +350,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
st->current_flags,
pg_level[st->level].num);
- seq_puts(st->seq, "\n");
+ seq_putc(st->seq, '\n');
}
/*
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 4c422632047b..4797d08581ce 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -45,43 +45,39 @@
#include <asm/siginfo.h>
#include <asm/debug.h>
-#include "icswx.h"
-
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs)
+static inline bool notify_page_fault(struct pt_regs *regs)
{
- int ret = 0;
+ bool ret = false;
+#ifdef CONFIG_KPROBES
/* kprobe_running() needs smp_processor_id() */
if (!user_mode(regs)) {
preempt_disable();
if (kprobe_running() && kprobe_fault_handler(regs, 11))
- ret = 1;
+ ret = true;
preempt_enable();
}
+#endif /* CONFIG_KPROBES */
+
+ if (unlikely(debugger_fault_handler(regs)))
+ ret = true;
return ret;
}
-#else
-static inline int notify_page_fault(struct pt_regs *regs)
-{
- return 0;
-}
-#endif
/*
* Check whether the instruction at regs->nip is a store using
* an update addressing form which will update r1.
*/
-static int store_updates_sp(struct pt_regs *regs)
+static bool store_updates_sp(struct pt_regs *regs)
{
unsigned int inst;
if (get_user(inst, (unsigned int __user *)regs->nip))
- return 0;
+ return false;
/* check for 1 in the rA field */
if (((inst >> 16) & 0x1f) != 1)
- return 0;
+ return false;
/* check major opcode */
switch (inst >> 26) {
case 37: /* stwu */
@@ -89,7 +85,7 @@ static int store_updates_sp(struct pt_regs *regs)
case 45: /* sthu */
case 53: /* stfsu */
case 55: /* stfdu */
- return 1;
+ return true;
case 62: /* std or stdu */
return (inst & 3) == 1;
case 31:
@@ -101,18 +97,53 @@ static int store_updates_sp(struct pt_regs *regs)
case 439: /* sthux */
case 695: /* stfsux */
case 759: /* stfdux */
- return 1;
+ return true;
}
}
- return 0;
+ return false;
}
/*
* do_page_fault error handling helpers
*/
-#define MM_FAULT_RETURN 0
-#define MM_FAULT_CONTINUE -1
-#define MM_FAULT_ERR(sig) (sig)
+static int
+__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
+{
+ /*
+ * If we are in kernel mode, bail out with a SEGV, this will
+ * be caught by the assembly which will restore the non-volatile
+ * registers before calling bad_page_fault()
+ */
+ if (!user_mode(regs))
+ return SIGSEGV;
+
+ _exception(SIGSEGV, regs, si_code, address);
+
+ return 0;
+}
+
+static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
+{
+ return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
+}
+
+static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
+{
+ struct mm_struct *mm = current->mm;
+
+ /*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+ up_read(&mm->mmap_sem);
+
+ return __bad_area_nosemaphore(regs, address, si_code);
+}
+
+static noinline int bad_area(struct pt_regs *regs, unsigned long address)
+{
+ return __bad_area(regs, address, SEGV_MAPERR);
+}
static int do_sigbus(struct pt_regs *regs, unsigned long address,
unsigned int fault)
@@ -121,7 +152,7 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address,
unsigned int lsb = 0;
if (!user_mode(regs))
- return MM_FAULT_ERR(SIGBUS);
+ return SIGBUS;
current->thread.trap_nr = BUS_ADRERR;
info.si_signo = SIGBUS;
@@ -142,25 +173,17 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address,
#endif
info.si_addr_lsb = lsb;
force_sig_info(SIGBUS, &info, current);
- return MM_FAULT_RETURN;
+ return 0;
}
static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
{
/*
- * Pagefault was interrupted by SIGKILL. We have no reason to
- * continue the pagefault.
+ * Kernel page fault interrupted by SIGKILL. We have no reason to
+ * continue processing.
*/
- if (fatal_signal_pending(current)) {
- /* Coming from kernel, we need to deal with uaccess fixups */
- if (user_mode(regs))
- return MM_FAULT_RETURN;
- return MM_FAULT_ERR(SIGKILL);
- }
-
- /* No fault: be happy */
- if (!(fault & VM_FAULT_ERROR))
- return MM_FAULT_CONTINUE;
+ if (fatal_signal_pending(current) && !user_mode(regs))
+ return SIGKILL;
/* Out of memory */
if (fault & VM_FAULT_OOM) {
@@ -169,19 +192,176 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
* made us unable to handle the page fault gracefully.
*/
if (!user_mode(regs))
- return MM_FAULT_ERR(SIGKILL);
+ return SIGSEGV;
pagefault_out_of_memory();
- return MM_FAULT_RETURN;
+ } else {
+ if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+ VM_FAULT_HWPOISON_LARGE))
+ return do_sigbus(regs, addr, fault);
+ else if (fault & VM_FAULT_SIGSEGV)
+ return bad_area_nosemaphore(regs, addr);
+ else
+ BUG();
+ }
+ return 0;
+}
+
+/* Is this a bad kernel fault ? */
+static bool bad_kernel_fault(bool is_exec, unsigned long error_code,
+ unsigned long address)
+{
+ if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT))) {
+ printk_ratelimited(KERN_CRIT "kernel tried to execute"
+ " exec-protected page (%lx) -"
+ "exploit attempt? (uid: %d)\n",
+ address, from_kuid(&init_user_ns,
+ current_uid()));
}
+ return is_exec || (address >= TASK_SIZE);
+}
- if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE))
- return do_sigbus(regs, addr, fault);
+static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
+ struct vm_area_struct *vma,
+ bool store_update_sp)
+{
+ /*
+ * N.B. The POWER/Open ABI allows programs to access up to
+ * 288 bytes below the stack pointer.
+ * The kernel signal delivery code writes up to about 1.5kB
+ * below the stack pointer (r1) before decrementing it.
+ * The exec code can write slightly over 640kB to the stack
+ * before setting the user r1. Thus we allow the stack to
+ * expand to 1MB without further checks.
+ */
+ if (address + 0x100000 < vma->vm_end) {
+ /* get user regs even if this fault is in kernel mode */
+ struct pt_regs *uregs = current->thread.regs;
+ if (uregs == NULL)
+ return true;
- /* We don't understand the fault code, this is fatal */
- BUG();
- return MM_FAULT_CONTINUE;
+ /*
+ * A user-mode access to an address a long way below
+ * the stack pointer is only valid if the instruction
+ * is one which would update the stack pointer to the
+ * address accessed if the instruction completed,
+ * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
+ * (or the byte, halfword, float or double forms).
+ *
+ * If we don't check this then any write to the area
+ * between the last mapped region and the stack will
+ * expand the stack rather than segfaulting.
+ */
+ if (address + 2048 < uregs->gpr[1] && !store_update_sp)
+ return true;
+ }
+ return false;
+}
+
+static bool access_error(bool is_write, bool is_exec,
+ struct vm_area_struct *vma)
+{
+ /*
+ * Allow execution from readable areas if the MMU does not
+ * provide separate controls over reading and executing.
+ *
+ * Note: That code used to not be enabled for 4xx/BookE.
+ * It is now as I/D cache coherency for these is done at
+ * set_pte_at() time and I see no reason why the test
+ * below wouldn't be valid on those processors. This -may-
+ * break programs compiled with a really old ABI though.
+ */
+ if (is_exec) {
+ return !(vma->vm_flags & VM_EXEC) &&
+ (cpu_has_feature(CPU_FTR_NOEXECUTE) ||
+ !(vma->vm_flags & (VM_READ | VM_WRITE)));
+ }
+
+ if (is_write) {
+ if (unlikely(!(vma->vm_flags & VM_WRITE)))
+ return true;
+ return false;
+ }
+
+ if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+ return true;
+
+ return false;
}
+#ifdef CONFIG_PPC_SMLPAR
+static inline void cmo_account_page_fault(void)
+{
+ if (firmware_has_feature(FW_FEATURE_CMO)) {
+ u32 page_ins;
+
+ preempt_disable();
+ page_ins = be32_to_cpu(get_lppaca()->page_ins);
+ page_ins += 1 << PAGE_FACTOR;
+ get_lppaca()->page_ins = cpu_to_be32(page_ins);
+ preempt_enable();
+ }
+}
+#else
+static inline void cmo_account_page_fault(void) { }
+#endif /* CONFIG_PPC_SMLPAR */
+
+#ifdef CONFIG_PPC_STD_MMU
+static void sanity_check_fault(bool is_write, unsigned long error_code)
+{
+ /*
+ * For hash translation mode, we should never get a
+ * PROTFAULT. Any update to pte to reduce access will result in us
+ * removing the hash page table entry, thus resulting in a DSISR_NOHPTE
+ * fault instead of DSISR_PROTFAULT.
+ *
+ * A pte update to relax the access will not result in a hash page table
+ * entry invalidate and hence can result in DSISR_PROTFAULT.
+ * ptep_set_access_flags() doesn't do a hpte flush. This is why we have
+ * the special !is_write in the below conditional.
+ *
+ * For platforms that don't support coherent icache and do support
+ * per page noexec bit, we do setup things such that we do the
+ * sync between D/I cache via fault. But that is handled via low level
+ * hash fault code (hash_page_do_lazy_icache()) and we should not reach
+ * here in such case.
+ *
+ * For wrong access that can result in PROTFAULT, the above vma->vm_flags
+ * check should handle those and hence we should fall to the bad_area
+ * handling correctly.
+ *
+ * For embedded with per page exec support that doesn't support coherent
+ * icache we do get PROTFAULT and we handle that D/I cache sync in
+ * set_pte_at while taking the noexec/prot fault. Hence this WARN_ON
+ * is conditional for server MMU.
+ *
+ * For radix, we can get prot fault for autonuma case, because radix
+ * page table will have them marked noaccess for user.
+ */
+ if (!radix_enabled() && !is_write)
+ WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
+}
+#else
+static void sanity_check_fault(bool is_write, unsigned long error_code) { }
+#endif /* CONFIG_PPC_STD_MMU */
+
+/*
+ * Define the correct "is_write" bit in error_code based
+ * on the processor family
+ */
+#if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+#define page_fault_is_write(__err) ((__err) & ESR_DST)
+#define page_fault_is_bad(__err) (0)
+#else
+#define page_fault_is_write(__err) ((__err) & DSISR_ISSTORE)
+#if defined(CONFIG_PPC_8xx)
+#define page_fault_is_bad(__err) ((__err) & DSISR_NOEXEC_OR_G)
+#elif defined(CONFIG_PPC64)
+#define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_64S)
+#else
+#define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S)
+#endif
+#endif
+
/*
* For 600- and 800-family processors, the error_code parameter is DSISR
* for a data fault, SRR1 for an instruction fault. For 400-family processors
@@ -195,92 +375,56 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
* The return value is 0 if the fault was handled, or the signal
* number if this is a kernel fault that can't be handled here.
*/
-int do_page_fault(struct pt_regs *regs, unsigned long address,
- unsigned long error_code)
+static int __do_page_fault(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code)
{
- enum ctx_state prev_state = exception_enter();
struct vm_area_struct * vma;
struct mm_struct *mm = current->mm;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
- int code = SEGV_MAPERR;
- int is_write = 0;
- int trap = TRAP(regs);
- int is_exec = trap == 0x400;
+ int is_exec = TRAP(regs) == 0x400;
int is_user = user_mode(regs);
- int fault;
- int rc = 0, store_update_sp = 0;
+ int is_write = page_fault_is_write(error_code);
+ int fault, major = 0;
+ bool store_update_sp = false;
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
- /*
- * Fortunately the bit assignments in SRR1 for an instruction
- * fault and DSISR for a data fault are mostly the same for the
- * bits we are interested in. But there are some bits which
- * indicate errors in DSISR but can validly be set in SRR1.
- */
- if (is_exec)
- error_code &= 0x48200000;
- else
- is_write = error_code & DSISR_ISSTORE;
-#else
- is_write = error_code & ESR_DST;
-#endif /* CONFIG_4xx || CONFIG_BOOKE */
+ if (notify_page_fault(regs))
+ return 0;
-#ifdef CONFIG_PPC_ICSWX
- /*
- * we need to do this early because this "data storage
- * interrupt" does not update the DAR/DEAR so we don't want to
- * look at it
- */
- if (error_code & ICSWX_DSI_UCT) {
- rc = acop_handle_fault(regs, address, error_code);
- if (rc)
- goto bail;
+ if (unlikely(page_fault_is_bad(error_code))) {
+ if (is_user) {
+ _exception(SIGBUS, regs, BUS_OBJERR, address);
+ return 0;
+ }
+ return SIGBUS;
}
-#endif /* CONFIG_PPC_ICSWX */
-
- if (notify_page_fault(regs))
- goto bail;
- if (unlikely(debugger_fault_handler(regs)))
- goto bail;
+ /* Additional sanity check(s) */
+ sanity_check_fault(is_write, error_code);
/*
* The kernel should never take an execute fault nor should it
* take a page fault to a kernel address.
*/
- if (!is_user && (is_exec || (address >= TASK_SIZE))) {
- rc = SIGSEGV;
- goto bail;
- }
+ if (unlikely(!is_user && bad_kernel_fault(is_exec, error_code, address)))
+ return SIGSEGV;
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \
- defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_8xx))
- if (error_code & DSISR_DABRMATCH) {
- /* breakpoint match */
- do_break(regs, address, error_code);
- goto bail;
+ /*
+ * If we're in an interrupt, have no user context or are running
+ * in a region with pagefaults disabled then we must not take the fault
+ */
+ if (unlikely(faulthandler_disabled() || !mm)) {
+ if (is_user)
+ printk_ratelimited(KERN_ERR "Page fault in user mode"
+ " with faulthandler_disabled()=%d"
+ " mm=%p\n",
+ faulthandler_disabled(), mm);
+ return bad_area_nosemaphore(regs, address);
}
-#endif
/* We restore the interrupt state now */
if (!arch_irq_disabled_regs(regs))
local_irq_enable();
- if (faulthandler_disabled() || mm == NULL) {
- if (!is_user) {
- rc = SIGSEGV;
- goto bail;
- }
- /* faulthandler_disabled() in user mode is really bad,
- as is current->mm == NULL. */
- printk(KERN_EMERG "Page fault in user mode with "
- "faulthandler_disabled() = %d mm = %p\n",
- faulthandler_disabled(), mm);
- printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
- regs->nip, regs->msr);
- die("Weird page fault", regs, SIGSEGV);
- }
-
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
/*
@@ -293,6 +437,10 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
if (is_user)
flags |= FAULT_FLAG_USER;
+ if (is_write)
+ flags |= FAULT_FLAG_WRITE;
+ if (is_exec)
+ flags |= FAULT_FLAG_INSTRUCTION;
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
@@ -309,9 +457,9 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* source. If this is invalid we can skip the address space check,
* thus avoiding the deadlock.
*/
- if (!down_read_trylock(&mm->mmap_sem)) {
+ if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
if (!is_user && !search_exception_tables(regs->nip))
- goto bad_area_nosemaphore;
+ return bad_area_nosemaphore(regs, address);
retry:
down_read(&mm->mmap_sem);
@@ -325,122 +473,24 @@ retry:
}
vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
+ if (unlikely(!vma))
+ return bad_area(regs, address);
+ if (likely(vma->vm_start <= address))
goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
+ return bad_area(regs, address);
- /*
- * N.B. The POWER/Open ABI allows programs to access up to
- * 288 bytes below the stack pointer.
- * The kernel signal delivery code writes up to about 1.5kB
- * below the stack pointer (r1) before decrementing it.
- * The exec code can write slightly over 640kB to the stack
- * before setting the user r1. Thus we allow the stack to
- * expand to 1MB without further checks.
- */
- if (address + 0x100000 < vma->vm_end) {
- /* get user regs even if this fault is in kernel mode */
- struct pt_regs *uregs = current->thread.regs;
- if (uregs == NULL)
- goto bad_area;
+ /* The stack is being expanded, check if it's valid */
+ if (unlikely(bad_stack_expansion(regs, address, vma, store_update_sp)))
+ return bad_area(regs, address);
- /*
- * A user-mode access to an address a long way below
- * the stack pointer is only valid if the instruction
- * is one which would update the stack pointer to the
- * address accessed if the instruction completed,
- * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
- * (or the byte, halfword, float or double forms).
- *
- * If we don't check this then any write to the area
- * between the last mapped region and the stack will
- * expand the stack rather than segfaulting.
- */
- if (address + 2048 < uregs->gpr[1] && !store_update_sp)
- goto bad_area;
- }
- if (expand_stack(vma, address))
- goto bad_area;
+ /* Try to expand it */
+ if (unlikely(expand_stack(vma, address)))
+ return bad_area(regs, address);
good_area:
- code = SEGV_ACCERR;
-#if defined(CONFIG_6xx)
- if (error_code & 0x95700000)
- /* an error such as lwarx to I/O controller space,
- address matching DABR, eciwx, etc. */
- goto bad_area;
-#endif /* CONFIG_6xx */
-#if defined(CONFIG_8xx)
- /* The MPC8xx seems to always set 0x80000000, which is
- * "undefined". Of those that can be set, this is the only
- * one which seems bad.
- */
- if (error_code & 0x10000000)
- /* Guarded storage error. */
- goto bad_area;
-#endif /* CONFIG_8xx */
-
- if (is_exec) {
- /*
- * Allow execution from readable areas if the MMU does not
- * provide separate controls over reading and executing.
- *
- * Note: That code used to not be enabled for 4xx/BookE.
- * It is now as I/D cache coherency for these is done at
- * set_pte_at() time and I see no reason why the test
- * below wouldn't be valid on those processors. This -may-
- * break programs compiled with a really old ABI though.
- */
- if (!(vma->vm_flags & VM_EXEC) &&
- (cpu_has_feature(CPU_FTR_NOEXECUTE) ||
- !(vma->vm_flags & (VM_READ | VM_WRITE))))
- goto bad_area;
- /* a write */
- } else if (is_write) {
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- flags |= FAULT_FLAG_WRITE;
- /* a read */
- } else {
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
- goto bad_area;
- }
-#ifdef CONFIG_PPC_STD_MMU
- /*
- * For hash translation mode, we should never get a
- * PROTFAULT. Any update to pte to reduce access will result in us
- * removing the hash page table entry, thus resulting in a DSISR_NOHPTE
- * fault instead of DSISR_PROTFAULT.
- *
- * A pte update to relax the access will not result in a hash page table
- * entry invalidate and hence can result in DSISR_PROTFAULT.
- * ptep_set_access_flags() doesn't do a hpte flush. This is why we have
- * the special !is_write in the below conditional.
- *
- * For platforms that doesn't supports coherent icache and do support
- * per page noexec bit, we do setup things such that we do the
- * sync between D/I cache via fault. But that is handled via low level
- * hash fault code (hash_page_do_lazy_icache()) and we should not reach
- * here in such case.
- *
- * For wrong access that can result in PROTFAULT, the above vma->vm_flags
- * check should handle those and hence we should fall to the bad_area
- * handling correctly.
- *
- * For embedded with per page exec support that doesn't support coherent
- * icache we do get PROTFAULT and we handle that D/I cache sync in
- * set_pte_at while taking the noexec/prot fault. Hence this is WARN_ON
- * is conditional for server MMU.
- *
- * For radix, we can get prot fault for autonuma case, because radix
- * page table will have them marked noaccess for user.
- */
- if (!radix_enabled() && !is_write)
- WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
-#endif /* CONFIG_PPC_STD_MMU */
+ if (unlikely(access_error(is_write, is_exec, vma)))
+ return bad_area(regs, address);
/*
* If for any reason at all we couldn't handle the fault,
@@ -448,6 +498,7 @@ good_area:
* the fault.
*/
fault = handle_mm_fault(vma, address, flags);
+ major |= fault & VM_FAULT_MAJOR;
/*
* Handle the retry right now, the mmap_sem has been released in that
@@ -465,64 +516,39 @@ good_area:
if (!fatal_signal_pending(current))
goto retry;
}
- /* We will enter mm_fault_error() below */
- } else
- up_read(&current->mm->mmap_sem);
-
- if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
- if (fault & VM_FAULT_SIGSEGV)
- goto bad_area_nosemaphore;
- rc = mm_fault_error(regs, address, fault);
- if (rc >= MM_FAULT_RETURN)
- goto bail;
- else
- rc = 0;
+
+ /*
+ * User mode? Just return to handle the fatal exception otherwise
+ * return to bad_page_fault
+ */
+ return is_user ? 0 : SIGBUS;
}
+ up_read(&current->mm->mmap_sem);
+
+ if (unlikely(fault & VM_FAULT_ERROR))
+ return mm_fault_error(regs, address, fault);
+
/*
* Major/minor page fault accounting.
*/
- if (fault & VM_FAULT_MAJOR) {
+ if (major) {
current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
-#ifdef CONFIG_PPC_SMLPAR
- if (firmware_has_feature(FW_FEATURE_CMO)) {
- u32 page_ins;
-
- preempt_disable();
- page_ins = be32_to_cpu(get_lppaca()->page_ins);
- page_ins += 1 << PAGE_FACTOR;
- get_lppaca()->page_ins = cpu_to_be32(page_ins);
- preempt_enable();
- }
-#endif /* CONFIG_PPC_SMLPAR */
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+ cmo_account_page_fault();
} else {
current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- }
-
- goto bail;
-
-bad_area:
- up_read(&mm->mmap_sem);
-
-bad_area_nosemaphore:
- /* User mode accesses cause a SIGSEGV */
- if (is_user) {
- _exception(SIGSEGV, regs, code, address);
- goto bail;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
}
+ return 0;
+}
+NOKPROBE_SYMBOL(__do_page_fault);
- if (is_exec && (error_code & DSISR_PROTFAULT))
- printk_ratelimited(KERN_CRIT "kernel tried to execute NX-protected"
- " page (%lx) - exploit attempt? (uid: %d)\n",
- address, from_kuid(&init_user_ns, current_uid()));
-
- rc = SIGSEGV;
-
-bail:
+int do_page_fault(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code)
+{
+ enum ctx_state prev_state = exception_enter();
+ int rc = __do_page_fault(regs, address, error_code);
exception_exit(prev_state);
return rc;
}
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index 6f962e5cb5e1..ffbd7c0bda96 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -575,7 +575,6 @@ _GLOBAL(flush_hash_pages)
rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */
stwcx. r8,0,r5 /* update the pte */
bne- 33b
-EXPORT_SYMBOL(flush_hash_pages)
/* Get the address of the primary PTE group in the hash table (r3) */
_GLOBAL(flush_hash_patch_A)
@@ -634,6 +633,7 @@ _GLOBAL(flush_hash_patch_B)
SYNC_601
isync
blr
+EXPORT_SYMBOL(flush_hash_pages)
/*
* Flush an entry from the TLB
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 7a20669c19e7..67ec2e927253 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -61,6 +61,7 @@
#include <asm/tm.h>
#include <asm/trace.h>
#include <asm/ps3.h>
+#include <asm/pte-walk.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -507,9 +508,9 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
printk(KERN_INFO "Huge page(16GB) memory: "
"addr = 0x%lX size = 0x%lX pages = %d\n",
phys_addr, block_size, expected_pages);
- if (phys_addr + (16 * GB) <= memblock_end_of_DRAM()) {
+ if (phys_addr + block_size * expected_pages <= memblock_end_of_DRAM()) {
memblock_reserve(phys_addr, block_size * expected_pages);
- add_gpage(phys_addr, block_size, expected_pages);
+ pseries_add_gpage(phys_addr, block_size, expected_pages);
}
return 0;
}
@@ -1019,6 +1020,7 @@ void __init hash__early_init_mmu(void)
__kernel_virt_size = H_KERN_VIRT_SIZE;
__vmalloc_start = H_VMALLOC_START;
__vmalloc_end = H_VMALLOC_END;
+ __kernel_io_start = H_KERN_IO_START;
vmemmap = (struct page *)H_VMEMMAP_BASE;
ioremap_bot = IOREMAP_BASE;
@@ -1228,7 +1230,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
unsigned long vsid;
pte_t *ptep;
unsigned hugeshift;
- const struct cpumask *tmp;
int rc, user_region = 0;
int psize, ssize;
@@ -1280,8 +1281,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
}
/* Check CPU locality */
- tmp = cpumask_of(smp_processor_id());
- if (user_region && cpumask_equal(mm_cpumask(mm), tmp))
+ if (user_region && mm_is_thread_local(mm))
flags |= HPTE_LOCAL_UPDATE;
#ifndef CONFIG_PPC_64K_PAGES
@@ -1297,7 +1297,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
#endif /* CONFIG_PPC_64K_PAGES */
/* Get PTE and page size from page tables */
- ptep = __find_linux_pte_or_hugepte(pgdir, ea, &is_thp, &hugeshift);
+ ptep = find_linux_pte(pgdir, ea, &is_thp, &hugeshift);
if (ptep == NULL || !pte_present(*ptep)) {
DBG_LOW(" no PTE !\n");
rc = 1;
@@ -1526,7 +1526,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
* THP pages use update_mmu_cache_pmd. We don't do
* hash preload there. Hence can ignore THP here
*/
- ptep = find_linux_pte_or_hugepte(pgdir, ea, NULL, &hugepage_shift);
+ ptep = find_current_mm_pte(pgdir, ea, NULL, &hugepage_shift);
if (!ptep)
goto out_exit;
@@ -1543,7 +1543,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
#endif /* CONFIG_PPC_64K_PAGES */
/* Is that local to this CPU ? */
- if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ if (mm_is_thread_local(mm))
update_flags |= HPTE_LOCAL_UPDATE;
/* Hash it in */
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index e1bf5ca397fe..1571a498a33f 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -24,6 +24,8 @@
#include <asm/tlb.h>
#include <asm/setup.h>
#include <asm/hugetlb.h>
+#include <asm/pte-walk.h>
+
#ifdef CONFIG_HUGETLB_PAGE
@@ -36,32 +38,15 @@
unsigned int HPAGE_SHIFT;
EXPORT_SYMBOL(HPAGE_SHIFT);
-/*
- * Tracks gpages after the device tree is scanned and before the
- * huge_boot_pages list is ready. On non-Freescale implementations, this is
- * just used to track 16G pages and so is a single array. FSL-based
- * implementations may have more than one gpage size, so we need multiple
- * arrays
- */
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
-#define MAX_NUMBER_GPAGES 128
-struct psize_gpages {
- u64 gpage_list[MAX_NUMBER_GPAGES];
- unsigned int nr_gpages;
-};
-static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
-#else
-#define MAX_NUMBER_GPAGES 1024
-static u64 gpage_freearray[MAX_NUMBER_GPAGES];
-static unsigned nr_gpages;
-#endif
-
#define hugepd_none(hpd) (hpd_val(hpd) == 0)
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
- /* Only called for hugetlbfs pages, hence can ignore THP */
- return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
+ /*
+ * Only called for hugetlbfs pages, hence can ignore THP and the
+ * irq disabled walk.
+ */
+ return __find_linux_pte(mm->pgd, addr, NULL, NULL);
}
static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
@@ -210,145 +195,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
return hugepte_offset(*hpdp, addr, pdshift);
}
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
-/* Build list of addresses of gigantic pages. This function is used in early
- * boot before the buddy allocator is setup.
- */
-void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
-{
- unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
- int i;
-
- if (addr == 0)
- return;
-
- gpage_freearray[idx].nr_gpages = number_of_pages;
-
- for (i = 0; i < number_of_pages; i++) {
- gpage_freearray[idx].gpage_list[i] = addr;
- addr += page_size;
- }
-}
-
-/*
- * Moves the gigantic page addresses from the temporary list to the
- * huge_boot_pages list.
- */
-int alloc_bootmem_huge_page(struct hstate *hstate)
-{
- struct huge_bootmem_page *m;
- int idx = shift_to_mmu_psize(huge_page_shift(hstate));
- int nr_gpages = gpage_freearray[idx].nr_gpages;
-
- if (nr_gpages == 0)
- return 0;
-
-#ifdef CONFIG_HIGHMEM
- /*
- * If gpages can be in highmem we can't use the trick of storing the
- * data structure in the page; allocate space for this
- */
- m = memblock_virt_alloc(sizeof(struct huge_bootmem_page), 0);
- m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
-#else
- m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
-#endif
-
- list_add(&m->list, &huge_boot_pages);
- gpage_freearray[idx].nr_gpages = nr_gpages;
- gpage_freearray[idx].gpage_list[nr_gpages] = 0;
- m->hstate = hstate;
-
- return 1;
-}
+#ifdef CONFIG_PPC_BOOK3S_64
/*
- * Scan the command line hugepagesz= options for gigantic pages; store those in
- * a list that we use to allocate the memory once all options are parsed.
+ * Tracks gpages after the device tree is scanned and before the
+ * huge_boot_pages list is ready on pseries.
*/
-
-unsigned long gpage_npages[MMU_PAGE_COUNT];
-
-static int __init do_gpage_early_setup(char *param, char *val,
- const char *unused, void *arg)
-{
- static phys_addr_t size;
- unsigned long npages;
-
- /*
- * The hugepagesz and hugepages cmdline options are interleaved. We
- * use the size variable to keep track of whether or not this was done
- * properly and skip over instances where it is incorrect. Other
- * command-line parsing code will issue warnings, so we don't need to.
- *
- */
- if ((strcmp(param, "default_hugepagesz") == 0) ||
- (strcmp(param, "hugepagesz") == 0)) {
- size = memparse(val, NULL);
- } else if (strcmp(param, "hugepages") == 0) {
- if (size != 0) {
- if (sscanf(val, "%lu", &npages) <= 0)
- npages = 0;
- if (npages > MAX_NUMBER_GPAGES) {
- pr_warn("MMU: %lu pages requested for page "
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
- "size %llu KB, limiting to "
-#else
- "size %u KB, limiting to "
-#endif
- __stringify(MAX_NUMBER_GPAGES) "\n",
- npages, size / 1024);
- npages = MAX_NUMBER_GPAGES;
- }
- gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
- size = 0;
- }
- }
- return 0;
-}
-
+#define MAX_NUMBER_GPAGES 1024
+__initdata static u64 gpage_freearray[MAX_NUMBER_GPAGES];
+__initdata static unsigned nr_gpages;
/*
- * This function allocates physical space for pages that are larger than the
- * buddy allocator can handle. We want to allocate these in highmem because
- * the amount of lowmem is limited. This means that this function MUST be
- * called before lowmem_end_addr is set up in MMU_init() in order for the lmb
- * allocate to grab highmem.
- */
-void __init reserve_hugetlb_gpages(void)
-{
- static __initdata char cmdline[COMMAND_LINE_SIZE];
- phys_addr_t size, base;
- int i;
-
- strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
- parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0,
- NULL, &do_gpage_early_setup);
-
- /*
- * Walk gpage list in reverse, allocating larger page sizes first.
- * Skip over unsupported sizes, or sizes that have 0 gpages allocated.
- * When we reach the point in the list where pages are no longer
- * considered gpages, we're done.
- */
- for (i = MMU_PAGE_COUNT-1; i >= 0; i--) {
- if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0)
- continue;
- else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT))
- break;
-
- size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i));
- base = memblock_alloc_base(size * gpage_npages[i], size,
- MEMBLOCK_ALLOC_ANYWHERE);
- add_gpage(base, size, gpage_npages[i]);
- }
-}
-
-#else /* !PPC_FSL_BOOK3E */
-
-/* Build list of addresses of gigantic pages. This function is used in early
+ * Build list of addresses of gigantic pages. This function is used in early
* boot before the buddy allocator is setup.
*/
-void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
+void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
if (!addr)
return;
@@ -360,10 +220,7 @@ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
}
}
-/* Moves the gigantic page addresses from the temporary list to the
- * huge_boot_pages list.
- */
-int alloc_bootmem_huge_page(struct hstate *hstate)
+int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
{
struct huge_bootmem_page *m;
if (nr_gpages == 0)
@@ -376,6 +233,17 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
}
#endif
+
+int __init alloc_bootmem_huge_page(struct hstate *h)
+{
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled())
+ return pseries_alloc_bootmem_huge_page(h);
+#endif
+ return __alloc_bootmem_huge_page(h);
+}
+
#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
#define HUGEPD_FREELIST_SIZE \
((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
@@ -407,8 +275,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
batchp = &get_cpu_var(hugepd_freelist_cur);
if (atomic_read(&tlb->mm->mm_users) < 2 ||
- cpumask_equal(mm_cpumask(tlb->mm),
- cpumask_of(smp_processor_id()))) {
+ mm_is_thread_local(tlb->mm)) {
kmem_cache_free(hugepte_cache, hugepte);
put_cpu_var(hugepd_freelist_cur);
return;
@@ -886,9 +753,8 @@ void flush_dcache_icache_hugepage(struct page *page)
* This function need to be called with interrupts disabled. We use this variant
* when we have MSR[EE] = 0 but the paca->soft_enabled = 1
*/
-
-pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
- bool *is_thp, unsigned *shift)
+pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
+ bool *is_thp, unsigned *hpage_shift)
{
pgd_t pgd, *pgdp;
pud_t pud, *pudp;
@@ -897,8 +763,8 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
hugepd_t *hpdp = NULL;
unsigned pdshift = PGDIR_SHIFT;
- if (shift)
- *shift = 0;
+ if (hpage_shift)
+ *hpage_shift = 0;
if (is_thp)
*is_thp = false;
@@ -968,16 +834,15 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
ret_pte = hugepte_offset(*hpdp, ea, pdshift);
pdshift = hugepd_shift(*hpdp);
out:
- if (shift)
- *shift = pdshift;
+ if (hpage_shift)
+ *hpage_shift = pdshift;
return ret_pte;
}
-EXPORT_SYMBOL_GPL(__find_linux_pte_or_hugepte);
+EXPORT_SYMBOL_GPL(__find_linux_pte);
int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
- unsigned long mask;
unsigned long pte_end;
struct page *head, *page;
pte_t pte;
@@ -988,18 +853,10 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
end = pte_end;
pte = READ_ONCE(*ptep);
- mask = _PAGE_PRESENT | _PAGE_READ;
- /*
- * On some CPUs like the 8xx, _PAGE_RW hence _PAGE_WRITE is defined
- * as 0 and _PAGE_RO has to be set when a page is not writable
- */
- if (write)
- mask |= _PAGE_WRITE;
- else
- mask |= _PAGE_RO;
-
- if ((pte_val(pte) & mask) != mask)
+ if (!pte_present(pte) || !pte_read(pte))
+ return 0;
+ if (write && !pte_write(pte))
return 0;
/* hugepages are never "special" */
diff --git a/arch/powerpc/mm/icswx.c b/arch/powerpc/mm/icswx.c
deleted file mode 100644
index 1fa794d7d59f..000000000000
--- a/arch/powerpc/mm/icswx.c
+++ /dev/null
@@ -1,292 +0,0 @@
-/*
- * ICSWX and ACOP Management
- *
- * Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/uaccess.h>
-
-#include "icswx.h"
-
-/*
- * The processor and its L2 cache cause the icswx instruction to
- * generate a COP_REQ transaction on PowerBus. The transaction has no
- * address, and the processor does not perform an MMU access to
- * authenticate the transaction. The command portion of the PowerBus
- * COP_REQ transaction includes the LPAR_ID (LPID) and the coprocessor
- * Process ID (PID), which the coprocessor compares to the authorized
- * LPID and PID held in the coprocessor, to determine if the process
- * is authorized to generate the transaction. The data of the COP_REQ
- * transaction is 128-byte or less in size and is placed in cacheable
- * memory on a 128-byte cache line boundary.
- *
- * The task to use a coprocessor should use use_cop() to mark the use
- * of the Coprocessor Type (CT) and context switching. On a server
- * class processor, the PID register is used only for coprocessor
- * management + * and so a coprocessor PID is allocated before
- * executing icswx + * instruction. Drop_cop() is used to free the
- * coprocessor PID.
- *
- * Example:
- * Host Fabric Interface (HFI) is a PowerPC network coprocessor.
- * Each HFI have multiple windows. Each HFI window serves as a
- * network device sending to and receiving from HFI network.
- * HFI immediate send function uses icswx instruction. The immediate
- * send function allows small (single cache-line) packets be sent
- * without using the regular HFI send FIFO and doorbell, which are
- * much slower than immediate send.
- *
- * For each task intending to use HFI immediate send, the HFI driver
- * calls use_cop() to obtain a coprocessor PID for the task.
- * The HFI driver then allocate a free HFI window and save the
- * coprocessor PID to the HFI window to allow the task to use the
- * HFI window.
- *
- * The HFI driver repeatedly creates immediate send packets and
- * issues icswx instruction to send data through the HFI window.
- * The HFI compares the coprocessor PID in the CPU PID register
- * to the PID held in the HFI window to determine if the transaction
- * is allowed.
- *
- * When the task to release the HFI window, the HFI driver calls
- * drop_cop() to release the coprocessor PID.
- */
-
-void switch_cop(struct mm_struct *next)
-{
-#ifdef CONFIG_PPC_ICSWX_PID
- mtspr(SPRN_PID, next->context.cop_pid);
-#endif
- mtspr(SPRN_ACOP, next->context.acop);
-}
-
-/**
- * Start using a coprocessor.
- * @acop: mask of coprocessor to be used.
- * @mm: The mm the coprocessor to associate with. Most likely current mm.
- *
- * Return a positive PID if successful. Negative errno otherwise.
- * The returned PID will be fed to the coprocessor to determine if an
- * icswx transaction is authenticated.
- */
-int use_cop(unsigned long acop, struct mm_struct *mm)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_ICSWX))
- return -ENODEV;
-
- if (!mm || !acop)
- return -EINVAL;
-
- /* The page_table_lock ensures mm_users won't change under us */
- spin_lock(&mm->page_table_lock);
- spin_lock(mm->context.cop_lockp);
-
- ret = get_cop_pid(mm);
- if (ret < 0)
- goto out;
-
- /* update acop */
- mm->context.acop |= acop;
-
- sync_cop(mm);
-
- /*
- * If this is a threaded process then there might be other threads
- * running. We need to send an IPI to force them to pick up any
- * change in PID and ACOP.
- */
- if (atomic_read(&mm->mm_users) > 1)
- smp_call_function(sync_cop, mm, 1);
-
-out:
- spin_unlock(mm->context.cop_lockp);
- spin_unlock(&mm->page_table_lock);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(use_cop);
-
-/**
- * Stop using a coprocessor.
- * @acop: mask of coprocessor to be stopped.
- * @mm: The mm the coprocessor associated with.
- */
-void drop_cop(unsigned long acop, struct mm_struct *mm)
-{
- int free_pid;
-
- if (!cpu_has_feature(CPU_FTR_ICSWX))
- return;
-
- if (WARN_ON_ONCE(!mm))
- return;
-
- /* The page_table_lock ensures mm_users won't change under us */
- spin_lock(&mm->page_table_lock);
- spin_lock(mm->context.cop_lockp);
-
- mm->context.acop &= ~acop;
-
- free_pid = disable_cop_pid(mm);
- sync_cop(mm);
-
- /*
- * If this is a threaded process then there might be other threads
- * running. We need to send an IPI to force them to pick up any
- * change in PID and ACOP.
- */
- if (atomic_read(&mm->mm_users) > 1)
- smp_call_function(sync_cop, mm, 1);
-
- if (free_pid != COP_PID_NONE)
- free_cop_pid(free_pid);
-
- spin_unlock(mm->context.cop_lockp);
- spin_unlock(&mm->page_table_lock);
-}
-EXPORT_SYMBOL_GPL(drop_cop);
-
-static int acop_use_cop(int ct)
-{
- /* There is no alternate policy, yet */
- return -1;
-}
-
-/*
- * Get the instruction word at the NIP
- */
-static u32 acop_get_inst(struct pt_regs *regs)
-{
- u32 inst;
- u32 __user *p;
-
- p = (u32 __user *)regs->nip;
- if (!access_ok(VERIFY_READ, p, sizeof(*p)))
- return 0;
-
- if (__get_user(inst, p))
- return 0;
-
- return inst;
-}
-
-/**
- * @regs: registers at time of interrupt
- * @address: storage address
- * @error_code: Fault code, usually the DSISR or ESR depending on
- * processor type
- *
- * Return 0 if we are able to resolve the data storage fault that
- * results from a CT miss in the ACOP register.
- */
-int acop_handle_fault(struct pt_regs *regs, unsigned long address,
- unsigned long error_code)
-{
- int ct;
- u32 inst = 0;
-
- if (!cpu_has_feature(CPU_FTR_ICSWX)) {
- pr_info("No coprocessors available");
- _exception(SIGILL, regs, ILL_ILLOPN, address);
- }
-
- if (!user_mode(regs)) {
- /* this could happen if the HV denies the
- * kernel access, for now we just die */
- die("ICSWX from kernel failed", regs, SIGSEGV);
- }
-
- /* Some implementations leave us a hint for the CT */
- ct = ICSWX_GET_CT_HINT(error_code);
- if (ct < 0) {
- /* we have to peek at the instruction word to figure out CT */
- u32 ccw;
- u32 rs;
-
- inst = acop_get_inst(regs);
- if (inst == 0)
- return -1;
-
- rs = (inst >> (31 - 10)) & 0x1f;
- ccw = regs->gpr[rs];
- ct = (ccw >> 16) & 0x3f;
- }
-
- /*
- * We could be here because another thread has enabled acop
- * but the ACOP register has yet to be updated.
- *
- * This should have been taken care of by the IPI to sync all
- * the threads (see smp_call_function(sync_cop, mm, 1)), but
- * that could take forever if there are a significant amount
- * of threads.
- *
- * Given the number of threads on some of these systems,
- * perhaps this is the best way to sync ACOP rather than whack
- * every thread with an IPI.
- */
- if ((acop_copro_type_bit(ct) & current->active_mm->context.acop) != 0) {
- sync_cop(current->active_mm);
- return 0;
- }
-
- /* check for alternate policy */
- if (!acop_use_cop(ct))
- return 0;
-
- /* at this point the CT is unknown to the system */
- pr_warn("%s[%d]: Coprocessor %d is unavailable\n",
- current->comm, current->pid, ct);
-
- /* get inst if we don't already have it */
- if (inst == 0) {
- inst = acop_get_inst(regs);
- if (inst == 0)
- return -1;
- }
-
- /* Check if the instruction is the "record form" */
- if (inst & 1) {
- /*
- * the instruction is "record" form so we can reject
- * using CR0
- */
- regs->ccr &= ~(0xful << 28);
- regs->ccr |= ICSWX_RC_NOT_FOUND << 28;
-
- /* Move on to the next instruction */
- regs->nip += 4;
- } else {
- /*
- * There is no architected mechanism to report a bad
- * CT so we could either SIGILL or report nothing.
- * Since the non-record version should only bu used
- * for "hints" or "don't care" we should probably do
- * nothing. However, I could see how some people
- * might want an SIGILL so it here if you want it.
- */
-#ifdef CONFIG_PPC_ICSWX_USE_SIGILL
- _exception(SIGILL, regs, ILL_ILLOPN, address);
-#else
- regs->nip += 4;
-#endif
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(acop_handle_fault);
diff --git a/arch/powerpc/mm/icswx.h b/arch/powerpc/mm/icswx.h
deleted file mode 100644
index 6dedc08e62c8..000000000000
--- a/arch/powerpc/mm/icswx.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef _ARCH_POWERPC_MM_ICSWX_H_
-#define _ARCH_POWERPC_MM_ICSWX_H_
-
-/*
- * ICSWX and ACOP Management
- *
- * Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <asm/mmu_context.h>
-
-/* also used to denote that PIDs are not used */
-#define COP_PID_NONE 0
-
-static inline void sync_cop(void *arg)
-{
- struct mm_struct *mm = arg;
-
- if (mm == current->active_mm)
- switch_cop(current->active_mm);
-}
-
-#ifdef CONFIG_PPC_ICSWX_PID
-extern int get_cop_pid(struct mm_struct *mm);
-extern int disable_cop_pid(struct mm_struct *mm);
-extern void free_cop_pid(int free_pid);
-#else
-#define get_cop_pid(m) (COP_PID_NONE)
-#define disable_cop_pid(m) (COP_PID_NONE)
-#define free_cop_pid(p)
-#endif
-
-/*
- * These are implementation bits for architected registers. If this
- * ever becomes architecture the should be moved to reg.h et. al.
- */
-/* UCT is the same bit for Server and Embedded */
-#define ICSWX_DSI_UCT 0x00004000 /* Unavailable Coprocessor Type */
-
-#ifdef CONFIG_PPC_BOOK3E
-/* Embedded implementation gives us no hints as to what the CT is */
-#define ICSWX_GET_CT_HINT(x) (-1)
-#else
-/* Server implementation contains the CT value in the DSISR */
-#define ICSWX_DSISR_CTMASK 0x00003f00
-#define ICSWX_GET_CT_HINT(x) (((x) & ICSWX_DSISR_CTMASK) >> 8)
-#endif
-
-#define ICSWX_RC_STARTED 0x8 /* The request has been started */
-#define ICSWX_RC_NOT_IDLE 0x4 /* No coprocessor found idle */
-#define ICSWX_RC_NOT_FOUND 0x2 /* No coprocessor found */
-#define ICSWX_RC_UNDEFINED 0x1 /* Reserved */
-
-extern int acop_handle_fault(struct pt_regs *regs, unsigned long address,
- unsigned long error_code);
-
-static inline u64 acop_copro_type_bit(unsigned int type)
-{
- return 1ULL << (63 - type);
-}
-
-#endif /* !_ARCH_POWERPC_MM_ICSWX_H_ */
diff --git a/arch/powerpc/mm/icswx_pid.c b/arch/powerpc/mm/icswx_pid.c
deleted file mode 100644
index 91e30eb7d054..000000000000
--- a/arch/powerpc/mm/icswx_pid.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * ICSWX and ACOP/PID Management
- *
- * Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/idr.h>
-#include <linux/module.h>
-#include "icswx.h"
-
-#define COP_PID_MIN (COP_PID_NONE + 1)
-#define COP_PID_MAX (0xFFFF)
-
-static DEFINE_SPINLOCK(mmu_context_acop_lock);
-static DEFINE_IDA(cop_ida);
-
-static int new_cop_pid(struct ida *ida, int min_id, int max_id,
- spinlock_t *lock)
-{
- int index;
- int err;
-
-again:
- if (!ida_pre_get(ida, GFP_KERNEL))
- return -ENOMEM;
-
- spin_lock(lock);
- err = ida_get_new_above(ida, min_id, &index);
- spin_unlock(lock);
-
- if (err == -EAGAIN)
- goto again;
- else if (err)
- return err;
-
- if (index > max_id) {
- spin_lock(lock);
- ida_remove(ida, index);
- spin_unlock(lock);
- return -ENOMEM;
- }
-
- return index;
-}
-
-int get_cop_pid(struct mm_struct *mm)
-{
- int pid;
-
- if (mm->context.cop_pid == COP_PID_NONE) {
- pid = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX,
- &mmu_context_acop_lock);
- if (pid >= 0)
- mm->context.cop_pid = pid;
- }
- return mm->context.cop_pid;
-}
-
-int disable_cop_pid(struct mm_struct *mm)
-{
- int free_pid = COP_PID_NONE;
-
- if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) {
- free_pid = mm->context.cop_pid;
- mm->context.cop_pid = COP_PID_NONE;
- }
- return free_pid;
-}
-
-void free_cop_pid(int free_pid)
-{
- spin_lock(&mmu_context_acop_lock);
- ida_remove(&cop_ida, free_pid);
- spin_unlock(&mmu_context_acop_lock);
-}
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 8a7c38b8d335..6419b33ca309 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -113,6 +113,12 @@ void __init MMU_setup(void)
__map_without_bats = 1;
__map_without_ltlbs = 1;
}
+#ifdef CONFIG_STRICT_KERNEL_RWX
+ if (rodata_enabled) {
+ __map_without_bats = 1;
+ __map_without_ltlbs = 1;
+ }
+#endif
}
/*
@@ -132,8 +138,6 @@ void __init MMU_init(void)
* Reserve gigantic pages for hugetlb. This MUST occur before
* lowmem_end_addr is initialized below.
*/
- reserve_hugetlb_gpages();
-
if (memblock.memory.cnt > 1) {
#ifndef CONFIG_WII
memblock_enforce_memory_limit(memblock.memory.regions[0].size);
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 5b4c25d12ff3..588a521966ec 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -356,7 +356,7 @@ struct page *realmode_pfn_to_page(unsigned long pfn)
}
EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
-#elif defined(CONFIG_FLATMEM)
+#else
struct page *realmode_pfn_to_page(unsigned long pfn)
{
@@ -365,7 +365,7 @@ struct page *realmode_pfn_to_page(unsigned long pfn)
}
EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
-#endif /* CONFIG_SPARSEMEM_VMEMMAP/CONFIG_FLATMEM */
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
#ifdef CONFIG_PPC_STD_MMU_64
static bool disable_radix;
@@ -381,7 +381,7 @@ early_param("disable_radix", parse_disable_radix);
* /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do
* radix. If not, we clear the radix feature bit so we fall back to hash.
*/
-static void early_check_vec5(void)
+static void __init early_check_vec5(void)
{
unsigned long root, chosen;
int size;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 46b4e67d2372..4362b86ef84c 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -436,7 +436,7 @@ void flush_dcache_icache_page(struct page *page)
return;
}
#endif
-#if defined(CONFIG_8xx) || defined(CONFIG_PPC64)
+#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64)
/* On 8xx there is no need to kmap since highmem is not supported */
__flush_dcache_icache(page_address(page));
#else
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
new file mode 100644
index 000000000000..0f613bc63c50
--- /dev/null
+++ b/arch/powerpc/mm/mmu_context.c
@@ -0,0 +1,99 @@
+/*
+ * Common implementation of switch_mm_irqs_off
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/cpu.h>
+
+#include <asm/mmu_context.h>
+
+#if defined(CONFIG_PPC32)
+static inline void switch_mm_pgdir(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+ /* 32-bit keeps track of the current PGDIR in the thread struct */
+ tsk->thread.pgdir = mm->pgd;
+}
+#elif defined(CONFIG_PPC_BOOK3E_64)
+static inline void switch_mm_pgdir(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+ /* 64-bit Book3E keeps track of current PGD in the PACA */
+ get_paca()->pgd = mm->pgd;
+}
+#else
+static inline void switch_mm_pgdir(struct task_struct *tsk,
+ struct mm_struct *mm) { }
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline void inc_mm_active_cpus(struct mm_struct *mm)
+{
+ atomic_inc(&mm->context.active_cpus);
+}
+#else
+static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
+#endif
+
+void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ bool new_on_cpu = false;
+
+ /* Mark that this context has been used on the new CPU */
+ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+ inc_mm_active_cpus(next);
+
+ /*
+ * This full barrier orders the store to the cpumask above vs
+ * a subsequent operation which allows this CPU to begin loading
+ * translations for next.
+ *
+ * When using the radix MMU that operation is the load of the
+ * MMU context id, which is then moved to SPRN_PID.
+ *
+ * For the hash MMU it is either the first load from slb_cache
+ * in switch_slb(), and/or the store of paca->mm_ctx_id in
+ * copy_mm_to_paca().
+ *
+ * On the read side the barrier is in pte_xchg(), which orders
+ * the store to the PTE vs the load of mm_cpumask.
+ */
+ smp_mb();
+
+ new_on_cpu = true;
+ }
+
+ /* Some subarchs need to track the PGD elsewhere */
+ switch_mm_pgdir(tsk, next);
+
+ /* Nothing else to do if we aren't actually switching */
+ if (prev == next)
+ return;
+
+ /*
+ * We must stop all altivec streams before changing the HW
+ * context
+ */
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ asm volatile ("dssall");
+
+ if (new_on_cpu)
+ radix_kvm_prefetch_workaround(next);
+
+ /*
+ * The actual HW switching method differs between the various
+ * sub architectures. Out of line for now
+ */
+ switch_mmu_context(prev, next, tsk);
+}
+
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index a75f63833284..05e15386d4cb 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -25,8 +25,6 @@
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
-#include "icswx.h"
-
static DEFINE_SPINLOCK(mmu_context_lock);
static DEFINE_IDA(mmu_context_ida);
@@ -165,16 +163,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
return index;
mm->context.id = index;
-#ifdef CONFIG_PPC_ICSWX
- mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
- if (!mm->context.cop_lockp) {
- __destroy_context(index);
- subpage_prot_free(mm);
- mm->context.id = MMU_NO_CONTEXT;
- return -ENOMEM;
- }
- spin_lock_init(mm->context.cop_lockp);
-#endif /* CONFIG_PPC_ICSWX */
#ifdef CONFIG_PPC_64K_PAGES
mm->context.pte_frag = NULL;
@@ -182,6 +170,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
#ifdef CONFIG_SPAPR_TCE_IOMMU
mm_iommu_init(mm);
#endif
+ atomic_set(&mm->context.active_cpus, 0);
+
return 0;
}
@@ -226,12 +216,6 @@ void destroy_context(struct mm_struct *mm)
#ifdef CONFIG_SPAPR_TCE_IOMMU
WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
-#ifdef CONFIG_PPC_ICSWX
- drop_cop(mm->context.acop, mm);
- kfree(mm->context.cop_lockp);
- mm->context.cop_lockp = NULL;
-#endif /* CONFIG_PPC_ICSWX */
-
if (radix_enabled()) {
/*
* Radix doesn't have a valid bit in the process table
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index d46128b22150..57fbc554c785 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -27,7 +27,7 @@
/*
* On 40x and 8xx, we directly inline tlbia and tlbivax
*/
-#if defined(CONFIG_40x) || defined(CONFIG_8xx)
+#if defined(CONFIG_40x) || defined(CONFIG_PPC_8xx)
static inline void _tlbil_all(void)
{
asm volatile ("sync; tlbia; isync" : : : "memory");
@@ -38,7 +38,7 @@ static inline void _tlbil_pid(unsigned int pid)
}
#define _tlbil_pid_noind(pid) _tlbil_pid(pid)
-#else /* CONFIG_40x || CONFIG_8xx */
+#else /* CONFIG_40x || CONFIG_PPC_8xx */
extern void _tlbil_all(void);
extern void _tlbil_pid(unsigned int pid);
#ifdef CONFIG_PPC_BOOK3E
@@ -46,12 +46,12 @@ extern void _tlbil_pid_noind(unsigned int pid);
#else
#define _tlbil_pid_noind(pid) _tlbil_pid(pid)
#endif
-#endif /* !(CONFIG_40x || CONFIG_8xx) */
+#endif /* !(CONFIG_40x || CONFIG_PPC_8xx) */
/*
* On 8xx, we directly inline tlbie, on others, it's extern
*/
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
static inline void _tlbil_va(unsigned long address, unsigned int pid,
unsigned int tsize, unsigned int ind)
{
@@ -67,7 +67,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid,
{
__tlbil_va(address, pid);
}
-#endif /* CONFIG_8xx */
+#endif /* CONFIG_PPC_8xx */
#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_PPC_47x)
extern void _tlbivax_bcast(unsigned long address, unsigned int pid,
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 31eed8fa8e99..3b65917785a5 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -9,6 +9,7 @@
#include <linux/sched.h>
#include <linux/mm_types.h>
+#include <misc/cxl-base.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
@@ -64,6 +65,27 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
trace_hugepage_set_pmd(addr, pmd_val(pmd));
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}
+
+static void do_nothing(void *unused)
+{
+
+}
+/*
+ * Serialize against find_current_mm_pte which does lock-less
+ * lookup in page tables with local interrupts disabled. For huge pages
+ * it casts pmd_t to pte_t. Since format of pte_t is different from
+ * pmd_t we want to prevent transit from pmd pointing to page table
+ * to pmd pointing to huge page (and back) while interrupts are disabled.
+ * We clear pmd to possibly replace it with page table pointer in
+ * different code paths. So make sure we wait for the parallel
+ * find_current_mm_pte to finish.
+ */
+void serialize_against_pte_lookup(struct mm_struct *mm)
+{
+ smp_mb();
+ smp_call_function_many(mm_cpumask(mm), do_nothing, NULL, 1);
+}
+
/*
* We use this to invalidate a pmdp entry before switching from a
* hugepte to regular pmd entry.
@@ -77,7 +99,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
* This ensures that generic code that rely on IRQ disabling
* to prevent a parallel THP split work as expected.
*/
- kick_all_cpus_sync();
+ serialize_against_pte_lookup(vma->vm_mm);
}
static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index 443a2c66a304..ec277913e01b 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -239,7 +239,7 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addres
* by sending an IPI to all the cpus and executing a dummy
* function there.
*/
- kick_all_cpus_sync();
+ serialize_against_pte_lookup(vma->vm_mm);
/*
* Now invalidate the hpte entries in the range
* covered by pmd. This make sure we take a
@@ -329,7 +329,6 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
unsigned int psize;
unsigned long vsid;
unsigned long flags = 0;
- const struct cpumask *tmp;
/* get the base page size,vsid and segment size */
#ifdef CONFIG_DEBUG_VM
@@ -350,8 +349,7 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
ssize = mmu_kernel_ssize;
}
- tmp = cpumask_of(smp_processor_id());
- if (cpumask_equal(mm_cpumask(mm), tmp))
+ if (mm_is_thread_local(mm))
flags |= HPTE_LOCAL_UPDATE;
return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
@@ -380,16 +378,16 @@ pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
*/
memset(pgtable, 0, PTE_FRAG_SIZE);
/*
- * Serialize against find_linux_pte_or_hugepte which does lock-less
+ * Serialize against find_current_mm_pte variants which do lock-less
* lookup in page tables with local interrupts disabled. For huge pages
* it casts pmd_t to pte_t. Since format of pte_t is different from
* pmd_t we want to prevent transit from pmd pointing to page table
* to pmd pointing to huge page (and back) while interrupts are disabled.
* We clear pmd to possibly replace it with page table pointer in
* different code paths. So make sure we wait for the parallel
- * find_linux_pte_or_hugepage to finish.
+ * find_current_mm_pte to finish.
*/
- kick_all_cpus_sync();
+ serialize_against_pte_lookup(mm);
return old_pmd;
}
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 671a45d86c18..39c252b54d16 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -8,10 +8,15 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+
+#define pr_fmt(fmt) "radix-mmu: " fmt
+
+#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/mm.h>
+#include <linux/string_helpers.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -31,9 +36,13 @@ unsigned int mmu_base_pid;
static int native_register_process_table(unsigned long base, unsigned long pg_sz,
unsigned long table_size)
{
- unsigned long patb1 = base | table_size | PATB_GR;
+ unsigned long patb0, patb1;
+
+ patb0 = be64_to_cpu(partition_tb[0].patb0);
+ patb1 = base | table_size | PATB_GR;
+
+ mmu_partition_table_set_entry(0, patb0, patb1);
- partition_tb->patb1 = cpu_to_be64(patb1);
return 0;
}
@@ -179,10 +188,14 @@ static inline void __meminit print_mapping(unsigned long start,
unsigned long end,
unsigned long size)
{
+ char buf[10];
+
if (end <= start)
return;
- pr_info("Mapped range 0x%lx - 0x%lx with 0x%lx\n", start, end, size);
+ string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));
+
+ pr_info("Mapped 0x%016lx-0x%016lx with %s pages\n", start, end, buf);
}
static int __meminit create_physical_mapping(unsigned long start,
@@ -526,6 +539,7 @@ void __init radix__early_init_mmu(void)
__kernel_virt_size = RADIX_KERN_VIRT_SIZE;
__vmalloc_start = RADIX_VMALLOC_START;
__vmalloc_end = RADIX_VMALLOC_END;
+ __kernel_io_start = RADIX_KERN_IO_START;
vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
ioremap_bot = IOREMAP_BASE;
@@ -836,9 +850,12 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre
*/
pmd = *pmdp;
pmd_clear(pmdp);
+
/*FIXME!! Verify whether we need this kick below */
- kick_all_cpus_sync();
- flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ serialize_against_pte_lookup(vma->vm_mm);
+
+ radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);
+
return pmd;
}
@@ -897,16 +914,16 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
old_pmd = __pmd(old);
/*
- * Serialize against find_linux_pte_or_hugepte which does lock-less
+ * Serialize against find_current_mm_pte which does lock-less
* lookup in page tables with local interrupts disabled. For huge pages
* it casts pmd_t to pte_t. Since format of pte_t is different from
* pmd_t we want to prevent transit from pmd pointing to page table
* to pmd pointing to huge page (and back) while interrupts are disabled.
* We clear pmd to possibly replace it with page table pointer in
* different code paths. So make sure we wait for the parallel
- * find_linux_pte_or_hugepage to finish.
+ * find_current_mm_pte to finish.
*/
- kick_all_cpus_sync();
+ serialize_against_pte_lookup(mm);
return old_pmd;
}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index a9e4bfc025bc..65eda1997c3f 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -34,6 +34,7 @@
#include <asm/fixmap.h>
#include <asm/io.h>
#include <asm/setup.h>
+#include <asm/sections.h>
#include "mmu_decl.h"
@@ -242,7 +243,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, int flags)
/*
* Map in a chunk of physical memory starting at start.
*/
-void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
+static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
{
unsigned long v, s, f;
phys_addr_t p;
@@ -294,7 +295,7 @@ void __init mapin_ram(void)
* Returns true (1) if PTE was found, zero otherwise. The pointer to
* the PTE pointer is unmodified if PTE is not found.
*/
-int
+static int
get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp)
{
pgd_t *pgd;
@@ -323,9 +324,7 @@ get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp)
return(retval);
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
-
-static int __change_page_attr(struct page *page, pgprot_t prot)
+static int __change_page_attr_noflush(struct page *page, pgprot_t prot)
{
pte_t *kpte;
pmd_t *kpmd;
@@ -339,8 +338,6 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
if (!get_pteptr(&init_mm, address, &kpte, &kpmd))
return -EINVAL;
__set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0);
- wmb();
- flush_tlb_page(NULL, address);
pte_unmap(kpte);
return 0;
@@ -349,44 +346,65 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
/*
* Change the page attributes of an page in the linear mapping.
*
- * THIS CONFLICTS WITH BAT MAPPINGS, DEBUG USE ONLY
+ * THIS DOES NOTHING WITH BAT MAPPINGS, DEBUG USE ONLY
*/
static int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
int i, err = 0;
unsigned long flags;
+ struct page *start = page;
local_irq_save(flags);
for (i = 0; i < numpages; i++, page++) {
- err = __change_page_attr(page, prot);
+ err = __change_page_attr_noflush(page, prot);
if (err)
break;
}
+ wmb();
+ flush_tlb_kernel_range((unsigned long)page_address(start),
+ (unsigned long)page_address(page));
local_irq_restore(flags);
return err;
}
-
-void __kernel_map_pages(struct page *page, int numpages, int enable)
+void mark_initmem_nx(void)
{
- if (PageHighMem(page))
- return;
+ struct page *page = virt_to_page(_sinittext);
+ unsigned long numpages = PFN_UP((unsigned long)_einittext) -
+ PFN_DOWN((unsigned long)_sinittext);
- change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+ change_page_attr(page, numpages, PAGE_KERNEL);
}
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-static int fixmaps;
-
-void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void mark_rodata_ro(void)
{
- unsigned long address = __fix_to_virt(idx);
+ struct page *page;
+ unsigned long numpages;
+
+ page = virt_to_page(_stext);
+ numpages = PFN_UP((unsigned long)_etext) -
+ PFN_DOWN((unsigned long)_stext);
- if (idx >= __end_of_fixed_addresses) {
- BUG();
+ change_page_attr(page, numpages, PAGE_KERNEL_ROX);
+ /*
+ * mark .rodata as read only. Use __init_begin rather than __end_rodata
+ * to cover NOTES and EXCEPTION_TABLE.
+ */
+ page = virt_to_page(__start_rodata);
+ numpages = PFN_UP((unsigned long)__init_begin) -
+ PFN_DOWN((unsigned long)__start_rodata);
+
+ change_page_attr(page, numpages, PAGE_KERNEL_RO);
+}
+#endif
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ if (PageHighMem(page))
return;
- }
- map_kernel_page(address, phys, pgprot_val(flags));
- fixmaps++;
+ change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
}
+#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 0736e94c7615..ac0717a90ca6 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -104,6 +104,8 @@ unsigned long __vmalloc_start;
EXPORT_SYMBOL(__vmalloc_start);
unsigned long __vmalloc_end;
EXPORT_SYMBOL(__vmalloc_end);
+unsigned long __kernel_io_start;
+EXPORT_SYMBOL(__kernel_io_start);
struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long __pte_frag_nr;
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index bde378559d01..906a86fe457b 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -121,12 +121,25 @@ slb_miss_kernel_load_vmemmap:
1:
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
- /* vmalloc mapping gets the encoding from the PACA as the mapping
- * can be demoted from 64K -> 4K dynamically on some machines
+ /*
+ * r10 contains the ESID, which is the original faulting EA shifted
+ * right by 28 bits. We need to compare that with (H_VMALLOC_END >> 28)
+ * which is 0xd00038000. That can't be used as an immediate, even if we
+ * ignored the 0xd, so we have to load it into a register, and we only
+ * have one register free. So we must load all of (H_VMALLOC_END >> 28)
+ * into a register and compare ESID against that.
+ */
+ lis r11,(H_VMALLOC_END >> 32)@h // r11 = 0xffffffffd0000000
+ ori r11,r11,(H_VMALLOC_END >> 32)@l // r11 = 0xffffffffd0003800
+ // Rotate left 4, then mask with 0xffffffff0
+ rldic r11,r11,4,28 // r11 = 0xd00038000
+ cmpld r10,r11 // if r10 >= r11
+ bge 5f // goto io_mapping
+
+ /*
+ * vmalloc mapping gets the encoding from the PACA as the mapping
+ * can be demoted from 64K -> 4K dynamically on some machines.
*/
- clrldi r11,r10,48
- cmpldi r11,(H_VMALLOC_SIZE >> 28) - 1
- bgt 5f
lhz r11,PACAVMALLOCSLLP(r13)
b 6f
5:
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 16ae1bbe13f0..b3e849c4886e 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -54,23 +54,15 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
*/
__tlbiel_pid(pid, 0, ric);
- if (ric == RIC_FLUSH_ALL)
- /* For the remaining sets, just flush the TLB */
- ric = RIC_FLUSH_TLB;
+ /* For PWC, only one flush is needed */
+ if (ric == RIC_FLUSH_PWC) {
+ asm volatile("ptesync": : :"memory");
+ return;
+ }
+ /* For the remaining sets, just flush the TLB */
for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
- __tlbiel_pid(pid, set, ric);
-
- asm volatile("ptesync": : :"memory");
- asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
-}
-
-static inline void tlbiel_pwc(unsigned long pid)
-{
- asm volatile("ptesync": : :"memory");
-
- /* For PWC flush, we don't look at set number */
- __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
+ __tlbiel_pid(pid, set, RIC_FLUSH_TLB);
asm volatile("ptesync": : :"memory");
asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
@@ -146,31 +138,23 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm)
preempt_disable();
pid = mm->context.id;
if (pid != MMU_NO_CONTEXT)
- _tlbiel_pid(pid, RIC_FLUSH_ALL);
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);
-void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
+#ifndef CONFIG_SMP
+static void radix__local_flush_all_mm(struct mm_struct *mm)
{
unsigned long pid;
- struct mm_struct *mm = tlb->mm;
- /*
- * If we are doing a full mm flush, we will do a tlb flush
- * with RIC_FLUSH_ALL later.
- */
- if (tlb->fullmm)
- return;
preempt_disable();
-
pid = mm->context.id;
if (pid != MMU_NO_CONTEXT)
- tlbiel_pwc(pid);
-
+ _tlbiel_pid(pid, RIC_FLUSH_ALL);
preempt_enable();
}
-EXPORT_SYMBOL(radix__local_flush_tlb_pwc);
+#endif /* CONFIG_SMP */
void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
int psize)
@@ -208,38 +192,35 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
goto no_context;
if (!mm_is_thread_local(mm))
- _tlbie_pid(pid, RIC_FLUSH_ALL);
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
else
- _tlbiel_pid(pid, RIC_FLUSH_ALL);
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
no_context:
preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);
-void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
+static void radix__flush_all_mm(struct mm_struct *mm)
{
unsigned long pid;
- struct mm_struct *mm = tlb->mm;
- /*
- * If we are doing a full mm flush, we will do a tlb flush
- * with RIC_FLUSH_ALL later.
- */
- if (tlb->fullmm)
- return;
preempt_disable();
-
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
goto no_context;
if (!mm_is_thread_local(mm))
- _tlbie_pid(pid, RIC_FLUSH_PWC);
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
else
- tlbiel_pwc(pid);
+ _tlbiel_pid(pid, RIC_FLUSH_ALL);
no_context:
preempt_enable();
}
+
+void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
+{
+ tlb->need_flush_all = 1;
+}
EXPORT_SYMBOL(radix__flush_tlb_pwc);
void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
@@ -271,6 +252,8 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
}
EXPORT_SYMBOL(radix__flush_tlb_page);
+#else /* CONFIG_SMP */
+#define radix__flush_all_mm radix__local_flush_all_mm
#endif /* CONFIG_SMP */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
@@ -288,6 +271,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
{
struct mm_struct *mm = vma->vm_mm;
+
radix__flush_tlb_mm(mm);
}
EXPORT_SYMBOL(radix__flush_tlb_range);
@@ -319,7 +303,10 @@ void radix__tlb_flush(struct mmu_gather *tlb)
*/
if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all)
radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize);
- else
+ else if (tlb->need_flush_all) {
+ tlb->need_flush_all = 0;
+ radix__flush_all_mm(mm);
+ } else
radix__flush_tlb_mm(mm);
}
@@ -364,6 +351,43 @@ err_out:
preempt_enable();
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+{
+ int local = mm_is_thread_local(mm);
+ unsigned long ap = mmu_get_ap(mmu_virtual_psize);
+ unsigned long pid, end;
+
+
+ pid = mm ? mm->context.id : 0;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ goto no_context;
+
+ /* 4k page size, just blow the world */
+ if (PAGE_SIZE == 0x1000) {
+ radix__flush_all_mm(mm);
+ return;
+ }
+
+ /* Otherwise first do the PWC */
+ if (local)
+ _tlbiel_pid(pid, RIC_FLUSH_PWC);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_PWC);
+
+ /* Then iterate the pages */
+ end = addr + HPAGE_PMD_SIZE;
+ for (; addr < end; addr += PAGE_SIZE) {
+ if (local)
+ _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+ else
+ _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+ }
+no_context:
+ preempt_enable();
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
unsigned long page_size)
{
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index b5b0fb97b9c0..881ebd53ffc2 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -29,6 +29,8 @@
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/bug.h>
+#include <asm/pte-walk.h>
+
#include <trace/events/thp.h>
@@ -138,13 +140,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
*/
void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
{
- const struct cpumask *tmp;
- int i, local = 0;
+ int i, local;
i = batch->index;
- tmp = cpumask_of(smp_processor_id());
- if (cpumask_equal(mm_cpumask(batch->mm), tmp))
- local = 1;
+ local = mm_is_thread_local(batch->mm);
if (i == 1)
flush_hash_page(batch->vpn[0], batch->pte[0],
batch->psize, batch->ssize, local);
@@ -207,8 +206,8 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
local_irq_save(flags);
arch_enter_lazy_mmu_mode();
for (; start < end; start += PAGE_SIZE) {
- pte_t *ptep = find_linux_pte_or_hugepte(mm->pgd, start, &is_thp,
- &hugepage_shift);
+ pte_t *ptep = find_current_mm_pte(mm->pgd, start, &is_thp,
+ &hugepage_shift);
unsigned long pte;
if (ptep == NULL)
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
index eabecfcaef7c..048b8e9f4492 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/tlb_nohash_low.S
@@ -60,7 +60,7 @@ _GLOBAL(__tlbil_va)
isync
1: blr
-#elif defined(CONFIG_8xx)
+#elif defined(CONFIG_PPC_8xx)
/*
* Nothing to do for 8xx, everything is inline
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 30cf03f53428..47fc6660845d 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -263,6 +263,7 @@ static inline bool is_nearbranch(int offset)
#define COND_EQ (CR0_EQ | COND_CMP_TRUE)
#define COND_NE (CR0_EQ | COND_CMP_FALSE)
#define COND_LT (CR0_LT | COND_CMP_TRUE)
+#define COND_LE (CR0_GT | COND_CMP_FALSE)
#endif
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 861c5af1c9c4..a66e64b0b251 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -25,11 +25,7 @@ int bpf_jit_enable __read_mostly;
static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
{
- int *p = area;
-
- /* Fill whole space with trap instructions */
- while (p < (int *)((char *)area + size))
- *p++ = BREAKPOINT_INSTRUCTION;
+ memset32(area, BREAKPOINT_INSTRUCTION, size/4);
}
static inline void bpf_flush_icache(void *start, void *end)
@@ -795,12 +791,24 @@ emit_clear:
case BPF_JMP | BPF_JSGT | BPF_X:
true_cond = COND_GT;
goto cond_branch;
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ true_cond = COND_LT;
+ goto cond_branch;
case BPF_JMP | BPF_JGE | BPF_K:
case BPF_JMP | BPF_JGE | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_X:
true_cond = COND_GE;
goto cond_branch;
+ case BPF_JMP | BPF_JLE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ true_cond = COND_LE;
+ goto cond_branch;
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JEQ | BPF_X:
true_cond = COND_EQ;
@@ -817,14 +825,18 @@ emit_clear:
cond_branch:
switch (code) {
case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP | BPF_JNE | BPF_X:
/* unsigned comparison */
PPC_CMPLD(dst_reg, src_reg);
break;
case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
/* signed comparison */
PPC_CMPD(dst_reg, src_reg);
break;
@@ -834,7 +846,9 @@ cond_branch:
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
/*
* Need sign-extended load, so only positive
* values can be used as imm in cmpldi
@@ -849,7 +863,9 @@ cond_branch:
}
break;
case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
/*
* signed comparison, so any 16-bit value
* can be used in cmpdi
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 4d606b99a5cb..3f3a5ce66495 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -8,6 +8,7 @@ obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
isa207-common.o power8-pmu.o power9-pmu.o
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
+obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o
obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 0fc26714780a..0af051a1974e 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -22,6 +22,7 @@
#ifdef CONFIG_PPC64
#include "../kernel/ppc32.h"
#endif
+#include <asm/pte-walk.h>
/*
@@ -127,7 +128,7 @@ static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
return -EFAULT;
local_irq_save(flags);
- ptep = find_linux_pte_or_hugepte(pgdir, addr, NULL, &shift);
+ ptep = find_current_mm_pte(pgdir, addr, NULL, &shift);
if (!ptep)
goto err_out;
if (!shift)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 6c2d4168daec..2e3eb7431571 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2039,7 +2039,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
- if (event->attr.sample_type & PERF_SAMPLE_ADDR)
+ if (event->attr.sample_type &
+ (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
perf_get_data_addr(regs, &data.addr);
if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
new file mode 100644
index 000000000000..9ccac86f3463
--- /dev/null
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -0,0 +1,1306 @@
+/*
+ * In-Memory Collection (IMC) Performance Monitor counter support.
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ * (C) 2017 Anju T Sudhakar, IBM Corporation.
+ * (C) 2017 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or later version.
+ */
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <asm/opal.h>
+#include <asm/imc-pmu.h>
+#include <asm/cputhreads.h>
+#include <asm/smp.h>
+#include <linux/string.h>
+
+/* Nest IMC data structures and variables */
+
+/*
+ * Used to avoid races in counting the nest-pmu units during hotplug
+ * register and unregister
+ */
+static DEFINE_MUTEX(nest_init_lock);
+static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
+static struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+static cpumask_t nest_imc_cpumask;
+struct imc_pmu_ref *nest_imc_refc;
+static int nest_pmus;
+
+/* Core IMC data structures and variables */
+
+static cpumask_t core_imc_cpumask;
+struct imc_pmu_ref *core_imc_refc;
+static struct imc_pmu *core_imc_pmu;
+
+/* Thread IMC data structures and variables */
+
+static DEFINE_PER_CPU(u64 *, thread_imc_mem);
+static struct imc_pmu *thread_imc_pmu;
+static int thread_imc_mem_size;
+
+/* Return the imc_pmu that embeds the struct pmu attached to "event". */
+struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
+{
+	struct pmu *base = event->pmu;
+
+	return container_of(base, struct imc_pmu, pmu);
+}
+
+/*
+ * Format attributes: each one names a field and the bit range of "config"
+ * that it occupies. They are collected in imc_format_attrs[] and exposed
+ * through the attribute group named "format".
+ */
+PMU_FORMAT_ATTR(event, "config:0-40");
+PMU_FORMAT_ATTR(offset, "config:0-31");
+PMU_FORMAT_ATTR(rvalue, "config:32");
+PMU_FORMAT_ATTR(mode, "config:33-40");
+static struct attribute *imc_format_attrs[] = {
+ &format_attr_event.attr,
+ &format_attr_offset.attr,
+ &format_attr_rvalue.attr,
+ &format_attr_mode.attr,
+ NULL,
+};
+
+static struct attribute_group imc_format_group = {
+ .name = "format",
+ .attrs = imc_format_attrs,
+};
+
+/*
+ * Print the designated-CPU mask of the PMU behind "dev" into "buf".
+ * Nest and core PMUs each have their own mask; any other domain prints
+ * nothing and returns 0.
+ */
+static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct pmu *base = dev_get_drvdata(dev);
+	struct imc_pmu *imc = container_of(base, struct imc_pmu, pmu);
+
+	if (imc->domain == IMC_DOMAIN_NEST)
+		return cpumap_print_to_pagebuf(true, buf, &nest_imc_cpumask);
+
+	if (imc->domain == IMC_DOMAIN_CORE)
+		return cpumap_print_to_pagebuf(true, buf, &core_imc_cpumask);
+
+	return 0;
+}
+
+/* Read-only "cpumask" device attribute, served by imc_pmu_cpumask_get_attr() */
+static DEVICE_ATTR(cpumask, S_IRUGO, imc_pmu_cpumask_get_attr, NULL);
+
+static struct attribute *imc_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+/* Unnamed attribute group holding only the cpumask attribute */
+static struct attribute_group imc_pmu_cpumask_attr_group = {
+ .attrs = imc_pmu_cpumask_attrs,
+};
+
+/*
+ * device_str_attr_create: Allocate a mode-0444 attribute called "name" that
+ * stores "str" as its event string and uses perf_event_sysfs_show() as the
+ * show callback. Neither string is copied. Returns NULL on allocation
+ * failure.
+ */
+static struct attribute *device_str_attr_create(const char *name, const char *str)
+{
+	struct perf_pmu_events_attr *ev_attr;
+
+	ev_attr = kzalloc(sizeof(*ev_attr), GFP_KERNEL);
+	if (ev_attr == NULL)
+		return NULL;
+
+	sysfs_attr_init(&ev_attr->attr.attr);
+
+	ev_attr->attr.attr.name = name;
+	ev_attr->attr.attr.mode = 0444;
+	ev_attr->attr.show = perf_event_sysfs_show;
+	ev_attr->event_str = str;
+
+	return &ev_attr->attr.attr;
+}
+
+/*
+ * imc_parse_event: Build an imc_events entry from the device-tree node "np".
+ *
+ * The event value is "base" plus the node's "reg" property, and the name is
+ * "prefix" followed by the node's "event-name". "scale" and "unit" are the
+ * fallbacks used when the node has no "scale"/"unit" property of its own;
+ * either may be NULL, in which case the field is left unset.
+ *
+ * Returns the new entry, or NULL when "reg"/"event-name" is missing or an
+ * allocation fails (everything allocated so far is released).
+ */
+struct imc_events *imc_parse_event(struct device_node *np, const char *scale,
+				   const char *unit, const char *prefix, u32 base)
+{
+	struct imc_events *ev;
+	const char *str;
+	u32 offset;
+
+	ev = kzalloc(sizeof(struct imc_events), GFP_KERNEL);
+	if (!ev)
+		return NULL;
+
+	if (of_property_read_u32(np, "reg", &offset))
+		goto fail;
+	/* Event value is the PMU base offset plus the per-event "reg" */
+	ev->value = base + offset;
+
+	if (of_property_read_string(np, "event-name", &str))
+		goto fail;
+
+	ev->name = kasprintf(GFP_KERNEL, "%s%s", prefix, str);
+	if (!ev->name)
+		goto fail;
+
+	/* A per-event "scale" wins over the caller's default */
+	if (of_property_read_string(np, "scale", &str))
+		str = scale;
+
+	if (str) {
+		ev->scale = kstrdup(str, GFP_KERNEL);
+		if (!ev->scale)
+			goto fail;
+	}
+
+	/* Likewise for "unit" */
+	if (of_property_read_string(np, "unit", &str))
+		str = unit;
+
+	if (str) {
+		ev->unit = kstrdup(str, GFP_KERNEL);
+		if (!ev->unit)
+			goto fail;
+	}
+
+	return ev;
+
+fail:
+	/* kzalloc() zeroed the struct, so unset members are NULL here */
+	kfree(ev->unit);
+	kfree(ev->scale);
+	kfree(ev->name);
+	kfree(ev);
+
+	return NULL;
+}
+
+/*
+ * update_events_in_group: Update the "events" information in an attr_group
+ * and assign the attr_group to the pmu "pmu".
+ *
+ * "node" is the PMU's device-tree node. Its "events" phandle points at the
+ * node whose children describe the individual events; "events-prefix",
+ * "scale", "unit" and "reg" on "node" supply the name prefix, the default
+ * scale/unit and the base offset.
+ *
+ * Returns 0 on success and also when there is nothing to publish (no
+ * "events" phandle, no prefix, or no parsable child event); in the latter
+ * case pmu->attr_groups[IMC_EVENT_ATTR] is left untouched. Returns -ENOMEM
+ * on allocation failure.
+ *
+ * NOTE(review): the imc_events structs returned by imc_parse_event() are
+ * not freed here; their name/scale/unit strings are referenced by the
+ * created attributes.
+ */
+static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
+{
+	struct attribute_group *attr_group;
+	struct attribute **attrs, *dev_str;
+	struct device_node *np, *pmu_events;
+	struct imc_events *ev;
+	u32 handle, base_reg = 0;
+	int i, j = 0, ct, ret = 0;
+	const char *prefix, *g_scale, *g_unit;
+	const char *ev_val_str, *ev_scale_str, *ev_unit_str;
+
+	if (of_property_read_u32(node, "events", &handle))
+		return 0;
+
+	/* Did not find any node with a given phandle */
+	pmu_events = of_find_node_by_phandle(handle);
+	if (!pmu_events)
+		return 0;
+
+	/* Get a count of number of child nodes; nothing to do without any */
+	ct = of_get_child_count(pmu_events);
+	if (!ct)
+		goto out_put;
+
+	/* Get the event prefix */
+	if (of_property_read_string(node, "events-prefix", &prefix))
+		goto out_put;
+
+	/* Get a global unit and scale data if available */
+	if (of_property_read_string(node, "scale", &g_scale))
+		g_scale = NULL;
+
+	if (of_property_read_string(node, "unit", &g_unit))
+		g_unit = NULL;
+
+	/*
+	 * "reg" property gives out the base offset of the counters data.
+	 * base_reg stays 0 when the property is absent.
+	 */
+	of_property_read_u32(node, "reg", &base_reg);
+
+	/* Scratch array of event pointers, released before returning */
+	pmu->events = kcalloc(ct, sizeof(*pmu->events), GFP_KERNEL);
+	if (!pmu->events) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
+
+	ct = 0;
+	/* Parse the events and update the struct */
+	for_each_child_of_node(pmu_events, np) {
+		ev = imc_parse_event(np, g_scale, g_unit, prefix, base_reg);
+		if (ev)
+			pmu->events[ct++] = ev;
+	}
+
+	/* No child could be parsed: there is no events group to publish */
+	if (!ct)
+		goto out_free_events;
+
+	/* Allocate memory for attribute group */
+	attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
+	if (!attr_group) {
+		ret = -ENOMEM;
+		goto out_free_events;
+	}
+
+	/*
+	 * Allocate memory for attributes.
+	 * "ct" has the total event structs added from the events-parent node.
+	 * Each event may contribute up to three attributes (event,
+	 * event.scale and event.unit); the extra slot is the NULL terminator.
+	 */
+	attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL);
+	if (!attrs) {
+		kfree(attr_group);
+		ret = -ENOMEM;
+		goto out_free_events;
+	}
+
+	attr_group->name = "events";
+	attr_group->attrs = attrs;
+	for (i = 0; i < ct; i++) {
+		ev = pmu->events[i];
+
+		ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", ev->value);
+		if (!ev_val_str)
+			continue;
+
+		dev_str = device_str_attr_create(ev->name, ev_val_str);
+		if (!dev_str) {
+			kfree(ev_val_str);
+			continue;
+		}
+		attrs[j++] = dev_str;
+
+		if (ev->scale) {
+			ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", ev->name);
+			if (!ev_scale_str)
+				continue;
+
+			dev_str = device_str_attr_create(ev_scale_str, ev->scale);
+			if (!dev_str) {
+				kfree(ev_scale_str);
+				continue;
+			}
+			attrs[j++] = dev_str;
+		}
+
+		if (ev->unit) {
+			ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", ev->name);
+			if (!ev_unit_str)
+				continue;
+
+			dev_str = device_str_attr_create(ev_unit_str, ev->unit);
+			if (!dev_str) {
+				kfree(ev_unit_str);
+				continue;
+			}
+			attrs[j++] = dev_str;
+		}
+	}
+
+	/* Save the event attribute */
+	pmu->attr_groups[IMC_EVENT_ATTR] = attr_group;
+
+out_free_events:
+	kfree(pmu->events);
+	pmu->events = NULL;
+out_put:
+	/* Drop the reference taken by of_find_node_by_phandle() */
+	of_node_put(pmu_events);
+	return ret;
+}
+
+/*
+ * get_nest_pmu_ref: Return the imc_pmu_ref pointer stored in the per-cpu
+ * variable local_nest_imc_refc for "cpu" (may be NULL if never set).
+ */
+static struct imc_pmu_ref *get_nest_pmu_ref(int cpu)
+{
+ return per_cpu(local_nest_imc_refc, cpu);
+}
+
+/*
+ * nest_change_cpu_context: Migrate the perf context of every registered
+ * nest PMU from "old_cpu" to "new_cpu". Does nothing if either cpu is
+ * negative.
+ *
+ * per_nest_pmu_arr[] is walked until the first NULL slot or the end of the
+ * array. The bound is tested before the slot is read so that a completely
+ * full array is never read past its last element.
+ */
+static void nest_change_cpu_context(int old_cpu, int new_cpu)
+{
+	int i;
+
+	if (old_cpu < 0 || new_cpu < 0)
+		return;
+
+	for (i = 0; i < IMC_MAX_PMUS && per_nest_pmu_arr[i]; i++)
+		perf_pmu_migrate_context(&per_nest_pmu_arr[i]->pmu,
+					 old_cpu, new_cpu);
+}
+
+/*
+ * ppc_nest_imc_cpu_offline: Offline callback registered by
+ * nest_pmu_cpumask_init().
+ *
+ * If "cpu" is set in nest_imc_cpumask, clear it and pick another cpu from
+ * the same node's cpumask. When one is found it is set in the mask and the
+ * nest PMU contexts are migrated to it; otherwise the nest counters are
+ * stopped through OPAL and the reference count for this cpu's node is zeroed.
+ *
+ * Returns 0, or -EINVAL when no imc_pmu_ref is recorded for "cpu".
+ */
+static int ppc_nest_imc_cpu_offline(unsigned int cpu)
+{
+ int nid, target = -1;
+ const struct cpumask *l_cpumask;
+ struct imc_pmu_ref *ref;
+
+ /*
+ * Check in the designated list for this cpu. Don't bother
+ * if not one of them.
+ */
+ if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
+ return 0;
+
+ /*
+ * Now that this cpu is one of the designated,
+ * find a next cpu a) which is online and b) in same chip.
+ */
+ nid = cpu_to_node(cpu);
+ l_cpumask = cpumask_of_node(nid);
+ target = cpumask_any_but(l_cpumask, cpu);
+
+ /*
+ * Update the cpumask with the target cpu and
+ * migrate the context if needed
+ */
+ if (target >= 0 && target < nr_cpu_ids) {
+ cpumask_set_cpu(target, &nest_imc_cpumask);
+ nest_change_cpu_context(cpu, target);
+ } else {
+ /* The return value of the OPAL stop call is not checked here */
+ opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+ get_hard_smp_processor_id(cpu));
+ /*
+ * If this is the last cpu in this chip then, skip the reference
+ * count mutex lock and make the reference count on this chip zero.
+ */
+ ref = get_nest_pmu_ref(cpu);
+ if (!ref)
+ return -EINVAL;
+
+ ref->refc = 0;
+ }
+ return 0;
+}
+
+/*
+ * ppc_nest_imc_cpu_online: Online callback registered by
+ * nest_pmu_cpumask_init().
+ *
+ * If no cpu of this cpu's node is set in nest_imc_cpumask yet, stop the nest
+ * counters through OPAL and make "cpu" the designated cpu for the node.
+ * Returns 0, or the OPAL return code if the stop call fails.
+ */
+static int ppc_nest_imc_cpu_online(unsigned int cpu)
+{
+ const struct cpumask *l_cpumask;
+ /* Scratch mask; static, so it is shared by all invocations */
+ static struct cpumask tmp_mask;
+ int res;
+
+ /* Get the cpumask of this node */
+ l_cpumask = cpumask_of_node(cpu_to_node(cpu));
+
+ /*
+ * If this is not the first online CPU on this node, then
+ * just return.
+ */
+ if (cpumask_and(&tmp_mask, l_cpumask, &nest_imc_cpumask))
+ return 0;
+
+ /*
+ * If this is the first online cpu on this node
+ * disable the nest counters by making an OPAL call.
+ */
+ res = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+ get_hard_smp_processor_id(cpu));
+ if (res)
+ return res;
+
+ /* Make this CPU the designated target for counter collection */
+ cpumask_set_cpu(cpu, &nest_imc_cpumask);
+ return 0;
+}
+
+/*
+ * Register the nest IMC online/offline callbacks for the
+ * CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE hotplug state and return the result
+ * of cpuhp_setup_state().
+ */
+static int nest_pmu_cpumask_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
+ "perf/powerpc/imc:online",
+ ppc_nest_imc_cpu_online,
+ ppc_nest_imc_cpu_offline);
+}
+
+/*
+ * nest_imc_counters_release: Installed as event->destroy by
+ * nest_imc_event_init().
+ *
+ * Drops one reference on the imc_pmu_ref of event->cpu and, when the count
+ * reaches zero, stops the nest counters through OPAL. A count that goes
+ * negative triggers a WARN and is reset to zero.
+ */
+static void nest_imc_counters_release(struct perf_event *event)
+{
+	int rc, node_id;
+	struct imc_pmu_ref *ref;
+
+	if (event->cpu < 0)
+		return;
+
+	node_id = cpu_to_node(event->cpu);
+
+	/*
+	 * See if we need to disable the nest PMU.
+	 * The reference count is only changed under the mutex so that we
+	 * don't race with another task enabling or disabling the nest
+	 * counters.
+	 */
+	ref = get_nest_pmu_ref(event->cpu);
+	if (!ref)
+		return;
+
+	/* Take the mutex lock for this node and then decrement the reference count */
+	mutex_lock(&ref->lock);
+	ref->refc--;
+	if (ref->refc == 0) {
+		rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+					    get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			mutex_unlock(&ref->lock);
+			/* The id printed is a node id, so say "node" as event_init does */
+			pr_err("nest-imc: Unable to stop the counters for node %d\n", node_id);
+			return;
+		}
+	} else if (ref->refc < 0) {
+		WARN(1, "nest-imc: Invalid event reference count\n");
+		ref->refc = 0;
+	}
+	mutex_unlock(&ref->lock);
+}
+
+/*
+ * nest_imc_event_init: event_init callback for nest IMC PMUs.
+ *
+ * Rejects events of another PMU type (-ENOENT), sampling events, events
+ * with any exclude_* filter, events without a cpu and events whose offset
+ * exceeds the PMU's counter memory size (-EINVAL). On success it resolves
+ * the counter address for the chip of event->cpu, starts the nest counters
+ * through OPAL if this is the first user, takes a reference and installs
+ * nest_imc_counters_release() as the destroy callback.
+ *
+ * Returns 0 on success, -ENODEV if no memory info matches the chip, or the
+ * OPAL return code if the counters cannot be started.
+ */
+static int nest_imc_event_init(struct perf_event *event)
+{
+	int chip_id, rc, node_id;
+	u32 l_config, config = event->attr.config;
+	struct imc_mem_info *pcni;
+	struct imc_pmu *pmu;
+	struct imc_pmu_ref *ref;
+	bool flag = false;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Sampling not supported */
+	if (event->hw.sample_period)
+		return -EINVAL;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv ||
+	    event->attr.exclude_idle ||
+	    event->attr.exclude_host ||
+	    event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	pmu = imc_event_to_pmu(event);
+
+	/* Sanity check for config (event offset) */
+	if ((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size)
+		return -EINVAL;
+
+	/*
+	 * Nest HW counter memory resides in a per-chip reserve-memory (HOMER).
+	 * Get the base memory address for this cpu.
+	 */
+	chip_id = topology_physical_package_id(event->cpu);
+	pcni = pmu->mem_info;
+	if (!pcni)
+		return -ENODEV;
+
+	/*
+	 * Walk the per-chip entries until the chip is found. The pointer
+	 * itself never becomes NULL when incremented, so the walk stops on
+	 * an entry without a mapped base instead.
+	 * NOTE(review): this assumes the mem_info array ends with an entry
+	 * whose vbase is NULL -- confirm at the allocation site, which is
+	 * not in this file.
+	 */
+	do {
+		if (pcni->id == chip_id) {
+			flag = true;
+			break;
+		}
+		pcni++;
+	} while (pcni->vbase);
+
+	if (!flag)
+		return -ENODEV;
+
+	/*
+	 * Add the event offset to the base address.
+	 */
+	l_config = config & IMC_EVENT_OFFSET_MASK;
+	event->hw.event_base = (u64)pcni->vbase + l_config;
+	node_id = cpu_to_node(event->cpu);
+
+	/*
+	 * Get the imc_pmu_ref struct for this node.
+	 * Take the mutex lock and then increment the count of nest pmu events
+	 * inited.
+	 */
+	ref = get_nest_pmu_ref(event->cpu);
+	if (!ref)
+		return -EINVAL;
+
+	mutex_lock(&ref->lock);
+	if (ref->refc == 0) {
+		rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST,
+					     get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			mutex_unlock(&ref->lock);
+			pr_err("nest-imc: Unable to start the counters for node %d\n",
+			       node_id);
+			return rc;
+		}
+	}
+	++ref->refc;
+	mutex_unlock(&ref->lock);
+
+	event->destroy = nest_imc_counters_release;
+	return 0;
+}
+
+/*
+ * core_imc_mem_init : Initializes memory for the current core.
+ *
+ * Allocates zeroed pages of at least "size" bytes with alloc_pages_node()
+ * and passes their physical address to an opal call to configure the pdbar.
+ * This is the base address at which the core imc counters are populated.
+ * The virtual address is recorded in the core's mem_info entry and the
+ * core's reference-count mutex is initialised.
+ *
+ * Returns 0 on success, -ENOMEM if the pages cannot be allocated, or the
+ * OPAL return code if the init call fails (the pages are freed again).
+ */
+static int core_imc_mem_init(int cpu, int size)
+{
+	int phys_id, rc = 0, core_id = (cpu / threads_per_core);
+	struct imc_mem_info *mem_info;
+	struct page *page;
+
+	/*
+	 * alloc_pages_node() will allocate memory for core in the
+	 * local node only.
+	 */
+	phys_id = topology_physical_package_id(cpu);
+	mem_info = &core_imc_pmu->mem_info[core_id];
+	mem_info->id = core_id;
+
+	/*
+	 * Check the page pointer itself: page_address() of a failed
+	 * allocation is not guaranteed to yield NULL.
+	 */
+	page = alloc_pages_node(phys_id,
+				GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
+				get_order(size));
+	if (!page)
+		return -ENOMEM;
+
+	/* We need only vbase for core counters */
+	mem_info->vbase = page_address(page);
+
+	/* Init the mutex */
+	core_imc_refc[core_id].id = core_id;
+	mutex_init(&core_imc_refc[core_id].lock);
+
+	rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
+				    __pa((void *)mem_info->vbase),
+				    get_hard_smp_processor_id(cpu));
+	if (rc) {
+		free_pages((u64)mem_info->vbase, get_order(size));
+		mem_info->vbase = NULL;
+	}
+
+	return rc;
+}
+
+/* True when the core that "cpu" belongs to has counter memory set up. */
+static bool is_core_imc_mem_inited(int cpu)
+{
+	int core = cpu / threads_per_core;
+
+	return core_imc_pmu->mem_info[core].vbase ? true : false;
+}
+
+/*
+ * ppc_core_imc_cpu_online: Online callback registered by
+ * core_imc_pmu_cpumask_init().
+ *
+ * If no sibling of "cpu" is set in core_imc_cpumask yet, make sure the
+ * core's counter memory exists and mark "cpu" as the designated cpu for
+ * the core. Returns 0, or the error from core_imc_mem_init().
+ */
+static int ppc_core_imc_cpu_online(unsigned int cpu)
+{
+ const struct cpumask *l_cpumask;
+ /* Scratch mask; static, so it is shared by all invocations */
+ static struct cpumask tmp_mask;
+ int ret = 0;
+
+ /* Get the cpumask for this core */
+ l_cpumask = cpu_sibling_mask(cpu);
+
+ /* If a cpu for this core is already set, then, don't do anything */
+ if (cpumask_and(&tmp_mask, l_cpumask, &core_imc_cpumask))
+ return 0;
+
+ /* Memory is allocated only once per core; later onlines reuse it */
+ if (!is_core_imc_mem_inited(cpu)) {
+ ret = core_imc_mem_init(cpu, core_imc_pmu->counter_mem_size);
+ if (ret) {
+ pr_info("core_imc memory allocation for cpu %d failed\n", cpu);
+ return ret;
+ }
+ }
+
+ /* set the cpu in the mask */
+ cpumask_set_cpu(cpu, &core_imc_cpumask);
+ return 0;
+}
+
+/*
+ * ppc_core_imc_cpu_offline: Offline callback registered by
+ * core_imc_pmu_cpumask_init().
+ *
+ * If "cpu" is the designated cpu of its core, hand the role to a sibling
+ * and migrate the core PMU context to it. Without a sibling, the core
+ * counters are stopped through OPAL and the core's reference count is
+ * zeroed without taking its mutex.
+ */
+static int ppc_core_imc_cpu_offline(unsigned int cpu)
+{
+	struct imc_pmu_ref *core_ref;
+	unsigned int next_cpu;
+
+	/* Not the designated cpu for its core: nothing to hand over */
+	if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
+		return 0;
+
+	/* Find any cpu in that core except the current "cpu" */
+	next_cpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
+	if (next_cpu < nr_cpu_ids) {
+		cpumask_set_cpu(next_cpu, &core_imc_cpumask);
+		perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, next_cpu);
+		return 0;
+	}
+
+	/* Last cpu of the core: stop counting and drop every reference */
+	opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+			       get_hard_smp_processor_id(cpu));
+	core_ref = &core_imc_refc[cpu / threads_per_core];
+	if (!core_ref)
+		return -EINVAL;
+
+	core_ref->refc = 0;
+	return 0;
+}
+
+/*
+ * Register the core IMC online/offline callbacks for the
+ * CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE hotplug state and return the result
+ * of cpuhp_setup_state().
+ */
+static int core_imc_pmu_cpumask_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
+ "perf/powerpc/imc_core:online",
+ ppc_core_imc_cpu_online,
+ ppc_core_imc_cpu_offline);
+}
+
+/*
+ * core_imc_counters_release: Installed as event->destroy by
+ * core_imc_event_init().
+ *
+ * Drops one reference on the core of event->cpu and, when the count reaches
+ * zero, stops the core counters through OPAL. A count that goes negative
+ * triggers a WARN and is reset to zero.
+ */
+static void core_imc_counters_release(struct perf_event *event)
+{
+ int rc, core_id;
+ struct imc_pmu_ref *ref;
+
+ if (event->cpu < 0)
+ return;
+ /*
+ * See if we need to disable the IMC PMU.
+ * The reference count is only changed under the mutex so that we
+ * don't race with another task enabling or disabling the core
+ * counters.
+ */
+ core_id = event->cpu / threads_per_core;
+
+ /* Take the mutex lock and decrement the reference count for this core */
+ ref = &core_imc_refc[core_id];
+ if (!ref)
+ return;
+
+ mutex_lock(&ref->lock);
+ ref->refc--;
+ if (ref->refc == 0) {
+ rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+ get_hard_smp_processor_id(event->cpu));
+ if (rc) {
+ mutex_unlock(&ref->lock);
+ pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
+ return;
+ }
+ } else if (ref->refc < 0) {
+ WARN(1, "core-imc: Invalid event reference count\n");
+ ref->refc = 0;
+ }
+ mutex_unlock(&ref->lock);
+}
+
+/*
+ * core_imc_event_init: event_init callback for the core IMC PMU.
+ *
+ * Rejects events of another PMU type (-ENOENT), sampling events, events
+ * with any exclude_* filter, events without a cpu and events whose offset
+ * exceeds the PMU's counter memory size (-EINVAL). Returns -ENODEV if the
+ * core's counter memory has not been set up. Otherwise it starts the core
+ * counters through OPAL if this is the first user, takes a reference,
+ * records the counter address in event->hw.event_base and installs
+ * core_imc_counters_release() as the destroy callback.
+ */
+static int core_imc_event_init(struct perf_event *event)
+{
+ int core_id, rc;
+ u64 config = event->attr.config;
+ struct imc_mem_info *pcmi;
+ struct imc_pmu *pmu;
+ struct imc_pmu_ref *ref;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* Sampling not supported */
+ if (event->hw.sample_period)
+ return -EINVAL;
+
+ /* unsupported modes and filters */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest)
+ return -EINVAL;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ event->hw.idx = -1;
+ pmu = imc_event_to_pmu(event);
+
+ /* Sanity check for config (event offset) */
+ if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size))
+ return -EINVAL;
+
+ if (!is_core_imc_mem_inited(event->cpu))
+ return -ENODEV;
+
+ core_id = event->cpu / threads_per_core;
+ pcmi = &core_imc_pmu->mem_info[core_id];
+ /* Same condition as is_core_imc_mem_inited() above, checked again */
+ if ((!pcmi->vbase))
+ return -ENODEV;
+
+ /* Get the core_imc mutex for this core */
+ ref = &core_imc_refc[core_id];
+ if (!ref)
+ return -EINVAL;
+
+ /*
+ * Core pmu units are enabled only when it is used.
+ * See if this is triggered for the first time.
+ * If yes, take the mutex lock and enable the core counters.
+ * If not, just increment the count in core_imc_refc struct.
+ */
+ mutex_lock(&ref->lock);
+ if (ref->refc == 0) {
+ rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
+ get_hard_smp_processor_id(event->cpu));
+ if (rc) {
+ mutex_unlock(&ref->lock);
+ pr_err("core-imc: Unable to start the counters for core %d\n",
+ core_id);
+ return rc;
+ }
+ }
+ ++ref->refc;
+ mutex_unlock(&ref->lock);
+
+ /* Counter address = core base + event offset from config */
+ event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
+ event->destroy = core_imc_counters_release;
+ return 0;
+}
+
+/*
+ * Makes sure cpu "cpu_id" has a zeroed buffer of at least "size" bytes
+ * recorded in the per-cpu variable thread_imc_mem, then writes the masked
+ * buffer address together with the enable bit to the LDBAR of the cpu this
+ * function runs on.
+ *
+ * Returns 0 on success or -ENOMEM if the buffer cannot be allocated.
+ *
+ * LDBAR Register Layout:
+ *
+ *  0          4         8         12        16        20        24        28
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *   | |       [   ]    [                   Counter Address [8:50]
+ *   | * Mode    |
+ *   |           * PB Scope
+ *   * Enable/Disable
+ *
+ *  32        36        40        44        48        52        56        60
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *           Counter Address [8:50]              ]
+ *
+ */
+static int thread_imc_mem_alloc(int cpu_id, int size)
+{
+	u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id);
+	int phys_id = topology_physical_package_id(cpu_id);
+	struct page *page;
+
+	if (!local_mem) {
+		/*
+		 * This case could happen only once at start, since we don't
+		 * free the memory in cpu offline path.
+		 *
+		 * Check the page pointer itself: page_address() of a failed
+		 * allocation is not guaranteed to yield NULL.
+		 */
+		page = alloc_pages_node(phys_id,
+					GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
+					get_order(size));
+		if (!page)
+			return -ENOMEM;
+
+		local_mem = page_address(page);
+		per_cpu(thread_imc_mem, cpu_id) = local_mem;
+	}
+
+	ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
+
+	mtspr(SPRN_LDBAR, ldbar_value);
+	return 0;
+}
+
+/*
+ * Online callback for thread IMC: set up the buffer and LDBAR for "cpu"
+ * using the global thread_imc_mem_size.
+ */
+static int ppc_thread_imc_cpu_online(unsigned int cpu)
+{
+ return thread_imc_mem_alloc(cpu, thread_imc_mem_size);
+}
+
+/*
+ * Offline callback for thread IMC: write 0 to LDBAR. The per-cpu buffer is
+ * not freed here.
+ */
+static int ppc_thread_imc_cpu_offline(unsigned int cpu)
+{
+ mtspr(SPRN_LDBAR, 0);
+ return 0;
+}
+
+/*
+ * Register the thread IMC online/offline callbacks for the
+ * CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE hotplug state and return the
+ * result of cpuhp_setup_state().
+ */
+static int thread_imc_cpu_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
+ "perf/powerpc/imc_thread:online",
+ ppc_thread_imc_cpu_online,
+ ppc_thread_imc_cpu_offline);
+}
+
+/*
+ * thread_imc_pmu_sched_task: sched_task callback of the thread IMC PMU.
+ *
+ * Thread IMC shares the core counter engine, so this takes (sched_in) or
+ * drops (sched_out) a reference on the current core's imc_pmu_ref and
+ * starts/stops the core counters through OPAL on the first/last reference.
+ * Does nothing when the current core has no counter memory set up.
+ *
+ * NOTE(review): this takes a mutex; confirm the perf core invokes
+ * sched_task from a context where sleeping is allowed.
+ */
+void thread_imc_pmu_sched_task(struct perf_event_context *ctx,
+			       bool sched_in)
+{
+	int core_id;
+	struct imc_pmu_ref *ref;
+
+	if (!is_core_imc_mem_inited(smp_processor_id()))
+		return;
+
+	core_id = smp_processor_id() / threads_per_core;
+	/*
+	 * imc pmus are enabled only when it is used.
+	 * See if this is triggered for the first time.
+	 * If yes, take the mutex lock and enable the counters.
+	 * If not, just increment the count in ref count struct.
+	 */
+	ref = &core_imc_refc[core_id];
+	if (!ref)
+		return;
+
+	if (sched_in) {
+		mutex_lock(&ref->lock);
+		if (ref->refc == 0) {
+			if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
+			     get_hard_smp_processor_id(smp_processor_id()))) {
+				mutex_unlock(&ref->lock);
+				/*
+				 * Keep the format on one line: a backslash
+				 * continuation inside the literal would put
+				 * the source indentation into the log.
+				 */
+				pr_err("thread-imc: Unable to start the counter for core %d\n",
+				       core_id);
+				return;
+			}
+		}
+		++ref->refc;
+		mutex_unlock(&ref->lock);
+	} else {
+		mutex_lock(&ref->lock);
+		ref->refc--;
+		if (ref->refc == 0) {
+			if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+			    get_hard_smp_processor_id(smp_processor_id()))) {
+				mutex_unlock(&ref->lock);
+				pr_err("thread-imc: Unable to stop the counters for core %d\n",
+				       core_id);
+				return;
+			}
+		} else if (ref->refc < 0) {
+			/* Unbalanced sched_out: clamp instead of going negative */
+			ref->refc = 0;
+		}
+		mutex_unlock(&ref->lock);
+	}
+
+	return;
+}
+
+/*
+ * thread_imc_event_init: event_init callback for the thread IMC PMU.
+ *
+ * Rejects events of another PMU type (-ENOENT), sampling events, events
+ * whose offset exceeds the PMU's counter memory size and events without a
+ * target task (-EINVAL). On success the PMU's task_ctx_nr is set to
+ * perf_sw_context and 0 is returned.
+ */
+static int thread_imc_event_init(struct perf_event *event)
+{
+ u32 config = event->attr.config;
+ struct task_struct *target;
+ struct imc_pmu *pmu;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* Sampling not supported */
+ if (event->hw.sample_period)
+ return -EINVAL;
+
+ event->hw.idx = -1;
+ pmu = imc_event_to_pmu(event);
+
+ /* Sanity check for config offset */
+ if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size))
+ return -EINVAL;
+
+ /* Only per-task events are accepted */
+ target = event->hw.target;
+ if (!target)
+ return -EINVAL;
+
+ /* Note: this writes to the shared struct pmu, not to the event */
+ event->pmu->task_ctx_nr = perf_sw_context;
+ return 0;
+}
+
+/* True when the name of the event's PMU starts with "thread_imc". */
+static bool is_thread_imc_pmu(struct perf_event *event)
+{
+	const char *pmu_name = event->pmu->name;
+
+	return strncmp(pmu_name, "thread_imc", strlen("thread_imc")) == 0;
+}
+
+/*
+ * Return the address of the counter backing "event". Thread IMC events use
+ * the current cpu's thread_imc_mem buffer plus the event offset; all other
+ * events use the address saved in event->hw.event_base.
+ */
+static u64 * get_event_base_addr(struct perf_event *event)
+{
+	u64 base;
+
+	if (!is_thread_imc_pmu(event))
+		return (u64 *)event->hw.event_base;
+
+	base = (u64)per_cpu(thread_imc_mem, smp_processor_id());
+	base += event->attr.config & IMC_EVENT_OFFSET_MASK;
+
+	return (u64 *)base;
+}
+
+/*
+ * start_txn callback: disable the PMU for the duration of the transaction,
+ * unless "txn_flags" has any bit set other than PERF_PMU_TXN_ADD.
+ */
+static void thread_imc_pmu_start_txn(struct pmu *pmu,
+ unsigned int txn_flags)
+{
+ if (txn_flags & ~PERF_PMU_TXN_ADD)
+ return;
+ perf_pmu_disable(pmu);
+}
+
+/* cancel_txn callback: re-enable the PMU. */
+static void thread_imc_pmu_cancel_txn(struct pmu *pmu)
+{
+ perf_pmu_enable(pmu);
+}
+
+/* commit_txn callback: re-enable the PMU and always report success. */
+static int thread_imc_pmu_commit_txn(struct pmu *pmu)
+{
+ perf_pmu_enable(pmu);
+ return 0;
+}
+
+/*
+ * Read the big-endian counter backing "event", remember the value in
+ * event->hw.prev_count and return it.
+ *
+ * In-Memory Collection (IMC) counters are free flowing counters, so the
+ * saved snapshot is what a later read is compared against to get a delta.
+ */
+static u64 imc_read_counter(struct perf_event *event)
+{
+	u64 *slot = get_event_base_addr(event);
+	u64 snapshot = be64_to_cpu(READ_ONCE(*slot));
+
+	local64_set(&event->hw.prev_count, snapshot);
+
+	return snapshot;
+}
+
+/*
+ * Add the counter movement since the last snapshot to event->count.
+ * The previous snapshot must be fetched first because imc_read_counter()
+ * overwrites it with the fresh value.
+ */
+static void imc_event_update(struct perf_event *event)
+{
+	u64 before = local64_read(&event->hw.prev_count);
+	u64 now = imc_read_counter(event);
+
+	/* Update the delta to the event count */
+	local64_add(now - before, &event->count);
+}
+
+/* start callback: take the initial counter snapshot; "flags" is unused. */
+static void imc_event_start(struct perf_event *event, int flags)
+{
+ /*
+ * In Memory Counters are free flowing counters. HW or the microcode
+ * keeps adding to the counter offset in memory. To get event
+ * counter value, we snapshot the value here and we calculate
+ * delta at later point.
+ */
+ imc_read_counter(event);
+}
+
+/* stop (and, for nest/core, del) callback; "flags" is unused. */
+static void imc_event_stop(struct perf_event *event, int flags)
+{
+ /*
+ * Take a snapshot and calculate the delta and update
+ * the event counter values.
+ */
+ imc_event_update(event);
+}
+
+/*
+ * add callback for nest/core PMUs: snapshot the counter if PERF_EF_START
+ * is set in "flags". Always returns 0.
+ */
+static int imc_event_add(struct perf_event *event, int flags)
+{
+ if (flags & PERF_EF_START)
+ imc_event_start(event, flags);
+
+ return 0;
+}
+
+/*
+ * add callback for the thread PMU: snapshot the counter if PERF_EF_START is
+ * set, then call perf_sched_cb_inc() for the event's context PMU. Always
+ * returns 0.
+ */
+static int thread_imc_event_add(struct perf_event *event, int flags)
+{
+ if (flags & PERF_EF_START)
+ imc_event_start(event, flags);
+
+ /* Enable the sched_task to start the engine */
+ perf_sched_cb_inc(event->ctx->pmu);
+ return 0;
+}
+
+/*
+ * del callback for the thread PMU: fold the final delta into the event
+ * count and balance the perf_sched_cb_inc() done in thread_imc_event_add().
+ */
+static void thread_imc_event_del(struct perf_event *event, int flags)
+{
+ /*
+ * Take a snapshot and calculate the delta and update
+ * the event counter values.
+ */
+ imc_event_update(event);
+ perf_sched_cb_dec(event->ctx->pmu);
+}
+
+/*
+ * update_pmu_ops : Populate the appropriate operations for "pmu"
+ *
+ * Installs the callbacks and format group common to all domains first, then
+ * the domain-specific event_init. Nest and core PMUs additionally get the
+ * cpumask attribute group; the thread PMU overrides add/del and gains the
+ * sched_task and transaction callbacks. Always returns 0.
+ */
+static int update_pmu_ops(struct imc_pmu *pmu)
+{
+ pmu->pmu.task_ctx_nr = perf_invalid_context;
+ pmu->pmu.add = imc_event_add;
+ pmu->pmu.del = imc_event_stop;
+ pmu->pmu.start = imc_event_start;
+ pmu->pmu.stop = imc_event_stop;
+ pmu->pmu.read = imc_event_update;
+ pmu->pmu.attr_groups = pmu->attr_groups;
+ pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
+
+ switch (pmu->domain) {
+ case IMC_DOMAIN_NEST:
+ pmu->pmu.event_init = nest_imc_event_init;
+ pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
+ break;
+ case IMC_DOMAIN_CORE:
+ pmu->pmu.event_init = core_imc_event_init;
+ pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
+ break;
+ case IMC_DOMAIN_THREAD:
+ pmu->pmu.event_init = thread_imc_event_init;
+ pmu->pmu.sched_task = thread_imc_pmu_sched_task;
+ pmu->pmu.add = thread_imc_event_add;
+ pmu->pmu.del = thread_imc_event_del;
+ pmu->pmu.start_txn = thread_imc_pmu_start_txn;
+ pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn;
+ pmu->pmu.commit_txn = thread_imc_pmu_commit_txn;
+ break;
+ default:
+ /* Unknown domain: event_init is left unset */
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * init_nest_pmu_ref: Initialize the imc_pmu_ref struct for all the nodes
+ *
+ * Allocates one zeroed imc_pmu_ref per possible node, initialises each
+ * mutex and node id, and points every possible cpu's local_nest_imc_refc at
+ * the entry of its node. Returns 0, or -ENOMEM if the allocation fails.
+ */
+static int init_nest_pmu_ref(void)
+{
+ int nid, i, cpu;
+
+ nest_imc_refc = kcalloc(num_possible_nodes(), sizeof(*nest_imc_refc),
+ GFP_KERNEL);
+
+ if (!nest_imc_refc)
+ return -ENOMEM;
+
+ i = 0;
+ for_each_node(nid) {
+ /*
+ * Mutex lock to avoid races while tracking the number of
+ * sessions using the chip's nest pmu units.
+ */
+ mutex_init(&nest_imc_refc[i].lock);
+
+ /*
+ * Loop to init the "id" with the node_id. Variable "i" initialized to
+ * 0 and will be used as index to the array. "i" will not go off the
+ * end of the array since the "for_each_node" loops for "N_POSSIBLE"
+ * nodes only.
+ */
+ nest_imc_refc[i++].id = nid;
+ }
+
+ /*
+ * Loop to init the per_cpu "local_nest_imc_refc" with the proper
+ * "nest_imc_refc" index. This makes get_nest_pmu_ref() a lot simpler.
+ */
+ for_each_possible_cpu(cpu) {
+ nid = cpu_to_node(cpu);
+ for (i = 0; i < num_possible_nodes(); i++) {
+ if (nest_imc_refc[i].id == nid) {
+ per_cpu(local_nest_imc_refc, cpu) = &nest_imc_refc[i];
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+/*
+ * cleanup_all_core_imc_memory: Free the counter pages of every core that
+ * has them, then free the core mem_info array and the core reference-count
+ * array.
+ */
+static void cleanup_all_core_imc_memory(void)
+{
+	int i, nr_cores = num_present_cpus() / threads_per_core;
+	struct imc_mem_info *ptr = core_imc_pmu->mem_info;
+	int size = core_imc_pmu->counter_mem_size;
+
+	/* mem_info will never be NULL */
+	for (i = 0; i < nr_cores; i++) {
+		/*
+		 * Free the pages of core "i" itself; using ptr->vbase here
+		 * would free core 0's pages over and over and leak the rest.
+		 */
+		if (ptr[i].vbase)
+			free_pages((u64)ptr[i].vbase, get_order(size));
+	}
+
+	kfree(ptr);
+	kfree(core_imc_refc);
+}
+
+/* Per-cpu helper for thread_imc_disable(); "dummy" is unused. */
+static void thread_imc_ldbar_disable(void *dummy)
+{
+ /*
+ * By Zeroing LDBAR, we disable thread-imc
+ * updates.
+ */
+ mtspr(SPRN_LDBAR, 0);
+}
+
+/*
+ * Run thread_imc_ldbar_disable() through on_each_cpu(); the final argument
+ * of 1 asks it to wait until the calls have completed.
+ */
+void thread_imc_disable(void)
+{
+ on_each_cpu(thread_imc_ldbar_disable, NULL, 1);
+}
+
+/*
+ * cleanup_all_thread_imc_memory: Free every per-cpu thread IMC buffer.
+ *
+ * The cpu offline path only clears LDBAR and keeps the buffer, so a cpu
+ * that is offline right now may still own one. Walk all possible cpus
+ * rather than just the online ones, and clear each pointer so a later
+ * thread_imc_mem_alloc() does not reuse freed pages.
+ */
+static void cleanup_all_thread_imc_memory(void)
+{
+	int i, order = get_order(thread_imc_mem_size);
+
+	for_each_possible_cpu(i) {
+		if (!per_cpu(thread_imc_mem, i))
+			continue;
+
+		free_pages((u64)per_cpu(thread_imc_mem, i), order);
+		per_cpu(thread_imc_mem, i) = NULL;
+	}
+}
+
+/*
+ * Common function to unregister cpu hotplug callback and
+ * free the memory.
+ *
+ * For a nest PMU the hotplug state and the shared reference array are only
+ * torn down when the last nest PMU goes away (nest_pmus == 1). For core and
+ * thread PMUs the hotplug state is removed and the counter memory is freed
+ * right away. Finally the dynamically allocated events attribute group, if
+ * one was ever created, and "pmu_ptr" itself are freed.
+ *
+ * TODO: Need to handle pmu unregistering, which will be
+ * done in followup series.
+ */
+static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
+{
+	if (pmu_ptr->domain == IMC_DOMAIN_NEST) {
+		mutex_lock(&nest_init_lock);
+		if (nest_pmus == 1) {
+			cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
+			kfree(nest_imc_refc);
+		}
+
+		if (nest_pmus > 0)
+			nest_pmus--;
+		mutex_unlock(&nest_init_lock);
+	}
+
+	/* Free core_imc memory */
+	if (pmu_ptr->domain == IMC_DOMAIN_CORE) {
+		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE);
+		cleanup_all_core_imc_memory();
+	}
+
+	/* Free thread_imc memory */
+	if (pmu_ptr->domain == IMC_DOMAIN_THREAD) {
+		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
+		cleanup_all_thread_imc_memory();
+	}
+
+	/*
+	 * Only free the attr_groups which are dynamically allocated.
+	 * The events group is absent when update_events_in_group() found
+	 * nothing to publish or failed, so it must be checked first.
+	 */
+	if (pmu_ptr->attr_groups[IMC_EVENT_ATTR]) {
+		kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
+		kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
+	}
+	kfree(pmu_ptr);
+	return;
+}
+
+
+/*
+ * imc_mem_init : Name the pmu after the "name" property of @parent and do
+ * the memory setup that its domain needs.
+ *
+ * @pmu_ptr:	pmu being initialised; "domain" selects the setup path
+ * @parent:	device tree node whose "name" property is used in the pmu name
+ * @pmu_index:	slot in per_nest_pmu_arr[] (used for the nest domain only)
+ *
+ * Returns 0 on success, -ENODEV if the "name" property cannot be read,
+ * -ENOMEM on allocation failure, the thread_imc_mem_alloc() error for the
+ * thread domain, or -EINVAL for an unknown domain.
+ */
+static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
+								int pmu_index)
+{
+	const char *node_name;
+	int cores, cpu, rc;
+
+	if (of_property_read_string(parent, "name", &node_name))
+		return -ENODEV;
+
+	if (pmu_ptr->domain == IMC_DOMAIN_NEST) {
+		/* Update the pmu name */
+		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s_imc", "nest_", node_name);
+		if (!pmu_ptr->pmu.name)
+			return -ENOMEM;
+
+		/* Needed for hotplug/migration */
+		per_nest_pmu_arr[pmu_index] = pmu_ptr;
+		return 0;
+	}
+
+	if (pmu_ptr->domain == IMC_DOMAIN_CORE) {
+		/* Update the pmu name */
+		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", node_name, "_imc");
+		if (!pmu_ptr->pmu.name)
+			return -ENOMEM;
+
+		/* One mem_info entry and one reference count per core. */
+		cores = num_present_cpus() / threads_per_core;
+
+		pmu_ptr->mem_info = kcalloc(cores, sizeof(struct imc_mem_info),
+					    GFP_KERNEL);
+		if (!pmu_ptr->mem_info)
+			return -ENOMEM;
+
+		core_imc_refc = kcalloc(cores, sizeof(struct imc_pmu_ref),
+					GFP_KERNEL);
+		if (!core_imc_refc)
+			return -ENOMEM;
+
+		core_imc_pmu = pmu_ptr;
+		return 0;
+	}
+
+	if (pmu_ptr->domain == IMC_DOMAIN_THREAD) {
+		/* Update the pmu name */
+		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", node_name, "_imc");
+		if (!pmu_ptr->pmu.name)
+			return -ENOMEM;
+
+		/* Allocate counter memory for every cpu that is online now. */
+		thread_imc_mem_size = pmu_ptr->counter_mem_size;
+		for_each_online_cpu(cpu) {
+			rc = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size);
+			if (rc)
+				return rc;
+		}
+
+		thread_imc_pmu = pmu_ptr;
+		return 0;
+	}
+
+	/* Unknown domain */
+	return -EINVAL;
+}
+
+/*
+ * init_imc_pmu : Setup and register the IMC pmu device.
+ *
+ * @parent:	Device tree unit node
+ * @pmu_ptr:	memory allocated for this pmu
+ * @pmu_idx:	Count of nest pmc registered
+ *
+ * init_imc_pmu() sets up the pmu cpumask and registers for a cpu hotplug
+ * callback. Handles failure cases and accordingly frees memory: the core
+ * and thread domains release their counter memory and return directly when
+ * their cpumask/cpu init step fails; every other failure goes through
+ * imc_common_cpuhp_mem_free().
+ *
+ * Returns 0 on success, otherwise the error code of the step that failed
+ * (-EINVAL for an unknown domain).
+ */
+int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_idx)
+{
+	int ret;
+
+	ret = imc_mem_init(pmu_ptr, parent, pmu_idx);
+	if (ret)
+		goto err_free;
+
+	switch (pmu_ptr->domain) {
+	case IMC_DOMAIN_NEST:
+		/*
+		 * Nest imc pmu need only one cpu per chip, we initialize the
+		 * cpumask for the first nest imc pmu and use the same for the
+		 * rest. To handle the cpuhotplug callback unregister, we track
+		 * the number of nest pmus in "nest_pmus".
+		 */
+		mutex_lock(&nest_init_lock);
+		if (nest_pmus == 0) {
+			ret = init_nest_pmu_ref();
+			if (ret) {
+				mutex_unlock(&nest_init_lock);
+				goto err_free;
+			}
+			/* Register for cpu hotplug notification. */
+			ret = nest_pmu_cpumask_init();
+			if (ret) {
+				mutex_unlock(&nest_init_lock);
+				goto err_free;
+			}
+		}
+		nest_pmus++;
+		mutex_unlock(&nest_init_lock);
+		break;
+	case IMC_DOMAIN_CORE:
+		ret = core_imc_pmu_cpumask_init();
+		if (ret) {
+			cleanup_all_core_imc_memory();
+			return ret;
+		}
+
+		break;
+	case IMC_DOMAIN_THREAD:
+		ret = thread_imc_cpu_init();
+		if (ret) {
+			cleanup_all_thread_imc_memory();
+			return ret;
+		}
+
+		break;
+	default:
+		/*
+		 * Unknown domain. Report it with the same errno that
+		 * imc_mem_init() uses rather than a bare -1.
+		 */
+		return -EINVAL;
+	}
+
+	ret = update_events_in_group(parent, pmu_ptr);
+	if (ret)
+		goto err_free;
+
+	ret = update_pmu_ops(pmu_ptr);
+	if (ret)
+		goto err_free;
+
+	ret = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1);
+	if (ret)
+		goto err_free;
+
+	pr_info("%s performance monitor hardware support registered\n",
+							pmu_ptr->pmu.name);
+
+	return 0;
+
+err_free:
+	imc_common_cpuhp_mem_free(pmu_ptr);
+	return ret;
+}
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 3f3aa9a7063a..2efee3f196f5 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -99,7 +99,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
else if (!cpu_has_feature(CPU_FTR_POWER9_DD1) && p9_SDAR_MODE(event))
*mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
else
- *mmcra |= MMCRA_SDAR_MODE_TLB;
+ *mmcra |= MMCRA_SDAR_MODE_DCACHE;
} else
*mmcra |= MMCRA_SDAR_MODE_TLB;
}
@@ -488,8 +488,8 @@ static int find_alternative(u64 event, const unsigned int ev_alt[][MAX_ALT], int
return -1;
}
-int isa207_get_alternatives(u64 event, u64 alt[],
- const unsigned int ev_alt[][MAX_ALT], int size)
+int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
+ const unsigned int ev_alt[][MAX_ALT])
{
int i, j, num_alt = 0;
u64 alt_event;
@@ -505,5 +505,30 @@ int isa207_get_alternatives(u64 event, u64 alt[],
}
}
+ if (flags & PPMU_ONLY_COUNT_RUN) {
+ /*
+ * We're only counting in RUN state, so PM_CYC is equivalent to
+ * PM_RUN_CYC and PM_INST_CMPL === PM_RUN_INST_CMPL.
+ */
+ j = num_alt;
+ for (i = 0; i < num_alt; ++i) {
+ switch (alt[i]) {
+ case 0x1e: /* PM_CYC */
+ alt[j++] = 0x600f4; /* PM_RUN_CYC */
+ break;
+ case 0x600f4:
+ alt[j++] = 0x1e;
+ break;
+ case 0x2: /* PM_INST_CMPL */
+ alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */
+ break;
+ case 0x500fa:
+ alt[j++] = 0x2;
+ break;
+ }
+ }
+ num_alt = j;
+ }
+
return num_alt;
}
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 8acbe6e802c7..6c737d675792 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -247,6 +247,7 @@
#define MMCRA_SDAR_MODE_SHIFT 42
#define MMCRA_SDAR_MODE_TLB (1ull << MMCRA_SDAR_MODE_SHIFT)
#define MMCRA_SDAR_MODE_NO_UPDATES ~(0x3ull << MMCRA_SDAR_MODE_SHIFT)
+#define MMCRA_SDAR_MODE_DCACHE (2ull << MMCRA_SDAR_MODE_SHIFT)
#define MMCRA_IFM_SHIFT 30
#define MMCRA_THR_CTR_MANT_SHIFT 19
#define MMCRA_THR_CTR_MANT_MASK 0x7Ful
@@ -287,8 +288,8 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], unsigned long mmcr[],
struct perf_event *pevents[]);
void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]);
-int isa207_get_alternatives(u64 event, u64 alt[],
- const unsigned int ev_alt[][MAX_ALT], int size);
+int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
+ const unsigned int ev_alt[][MAX_ALT]);
void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
struct pt_regs *regs);
void isa207_get_mem_weight(u64 *weight);
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 5463516e369b..c9356955cab4 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -50,34 +50,11 @@ static const unsigned int event_alternatives[][MAX_ALT] = {
static int power8_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
- int i, j, num_alt = 0;
-
- num_alt = isa207_get_alternatives(event, alt, event_alternatives,
- (int)ARRAY_SIZE(event_alternatives));
- if (flags & PPMU_ONLY_COUNT_RUN) {
- /*
- * We're only counting in RUN state, so PM_CYC is equivalent to
- * PM_RUN_CYC and PM_INST_CMPL === PM_RUN_INST_CMPL.
- */
- j = num_alt;
- for (i = 0; i < num_alt; ++i) {
- switch (alt[i]) {
- case PM_CYC:
- alt[j++] = PM_RUN_CYC;
- break;
- case PM_RUN_CYC:
- alt[j++] = PM_CYC;
- break;
- case PM_INST_CMPL:
- alt[j++] = PM_RUN_INST_CMPL;
- break;
- case PM_RUN_INST_CMPL:
- alt[j++] = PM_INST_CMPL;
- break;
- }
- }
- num_alt = j;
- }
+ int num_alt = 0;
+
+ num_alt = isa207_get_alternatives(event, alt,
+ ARRAY_SIZE(event_alternatives), flags,
+ event_alternatives);
return num_alt;
}
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index 50689180a6c1..e99c6bf4d391 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -16,13 +16,16 @@ EVENT(PM_CYC, 0x0001e)
EVENT(PM_ICT_NOSLOT_CYC, 0x100f8)
EVENT(PM_CMPLU_STALL, 0x1e054)
EVENT(PM_INST_CMPL, 0x00002)
-EVENT(PM_BRU_CMPL, 0x4d05e)
+EVENT(PM_BR_CMPL, 0x4d05e)
EVENT(PM_BR_MPRED_CMPL, 0x400f6)
/* All L1 D cache load references counted at finish, gated by reject */
EVENT(PM_LD_REF_L1, 0x100fc)
/* Load Missed L1 */
EVENT(PM_LD_MISS_L1_FIN, 0x2c04e)
+EVENT(PM_LD_MISS_L1, 0x3e054)
+/* Alternate event code for PM_LD_MISS_L1 */
+EVENT(PM_LD_MISS_L1_ALT, 0x400f0)
/* Store Missed L1 */
EVENT(PM_ST_MISS_L1, 0x300f0)
/* L1 cache data prefetches */
@@ -62,3 +65,7 @@ EVENT(PM_INST_DISP, 0x200f2)
EVENT(PM_INST_DISP_ALT, 0x300f2)
/* Alternate Branch event code */
EVENT(PM_BR_CMPL_ALT, 0x10012)
+/* Branch events that are not strongly biased */
+EVENT(PM_BR_2PATH, 0x20036)
+/* Alternate branch events that are not strongly biased */
+EVENT(PM_BR_2PATH_ALT, 0x40036)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 2280cf87ff9c..24b5b5b7a206 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -109,14 +109,17 @@ static const unsigned int power9_event_alternatives[][MAX_ALT] = {
{ PM_INST_DISP, PM_INST_DISP_ALT },
{ PM_RUN_CYC_ALT, PM_RUN_CYC },
{ PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
+ { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT },
+ { PM_BR_2PATH, PM_BR_2PATH_ALT },
};
static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
int num_alt = 0;
- num_alt = isa207_get_alternatives(event, alt, power9_event_alternatives,
- (int)ARRAY_SIZE(power9_event_alternatives));
+ num_alt = isa207_get_alternatives(event, alt,
+ ARRAY_SIZE(power9_event_alternatives), flags,
+ power9_event_alternatives);
return num_alt;
}
@@ -125,7 +128,7 @@ GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_ICT_NOSLOT_CYC);
GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL);
GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
-GENERIC_EVENT_ATTR(branch-instructions, PM_BRU_CMPL);
+GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL);
GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1);
GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1_FIN);
@@ -143,7 +146,7 @@ CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PREF_ALL);
CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS);
CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST);
CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
-CACHE_EVENT_ATTR(branch-loads, PM_BRU_CMPL);
+CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL);
CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
@@ -152,7 +155,7 @@ static struct attribute *power9_events_attr[] = {
GENERIC_EVENT_PTR(PM_ICT_NOSLOT_CYC),
GENERIC_EVENT_PTR(PM_CMPLU_STALL),
GENERIC_EVENT_PTR(PM_INST_CMPL),
- GENERIC_EVENT_PTR(PM_BRU_CMPL),
+ GENERIC_EVENT_PTR(PM_BR_CMPL),
GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
GENERIC_EVENT_PTR(PM_LD_REF_L1),
GENERIC_EVENT_PTR(PM_LD_MISS_L1_FIN),
@@ -169,7 +172,7 @@ static struct attribute *power9_events_attr[] = {
CACHE_EVENT_PTR(PM_L2_ST_MISS),
CACHE_EVENT_PTR(PM_L2_ST),
CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
- CACHE_EVENT_PTR(PM_BRU_CMPL),
+ CACHE_EVENT_PTR(PM_BR_CMPL),
CACHE_EVENT_PTR(PM_DTLB_MISS),
CACHE_EVENT_PTR(PM_ITLB_MISS),
NULL
@@ -244,7 +247,7 @@ static int power9_generic_events[] = {
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL,
[PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_CMPL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL,
[PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
[PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
[PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1_FIN,
@@ -370,7 +373,7 @@ static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
},
[ C(BPU) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = PM_BRU_CMPL,
+ [ C(RESULT_ACCESS) ] = PM_BR_CMPL,
[ C(RESULT_MISS) ] = PM_BR_MPRED_CMPL,
},
[ C(OP_WRITE) ] = {
@@ -459,8 +462,8 @@ static int __init init_power9_pmu(void)
* Power9 DD1 should use PM_BR_CMPL_ALT event code for
* "branches" to provide correct counter value.
*/
- EVENT_VAR(PM_BRU_CMPL, _g).id = PM_BR_CMPL_ALT;
- EVENT_VAR(PM_BRU_CMPL, _c).id = PM_BR_CMPL_ALT;
+ EVENT_VAR(PM_BR_CMPL, _g).id = PM_BR_CMPL_ALT;
+ EVENT_VAR(PM_BR_CMPL, _c).id = PM_BR_CMPL_ALT;
rc = register_power_pmu(&power9_isa207_pmu);
} else {
rc = register_power_pmu(&power9_pmu);
diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile
index 72b824160660..2c5651992369 100644
--- a/arch/powerpc/platforms/44x/Makefile
+++ b/arch/powerpc/platforms/44x/Makefile
@@ -1,6 +1,6 @@
-obj-$(CONFIG_44x) += misc_44x.o
+obj-y += misc_44x.o machine_check.o
ifneq ($(CONFIG_PPC4xx_CPM),y)
-obj-$(CONFIG_44x) += idle.o
+obj-y += idle.o
endif
obj-$(CONFIG_PPC44x_SIMPLE) += ppc44x_simple.o
obj-$(CONFIG_EBONY) += ebony.o
diff --git a/arch/powerpc/platforms/44x/machine_check.c b/arch/powerpc/platforms/44x/machine_check.c
new file mode 100644
index 000000000000..034d70d6d335
--- /dev/null
+++ b/arch/powerpc/platforms/44x/machine_check.c
@@ -0,0 +1,89 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/ptrace.h>
+
+#include <asm/reg.h>
+
+/*
+ * machine_check_440A: Report a machine check taken in kernel mode.
+ *
+ * If ESR_IMCP is set in regs->dsisr, the instruction synchronous case is
+ * logged and that value is written to ESR with the IMCP bit masked off.
+ * Otherwise MCSR is read, each recognised cause bit is logged, and the
+ * value read is written back to MCSR.
+ *
+ * Always returns 0.
+ */
+int machine_check_440A(struct pt_regs *regs)
+{
+	unsigned long esr = regs->dsisr;
+	u32 mcsr;
+
+	printk("Machine check in kernel mode.\n");
+
+	if (esr & ESR_IMCP) {
+		printk("Instruction Synchronous Machine Check exception\n");
+		mtspr(SPRN_ESR, esr & ~ESR_IMCP);
+		return 0;
+	}
+
+	mcsr = mfspr(SPRN_MCSR);
+
+	if (mcsr & MCSR_IB)
+		printk("Instruction Read PLB Error\n");
+	if (mcsr & MCSR_DRB)
+		printk("Data Read PLB Error\n");
+	if (mcsr & MCSR_DWB)
+		printk("Data Write PLB Error\n");
+	if (mcsr & MCSR_TLBP)
+		printk("TLB Parity Error\n");
+	if (mcsr & MCSR_ICP) {
+		/* The instruction cache is flushed before the error is logged. */
+		flush_instruction_cache();
+		printk("I-Cache Parity Error\n");
+	}
+	if (mcsr & MCSR_DCSP)
+		printk("D-Cache Search Parity Error\n");
+	if (mcsr & MCSR_DCFP)
+		printk("D-Cache Flush Parity Error\n");
+	if (mcsr & MCSR_IMPE)
+		printk("Machine Check exception is imprecise\n");
+
+	/* Clear MCSR */
+	mtspr(SPRN_MCSR, mcsr);
+
+	return 0;
+}
+
+#ifdef CONFIG_PPC_47x
+/*
+ * machine_check_47x: Report a machine check taken in kernel mode.
+ *
+ * If ESR_IMCP is set in regs->dsisr, the instruction synchronous case is
+ * logged and that value is written to ESR with the IMCP bit masked off.
+ * Otherwise MCSR is read, each recognised cause bit is logged at KERN_ERR,
+ * and the value read is written back to MCSR.
+ *
+ * Always returns 0.
+ */
+int machine_check_47x(struct pt_regs *regs)
+{
+	unsigned long esr = regs->dsisr;
+
+	printk(KERN_ERR "Machine check in kernel mode.\n");
+
+	if (esr & ESR_IMCP) {
+		printk(KERN_ERR "Instruction Synchronous Machine Check exception\n");
+		mtspr(SPRN_ESR, esr & ~ESR_IMCP);
+	} else {
+		u32 mcsr = mfspr(SPRN_MCSR);
+
+		if (mcsr & MCSR_IB)
+			printk(KERN_ERR "Instruction Read PLB Error\n");
+		if (mcsr & MCSR_DRB)
+			printk(KERN_ERR "Data Read PLB Error\n");
+		if (mcsr & MCSR_DWB)
+			printk(KERN_ERR "Data Write PLB Error\n");
+		if (mcsr & MCSR_TLBP)
+			printk(KERN_ERR "TLB Parity Error\n");
+		if (mcsr & MCSR_ICP) {
+			/* The instruction cache is flushed before the error is logged. */
+			flush_instruction_cache();
+			printk(KERN_ERR "I-Cache Parity Error\n");
+		}
+		if (mcsr & MCSR_DCSP)
+			printk(KERN_ERR "D-Cache Search Parity Error\n");
+		if (mcsr & PPC47x_MCSR_GPR)
+			printk(KERN_ERR "GPR Parity Error\n");
+		if (mcsr & PPC47x_MCSR_FPR)
+			printk(KERN_ERR "FPR Parity Error\n");
+		if (mcsr & PPC47x_MCSR_IPR)
+			printk(KERN_ERR "Machine Check exception is imprecise\n");
+
+		/* Clear MCSR */
+		mtspr(SPRN_MCSR, mcsr);
+	}
+
+	return 0;
+}
+#endif /* CONFIG_PPC_47x */
diff --git a/arch/powerpc/platforms/4xx/Makefile b/arch/powerpc/platforms/4xx/Makefile
new file mode 100644
index 000000000000..9779c32db34e
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/Makefile
@@ -0,0 +1,8 @@
+obj-y += uic.o machine_check.o
+obj-$(CONFIG_PPC4xx_OCM) += ocm.o
+obj-$(CONFIG_4xx_SOC) += soc.o
+obj-$(CONFIG_PCI) += pci.o
+obj-$(CONFIG_PPC4xx_HSTA_MSI) += hsta_msi.o
+obj-$(CONFIG_PPC4xx_MSI) += msi.o
+obj-$(CONFIG_PPC4xx_CPM) += cpm.o
+obj-$(CONFIG_PPC4xx_GPIO) += gpio.o
diff --git a/arch/powerpc/sysdev/ppc4xx_cpm.c b/arch/powerpc/platforms/4xx/cpm.c
index ba95adf81d8d..53ff81ca8a3c 100644
--- a/arch/powerpc/sysdev/ppc4xx_cpm.c
+++ b/arch/powerpc/platforms/4xx/cpm.c
@@ -240,7 +240,7 @@ static int cpm_suspend_enter(suspend_state_t state)
return 0;
}
-static struct platform_suspend_ops cpm_suspend_ops = {
+static const struct platform_suspend_ops cpm_suspend_ops = {
.valid = cpm_suspend_valid,
.enter = cpm_suspend_enter,
};
@@ -278,8 +278,8 @@ static int __init cpm_init(void)
dcr_len = dcr_resource_len(np, 0);
if (dcr_base == 0 || dcr_len == 0) {
- printk(KERN_ERR "cpm: could not parse dcr property for %s\n",
- np->full_name);
+ printk(KERN_ERR "cpm: could not parse dcr property for %pOF\n",
+ np);
ret = -EINVAL;
goto node_put;
}
@@ -287,8 +287,8 @@ static int __init cpm_init(void)
cpm.dcr_host = dcr_map(np, dcr_base, dcr_len);
if (!DCR_MAP_OK(cpm.dcr_host)) {
- printk(KERN_ERR "cpm: failed to map dcr property for %s\n",
- np->full_name);
+ printk(KERN_ERR "cpm: failed to map dcr property for %pOF\n",
+ np);
ret = -EINVAL;
goto node_put;
}
diff --git a/arch/powerpc/sysdev/ppc4xx_gpio.c b/arch/powerpc/platforms/4xx/gpio.c
index 5382d04dd872..2238e369cde4 100644
--- a/arch/powerpc/sysdev/ppc4xx_gpio.c
+++ b/arch/powerpc/platforms/4xx/gpio.c
@@ -198,8 +198,7 @@ static int __init ppc4xx_add_gpiochips(void)
goto err;
continue;
err:
- pr_err("%s: registration failed with status %d\n",
- np->full_name, ret);
+ pr_err("%pOF: registration failed with status %d\n", np, ret);
kfree(ppc4xx_gc);
/* try others anyway */
}
diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/platforms/4xx/hsta_msi.c
index 9926ad67af76..9926ad67af76 100644
--- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
+++ b/arch/powerpc/platforms/4xx/hsta_msi.c
diff --git a/arch/powerpc/platforms/4xx/machine_check.c b/arch/powerpc/platforms/4xx/machine_check.c
new file mode 100644
index 000000000000..aa039dfaf82f
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/machine_check.c
@@ -0,0 +1,26 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/ptrace.h>
+
+#include <asm/reg.h>
+
+/*
+ * machine_check_4xx: Log whether a kernel-mode machine check was an
+ * instruction or a data machine check, based on ESR_IMCP in regs->dsisr.
+ * In the instruction case that value is also written to ESR with the IMCP
+ * bit masked off.
+ *
+ * Always returns 0.
+ */
+int machine_check_4xx(struct pt_regs *regs)
+{
+	unsigned long esr = regs->dsisr;
+
+	if (!(esr & ESR_IMCP)) {
+		printk("Data");
+	} else {
+		printk("Instruction");
+		mtspr(SPRN_ESR, esr & ~ESR_IMCP);
+	}
+
+	/* Second half of the message started above. */
+	printk(" machine check in kernel mode.\n");
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/platforms/4xx/msi.c
index 590dab4f47d6..d50417e23add 100644
--- a/arch/powerpc/sysdev/ppc4xx_msi.c
+++ b/arch/powerpc/platforms/4xx/msi.c
@@ -233,8 +233,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
/* Get MSI ranges */
err = of_address_to_resource(dev->dev.of_node, 0, &res);
if (err) {
- dev_err(&dev->dev, "%s resource error!\n",
- dev->dev.of_node->full_name);
+ dev_err(&dev->dev, "%pOF resource error!\n", dev->dev.of_node);
goto error_out;
}
diff --git a/arch/powerpc/sysdev/ppc4xx_ocm.c b/arch/powerpc/platforms/4xx/ocm.c
index 85d9e37f5ccb..85d9e37f5ccb 100644
--- a/arch/powerpc/sysdev/ppc4xx_ocm.c
+++ b/arch/powerpc/platforms/4xx/ocm.c
diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/platforms/4xx/pci.c
index 086aca69ecae..73e6b36bcd51 100644
--- a/arch/powerpc/sysdev/ppc4xx_pci.c
+++ b/arch/powerpc/platforms/4xx/pci.c
@@ -32,7 +32,7 @@
#include <asm/dcr-regs.h>
#include <mm/mmu_decl.h>
-#include "ppc4xx_pci.h"
+#include "pci.h"
static int dma_offset_set;
@@ -127,9 +127,9 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
* within 32 bits space
*/
if (cpu_addr != 0 || pci_addr > 0xffffffff) {
- printk(KERN_WARNING "%s: Ignored unsupported dma range"
+ printk(KERN_WARNING "%pOF: Ignored unsupported dma range"
" 0x%016llx...0x%016llx -> 0x%016llx\n",
- hose->dn->full_name,
+ hose->dn,
pci_addr, pci_addr + size - 1, cpu_addr);
continue;
}
@@ -152,8 +152,7 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
/* We only support one global DMA offset */
if (dma_offset_set && pci_dram_offset != res->start) {
- printk(KERN_ERR "%s: dma-ranges(s) mismatch\n",
- hose->dn->full_name);
+ printk(KERN_ERR "%pOF: dma-ranges(s) mismatch\n", hose->dn);
return -ENXIO;
}
@@ -161,17 +160,16 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
* DMA bounce buffers
*/
if (size < total_memory) {
- printk(KERN_ERR "%s: dma-ranges too small "
+ printk(KERN_ERR "%pOF: dma-ranges too small "
"(size=%llx total_memory=%llx)\n",
- hose->dn->full_name, size, (u64)total_memory);
+ hose->dn, size, (u64)total_memory);
return -ENXIO;
}
/* Check we are a power of 2 size and that base is a multiple of size*/
if ((size & (size - 1)) != 0 ||
(res->start & (size - 1)) != 0) {
- printk(KERN_ERR "%s: dma-ranges unaligned\n",
- hose->dn->full_name);
+ printk(KERN_ERR "%pOF: dma-ranges unaligned\n", hose->dn);
return -ENXIO;
}
@@ -181,8 +179,8 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
if (res->end > 0xffffffff &&
!(of_device_is_compatible(hose->dn, "ibm,plb-pciex-460sx")
|| of_device_is_compatible(hose->dn, "ibm,plb-pciex-476fpe"))) {
- printk(KERN_ERR "%s: dma-ranges outside of 32 bits space\n",
- hose->dn->full_name);
+ printk(KERN_ERR "%pOF: dma-ranges outside of 32 bits space\n",
+ hose->dn);
return -ENXIO;
}
out:
@@ -233,8 +231,7 @@ static int __init ppc4xx_setup_one_pci_PMM(struct pci_controller *hose,
*/
if ((plb_addr + size) > 0xffffffffull || !is_power_of_2(size) ||
size < 0x1000 || (plb_addr & (size - 1)) != 0) {
- printk(KERN_WARNING "%s: Resource out of range\n",
- hose->dn->full_name);
+ printk(KERN_WARNING "%pOF: Resource out of range\n", hose->dn);
return -1;
}
ma = (0xffffffffu << ilog2(size)) | 1;
@@ -266,8 +263,7 @@ static void __init ppc4xx_configure_pci_PMMs(struct pci_controller *hose,
if (!(res->flags & IORESOURCE_MEM))
continue;
if (j > 2) {
- printk(KERN_WARNING "%s: Too many ranges\n",
- hose->dn->full_name);
+ printk(KERN_WARNING "%pOF: Too many ranges\n", hose->dn);
break;
}
@@ -292,8 +288,8 @@ static void __init ppc4xx_configure_pci_PMMs(struct pci_controller *hose,
if (j <= 2 && !found_isa_hole && hose->isa_mem_size)
if (ppc4xx_setup_one_pci_PMM(hose, reg, hose->isa_mem_phys, 0,
hose->isa_mem_size, 0, j) == 0)
- printk(KERN_INFO "%s: Legacy ISA memory support enabled\n",
- hose->dn->full_name);
+ printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n",
+ hose->dn);
}
static void __init ppc4xx_configure_pci_PTMs(struct pci_controller *hose,
@@ -333,21 +329,20 @@ static void __init ppc4xx_probe_pci_bridge(struct device_node *np)
/* Check if device is enabled */
if (!of_device_is_available(np)) {
- printk(KERN_INFO "%s: Port disabled via device-tree\n",
- np->full_name);
+ printk(KERN_INFO "%pOF: Port disabled via device-tree\n", np);
return;
}
/* Fetch config space registers address */
if (of_address_to_resource(np, 0, &rsrc_cfg)) {
- printk(KERN_ERR "%s: Can't get PCI config register base !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get PCI config register base !",
+ np);
return;
}
/* Fetch host bridge internal registers address */
if (of_address_to_resource(np, 3, &rsrc_reg)) {
- printk(KERN_ERR "%s: Can't get PCI internal register base !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get PCI internal register base !",
+ np);
return;
}
@@ -361,7 +356,7 @@ static void __init ppc4xx_probe_pci_bridge(struct device_node *np)
/* Map registers */
reg = ioremap(rsrc_reg.start, resource_size(&rsrc_reg));
if (reg == NULL) {
- printk(KERN_ERR "%s: Can't map registers !", np->full_name);
+ printk(KERN_ERR "%pOF: Can't map registers !", np);
goto fail;
}
@@ -423,8 +418,8 @@ static int __init ppc4xx_setup_one_pcix_POM(struct pci_controller *hose,
if (!is_power_of_2(size) || size < 0x1000 ||
(plb_addr & (size - 1)) != 0) {
- printk(KERN_WARNING "%s: Resource out of range\n",
- hose->dn->full_name);
+ printk(KERN_WARNING "%pOF: Resource out of range\n",
+ hose->dn);
return -1;
}
@@ -467,8 +462,7 @@ static void __init ppc4xx_configure_pcix_POMs(struct pci_controller *hose,
if (!(res->flags & IORESOURCE_MEM))
continue;
if (j > 1) {
- printk(KERN_WARNING "%s: Too many ranges\n",
- hose->dn->full_name);
+ printk(KERN_WARNING "%pOF: Too many ranges\n", hose->dn);
break;
}
@@ -493,8 +487,8 @@ static void __init ppc4xx_configure_pcix_POMs(struct pci_controller *hose,
if (j <= 1 && !found_isa_hole && hose->isa_mem_size)
if (ppc4xx_setup_one_pcix_POM(hose, reg, hose->isa_mem_phys, 0,
hose->isa_mem_size, 0, j) == 0)
- printk(KERN_INFO "%s: Legacy ISA memory support enabled\n",
- hose->dn->full_name);
+ printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n",
+ hose->dn);
}
static void __init ppc4xx_configure_pcix_PIMs(struct pci_controller *hose,
@@ -539,14 +533,14 @@ static void __init ppc4xx_probe_pcix_bridge(struct device_node *np)
/* Fetch config space registers address */
if (of_address_to_resource(np, 0, &rsrc_cfg)) {
- printk(KERN_ERR "%s:Can't get PCI-X config register base !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get PCI-X config register base !",
+ np);
return;
}
/* Fetch host bridge internal registers address */
if (of_address_to_resource(np, 3, &rsrc_reg)) {
- printk(KERN_ERR "%s: Can't get PCI-X internal register base !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get PCI-X internal register base !",
+ np);
return;
}
@@ -568,7 +562,7 @@ static void __init ppc4xx_probe_pcix_bridge(struct device_node *np)
/* Map registers */
reg = ioremap(rsrc_reg.start, resource_size(&rsrc_reg));
if (reg == NULL) {
- printk(KERN_ERR "%s: Can't map registers !", np->full_name);
+ printk(KERN_ERR "%pOF: Can't map registers !", np);
goto fail;
}
@@ -1246,8 +1240,8 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port)
mbase = ioremap(port->cfg_space.start + 0x10000000, 0x1000);
if (mbase == NULL) {
- printk(KERN_ERR "%s: Can't map internal config space !",
- port->node->full_name);
+ printk(KERN_ERR "%pOF: Can't map internal config space !",
+ port->node);
goto done;
}
@@ -1389,7 +1383,7 @@ static void __init ppc_476fpe_pciex_check_link(struct ppc4xx_pciex_port *port)
port->index);
return;
}
-
+
while (timeout_ms--) {
val = in_le32(mbase + PECFG_TLDLP);
@@ -1448,8 +1442,7 @@ static int __init ppc4xx_pciex_check_core_init(struct device_node *np)
ppc4xx_pciex_hwops = &ppc_476fpe_pcie_hwops;
#endif
if (ppc4xx_pciex_hwops == NULL) {
- printk(KERN_WARNING "PCIE: unknown host type %s\n",
- np->full_name);
+ printk(KERN_WARNING "PCIE: unknown host type %pOF\n", np);
return -ENODEV;
}
@@ -1730,8 +1723,7 @@ static int __init ppc4xx_setup_one_pciex_POM(struct ppc4xx_pciex_port *port,
(index < 2 && size < 0x100000) ||
(index == 2 && size < 0x100) ||
(plb_addr & (size - 1)) != 0) {
- printk(KERN_WARNING "%s: Resource out of range\n",
- hose->dn->full_name);
+ printk(KERN_WARNING "%pOF: Resource out of range\n", hose->dn);
return -1;
}
@@ -1807,8 +1799,8 @@ static void __init ppc4xx_configure_pciex_POMs(struct ppc4xx_pciex_port *port,
if (!(res->flags & IORESOURCE_MEM))
continue;
if (j > 1) {
- printk(KERN_WARNING "%s: Too many ranges\n",
- port->node->full_name);
+ printk(KERN_WARNING "%pOF: Too many ranges\n",
+ port->node);
break;
}
@@ -1834,8 +1826,8 @@ static void __init ppc4xx_configure_pciex_POMs(struct ppc4xx_pciex_port *port,
if (ppc4xx_setup_one_pciex_POM(port, hose, mbase,
hose->isa_mem_phys, 0,
hose->isa_mem_size, 0, j) == 0)
- printk(KERN_INFO "%s: Legacy ISA memory support enabled\n",
- hose->dn->full_name);
+ printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n",
+ hose->dn);
/* Configure IO, always 64K starting at 0. We hard wire it to 64K !
* Note also that it -has- to be region index 2 on this HW
@@ -1970,8 +1962,8 @@ static void __init ppc4xx_pciex_port_setup_hose(struct ppc4xx_pciex_port *port)
(hose->first_busno + 1) * 0x100000,
busses * 0x100000);
if (cfg_data == NULL) {
- printk(KERN_ERR "%s: Can't map external config space !",
- port->node->full_name);
+ printk(KERN_ERR "%pOF: Can't map external config space !",
+ port->node);
goto fail;
}
hose->cfg_data = cfg_data;
@@ -1982,13 +1974,13 @@ static void __init ppc4xx_pciex_port_setup_hose(struct ppc4xx_pciex_port *port)
*/
mbase = ioremap(port->cfg_space.start + 0x10000000, 0x1000);
if (mbase == NULL) {
- printk(KERN_ERR "%s: Can't map internal config space !",
- port->node->full_name);
+ printk(KERN_ERR "%pOF: Can't map internal config space !",
+ port->node);
goto fail;
}
hose->cfg_addr = mbase;
- pr_debug("PCIE %s, bus %d..%d\n", port->node->full_name,
+ pr_debug("PCIE %pOF, bus %d..%d\n", port->node,
hose->first_busno, hose->last_busno);
pr_debug(" config space mapped at: root @0x%p, other @0x%p\n",
hose->cfg_addr, hose->cfg_data);
@@ -2100,14 +2092,13 @@ static void __init ppc4xx_probe_pciex_bridge(struct device_node *np)
/* Get the port number from the device-tree */
pval = of_get_property(np, "port", NULL);
if (pval == NULL) {
- printk(KERN_ERR "PCIE: Can't find port number for %s\n",
- np->full_name);
+ printk(KERN_ERR "PCIE: Can't find port number for %pOF\n", np);
return;
}
portno = *pval;
if (portno >= ppc4xx_pciex_port_count) {
- printk(KERN_ERR "PCIE: port number out of range for %s\n",
- np->full_name);
+ printk(KERN_ERR "PCIE: port number out of range for %pOF\n",
+ np);
return;
}
port = &ppc4xx_pciex_ports[portno];
@@ -2125,8 +2116,8 @@ static void __init ppc4xx_probe_pciex_bridge(struct device_node *np)
if (ppc4xx_pciex_hwops->want_sdr) {
pval = of_get_property(np, "sdr-base", NULL);
if (pval == NULL) {
- printk(KERN_ERR "PCIE: missing sdr-base for %s\n",
- np->full_name);
+ printk(KERN_ERR "PCIE: missing sdr-base for %pOF\n",
+ np);
return;
}
port->sdr_base = *pval;
@@ -2142,29 +2133,26 @@ static void __init ppc4xx_probe_pciex_bridge(struct device_node *np)
} else if (!strcmp(val, "pci")) {
port->endpoint = 0;
} else {
- printk(KERN_ERR "PCIE: missing or incorrect device_type for %s\n",
- np->full_name);
+ printk(KERN_ERR "PCIE: missing or incorrect device_type for %pOF\n",
+ np);
return;
}
/* Fetch config space registers address */
if (of_address_to_resource(np, 0, &port->cfg_space)) {
- printk(KERN_ERR "%s: Can't get PCI-E config space !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get PCI-E config space !", np);
return;
}
/* Fetch host bridge internal registers address */
if (of_address_to_resource(np, 1, &port->utl_regs)) {
- printk(KERN_ERR "%s: Can't get UTL register base !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get UTL register base !", np);
return;
}
/* Map DCRs */
dcrs = dcr_resource_start(np, 0);
if (dcrs == 0) {
- printk(KERN_ERR "%s: Can't get DCR register base !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get DCR register base !", np);
return;
}
port->dcrs = dcr_map(np, dcrs, dcr_resource_len(np, 0));
diff --git a/arch/powerpc/sysdev/ppc4xx_pci.h b/arch/powerpc/platforms/4xx/pci.h
index bb4821938ab1..bb4821938ab1 100644
--- a/arch/powerpc/sysdev/ppc4xx_pci.h
+++ b/arch/powerpc/platforms/4xx/pci.h
diff --git a/arch/powerpc/sysdev/ppc4xx_soc.c b/arch/powerpc/platforms/4xx/soc.c
index d41134d2f786..5e36508b2a70 100644
--- a/arch/powerpc/sysdev/ppc4xx_soc.c
+++ b/arch/powerpc/platforms/4xx/soc.c
@@ -90,7 +90,7 @@ static int __init ppc4xx_l2c_probe(void)
/* Get l2 cache size */
prop = of_get_property(np, "cache-size", NULL);
if (prop == NULL) {
- printk(KERN_ERR "%s: Can't get cache-size!\n", np->full_name);
+ printk(KERN_ERR "%pOF: Can't get cache-size!\n", np);
of_node_put(np);
return -ENODEV;
}
@@ -99,8 +99,7 @@ static int __init ppc4xx_l2c_probe(void)
/* Map DCRs */
dcrreg = of_get_property(np, "dcr-reg", &len);
if (!dcrreg || (len != 4 * sizeof(u32))) {
- printk(KERN_ERR "%s: Can't get DCR register base !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get DCR register base !", np);
of_node_put(np);
return -ENODEV;
}
diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/platforms/4xx/uic.c
index a00949f3e378..8b4dd0da0839 100644
--- a/arch/powerpc/sysdev/uic.c
+++ b/arch/powerpc/platforms/4xx/uic.c
@@ -243,16 +243,16 @@ static struct uic * __init uic_init_one(struct device_node *node)
raw_spin_lock_init(&uic->lock);
indexp = of_get_property(node, "cell-index", &len);
if (!indexp || (len != sizeof(u32))) {
- printk(KERN_ERR "uic: Device node %s has missing or invalid "
- "cell-index property\n", node->full_name);
+ printk(KERN_ERR "uic: Device node %pOF has missing or invalid "
+ "cell-index property\n", node);
return NULL;
}
uic->index = *indexp;
dcrreg = of_get_property(node, "dcr-reg", &len);
if (!dcrreg || (len != 2*sizeof(u32))) {
- printk(KERN_ERR "uic: Device node %s has missing or invalid "
- "dcr-reg property\n", node->full_name);
+ printk(KERN_ERR "uic: Device node %pOF has missing or invalid "
+ "dcr-reg property\n", node);
return NULL;
}
uic->dcrbase = *dcrreg;
@@ -292,7 +292,7 @@ void __init uic_init_tree(void)
* top-level interrupt controller */
primary_uic = uic_init_one(np);
if (!primary_uic)
- panic("Unable to initialize primary UIC %s\n", np->full_name);
+ panic("Unable to initialize primary UIC %pOF\n", np);
irq_set_default_host(primary_uic->irqhost);
of_node_put(np);
@@ -306,8 +306,8 @@ void __init uic_init_tree(void)
uic = uic_init_one(np);
if (! uic)
- panic("Unable to initialize a secondary UIC %s\n",
- np->full_name);
+ panic("Unable to initialize a secondary UIC %pOF\n",
+ np);
cascade_virq = irq_of_parse_and_map(np, 0);
diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c
index add5a5374fa0..b3097fe6441b 100644
--- a/arch/powerpc/platforms/512x/clock-commonclk.c
+++ b/arch/powerpc/platforms/512x/clock-commonclk.c
@@ -363,7 +363,7 @@ static int get_cpmf_mult_x2(void)
*/
/* applies to the IPS_DIV, and PCI_DIV values */
-static struct clk_div_table divtab_2346[] = {
+static const struct clk_div_table divtab_2346[] = {
{ .val = 2, .div = 2, },
{ .val = 3, .div = 3, },
{ .val = 4, .div = 4, },
@@ -372,7 +372,7 @@ static struct clk_div_table divtab_2346[] = {
};
/* applies to the MBX_DIV, LPC_DIV, and NFC_DIV values */
-static struct clk_div_table divtab_1234[] = {
+static const struct clk_div_table divtab_1234[] = {
{ .val = 1, .div = 1, },
{ .val = 2, .div = 2, },
{ .val = 3, .div = 3, },
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 6b4f4cb7009a..f99e79ee060e 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -387,8 +387,8 @@ static unsigned int __init get_fifo_size(struct device_node *np,
if (fp)
return *fp;
- pr_warning("no %s property in %s node, defaulting to %d\n",
- prop_name, np->full_name, DEFAULT_FIFO_SIZE);
+ pr_warning("no %s property in %pOF node, defaulting to %d\n",
+ prop_name, np, DEFAULT_FIFO_SIZE);
return DEFAULT_FIFO_SIZE;
}
@@ -426,15 +426,15 @@ static void __init mpc512x_psc_fifo_init(void)
psc = of_iomap(np, 0);
if (!psc) {
- pr_err("%s: Can't map %s device\n",
- __func__, np->full_name);
+ pr_err("%s: Can't map %pOF device\n",
+ __func__, np);
continue;
}
/* FIFO space is 4KiB, check if requested size is available */
if ((fifobase + tx_fifo_size + rx_fifo_size) > 0x1000) {
- pr_err("%s: no fifo space available for %s\n",
- __func__, np->full_name);
+ pr_err("%s: no fifo space available for %pOF\n",
+ __func__, np);
iounmap(psc);
/*
* chances are that another device requests less
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
index 39b49822ace1..1ecbf176d35a 100644
--- a/arch/powerpc/platforms/52xx/efika.c
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -99,7 +99,7 @@ static void __init efika_pcisetup(void)
bus_range = of_get_property(pcictrl, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int)) {
printk(KERN_WARNING EFIKA_PLATFORM_NAME
- ": Can't get bus-range for %s\n", pcictrl->full_name);
+ ": Can't get bus-range for %pOF\n", pcictrl);
goto out_put;
}
@@ -109,14 +109,14 @@ static void __init efika_pcisetup(void)
else
printk(KERN_INFO EFIKA_PLATFORM_NAME ": PCI buses %d..%d",
bus_range[0], bus_range[1]);
- printk(" controlled by %s\n", pcictrl->full_name);
+ printk(" controlled by %pOF\n", pcictrl);
printk("\n");
hose = pcibios_alloc_controller(pcictrl);
if (!hose) {
printk(KERN_WARNING EFIKA_PLATFORM_NAME
- ": Can't allocate PCI controller structure for %s\n",
- pcictrl->full_name);
+ ": Can't allocate PCI controller structure for %pOF\n",
+ pcictrl);
goto out_put;
}
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
index a3227040cc86..1fcab233d2f2 100644
--- a/arch/powerpc/platforms/52xx/media5200.c
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -156,7 +156,7 @@ static void __init media5200_init_irq(void)
fpga_np = of_find_compatible_node(NULL, NULL, "fsl,media5200-fpga");
if (!fpga_np)
goto out;
- pr_debug("%s: found fpga node: %s\n", __func__, fpga_np->full_name);
+ pr_debug("%s: found fpga node: %pOF\n", __func__, fpga_np);
media5200_irq.regs = of_iomap(fpga_np, 0);
if (!media5200_irq.regs)
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 22645a7c6b8a..9e974b1e1697 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -226,7 +226,7 @@ static int mpc52xx_gpt_irq_xlate(struct irq_domain *h, struct device_node *ct,
dev_dbg(gpt->dev, "%s: flags=%i\n", __func__, intspec[0]);
if ((intsize < 1) || (intspec[0] > 3)) {
- dev_err(gpt->dev, "bad irq specifier in %s\n", ct->full_name);
+ dev_err(gpt->dev, "bad irq specifier in %pOF\n", ct);
return -EINVAL;
}
@@ -331,7 +331,7 @@ mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
if (!of_find_property(node, "gpio-controller", NULL))
return;
- gpt->gc.label = kstrdup(node->full_name, GFP_KERNEL);
+ gpt->gc.label = kasprintf(GFP_KERNEL, "%pOF", node);
if (!gpt->gc.label) {
dev_err(gpt->dev, "out of memory\n");
return;
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
index 00282c2b0cae..af0f79995214 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pci.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
@@ -369,19 +369,19 @@ mpc52xx_add_bridge(struct device_node *node)
const int *bus_range;
struct resource rsrc;
- pr_debug("Adding MPC52xx PCI host bridge %s\n", node->full_name);
+ pr_debug("Adding MPC52xx PCI host bridge %pOF\n", node);
pci_add_flags(PCI_REASSIGN_ALL_BUS);
if (of_address_to_resource(node, 0, &rsrc) != 0) {
- printk(KERN_ERR "Can't get %s resources\n", node->full_name);
+ printk(KERN_ERR "Can't get %pOF resources\n", node);
return -EINVAL;
}
bus_range = of_get_property(node, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int)) {
- printk(KERN_WARNING "Can't get %s bus-range, assume bus 0\n",
- node->full_name);
+ printk(KERN_WARNING "Can't get %pOF bus-range, assume bus 0\n",
+ node);
bus_range = NULL;
}
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index 63c5ab6489c9..96bb55ca61d3 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -128,7 +128,7 @@ static int mcu_gpiochip_add(struct mcu *mcu)
return -ENODEV;
gc->owner = THIS_MODULE;
- gc->label = np->full_name;
+ gc->label = kasprintf(GFP_KERNEL, "%pOF", np);
gc->can_sleep = 1;
gc->ngpio = MCU_NUM_GPIO;
gc->base = -1;
@@ -141,6 +141,7 @@ static int mcu_gpiochip_add(struct mcu *mcu)
static int mcu_gpiochip_remove(struct mcu *mcu)
{
+ kfree(mcu->gc.label);
gpiochip_remove(&mcu->gc);
return 0;
}
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index 763ffca9628d..a4539c5accb0 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -113,7 +113,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk,
unreg:
platform_device_del(pdev);
err:
- pr_err("%s: registration failed\n", np->full_name);
+ pr_err("%pOF: registration failed\n", np);
next:
i++;
}
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index 978b85bb3233..7fa3e197871a 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -361,7 +361,7 @@ static int pmc_probe(struct platform_device *ofdev)
return -EBUSY;
}
- pmc_regs = ioremap(res.start, sizeof(struct mpc83xx_pmc));
+ pmc_regs = ioremap(res.start, sizeof(*pmc_regs));
if (!pmc_regs) {
ret = -ENOMEM;
@@ -374,7 +374,7 @@ static int pmc_probe(struct platform_device *ofdev)
goto out_pmc;
}
- clock_regs = ioremap(res.start, sizeof(struct mpc83xx_pmc));
+ clock_regs = ioremap(res.start, sizeof(*clock_regs));
if (!clock_regs) {
ret = -ENOMEM;
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 0908abd7e36f..9fb57f78cdbe 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -508,8 +508,8 @@ static void __init p1022_ds_setup_arch(void)
* allocate one static local variable for each
* call to this function.
*/
- pr_info("p1022ds: disabling %s node",
- np2->full_name);
+ pr_info("p1022ds: disabling %pOF node",
+ np2);
of_update_property(np2, &nor_status);
of_node_put(np2);
}
@@ -524,8 +524,8 @@ static void __init p1022_ds_setup_arch(void)
.length = sizeof("disabled"),
};
- pr_info("p1022ds: disabling %s node",
- np2->full_name);
+ pr_info("p1022ds: disabling %pOF node",
+ np2);
of_update_property(np2, &nand_status);
of_node_put(np2);
}
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
index cd6ce845f398..77e618dce4a8 100644
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -100,8 +100,8 @@ static void xes_mpc85xx_fixups(void)
err = of_address_to_resource(np, 0, &r[0]);
if (err) {
printk(KERN_WARNING "xes_mpc85xx: Could not get "
- "resource for device tree node '%s'",
- np->full_name);
+ "resource for device tree node '%pOF'",
+ np);
continue;
}
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index 80cbcb0ad9b1..536b0c5d5ce3 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -5,7 +5,6 @@ config CPM1
choice
prompt "8xx Machine Type"
depends on PPC_8xx
- depends on 8xx
default MPC885ADS
config MPC8XXFADS
@@ -92,7 +91,7 @@ endmenu
#
menu "MPC8xx CPM Options"
- depends on 8xx
+ depends on PPC_8xx
# This doesn't really belong here, but it is convenient to ask
# 8xx specific questions.
diff --git a/arch/powerpc/platforms/8xx/Makefile b/arch/powerpc/platforms/8xx/Makefile
index 76a81c3350a8..f9af3218bd9c 100644
--- a/arch/powerpc/platforms/8xx/Makefile
+++ b/arch/powerpc/platforms/8xx/Makefile
@@ -1,7 +1,7 @@
#
# Makefile for the PowerPC 8xx linux kernel.
#
-obj-$(CONFIG_PPC_8xx) += m8xx_setup.o
+obj-y += m8xx_setup.o machine_check.o pic.o
obj-$(CONFIG_MPC885ADS) += mpc885ads_setup.o
obj-$(CONFIG_MPC86XADS) += mpc86xads_setup.o
obj-$(CONFIG_PPC_EP88XC) += ep88xc.o
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index f81069f79a94..1917d69f84df 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -23,7 +23,7 @@
#include <asm/fs_pd.h>
#include <mm/mmu_decl.h>
-#include <sysdev/mpc8xx_pic.h>
+#include "pic.h"
#include "mpc8xx.h"
diff --git a/arch/powerpc/platforms/8xx/machine_check.c b/arch/powerpc/platforms/8xx/machine_check.c
new file mode 100644
index 000000000000..402016705a39
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/machine_check.c
@@ -0,0 +1,37 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/ptrace.h>
+
+#include <asm/reg.h>
+
+int machine_check_8xx(struct pt_regs *regs)
+{
+ unsigned long reason = regs->msr;
+
+ pr_err("Machine check in kernel mode.\n");
+ pr_err("Caused by (from SRR1=%lx): ", reason);
+ if (reason & 0x40000000)
+ pr_err("Fetch error at address %lx\n", regs->nip);
+ else
+ pr_err("Data access error at address %lx\n", regs->dar);
+
+#ifdef CONFIG_PCI
+ /* the qspan pci read routines can cause machine checks -- Cort
+ *
+ * yuck !!! that totally needs to go away ! There are better ways
+ * to deal with that than having a wart in the mcheck handler.
+ * -- BenH
+ */
+ bad_page_fault(regs, regs->dar, SIGBUS);
+ return 1;
+#else
+ return 0;
+#endif
+}
diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/platforms/8xx/pic.c
index 2842f9d63d21..8d5a25d43ef3 100644
--- a/arch/powerpc/sysdev/mpc8xx_pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -9,7 +9,7 @@
#include <asm/io.h>
#include <asm/8xx_immap.h>
-#include "mpc8xx_pic.h"
+#include "pic.h"
#define PIC_VEC_SPURRIOUS 15
diff --git a/arch/powerpc/sysdev/mpc8xx_pic.h b/arch/powerpc/platforms/8xx/pic.h
index 9fe00eebdc8b..9fe00eebdc8b 100644
--- a/arch/powerpc/sysdev/mpc8xx_pic.h
+++ b/arch/powerpc/platforms/8xx/pic.h
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 2f629e0551e9..13663efc1d31 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -32,7 +32,6 @@ config PPC_85xx
config PPC_8xx
bool "Freescale 8xx"
select FSL_SOC
- select 8xx
select PPC_LIB_RHEAP
select SYS_SUPPORTS_HUGETLBFS
@@ -149,10 +148,6 @@ config 6xx
depends on PPC32 && PPC_BOOK3S
select PPC_HAVE_PMU_SUPPORT
-# this is temp to handle compat with arch=ppc
-config 8xx
- bool
-
config E500
select FSL_EMB_PERFMON
select PPC_FSL_BOOK3E
@@ -271,44 +266,6 @@ config VSX
If in doubt, say Y here.
-config PPC_ICSWX
- bool "Support for PowerPC icswx coprocessor instruction"
- depends on PPC_BOOK3S_64
- default n
- ---help---
-
- This option enables kernel support for the PowerPC Initiate
- Coprocessor Store Word (icswx) coprocessor instruction on POWER7
- and POWER8 processors. POWER9 uses new copy/paste instructions
- to invoke the coprocessor.
-
- This option is only useful if you have a processor that supports
- the icswx coprocessor instruction. It does not have any effect
- on processors without the icswx coprocessor instruction.
-
- This option slightly increases kernel memory usage.
-
- If in doubt, say N here.
-
-config PPC_ICSWX_PID
- bool "icswx requires direct PID management"
- depends on PPC_ICSWX
- default y
- ---help---
- The PID register in server is used explicitly for ICSWX. In
- embedded systems PID management is done by the system.
-
-config PPC_ICSWX_USE_SIGILL
- bool "Should a bad CT cause a SIGILL?"
- depends on PPC_ICSWX
- default n
- ---help---
- Should a bad CT used for "non-record form ICSWX" cause an
- illegal instruction signal or should it be silent as
- architected.
-
- If in doubt, say N here.
-
config SPE_POSSIBLE
def_bool y
depends on E200 || (E500 && !PPC_E500MC)
@@ -413,7 +370,7 @@ config NR_CPUS
config NOT_COHERENT_CACHE
bool
- depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
+ depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
default n if PPC_47x
default y
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index 469ef170d218..d7a55ecfaee5 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_FSL_ULI1575) += fsl_uli1575.o
obj-$(CONFIG_PPC_PMAC) += powermac/
obj-$(CONFIG_PPC_CHRP) += chrp/
+obj-$(CONFIG_4xx) += 4xx/
obj-$(CONFIG_40x) += 40x/
obj-$(CONFIG_44x) += 44x/
obj-$(CONFIG_PPC_MPC512x) += 512x/
diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c
index 45cb9821173c..b9d466cc2b8a 100644
--- a/arch/powerpc/platforms/amigaone/setup.c
+++ b/arch/powerpc/platforms/amigaone/setup.c
@@ -40,7 +40,7 @@ static int __init amigaone_add_bridge(struct device_node *dev)
const int *bus_range;
struct pci_controller *hose;
- printk(KERN_INFO "Adding PCI host bridge %s\n", dev->full_name);
+ printk(KERN_INFO "Adding PCI host bridge %pOF\n", dev);
cfg_addr = of_get_address(dev, 0, NULL, NULL);
cfg_data = of_get_address(dev, 1, NULL, NULL);
@@ -49,8 +49,8 @@ static int __init amigaone_add_bridge(struct device_node *dev)
bus_range = of_get_property(dev, "bus-range", &len);
if ((bus_range == NULL) || (len < 2 * sizeof(int)))
- printk(KERN_WARNING "Can't get bus-range for %s, assume"
- " bus 0\n", dev->full_name);
+ printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+ " bus 0\n", dev);
hose = pcibios_alloc_controller(dev);
if (hose == NULL)
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 8d3ae2cc52bf..6ea3f248b155 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -187,8 +187,8 @@ static struct axon_msic *find_msi_translator(struct pci_dev *dev)
irq_domain = irq_find_host(dn);
if (!irq_domain) {
- dev_dbg(&dev->dev, "axon_msi: no irq_domain found for node %s\n",
- dn->full_name);
+ dev_dbg(&dev->dev, "axon_msi: no irq_domain found for node %pOF\n",
+ dn);
goto out_error;
}
@@ -326,8 +326,8 @@ static void axon_msi_shutdown(struct platform_device *device)
struct axon_msic *msic = dev_get_drvdata(&device->dev);
u32 tmp;
- pr_devel("axon_msi: disabling %s\n",
- irq_domain_get_of_node(msic->irq_domain)->full_name);
+ pr_devel("axon_msi: disabling %pOF\n",
+ irq_domain_get_of_node(msic->irq_domain));
tmp = dcr_read(msic->dcr_host, MSIC_CTRL_REG);
tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE;
msic_dcr_write(msic, MSIC_CTRL_REG, tmp);
@@ -340,12 +340,12 @@ static int axon_msi_probe(struct platform_device *device)
unsigned int virq;
int dcr_base, dcr_len;
- pr_devel("axon_msi: setting up dn %s\n", dn->full_name);
+ pr_devel("axon_msi: setting up dn %pOF\n", dn);
msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL);
if (!msic) {
- printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n",
- dn->full_name);
+ printk(KERN_ERR "axon_msi: couldn't allocate msic for %pOF\n",
+ dn);
goto out;
}
@@ -354,30 +354,30 @@ static int axon_msi_probe(struct platform_device *device)
if (dcr_base == 0 || dcr_len == 0) {
printk(KERN_ERR
- "axon_msi: couldn't parse dcr properties on %s\n",
- dn->full_name);
+ "axon_msi: couldn't parse dcr properties on %pOF\n",
+ dn);
goto out_free_msic;
}
msic->dcr_host = dcr_map(dn, dcr_base, dcr_len);
if (!DCR_MAP_OK(msic->dcr_host)) {
- printk(KERN_ERR "axon_msi: dcr_map failed for %s\n",
- dn->full_name);
+ printk(KERN_ERR "axon_msi: dcr_map failed for %pOF\n",
+ dn);
goto out_free_msic;
}
msic->fifo_virt = dma_alloc_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES,
&msic->fifo_phys, GFP_KERNEL);
if (!msic->fifo_virt) {
- printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n",
- dn->full_name);
+ printk(KERN_ERR "axon_msi: couldn't allocate fifo for %pOF\n",
+ dn);
goto out_free_msic;
}
virq = irq_of_parse_and_map(dn, 0);
if (!virq) {
- printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n",
- dn->full_name);
+ printk(KERN_ERR "axon_msi: irq parse and map failed for %pOF\n",
+ dn);
goto out_free_fifo;
}
memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES);
@@ -385,8 +385,8 @@ static int axon_msi_probe(struct platform_device *device)
/* We rely on being able to stash a virq in a u16, so limit irqs to < 65536 */
msic->irq_domain = irq_domain_add_nomap(dn, 65536, &msic_host_ops, msic);
if (!msic->irq_domain) {
- printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %s\n",
- dn->full_name);
+ printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %pOF\n",
+ dn);
goto out_free_fifo;
}
@@ -412,7 +412,7 @@ static int axon_msi_probe(struct platform_device *device)
axon_msi_debug_setup(dn, msic);
- printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name);
+ printk(KERN_DEBUG "axon_msi: setup MSIC on %pOF\n", dn);
return 0;
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 871d38479a25..6fc85e29dc08 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -303,8 +303,8 @@ static void __init init_one_iic(unsigned int hw_cpu, unsigned long addr,
iic->node = of_node_get(node);
out_be64(&iic->regs->prio, 0);
- printk(KERN_INFO "IIC for CPU %d target id 0x%x : %s\n",
- hw_cpu, iic->target_id, node->full_name);
+ printk(KERN_INFO "IIC for CPU %d target id 0x%x : %pOF\n",
+ hw_cpu, iic->target_id, node);
}
static int __init setup_iic(void)
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 29d4f96ed33e..4b91ad08eefd 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -278,8 +278,8 @@ static int cell_iommu_find_ioc(int nid, unsigned long *base)
if (of_node_to_nid(np) != nid)
continue;
if (of_address_to_resource(np, 0, &r)) {
- printk(KERN_ERR "iommu: can't get address for %s\n",
- np->full_name);
+ printk(KERN_ERR "iommu: can't get address for %pOF\n",
+ np);
continue;
}
*base = r.start;
@@ -458,8 +458,8 @@ static inline u32 cell_iommu_get_ioid(struct device_node *np)
ioid = of_get_property(np, "ioid", NULL);
if (ioid == NULL) {
- printk(KERN_WARNING "iommu: missing ioid for %s using 0\n",
- np->full_name);
+ printk(KERN_WARNING "iommu: missing ioid for %pOF using 0\n",
+ np);
return 0;
}
@@ -559,8 +559,8 @@ static struct iommu_table *cell_get_iommu_table(struct device *dev)
*/
iommu = cell_iommu_for_node(dev_to_node(dev));
if (iommu == NULL || list_empty(&iommu->windows)) {
- dev_err(dev, "iommu: missing iommu for %s (node %d)\n",
- of_node_full_name(dev->of_node), dev_to_node(dev));
+ dev_err(dev, "iommu: missing iommu for %pOF (node %d)\n",
+ dev->of_node, dev_to_node(dev));
return NULL;
}
window = list_entry(iommu->windows.next, struct iommu_window, list);
@@ -720,12 +720,12 @@ static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np)
/* Get node ID */
nid = of_node_to_nid(np);
if (nid < 0) {
- printk(KERN_ERR "iommu: failed to get node for %s\n",
- np->full_name);
+ printk(KERN_ERR "iommu: failed to get node for %pOF\n",
+ np);
return NULL;
}
- pr_debug("iommu: setting up iommu for node %d (%s)\n",
- nid, np->full_name);
+ pr_debug("iommu: setting up iommu for node %d (%pOF)\n",
+ nid, np);
/* XXX todo: If we can have multiple windows on the same IOMMU, which
* isn't the case today, we probably want here to check whether the
@@ -736,8 +736,8 @@ static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np)
*/
if (cbe_nr_iommus >= NR_IOMMUS) {
- printk(KERN_ERR "iommu: too many IOMMUs detected ! (%s)\n",
- np->full_name);
+ printk(KERN_ERR "iommu: too many IOMMUs detected ! (%pOF)\n",
+ np);
return NULL;
}
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
index 460ab392f0e7..2f704afe9af3 100644
--- a/arch/powerpc/platforms/cell/ras.c
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -196,8 +196,8 @@ static int __init cbe_ptcal_enable(void)
for_each_node_by_type(np, "cpu") {
const u32 *nid = of_get_property(np, "node-id", NULL);
if (!nid) {
- printk(KERN_ERR "%s: node %s is missing node-id?\n",
- __func__, np->full_name);
+ printk(KERN_ERR "%s: node %pOF is missing node-id?\n",
+ __func__, np);
continue;
}
cbe_ptcal_enable_on_node(*nid, order);
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
index f1f7878893f3..d1e61e273e64 100644
--- a/arch/powerpc/platforms/cell/spider-pci.c
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -130,8 +130,8 @@ int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data)
struct resource r;
unsigned long offset = (unsigned long)data;
- pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%s)\n",
- np->full_name);
+ pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%pOF)\n",
+ np);
priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL);
if (!priv) {
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index ff924af00e78..aa44bfc46467 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -323,8 +323,8 @@ static void __init spider_init_one(struct device_node *of_node, int chip,
irq_set_handler_data(virq, pic);
irq_set_chained_handler(virq, spider_irq_cascade);
- printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %s\n",
- pic->node_id, addr, of_node->full_name);
+ printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %pOF\n",
+ pic->node_id, addr, of_node);
/* Enable the interrupt detection enable bit. Do this last! */
out_be32(pic->regs + TIR_DEN, in_be32(pic->regs + TIR_DEN) | 0x1);
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index 672d310dcf14..f636ee22b203 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -191,8 +191,8 @@ static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
goto err;
}
ret = -EINVAL;
- pr_debug(" irq %d no 0x%x on %s\n", i, oirq.args[0],
- oirq.np->full_name);
+ pr_debug(" irq %d no 0x%x on %pOF\n", i, oirq.args[0],
+ oirq.np);
spu->irqs[i] = irq_create_of_mapping(&oirq);
if (!spu->irqs[i]) {
pr_debug("spu_new: failed to map it !\n");
@@ -243,32 +243,32 @@ static int __init spu_map_device(struct spu *spu)
ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store,
&spu->local_store_phys);
if (ret) {
- pr_debug("spu_new: failed to map %s resource 0\n",
- np->full_name);
+ pr_debug("spu_new: failed to map %pOF resource 0\n",
+ np);
goto out;
}
ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem,
&spu->problem_phys);
if (ret) {
- pr_debug("spu_new: failed to map %s resource 1\n",
- np->full_name);
+ pr_debug("spu_new: failed to map %pOF resource 1\n",
+ np);
goto out_unmap;
}
ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL);
if (ret) {
- pr_debug("spu_new: failed to map %s resource 2\n",
- np->full_name);
+ pr_debug("spu_new: failed to map %pOF resource 2\n",
+ np);
goto out_unmap;
}
if (!firmware_has_feature(FW_FEATURE_LPAR))
ret = spu_map_resource(spu, 3,
(void __iomem**)&spu->priv1, NULL);
if (ret) {
- pr_debug("spu_new: failed to map %s resource 3\n",
- np->full_name);
+ pr_debug("spu_new: failed to map %pOF resource 3\n",
+ np);
goto out_unmap;
}
- pr_debug("spu_new: %s maps:\n", np->full_name);
+ pr_debug("spu_new: %pOF maps:\n", np);
pr_debug(" local store : 0x%016lx -> 0x%p\n",
spu->local_store_phys, spu->local_store);
pr_debug(" problem state : 0x%016lx -> 0x%p\n",
@@ -316,8 +316,8 @@ static int __init of_create_spu(struct spu *spu, void *data)
spu->node = of_node_to_nid(spe);
if (spu->node >= MAX_NUMNODES) {
- printk(KERN_WARNING "SPE %s on node %d ignored,"
- " node number too big\n", spe->full_name, spu->node);
+ printk(KERN_WARNING "SPE %pOF on node %d ignored,"
+ " node number too big\n", spe, spu->node);
printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n");
ret = -ENODEV;
goto out;
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index ae2f740a82f1..5ffcdeb1eb17 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -1749,7 +1749,7 @@ out:
static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file_inode(file);
- int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ int err = file_write_and_wait_range(file, start, end);
if (!err) {
inode_lock(inode);
err = spufs_mfc_flush(file, NULL);
diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c
index 1b87e198faa7..27264794f5c0 100644
--- a/arch/powerpc/platforms/chrp/pci.c
+++ b/arch/powerpc/platforms/chrp/pci.c
@@ -235,14 +235,14 @@ chrp_find_bridges(void)
++index;
/* The GG2 bridge on the LongTrail doesn't have an address */
if (of_address_to_resource(dev, 0, &r) && !is_longtrail) {
- printk(KERN_WARNING "Can't use %s: no address\n",
- dev->full_name);
+ printk(KERN_WARNING "Can't use %pOF: no address\n",
+ dev);
continue;
}
bus_range = of_get_property(dev, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int)) {
- printk(KERN_WARNING "Can't get bus-range for %s\n",
- dev->full_name);
+ printk(KERN_WARNING "Can't get bus-range for %pOF\n",
+ dev);
continue;
}
if (bus_range[1] == bus_range[0])
@@ -250,15 +250,15 @@ chrp_find_bridges(void)
else
printk(KERN_INFO "PCI buses %d..%d",
bus_range[0], bus_range[1]);
- printk(" controlled by %s", dev->full_name);
+ printk(" controlled by %pOF", dev);
if (!is_longtrail)
printk(" at %llx", (unsigned long long)r.start);
printk("\n");
hose = pcibios_alloc_controller(dev);
if (!hose) {
- printk("Can't allocate PCI controller structure for %s\n",
- dev->full_name);
+ printk("Can't allocate PCI controller structure for %pOF\n",
+ dev);
continue;
}
hose->first_busno = hose->self_busno = bus_range[0];
@@ -297,8 +297,8 @@ chrp_find_bridges(void)
}
}
} else {
- printk("No methods for %s (model %s), using RTAS\n",
- dev->full_name, model);
+ printk("No methods for %pOF (model %s), using RTAS\n",
+ dev, model);
hose->ops = &rtas_pci_ops;
}
diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c
index 2b4dc6abde6c..19760712b39d 100644
--- a/arch/powerpc/platforms/chrp/pegasos_eth.c
+++ b/arch/powerpc/platforms/chrp/pegasos_eth.c
@@ -63,7 +63,7 @@ static struct platform_device mv643xx_eth_mvmdio_device = {
.name = "orion-mdio",
.id = -1,
.num_resources = ARRAY_SIZE(mv643xx_eth_mvmdio_resources),
- .resource = mv643xx_eth_shared_resources,
+ .resource = mv643xx_eth_mvmdio_resources,
};
static struct resource mv643xx_eth_port1_resources[] = {
diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c
index f29cf29b11f8..f514d5d28cd4 100644
--- a/arch/powerpc/platforms/embedded6xx/linkstation.c
+++ b/arch/powerpc/platforms/embedded6xx/linkstation.c
@@ -41,12 +41,12 @@ static int __init linkstation_add_bridge(struct device_node *dev)
struct pci_controller *hose;
const int *bus_range;
- printk("Adding PCI host bridge %s\n", dev->full_name);
+ printk("Adding PCI host bridge %pOF\n", dev);
bus_range = of_get_property(dev, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int))
- printk(KERN_WARNING "Can't get bus-range for %s, assume"
- " bus 0\n", dev->full_name);
+ printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+ " bus 0\n", dev);
hose = pcibios_alloc_controller(dev);
if (hose == NULL)
diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c
index 8e3590941960..273dfa3f0252 100644
--- a/arch/powerpc/platforms/embedded6xx/mvme5100.c
+++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c
@@ -115,7 +115,7 @@ static int __init mvme5100_add_bridge(struct device_node *dev)
struct pci_controller *hose;
unsigned short devid;
- pr_info("Adding PCI host bridge %s\n", dev->full_name);
+ pr_info("Adding PCI host bridge %pOF\n", dev);
bus_range = of_get_property(dev, "bus-range", &len);
diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c
index 471a50bcd074..ed1914dd34bb 100644
--- a/arch/powerpc/platforms/embedded6xx/storcenter.c
+++ b/arch/powerpc/platforms/embedded6xx/storcenter.c
@@ -44,7 +44,7 @@ static int __init storcenter_add_bridge(struct device_node *dev)
struct pci_controller *hose;
const int *bus_range;
- printk("Adding PCI host bridge %s\n", dev->full_name);
+ printk("Adding PCI host bridge %pOF\n", dev);
hose = pcibios_alloc_controller(dev);
if (hose == NULL)
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index 69794d9389c2..e3821379e86f 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -73,8 +73,8 @@ static void __init fixup_bus_range(struct device_node *bridge)
/* Lookup the "bus-range" property for the hose */
prop = of_find_property(bridge, "bus-range", &len);
if (prop == NULL || prop->value == NULL || len < 2 * sizeof(int)) {
- printk(KERN_WARNING "Can't get bus-range for %s\n",
- bridge->full_name);
+ printk(KERN_WARNING "Can't get bus-range for %pOF\n",
+ bridge);
return;
}
bus_range = prop->value;
@@ -498,12 +498,12 @@ static int __init maple_add_bridge(struct device_node *dev)
const int *bus_range;
int primary = 1;
- DBG("Adding PCI host bridge %s\n", dev->full_name);
+ DBG("Adding PCI host bridge %pOF\n", dev);
bus_range = of_get_property(dev, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int)) {
- printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n",
- dev->full_name);
+ printk(KERN_WARNING "Can't get bus-range for %pOF, assume bus 0\n",
+ dev);
}
hose = pcibios_alloc_controller(dev);
diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c
index 10c4e8fc6ea9..5ff6108f19e9 100644
--- a/arch/powerpc/platforms/pasemi/pci.c
+++ b/arch/powerpc/platforms/pasemi/pci.c
@@ -193,7 +193,7 @@ static int __init pas_add_bridge(struct device_node *dev)
{
struct pci_controller *hose;
- pr_debug("Adding PCI host bridge %s\n", dev->full_name);
+ pr_debug("Adding PCI host bridge %pOF\n", dev);
hose = pcibios_alloc_controller(dev);
if (!hose)
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 1e02328c3f2d..9e3f39d36e88 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -2658,25 +2658,25 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ
if (i >= MAX_MACIO_CHIPS) {
printk(KERN_ERR "pmac_feature: Please increase MAX_MACIO_CHIPS !\n");
- printk(KERN_ERR "pmac_feature: %s skipped\n", node->full_name);
+ printk(KERN_ERR "pmac_feature: %pOF skipped\n", node);
return;
}
addrp = of_get_pci_address(node, 0, &size, NULL);
if (addrp == NULL) {
- printk(KERN_ERR "pmac_feature: %s: can't find base !\n",
- node->full_name);
+ printk(KERN_ERR "pmac_feature: %pOF: can't find base !\n",
+ node);
return;
}
addr = of_translate_address(node, addrp);
if (addr == 0) {
- printk(KERN_ERR "pmac_feature: %s, can't translate base !\n",
- node->full_name);
+ printk(KERN_ERR "pmac_feature: %pOF, can't translate base !\n",
+ node);
return;
}
base = ioremap(addr, (unsigned long)size);
if (!base) {
- printk(KERN_ERR "pmac_feature: %s, can't map mac-io chip !\n",
- node->full_name);
+ printk(KERN_ERR "pmac_feature: %pOF, can't map mac-io chip !\n",
+ node);
return;
}
if (type == macio_keylargo || type == macio_keylargo2) {
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index f627c9fd7b48..70183eb3d5c8 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -494,8 +494,8 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
host = kzalloc(sizeof(struct pmac_i2c_host_kw), GFP_KERNEL);
if (host == NULL) {
- printk(KERN_ERR "low_i2c: Can't allocate host for %s\n",
- np->full_name);
+ printk(KERN_ERR "low_i2c: Can't allocate host for %pOF\n",
+ np);
return NULL;
}
@@ -505,8 +505,8 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
*/
addrp = of_get_property(np, "AAPL,address", NULL);
if (addrp == NULL) {
- printk(KERN_ERR "low_i2c: Can't find address for %s\n",
- np->full_name);
+ printk(KERN_ERR "low_i2c: Can't find address for %pOF\n",
+ np);
kfree(host);
return NULL;
}
@@ -538,13 +538,13 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
host->irq = irq_of_parse_and_map(np, 0);
if (!host->irq)
printk(KERN_WARNING
- "low_i2c: Failed to map interrupt for %s\n",
- np->full_name);
+ "low_i2c: Failed to map interrupt for %pOF\n",
+ np);
host->base = ioremap((*addrp), 0x1000);
if (host->base == NULL) {
- printk(KERN_ERR "low_i2c: Can't map registers for %s\n",
- np->full_name);
+ printk(KERN_ERR "low_i2c: Can't map registers for %pOF\n",
+ np);
kfree(host);
return NULL;
}
@@ -560,8 +560,8 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
"keywest i2c", host))
host->irq = 0;
- printk(KERN_INFO "KeyWest i2c @0x%08x irq %d %s\n",
- *addrp, host->irq, np->full_name);
+ printk(KERN_INFO "KeyWest i2c @0x%08x irq %d %pOF\n",
+ *addrp, host->irq, np);
return host;
}
@@ -798,7 +798,7 @@ static void __init pmu_i2c_probe(void)
if (busnode == NULL)
return;
- printk(KERN_INFO "PMU i2c %s\n", busnode->full_name);
+ printk(KERN_INFO "PMU i2c %pOF\n", busnode);
/*
* We add bus 1 and 2 only for now, bus 0 is "special"
@@ -913,7 +913,7 @@ static void __init smu_i2c_probe(void)
if (controller == NULL)
return;
- printk(KERN_INFO "SMU i2c %s\n", controller->full_name);
+ printk(KERN_INFO "SMU i2c %pOF\n", controller);
/* Look for childs, note that they might not be of the right
* type as older device trees mix i2c busses and other things
@@ -945,8 +945,8 @@ static void __init smu_i2c_probe(void)
bus->flags = 0;
list_add(&bus->link, &pmac_i2c_busses);
- printk(KERN_INFO " channel %x bus %s\n",
- bus->channel, busnode->full_name);
+ printk(KERN_INFO " channel %x bus %pOF\n",
+ bus->channel, busnode);
}
}
@@ -1129,7 +1129,7 @@ int pmac_i2c_setmode(struct pmac_i2c_bus *bus, int mode)
*/
if (mode < pmac_i2c_mode_dumb || mode > pmac_i2c_mode_combined) {
printk(KERN_ERR "low_i2c: Invalid mode %d requested on"
- " bus %s !\n", mode, bus->busnode->full_name);
+ " bus %pOF !\n", mode, bus->busnode);
return -EINVAL;
}
bus->mode = mode;
@@ -1146,8 +1146,8 @@ int pmac_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
WARN_ON(!bus->opened);
DBG("xfer() chan=%d, addrdir=0x%x, mode=%d, subsize=%d, subaddr=0x%x,"
- " %d bytes, bus %s\n", bus->channel, addrdir, bus->mode, subsize,
- subaddr, len, bus->busnode->full_name);
+ " %d bytes, bus %pOF\n", bus->channel, addrdir, bus->mode, subsize,
+ subaddr, len, bus->busnode);
rc = bus->xfer(bus, addrdir, subsize, subaddr, data, len);
@@ -1241,13 +1241,13 @@ static void* pmac_i2c_do_begin(struct pmf_function *func, struct pmf_args *args)
bus = pmac_i2c_find_bus(func->node);
if (bus == NULL) {
- printk(KERN_ERR "low_i2c: Can't find bus for %s (pfunc)\n",
- func->node->full_name);
+ printk(KERN_ERR "low_i2c: Can't find bus for %pOF (pfunc)\n",
+ func->node);
return NULL;
}
if (pmac_i2c_open(bus, 0)) {
- printk(KERN_ERR "low_i2c: Can't open i2c bus for %s (pfunc)\n",
- func->node->full_name);
+ printk(KERN_ERR "low_i2c: Can't open i2c bus for %pOF (pfunc)\n",
+ func->node);
return NULL;
}
@@ -1417,7 +1417,7 @@ static struct pmf_handlers pmac_i2c_pfunc_handlers = {
static void __init pmac_i2c_dev_create(struct device_node *np, int quirks)
{
- DBG("dev_create(%s)\n", np->full_name);
+ DBG("dev_create(%pOF)\n", np);
pmf_register_driver(np, &pmac_i2c_pfunc_handlers,
(void *)(long)quirks);
@@ -1425,20 +1425,20 @@ static void __init pmac_i2c_dev_create(struct device_node *np, int quirks)
static void __init pmac_i2c_dev_init(struct device_node *np, int quirks)
{
- DBG("dev_create(%s)\n", np->full_name);
+ DBG("dev_create(%pOF)\n", np);
pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_INIT, NULL);
}
static void pmac_i2c_dev_suspend(struct device_node *np, int quirks)
{
- DBG("dev_suspend(%s)\n", np->full_name);
+ DBG("dev_suspend(%pOF)\n", np);
pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_SLEEP, NULL);
}
static void pmac_i2c_dev_resume(struct device_node *np, int quirks)
{
- DBG("dev_resume(%s)\n", np->full_name);
+ DBG("dev_resume(%pOF)\n", np);
pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_WAKE, NULL);
}
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index 6e06c3be2e9a..0b8174a79993 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -783,7 +783,7 @@ static int __init pmac_add_bridge(struct device_node *dev)
const int *bus_range;
int primary = 1, has_address = 0;
- DBG("Adding PCI host bridge %s\n", dev->full_name);
+ DBG("Adding PCI host bridge %pOF\n", dev);
/* Fetch host bridge registers address */
has_address = (of_address_to_resource(dev, 0, &rsrc) == 0);
@@ -791,8 +791,8 @@ static int __init pmac_add_bridge(struct device_node *dev)
/* Get bus range if any */
bus_range = of_get_property(dev, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int)) {
- printk(KERN_WARNING "Can't get bus-range for %s, assume"
- " bus 0\n", dev->full_name);
+ printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+ " bus 0\n", dev);
}
hose = pcibios_alloc_controller(dev);
diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
index 459138ed4571..860159d46ab8 100644
--- a/arch/powerpc/platforms/powermac/pfunc_base.c
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -54,8 +54,8 @@ static int macio_do_gpio_write(PMF_STD_ARGS, u8 value, u8 mask)
raw_spin_lock_irqsave(&feature_lock, flags);
tmp = readb(addr);
tmp = (tmp & ~mask) | (value & mask);
- DBG("Do write 0x%02x to GPIO %s (%p)\n",
- tmp, func->node->full_name, addr);
+ DBG("Do write 0x%02x to GPIO %pOF (%p)\n",
+ tmp, func->node, addr);
writeb(tmp, addr);
raw_spin_unlock_irqrestore(&feature_lock, flags);
@@ -107,8 +107,8 @@ static void macio_gpio_init_one(struct macio_chip *macio)
if (gparent == NULL)
return;
- DBG("Installing GPIO functions for macio %s\n",
- macio->of_node->full_name);
+ DBG("Installing GPIO functions for macio %pOF\n",
+ macio->of_node);
/*
* Ok, got one, we dont need anything special to track them down, so
@@ -129,8 +129,8 @@ static void macio_gpio_init_one(struct macio_chip *macio)
pmf_register_driver(gp, &macio_gpio_handlers, (void *)offset);
}
- DBG("Calling initial GPIO functions for macio %s\n",
- macio->of_node->full_name);
+ DBG("Calling initial GPIO functions for macio %pOF\n",
+ macio->of_node);
/* And now we run all the init ones */
for (gp = NULL; (gp = of_get_next_child(gparent, gp)) != NULL;)
@@ -267,8 +267,8 @@ static struct pmf_handlers macio_mmio_handlers = {
static void macio_mmio_init_one(struct macio_chip *macio)
{
- DBG("Installing MMIO functions for macio %s\n",
- macio->of_node->full_name);
+ DBG("Installing MMIO functions for macio %pOF\n",
+ macio->of_node);
pmf_register_driver(macio->of_node, &macio_mmio_handlers, macio);
}
@@ -298,8 +298,8 @@ static void uninorth_install_pfunc(void)
{
struct device_node *np;
- DBG("Installing functions for UniN %s\n",
- uninorth_node->full_name);
+ DBG("Installing functions for UniN %pOF\n",
+ uninorth_node);
/*
* Install handlers for the bridge itself
@@ -317,8 +317,8 @@ static void uninorth_install_pfunc(void)
break;
}
if (unin_hwclock) {
- DBG("Installing functions for UniN clock %s\n",
- unin_hwclock->full_name);
+ DBG("Installing functions for UniN clock %pOF\n",
+ unin_hwclock);
pmf_register_driver(unin_hwclock, &unin_mmio_handlers, NULL);
pmf_do_functions(unin_hwclock, NULL, 0, PMF_FLAGS_ON_INIT,
NULL);
diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c
index 695e8c4d4224..df3c93bef228 100644
--- a/arch/powerpc/platforms/powermac/pfunc_core.c
+++ b/arch/powerpc/platforms/powermac/pfunc_core.c
@@ -708,7 +708,7 @@ int pmf_register_driver(struct device_node *np,
if (handlers == NULL)
return -EINVAL;
- DBG("pmf: registering driver for node %s\n", np->full_name);
+ DBG("pmf: registering driver for node %pOF\n", np);
spin_lock_irqsave(&pmf_lock, flags);
dev = pmf_find_device(np);
@@ -781,7 +781,7 @@ void pmf_unregister_driver(struct device_node *np)
struct pmf_device *dev;
unsigned long flags;
- DBG("pmf: unregistering driver for node %s\n", np->full_name);
+ DBG("pmf: unregistering driver for node %pOF\n", np);
spin_lock_irqsave(&pmf_lock, flags);
dev = pmf_find_device(np);
@@ -940,7 +940,7 @@ int pmf_call_one(struct pmf_function *func, struct pmf_args *args)
void *instdata = NULL;
int rc = 0;
- DBG(" ** pmf_call_one(%s/%s) **\n", dev->node->full_name, func->name);
+ DBG(" ** pmf_call_one(%pOF/%s) **\n", dev->node, func->name);
if (dev->handlers->begin)
instdata = dev->handlers->begin(func, args);
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index f5f9ad7c3398..5e0719b27294 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -364,8 +364,8 @@ static void __init pmac_pic_probe_oldstyle(void)
(addr + 0x10);
of_node_put(master);
- printk(KERN_INFO "irq: Found primary Apple PIC %s for %d irqs\n",
- master->full_name, max_real_irqs);
+ printk(KERN_INFO "irq: Found primary Apple PIC %pOF for %d irqs\n",
+ master, max_real_irqs);
/* Map interrupts of cascaded controller */
if (slave && !of_address_to_resource(slave, 0, &r)) {
@@ -378,8 +378,8 @@ static void __init pmac_pic_probe_oldstyle(void)
(addr + 0x10);
pmac_irq_cascade = irq_of_parse_and_map(slave, 0);
- printk(KERN_INFO "irq: Found slave Apple PIC %s for %d irqs"
- " cascade: %d\n", slave->full_name,
+ printk(KERN_INFO "irq: Found slave Apple PIC %pOF for %d irqs"
+ " cascade: %d\n", slave,
max_irqs - max_real_irqs, pmac_irq_cascade);
}
of_node_put(slave);
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 6b4e9d181126..ab668cb72263 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -556,7 +556,7 @@ static int __init check_pmac_serial_console(void)
pr_debug(" can't find stdout package %s !\n", name);
return -ENODEV;
}
- pr_debug("stdout is %s\n", prom_stdout->full_name);
+ pr_debug("stdout is %pOF\n", prom_stdout);
name = of_get_property(prom_stdout, "name", NULL);
if (!name) {
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 6a6f4ef46b9e..340cbe263b33 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -30,3 +30,25 @@ config OPAL_PRD
help
This enables the opal-prd driver, a facility to run processor
recovery diagnostics on OpenPower machines
+
+config PPC_MEMTRACE
+ bool "Enable removal of RAM from kernel mappings for tracing"
+ depends on PPC_POWERNV && MEMORY_HOTREMOVE
+ default n
+ help
+ Enabling this option allows for the removal of memory (RAM)
+ from the kernel mappings to be used for hardware tracing.
+
+config PPC_VAS
+ bool "IBM Virtual Accelerator Switchboard (VAS)"
+ depends on PPC_POWERNV && PPC_64K_PAGES
+ default y
+ help
+ This enables support for IBM Virtual Accelerator Switchboard (VAS).
+
+ VAS allows accelerators in co-processors like NX-GZIP and NX-842
+ to be accessible to kernel subsystems and user processes.
+
+ VAS adapters are found in POWER9 based systems.
+
+ If unsure, say N.
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index b5d98cb3f482..37d60f7dd86d 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o
obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
-obj-y += opal-kmsg.o
+obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o
@@ -12,3 +12,6 @@ obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o
obj-$(CONFIG_OPAL_PRD) += opal-prd.o
+obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
+obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
+obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o
diff --git a/arch/powerpc/platforms/powernv/copy-paste.h b/arch/powerpc/platforms/powernv/copy-paste.h
new file mode 100644
index 000000000000..c9a503623431
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/copy-paste.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/ppc-opcode.h>
+
+#define CR0_SHIFT 28
+#define CR0_MASK 0xF
+/*
+ * Copy/paste instructions:
+ *
+ * copy RA,RB
+ * Copy contents of address (RA) + effective_address(RB)
+ * to internal copy-buffer.
+ *
+ * paste RA,RB
+ * Paste contents of internal copy-buffer to the address
+ * (RA) + effective_address(RB)
+ */
+/*
+ * Issue a "copy" instruction through the PPC_COPY() opcode macro, with
+ * @offset and @crb supplied as its two register operands (see the
+ * instruction description above for how the two form the address).
+ *
+ * The asm statement has no output operands; the function always
+ * returns 0.
+ */
+static inline int vas_copy(void *crb, int offset)
+{
+ /*
+  * Operand %0 is @offset and operand %1 is @crb. The "memory" clobber
+  * stops the compiler from caching memory contents across the asm.
+  * NOTE(review): the "b" constraint presumably keeps r0 out of the
+  * operands - confirm against the GCC PowerPC constraint list.
+  */
+ asm volatile(PPC_COPY(%0, %1)";"
+ :
+ : "b" (offset), "b" (crb)
+ : "memory");
+
+ return 0;
+}
+
+/*
+ * Issue a "paste" instruction through the PPC_PASTE() opcode macro, with
+ * @offset and @paste_address as register operands, then read the
+ * condition register back with mfocrf.
+ *
+ * Returns the 4-bit field obtained by shifting the value read by mfocrf
+ * right by CR0_SHIFT and masking with CR0_MASK. What each bit means is
+ * defined by the paste instruction and is not visible in this file.
+ */
+static inline int vas_paste(void *paste_address, int offset)
+{
+ u32 cr;
+
+ cr = 0;
+ /*
+  * %0 is the output (cr), %1 is @offset and %2 is @paste_address.
+  * "cr0" is listed as clobbered because paste updates it before
+  * mfocrf copies it out.
+  */
+ asm volatile(PPC_PASTE(%1, %2)";"
+ "mfocrf %0, 0x80;"
+ : "=r" (cr)
+ : "b" (offset), "b" (paste_address)
+ : "memory", "cr0");
+
+ return (cr >> CR0_SHIFT) & CR0_MASK;
+}
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 3f48f6df1cf3..8864065eba22 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -113,7 +113,6 @@ static ssize_t pnv_eeh_ei_write(struct file *filp,
size_t count, loff_t *ppos)
{
struct pci_controller *hose = filp->private_data;
- struct eeh_dev *edev;
struct eeh_pe *pe;
int pe_no, type, func;
unsigned long addr, mask;
@@ -135,13 +134,7 @@ static ssize_t pnv_eeh_ei_write(struct file *filp,
return -EINVAL;
/* Retrieve PE */
- edev = kzalloc(sizeof(*edev), GFP_KERNEL);
- if (!edev)
- return -ENOMEM;
- edev->phb = hose;
- edev->pe_config_addr = pe_no;
- pe = eeh_pe_get(edev);
- kfree(edev);
+ pe = eeh_pe_get(hose, pe_no, 0);
if (!pe)
return -ENODEV;
@@ -359,6 +352,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
uint32_t pcie_flags;
int ret;
+ int config_addr = (pdn->busno << 8) | (pdn->devfn);
/*
* When probing the root bridge, which doesn't have any
@@ -393,8 +387,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
}
}
- edev->config_addr = (pdn->busno << 8) | (pdn->devfn);
- edev->pe_config_addr = phb->ioda.pe_rmap[edev->config_addr];
+ edev->pe_config_addr = phb->ioda.pe_rmap[config_addr];
/* Create PE */
ret = eeh_add_to_parent_pe(edev);
@@ -933,7 +926,6 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,
int pos, u16 mask)
{
- struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
int i, status = 0;
/* Wait for Transaction Pending bit to be cleared */
@@ -947,7 +939,7 @@ static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,
pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n",
__func__, type,
- edev->phb->global_number, pdn->busno,
+ pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
}
@@ -1381,7 +1373,6 @@ static int pnv_eeh_get_pe(struct pci_controller *hose,
struct pnv_phb *phb = hose->private_data;
struct pnv_ioda_pe *pnv_pe;
struct eeh_pe *dev_pe;
- struct eeh_dev edev;
/*
* If PHB supports compound PE, to fetch
@@ -1397,10 +1388,7 @@ static int pnv_eeh_get_pe(struct pci_controller *hose,
}
/* Find the PE according to PE# */
- memset(&edev, 0, sizeof(struct eeh_dev));
- edev.phb = hose;
- edev.pe_config_addr = pe_no;
- dev_pe = eeh_pe_get(&edev);
+ dev_pe = eeh_pe_get(hose, pe_no, 0);
if (!dev_pe)
return -EEXIST;
@@ -1711,6 +1699,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
struct pnv_phb *phb;
s64 ret;
+ int config_addr = (pdn->busno << 8) | (pdn->devfn);
if (!edev)
return -EEXIST;
@@ -1725,14 +1714,14 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
if (edev->physfn) {
ret = pnv_eeh_restore_vf_config(pdn);
} else {
- phb = edev->phb->private_data;
+ phb = pdn->phb->private_data;
ret = opal_pci_reinit(phb->opal_id,
- OPAL_REINIT_PCI_DEV, edev->config_addr);
+ OPAL_REINIT_PCI_DEV, config_addr);
}
if (ret) {
pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
- __func__, edev->config_addr, ret);
+ __func__, config_addr, ret);
return -EIO;
}
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 2abee070373f..9f59041a172b 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -56,6 +56,7 @@ u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
*/
static u64 pnv_deepest_stop_psscr_val;
static u64 pnv_deepest_stop_psscr_mask;
+static u64 pnv_deepest_stop_flag;
static bool deepest_stop_found;
static int pnv_save_sprs_for_deep_states(void)
@@ -68,7 +69,7 @@ static int pnv_save_sprs_for_deep_states(void)
* all cpus at boot. Get these reg values of current cpu and use the
* same across all cpus.
*/
- uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
+ uint64_t lpcr_val = mfspr(SPRN_LPCR);
uint64_t hid0_val = mfspr(SPRN_HID0);
uint64_t hid1_val = mfspr(SPRN_HID1);
uint64_t hid4_val = mfspr(SPRN_HID4);
@@ -185,8 +186,40 @@ static void pnv_alloc_idle_core_states(void)
update_subcore_sibling_mask();
- if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
- pnv_save_sprs_for_deep_states();
+ if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
+ int rc = pnv_save_sprs_for_deep_states();
+
+ if (likely(!rc))
+ return;
+
+ /*
+ * The stop-api is unable to restore hypervisor
+ * resources on wakeup from platform idle states which
+ * lose full context. So disable such states.
+ */
+ supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
+ pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
+ pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+ (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
+ /*
+ * Use the default stop state for CPU-Hotplug
+ * if available.
+ */
+ if (default_stop_found) {
+ pnv_deepest_stop_psscr_val =
+ pnv_default_stop_val;
+ pnv_deepest_stop_psscr_mask =
+ pnv_default_stop_mask;
+ pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
+ pnv_deepest_stop_psscr_val);
+ } else { /* Fallback to snooze loop for CPU-Hotplug */
+ deepest_stop_found = false;
+ pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
+ }
+ }
+ }
}
u32 pnv_get_supported_cpuidle_states(void)
@@ -355,6 +388,14 @@ void power9_idle(void)
}
#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Write @lpcr_val to the LPCR of the calling CPU and hand the same value
+ * to opal_slw_set_reg() for the hardware id of @cpu.
+ */
+static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
+{
+	u64 hw_cpu_id;
+
+	hw_cpu_id = get_hard_smp_processor_id(cpu);
+
+	/* Update the live register first, then the copy held by firmware. */
+	mtspr(SPRN_LPCR, lpcr_val);
+	opal_slw_set_reg(hw_cpu_id, SPRN_LPCR, lpcr_val);
+}
+
/*
* pnv_cpu_offline: A function that puts the CPU into the deepest
* available platform idle state on a CPU-Offline.
@@ -364,6 +405,20 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
{
unsigned long srr1;
u32 idle_states = pnv_get_supported_cpuidle_states();
+ u64 lpcr_val;
+
+ /*
+ * We don't want to take decrementer interrupts while we are
+ * offline, so clear LPCR:PECE1. We keep PECE2 (and
+ * LPCR_PECE_HVEE on P9) enabled as to let IPIs in.
+ *
+ * If the CPU gets woken up by a special wakeup, ensure that
+ * the SLW engine sets LPCR with decrementer bit cleared, else
+ * the CPU will come back to the kernel due to a spurious
+ * wakeup.
+ */
+ lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
+ pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
__ppc64_runlatch_off();
@@ -375,7 +430,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
pnv_deepest_stop_psscr_val;
srr1 = power9_idle_stop(psscr);
- } else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
+ } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
+ (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
} else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
(idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
@@ -394,6 +450,16 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
__ppc64_runlatch_on();
+ /*
+ * Re-enable decrementer interrupts in LPCR.
+ *
+ * Further, we want stop states to be woken up by decrementer
+ * for non-hotplug cases. So program the LPCR via stop api as
+ * well.
+ */
+ lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
+ pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
+
return srr1;
}
#endif
@@ -553,6 +619,7 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
max_residency_ns = residency_ns[i];
pnv_deepest_stop_psscr_val = psscr_val[i];
pnv_deepest_stop_psscr_mask = psscr_mask[i];
+ pnv_deepest_stop_flag = flags[i];
deepest_stop_found = true;
}
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
new file mode 100644
index 000000000000..de470caf0784
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) IBM Corporation, 2014, 2017
+ * Anton Blanchard, Rashmica Gupta.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "memtrace: " fmt
+
+#include <linux/bitops.h>
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/memory.h>
+#include <linux/memory_hotplug.h>
+#include <asm/machdep.h>
+#include <asm/debugfs.h>
+
+/*
+ * This enables us to keep track of the memory removed from each node.
+ * One entry is filled in per node on which trace memory was obtained.
+ */
+struct memtrace_entry {
+ void *mem; /* mapping of the region, set by ioremap(start, size) */
+ u64 start; /* address of the region, as returned by memtrace_alloc_node() */
+ u64 size; /* region size in bytes */
+ u32 nid; /* node the region was taken from */
+ struct dentry *dir; /* per-node debugfs directory */
+ char name[16]; /* debugfs directory name: nid printed as "%08x" */
+};
+
+/* Size passed to a successful memtrace_enable_set(); zero until then. */
+static u64 memtrace_size;
+
+/* Array allocated with one slot per online node; see memtrace_init_regions_runtime(). */
+static struct memtrace_entry *memtrace_array;
+/* Number of slots of memtrace_array that have been filled in. */
+static unsigned int memtrace_array_nr;
+
+
+/*
+ * read() handler for a node's debugfs "trace" file: copies out of the
+ * entry's mapped trace region, bounded by the entry's size.
+ */
+static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
+			     size_t count, loff_t *ppos)
+{
+	struct memtrace_entry *entry = filp->private_data;
+	const void *trace_buf = entry->mem;
+	size_t trace_len = entry->size;
+
+	return simple_read_from_buffer(ubuf, count, ppos, trace_buf, trace_len);
+}
+
+/*
+ * Check that [start, start + size) lies entirely inside the region
+ * described by @dev, i.e. inside [dev->start, dev->start + dev->size).
+ *
+ * The comparison is arranged so that no sum is formed: a huge @start or
+ * @size can therefore not wrap around and slip past the check.
+ *
+ * Returns true when the range is contained in the region.
+ */
+static bool valid_memtrace_range(struct memtrace_entry *dev,
+				 unsigned long start, unsigned long size)
+{
+	if (start < dev->start)
+		return false;
+
+	if (size > dev->size)
+		return false;
+
+	/* Both subtractions are safe thanks to the two checks above. */
+	return (start - dev->start) <= (dev->size - size);
+}
+
+/*
+ * mmap() handler for a node's debugfs "trace" file.
+ *
+ * Rejects the request with -EINVAL when valid_memtrace_range() fails,
+ * marks the mapping non-cached and remaps the pages into the caller's
+ * address space. Returns -EAGAIN if remap_pfn_range() fails, else 0.
+ *
+ * NOTE(review): the byte offset given to valid_memtrace_range() is
+ * compared directly against dev->start, whereas the pfn handed to
+ * remap_pfn_range() is vm_pgoff plus dev->start >> PAGE_SHIFT. The two
+ * treat vm_pgoff differently (absolute vs. relative) - confirm which
+ * one is intended.
+ */
+static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ unsigned long size = vma->vm_end - vma->vm_start;
+ struct memtrace_entry *dev = filp->private_data;
+
+ if (!valid_memtrace_range(dev, vma->vm_pgoff << PAGE_SHIFT, size))
+ return -EINVAL;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (remap_pfn_range(vma, vma->vm_start,
+ vma->vm_pgoff + (dev->start >> PAGE_SHIFT),
+ size, vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
+/* File operations attached to each node's debugfs "trace" file. */
+static const struct file_operations memtrace_fops = {
+ .llseek = default_llseek,
+ .read = memtrace_read,
+ .mmap = memtrace_mmap,
+ .open = simple_open,
+};
+
+/*
+ * Flush the data cache for every cache block that overlaps
+ * [base, base + size).
+ *
+ * @base: first address of the region
+ * @size: length of the region in bytes
+ */
+static void flush_memory_region(u64 base, u64 size)
+{
+	/*
+	 * dcbf acts on a single cache block, so the loop has to advance by
+	 * the block size. Stepping by the total cache size (l1d.size) would
+	 * touch only one block per cache-sized stride and leave the rest of
+	 * the region unflushed.
+	 */
+	unsigned long line_size = ppc64_caches.l1d.block_size;
+	u64 end = base + size;
+	u64 addr;
+
+	base = round_down(base, line_size);
+	end = round_up(end, line_size);
+
+	/*
+	 * dcbf only reads the address register, so addr is an input
+	 * operand. Declaring it as an output would tell the compiler the
+	 * asm overwrites the loop variable.
+	 */
+	for (addr = base; addr < end; addr += line_size)
+		asm volatile("dcbf 0,%0" : : "r" (addr) : "memory");
+}
+
+/*
+ * walk_memory_range() callback: returns 0 while @mem is online and -1
+ * for any other state. @arg is unused.
+ */
+static int check_memblock_online(struct memory_block *mem, void *arg)
+{
+	return (mem->state == MEM_ONLINE) ? 0 : -1;
+}
+
+/*
+ * walk_memory_range() callback: stores the state value carried in @arg
+ * into @mem->state. Always returns 0.
+ */
+static int change_memblock_state(struct memory_block *mem, void *arg)
+{
+	mem->state = (unsigned long)arg;
+	return 0;
+}
+
+/*
+ * Try to take the @nr_pages pages starting at @start_pfn on node @nid
+ * offline and remove them.
+ *
+ * Returns true when the pages were offlined and removed; false when a
+ * memory block in the range is not online or offline_pages() fails.
+ */
+static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
+{
+ u64 end_pfn = start_pfn + nr_pages - 1;
+
+ /* Every memory block in the range has to be online to begin with. */
+ if (walk_memory_range(start_pfn, end_pfn, NULL,
+ check_memblock_online))
+ return false;
+
+ walk_memory_range(start_pfn, end_pfn, (void *)MEM_GOING_OFFLINE,
+ change_memblock_state);
+
+ /* On failure put the block states back to online before bailing out. */
+ if (offline_pages(start_pfn, nr_pages)) {
+ walk_memory_range(start_pfn, end_pfn, (void *)MEM_ONLINE,
+ change_memblock_state);
+ return false;
+ }
+
+ walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
+ change_memblock_state);
+
+ /* Open question: is an RCU grace period needed before flushing? */
+ flush_memory_region((u64)__va(start_pfn << PAGE_SHIFT),
+ nr_pages << PAGE_SHIFT);
+
+ /* remove_memory() is called with the device hotplug lock held. */
+ lock_device_hotplug();
+ remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
+ unlock_device_hotplug();
+
+ return true;
+}
+
+/*
+ * Carve @size bytes of trace memory out of node @nid.
+ *
+ * Candidate regions are aligned to a multiple of @size and are tried
+ * from the top of the node downwards until one can be offlined.
+ *
+ * Returns the address of the removed region, or 0 when the node has no
+ * memory, the request does not fit in the node, or no candidate could
+ * be offlined.
+ */
+static u64 memtrace_alloc_node(u32 nid, u64 size)
+{
+	u64 start_pfn, end_pfn, nr_pages;
+	u64 base_pfn;
+
+	if (!NODE_DATA(nid) || !node_spanned_pages(nid))
+		return 0;
+
+	start_pfn = node_start_pfn(nid);
+	end_pfn = node_end_pfn(nid);
+	nr_pages = size >> PAGE_SHIFT;
+
+	/*
+	 * Nothing to do for an empty request, and a request larger than
+	 * the node would make "end_pfn - nr_pages" wrap around below.
+	 */
+	if (!nr_pages || nr_pages > end_pfn - start_pfn)
+		return 0;
+
+	/*
+	 * Trace memory needs to be aligned to the size. The size is only
+	 * known to be a multiple of the memory block size, not a power of
+	 * two, so use rounddown() rather than the mask-based round_down().
+	 */
+	end_pfn = rounddown(end_pfn - nr_pages, nr_pages);
+
+	for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
+		if (memtrace_offline_pages(nid, base_pfn, nr_pages))
+			return base_pfn << PAGE_SHIFT;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate the bookkeeping array and try to obtain @size bytes of trace
+ * memory on every online node, recording each success in
+ * memtrace_array.
+ *
+ * Returns 0 on success (even if some nodes yielded no memory), -EBUSY
+ * when the array was already set up by an earlier call, and -ENOMEM
+ * when the array cannot be allocated.
+ */
+static int memtrace_init_regions_runtime(u64 size)
+{
+	u32 nid;
+	u64 m;
+
+	/*
+	 * A previous attempt may have populated the array and then failed
+	 * later on, leaving memtrace_size at zero. Allocating again would
+	 * leak the old array and, because memtrace_array_nr keeps its old
+	 * value, write past the end of the new one.
+	 */
+	if (memtrace_array)
+		return -EBUSY;
+
+	memtrace_array = kcalloc(num_online_nodes(),
+				 sizeof(*memtrace_array), GFP_KERNEL);
+	if (!memtrace_array) {
+		pr_err("Failed to allocate memtrace_array\n");
+		return -ENOMEM;
+	}
+
+	for_each_online_node(nid) {
+		m = memtrace_alloc_node(nid, size);
+
+		/*
+		 * A node might not have any local memory, so warn but
+		 * continue on.
+		 */
+		if (!m) {
+			pr_err("Failed to allocate trace memory on node %d\n", nid);
+			continue;
+		}
+
+		pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m);
+
+		memtrace_array[memtrace_array_nr].start = m;
+		memtrace_array[memtrace_array_nr].size = size;
+		memtrace_array[memtrace_array_nr].nid = nid;
+		memtrace_array_nr++;
+	}
+
+	return 0;
+}
+
+/* Top-level "memtrace" debugfs directory, created by memtrace_init(). */
+static struct dentry *memtrace_debugfs_dir;
+
+/*
+ * Map every recorded trace region and expose it through debugfs as
+ * <nid>/trace, <nid>/start and <nid>/size.
+ *
+ * A region that cannot be mapped is reported and skipped; the remaining
+ * regions are still set up and -1 is returned at the end. Failure to
+ * create a per-node directory aborts immediately with -1.
+ *
+ * Returns 0 when every region was set up.
+ */
+static int memtrace_init_debugfs(void)
+{
+	int ret = 0;
+	unsigned int i;
+
+	for (i = 0; i < memtrace_array_nr; i++) {
+		struct dentry *dir;
+		struct memtrace_entry *ent = &memtrace_array[i];
+
+		ent->mem = ioremap(ent->start, ent->size);
+		/* Warn but continue on */
+		if (!ent->mem) {
+			pr_err("Failed to map trace memory at 0x%llx\n",
+			       ent->start);
+			ret = -1;
+			continue;
+		}
+
+		snprintf(ent->name, sizeof(ent->name), "%08x", ent->nid);
+		dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);
+		if (!dir) {
+			/* No file will ever reference this mapping; drop it. */
+			iounmap(ent->mem);
+			ent->mem = NULL;
+			return -1;
+		}
+
+		ent->dir = dir;
+		debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops);
+		debugfs_create_x64("start", 0400, dir, &ent->start);
+		debugfs_create_x64("size", 0400, dir, &ent->size);
+	}
+
+	return ret;
+}
+
+/*
+ * Write handler for the debugfs "enable" file. @val is the amount of
+ * trace memory to take from each node.
+ *
+ * Fails with -EINVAL when tracing is already enabled, when @val is zero
+ * or not a multiple of the memory block size, or when either setup step
+ * fails. On success the size is remembered and 0 is returned.
+ */
+static int memtrace_enable_set(void *data, u64 val)
+{
+	/* One-shot: refuse once a size is recorded, and refuse a zero size. */
+	if (memtrace_size || !val)
+		return -EINVAL;
+
+	/* Make sure size is aligned to a memory block */
+	if ((val & (memory_block_size_bytes() - 1)) != 0)
+		return -EINVAL;
+
+	/* Regions first, then their debugfs files; stop at the first failure. */
+	if (memtrace_init_regions_runtime(val) != 0 ||
+	    memtrace_init_debugfs() != 0)
+		return -EINVAL;
+
+	memtrace_size = val;
+	return 0;
+}
+
+/*
+ * Read handler for the debugfs "enable" file: reports the currently
+ * recorded trace size through @val. Always returns 0.
+ */
+static int memtrace_enable_get(void *data, u64 *val)
+{
+ *val = memtrace_size;
+ return 0;
+}
+
+/* File operations for "enable", formatting the value as 0x%016llx. */
+DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
+ memtrace_enable_set, "0x%016llx\n");
+
+/*
+ * Initcall: create the "memtrace" debugfs directory under the powerpc
+ * debugfs root and add the "enable" control file to it.
+ *
+ * Returns -1 when the directory cannot be created, otherwise 0.
+ */
+static int memtrace_init(void)
+{
+	struct dentry *root;
+
+	root = debugfs_create_dir("memtrace", powerpc_debugfs_root);
+	memtrace_debugfs_dir = root;
+	if (root == NULL)
+		return -1;
+
+	debugfs_create_file("enable", 0600, root, NULL, &memtrace_init_fops);
+
+	return 0;
+}
+machine_device_initcall(powernv, memtrace_init);
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index b5d960d6db3d..2cb6cbea4b3b 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -546,6 +546,12 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
unsigned long pid = npu_context->mm->context.id;
/*
+ * Unfortunately the nest mmu does not support flushing specific
+ * addresses so we have to flush the whole mm.
+ */
+ flush_tlb_mm(npu_context->mm);
+
+ /*
* Loop over all the NPUs this process is active on and launch
* an invalidate.
*/
@@ -576,12 +582,6 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
}
}
- /*
- * Unfortunately the nest mmu does not support flushing specific
- * addresses so we have to flush the whole mm.
- */
- flush_tlb_mm(npu_context->mm);
-
mmio_invalidate_wait(mmio_atsd_reg, flush);
if (flush)
/* Wait for the flush to complete */
@@ -614,15 +614,6 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
mmio_invalidate(npu_context, 1, address, true);
}
-static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long address)
-{
- struct npu_context *npu_context = mn_to_npu_context(mn);
-
- mmio_invalidate(npu_context, 1, address, true);
-}
-
static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
@@ -640,7 +631,6 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
.release = pnv_npu2_mn_release,
.change_pte = pnv_npu2_mn_change_pte,
- .invalidate_page = pnv_npu2_mn_invalidate_page,
.invalidate_range = pnv_npu2_mn_invalidate_range,
};
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
index 83bebeec0fea..cf33769a7b72 100644
--- a/arch/powerpc/platforms/powernv/opal-async.c
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -171,8 +171,8 @@ int __init opal_async_comp_init(void)
async = of_get_property(opal_node, "opal-msg-async-num", NULL);
if (!async) {
- pr_err("%s: %s has no opal-msg-async-num\n",
- __func__, opal_node->full_name);
+ pr_err("%s: %pOF has no opal-msg-async-num\n",
+ __func__, opal_node);
err = -ENOENT;
goto out_opal_node;
}
diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
index 4ec6219287fc..2fa3ac80cb4e 100644
--- a/arch/powerpc/platforms/powernv/opal-flash.c
+++ b/arch/powerpc/platforms/powernv/opal-flash.c
@@ -520,7 +520,7 @@ out:
* update_flash : Flash new firmware image
*
*/
-static struct bin_attribute image_data_attr = {
+static const struct bin_attribute image_data_attr = {
.attr = {.name = "image", .mode = 0200},
.size = MAX_IMAGE_SIZE, /* Limit image size */
.write = image_data_write,
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index 88f3c61eec95..d78fed728cdf 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -30,6 +30,8 @@
#include <asm/cputable.h>
#include <asm/machdep.h>
+#include "powernv.h"
+
static int opal_hmi_handler_nb_init;
struct OpalHmiEvtNode {
struct list_head list;
@@ -267,8 +269,6 @@ static void hmi_event_handler(struct work_struct *work)
spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
if (unrecoverable) {
- int ret;
-
/* Pull all HMI events from OPAL before we panic. */
while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) {
u32 type;
@@ -284,23 +284,7 @@ static void hmi_event_handler(struct work_struct *work)
print_hmi_event_info(hmi_evt);
}
- /*
- * Unrecoverable HMI exception. We need to inform BMC/OCC
- * about this error so that it can collect relevant data
- * for error analysis before rebooting.
- */
- ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
- "Unrecoverable HMI exception");
- if (ret == OPAL_UNSUPPORTED) {
- pr_emerg("Reboot type %d not supported\n",
- OPAL_REBOOT_PLATFORM_ERROR);
- }
-
- /*
- * Fall through and panic if opal_cec_reboot2() returns
- * OPAL_UNSUPPORTED.
- */
- panic("Unrecoverable HMI exception");
+ pnv_platform_error_reboot(NULL, "Unrecoverable HMI exception");
}
}
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
new file mode 100644
index 000000000000..21f6531fae20
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -0,0 +1,226 @@
+/*
+ * OPAL IMC interface detection driver
+ * Supported on POWERNV platform
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ * (C) 2017 Anju T Sudhakar, IBM Corporation.
+ * (C) 2017 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or later version.
+ */
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/crash_dump.h>
+#include <asm/opal.h>
+#include <asm/io.h>
+#include <asm/imc-pmu.h>
+#include <asm/cputhreads.h>
+
+/*
+ * imc_get_mem_addr_nest: Function to get nest counter memory region
+ * for each chip
+ *
+ * Reads the "chip-id" and "base-addr" arrays from @node and fills
+ * pmu_ptr->mem_info with one entry per chip: the chip id and the
+ * virtual address of (base address + @offset). Sets
+ * pmu_ptr->imc_counter_mmaped on success.
+ *
+ * Returns 0 on success, -ENODEV when "chip-id" is missing or empty,
+ * -ENOMEM when a temporary array cannot be allocated, and -1 for any
+ * other failure. The temporary arrays are released on every path.
+ */
+static int imc_get_mem_addr_nest(struct device_node *node,
+				 struct imc_pmu *pmu_ptr,
+				 u32 offset)
+{
+	int nr_chips = 0, i;
+	u64 *base_addr_arr, baddr;
+	u32 *chipid_arr;
+
+	nr_chips = of_property_count_u32_elems(node, "chip-id");
+	if (nr_chips <= 0)
+		return -ENODEV;
+
+	base_addr_arr = kcalloc(nr_chips, sizeof(u64), GFP_KERNEL);
+	if (!base_addr_arr)
+		return -ENOMEM;
+
+	chipid_arr = kcalloc(nr_chips, sizeof(u32), GFP_KERNEL);
+	if (!chipid_arr) {
+		/* Do not leak the array allocated just above */
+		kfree(base_addr_arr);
+		return -ENOMEM;
+	}
+
+	if (of_property_read_u32_array(node, "chip-id", chipid_arr, nr_chips))
+		goto error;
+
+	if (of_property_read_u64_array(node, "base-addr", base_addr_arr,
+				       nr_chips))
+		goto error;
+
+	pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(struct imc_mem_info),
+				    GFP_KERNEL);
+	if (!pmu_ptr->mem_info)
+		goto error;
+
+	for (i = 0; i < nr_chips; i++) {
+		pmu_ptr->mem_info[i].id = chipid_arr[i];
+		baddr = base_addr_arr[i] + offset;
+		pmu_ptr->mem_info[i].vbase = phys_to_virt(baddr);
+	}
+
+	pmu_ptr->imc_counter_mmaped = true;
+	kfree(base_addr_arr);
+	kfree(chipid_arr);
+	return 0;
+
+error:
+	kfree(pmu_ptr->mem_info);
+	kfree(base_addr_arr);
+	kfree(chipid_arr);
+	return -1;
+}
+
+/*
+ * imc_pmu_create : Takes the parent device which is the pmu unit, pmu_index
+ *		    and domain as the inputs.
+ * Allocates memory for the struct imc_pmu, sets up its domain, size and offsets
+ *
+ * Returns 0 only when the PMU was registered. Returns -ENOMEM when the
+ * imc_pmu cannot be allocated, -EINVAL when the "size" property is
+ * missing or the nest memory layout cannot be read, and the error from
+ * init_imc_pmu() when registration fails, so that the caller does not
+ * count a PMU that never registered.
+ */
+static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
+{
+	int ret = 0;
+	struct imc_pmu *pmu_ptr;
+	u32 offset;
+
+	/* memory for pmu */
+	pmu_ptr = kzalloc(sizeof(struct imc_pmu), GFP_KERNEL);
+	if (!pmu_ptr)
+		return -ENOMEM;
+
+	/* Set the domain */
+	pmu_ptr->domain = domain;
+
+	ret = of_property_read_u32(parent, "size", &pmu_ptr->counter_mem_size);
+	if (ret) {
+		ret = -EINVAL;
+		goto free_pmu;
+	}
+
+	/* Only units that carry an "offset" property get per-chip memory info */
+	if (!of_property_read_u32(parent, "offset", &offset)) {
+		if (imc_get_mem_addr_nest(parent, pmu_ptr, offset)) {
+			ret = -EINVAL;
+			goto free_pmu;
+		}
+	}
+
+	/* Function to register IMC pmu */
+	ret = init_imc_pmu(parent, pmu_ptr, pmu_index);
+	if (ret) {
+		/*
+		 * NOTE(review): pmu_ptr is deliberately not freed here;
+		 * confirm whether init_imc_pmu() releases it on failure
+		 * (in which case the name printed below may be stale).
+		 */
+		pr_err("IMC PMU %s Register failed\n", pmu_ptr->pmu.name);
+		return ret;
+	}
+
+	return 0;
+
+free_pmu:
+	kfree(pmu_ptr);
+	return ret;
+}
+
+/*
+ * Ask OPAL to stop the nest IMC counters, issuing one request per
+ * online node through the first CPU in that node's cpumask. Nodes
+ * whose cpumask is empty are skipped: cpumask_first() then returns a
+ * value >= nr_cpu_ids, which is not a valid CPU number to translate.
+ */
+static void disable_nest_pmu_counters(void)
+{
+	int nid, cpu;
+	const struct cpumask *l_cpumask;
+
+	get_online_cpus();
+	for_each_online_node(nid) {
+		l_cpumask = cpumask_of_node(nid);
+		cpu = cpumask_first(l_cpumask);
+		if (cpu >= nr_cpu_ids)
+			continue;
+		opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+				       get_hard_smp_processor_id(cpu));
+	}
+	put_online_cpus();
+}
+
+/*
+ * Ask OPAL to stop the core IMC counters, once for each CPU in the
+ * mask returned by cpu_online_cores_map(). A failing request is logged
+ * and the remaining cores are still processed.
+ */
+static void disable_core_pmu_counters(void)
+{
+	cpumask_t online_cores;
+	int core_cpu;
+
+	get_online_cpus();
+
+	/* Disable the IMC Core functions */
+	online_cores = cpu_online_cores_map();
+	for_each_cpu(core_cpu, &online_cores) {
+		int err;
+
+		err = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+					     get_hard_smp_processor_id(core_cpu));
+		if (!err)
+			continue;
+
+		pr_err("%s: Failed to stop Core (cpu = %d)\n",
+		       __FUNCTION__, core_cpu);
+	}
+
+	put_online_cpus();
+}
+
+/*
+ * Platform driver probe: walk every device-tree node compatible with
+ * IMC_DTB_UNIT_COMPAT, translate its "type" property into an IMC
+ * domain and create a PMU for it.
+ *
+ * In a kdump kernel no PMU is created; the nest and core counters are
+ * stopped and -ENODEV is returned. Nodes without a "type" property or
+ * with an unrecognised type are skipped with a warning. Returns 0
+ * otherwise, regardless of how many PMUs were created.
+ */
+static int opal_imc_counters_probe(struct platform_device *pdev)
+{
+	struct device_node *imc_dev = pdev->dev.of_node;
+	int pmu_count = 0, domain;
+	u32 type;
+
+	/*
+	 * Check whether this is kdump kernel. If yes, force the engines to
+	 * stop and return.
+	 */
+	if (is_kdump_kernel()) {
+		disable_nest_pmu_counters();
+		disable_core_pmu_counters();
+		return -ENODEV;
+	}
+
+	for_each_compatible_node(imc_dev, NULL, IMC_DTB_UNIT_COMPAT) {
+		if (of_property_read_u32(imc_dev, "type", &type)) {
+			pr_warn("IMC Device without type property\n");
+			continue;
+		}
+
+		switch (type) {
+		case IMC_TYPE_CHIP:
+			domain = IMC_DOMAIN_NEST;
+			break;
+		case IMC_TYPE_CORE:
+			domain = IMC_DOMAIN_CORE;
+			break;
+		case IMC_TYPE_THREAD:
+			domain = IMC_DOMAIN_THREAD;
+			break;
+		default:
+			/* No valid domain: do not try to create a PMU */
+			pr_warn("IMC Unknown Device type \n");
+			continue;
+		}
+
+		/* The index only advances for PMUs that were created */
+		if (!imc_pmu_create(imc_dev, pmu_count, domain))
+			pmu_count++;
+	}
+
+	return 0;
+}
+
+/*
+ * Platform driver shutdown hook: stops the nest counters, then the
+ * core counters. @pdev is unused.
+ */
+static void opal_imc_counters_shutdown(struct platform_device *pdev)
+{
+ /*
+ * This only stops the engines, which is the bare minimum.
+ * TODO: Need to handle proper memory cleanup and pmu
+ * unregister.
+ */
+ disable_nest_pmu_counters();
+ disable_core_pmu_counters();
+}
+
+/* Device-tree match table: binds the driver to IMC_DTB_COMPAT nodes. */
+static const struct of_device_id opal_imc_match[] = {
+ { .compatible = IMC_DTB_COMPAT },
+ {}, /* empty terminating entry */
+};
+
+/*
+ * Platform driver description: matched through opal_imc_match, with
+ * probe and shutdown callbacks only (no remove callback is provided).
+ */
+static struct platform_driver opal_imc_driver = {
+ .driver = {
+ .name = "opal-imc-counters",
+ .of_match_table = opal_imc_match,
+ },
+ .probe = opal_imc_counters_probe,
+ .shutdown = opal_imc_counters_shutdown,
+};
+
+builtin_platform_driver(opal_imc_driver);
diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c
new file mode 100644
index 000000000000..badb29bde93f
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-powercap.c
@@ -0,0 +1,244 @@
+/*
+ * PowerNV OPAL Powercap interface
+ *
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "opal-powercap: " fmt
+
+#include <linux/of.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+
+DEFINE_MUTEX(powercap_mutex);
+
+static struct kobject *powercap_kobj;
+
+/*
+ * One powercap sysfs attribute. The show/store callbacks recover this
+ * structure from the embedded kobj_attribute with container_of().
+ */
+struct powercap_attr {
+ u32 handle; /* passed to opal_get_powercap()/opal_set_powercap() */
+ struct kobj_attribute attr;
+};
+
+/*
+ * Per-child-node state: the sysfs attribute group registered for the
+ * node and the array of attributes its attrs[] entries point into.
+ * pcaps is an array with one element per child of the powercap node.
+ */
+static struct pcap {
+ struct attribute_group pg;
+ struct powercap_attr *pattrs;
+} *pcaps;
+
+/*
+ * sysfs show callback: query OPAL for the value behind this
+ * attribute's handle and print it to @buf as an unsigned decimal.
+ *
+ * The request is issued under powercap_mutex with an async token; if
+ * OPAL answers asynchronously the response is awaited. Returns the
+ * number of bytes written, the token or lock error, -EIO if waiting
+ * for the response fails, or the translated OPAL error code.
+ */
+static ssize_t powercap_show(struct kobject *kobj, struct kobj_attribute *attr,
+			     char *buf)
+{
+	struct powercap_attr *pattr;
+	struct opal_msg reply;
+	u32 value;
+	int rc, tok;
+
+	pattr = container_of(attr, struct powercap_attr, attr);
+
+	tok = opal_async_get_token_interruptible();
+	if (tok < 0) {
+		pr_devel("Failed to get token\n");
+		return tok;
+	}
+
+	rc = mutex_lock_interruptible(&powercap_mutex);
+	if (rc)
+		goto release_token;
+
+	rc = opal_get_powercap(pattr->handle, tok, (u32 *)__pa(&value));
+	if (rc == OPAL_ASYNC_COMPLETION) {
+		rc = opal_async_wait_response(tok, &reply);
+		if (rc) {
+			pr_devel("Failed to wait for the async response\n");
+			rc = -EIO;
+			goto unlock;
+		}
+		rc = opal_error_code(opal_get_async_rc(reply));
+		if (rc)
+			goto unlock;
+	} else if (rc != OPAL_SUCCESS) {
+		rc = opal_error_code(rc);
+		goto unlock;
+	}
+
+	/* Value is available, either immediately or after the async wait */
+	rc = sprintf(buf, "%u\n", be32_to_cpu(value));
+	if (rc < 0)
+		rc = -EIO;
+
+unlock:
+	mutex_unlock(&powercap_mutex);
+release_token:
+	opal_async_release_token(tok);
+	return rc;
+}
+
+/*
+ * sysfs store callback: parse @buf as an unsigned 32-bit number and
+ * ask OPAL to apply it to this attribute's handle.
+ *
+ * The value is parsed with kstrtou32() so that the parser matches the
+ * u32 it writes to and negative input is rejected instead of being
+ * reinterpreted as a large unsigned value.
+ *
+ * Returns @count on success, the parse, token or lock error, -EIO if
+ * waiting for the async response fails, or the translated OPAL error.
+ */
+static ssize_t powercap_store(struct kobject *kobj,
+			      struct kobj_attribute *attr, const char *buf,
+			      size_t count)
+{
+	struct powercap_attr *pcap_attr = container_of(attr,
+						struct powercap_attr, attr);
+	struct opal_msg msg;
+	u32 pcap;
+	int ret, token;
+
+	ret = kstrtou32(buf, 0, &pcap);
+	if (ret)
+		return ret;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&powercap_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_set_powercap(pcap_attr->handle, token, pcap);
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+			goto out;
+		}
+		ret = opal_error_code(opal_get_async_rc(msg));
+		if (!ret)
+			ret = count;
+		break;
+	case OPAL_SUCCESS:
+		ret = count;
+		break;
+	default:
+		ret = opal_error_code(ret);
+	}
+
+out:
+	mutex_unlock(&powercap_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+/*
+ * Initialise @attr as a read-only (0444) sysfs attribute called @name
+ * that is served by powercap_show() and carries the OPAL @handle.
+ * Callers that want a writable attribute adjust mode/store afterwards.
+ */
+static void powercap_add_attr(int handle, const char *name,
+			      struct powercap_attr *attr)
+{
+	struct kobj_attribute *kattr = &attr->attr;
+
+	attr->handle = handle;
+
+	sysfs_attr_init(&kattr->attr);
+	kattr->attr.name = name;
+	kattr->attr.mode = 0444;
+	kattr->show = powercap_show;
+}
+
+/*
+ * Create /powercap under opal_kobj with one attribute group per child
+ * of the "ibm,opal-powercap" node. Each group gets a read-only
+ * attribute for every "powercap-min" / "powercap-max" property found
+ * and a read-write one for "powercap-current"; the property value is
+ * used as the OPAL handle of the attribute.
+ *
+ * On any failure everything allocated so far is released, including
+ * the entry that was being built when the failure happened, and the
+ * device-node references still held are dropped.
+ */
+void __init opal_powercap_init(void)
+{
+	struct device_node *powercap, *node;
+	int i = 0;
+
+	powercap = of_find_compatible_node(NULL, NULL, "ibm,opal-powercap");
+	if (!powercap) {
+		pr_devel("Powercap node not found\n");
+		return;
+	}
+
+	pcaps = kcalloc(of_get_child_count(powercap), sizeof(*pcaps),
+			GFP_KERNEL);
+	if (!pcaps)
+		goto out_put_powercap;
+
+	powercap_kobj = kobject_create_and_add("powercap", opal_kobj);
+	if (!powercap_kobj) {
+		pr_warn("Failed to create powercap kobject\n");
+		goto out_pcaps;
+	}
+
+	for_each_child_of_node(powercap, node) {
+		u32 cur, min, max;
+		int j = 0;
+		bool has_cur = false, has_min = false, has_max = false;
+
+		/* First pass: count the attributes this node provides */
+		if (!of_property_read_u32(node, "powercap-min", &min)) {
+			j++;
+			has_min = true;
+		}
+
+		if (!of_property_read_u32(node, "powercap-max", &max)) {
+			j++;
+			has_max = true;
+		}
+
+		if (!of_property_read_u32(node, "powercap-current", &cur)) {
+			j++;
+			has_cur = true;
+		}
+
+		pcaps[i].pattrs = kcalloc(j, sizeof(struct powercap_attr),
+					  GFP_KERNEL);
+		if (!pcaps[i].pattrs)
+			goto out_cur;
+
+		/* One extra slot keeps the attrs[] array NULL-terminated */
+		pcaps[i].pg.attrs = kcalloc(j + 1, sizeof(struct attribute *),
+					    GFP_KERNEL);
+		if (!pcaps[i].pg.attrs)
+			goto out_cur;
+
+		/* Second pass: fill the attributes in */
+		j = 0;
+		pcaps[i].pg.name = node->name;
+		if (has_min) {
+			powercap_add_attr(min, "powercap-min",
+					  &pcaps[i].pattrs[j]);
+			pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr;
+			j++;
+		}
+
+		if (has_max) {
+			powercap_add_attr(max, "powercap-max",
+					  &pcaps[i].pattrs[j]);
+			pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr;
+			j++;
+		}
+
+		if (has_cur) {
+			powercap_add_attr(cur, "powercap-current",
+					  &pcaps[i].pattrs[j]);
+			pcaps[i].pattrs[j].attr.attr.mode |= 0220;
+			pcaps[i].pattrs[j].attr.store = powercap_store;
+			pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr;
+			j++;
+		}
+
+		if (sysfs_create_group(powercap_kobj, &pcaps[i].pg)) {
+			pr_warn("Failed to create powercap attribute group %s\n",
+				pcaps[i].pg.name);
+			goto out_cur;
+		}
+		i++;
+	}
+
+	/*
+	 * Success: the reference on the powercap node is kept, as the
+	 * group names point into the device tree.
+	 */
+	return;
+
+out_cur:
+	/*
+	 * Entry i was only partially set up and is not visited by the
+	 * loop below; kfree(NULL) is a no-op for whatever was not
+	 * allocated. Leaving for_each_child_of_node() early also leaves
+	 * a reference on the current child.
+	 */
+	kfree(pcaps[i].pattrs);
+	kfree(pcaps[i].pg.attrs);
+	of_node_put(node);
+
+	while (--i >= 0) {
+		kfree(pcaps[i].pattrs);
+		kfree(pcaps[i].pg.attrs);
+	}
+	kobject_put(powercap_kobj);
+out_pcaps:
+	kfree(pcaps);
+out_put_powercap:
+	of_node_put(powercap);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index 2d6ee1c5ad85..de4dd09f4a15 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -241,15 +241,9 @@ static ssize_t opal_prd_write(struct file *file, const char __user *buf,
size = be16_to_cpu(hdr.size);
- msg = kmalloc(size, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- rc = copy_from_user(msg, buf, size);
- if (rc) {
- size = -EFAULT;
- goto out_free;
- }
+ msg = memdup_user(buf, size);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
rc = opal_prd_msg(msg);
if (rc) {
@@ -257,7 +251,6 @@ static ssize_t opal_prd_write(struct file *file, const char __user *buf,
size = -EIO;
}
-out_free:
kfree(msg);
return size;
diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c
new file mode 100644
index 000000000000..7313b7fc9071
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-psr.c
@@ -0,0 +1,175 @@
+/*
+ * PowerNV OPAL Power-Shift-Ratio interface
+ *
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "opal-psr: " fmt
+
+#include <linux/of.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+
+DEFINE_MUTEX(psr_mutex);
+
+static struct kobject *psr_kobj;
+
+/*
+ * One power-shift-ratio sysfs attribute; psr_attrs is an array with one
+ * element per child of the power-shift-ratio node. The show/store
+ * callbacks recover the element from the embedded kobj_attribute.
+ */
+struct psr_attr {
+ u32 handle; /* read from the child node's "handle" property */
+ struct kobj_attribute attr;
+} *psr_attrs;
+
+/*
+ * sysfs show callback: fetch the power-shift-ratio for this
+ * attribute's handle from OPAL and print it to @buf.
+ *
+ * The call is made under psr_mutex with an async token; an
+ * asynchronous completion is awaited. Returns the number of bytes
+ * written, the token or lock error, -EIO if waiting for the response
+ * fails, or the translated OPAL error code.
+ */
+static ssize_t psr_show(struct kobject *kobj, struct kobj_attribute *attr,
+			char *buf)
+{
+	struct psr_attr *pattr;
+	struct opal_msg reply;
+	int ratio, rc, tok;
+
+	pattr = container_of(attr, struct psr_attr, attr);
+
+	tok = opal_async_get_token_interruptible();
+	if (tok < 0) {
+		pr_devel("Failed to get token\n");
+		return tok;
+	}
+
+	rc = mutex_lock_interruptible(&psr_mutex);
+	if (rc)
+		goto release_token;
+
+	rc = opal_get_power_shift_ratio(pattr->handle, tok,
+					(u32 *)__pa(&ratio));
+	if (rc == OPAL_ASYNC_COMPLETION) {
+		rc = opal_async_wait_response(tok, &reply);
+		if (rc) {
+			pr_devel("Failed to wait for the async response\n");
+			rc = -EIO;
+			goto unlock;
+		}
+		rc = opal_error_code(opal_get_async_rc(reply));
+		if (rc)
+			goto unlock;
+	} else if (rc != OPAL_SUCCESS) {
+		rc = opal_error_code(rc);
+		goto unlock;
+	}
+
+	/* Value is available, either immediately or after the async wait */
+	rc = sprintf(buf, "%u\n", be32_to_cpu(ratio));
+	if (rc < 0)
+		rc = -EIO;
+
+unlock:
+	mutex_unlock(&psr_mutex);
+release_token:
+	opal_async_release_token(tok);
+	return rc;
+}
+
+/*
+ * sysfs store callback: parse @buf as an integer and ask OPAL to set
+ * it as the power-shift-ratio for this attribute's handle.
+ *
+ * Returns @count on success, the parse, token or lock error, -EIO if
+ * waiting for the async response fails, or the translated OPAL error.
+ */
+static ssize_t psr_store(struct kobject *kobj, struct kobj_attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct psr_attr *pattr;
+	struct opal_msg reply;
+	int ratio, rc, tok;
+
+	pattr = container_of(attr, struct psr_attr, attr);
+
+	rc = kstrtoint(buf, 0, &ratio);
+	if (rc)
+		return rc;
+
+	tok = opal_async_get_token_interruptible();
+	if (tok < 0) {
+		pr_devel("Failed to get token\n");
+		return tok;
+	}
+
+	rc = mutex_lock_interruptible(&psr_mutex);
+	if (rc)
+		goto release_token;
+
+	rc = opal_set_power_shift_ratio(pattr->handle, tok, ratio);
+	if (rc == OPAL_ASYNC_COMPLETION) {
+		rc = opal_async_wait_response(tok, &reply);
+		if (rc) {
+			pr_devel("Failed to wait for the async response\n");
+			rc = -EIO;
+			goto unlock;
+		}
+		rc = opal_error_code(opal_get_async_rc(reply));
+		if (!rc)
+			rc = count;
+	} else if (rc == OPAL_SUCCESS) {
+		rc = count;
+	} else {
+		rc = opal_error_code(rc);
+	}
+
+unlock:
+	mutex_unlock(&psr_mutex);
+release_token:
+	opal_async_release_token(tok);
+	return rc;
+}
+
+/*
+ * Create /psr under opal_kobj with one 0664 file per child of the
+ * "ibm,opal-power-shift-ratio" node. The file is named after the
+ * child's "label" property and carries the child's "handle" property.
+ *
+ * A child lacking either property, or a file that cannot be created,
+ * aborts the setup: the kobject is dropped and the attribute array is
+ * freed.
+ */
+void __init opal_psr_init(void)
+{
+	struct device_node *psr, *node;
+	int i = 0;
+
+	psr = of_find_compatible_node(NULL, NULL,
+				      "ibm,opal-power-shift-ratio");
+	if (!psr) {
+		pr_devel("Power-shift-ratio node not found\n");
+		return;
+	}
+
+	psr_attrs = kcalloc(of_get_child_count(psr), sizeof(struct psr_attr),
+			    GFP_KERNEL);
+	if (!psr_attrs)
+		return;
+
+	psr_kobj = kobject_create_and_add("psr", opal_kobj);
+	if (!psr_kobj) {
+		pr_warn("Failed to create psr kobject\n");
+		goto free_attrs;
+	}
+
+	for_each_child_of_node(psr, node) {
+		struct psr_attr *cur = &psr_attrs[i];
+		struct kobj_attribute *kattr = &cur->attr;
+
+		if (of_property_read_u32(node, "handle", &cur->handle))
+			goto drop_kobj;
+
+		sysfs_attr_init(&kattr->attr);
+		if (of_property_read_string(node, "label", &kattr->attr.name))
+			goto drop_kobj;
+
+		kattr->attr.mode = 0664;
+		kattr->show = psr_show;
+		kattr->store = psr_store;
+
+		if (sysfs_create_file(psr_kobj, &kattr->attr)) {
+			pr_devel("Failed to create psr sysfs file %s\n",
+				 kattr->attr.name);
+			goto drop_kobj;
+		}
+		i++;
+	}
+
+	return;
+
+drop_kobj:
+	kobject_put(psr_kobj);
+free_attrs:
+	kfree(psr_attrs);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
new file mode 100644
index 000000000000..7e5a235ebf76
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -0,0 +1,212 @@
+/*
+ * PowerNV OPAL Sensor-groups interface
+ *
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "opal-sensor-groups: " fmt
+
+#include <linux/of.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+
+DEFINE_MUTEX(sg_mutex);
+
+static struct kobject *sg_kobj;
+
+/*
+ * One sensor-group sysfs attribute. sg_store() recovers this structure
+ * from the embedded kobj_attribute with container_of().
+ */
+struct sg_attr {
+ u32 handle; /* passed to opal_sensor_group_clear() */
+ struct kobj_attribute attr;
+};
+
+/*
+ * Per-child-node state; sgs is an array with one element per child of
+ * the sensor-group node.
+ */
+static struct sensor_group {
+ char name[20]; /* group name: node name, optionally followed by chip id */
+ struct attribute_group sg; /* group registered under sg_kobj */
+ struct sg_attr *sgattrs; /* backing storage for sg.attrs[] entries */
+} *sgs;
+
+/*
+ * sysfs store callback for the sensor-group "clear" attribute.
+ *
+ * Only the value 1 is accepted; anything else yields -EINVAL. The
+ * input is parsed with kstrtou32() so that the parser matches the u32
+ * it writes to. OPAL is then asked to clear the group identified by
+ * the attribute's handle, under sg_mutex and with an async token.
+ *
+ * Returns @count on success, the parse, token or lock error, -EIO if
+ * waiting for the async response fails, or the translated OPAL error.
+ */
+static ssize_t sg_store(struct kobject *kobj, struct kobj_attribute *attr,
+			const char *buf, size_t count)
+{
+	struct sg_attr *sattr = container_of(attr, struct sg_attr, attr);
+	struct opal_msg msg;
+	u32 data;
+	int ret, token;
+
+	ret = kstrtou32(buf, 0, &data);
+	if (ret)
+		return ret;
+
+	if (data != 1)
+		return -EINVAL;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&sg_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_sensor_group_clear(sattr->handle, token);
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+			goto out;
+		}
+		ret = opal_error_code(opal_get_async_rc(msg));
+		if (!ret)
+			ret = count;
+		break;
+	case OPAL_SUCCESS:
+		ret = count;
+		break;
+	default:
+		ret = opal_error_code(ret);
+	}
+
+out:
+	mutex_unlock(&sg_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+/*
+ * Table of sensor-group operations that are exposed in sysfs. An entry
+ * of a node's "ops" property that equals opal_no produces a write-only
+ * attribute called attr_name, handled by store.
+ */
+static struct sg_ops_info {
+ int opal_no;
+ const char *attr_name;
+ ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count);
+} ops_info[] = {
+ { OPAL_SENSOR_GROUP_CLEAR, "clear", sg_store },
+};
+
+/*
+ * Initialise @attr as a write-only (0220) sysfs attribute whose name
+ * and store handler come from ops_info[@index], carrying @handle.
+ */
+static void add_attr(int handle, struct sg_attr *attr, int index)
+{
+	const struct sg_ops_info *op = &ops_info[index];
+	struct kobj_attribute *kattr = &attr->attr;
+
+	attr->handle = handle;
+
+	sysfs_attr_init(&kattr->attr);
+	kattr->attr.name = op->attr_name;
+	kattr->attr.mode = 0220;
+	kattr->store = op->store;
+}
+
+/*
+ * Populate @sg with one attribute for every element of @ops (@len
+ * elements) that matches an ops_info[] entry, then register the group
+ * under sg_kobj. Returns the result of sysfs_create_group().
+ *
+ * sg->sgattrs and sg->sg.attrs must already be large enough for every
+ * match.
+ */
+static int add_attr_group(const __be32 *ops, int len, struct sensor_group *sg,
+			  u32 handle)
+{
+	int op_idx, info_idx;
+	int filled = 0;
+
+	for (op_idx = 0; op_idx < len; op_idx++) {
+		for (info_idx = 0; info_idx < ARRAY_SIZE(ops_info); info_idx++) {
+			struct sg_attr *slot;
+
+			if (be32_to_cpu(ops[op_idx]) != ops_info[info_idx].opal_no)
+				continue;
+
+			slot = &sg->sgattrs[filled];
+			add_attr(handle, slot, info_idx);
+			sg->sg.attrs[filled] = &slot->attr.attr;
+			filled++;
+		}
+	}
+
+	return sysfs_create_group(sg_kobj, &sg->sg);
+}
+
+/*
+ * Count how many (element of @ops, ops_info[] entry) pairs match,
+ * looking at the first @len elements of @ops. This is the number of
+ * attributes add_attr_group() will create for the same arguments.
+ */
+static int get_nr_attrs(const __be32 *ops, int len)
+{
+	int op_idx, info_idx;
+	int matches = 0;
+
+	for (op_idx = 0; op_idx < len; op_idx++) {
+		for (info_idx = 0; info_idx < ARRAY_SIZE(ops_info); info_idx++) {
+			if (be32_to_cpu(ops[op_idx]) != ops_info[info_idx].opal_no)
+				continue;
+			matches++;
+		}
+	}
+
+	return matches;
+}
+
+/*
+ * Create /sensor_groups under opal_kobj with one attribute group per
+ * child of the "ibm,opal-sensor-group" node that advertises at least
+ * one supported operation in its "ops" property. The group is named
+ * after the node, followed by the "ibm,chip-id" value when present,
+ * and its attributes carry the node's "sensor-group-id" as handle.
+ *
+ * Children without "ops", or without any supported operation, are
+ * skipped. Any other failure tears everything down again, including
+ * the entry that was being built when the failure happened.
+ */
+void __init opal_sensor_groups_init(void)
+{
+	struct device_node *sg, *node;
+	int i = 0;
+
+	sg = of_find_compatible_node(NULL, NULL, "ibm,opal-sensor-group");
+	if (!sg) {
+		pr_devel("Sensor groups node not found\n");
+		return;
+	}
+
+	sgs = kcalloc(of_get_child_count(sg), sizeof(*sgs), GFP_KERNEL);
+	if (!sgs)
+		goto out_put_sg;
+
+	sg_kobj = kobject_create_and_add("sensor_groups", opal_kobj);
+	if (!sg_kobj) {
+		pr_warn("Failed to create sensor group kobject\n");
+		goto out_sgs;
+	}
+
+	for_each_child_of_node(sg, node) {
+		const __be32 *ops;
+		u32 sgid, chipid;
+		int len, nr_attrs;
+
+		ops = of_get_property(node, "ops", &len);
+		if (!ops)
+			continue;
+
+		/*
+		 * of_get_property() reports the property length in bytes,
+		 * while the helpers below index ops[] by element.
+		 */
+		len /= (int)sizeof(*ops);
+
+		nr_attrs = get_nr_attrs(ops, len);
+		if (!nr_attrs)
+			continue;
+
+		sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(struct sg_attr),
+					 GFP_KERNEL);
+		if (!sgs[i].sgattrs)
+			goto out_cur;
+
+		/* One extra slot keeps the attrs[] array NULL-terminated */
+		sgs[i].sg.attrs = kcalloc(nr_attrs + 1,
+					  sizeof(struct attribute *),
+					  GFP_KERNEL);
+		if (!sgs[i].sg.attrs)
+			goto out_cur;
+
+		if (of_property_read_u32(node, "sensor-group-id", &sgid)) {
+			pr_warn("sensor-group-id property not found\n");
+			goto out_cur;
+		}
+
+		/* Bounded formatting: name[] is a fixed-size buffer */
+		if (!of_property_read_u32(node, "ibm,chip-id", &chipid))
+			snprintf(sgs[i].name, sizeof(sgs[i].name), "%s%d",
+				 node->name, chipid);
+		else
+			snprintf(sgs[i].name, sizeof(sgs[i].name), "%s",
+				 node->name);
+
+		sgs[i].sg.name = sgs[i].name;
+		if (add_attr_group(ops, len, &sgs[i], sgid)) {
+			pr_warn("Failed to create sensor attribute group %s\n",
+				sgs[i].sg.name);
+			goto out_cur;
+		}
+		i++;
+	}
+
+	return;
+
+out_cur:
+	/*
+	 * Entry i was only partially set up and is not visited by the
+	 * loop below; kfree(NULL) is a no-op for whatever was not
+	 * allocated. Leaving for_each_child_of_node() early also leaves
+	 * a reference on the current child.
+	 */
+	kfree(sgs[i].sgattrs);
+	kfree(sgs[i].sg.attrs);
+	of_node_put(node);
+
+	while (--i >= 0) {
+		kfree(sgs[i].sgattrs);
+		kfree(sgs[i].sg.attrs);
+	}
+	kobject_put(sg_kobj);
+out_sgs:
+	kfree(sgs);
+out_put_sg:
+	of_node_put(sg);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 4ca6c26a56d5..8c1ede2d3f7e 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -27,7 +27,7 @@
.globl opal_tracepoint_refcount
opal_tracepoint_refcount:
- .llong 0
+ .8byte 0
.section ".text"
@@ -310,3 +310,12 @@ OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
+OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT);
+OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START);
+OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP);
+OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P);
+OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP);
+OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP);
+OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO);
+OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO);
+OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 28651fb25417..81c0a943dea9 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -36,14 +36,14 @@ static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count)
const __be32 *gcid;
if (!of_get_property(dev, "scom-controller", NULL)) {
- pr_err("%s: device %s is not a SCOM controller\n",
- __func__, dev->full_name);
+ pr_err("%s: device %pOF is not a SCOM controller\n",
+ __func__, dev);
return SCOM_MAP_INVALID;
}
gcid = of_get_property(dev, "ibm,chip-id", NULL);
if (!gcid) {
- pr_err("%s: device %s has no ibm,chip-id\n",
- __func__, dev->full_name);
+ pr_err("%s: device %pOF has no ibm,chip-id\n",
+ __func__, dev);
return SCOM_MAP_INVALID;
}
m = kmalloc(sizeof(struct opal_scom_map), GFP_KERNEL);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index cad6b57ce494..65c79ecf5a4d 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -16,6 +16,7 @@
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
+#include <linux/of_address.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
@@ -25,11 +26,17 @@
#include <linux/memblock.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/printk.h>
+#include <linux/kmsg_dump.h>
+#include <linux/console.h>
+#include <linux/sched/debug.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/mce.h>
+#include <asm/imc-pmu.h>
+#include <asm/bug.h>
#include "powernv.h"
@@ -162,12 +169,9 @@ int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
sizeof(struct mcheck_recoverable_range);
/*
- * Allocate a buffer to hold the MC recoverable ranges. We would be
- * accessing them in real mode, hence it needs to be within
- * RMO region.
+ * Allocate a buffer to hold the MC recoverable ranges.
*/
- mc_recoverable_range =__va(memblock_alloc_base(size, __alignof__(u64),
- ppc64_rma_size));
+ mc_recoverable_range =__va(memblock_alloc(size, __alignof__(u64)));
memset(mc_recoverable_range, 0, size);
for (i = 0; i < mc_recoverable_range_len; i++) {
@@ -422,24 +426,88 @@ static int opal_recover_mce(struct pt_regs *regs,
/* Fatal machine check */
pr_err("Machine check interrupt is fatal\n");
recovered = 0;
- } else if ((evt->severity == MCE_SEV_ERROR_SYNC) &&
- (user_mode(regs) && !is_global_init(current))) {
+ }
+
+ if (!recovered && evt->severity == MCE_SEV_ERROR_SYNC) {
/*
- * For now, kill the task if we have received exception when
- * in userspace.
+ * Try to kill processes if we get a synchronous machine check
+ * (e.g., one caused by execution of this instruction). This
+ * will devolve into a panic if we try to kill init or are in
+ * an interrupt etc.
*
* TODO: Queue up this address for hwpoisioning later.
+ * TODO: This is not quite right for d-side machine
+ * checks ->nip is not necessarily the important
+ * address.
*/
- _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
- recovered = 1;
+ if ((user_mode(regs))) {
+ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+ recovered = 1;
+ } else if (die_will_crash()) {
+ /*
+ * die() would kill the kernel, so better to go via
+ * the platform reboot code that will log the
+ * machine check.
+ */
+ recovered = 0;
+ } else {
+ die("Machine check", regs, SIGBUS);
+ recovered = 1;
+ }
}
+
return recovered;
}
+/*
+ * pnv_platform_error_reboot: report an unrecoverable hardware platform
+ * error and request a reboot.
+ *
+ * @regs: register state to dump with show_regs(), or NULL to skip it.
+ * @msg:  description that is logged and passed to opal_cec_reboot2().
+ *
+ * Logs the error, flushes the log and console output, then asks OPAL
+ * for an OPAL_REBOOT_PLATFORM_ERROR reboot. If that call returns, the
+ * function falls back to ppc_md.restart(NULL).
+ */
+void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
+{
+ /*
+ * This is mostly taken from kernel/panic.c, but tries to do
+ * relatively minimal work. Don't use delay functions (TB may
+ * be broken), don't crash dump (need to set a firmware log),
+ * don't run notifiers. We do want to get some information to
+ * the Linux console.
+ */
+ console_verbose();
+ bust_spinlocks(1);
+ pr_emerg("Hardware platform error: %s\n", msg);
+ if (regs)
+ show_regs(regs);
+ smp_send_stop();
+ printk_safe_flush_on_panic();
+ kmsg_dump(KMSG_DUMP_PANIC);
+ bust_spinlocks(0);
+ debug_locks_off();
+ console_flush_on_panic();
+
+ /*
+ * Don't bother to shut things down because this will
+ * xstop the system.
+ */
+ if (opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, msg)
+ == OPAL_UNSUPPORTED) {
+ pr_emerg("Reboot type %d not supported for %s\n",
+ OPAL_REBOOT_PLATFORM_ERROR, msg);
+ }
+
+ /*
+ * We reached here. There are three possibilities:
+ * 1. We are running on a firmware level that does not support
+ * opal_cec_reboot2().
+ * 2. We are running on a firmware level that does not support
+ * the OPAL_REBOOT_PLATFORM_ERROR reboot type.
+ * 3. We are running on an FSP based system that does not need
+ * opal to trigger a checkstop explicitly for error analysis.
+ * The FSP PRD component would have already been notified
+ * about this error through other channels.
+ */
+
+ ppc_md.restart(NULL);
+}
+
int opal_machine_check(struct pt_regs *regs)
{
struct machine_check_event evt;
- int ret;
if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
return 0;
@@ -455,43 +523,7 @@ int opal_machine_check(struct pt_regs *regs)
if (opal_recover_mce(regs, &evt))
return 1;
- /*
- * Unrecovered machine check, we are heading to panic path.
- *
- * We may have hit this MCE in very early stage of kernel
- * initialization even before opal-prd has started running. If
- * this is the case then this MCE error may go un-noticed or
- * un-analyzed if we go down panic path. We need to inform
- * BMC/OCC about this error so that they can collect relevant
- * data for error analysis before rebooting.
- * Use opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR) to do so.
- * This function may not return on BMC based system.
- */
- ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
- "Unrecoverable Machine Check exception");
- if (ret == OPAL_UNSUPPORTED) {
- pr_emerg("Reboot type %d not supported\n",
- OPAL_REBOOT_PLATFORM_ERROR);
- }
-
- /*
- * We reached here. There can be three possibilities:
- * 1. We are running on a firmware level that do not support
- * opal_cec_reboot2()
- * 2. We are running on a firmware level that do not support
- * OPAL_REBOOT_PLATFORM_ERROR reboot type.
- * 3. We are running on FSP based system that does not need opal
- * to trigger checkstop explicitly for error analysis. The FSP
- * PRD component would have already got notified about this
- * error through other channels.
- *
- * If hardware marked this as an unrecoverable MCE, we are
- * going to panic anyway. Even if it didn't, it's not safe to
- * continue at this point, so we should explicitly panic.
- */
-
- panic("PowerNV Unrecovered Machine Check");
- return 0;
+ pnv_platform_error_reboot(regs, "Unrecoverable Machine Check exception");
}
/* Early hmi handler called in real mode. */
@@ -720,6 +752,15 @@ static void opal_pdev_init(const char *compatible)
of_platform_device_create(np, NULL, NULL);
}
+/*
+ * Look up the In-Memory Collection (IMC) counters node by its compatible
+ * string and, if the device tree has one, create a platform device for it.
+ */
+static void __init opal_imc_init_dev(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT);
+	if (!np)
+		return;
+
+	of_platform_device_create(np, NULL, NULL);
+
+	/*
+	 * of_find_compatible_node() hands the node back with its refcount
+	 * raised; drop that reference now that we no longer use np here.
+	 */
+	of_node_put(np);
+}
+
static int kopald(void *unused)
{
unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1;
@@ -793,6 +834,9 @@ static int __init opal_init(void)
/* Setup a heatbeat thread if requested by OPAL */
opal_init_heartbeat();
+ /* Detect In-Memory Collection counters and create devices */
+ opal_imc_init_dev();
+
/* Create leds platform devices */
leds = of_find_node_by_path("/ibm,opal/leds");
if (leds) {
@@ -836,6 +880,15 @@ static int __init opal_init(void)
/* Initialise OPAL kmsg dumper for flushing console on panic */
opal_kmsg_init();
+ /* Initialise OPAL powercap interface */
+ opal_powercap_init();
+
+ /* Initialise OPAL Power-Shifting-Ratio interface */
+ opal_psr_init();
+
+ /* Initialise OPAL sensor groups */
+ opal_sensor_groups_init();
+
return 0;
}
machine_subsys_initcall(powernv, opal_init);
@@ -952,6 +1005,7 @@ int opal_error_code(int rc)
case OPAL_UNSUPPORTED: return -EIO;
case OPAL_HARDWARE: return -EIO;
case OPAL_INTERNAL_ERROR: return -EIO;
+ case OPAL_TIMEOUT: return -ETIMEDOUT;
default:
pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
return -EIO;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index b900eb1d5e17..57f9e55f4352 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -444,8 +444,8 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
r = of_get_property(dn, "ibm,opal-m64-window", NULL);
if (!r) {
- pr_info(" No <ibm,opal-m64-window> on %s\n",
- dn->full_name);
+ pr_info(" No <ibm,opal-m64-window> on %pOF\n",
+ dn);
return;
}
@@ -1408,7 +1408,6 @@ m64_failed:
static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
int num);
-static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe)
{
@@ -2402,7 +2401,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
return 0;
}
-static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
+void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
{
uint16_t window_id = (pe->pe_number << 1 ) + 1;
int64_t rc;
@@ -3797,8 +3796,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
if (!of_device_is_available(np))
return;
- pr_info("Initializing %s PHB (%s)\n",
- pnv_phb_names[ioda_type], of_node_full_name(np));
+ pr_info("Initializing %s PHB (%pOF)\n", pnv_phb_names[ioda_type], np);
prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
if (!prop64) {
@@ -3813,8 +3811,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
/* Allocate PCI controller */
phb->hose = hose = pcibios_alloc_controller(np);
if (!phb->hose) {
- pr_err(" Can't allocate PCI controller for %s\n",
- np->full_name);
+ pr_err(" Can't allocate PCI controller for %pOF\n",
+ np);
memblock_free(__pa(phb), sizeof(struct pnv_phb));
return;
}
@@ -3825,7 +3823,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
hose->first_busno = be32_to_cpu(prop32[0]);
hose->last_busno = be32_to_cpu(prop32[1]);
} else {
- pr_warn(" Broken <bus-range> on %s\n", np->full_name);
+ pr_warn(" Broken <bus-range> on %pOF\n", np);
hose->first_busno = 0;
hose->last_busno = 0xff;
}
@@ -4046,7 +4044,7 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np)
const __be64 *prop64;
u64 hub_id;
- pr_info("Probing IODA IO-Hub %s\n", np->full_name);
+ pr_info("Probing IODA IO-Hub %pOF\n", np);
prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
if (!prop64) {
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 7905d179d036..5422f4a6317c 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -37,6 +37,8 @@
#include "powernv.h"
#include "pci.h"
+static DEFINE_MUTEX(p2p_mutex);
+
int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
{
struct device_node *parent = np;
@@ -1017,6 +1019,79 @@ void pnv_pci_dma_bus_setup(struct pci_bus *bus)
}
}
+/*
+ * Enable or disable PCI peer-to-peer between @initiator and @target.
+ *
+ * @initiator: device issuing the peer-to-peer accesses
+ * @target:    device receiving them
+ * @desc:      OPAL descriptor; OPAL_PCI_P2P_ENABLE set means enable,
+ *             clear means disable
+ *
+ * Returns 0 on success, -ENXIO if OPAL lacks the OPAL_PCI_SET_P2P token,
+ * -ENODEV if @initiator has no PE, -EIO if the OPAL call fails, and
+ * -EINVAL when asked to disable while either reference count is zero.
+ */
+int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, u64 desc)
+{
+ struct pci_controller *hose;
+ struct pnv_phb *phb_init, *phb_target;
+ struct pnv_ioda_pe *pe_init;
+ int rc;
+
+ if (!opal_check_token(OPAL_PCI_SET_P2P))
+ return -ENXIO;
+
+ hose = pci_bus_to_host(initiator->bus);
+ phb_init = hose->private_data;
+
+ hose = pci_bus_to_host(target->bus);
+ phb_target = hose->private_data;
+
+ pe_init = pnv_ioda_get_pe(initiator);
+ if (!pe_init)
+ return -ENODEV;
+
+ /*
+ * Configuring the initiator's PHB requires to adjust its
+ * TVE#1 setting. Since the same device can be an initiator
+ * several times for different target devices, we need to keep
+ * a reference count to know when we can restore the default
+ * bypass setting on its TVE#1 when disabling. Opal is not
+ * tracking PE states, so we add a reference count on the PE
+ * in linux.
+ *
+ * For the target, the configuration is per PHB, so we keep a
+ * target reference count on the PHB.
+ */
+ /* p2p_mutex serialises updates to both reference counts. */
+ mutex_lock(&p2p_mutex);
+
+ if (desc & OPAL_PCI_P2P_ENABLE) {
+ /* always go to opal to validate the configuration */
+ rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id,
+ desc, pe_init->pe_number);
+
+ if (rc != OPAL_SUCCESS) {
+ rc = -EIO;
+ goto out;
+ }
+
+ /* Only count the pairing once OPAL has accepted it. */
+ pe_init->p2p_initiator_count++;
+ phb_target->p2p_target_count++;
+ } else {
+ /* Refuse to disable something that was never enabled. */
+ if (!pe_init->p2p_initiator_count ||
+ !phb_target->p2p_target_count) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* Last user of this initiator PE: restore default bypass. */
+ if (--pe_init->p2p_initiator_count == 0)
+ pnv_pci_ioda2_set_bypass(pe_init, true);
+
+ /*
+ * NOTE(review): both counts have already been decremented
+ * when the OPAL call below runs, so an OPAL failure returns
+ * -EIO with the counts left decremented - confirm intended.
+ */
+ if (--phb_target->p2p_target_count == 0) {
+ rc = opal_pci_set_p2p(phb_init->opal_id,
+ phb_target->opal_id, desc,
+ pe_init->pe_number);
+ if (rc != OPAL_SUCCESS) {
+ rc = -EIO;
+ goto out;
+ }
+ }
+ }
+ rc = 0;
+out:
+ mutex_unlock(&p2p_mutex);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_set_p2p);
+
void pnv_pci_shutdown(void)
{
struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index f16bc403ec03..a95273c524f6 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -78,6 +78,9 @@ struct pnv_ioda_pe {
struct pnv_ioda_pe *master;
struct list_head slaves;
+ /* PCI peer-to-peer */
+ int p2p_initiator_count;
+
/* Link in list of PE#s */
struct list_head list;
};
@@ -189,6 +192,7 @@ struct pnv_phb {
#ifdef CONFIG_CXL_BASE
struct cxl_afu *cxl_afu;
#endif
+ int p2p_target_count;
};
extern struct pci_ops pnv_pci_ops;
@@ -229,6 +233,7 @@ extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
extern bool pnv_pci_enable_device_hook(struct pci_dev *dev);
+extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...);
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 6dbc0a1da1f6..a159d48573d7 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -7,6 +7,8 @@ extern void pnv_smp_init(void);
static inline void pnv_smp_init(void) { }
#endif
+extern void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) __noreturn;
+
struct pci_dev;
#ifdef CONFIG_PCI
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 1a9d84371a4d..718f50ed22f1 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -16,11 +16,13 @@
#include <linux/slab.h>
#include <linux/smp.h>
#include <asm/archrandom.h>
+#include <asm/cputable.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/smp.h>
+#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
struct powernv_rng {
void __iomem *regs;
@@ -67,6 +69,41 @@ int powernv_get_random_real_mode(unsigned long *v)
return 1;
}
+/*
+ * Fetch one random value with the DARN instruction.
+ *
+ * Returns 1 and stores the value in *v on success; returns 0 and leaves
+ * *v untouched when the instruction yields DARN_ERR.
+ */
+int powernv_get_random_darn(unsigned long *v)
+{
+	unsigned long darn;
+
+	/* Using DARN with L=1 - 64-bit conditioned random number */
+	asm volatile(PPC_DARN(%0, 1) : "=r"(darn));
+
+	if (darn != DARN_ERR) {
+		*v = darn;
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Install powernv_get_random_darn() as ppc_md.get_random_seed if the CPU
+ * has CPU_FTR_ARCH_300 and DARN yields a value within ten attempts.
+ *
+ * Returns 0 on success, -ENODEV without the CPU feature, -EIO if every
+ * attempt failed.
+ */
+static int initialise_darn(void)
+{
+	unsigned long scratch;
+	int attempts_left = 10;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return -ENODEV;
+
+	while (attempts_left--) {
+		if (!powernv_get_random_darn(&scratch))
+			continue;
+
+		ppc_md.get_random_seed = powernv_get_random_darn;
+		return 0;
+	}
+
+	pr_warn("Unable to use DARN for get_random_seed()\n");
+
+	return -EIO;
+}
+
int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
@@ -88,7 +125,7 @@ static __init void rng_init_per_cpu(struct powernv_rng *rng,
chip_id = of_get_ibm_chip_id(dn);
if (chip_id == -1)
- pr_warn("No ibm,chip-id found for %s.\n", dn->full_name);
+ pr_warn("No ibm,chip-id found for %pOF.\n", dn);
for_each_possible_cpu(cpu) {
if (per_cpu(powernv_rng, cpu) == NULL ||
@@ -141,8 +178,8 @@ static __init int rng_init(void)
for_each_compatible_node(dn, NULL, "ibm,power-rng") {
rc = rng_create(dn);
if (rc) {
- pr_err("Failed creating rng for %s (%d).\n",
- dn->full_name, rc);
+ pr_err("Failed creating rng for %pOF (%d).\n",
+ dn, rc);
continue;
}
@@ -150,6 +187,8 @@ static __init int rng_init(void)
of_platform_device_create(dn, NULL, NULL);
}
+ initialise_darn();
+
return 0;
}
machine_subsys_initcall(powernv, rng_init);
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 40dae96f7e20..c17f81e433f7 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -57,7 +57,7 @@ static void pnv_smp_setup_cpu(int cpu)
static int pnv_smp_kick_cpu(int nr)
{
- unsigned int pcpu = get_hard_smp_processor_id(nr);
+ unsigned int pcpu;
unsigned long start_here =
__pa(ppc_function_entry(generic_secondary_smp_init));
long rc;
@@ -66,6 +66,7 @@ static int pnv_smp_kick_cpu(int nr)
if (nr < 0 || nr >= nr_cpu_ids)
return -EINVAL;
+ pcpu = get_hard_smp_processor_id(nr);
/*
* If we already started or OPAL is not supported, we just
* kick the CPU via the PACA
@@ -164,12 +165,6 @@ static void pnv_smp_cpu_kill_self(void)
if (cpu_has_feature(CPU_FTR_ARCH_207S))
wmask = SRR1_WAKEMASK_P8;
- /* We don't want to take decrementer interrupts while we are offline,
- * so clear LPCR:PECE1. We keep PECE2 (and LPCR_PECE_HVEE on P9)
- * enabled as to let IPIs in.
- */
- mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
-
while (!generic_check_cpu_restart(cpu)) {
/*
* Clear IPI flag, since we don't handle IPIs while
@@ -219,8 +214,6 @@ static void pnv_smp_cpu_kill_self(void)
}
- /* Re-enable decrementer interrupts */
- mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1);
DBG("CPU%d coming online...\n", cpu);
}
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
new file mode 100644
index 000000000000..5aae845b8cd9
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -0,0 +1,1134 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/log2.h>
+#include <linux/rcupdate.h>
+#include <linux/cred.h>
+
+#include "vas.h"
+#include "copy-paste.h"
+
+/*
+ * Compute the paste address region for the window @window using the
+ * ->paste_base_addr and ->paste_win_id_shift we got from device tree.
+ *
+ * @window: window whose ->winid and ->vinst paste settings are used
+ * @addr:   receives paste_base_addr + (winid << paste_win_id_shift)
+ * @len:    if non-NULL, receives the length of the region (PAGE_SIZE)
+ */
+static void compute_paste_address(struct vas_window *window, u64 *addr, int *len)
+{
+	int winid;
+	u64 base, shift;
+
+	base = window->vinst->paste_base_addr;
+	shift = window->vinst->paste_win_id_shift;
+	winid = window->winid;
+
+	/*
+	 * Widen winid before shifting. It is an int, so shifting it
+	 * directly is done in signed int arithmetic and can overflow
+	 * before the result is added to the 64-bit base.
+	 */
+	*addr = base + ((u64)winid << shift);
+	if (len)
+		*len = PAGE_SIZE;
+
+	pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
+}
+
+/*
+ * Return, via @start and @len, the bus address range of the hypervisor
+ * window context for @window: VAS_HVWC_SIZE bytes per window id, counted
+ * from the instance's ->hvwc_bar_start.
+ */
+static inline void get_hvwc_mmio_bar(struct vas_window *window,
+			u64 *start, int *len)
+{
+	u64 bar_base = window->vinst->hvwc_bar_start;
+
+	*start = bar_base + window->winid * VAS_HVWC_SIZE;
+	*len = VAS_HVWC_SIZE;
+}
+
+/*
+ * Return, via @start and @len, the bus address range of the OS/user
+ * window context for @window: VAS_UWC_SIZE bytes per window id, counted
+ * from the instance's ->uwc_bar_start.
+ */
+static inline void get_uwc_mmio_bar(struct vas_window *window,
+			u64 *start, int *len)
+{
+	u64 bar_base = window->vinst->uwc_bar_start;
+
+	*start = bar_base + window->winid * VAS_UWC_SIZE;
+	*len = VAS_UWC_SIZE;
+}
+
+/*
+ * Map the paste bus address of the given send window into kernel address
+ * space. Unlike MMIO regions (map_mmio_region() below), paste region must
+ * be mapped cache-able and is only applicable to send windows.
+ *
+ * On success returns the mapped kernel address and records the region
+ * name in ->paste_addr_name (freed later by unmap_paste_region()).
+ * On any failure returns ERR_PTR(-ENOMEM), with the mem region released,
+ * the name freed and ->paste_addr_name left untouched.
+ */
+static void *map_paste_region(struct vas_window *txwin)
+{
+	int len;
+	void *map;
+	char *name;
+	u64 start;
+
+	name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id,
+				txwin->winid);
+	if (!name)
+		return ERR_PTR(-ENOMEM);
+
+	compute_paste_address(txwin, &start, &len);
+
+	if (!request_mem_region(start, len, name)) {
+		pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
+				__func__, start, len);
+		goto free_name;
+	}
+
+	map = ioremap_cache(start, len);
+	if (!map) {
+		pr_devel("%s(): ioremap_cache(0x%llx, %d) failed\n", __func__,
+				start, len);
+		goto release_region;
+	}
+
+	/*
+	 * Publish the name only once nothing can fail any more, so the
+	 * window never holds a pointer to a name we have already freed.
+	 */
+	txwin->paste_addr_name = name;
+
+	pr_devel("Mapped paste addr 0x%llx to kaddr 0x%p\n", start, map);
+	return map;
+
+release_region:
+	/* Give the region back; otherwise a later request for it fails. */
+	release_mem_region((phys_addr_t)start, len);
+free_name:
+	kfree(name);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Reserve the bus address range [@start, @start + @len) under @name and
+ * map it with ioremap().
+ *
+ * Returns the mapped kernel address, or NULL if either the reservation
+ * or the mapping fails. Nothing stays reserved on failure.
+ */
+static void *map_mmio_region(char *name, u64 start, int len)
+{
+	void *map;
+
+	if (!request_mem_region(start, len, name)) {
+		pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
+				__func__, start, len);
+		return NULL;
+	}
+
+	map = ioremap(start, len);
+	if (!map) {
+		pr_devel("%s(): ioremap(0x%llx, %d) failed\n", __func__, start,
+				len);
+		/* Undo the reservation taken above before bailing out. */
+		release_mem_region((phys_addr_t)start, len);
+		return NULL;
+	}
+
+	return map;
+}
+
+/*
+ * Undo a mapping: iounmap() @addr, then release the mem region of @len
+ * bytes starting at bus address @start.
+ */
+static void unmap_region(void *addr, u64 start, int len)
+{
+	phys_addr_t bus_start = (phys_addr_t)start;
+
+	iounmap(addr);
+	release_mem_region(bus_start, len);
+}
+
+/*
+ * Tear down the paste mapping of @window, if it has one: unmap and
+ * release the region, free the region name and clear both pointers.
+ */
+static void unmap_paste_region(struct vas_window *window)
+{
+	u64 paste_start;
+	int paste_len;
+
+	if (!window->paste_kaddr)
+		return;
+
+	compute_paste_address(window, &paste_start, &paste_len);
+	unmap_region(window->paste_kaddr, paste_start, paste_len);
+	window->paste_kaddr = NULL;
+
+	kfree(window->paste_addr_name);
+	window->paste_addr_name = NULL;
+}
+
+/*
+ * Unmap whichever of the HVWC and UWC MMIO regions are currently mapped
+ * for @window and clear the corresponding pointers.
+ */
+static void unmap_winctx_mmio_bars(struct vas_window *window)
+{
+	u64 bar_start;
+	int bar_len;
+
+	if (window->hvwc_map) {
+		get_hvwc_mmio_bar(window, &bar_start, &bar_len);
+		unmap_region(window->hvwc_map, bar_start, bar_len);
+		window->hvwc_map = NULL;
+	}
+
+	if (window->uwc_map) {
+		get_uwc_mmio_bar(window, &bar_start, &bar_len);
+		unmap_region(window->uwc_map, bar_start, bar_len);
+		window->uwc_map = NULL;
+	}
+}
+
+/*
+ * Map the Hypervisor Window Context (HVWC) and OS/User Window Context
+ * (UWC) MMIO regions of @window and store the kernel addresses in
+ * ->hvwc_map and ->uwc_map.
+ *
+ * Returns 0 when both mappings succeed. If either fails, whatever was
+ * mapped is unmapped again and -1 is returned.
+ */
+int map_winctx_mmio_bars(struct vas_window *window)
+{
+	u64 bar_start;
+	int bar_len;
+
+	get_hvwc_mmio_bar(window, &bar_start, &bar_len);
+	window->hvwc_map = map_mmio_region("HVWCM_Window", bar_start, bar_len);
+
+	get_uwc_mmio_bar(window, &bar_start, &bar_len);
+	window->uwc_map = map_mmio_region("UWCM_Window", bar_start, bar_len);
+
+	if (window->hvwc_map && window->uwc_map)
+		return 0;
+
+	unmap_winctx_mmio_bars(window);
+	return -1;
+}
+
+/*
+ * Reset all valid registers in the HV and OS/User Window Contexts for
+ * the window identified by @window.
+ *
+ * NOTE: We cannot really use a for loop to reset window context. Not all
+ * offsets in a window context are valid registers and the valid
+ * registers are not sequential. And, we can only write to offsets
+ * with valid registers.
+ */
+void reset_window_regs(struct vas_window *window)
+{
+ /* Every write below goes through the hypervisor window context. */
+ write_hvwc_reg(window, VREG(LPID), 0ULL);
+ write_hvwc_reg(window, VREG(PID), 0ULL);
+ write_hvwc_reg(window, VREG(XLATE_MSR), 0ULL);
+ write_hvwc_reg(window, VREG(XLATE_LPCR), 0ULL);
+ write_hvwc_reg(window, VREG(XLATE_CTL), 0ULL);
+ write_hvwc_reg(window, VREG(AMR), 0ULL);
+ write_hvwc_reg(window, VREG(SEIDR), 0ULL);
+ write_hvwc_reg(window, VREG(FAULT_TX_WIN), 0ULL);
+ write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
+ write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), 0ULL);
+ write_hvwc_reg(window, VREG(PSWID), 0ULL);
+ write_hvwc_reg(window, VREG(LFIFO_BAR), 0ULL);
+ write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), 0ULL);
+ write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), 0ULL);
+ write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
+ write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
+ write_hvwc_reg(window, VREG(LRX_WCRED), 0ULL);
+ write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+ write_hvwc_reg(window, VREG(TX_WCRED), 0ULL);
+ write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+ write_hvwc_reg(window, VREG(LFIFO_SIZE), 0ULL);
+ write_hvwc_reg(window, VREG(WINCTL), 0ULL);
+ write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
+ write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), 0ULL);
+ write_hvwc_reg(window, VREG(TX_RSVD_BUF_COUNT), 0ULL);
+ write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_CTL), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_PID), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_LPID), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_TID), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), 0ULL);
+ write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
+
+ /* Skip read-only registers: NX_UTIL and NX_UTIL_SE */
+
+ /*
+ * The send and receive window credit adder registers are also
+ * accessible from HVWC and have been initialized above. We don't
+ * need to initialize from the OS/User Window Context, so skip
+ * following calls:
+ *
+ * write_uwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+ * write_uwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+ */
+}
+
+/*
+ * Initialize window context registers related to Address Translation.
+ * These registers are common to send/receive windows although they
+ * differ for user/kernel windows. As we resolve the TODOs we may
+ * want to add fields to vas_winctx and move the initialization to
+ * init_vas_winctx_regs().
+ */
+static void init_xlate_regs(struct vas_window *window, bool user_win)
+{
+ u64 lpcr, val;
+
+ /*
+ * MSR_TA, MSR_US are false for both kernel and user.
+ * MSR_DR and MSR_PR are false for kernel.
+ */
+ val = 0ULL;
+ val = SET_FIELD(VAS_XLATE_MSR_HV, val, 1);
+ val = SET_FIELD(VAS_XLATE_MSR_SF, val, 1);
+ if (user_win) {
+ val = SET_FIELD(VAS_XLATE_MSR_DR, val, 1);
+ val = SET_FIELD(VAS_XLATE_MSR_PR, val, 1);
+ }
+ write_hvwc_reg(window, VREG(XLATE_MSR), val);
+
+ /* ISL and TC below are taken from the LPCR of the CPU running this. */
+ lpcr = mfspr(SPRN_LPCR);
+ val = 0ULL;
+ /*
+ * NOTE: From Section 5.7.8.1 Segment Lookaside Buffer of the
+ * Power ISA, v3.0B, Page size encoding is 0 = 4KB, 5 = 64KB.
+ *
+ * NOTE: From Section 1.3.1, Address Translation Context of the
+ * Nest MMU Workbook, LPCR_SC should be 0 for Power9.
+ */
+ val = SET_FIELD(VAS_XLATE_LPCR_PAGE_SIZE, val, 5);
+ /*
+ * NOTE(review): the ISL and TC values passed here are the masked
+ * LPCR bits, not normalised to 0/1 - confirm SET_FIELD() expects
+ * that.
+ */
+ val = SET_FIELD(VAS_XLATE_LPCR_ISL, val, lpcr & LPCR_ISL);
+ val = SET_FIELD(VAS_XLATE_LPCR_TC, val, lpcr & LPCR_TC);
+ val = SET_FIELD(VAS_XLATE_LPCR_SC, val, 0);
+ write_hvwc_reg(window, VREG(XLATE_LPCR), val);
+
+ /*
+ * Section 1.3.1 (Address translation Context) of NMMU workbook.
+ * 0b00 Hashed Page Table mode
+ * 0b01 Reserved
+ * 0b10 Radix on HPT
+ * 0b11 Radix on Radix
+ */
+ val = 0ULL;
+ val = SET_FIELD(VAS_XLATE_MODE, val, radix_enabled() ? 3 : 2);
+ write_hvwc_reg(window, VREG(XLATE_CTL), val);
+
+ /*
+ * TODO: Can we mfspr(AMR) even for user windows?
+ */
+ val = 0ULL;
+ val = SET_FIELD(VAS_AMR, val, mfspr(SPRN_AMR));
+ write_hvwc_reg(window, VREG(AMR), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_SEIDR, val, 0);
+ write_hvwc_reg(window, VREG(SEIDR), val);
+}
+
+/*
+ * Program the Reserved Send Buffer Count of a send window. The full
+ * procedure writes the register and reads it back to confirm that the
+ * hardware has enough buffers to reserve; see section 1.3.1.2.1 of the
+ * VAS workbook.
+ *
+ * This is best-effort only, so no error is reported if the request
+ * cannot be met.
+ *
+ * TODO: Reserved (aka dedicated) send buffers are not supported yet,
+ *       so the count is always written as zero and @winctx is unused.
+ */
+static void init_rsvd_tx_buf_count(struct vas_window *txwin,
+				struct vas_winctx *winctx)
+{
+	const u64 rsvd_bufs = 0ULL;
+
+	write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), rsvd_bufs);
+}
+
+/*
+ * init_winctx_regs()
+ * Initialize window context registers for a receive window.
+ * Except for caching control and marking window open, the registers
+ * are initialized in the order listed in Section 3.1.4 (Window Context
+ * Cache Register Details) of the VAS workbook although they don't need
+ * to be.
+ *
+ * All registers are first cleared with reset_window_regs() and then
+ * programmed from the fields of @winctx. Always returns 0.
+ *
+ * Design note: For NX receive windows, NX allocates the FIFO buffer in OPAL
+ * (so that it can get a large contiguous area) and passes that buffer
+ * to kernel via device tree. We now write that buffer address to the
+ * FIFO BAR. Would it make sense to do this all in OPAL? i.e have OPAL
+ * write the per-chip RX FIFO addresses to the windows during boot-up
+ * as a one-time task? That could work for NX but what about other
+ * receivers? Let the receivers tell us the rx-fifo buffers for now.
+ */
+int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx)
+{
+ u64 val;
+ int fifo_size;
+
+ reset_window_regs(window);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LPID, val, winctx->lpid);
+ write_hvwc_reg(window, VREG(LPID), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_PID_ID, val, winctx->pidr);
+ write_hvwc_reg(window, VREG(PID), val);
+
+ init_xlate_regs(window, winctx->user_win);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_FAULT_TX_WIN, val, 0);
+ write_hvwc_reg(window, VREG(FAULT_TX_WIN), val);
+
+ /* In PowerNV, interrupts go to HV. */
+ write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_HV_INTR_SRC_RA, val, winctx->irq_port);
+ write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_PSWID_EA_HANDLE, val, winctx->pswid);
+ write_hvwc_reg(window, VREG(PSWID), val);
+
+ write_hvwc_reg(window, VREG(SPARE1), 0ULL);
+ write_hvwc_reg(window, VREG(SPARE2), 0ULL);
+ write_hvwc_reg(window, VREG(SPARE3), 0ULL);
+
+ /*
+ * NOTE: VAS expects the FIFO address to be copied into the LFIFO_BAR
+ * register as is - do NOT shift the address into VAS_LFIFO_BAR
+ * bit fields! Ok to set the page migration select fields -
+ * VAS ignores the lower 10+ bits in the address anyway, because
+ * the minimum FIFO size is 1K?
+ *
+ * See also: Design note in function header.
+ */
+ val = __pa(winctx->rx_fifo);
+ val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0);
+ write_hvwc_reg(window, VREG(LFIFO_BAR), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LDATA_STAMP, val, winctx->data_stamp);
+ write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LDMA_TYPE, val, winctx->dma_type);
+ val = SET_FIELD(VAS_LDMA_FIFO_DISABLE, val, winctx->fifo_disable);
+ write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), val);
+
+ write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
+ write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
+
+ /* Receive and send credits both start at ->wcreds_max. */
+ val = 0ULL;
+ val = SET_FIELD(VAS_LRX_WCRED, val, winctx->wcreds_max);
+ write_hvwc_reg(window, VREG(LRX_WCRED), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_TX_WCRED, val, winctx->wcreds_max);
+ write_hvwc_reg(window, VREG(TX_WCRED), val);
+
+ write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+ write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+
+ /* LFIFO_SIZE is written as log2 of the FIFO size in units of 1024. */
+ fifo_size = winctx->rx_fifo_size / 1024;
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LFIFO_SIZE, val, ilog2(fifo_size));
+ write_hvwc_reg(window, VREG(LFIFO_SIZE), val);
+
+ /* Update window control and caching control registers last so
+ * we mark the window open only after fully initializing it and
+ * pushing context to cache.
+ */
+
+ write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
+
+ init_rsvd_tx_buf_count(window, winctx);
+
+ /* for a send window, point to the matching receive window */
+ val = 0ULL;
+ val = SET_FIELD(VAS_LRX_WIN_ID, val, winctx->rx_win_id);
+ write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), val);
+
+ write_hvwc_reg(window, VREG(SPARE4), 0ULL);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_NOTIFY_DISABLE, val, winctx->notify_disable);
+ val = SET_FIELD(VAS_INTR_DISABLE, val, winctx->intr_disable);
+ val = SET_FIELD(VAS_NOTIFY_EARLY, val, winctx->notify_early);
+ val = SET_FIELD(VAS_NOTIFY_OSU_INTR, val, winctx->notify_os_intr_reg);
+ write_hvwc_reg(window, VREG(LNOTIFY_CTL), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LNOTIFY_PID, val, winctx->lnotify_pid);
+ write_hvwc_reg(window, VREG(LNOTIFY_PID), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LNOTIFY_LPID, val, winctx->lnotify_lpid);
+ write_hvwc_reg(window, VREG(LNOTIFY_LPID), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LNOTIFY_TID, val, winctx->lnotify_tid);
+ write_hvwc_reg(window, VREG(LNOTIFY_TID), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LNOTIFY_MIN_SCOPE, val, winctx->min_scope);
+ val = SET_FIELD(VAS_LNOTIFY_MAX_SCOPE, val, winctx->max_scope);
+ write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), val);
+
+ /* Skip read-only registers NX_UTIL and NX_UTIL_SE */
+
+ write_hvwc_reg(window, VREG(SPARE5), 0ULL);
+ write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
+ write_hvwc_reg(window, VREG(SPARE6), 0ULL);
+
+ /* Finally, push window context to memory and... */
+ val = 0ULL;
+ val = SET_FIELD(VAS_PUSH_TO_MEM, val, 1);
+ write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);
+
+ /* ... mark the window open for business */
+ val = 0ULL;
+ val = SET_FIELD(VAS_WINCTL_REJ_NO_CREDIT, val, winctx->rej_no_credit);
+ val = SET_FIELD(VAS_WINCTL_PIN, val, winctx->pin_win);
+ val = SET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val, winctx->tx_wcred_mode);
+ val = SET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val, winctx->rx_wcred_mode);
+ val = SET_FIELD(VAS_WINCTL_TX_WORD_MODE, val, winctx->tx_word_mode);
+ val = SET_FIELD(VAS_WINCTL_RX_WORD_MODE, val, winctx->rx_word_mode);
+ val = SET_FIELD(VAS_WINCTL_FAULT_WIN, val, winctx->fault_win);
+ val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win);
+ val = SET_FIELD(VAS_WINCTL_OPEN, val, 1);
+ write_hvwc_reg(window, VREG(WINCTL), val);
+
+ return 0;
+}
+
+static DEFINE_SPINLOCK(vas_ida_lock);
+
+/* Return window id @winid to @ida, under vas_ida_lock. */
+static void vas_release_window_id(struct ida *ida, int winid)
+{
+	spinlock_t *lock = &vas_ida_lock;
+
+	spin_lock(lock);
+	ida_remove(ida, winid);
+	spin_unlock(lock);
+}
+
+/*
+ * Allocate a window id from @ida.
+ *
+ * Returns the new id (0 .. VAS_WINDOWS_PER_CHIP - 1) on success, -EAGAIN
+ * if ida_pre_get() fails or every window id is already in use, or the
+ * error returned by ida_get_new().
+ */
+static int vas_assign_window_id(struct ida *ida)
+{
+	int rc, winid;
+
+	/* ida_get_new() asks for a retry with -EAGAIN; pre-get again. */
+	do {
+		rc = ida_pre_get(ida, GFP_KERNEL);
+		if (!rc)
+			return -EAGAIN;
+
+		spin_lock(&vas_ida_lock);
+		rc = ida_get_new(ida, &winid);
+		spin_unlock(&vas_ida_lock);
+	} while (rc == -EAGAIN);
+
+	if (rc)
+		return rc;
+
+	/*
+	 * Ids start at 0, so the valid range is
+	 * 0 .. VAS_WINDOWS_PER_CHIP - 1 and an id equal to
+	 * VAS_WINDOWS_PER_CHIP is already one window too many.
+	 */
+	if (winid >= VAS_WINDOWS_PER_CHIP) {
+		pr_err("Too many (%d) open windows\n", winid);
+		vas_release_window_id(ida, winid);
+		return -EAGAIN;
+	}
+
+	return winid;
+}
+
+/*
+ * Unmap the window context MMIO regions of @window, free it and hand
+ * its id back to the owning instance's IDA.
+ */
+static void vas_window_free(struct vas_window *window)
+{
+	/* Save what we still need once @window itself has been freed. */
+	struct ida *ida = &window->vinst->ida;
+	int id = window->winid;
+
+	unmap_winctx_mmio_bars(window);
+	kfree(window);
+
+	vas_release_window_id(ida, id);
+}
+
+/*
+ * Allocate a window on @vinst: reserve a window id, allocate the zeroed
+ * vas_window and map its window context MMIO regions.
+ *
+ * Returns the new window. On failure returns an ERR_PTR: the error from
+ * vas_assign_window_id(), or -ENOMEM if the allocation or the mapping
+ * fails (the id is released again in that case).
+ */
+static struct vas_window *vas_window_alloc(struct vas_instance *vinst)
+{
+	struct vas_window *win;
+	int id;
+
+	id = vas_assign_window_id(&vinst->ida);
+	if (id < 0)
+		return ERR_PTR(id);
+
+	win = kzalloc(sizeof(*win), GFP_KERNEL);
+	if (win) {
+		win->vinst = vinst;
+		win->winid = id;
+
+		if (!map_winctx_mmio_bars(win))
+			return win;
+	}
+
+	/* Allocation or mapping failed; kfree(NULL) is a no-op. */
+	kfree(win);
+	vas_release_window_id(&vinst->ida, id);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Drop one send-window reference on receive window @rxwin by
+ * decrementing ->num_txwins. Warns once if @rxwin is marked as a send
+ * window.
+ */
+static void put_rx_win(struct vas_window *rxwin)
+{
+	atomic_t *txwin_refs = &rxwin->num_txwins;
+
+	/* Better not be a send window! */
+	WARN_ON_ONCE(rxwin->tx_win);
+
+	atomic_dec(txwin_refs);
+}
+
+/*
+ * Look up the receive window registered on @vinst for coprocessor type
+ * @cop and take a send-window reference (->num_txwins) on it.
+ *
+ * Only VAS_COP_TYPE_842 and VAS_COP_TYPE_842_HIPRI are looked up; any
+ * other type, or a type with no window registered, gives
+ * ERR_PTR(-EINVAL). @pswid is currently not consulted.
+ *
+ * See also function header of set_vinst_win().
+ */
+static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst,
+			enum vas_cop_type cop, u32 pswid)
+{
+	struct vas_window *rxwin;
+
+	mutex_lock(&vinst->mutex);
+
+	switch (cop) {
+	case VAS_COP_TYPE_842:
+	case VAS_COP_TYPE_842_HIPRI:
+		rxwin = vinst->rxwin[cop];
+		if (!rxwin)
+			rxwin = ERR_PTR(-EINVAL);
+		break;
+	default:
+		rxwin = ERR_PTR(-EINVAL);
+		break;
+	}
+
+	if (!IS_ERR(rxwin))
+		atomic_inc(&rxwin->num_txwins);
+
+	mutex_unlock(&vinst->mutex);
+
+	return rxwin;
+}
+
+/*
+ * A VAS instance keeps two tables of windows.
+ *
+ * ->windows[] holds every window of the instance, indexed by window id.
+ * It is used to look up send windows during fault handling and receive
+ * windows when pairing user space send/receive windows.
+ *
+ * ->rxwin[] holds the receive windows associated with NX engines. It has
+ * VAS_COP_TYPE_MAX entries and is indexed by coprocessor type.
+ *
+ * This function records @window in ->windows[] and, if it is a kernel
+ * receive window (neither a user window nor a send window), in
+ * ->rxwin[] as well. Both updates happen under vinst->mutex.
+ */
+static void set_vinst_win(struct vas_instance *vinst,
+			struct vas_window *window)
+{
+	struct vas_window **id_slot = &vinst->windows[window->winid];
+
+	mutex_lock(&vinst->mutex);
+
+	/*
+	 * A coprocessor type has at most one receive window, except for
+	 * user (FTW) windows, which are not tracked in ->rxwin[].
+	 */
+	if (!(window->user_win || window->tx_win)) {
+		struct vas_window **cop_slot = &vinst->rxwin[window->cop];
+
+		WARN_ON_ONCE(*cop_slot);
+		*cop_slot = window;
+	}
+
+	WARN_ON_ONCE(*id_slot != NULL);
+	*id_slot = window;
+
+	mutex_unlock(&vinst->mutex);
+}
+
+/*
+ * Remove @window from its instance's ->windows[] table and, if it is a
+ * kernel receive window, from ->rxwin[] too, under vinst->mutex.
+ * See also function header of set_vinst_win().
+ */
+static void clear_vinst_win(struct vas_window *window)
+{
+	struct vas_instance *vinst = window->vinst;
+	struct vas_window **id_slot = &vinst->windows[window->winid];
+
+	mutex_lock(&vinst->mutex);
+
+	if (!(window->user_win || window->tx_win)) {
+		struct vas_window **cop_slot = &vinst->rxwin[window->cop];
+
+		WARN_ON_ONCE(!*cop_slot);
+		*cop_slot = NULL;
+	}
+
+	WARN_ON_ONCE(*id_slot != window);
+	*id_slot = NULL;
+
+	mutex_unlock(&vinst->mutex);
+}
+
+/*
+ * Build the window context @winctx for a receive window from the
+ * caller-supplied attributes in @rxattr.
+ *
+ * @winctx is fully overwritten. @rxwin is not consulted here.
+ */
+static void init_winctx_for_rxwin(struct vas_window *rxwin,
+			struct vas_rx_win_attr *rxattr,
+			struct vas_winctx *winctx)
+{
+	/*
+	 * We first zero (memset()) all fields and only set non-zero fields.
+	 * Following fields are 0/false but maybe deserve a comment:
+	 *
+	 *	->notify_os_intr_reg	In powerNV, send intrs to HV
+	 *	->notify_disable	False for NX windows
+	 *	->intr_disable		False for Fault Windows
+	 *	->xtra_write		False for NX windows
+	 *	->notify_early		NA for NX windows
+	 *	->rsvd_txbuf_count	NA for Rx windows
+	 *	->lpid, ->pid, ->tid	NA for Rx windows
+	 */
+
+	memset(winctx, 0, sizeof(struct vas_winctx));
+
+	winctx->rx_fifo = rxattr->rx_fifo;
+	winctx->rx_fifo_size = rxattr->rx_fifo_size;
+	winctx->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+	winctx->pin_win = rxattr->pin_win;
+
+	winctx->nx_win = rxattr->nx_win;
+	winctx->fault_win = rxattr->fault_win;
+	/*
+	 * Carry the user-window flag over as well: without it the FTW
+	 * branch below can never be taken, because the memset() above
+	 * leaves ->user_win false.
+	 */
+	winctx->user_win = rxattr->user_win;
+	winctx->rx_word_mode = rxattr->rx_win_ord_mode;
+	winctx->tx_word_mode = rxattr->tx_win_ord_mode;
+	winctx->rx_wcred_mode = rxattr->rx_wcred_mode;
+	winctx->tx_wcred_mode = rxattr->tx_wcred_mode;
+
+	if (winctx->nx_win) {
+		winctx->data_stamp = true;
+		winctx->intr_disable = true;
+		winctx->pin_win = true;
+
+		/* Sanity-check the combinations an NX window must not use. */
+		WARN_ON_ONCE(winctx->fault_win);
+		WARN_ON_ONCE(!winctx->rx_word_mode);
+		WARN_ON_ONCE(!winctx->tx_word_mode);
+		WARN_ON_ONCE(winctx->notify_after_count);
+	} else if (winctx->fault_win) {
+		winctx->notify_disable = true;
+	} else if (winctx->user_win) {
+		/*
+		 * Section 1.8.1 Low Latency Core-Core Wake up of
+		 * the VAS workbook:
+		 *
+		 *	- disable credit checks ([tr]x_wcred_mode = false)
+		 *	- disable FIFO writes
+		 *	- enable ASB_Notify, disable interrupt
+		 */
+		winctx->fifo_disable = true;
+		winctx->intr_disable = true;
+		winctx->rx_fifo = NULL;
+	}
+
+	winctx->lnotify_lpid = rxattr->lnotify_lpid;
+	winctx->lnotify_pid = rxattr->lnotify_pid;
+	winctx->lnotify_tid = rxattr->lnotify_tid;
+	winctx->pswid = rxattr->pswid;
+	winctx->dma_type = VAS_DMA_TYPE_INJECT;
+	winctx->tc_mode = rxattr->tc_mode;
+
+	winctx->min_scope = VAS_SCOPE_LOCAL;
+	winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
+}
+
+/*
+ * Check that @attr describes a receive window that may be opened for
+ * coprocessor type @cop.
+ *
+ * A receive window has to be exactly one of NX, fault or user (FTW)
+ * window, and each kind constrains the notification/interrupt settings.
+ *
+ * Returns true when the attributes are acceptable, false otherwise.
+ */
+static bool rx_win_args_valid(enum vas_cop_type cop,
+			struct vas_rx_win_attr *attr)
+{
+	dump_rx_win_attr(attr);
+
+	if (cop >= VAS_COP_TYPE_MAX)
+		return false;
+
+	/* Only FTW windows are allowed to go below the minimum FIFO size. */
+	if (attr->rx_fifo_size < VAS_RX_FIFO_SIZE_MIN &&
+			cop != VAS_COP_TYPE_FTW)
+		return false;
+
+	if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX)
+		return false;
+
+	if (attr->nx_win) {
+		/* An NX window is neither a fault nor a user window. */
+		if (attr->fault_win || attr->user_win)
+			return false;
+
+		/*
+		 * Section 3.1.4.32: NX Windows must not disable notification,
+		 * and must not enable interrupts or early notification.
+		 */
+		return !attr->notify_disable && attr->intr_disable &&
+				!attr->notify_early;
+	}
+
+	if (attr->fault_win) {
+		/* A fault window cannot also be a user window. */
+		if (attr->user_win)
+			return false;
+
+		/*
+		 * Section 3.1.4.32: Fault windows must disable notification
+		 * but not interrupts.
+		 */
+		return attr->notify_disable && !attr->intr_disable;
+	}
+
+	/*
+	 * User receive windows are only for fast-thread-wakeup (FTW).
+	 * They don't need a FIFO and must disable interrupts.
+	 */
+	if (attr->user_win)
+		return !attr->rx_fifo && !attr->rx_fifo_size &&
+				attr->intr_disable;
+
+	/* Rx window must be one of NX or Fault or User window. */
+	return false;
+}
+
+/*
+ * Reset @rxattr and preload it with the receive-window defaults for
+ * coprocessor type @cop. Types not listed below leave @rxattr all-zero.
+ */
+void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop)
+{
+	memset(rxattr, 0, sizeof(*rxattr));
+
+	switch (cop) {
+	case VAS_COP_TYPE_842:
+	case VAS_COP_TYPE_842_HIPRI:
+		/* NX window: pinned, interrupts off, never a fault window. */
+		rxattr->nx_win = true;
+		rxattr->fault_win = false;
+		rxattr->pin_win = true;
+		rxattr->intr_disable = true;
+		rxattr->rx_wcred_mode = true;
+		rxattr->tx_wcred_mode = true;
+		rxattr->rx_win_ord_mode = true;
+		rxattr->tx_win_ord_mode = true;
+		break;
+
+	case VAS_COP_TYPE_FAULT:
+		/* Fault window: pinned, notification off. */
+		rxattr->fault_win = true;
+		rxattr->pin_win = true;
+		rxattr->notify_disable = true;
+		rxattr->rx_wcred_mode = true;
+		rxattr->tx_wcred_mode = true;
+		rxattr->rx_win_ord_mode = true;
+		rxattr->tx_win_ord_mode = true;
+		break;
+
+	case VAS_COP_TYPE_FTW:
+		/*
+		 * Credit checks stay disabled, as noted in the VAS Workbook.
+		 * Should they ever be enabled, a mechanism to return the
+		 * user credits is needed too, or new paste operations
+		 * will fail.
+		 */
+		rxattr->user_win = true;
+		rxattr->intr_disable = true;
+		break;
+
+	default:
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(vas_init_rx_win_attr);
+
+/*
+ * Open a receive window for coprocessor type @cop on VAS instance @vasid.
+ *
+ * @rxattr is checked by rx_win_args_valid() before anything is allocated.
+ * On success the new window is recorded in the instance's window table(s)
+ * by set_vinst_win() and returned to the caller.
+ *
+ * Returns ERR_PTR(-EINVAL) when the attributes are rejected or no instance
+ * matches @vasid, or the ERR_PTR() value handed back by vas_window_alloc().
+ */
+struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
+ struct vas_rx_win_attr *rxattr)
+{
+ struct vas_window *rxwin;
+ struct vas_winctx winctx;
+ struct vas_instance *vinst;
+
+ if (!rx_win_args_valid(cop, rxattr))
+ return ERR_PTR(-EINVAL);
+
+ vinst = find_vas_instance(vasid);
+ if (!vinst) {
+ pr_devel("vasid %d not found!\n", vasid);
+ return ERR_PTR(-EINVAL);
+ }
+ pr_devel("Found instance %d\n", vasid);
+
+ rxwin = vas_window_alloc(vinst);
+ if (IS_ERR(rxwin)) {
+ pr_devel("Unable to allocate memory for Rx window\n");
+ return rxwin;
+ }
+
+ /* In-kernel bookkeeping for the new window */
+ rxwin->tx_win = false;
+ rxwin->nx_win = rxattr->nx_win;
+ rxwin->user_win = rxattr->user_win;
+ rxwin->cop = cop;
+ /* The owner pid is only recorded for user (FTW) windows */
+ if (rxattr->user_win)
+ rxwin->pid = task_pid_vnr(current);
+
+ /* Build the window context, then hand it to init_winctx_regs() */
+ init_winctx_for_rxwin(rxwin, rxattr, &winctx);
+ init_winctx_regs(rxwin, &winctx);
+
+ /* Make the window visible to lookups only once it is set up */
+ set_vinst_win(vinst, rxwin);
+
+ return rxwin;
+}
+EXPORT_SYMBOL_GPL(vas_rx_win_open);
+
+/*
+ * Reset @txattr and preload it with the send-window defaults for
+ * coprocessor type @cop. Types not listed below leave @txattr all-zero.
+ */
+void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop)
+{
+	memset(txattr, 0, sizeof(*txattr));
+
+	switch (cop) {
+	case VAS_COP_TYPE_842:
+	case VAS_COP_TYPE_842_HIPRI:
+		txattr->rej_no_credit = false;
+		txattr->rx_wcred_mode = true;
+		txattr->tx_wcred_mode = true;
+		txattr->rx_win_ord_mode = true;
+		txattr->tx_win_ord_mode = true;
+		break;
+
+	case VAS_COP_TYPE_FTW:
+		txattr->user_win = true;
+		break;
+
+	default:
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(vas_init_tx_win_attr);
+
+/*
+ * Build the window context @winctx for the send window @txwin from the
+ * caller-supplied @txattr and from the receive window it is paired with
+ * (@txwin->rxwin, which must already be set).
+ */
+static void init_winctx_for_txwin(struct vas_window *txwin,
+			struct vas_tx_win_attr *txattr,
+			struct vas_winctx *winctx)
+{
+	struct vas_window *peer = txwin->rxwin;
+
+	/*
+	 * Start from an all-zero context and fill in only what is non-zero.
+	 * Fields deliberately left at 0/false, and why:
+	 *
+	 *	->notify_os_intr_reg	In powernv, send intrs to HV
+	 *	->rsvd_txbuf_count	Not supported yet.
+	 *	->notify_disable	False for NX windows
+	 *	->xtra_write		False for NX windows
+	 *	->notify_early		NA for NX windows
+	 *	->lnotify_lpid		NA for Tx windows
+	 *	->lnotify_pid		NA for Tx windows
+	 *	->lnotify_tid		NA for Tx windows
+	 *	->tx_win_cred_mode	Ignore for now for NX windows
+	 *	->rx_win_cred_mode	Ignore for now for NX windows
+	 *	->pswid			Stays 0 for send windows
+	 */
+	memset(winctx, 0, sizeof(*winctx));
+
+	/* Values inherited from the paired receive window */
+	winctx->nx_win = peer->nx_win;
+	winctx->rx_win_id = peer->winid;
+
+	/* Values taken from the caller's attributes */
+	winctx->wcreds_max = txattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+	winctx->user_win = txattr->user_win;
+	winctx->pin_win = txattr->pin_win;
+	winctx->rx_wcred_mode = txattr->rx_wcred_mode;
+	winctx->tx_wcred_mode = txattr->tx_wcred_mode;
+	winctx->rx_word_mode = txattr->rx_win_ord_mode;
+	winctx->tx_word_mode = txattr->tx_win_ord_mode;
+	winctx->lpid = txattr->lpid;
+	winctx->pidr = txattr->pidr;
+	winctx->tc_mode = txattr->tc_mode;
+
+	/* Fixed settings */
+	winctx->dma_type = VAS_DMA_TYPE_INJECT;
+	winctx->min_scope = VAS_SCOPE_LOCAL;
+	winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
+
+	if (winctx->nx_win) {
+		winctx->data_stamp = true;
+		winctx->intr_disable = true;
+	}
+}
+
+/*
+ * Check that @attr describes a send window that may be opened for
+ * coprocessor type @cop.
+ *
+ * Returns true when the attributes are acceptable, false otherwise.
+ */
+static bool tx_win_args_valid(enum vas_cop_type cop,
+			struct vas_tx_win_attr *attr)
+{
+	/* Threshold control is not supported */
+	if (attr->tc_mode != VAS_THRESH_DISABLED)
+		return false;
+
+	/*
+	 * VAS_COP_TYPE_MAX is the number of coprocessor types (it sizes
+	 * ->rxwin[]), not a valid type itself, so reject it as well. This
+	 * matches the check in rx_win_args_valid().
+	 */
+	if (cop >= VAS_COP_TYPE_MAX)
+		return false;
+
+	/* User send windows are FTW only and cannot reserve Tx buffers */
+	if (attr->user_win &&
+			(cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count))
+		return false;
+
+	return true;
+}
+
+/*
+ * Open a send window for coprocessor type @cop on VAS instance @vasid and
+ * pair it with the receive window found by get_vinst_rxwin().
+ *
+ * The reference taken on the receive window is kept for the lifetime of
+ * the send window and is dropped again (put_rx_win()) on the error paths
+ * below.
+ *
+ * Returns the new window, ERR_PTR(-EINVAL) when the attributes are
+ * rejected or no instance matches @vasid, or the ERR_PTR() value produced
+ * by get_vinst_rxwin(), vas_window_alloc() or map_paste_region().
+ */
+struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
+ struct vas_tx_win_attr *attr)
+{
+ int rc;
+ struct vas_window *txwin;
+ struct vas_window *rxwin;
+ struct vas_winctx winctx;
+ struct vas_instance *vinst;
+
+ if (!tx_win_args_valid(cop, attr))
+ return ERR_PTR(-EINVAL);
+
+ vinst = find_vas_instance(vasid);
+ if (!vinst) {
+ pr_devel("vasid %d not found!\n", vasid);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* Takes a ->num_txwins reference on the receive window */
+ rxwin = get_vinst_rxwin(vinst, cop, attr->pswid);
+ if (IS_ERR(rxwin)) {
+ pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop);
+ return rxwin;
+ }
+
+ txwin = vas_window_alloc(vinst);
+ if (IS_ERR(txwin)) {
+ rc = PTR_ERR(txwin);
+ goto put_rxwin;
+ }
+
+ /* In-kernel bookkeeping; ->rxwin must be set before the winctx is built */
+ txwin->tx_win = 1;
+ txwin->rxwin = rxwin;
+ txwin->nx_win = txwin->rxwin->nx_win;
+ txwin->pid = attr->pid;
+ txwin->user_win = attr->user_win;
+
+ init_winctx_for_txwin(txwin, attr, &winctx);
+
+ init_winctx_regs(txwin, &winctx);
+
+ /*
+ * If its a kernel send window, map the window address into the
+ * kernel's address space. For user windows, user must issue an
+ * mmap() to map the window into their address space.
+ *
+ * NOTE: If kernel ever resubmits a user CRB after handling a page
+ * fault, we will need to map this into kernel as well.
+ */
+ if (!txwin->user_win) {
+ txwin->paste_kaddr = map_paste_region(txwin);
+ if (IS_ERR(txwin->paste_kaddr)) {
+ rc = PTR_ERR(txwin->paste_kaddr);
+ goto free_window;
+ }
+ }
+
+ /* Make the window visible to lookups only once it is set up */
+ set_vinst_win(vinst, txwin);
+
+ return txwin;
+
+ /* Error unwinding: release in the reverse order of acquisition */
+free_window:
+ vas_window_free(txwin);
+
+put_rxwin:
+ put_rx_win(rxwin);
+ return ERR_PTR(rc);
+
+}
+EXPORT_SYMBOL_GPL(vas_tx_win_open);
+
+/*
+ * Exported wrapper around vas_copy(): passes @crb and @offset through
+ * unchanged and returns whatever vas_copy() returns.
+ */
+int vas_copy_crb(void *crb, int offset)
+{
+ return vas_copy(crb, offset);
+}
+EXPORT_SYMBOL_GPL(vas_copy_crb);
+
+#define RMA_LSMP_REPORT_ENABLE PPC_BIT(53)
+/*
+ * Paste through the kernel mapping of send window @txwin at @offset.
+ * When @re is true the paste targets the address with the REPORT_ENABLE
+ * bit set.
+ *
+ * Returns 0 when vas_paste() reports the raw CR value 2, -EINVAL for any
+ * other value (there is just pass or fail for now).
+ */
+int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
+{
+	void *paste_addr;
+	int cr;
+
+	/*
+	 * Only NX windows are supported for now, and the hardware assumes
+	 * the report-enable flag is set for them. Catch callers that do
+	 * not comply.
+	 */
+	WARN_ON_ONCE(txwin->nx_win && !re);
+
+	paste_addr = txwin->paste_kaddr;
+
+	/*
+	 * Setting the REPORT_ENABLE bit is equivalent to writing to the
+	 * 1K offset of the paste address.
+	 */
+	if (re)
+		paste_addr += SET_FIELD(RMA_LSMP_REPORT_ENABLE, 0ULL, 1);
+
+	cr = vas_paste(paste_addr, offset);
+
+	print_fifo_msg_count(txwin);
+
+	return cr == 2 ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL_GPL(vas_paste_crb);
+
+/*
+ * Block until the "Window Busy" flag of @window reads as clear, sleeping
+ * uninterruptibly for up to HZ jiffies between reads. There is no upper
+ * bound on the number of retries.
+ */
+static void poll_window_busy_state(struct vas_window *window)
+{
+	for (;;) {
+		u64 status = read_hvwc_reg(window, VREG(WIN_STATUS));
+
+		if (!GET_FIELD(VAS_WIN_BUSY, status))
+			return;
+
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(HZ);
+	}
+}
+
+/*
+ * Cast the window context of @window out of the cache: while the cache
+ * status bit is still set, issue a cast-out request (without push to
+ * memory), sleep uninterruptibly for up to HZ jiffies and check again.
+ * There is no upper bound on the number of retries.
+ */
+static void poll_window_castout(struct vas_window *window)
+{
+	for (;;) {
+		u64 ctl = read_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL));
+
+		if (!GET_FIELD(VAS_WIN_CACHE_STATUS, ctl))
+			break;
+
+		ctl = SET_FIELD(VAS_CASTOUT_REQ, 0ULL, 1);
+		ctl = SET_FIELD(VAS_PUSH_TO_MEM, ctl, 0);
+		write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), ctl);
+
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(HZ);
+	}
+}
+
+/*
+ * Close a window.
+ *
+ * See Section 1.12.1 of VAS workbook v1.05 for details on closing window:
+ * - Disable new paste operations (unmap paste address)
+ * - Poll for the "Window Busy" bit to be cleared
+ * - Clear the Open/Enable bit for the Window.
+ * - Poll for return of window Credits (implies FIFO empty for Rx win?)
+ * - Unpin and cast window context out of cache
+ *
+ * Besides the hardware, kernel has some bookkeeping of course.
+ *
+ * NOTE(review): no step in the body below polls for the return of window
+ * credits; confirm whether that is intentional.
+ *
+ * Returns 0 on success or when @window is NULL, and -EBUSY when asked to
+ * close a receive window that still has send windows attached.
+ */
+int vas_win_close(struct vas_window *window)
+{
+ u64 val;
+
+ if (!window)
+ return 0;
+
+ /* A receive window cannot go away while send windows reference it */
+ if (!window->tx_win && atomic_read(&window->num_txwins) != 0) {
+ pr_devel("Attempting to close an active Rx window!\n");
+ WARN_ON_ONCE(1);
+ return -EBUSY;
+ }
+
+ unmap_paste_region(window);
+
+ /* Remove the window from the instance's lookup tables */
+ clear_vinst_win(window);
+
+ poll_window_busy_state(window);
+
+ /* Unpin window from cache and close it */
+ val = read_hvwc_reg(window, VREG(WINCTL));
+ val = SET_FIELD(VAS_WINCTL_PIN, val, 0);
+ val = SET_FIELD(VAS_WINCTL_OPEN, val, 0);
+ write_hvwc_reg(window, VREG(WINCTL), val);
+
+ poll_window_castout(window);
+
+ /* if send window, drop reference to matching receive window */
+ if (window->tx_win)
+ put_rx_win(window->rxwin);
+
+ vas_window_free(window);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vas_win_close);
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
new file mode 100644
index 000000000000..565a4878fefa
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/of.h>
+
+#include "vas.h"
+
+static DEFINE_MUTEX(vas_mutex);
+static LIST_HEAD(vas_instances);
+
+/*
+ * Create the in-kernel state for the VAS instance described by @pdev and
+ * add it to the global list of instances.
+ *
+ * The device node must carry an "ibm,vas-id" property and the platform
+ * device exactly four resources: the start addresses of the first three
+ * become the HVWC BAR, the UWC BAR and the paste base address, and the
+ * ->end of the fourth is turned into the paste window id shift.
+ *
+ * Returns 0 on success, -ENODEV for an unusable device tree description
+ * and -ENOMEM if the instance cannot be allocated.
+ */
+static int init_vas_instance(struct platform_device *pdev)
+{
+	struct device_node *dn = pdev->dev.of_node;
+	struct vas_instance *vinst;
+	struct resource *shift_res;
+	int vasid;
+
+	if (of_property_read_u32(dn, "ibm,vas-id", &vasid)) {
+		pr_err("No ibm,vas-id property for %s?\n", pdev->name);
+		return -ENODEV;
+	}
+
+	if (pdev->num_resources != 4) {
+		pr_err("Unexpected DT configuration for [%s, %d]\n",
+				pdev->name, vasid);
+		return -ENODEV;
+	}
+
+	vinst = kzalloc(sizeof(*vinst), GFP_KERNEL);
+	if (!vinst)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&vinst->node);
+	ida_init(&vinst->ida);
+	mutex_init(&vinst->mutex);
+	vinst->vas_id = vasid;
+	vinst->pdev = pdev;
+
+	vinst->hvwc_bar_start = pdev->resource[0].start;
+	vinst->uwc_bar_start = pdev->resource[1].start;
+	vinst->paste_base_addr = pdev->resource[2].start;
+
+	/* Anything above 62 would not leave a usable shift below. */
+	shift_res = &pdev->resource[3];
+	if (shift_res->end > 62) {
+		pr_err("Bad 'paste_win_id_shift' in DT, %llx\n",
+				shift_res->end);
+		kfree(vinst);
+		return -ENODEV;
+	}
+
+	vinst->paste_win_id_shift = 63 - shift_res->end;
+
+	pr_devel("Initialized instance [%s, %d], paste_base 0x%llx, "
+			"paste_win_id_shift 0x%llx\n", pdev->name, vasid,
+			vinst->paste_base_addr, vinst->paste_win_id_shift);
+
+	mutex_lock(&vas_mutex);
+	list_add(&vinst->node, &vas_instances);
+	mutex_unlock(&vas_mutex);
+
+	dev_set_drvdata(&pdev->dev, vinst);
+
+	return 0;
+}
+
+/*
+ * Return the VAS instance whose id is @vasid, or NULL if there is none.
+ *
+ * The list of instances is read/used many times but is only written to
+ * during initialization.
+ */
+struct vas_instance *find_vas_instance(int vasid)
+{
+	struct vas_instance *vinst;
+	struct vas_instance *match = NULL;
+
+	mutex_lock(&vas_mutex);
+	list_for_each_entry(vinst, &vas_instances, node) {
+		if (vinst->vas_id == vasid) {
+			match = vinst;
+			break;
+		}
+	}
+	mutex_unlock(&vas_mutex);
+
+	if (!match)
+		pr_devel("Instance %d not found\n", vasid);
+
+	return match;
+}
+
+/*
+ * Platform driver probe callback: all the work is done by
+ * init_vas_instance(), whose return value is passed back unchanged.
+ */
+static int vas_probe(struct platform_device *pdev)
+{
+ return init_vas_instance(pdev);
+}
+
+/* Device tree match table: binds to nodes compatible with "ibm,vas". */
+static const struct of_device_id powernv_vas_match[] = {
+ { .compatible = "ibm,vas",},
+ {},
+};
+
+/*
+ * Platform driver for VAS. Only a probe callback is provided; there is
+ * no remove callback.
+ */
+static struct platform_driver vas_driver = {
+ .driver = {
+ .name = "vas",
+ .of_match_table = powernv_vas_match,
+ },
+ .probe = vas_probe,
+};
+
+/*
+ * Register the VAS platform driver and create a platform device for
+ * every "ibm,vas" compatible node in the device tree.
+ *
+ * Returns 0 if at least one VAS node was found, the error from
+ * platform_driver_register() if registration fails, and -ENODEV when
+ * the device tree has no VAS node.
+ */
+static int __init vas_init(void)
+{
+	int found = 0;
+	int rc;
+	struct device_node *dn;
+
+	rc = platform_driver_register(&vas_driver);
+	if (rc)
+		return rc;
+
+	for_each_compatible_node(dn, NULL, "ibm,vas") {
+		of_platform_device_create(dn, NULL, NULL);
+		found++;
+	}
+
+	if (!found) {
+		/* Don't leave the driver registered when init fails. */
+		platform_driver_unregister(&vas_driver);
+		return -ENODEV;
+	}
+
+	pr_devel("Found %d instances\n", found);
+
+	return 0;
+}
+device_initcall(vas_init);
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
new file mode 100644
index 000000000000..38dee5d50f31
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -0,0 +1,467 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _VAS_H
+#define _VAS_H
+#include <linux/atomic.h>
+#include <linux/idr.h>
+#include <asm/vas.h>
+#include <linux/io.h>
+
+/*
+ * Overview of Virtual Accelerator Switchboard (VAS).
+ *
+ * VAS is a hardware "switchboard" that allows senders and receivers to
+ * exchange messages with _minimal_ kernel involvement. The receivers are
+ * typically NX coprocessor engines that perform compression or encryption
+ * in hardware, but receivers can also be other software threads.
+ *
+ * Senders are user/kernel threads that submit compression/encryption or
+ * other requests to the receivers. Senders must format their messages as
+ * Coprocessor Request Blocks (CRB)s and submit them using the "copy" and
+ * "paste" instructions which were introduced in Power9.
+ *
+ * A Power node can have (up to?) 8 Power chips. There is one instance of
+ * VAS in each Power9 chip. Each instance of VAS has 64K windows or ports.
+ * Senders and receivers must each connect to a separate window before they
+ * can exchange messages through the switchboard.
+ *
+ * Each window is described by two types of window contexts:
+ *
+ * Hypervisor Window Context (HVWC) of size VAS_HVWC_SIZE bytes
+ *
+ * OS/User Window Context (UWC) of size VAS_UWC_SIZE bytes.
+ *
+ * A window context can be viewed as a set of 64-bit registers. The settings
+ * in these registers configure/control/determine the behavior of the VAS
+ * hardware when messages are sent/received through the window. The registers
+ * in the HVWC are configured by the kernel while the registers in the UWC can
+ * be configured by the kernel or by the user space application that is using
+ * the window.
+ *
+ * The HVWCs for all windows on a specific instance of VAS are in a contiguous
+ * range of hardware addresses or Base address region (BAR) referred to as the
+ * HVWC BAR for the instance. Similarly the UWCs for all windows on an instance
+ * are referred to as the UWC BAR for the instance.
+ *
+ * The two BARs for each instance are defined in the Power9 MMIO Ranges spreadsheet
+ * and available to the kernel in the VAS node's "reg" property in the device
+ * tree:
+ *
+ * /proc/device-tree/vasm@.../reg
+ *
+ * (see vas_probe() for details on the reg property).
+ *
+ * The kernel maps the HVWC and UWC BAR regions into the kernel address
+ * space (hvwc_map and uwc_map). The kernel can then access the window
+ * contexts of a specific window using:
+ *
+ * hvwc = hvwc_map + winid * VAS_HVWC_SIZE.
+ * uwc = uwc_map + winid * VAS_UWC_SIZE.
+ *
+ * where winid is the window index (0..64K).
+ *
+ * As mentioned, a window context is used to "configure" a window. Besides
+ * this configuration address, each _send_ window also has a unique hardware
+ * "paste" address that is used to submit requests/CRBs (see vas_paste_crb()).
+ *
+ * The hardware paste address for a window is computed using the "paste
+ * base address" and "paste win id shift" reg properties in the VAS device
+ * tree node using:
+ *
+ * paste_addr = paste_base + ((winid << paste_win_id_shift))
+ *
+ * (again, see vas_probe() for ->paste_base_addr and ->paste_win_id_shift).
+ *
+ * The kernel maps this hardware address into the sender's address space
+ * after which they can use the 'paste' instruction (new in Power9) to
+ * send a message (submit a request aka CRB) to the coprocessor.
+ *
+ * NOTE: In the initial version, senders can only be in-kernel drivers/threads.
+ * Support for user space threads will be added in follow-on patches.
+ *
+ * TODO: Do we need to map the UWC into user address space so they can return
+ * credits? It's NA for NX but may be needed for other receive windows.
+ *
+ */
+
+#define VAS_WINDOWS_PER_CHIP (64 << 10)
+
+/*
+ * Hypervisor and OS/User Window Context sizes
+ */
+#define VAS_HVWC_SIZE 512
+#define VAS_UWC_SIZE PAGE_SIZE
+
+/*
+ * Initial per-process credits.
+ * Max send window credits: 4K-1 (12-bits in VAS_TX_WCRED)
+ * Max receive window credits: 64K-1 (16 bits in VAS_LRX_WCRED)
+ *
+ * TODO: Needs tuning for per-process credits
+ */
+#define VAS_WCREDS_MIN 16
+#define VAS_WCREDS_MAX ((64 << 10) - 1)
+#define VAS_WCREDS_DEFAULT (1 << 10)
+
+/*
+ * VAS Window Context Register Offsets and bitmasks.
+ * See Section 3.1.4 of VAS Work book
+ */
+#define VAS_LPID_OFFSET 0x010
+#define VAS_LPID PPC_BITMASK(0, 11)
+
+#define VAS_PID_OFFSET 0x018
+#define VAS_PID_ID PPC_BITMASK(0, 19)
+
+#define VAS_XLATE_MSR_OFFSET 0x020
+#define VAS_XLATE_MSR_DR PPC_BIT(0)
+#define VAS_XLATE_MSR_TA PPC_BIT(1)
+#define VAS_XLATE_MSR_PR PPC_BIT(2)
+#define VAS_XLATE_MSR_US PPC_BIT(3)
+#define VAS_XLATE_MSR_HV PPC_BIT(4)
+#define VAS_XLATE_MSR_SF PPC_BIT(5)
+
+#define VAS_XLATE_LPCR_OFFSET 0x028
+#define VAS_XLATE_LPCR_PAGE_SIZE PPC_BITMASK(0, 2)
+#define VAS_XLATE_LPCR_ISL PPC_BIT(3)
+#define VAS_XLATE_LPCR_TC PPC_BIT(4)
+#define VAS_XLATE_LPCR_SC PPC_BIT(5)
+
+#define VAS_XLATE_CTL_OFFSET 0x030
+#define VAS_XLATE_MODE PPC_BITMASK(0, 1)
+
+#define VAS_AMR_OFFSET 0x040
+#define VAS_AMR PPC_BITMASK(0, 63)
+
+#define VAS_SEIDR_OFFSET 0x048
+#define VAS_SEIDR PPC_BITMASK(0, 63)
+
+#define VAS_FAULT_TX_WIN_OFFSET 0x050
+#define VAS_FAULT_TX_WIN PPC_BITMASK(48, 63)
+
+#define VAS_OSU_INTR_SRC_RA_OFFSET 0x060
+#define VAS_OSU_INTR_SRC_RA PPC_BITMASK(8, 63)
+
+#define VAS_HV_INTR_SRC_RA_OFFSET 0x070
+#define VAS_HV_INTR_SRC_RA PPC_BITMASK(8, 63)
+
+#define VAS_PSWID_OFFSET 0x078
+#define VAS_PSWID_EA_HANDLE PPC_BITMASK(0, 31)
+
+#define VAS_SPARE1_OFFSET 0x080
+#define VAS_SPARE2_OFFSET 0x088
+#define VAS_SPARE3_OFFSET 0x090
+#define VAS_SPARE4_OFFSET 0x130
+#define VAS_SPARE5_OFFSET 0x160
+#define VAS_SPARE6_OFFSET 0x188
+
+#define VAS_LFIFO_BAR_OFFSET 0x0A0
+#define VAS_LFIFO_BAR PPC_BITMASK(8, 53)
+#define VAS_PAGE_MIGRATION_SELECT PPC_BITMASK(54, 56)
+
+#define VAS_LDATA_STAMP_CTL_OFFSET 0x0A8
+#define VAS_LDATA_STAMP PPC_BITMASK(0, 1)
+#define VAS_XTRA_WRITE PPC_BIT(2)
+
+#define VAS_LDMA_CACHE_CTL_OFFSET 0x0B0
+#define VAS_LDMA_TYPE PPC_BITMASK(0, 1)
+#define VAS_LDMA_FIFO_DISABLE PPC_BIT(2)
+
+#define VAS_LRFIFO_PUSH_OFFSET 0x0B8
+#define VAS_LRFIFO_PUSH PPC_BITMASK(0, 15)
+
+#define VAS_CURR_MSG_COUNT_OFFSET 0x0C0
+#define VAS_CURR_MSG_COUNT PPC_BITMASK(0, 7)
+
+#define VAS_LNOTIFY_AFTER_COUNT_OFFSET 0x0C8
+#define VAS_LNOTIFY_AFTER_COUNT PPC_BITMASK(0, 7)
+
+#define VAS_LRX_WCRED_OFFSET 0x0E0
+#define VAS_LRX_WCRED PPC_BITMASK(0, 15)
+
+#define VAS_LRX_WCRED_ADDER_OFFSET 0x190
+#define VAS_LRX_WCRED_ADDER PPC_BITMASK(0, 15)
+
+#define VAS_TX_WCRED_OFFSET 0x0F0
+#define VAS_TX_WCRED PPC_BITMASK(4, 15)
+
+#define VAS_TX_WCRED_ADDER_OFFSET 0x1A0
+#define VAS_TX_WCRED_ADDER PPC_BITMASK(4, 15)
+
+#define VAS_LFIFO_SIZE_OFFSET 0x100
+#define VAS_LFIFO_SIZE PPC_BITMASK(0, 3)
+
+#define VAS_WINCTL_OFFSET 0x108
+#define VAS_WINCTL_OPEN PPC_BIT(0)
+#define VAS_WINCTL_REJ_NO_CREDIT PPC_BIT(1)
+#define VAS_WINCTL_PIN PPC_BIT(2)
+#define VAS_WINCTL_TX_WCRED_MODE PPC_BIT(3)
+#define VAS_WINCTL_RX_WCRED_MODE PPC_BIT(4)
+#define VAS_WINCTL_TX_WORD_MODE PPC_BIT(5)
+#define VAS_WINCTL_RX_WORD_MODE PPC_BIT(6)
+#define VAS_WINCTL_RSVD_TXBUF PPC_BIT(7)
+#define VAS_WINCTL_THRESH_CTL PPC_BITMASK(8, 9)
+#define VAS_WINCTL_FAULT_WIN PPC_BIT(10)
+#define VAS_WINCTL_NX_WIN PPC_BIT(11)
+
+#define VAS_WIN_STATUS_OFFSET 0x110
+#define VAS_WIN_BUSY PPC_BIT(1)
+
+#define VAS_WIN_CTX_CACHING_CTL_OFFSET 0x118
+#define VAS_CASTOUT_REQ PPC_BIT(0)
+#define VAS_PUSH_TO_MEM PPC_BIT(1)
+#define VAS_WIN_CACHE_STATUS PPC_BIT(4)
+
+#define VAS_TX_RSVD_BUF_COUNT_OFFSET 0x120
+#define VAS_RXVD_BUF_COUNT PPC_BITMASK(58, 63)
+
+#define VAS_LRFIFO_WIN_PTR_OFFSET 0x128
+#define VAS_LRX_WIN_ID PPC_BITMASK(0, 15)
+
+/*
+ * Local Notification Control Register controls what happens in _response_
+ * to a paste command and hence applies only to receive windows.
+ */
+#define VAS_LNOTIFY_CTL_OFFSET 0x138
+#define VAS_NOTIFY_DISABLE PPC_BIT(0)
+#define VAS_INTR_DISABLE PPC_BIT(1)
+#define VAS_NOTIFY_EARLY PPC_BIT(2)
+#define VAS_NOTIFY_OSU_INTR PPC_BIT(3)
+
+#define VAS_LNOTIFY_PID_OFFSET 0x140
+#define VAS_LNOTIFY_PID PPC_BITMASK(0, 19)
+
+#define VAS_LNOTIFY_LPID_OFFSET 0x148
+#define VAS_LNOTIFY_LPID PPC_BITMASK(0, 11)
+
+#define VAS_LNOTIFY_TID_OFFSET 0x150
+#define VAS_LNOTIFY_TID PPC_BITMASK(0, 15)
+
+#define VAS_LNOTIFY_SCOPE_OFFSET 0x158
+#define VAS_LNOTIFY_MIN_SCOPE PPC_BITMASK(0, 1)
+#define VAS_LNOTIFY_MAX_SCOPE PPC_BITMASK(2, 3)
+
+#define VAS_NX_UTIL_OFFSET 0x1B0
+#define VAS_NX_UTIL PPC_BITMASK(0, 63)
+
+/* SE: Side effects */
+#define VAS_NX_UTIL_SE_OFFSET 0x1B8
+#define VAS_NX_UTIL_SE PPC_BITMASK(0, 63)
+
+#define VAS_NX_UTIL_ADDER_OFFSET 0x180
+#define VAS_NX_UTIL_ADDER PPC_BITMASK(32, 63)
+
+/*
+ * Local Notify Scope Control Register. (Receive windows only).
+ *
+ * Values for the ->min_scope and ->max_scope fields of struct vas_winctx.
+ */
+enum vas_notify_scope {
+ VAS_SCOPE_LOCAL,
+ VAS_SCOPE_GROUP,
+ VAS_SCOPE_VECTORED_GROUP,
+ VAS_SCOPE_UNUSED,
+};
+
+/*
+ * Local DMA Cache Control Register (Receive windows only).
+ *
+ * Values for the ->dma_type field of struct vas_winctx.
+ */
+enum vas_dma_type {
+ VAS_DMA_TYPE_INJECT,
+ VAS_DMA_TYPE_WRITE,
+};
+
+/*
+ * Local Notify After Count Register. (Receive windows only).
+ * Not applicable to NX receive windows.
+ *
+ * Values for the ->notify_after_count field of struct vas_winctx.
+ */
+enum vas_notify_after_count {
+ VAS_NOTIFY_AFTER_256 = 0,
+ VAS_NOTIFY_NONE,
+ VAS_NOTIFY_AFTER_2
+};
+
+/*
+ * One per instance of VAS. Each instance will have a separate set of
+ * receive windows, one per coprocessor type.
+ *
+ * See also function header of set_vinst_win() for details on ->windows[]
+ * and ->rxwin[] tables.
+ */
+struct vas_instance {
+ /* Value of the "ibm,vas-id" device tree property */
+ int vas_id;
+ /* NOTE(review): presumably the window id allocator - confirm */
+ struct ida ida;
+ /* Entry in the global list of VAS instances */
+ struct list_head node;
+ struct platform_device *pdev;
+
+ /* Start addresses taken from the first three platform resources */
+ u64 hvwc_bar_start;
+ u64 uwc_bar_start;
+ u64 paste_base_addr;
+ /* Computed as 63 - ->end of the fourth platform resource */
+ u64 paste_win_id_shift;
+
+ /* Held while the two window tables below are read or updated */
+ struct mutex mutex;
+ struct vas_window *rxwin[VAS_COP_TYPE_MAX];
+ struct vas_window *windows[VAS_WINDOWS_PER_CHIP];
+};
+
+/*
+ * In-kernel state of a VAS window. One per window.
+ */
+struct vas_window {
+ /* Fields common to send and receive windows */
+ struct vas_instance *vinst;
+ int winid;
+ bool tx_win; /* True if send window */
+ bool nx_win; /* True if NX window */
+ bool user_win; /* True if user space window */
+ void *hvwc_map; /* HV window context */
+ void *uwc_map; /* OS/User window context */
+ pid_t pid; /* Linux process id of owner */
+
+ /* Fields applicable only to send windows */
+ void *paste_kaddr;
+ char *paste_addr_name;
+ struct vas_window *rxwin;
+
+ /* Fields applicable only to receive windows */
+ enum vas_cop_type cop;
+ atomic_t num_txwins;
+};
+
+/*
+ * Container for the hardware state of a window. One per-window.
+ *
+ * A VAS Window context is a 512-byte area in the hardware that contains
+ * a set of 64-bit registers. Individual bit-fields in these registers
+ * determine the configuration/operation of the hardware. struct vas_winctx
+ * is a container for the register fields in the window context.
+ *
+ * The code that fills one in zeroes the whole structure first and then
+ * sets only the fields that need a non-zero value, so a field that is
+ * not mentioned there is 0/false.
+ */
+struct vas_winctx {
+ void *rx_fifo;
+ int rx_fifo_size;
+ int wcreds_max;
+ int rsvd_txbuf_count;
+
+ bool user_win;
+ bool nx_win;
+ bool fault_win;
+ bool rsvd_txbuf_enable;
+ bool pin_win;
+ bool rej_no_credit;
+ bool tx_wcred_mode;
+ bool rx_wcred_mode;
+ bool tx_word_mode;
+ bool rx_word_mode;
+ bool data_stamp;
+ bool xtra_write;
+ bool notify_disable;
+ bool intr_disable;
+ bool fifo_disable;
+ bool notify_early;
+ bool notify_os_intr_reg;
+
+ int lpid;
+ int pidr; /* value from SPRN_PID, not linux pid */
+ int lnotify_lpid;
+ int lnotify_pid;
+ int lnotify_tid;
+ u32 pswid;
+ int rx_win_id;
+ int fault_win_id;
+ int tc_mode;
+
+ u64 irq_port;
+
+ enum vas_dma_type dma_type;
+ enum vas_notify_scope min_scope;
+ enum vas_notify_scope max_scope;
+ enum vas_notify_after_count notify_after_count;
+};
+
+extern struct vas_instance *find_vas_instance(int vasid);
+
+/*
+ * VREG(x):
+ * Expand a register's short name (eg: LPID) into two parameters:
+ * - the register's short name in string form ("LPID"), and
+ * - the name of the macro (eg: VAS_LPID_OFFSET), defining the
+ * register's offset in the window context
+ */
+#define VREG_SFX(n, s) __stringify(n), VAS_##n##s
+#define VREG(r) VREG_SFX(r, _OFFSET)
+
+#ifdef vas_debug
+/*
+ * Debug aid (vas_debug builds only): log the fault/notify/interrupt
+ * settings and the FIFO size of @attr at pr_err level.
+ */
+static inline void dump_rx_win_attr(struct vas_rx_win_attr *attr)
+{
+ pr_err("fault %d, notify %d, intr %d early %d\n",
+ attr->fault_win, attr->notify_disable,
+ attr->intr_disable, attr->notify_early);
+
+ pr_err("rx_fifo_size %d, max value %d\n",
+ attr->rx_fifo_size, VAS_RX_FIFO_SIZE_MAX);
+}
+
+/*
+ * Debug aid (vas_debug builds only): log a register write to window @win.
+ * @name is the register's short name, @regptr the address written and
+ * @val the value. Writes of 0 are not logged.
+ */
+static inline void vas_log_write(struct vas_window *win, char *name,
+ void *regptr, u64 val)
+{
+ if (val)
+ pr_err("%swin #%d: %s reg %p, val 0x%016llx\n",
+ win->tx_win ? "Tx" : "Rx", win->winid, name,
+ regptr, val);
+}
+
+#else /* vas_debug */
+
+#define vas_log_write(win, name, reg, val)
+#define dump_rx_win_attr(attr)
+
+#endif /* vas_debug */
+
+/*
+ * Write @val to the register at byte offset @reg in the OS/User window
+ * context of @win. @name is only used for debug logging. Callers usually
+ * supply both @name and @reg through VREG().
+ */
+static inline void write_uwc_reg(struct vas_window *win, char *name,
+ s32 reg, u64 val)
+{
+ void *regptr;
+
+ regptr = win->uwc_map + reg;
+ vas_log_write(win, name, regptr, val);
+
+ out_be64(regptr, val);
+}
+
+/*
+ * Write @val to the register at byte offset @reg in the hypervisor window
+ * context of @win. @name is only used for debug logging. Callers usually
+ * supply both @name and @reg through VREG().
+ */
+static inline void write_hvwc_reg(struct vas_window *win, char *name,
+ s32 reg, u64 val)
+{
+ void *regptr;
+
+ regptr = win->hvwc_map + reg;
+ vas_log_write(win, name, regptr, val);
+
+ out_be64(regptr, val);
+}
+
+/*
+ * Read the register at byte offset @reg in the hypervisor window context
+ * of @win. @name is unused; it is accepted so that callers can pass
+ * VREG(), which expands to both a name and an offset.
+ */
+static inline u64 read_hvwc_reg(struct vas_window *win,
+ char *name __maybe_unused, s32 reg)
+{
+ return in_be64(win->hvwc_map+reg);
+}
+
+#ifdef vas_debug
+
+/*
+ * Debug aid (vas_debug builds only): log the window id of @txwin together
+ * with the value of its LRFIFO_PUSH register.
+ *
+ * read_hvwc_reg() is the static inline defined earlier in this header, so
+ * it is used directly without redeclaring it here.
+ */
+static inline void print_fifo_msg_count(struct vas_window *txwin)
+{
+	pr_devel("Winid %d, Msg count %llu\n", txwin->winid,
+			(uint64_t)read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
+}
+#else /* vas_debug */
+
+#define print_fifo_msg_count(window)
+
+#endif /* vas_debug */
+
+#endif /* _VAS_H */
diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c
index 814a7eaa7769..50dbaf24b1ee 100644
--- a/arch/powerpc/platforms/ps3/repository.c
+++ b/arch/powerpc/platforms/ps3/repository.c
@@ -170,14 +170,8 @@ int ps3_repository_read_bus_str(unsigned int bus_index, const char *bus_str,
int ps3_repository_read_bus_id(unsigned int bus_index, u64 *bus_id)
{
- int result;
-
- result = read_node(PS3_LPAR_ID_PME,
- make_first_field("bus", bus_index),
- make_field("id", 0),
- 0, 0,
- bus_id, NULL);
- return result;
+ return read_node(PS3_LPAR_ID_PME, make_first_field("bus", bus_index),
+ make_field("id", 0), 0, 0, bus_id, NULL);
}
int ps3_repository_read_bus_type(unsigned int bus_index,
@@ -224,15 +218,9 @@ int ps3_repository_read_dev_str(unsigned int bus_index,
int ps3_repository_read_dev_id(unsigned int bus_index, unsigned int dev_index,
u64 *dev_id)
{
- int result;
-
- result = read_node(PS3_LPAR_ID_PME,
- make_first_field("bus", bus_index),
- make_field("dev", dev_index),
- make_field("id", 0),
- 0,
- dev_id, NULL);
- return result;
+ return read_node(PS3_LPAR_ID_PME, make_first_field("bus", bus_index),
+ make_field("dev", dev_index), make_field("id", 0), 0,
+ dev_id, NULL);
}
int ps3_repository_read_dev_type(unsigned int bus_index,
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 6244bc849469..9dabea6e1443 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -104,20 +104,6 @@ static void __noreturn ps3_halt(void)
ps3_sys_manager_halt(); /* never returns */
}
-static void ps3_panic(char *str)
-{
- DBG("%s:%d %s\n", __func__, __LINE__, str);
-
- smp_send_stop();
- printk("\n");
- printk(" System does not reboot automatically.\n");
- printk(" Please press POWER button.\n");
- printk("\n");
-
- while(1)
- lv1_pause(1);
-}
-
#if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \
defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE)
static void __init prealloc(struct ps3_prealloc *p)
@@ -269,7 +255,6 @@ define_machine(ps3) {
.probe = ps3_probe,
.setup_arch = ps3_setup_arch,
.init_IRQ = ps3_init_IRQ,
- .panic = ps3_panic,
.get_boot_time = ps3_get_boot_time,
.set_dabr = ps3_set_dabr,
.calibrate_decr = ps3_calibrate_decr,
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 3a6dfd14f64b..71dd69d9ec64 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -7,6 +7,7 @@ config PPC_PSERIES
select PCI
select PCI_MSI
select PPC_XICS
+ select PPC_XIVE_SPAPR
select PPC_ICP_NATIVE
select PPC_ICP_HV
select PPC_ICS_RTAS
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 39187696ee74..783f36364690 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -254,18 +254,15 @@ cc_error:
return first_dn;
}
-int dlpar_attach_node(struct device_node *dn)
+int dlpar_attach_node(struct device_node *dn, struct device_node *parent)
{
int rc;
- dn->parent = pseries_of_derive_parent(dn->full_name);
- if (IS_ERR(dn->parent))
- return PTR_ERR(dn->parent);
+ dn->parent = parent;
rc = of_attach_node(dn);
if (rc) {
- printk(KERN_ERR "Failed to add device node %s\n",
- dn->full_name);
+ printk(KERN_ERR "Failed to add device node %pOF\n", dn);
return rc;
}
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 1eef46d9cf30..6b812ad990e4 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -247,14 +247,13 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
/* Initialize the fake PE */
memset(&pe, 0, sizeof(struct eeh_pe));
- pe.phb = edev->phb;
+ pe.phb = pdn->phb;
pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
/* Enable EEH on the device */
ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
if (!ret) {
/* Retrieve PE address */
- edev->config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
pe.addr = edev->pe_config_addr;
@@ -279,7 +278,6 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, void *data)
/* This device doesn't support EEH, but it may have an
* EEH parent, in which case we mark it as supported.
*/
- edev->config_addr = pdn_to_eeh_dev(pdn->parent)->config_addr;
edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr;
eeh_add_to_parent_pe(edev);
}
diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c
index 32187dc76730..6eeb0d4bab61 100644
--- a/arch/powerpc/platforms/pseries/event_sources.c
+++ b/arch/powerpc/platforms/pseries/event_sources.c
@@ -36,8 +36,8 @@ void request_event_sources_irqs(struct device_node *np,
virqs[count] = irq_create_of_mapping(&oirq);
if (!virqs[count]) {
pr_err("event-sources: Unable to allocate "
- "interrupt number for %s\n",
- np->full_name);
+ "interrupt number for %pOF\n",
+ np);
WARN_ON(1);
} else {
count++;
@@ -48,7 +48,7 @@ void request_event_sources_irqs(struct device_node *np,
for (i = 0; i < count; i++) {
if (request_irq(virqs[i], handler, 0, name, NULL)) {
pr_err("event-sources: Unable to request interrupt "
- "%d for %s\n", virqs[i], np->full_name);
+ "%d for %pOF\n", virqs[i], np);
WARN_ON(1);
return;
}
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 6afd1efd3633..fc0d8f97c03a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -34,6 +34,7 @@
#include <asm/machdep.h>
#include <asm/vdso_datapage.h>
#include <asm/xics.h>
+#include <asm/xive.h>
#include <asm/plpar_wrappers.h>
#include "pseries.h"
@@ -109,7 +110,10 @@ static void pseries_mach_cpu_die(void)
local_irq_disable();
idle_task_exit();
- xics_teardown_cpu();
+ if (xive_enabled())
+ xive_teardown_cpu();
+ else
+ xics_teardown_cpu();
if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
@@ -174,7 +178,10 @@ static int pseries_cpu_disable(void)
boot_cpuid = cpumask_any(cpu_online_mask);
/* FIXME: abstract this to not be platform specific later on */
- xics_migrate_irqs_away();
+ if (xive_enabled())
+ xive_smp_disable_cpu();
+ else
+ xics_migrate_irqs_away();
return 0;
}
@@ -264,8 +271,8 @@ static int pseries_add_processor(struct device_node *np)
/* If we get here, it most likely means that NR_CPUS is
* less than the partition's max processors setting.
*/
- printk(KERN_ERR "Cannot add cpu %s; this system configuration"
- " supports %d logical cpus.\n", np->full_name,
+ printk(KERN_ERR "Cannot add cpu %pOF; this system configuration"
+ " supports %d logical cpus.\n", np,
num_possible_cpus());
goto out_unlock;
}
@@ -463,7 +470,7 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
return -EINVAL;
}
- rc = dlpar_attach_node(dn);
+ rc = dlpar_attach_node(dn, parent);
if (rc) {
saved_rc = rc;
pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index ca9b2f4aaa22..1d48ab424bd9 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -336,7 +336,38 @@ static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
return mem_block;
}
+static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online)
+{
+ struct memory_block *mem_block;
+ int rc;
+
+ mem_block = lmb_to_memblock(lmb);
+ if (!mem_block)
+ return -EINVAL;
+
+ if (online && mem_block->dev.offline)
+ rc = device_online(&mem_block->dev);
+ else if (!online && !mem_block->dev.offline)
+ rc = device_offline(&mem_block->dev);
+ else
+ rc = 0;
+
+ put_device(&mem_block->dev);
+
+ return rc;
+}
+
+static int dlpar_online_lmb(struct of_drconf_cell *lmb)
+{
+ return dlpar_change_lmb_state(lmb, true);
+}
+
#ifdef CONFIG_MEMORY_HOTREMOVE
+static int dlpar_offline_lmb(struct of_drconf_cell *lmb)
+{
+ return dlpar_change_lmb_state(lmb, false);
+}
+
static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
{
unsigned long block_sz, start_pfn;
@@ -431,19 +462,13 @@ static int dlpar_add_lmb(struct of_drconf_cell *);
static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
{
- struct memory_block *mem_block;
unsigned long block_sz;
int nid, rc;
if (!lmb_is_removable(lmb))
return -EINVAL;
- mem_block = lmb_to_memblock(lmb);
- if (!mem_block)
- return -EINVAL;
-
- rc = device_offline(&mem_block->dev);
- put_device(&mem_block->dev);
+ rc = dlpar_offline_lmb(lmb);
if (rc)
return rc;
@@ -737,20 +762,6 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
-static int dlpar_online_lmb(struct of_drconf_cell *lmb)
-{
- struct memory_block *mem_block;
- int rc;
-
- mem_block = lmb_to_memblock(lmb);
- if (!mem_block)
- return -EINVAL;
-
- rc = device_online(&mem_block->dev);
- put_device(&mem_block->dev);
- return rc;
-}
-
static int dlpar_add_lmb(struct of_drconf_cell *lmb)
{
unsigned long block_sz;
@@ -817,6 +828,9 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
return -EINVAL;
for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) {
+ if (lmbs[i].flags & DRCONF_MEM_ASSIGNED)
+ continue;
+
rc = dlpar_acquire_drc(lmbs[i].drc_index);
if (rc)
continue;
@@ -859,6 +873,7 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop)
lmbs[i].base_addr, lmbs[i].drc_index);
lmbs[i].reserved = 0;
}
+ rc = 0;
}
return rc;
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 74b5b8e239c8..c511a1743a44 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -23,7 +23,7 @@
.globl hcall_tracepoint_refcount
hcall_tracepoint_refcount:
- .llong 0
+ .8byte 0
.section ".text"
#endif
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index 52146b1356d2..408a86044133 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -150,8 +150,7 @@ static const struct dma_map_ops ibmebus_dma_ops = {
static int ibmebus_match_path(struct device *dev, void *data)
{
struct device_node *dn = to_platform_device(dev)->dev.of_node;
- return (dn->full_name &&
- (strcasecmp((char *)data, dn->full_name) == 0));
+ return (of_find_node_by_path(data) == dn);
}
static int ibmebus_match_node(struct device *dev, void *data)
@@ -395,7 +394,7 @@ static ssize_t devspec_show(struct device *dev,
struct platform_device *ofdev;
ofdev = to_platform_device(dev);
- return sprintf(buf, "%s\n", ofdev->dev.of_node->full_name);
+ return sprintf(buf, "%pOF\n", ofdev->dev.of_node);
}
static DEVICE_ATTR_RO(devspec);
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 8374adee27e3..7c181467d0ad 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -511,8 +511,8 @@ static void iommu_table_setparms(struct pci_controller *phb,
basep = of_get_property(node, "linux,tce-base", NULL);
sizep = of_get_property(node, "linux,tce-size", NULL);
if (basep == NULL || sizep == NULL) {
- printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has "
- "missing tce entries !\n", dn->full_name);
+ printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %pOF has "
+ "missing tce entries !\n", dn);
return;
}
@@ -587,7 +587,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
dn = pci_bus_to_OF_node(bus);
- pr_debug("pci_dma_bus_setup_pSeries: setting up bus %s\n", dn->full_name);
+ pr_debug("pci_dma_bus_setup_pSeries: setting up bus %pOF\n", dn);
if (bus->self) {
/* This is not a root bus, any setup will be done for the
@@ -701,8 +701,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
dn = pci_bus_to_OF_node(bus);
- pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %s\n",
- dn->full_name);
+ pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
+ dn);
/* Find nearest ibm,dma-window, walking up the device tree */
for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
@@ -718,8 +718,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
ppci = PCI_DN(pdn);
- pr_debug(" parent is %s, iommu_table: 0x%p\n",
- pdn->full_name, ppci->table_group);
+ pr_debug(" parent is %pOF, iommu_table: 0x%p\n",
+ pdn, ppci->table_group);
if (!ppci->table_group) {
ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
@@ -817,28 +817,28 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
ret = tce_clearrange_multi_pSeriesLP(0,
1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp);
if (ret)
- pr_warning("%s failed to clear tces in window.\n",
- np->full_name);
+ pr_warning("%pOF failed to clear tces in window.\n",
+ np);
else
- pr_debug("%s successfully cleared tces in window.\n",
- np->full_name);
+ pr_debug("%pOF successfully cleared tces in window.\n",
+ np);
ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
if (ret)
- pr_warning("%s: failed to remove direct window: rtas returned "
+ pr_warning("%pOF: failed to remove direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np->full_name, ret, ddw_avail[2], liobn);
+ np, ret, ddw_avail[2], liobn);
else
- pr_debug("%s: successfully removed direct window: rtas returned "
+ pr_debug("%pOF: successfully removed direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np->full_name, ret, ddw_avail[2], liobn);
+ np, ret, ddw_avail[2], liobn);
delprop:
if (remove_prop)
ret = of_remove_property(np, win64);
if (ret)
- pr_warning("%s: failed to remove direct window property: %d\n",
- np->full_name, ret);
+ pr_warning("%pOF: failed to remove direct window property: %d\n",
+ np, ret);
}
static u64 find_existing_ddw(struct device_node *pdn)
@@ -1004,7 +1004,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
* list.
*/
list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) {
- if (!strcmp(fpdn->pdn->full_name, pdn->full_name))
+ if (fpdn->pdn == pdn)
goto out_unlock;
}
@@ -1087,8 +1087,8 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
ddwprop->tce_shift = cpu_to_be32(page_shift);
ddwprop->window_shift = cpu_to_be32(len);
- dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %s\n",
- create.liobn, dn->full_name);
+ dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n",
+ create.liobn, dn);
window = kzalloc(sizeof(*window), GFP_KERNEL);
if (!window)
@@ -1097,15 +1097,15 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
win64->value, tce_setrange_multi_pSeriesLP_walk);
if (ret) {
- dev_info(&dev->dev, "failed to map direct window for %s: %d\n",
- dn->full_name, ret);
+ dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n",
+ dn, ret);
goto out_free_window;
}
ret = of_add_property(pdn, win64);
if (ret) {
- dev_err(&dev->dev, "unable to add dma window property for %s: %d",
- pdn->full_name, ret);
+ dev_err(&dev->dev, "unable to add dma window property for %pOF: %d",
+ pdn, ret);
goto out_free_window;
}
@@ -1158,7 +1158,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
* already allocated.
*/
dn = pci_device_to_OF_node(dev);
- pr_debug(" node is %s\n", dn->full_name);
+ pr_debug(" node is %pOF\n", dn);
for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
pdn = pdn->parent) {
@@ -1169,11 +1169,11 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
if (!pdn || !PCI_DN(pdn)) {
printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
- "no DMA window found for pci dev=%s dn=%s\n",
- pci_name(dev), of_node_full_name(dn));
+ "no DMA window found for pci dev=%s dn=%pOF\n",
+ pci_name(dev), dn);
return;
}
- pr_debug(" parent is %s\n", pdn->full_name);
+ pr_debug(" parent is %pOF\n", pdn);
pci = PCI_DN(pdn);
if (!pci->table_group) {
@@ -1213,7 +1213,7 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
/* only attempt to use a new window if 64-bit DMA is requested */
if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
dn = pci_device_to_OF_node(pdev);
- dev_dbg(dev, "node is %s\n", dn->full_name);
+ dev_dbg(dev, "node is %pOF\n", dn);
/*
* the device tree might contain the dma-window properties
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 6681ac97fb18..eeb13429d685 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -15,6 +15,7 @@
#include <asm/firmware.h>
#include <asm/kexec.h>
#include <asm/xics.h>
+#include <asm/xive.h>
#include <asm/smp.h>
#include <asm/plpar_wrappers.h>
@@ -51,5 +52,8 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
}
}
- xics_kexec_teardown_cpu(secondary);
+ if (xive_enabled())
+ xive_kexec_teardown_cpu(secondary);
+ else
+ xics_kexec_teardown_cpu(secondary);
}
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 2da4851eff99..210ce632d63e 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -229,7 +229,7 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
if (!dn)
return -ENOENT;
- rc = dlpar_attach_node(dn);
+ rc = dlpar_attach_node(dn, parent_dn);
if (rc)
dlpar_free_cc_nodes(dn);
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 326ef0dd6038..b7496948129e 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -132,19 +132,14 @@ static void rtas_teardown_msi_irqs(struct pci_dev *pdev)
static int check_req(struct pci_dev *pdev, int nvec, char *prop_name)
{
struct device_node *dn;
- struct pci_dn *pdn;
const __be32 *p;
u32 req_msi;
- pdn = pci_get_pdn(pdev);
- if (!pdn)
- return -ENODEV;
-
- dn = pdn->node;
+ dn = pci_device_to_OF_node(pdev);
p = of_get_property(dn, prop_name, NULL);
if (!p) {
- pr_debug("rtas_msi: No %s on %s\n", prop_name, dn->full_name);
+ pr_debug("rtas_msi: No %s on %pOF\n", prop_name, dn);
return -ENOENT;
}
@@ -182,8 +177,8 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
while (dn) {
p = of_get_property(dn, "ibm,pe-total-#msi", NULL);
if (p) {
- pr_debug("rtas_msi: found prop on dn %s\n",
- dn->full_name);
+ pr_debug("rtas_msi: found prop on dn %pOF\n",
+ dn);
*total = be32_to_cpup(p);
return dn;
}
@@ -197,7 +192,6 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
{
struct device_node *dn;
- struct pci_dn *pdn;
struct eeh_dev *edev;
/* Found our PE and assume 8 at that point. */
@@ -210,8 +204,7 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
edev = pdn_to_eeh_dev(PCI_DN(dn));
if (edev->pe)
edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list);
- pdn = eeh_dev_to_pdn(edev);
- dn = pdn ? pdn->node : NULL;
+ dn = pci_device_to_OF_node(edev->pdev);
if (!dn)
return NULL;
@@ -222,7 +215,7 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
/* Hardcode of 8 for old firmwares */
*total = 8;
- pr_debug("rtas_msi: using PE dn %s\n", dn->full_name);
+ pr_debug("rtas_msi: using PE dn %pOF\n", dn);
return dn;
}
@@ -242,7 +235,7 @@ static void *count_non_bridge_devices(struct device_node *dn, void *data)
const __be32 *p;
u32 class;
- pr_debug("rtas_msi: counting %s\n", dn->full_name);
+ pr_debug("rtas_msi: counting %pOF\n", dn);
p = of_get_property(dn, "class-code", NULL);
class = p ? be32_to_cpup(p) : 0;
@@ -300,7 +293,7 @@ static int msi_quota_for_device(struct pci_dev *dev, int request)
goto out;
}
- pr_debug("rtas_msi: found PE %s\n", pe_dn->full_name);
+ pr_debug("rtas_msi: found PE %pOF\n", pe_dn);
memset(&counts, 0, sizeof(struct msi_counts));
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 547fd13e4f8e..561917fa54a8 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -38,7 +38,7 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn)
{
struct pci_controller *phb;
- pr_debug("PCI: Initializing new hotplug PHB %s\n", dn->full_name);
+ pr_debug("PCI: Initializing new hotplug PHB %pOF\n", dn);
phb = pcibios_alloc_controller(dn);
if (!phb)
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 1361a9db534b..4470a3194311 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -46,7 +46,7 @@ extern void dlpar_free_cc_nodes(struct device_node *);
extern void dlpar_free_cc_property(struct property *);
extern struct device_node *dlpar_configure_connector(__be32,
struct device_node *);
-extern int dlpar_attach_node(struct device_node *);
+extern int dlpar_attach_node(struct device_node *, struct device_node *);
extern int dlpar_detach_node(struct device_node *);
extern int dlpar_acquire_drc(u32 drc_index);
extern int dlpar_release_drc(u32 drc_index);
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
index 164a13d3998a..35c891aabef0 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -229,10 +229,9 @@ static int __init pseries_energy_init(void)
int cpu, err;
struct device *cpu_dev;
- if (!firmware_has_feature(FW_FEATURE_BEST_ENERGY)) {
- printk(KERN_INFO "Hypercall H_BEST_ENERGY not supported\n");
- return 0;
- }
+ if (!firmware_has_feature(FW_FEATURE_BEST_ENERGY))
+ return 0; /* H_BEST_ENERGY hcall not supported */
+
/* Create the sysfs files */
err = device_create_file(cpu_subsys.dev_root,
&attr_cpu_activate_hint_list);
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index bb70b26334f0..4923ffe230cf 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -379,6 +379,21 @@ static void fwnmi_release_errinfo(void)
int pSeries_system_reset_exception(struct pt_regs *regs)
{
+#ifdef __LITTLE_ENDIAN__
+ /*
+ * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
+ * to detect the bad SRR1 pattern here. Flip the NIP back to correct
+ * endian for reporting purposes. Unfortunately the MSR can't be fixed,
+ * so clear it. It will be missing MSR_RI so we won't try to recover.
+ */
+ if ((be64_to_cpu(regs->msr) &
+ (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
+ MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
+ regs->nip = be64_to_cpu((__be64)regs->nip);
+ regs->msr = 0;
+ }
+#endif
+
if (fwnmi_active) {
struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs);
if (errhdr) {
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 011ef2180fe6..296c188fd5ca 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -362,20 +362,13 @@ static int do_update_property(char *buf, size_t bufsize)
static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t count,
loff_t *off)
{
- int rv = 0;
+ int rv;
char *kbuf;
char *tmp;
- if (!(kbuf = kmalloc(count + 1, GFP_KERNEL))) {
- rv = -ENOMEM;
- goto out;
- }
- if (copy_from_user(kbuf, buf, count)) {
- rv = -EFAULT;
- goto out;
- }
-
- kbuf[count] = '\0';
+ kbuf = memdup_user_nul(buf, count);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
tmp = strchr(kbuf, ' ');
if (!tmp) {
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index b5d86426e97b..5f1beb8367ac 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -57,6 +57,7 @@
#include <asm/nvram.h>
#include <asm/pmc.h>
#include <asm/xics.h>
+#include <asm/xive.h>
#include <asm/ppc-pci.h>
#include <asm/i8259.h>
#include <asm/udbg.h>
@@ -176,8 +177,11 @@ static void __init pseries_setup_i8259_cascade(void)
static void __init pseries_init_irq(void)
{
- xics_init();
- pseries_setup_i8259_cascade();
+ /* Try using a XIVE if available, otherwise use a XICS */
+ if (!xive_spapr_init()) {
+ xics_init();
+ pseries_setup_i8259_cascade();
+ }
}
static void pseries_lpar_enable_pmcs(void)
@@ -722,7 +726,6 @@ define_machine(pseries) {
.pcibios_fixup = pSeries_final_fixup,
.restart = rtas_restart,
.halt = rtas_halt,
- .panic = rtas_os_term,
.get_boot_time = rtas_get_boot_time,
.get_rtc_time = rtas_get_rtc_time,
.set_rtc_time = rtas_set_rtc_time,
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 24785f63fb40..2e184829e5d4 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -41,6 +41,7 @@
#include <asm/vdso_datapage.h>
#include <asm/cputhreads.h>
#include <asm/xics.h>
+#include <asm/xive.h>
#include <asm/dbell.h>
#include <asm/plpar_wrappers.h>
#include <asm/code-patching.h>
@@ -136,7 +137,9 @@ out:
static void smp_setup_cpu(int cpu)
{
- if (cpu != boot_cpuid)
+ if (xive_enabled())
+ xive_smp_setup_cpu();
+ else if (cpu != boot_cpuid)
xics_setup_cpu();
if (firmware_has_feature(FW_FEATURE_SPLPAR))
@@ -181,6 +184,13 @@ static int smp_pSeries_kick_cpu(int nr)
return 0;
}
+static int pseries_smp_prepare_cpu(int cpu)
+{
+ if (xive_enabled())
+ return xive_smp_prepare_cpu(cpu);
+ return 0;
+}
+
static void smp_pseries_cause_ipi(int cpu)
{
/* POWER9 should not use this handler */
@@ -211,7 +221,7 @@ static int pseries_cause_nmi_ipi(int cpu)
return 0;
}
-static __init void pSeries_smp_probe(void)
+static __init void pSeries_smp_probe_xics(void)
{
xics_smp_probe();
@@ -221,11 +231,24 @@ static __init void pSeries_smp_probe(void)
smp_ops->cause_ipi = icp_ops->cause_ipi;
}
+static __init void pSeries_smp_probe(void)
+{
+ if (xive_enabled())
+ /*
+ * Don't use P9 doorbells when XIVE is enabled. IPIs
+ * using MMIOs should be faster
+ */
+ xive_smp_probe();
+ else
+ pSeries_smp_probe_xics();
+}
+
static struct smp_ops_t pseries_smp_ops = {
.message_pass = NULL, /* Use smp_muxed_ipi_message_pass */
.cause_ipi = NULL, /* Filled at runtime by pSeries_smp_probe() */
.cause_nmi_ipi = pseries_cause_nmi_ipi,
.probe = pSeries_smp_probe,
+ .prepare_cpu = pseries_smp_prepare_cpu,
.kick_cpu = smp_pSeries_kick_cpu,
.setup_cpu = smp_setup_cpu,
.cpu_bootable = smp_generic_cpu_bootable,
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 8a47f168476b..12277bc9fd9e 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1357,14 +1357,14 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
*/
parent_node = of_get_parent(of_node);
if (parent_node) {
- if (!strcmp(parent_node->full_name, "/ibm,platform-facilities"))
+ if (!strcmp(parent_node->type, "ibm,platform-facilities"))
family = PFO;
- else if (!strcmp(parent_node->full_name, "/vdevice"))
+ else if (!strcmp(parent_node->type, "vdevice"))
family = VDEVICE;
else {
- pr_warn("%s: parent(%s) of %s not recognized.\n",
+ pr_warn("%s: parent(%pOF) of %s not recognized.\n",
__func__,
- parent_node->full_name,
+ parent_node,
of_node_name);
of_node_put(parent_node);
return NULL;
@@ -1555,7 +1555,7 @@ static ssize_t devspec_show(struct device *dev,
{
struct device_node *of_node = dev->of_node;
- return sprintf(buf, "%s\n", of_node_full_name(of_node));
+ return sprintf(buf, "%pOF\n", of_node);
}
static DEVICE_ATTR_RO(devspec);
diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S
index 3696ea6c4826..4aad9dd10ace 100644
--- a/arch/powerpc/purgatory/trampoline.S
+++ b/arch/powerpc/purgatory/trampoline.S
@@ -67,7 +67,7 @@ master:
mr %r16,%r3 /* save dt address in reg16 */
li %r4,20
LWZX_BE %r6,%r3,%r4 /* fetch __be32 version number at byte 20 */
- cmpwi %r0,%r6,2 /* v2 or later? */
+ cmpwi %cr0,%r6,2 /* v2 or later? */
blt 1f
li %r4,28
STWX_BE %r17,%r3,%r4 /* Store my cpu as __be32 at byte 28 */
@@ -104,13 +104,13 @@ master:
.balign 8
.globl kernel
kernel:
- .llong 0x0
+ .8byte 0x0
.size kernel, . - kernel
.balign 8
.globl dt_offset
dt_offset:
- .llong 0x0
+ .8byte 0x0
.size dt_offset, . - dt_offset
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index c0ae11d4f62f..79416fa2e3ba 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -36,25 +36,15 @@ obj-$(CONFIG_AXON_RAM) += axonram.o
obj-$(CONFIG_PPC_INDIRECT_PCI) += indirect_pci.o
obj-$(CONFIG_PPC_I8259) += i8259.o
obj-$(CONFIG_IPIC) += ipic.o
-obj-$(CONFIG_4xx) += uic.o
-obj-$(CONFIG_PPC4xx_OCM) += ppc4xx_ocm.o
-obj-$(CONFIG_4xx_SOC) += ppc4xx_soc.o
obj-$(CONFIG_XILINX_VIRTEX) += xilinx_intc.o
obj-$(CONFIG_XILINX_PCI) += xilinx_pci.o
obj-$(CONFIG_OF_RTC) += of_rtc.o
-ifeq ($(CONFIG_PCI),y)
-obj-$(CONFIG_4xx) += ppc4xx_pci.o
-endif
-obj-$(CONFIG_PPC4xx_HSTA_MSI) += ppc4xx_hsta_msi.o
-obj-$(CONFIG_PPC4xx_MSI) += ppc4xx_msi.o
-obj-$(CONFIG_PPC4xx_CPM) += ppc4xx_cpm.o
-obj-$(CONFIG_PPC4xx_GPIO) += ppc4xx_gpio.o
obj-$(CONFIG_CPM) += cpm_common.o
+obj-$(CONFIG_CPM1) += cpm1.o
obj-$(CONFIG_CPM2) += cpm2.o cpm2_pic.o
obj-$(CONFIG_QUICC_ENGINE) += cpm_common.o
obj-$(CONFIG_PPC_DCR) += dcr.o
-obj-$(CONFIG_8xx) += mpc8xx_pic.o cpm1.o
obj-$(CONFIG_UCODE_PATCH) += micropatch.o
obj-$(CONFIG_PPC_MPC512x) += mpc5xxx_clocks.o
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 2799706106c6..c60e84e4558d 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -110,7 +110,7 @@ axon_ram_irq_handler(int irq, void *dev)
static blk_qc_t
axon_ram_make_request(struct request_queue *queue, struct bio *bio)
{
- struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data;
+ struct axon_ram_bank *bank = bio->bi_disk->private_data;
unsigned long phys_mem, phys_end;
void *user_mem;
struct bio_vec vec;
@@ -188,15 +188,12 @@ static int axon_ram_probe(struct platform_device *device)
axon_ram_bank_id++;
- dev_info(&device->dev, "Found memory controller on %s\n",
- device->dev.of_node->full_name);
+ dev_info(&device->dev, "Found memory controller on %pOF\n",
+ device->dev.of_node);
- bank = kzalloc(sizeof(struct axon_ram_bank), GFP_KERNEL);
- if (bank == NULL) {
- dev_err(&device->dev, "Out of memory\n");
- rc = -ENOMEM;
- goto failed;
- }
+ bank = kzalloc(sizeof(*bank), GFP_KERNEL);
+ if (!bank)
+ return -ENOMEM;
device->dev.platform_data = bank;
@@ -292,25 +289,22 @@ static int axon_ram_probe(struct platform_device *device)
return 0;
failed:
- if (bank != NULL) {
- if (bank->irq_id)
- free_irq(bank->irq_id, device);
- if (bank->disk != NULL) {
- if (bank->disk->major > 0)
- unregister_blkdev(bank->disk->major,
- bank->disk->disk_name);
- if (bank->disk->flags & GENHD_FL_UP)
- del_gendisk(bank->disk);
- put_disk(bank->disk);
- }
- kill_dax(bank->dax_dev);
- put_dax(bank->dax_dev);
- device->dev.platform_data = NULL;
- if (bank->io_addr != 0)
- iounmap((void __iomem *) bank->io_addr);
- kfree(bank);
+ if (bank->irq_id)
+ free_irq(bank->irq_id, device);
+ if (bank->disk != NULL) {
+ if (bank->disk->major > 0)
+ unregister_blkdev(bank->disk->major,
+ bank->disk->disk_name);
+ if (bank->disk->flags & GENHD_FL_UP)
+ del_gendisk(bank->disk);
+ put_disk(bank->disk);
}
-
+ kill_dax(bank->dax_dev);
+ put_dax(bank->dax_dev);
+ device->dev.platform_data = NULL;
+ if (bank->io_addr != 0)
+ iounmap((void __iomem *) bank->io_addr);
+ kfree(bank);
return rc;
}
diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c
index 121e26fffd50..d72eda568b7d 100644
--- a/arch/powerpc/sysdev/dcr.c
+++ b/arch/powerpc/sysdev/dcr.c
@@ -195,8 +195,8 @@ dcr_host_mmio_t dcr_map_mmio(struct device_node *dev,
dcr_host_mmio_t ret = { .token = NULL, .stride = 0, .base = dcr_n };
u64 addr;
- pr_debug("dcr_map(%s, 0x%x, 0x%x)\n",
- dev->full_name, dcr_n, dcr_c);
+ pr_debug("dcr_map(%pOF, 0x%x, 0x%x)\n",
+ dev, dcr_n, dcr_c);
addr = of_translate_dcr_address(dev, dcr_n, &ret.stride);
pr_debug("translates to addr: 0x%llx, stride: 0x%x\n",
diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
index 37a69097e022..00ccf3e4fcb4 100644
--- a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
+++ b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
@@ -101,8 +101,8 @@ int __init instantiate_cache_sram(struct platform_device *dev,
if (!request_mem_region(cache_sram->base_phys, cache_sram->size,
"fsl_85xx_cache_sram")) {
- dev_err(&dev->dev, "%s: request memory failed\n",
- dev->dev.of_node->full_name);
+ dev_err(&dev->dev, "%pOF: request memory failed\n",
+ dev->dev.of_node);
ret = -ENXIO;
goto out_free;
}
@@ -110,16 +110,16 @@ int __init instantiate_cache_sram(struct platform_device *dev,
cache_sram->base_virt = ioremap_prot(cache_sram->base_phys,
cache_sram->size, _PAGE_COHERENT | PAGE_KERNEL);
if (!cache_sram->base_virt) {
- dev_err(&dev->dev, "%s: ioremap_prot failed\n",
- dev->dev.of_node->full_name);
+ dev_err(&dev->dev, "%pOF: ioremap_prot failed\n",
+ dev->dev.of_node);
ret = -ENOMEM;
goto out_release;
}
cache_sram->rh = rh_create(sizeof(unsigned int));
if (IS_ERR(cache_sram->rh)) {
- dev_err(&dev->dev, "%s: Unable to create remote heap\n",
- dev->dev.of_node->full_name);
+ dev_err(&dev->dev, "%pOF: Unable to create remote heap\n",
+ dev->dev.of_node);
ret = PTR_ERR(cache_sram->rh);
goto out_unmap;
}
diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c
index a6f0b96ce2c9..d902306f4718 100644
--- a/arch/powerpc/sysdev/fsl_gtm.c
+++ b/arch/powerpc/sysdev/fsl_gtm.c
@@ -388,8 +388,8 @@ static int __init fsl_gtm_init(void)
gtm = kzalloc(sizeof(*gtm), GFP_KERNEL);
if (!gtm) {
- pr_err("%s: unable to allocate memory\n",
- np->full_name);
+ pr_err("%pOF: unable to allocate memory\n",
+ np);
continue;
}
@@ -397,7 +397,7 @@ static int __init fsl_gtm_init(void)
clock = of_get_property(np, "clock-frequency", &size);
if (!clock || size != sizeof(*clock)) {
- pr_err("%s: no clock-frequency\n", np->full_name);
+ pr_err("%pOF: no clock-frequency\n", np);
goto err;
}
gtm->clock = *clock;
@@ -407,8 +407,8 @@ static int __init fsl_gtm_init(void)
irq = irq_of_parse_and_map(np, i);
if (!irq) {
- pr_err("%s: not enough interrupts specified\n",
- np->full_name);
+ pr_err("%pOF: not enough interrupts specified\n",
+ np);
goto err;
}
gtm->timers[i].irq = irq;
@@ -417,8 +417,8 @@ static int __init fsl_gtm_init(void)
gtm->regs = of_iomap(np, 0);
if (!gtm->regs) {
- pr_err("%s: unable to iomap registers\n",
- np->full_name);
+ pr_err("%pOF: unable to iomap registers\n",
+ np);
goto err;
}
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 8a244828782e..44cbf4c12ea1 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -214,8 +214,8 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
phandle = np->phandle;
else {
dev_err(&pdev->dev,
- "node %s has an invalid fsl,msi phandle %u\n",
- hose->dn->full_name, np->phandle);
+ "node %pOF has an invalid fsl,msi phandle %u\n",
+ hose->dn, np->phandle);
return -EINVAL;
}
}
@@ -438,16 +438,16 @@ static int fsl_of_msi_probe(struct platform_device *dev)
if ((features->fsl_pic_ip & FSL_PIC_IP_MASK) != FSL_PIC_IP_VMPIC) {
err = of_address_to_resource(dev->dev.of_node, 0, &res);
if (err) {
- dev_err(&dev->dev, "invalid resource for node %s\n",
- dev->dev.of_node->full_name);
+ dev_err(&dev->dev, "invalid resource for node %pOF\n",
+ dev->dev.of_node);
goto error_out;
}
msi->msi_regs = ioremap(res.start, resource_size(&res));
if (!msi->msi_regs) {
err = -ENOMEM;
- dev_err(&dev->dev, "could not map node %s\n",
- dev->dev.of_node->full_name);
+ dev_err(&dev->dev, "could not map node %pOF\n",
+ dev->dev.of_node);
goto error_out;
}
msi->msiir_offset =
@@ -522,8 +522,8 @@ static int fsl_of_msi_probe(struct platform_device *dev)
for (irq_index = 0, i = 0; i < len / (2 * sizeof(u32)); i++) {
if (p[i * 2] % IRQS_PER_MSI_REG ||
p[i * 2 + 1] % IRQS_PER_MSI_REG) {
- pr_warn("%s: %s: msi available range of %u at %u is not IRQ-aligned\n",
- __func__, dev->dev.of_node->full_name,
+ pr_warn("%s: %pOF: msi available range of %u at %u is not IRQ-aligned\n",
+ __func__, dev->dev.of_node,
p[i * 2 + 1], p[i * 2]);
err = -EINVAL;
goto error_out;
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index d3a597456b6e..22d98057f773 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -202,7 +202,6 @@ static void setup_pci_atmu(struct pci_controller *hose)
u32 pcicsrbar = 0, pcicsrbar_sz;
u32 piwar = PIWAR_EN | PIWAR_PF | PIWAR_TGI_LOCAL |
PIWAR_READ_SNOOP | PIWAR_WRITE_SNOOP;
- const char *name = hose->dn->full_name;
const u64 *reg;
int len;
bool setup_inbound;
@@ -290,12 +289,12 @@ static void setup_pci_atmu(struct pci_controller *hose)
paddr_lo -= offset;
if (paddr_hi == paddr_lo) {
- pr_err("%s: No outbound window space\n", name);
+ pr_err("%pOF: No outbound window space\n", hose->dn);
return;
}
if (paddr_lo == 0) {
- pr_err("%s: No space for inbound window\n", name);
+ pr_err("%pOF: No space for inbound window\n", hose->dn);
return;
}
@@ -313,7 +312,7 @@ static void setup_pci_atmu(struct pci_controller *hose)
paddr_lo = min(paddr_lo, (u64)pcicsrbar);
- pr_info("%s: PCICSRBAR @ 0x%x\n", name, pcicsrbar);
+ pr_info("%pOF: PCICSRBAR @ 0x%x\n", hose->dn, pcicsrbar);
/* Setup inbound mem window */
mem = memblock_end_of_DRAM();
@@ -336,12 +335,12 @@ static void setup_pci_atmu(struct pci_controller *hose)
u64 address = be64_to_cpup(reg);
if ((address >= mem) && (address < (mem + PAGE_SIZE))) {
- pr_info("%s: extending DDR ATMU to cover MSIIR", name);
+ pr_info("%pOF: extending DDR ATMU to cover MSIIR", hose->dn);
mem += PAGE_SIZE;
} else {
/* TODO: Create a new ATMU for MSIIR */
- pr_warn("%s: msi-address-64 address of %llx is "
- "unsupported\n", name, address);
+ pr_warn("%pOF: msi-address-64 address of %llx is "
+ "unsupported\n", hose->dn, address);
}
}
@@ -354,8 +353,8 @@ static void setup_pci_atmu(struct pci_controller *hose)
if ((1ull << mem_log) != mem) {
mem_log++;
if ((1ull << mem_log) > mem)
- pr_info("%s: Setting PCI inbound window "
- "greater than memory size\n", name);
+ pr_info("%pOF: Setting PCI inbound window "
+ "greater than memory size\n", hose->dn);
}
piwar |= ((mem_log - 1) & PIWAR_SZ_MASK);
@@ -402,7 +401,7 @@ static void setup_pci_atmu(struct pci_controller *hose)
*/
ppc_md.dma_set_mask = fsl_pci_dma_set_mask;
- pr_info("%s: Setup 64-bit PCI DMA window\n", name);
+ pr_info("%pOF: Setup 64-bit PCI DMA window\n", hose->dn);
}
} else {
u64 paddr = 0;
@@ -443,18 +442,18 @@ static void setup_pci_atmu(struct pci_controller *hose)
#ifdef CONFIG_SWIOTLB
ppc_swiotlb_enable = 1;
#else
- pr_err("%s: ERROR: Memory size exceeds PCI ATMU ability to "
+ pr_err("%pOF: ERROR: Memory size exceeds PCI ATMU ability to "
"map - enable CONFIG_SWIOTLB to avoid dma errors.\n",
- name);
+ hose->dn);
#endif
/* adjusting outbound windows could reclaim space in mem map */
if (paddr_hi < 0xffffffffull)
- pr_warning("%s: WARNING: Outbound window cfg leaves "
+ pr_warning("%pOF: WARNING: Outbound window cfg leaves "
"gaps in memory map. Adjusting the memory map "
"could reduce unnecessary bounce buffering.\n",
- name);
+ hose->dn);
- pr_info("%s: DMA window size is 0x%llx\n", name,
+ pr_info("%pOF: DMA window size is 0x%llx\n", hose->dn,
(u64)hose->dma_window_size);
}
}
@@ -532,11 +531,11 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary)
dev = pdev->dev.of_node;
if (!of_device_is_available(dev)) {
- pr_warning("%s: disabled\n", dev->full_name);
+ pr_warning("%pOF: disabled\n", dev);
return -ENODEV;
}
- pr_debug("Adding PCI host bridge %s\n", dev->full_name);
+ pr_debug("Adding PCI host bridge %pOF\n", dev);
/* Fetch host bridge registers address */
if (of_address_to_resource(dev, 0, &rsrc)) {
@@ -547,8 +546,8 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary)
/* Get bus range if any */
bus_range = of_get_property(dev, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int))
- printk(KERN_WARNING "Can't get bus-range for %s, assume"
- " bus 0\n", dev->full_name);
+ printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+ " bus 0\n", dev);
pci_add_flags(PCI_REASSIGN_ALL_BUS);
hose = pcibios_alloc_controller(dev);
@@ -809,11 +808,11 @@ int __init mpc83xx_add_bridge(struct device_node *dev)
is_mpc83xx_pci = 1;
if (!of_device_is_available(dev)) {
- pr_warning("%s: disabled by the firmware.\n",
- dev->full_name);
+ pr_warning("%pOF: disabled by the firmware.\n",
+ dev);
return -ENODEV;
}
- pr_debug("Adding PCI host bridge %s\n", dev->full_name);
+ pr_debug("Adding PCI host bridge %pOF\n", dev);
/* Fetch host bridge registers address */
if (of_address_to_resource(dev, 0, &rsrc_reg)) {
@@ -848,8 +847,8 @@ int __init mpc83xx_add_bridge(struct device_node *dev)
/* Get bus range if any */
bus_range = of_get_property(dev, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int)) {
- printk(KERN_WARNING "Can't get bus-range for %s, assume"
- " bus 0\n", dev->full_name);
+ printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+ " bus 0\n", dev);
}
pci_add_flags(PCI_REASSIGN_ALL_BUS);
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index 1c41c51f22cb..9234be1e66f5 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -450,12 +450,12 @@ int fsl_rio_setup(struct platform_device *dev)
rc = of_address_to_resource(dev->dev.of_node, 0, &regs);
if (rc) {
- dev_err(&dev->dev, "Can't get %s property 'reg'\n",
- dev->dev.of_node->full_name);
+ dev_err(&dev->dev, "Can't get %pOF property 'reg'\n",
+ dev->dev.of_node);
return -EFAULT;
}
- dev_info(&dev->dev, "Of-device full name %s\n",
- dev->dev.of_node->full_name);
+ dev_info(&dev->dev, "Of-device full name %pOF\n",
+ dev->dev.of_node);
dev_info(&dev->dev, "Regs: %pR\n", &regs);
rio_regs_win = ioremap(regs.start, resource_size(&regs));
@@ -494,8 +494,8 @@ int fsl_rio_setup(struct platform_device *dev)
}
rc = of_address_to_resource(rmu_node, 0, &rmu_regs);
if (rc) {
- dev_err(&dev->dev, "Can't get %s property 'reg'\n",
- rmu_node->full_name);
+ dev_err(&dev->dev, "Can't get %pOF property 'reg'\n",
+ rmu_node);
goto err_rmu;
}
rmu_regs_win = ioremap(rmu_regs.start, resource_size(&rmu_regs));
@@ -529,8 +529,8 @@ int fsl_rio_setup(struct platform_device *dev)
aw = of_n_addr_cells(np);
dt_range = of_get_property(np, "reg", &rlen);
if (!dt_range) {
- pr_err("%s: unable to find 'reg' property\n",
- np->full_name);
+ pr_err("%pOF: unable to find 'reg' property\n",
+ np);
rc = -ENOMEM;
goto err_pw;
}
@@ -557,8 +557,8 @@ int fsl_rio_setup(struct platform_device *dev)
aw = of_n_addr_cells(np);
dt_range = of_get_property(np, "reg", &rlen);
if (!dt_range) {
- pr_err("%s: unable to find 'reg' property\n",
- np->full_name);
+ pr_err("%pOF: unable to find 'reg' property\n",
+ np);
rc = -ENOMEM;
goto err;
}
@@ -569,15 +569,15 @@ int fsl_rio_setup(struct platform_device *dev)
for_each_child_of_node(dev->dev.of_node, np) {
port_index = of_get_property(np, "cell-index", NULL);
if (!port_index) {
- dev_err(&dev->dev, "Can't get %s property 'cell-index'\n",
- np->full_name);
+ dev_err(&dev->dev, "Can't get %pOF property 'cell-index'\n",
+ np);
continue;
}
dt_range = of_get_property(np, "ranges", &rlen);
if (!dt_range) {
- dev_err(&dev->dev, "Can't get %s property 'ranges'\n",
- np->full_name);
+ dev_err(&dev->dev, "Can't get %pOF property 'ranges'\n",
+ np);
continue;
}
@@ -598,8 +598,8 @@ int fsl_rio_setup(struct platform_device *dev)
range_start = of_read_number(dt_range + aw, paw);
range_size = of_read_number(dt_range + aw + paw, sw);
- dev_info(&dev->dev, "%s: LAW start 0x%016llx, size 0x%016llx.\n",
- np->full_name, range_start, range_size);
+ dev_info(&dev->dev, "%pOF: LAW start 0x%016llx, size 0x%016llx.\n",
+ np, range_start, range_size);
port = kzalloc(sizeof(struct rio_mport), GFP_KERNEL);
if (!port)
@@ -757,8 +757,8 @@ err_rio_regs:
*/
static int fsl_of_rio_rpn_probe(struct platform_device *dev)
{
- printk(KERN_INFO "Setting up RapidIO peer-to-peer network %s\n",
- dev->dev.of_node->full_name);
+ printk(KERN_INFO "Setting up RapidIO peer-to-peer network %pOF\n",
+ dev->dev.of_node);
return fsl_rio_setup(dev);
};
diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c
index c1826de4e749..ab7a74c75be8 100644
--- a/arch/powerpc/sysdev/fsl_rmu.c
+++ b/arch/powerpc/sysdev/fsl_rmu.c
@@ -1074,8 +1074,8 @@ int fsl_rio_setup_rmu(struct rio_mport *mport, struct device_node *node)
priv = mport->priv;
if (!node) {
- dev_warn(priv->dev, "Can't get %s property 'fsl,rmu'\n",
- priv->dev->of_node->full_name);
+ dev_warn(priv->dev, "Can't get %pOF property 'fsl,rmu'\n",
+ priv->dev->of_node);
return -EINVAL;
}
@@ -1086,8 +1086,8 @@ int fsl_rio_setup_rmu(struct rio_mport *mport, struct device_node *node)
aw = of_n_addr_cells(node);
msg_addr = of_get_property(node, "reg", &mlen);
if (!msg_addr) {
- pr_err("%s: unable to find 'reg' property of message-unit\n",
- node->full_name);
+ pr_err("%pOF: unable to find 'reg' property of message-unit\n",
+ node);
kfree(rmu);
return -ENOMEM;
}
@@ -1098,8 +1098,8 @@ int fsl_rio_setup_rmu(struct rio_mport *mport, struct device_node *node)
rmu->txirq = irq_of_parse_and_map(node, 0);
rmu->rxirq = irq_of_parse_and_map(node, 1);
- printk(KERN_INFO "%s: txirq: %d, rxirq %d\n",
- node->full_name, rmu->txirq, rmu->rxirq);
+ printk(KERN_INFO "%pOF: txirq: %d, rxirq %d\n",
+ node, rmu->txirq, rmu->rxirq);
priv->rmm_handle = rmu;
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index 19101f9cfcfc..1f614fb2be56 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -98,7 +98,7 @@ u32 fsl_get_sys_freq(void)
}
EXPORT_SYMBOL(fsl_get_sys_freq);
-#if defined(CONFIG_CPM2) || defined(CONFIG_QUICC_ENGINE) || defined(CONFIG_8xx)
+#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE)
u32 get_brgfreq(void)
{
diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h
index d73daa4f0ccf..2640446f8bc4 100644
--- a/arch/powerpc/sysdev/fsl_soc.h
+++ b/arch/powerpc/sysdev/fsl_soc.h
@@ -7,7 +7,7 @@
struct spi_device;
extern phys_addr_t get_immrbase(void);
-#if defined(CONFIG_CPM2) || defined(CONFIG_QUICC_ENGINE) || defined(CONFIG_8xx)
+#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE)
extern u32 get_brgfreq(void);
extern u32 get_baudrate(void);
#else
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index f267ee0afc08..16f1edd78c40 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -315,6 +315,7 @@ static struct ipic_info ipic_info[] = {
.prio_mask = 7,
},
[48] = {
+ .ack = IPIC_SEPNR,
.mask = IPIC_SEMSR,
.prio = IPIC_SMPRR_A,
.force = IPIC_SEFCR,
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index b9aac951a90f..ead3e2549ebf 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1650,8 +1650,8 @@ void __init mpic_init(struct mpic *mpic)
if (mpic->flags & MPIC_SECONDARY) {
int virq = irq_of_parse_and_map(mpic->node, 0);
if (virq) {
- printk(KERN_INFO "%s: hooking up to IRQ %d\n",
- mpic->node->full_name, virq);
+ printk(KERN_INFO "%pOF: hooking up to IRQ %d\n",
+ mpic->node, virq);
irq_set_handler_data(virq, mpic);
irq_set_chained_handler(virq, &mpic_cascade);
}
diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c
index db2286be5d9a..eb69a5186243 100644
--- a/arch/powerpc/sysdev/mpic_msgr.c
+++ b/arch/powerpc/sysdev/mpic_msgr.c
@@ -192,7 +192,7 @@ static int mpic_msgr_probe(struct platform_device *dev)
return -ENOMEM;
}
}
- dev_info(&dev->dev, "Of-device full name %s\n", np->full_name);
+ dev_info(&dev->dev, "Of-device full name %pOF\n", np);
/* IO map the message register block. */
of_address_to_resource(np, 0, &rsrc);
diff --git a/arch/powerpc/sysdev/mpic_msi.c b/arch/powerpc/sysdev/mpic_msi.c
index 1d48a5385905..9ed860aee9c3 100644
--- a/arch/powerpc/sysdev/mpic_msi.c
+++ b/arch/powerpc/sysdev/mpic_msi.c
@@ -60,7 +60,7 @@ static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic)
np = NULL;
while ((np = of_find_all_nodes(np))) {
- pr_debug("mpic: mapping hwirqs for %s\n", np->full_name);
+ pr_debug("mpic: mapping hwirqs for %pOF\n", np);
index = 0;
while (of_irq_parse_one(np, index++, &oirq) == 0) {
diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c
index 9d9b06217f8b..a418579591be 100644
--- a/arch/powerpc/sysdev/mpic_timer.c
+++ b/arch/powerpc/sysdev/mpic_timer.c
@@ -466,8 +466,7 @@ static int timer_group_get_irq(struct device_node *np,
p = of_get_property(np, "fsl,available-ranges", &len);
if (p && len % (2 * sizeof(u32)) != 0) {
- pr_err("%s: malformed available-ranges property.\n",
- np->full_name);
+ pr_err("%pOF: malformed available-ranges property.\n", np);
return -EINVAL;
}
@@ -484,8 +483,7 @@ static int timer_group_get_irq(struct device_node *np,
for (j = 0; j < count; j++) {
irq = irq_of_parse_and_map(np, irq_index);
if (!irq) {
- pr_err("%s: irq parse and map failed.\n",
- np->full_name);
+ pr_err("%pOF: irq parse and map failed.\n", np);
return -EINVAL;
}
@@ -508,8 +506,7 @@ static void timer_group_init(struct device_node *np)
priv = kzalloc(sizeof(struct timer_group_priv), GFP_KERNEL);
if (!priv) {
- pr_err("%s: cannot allocate memory for group.\n",
- np->full_name);
+ pr_err("%pOF: cannot allocate memory for group.\n", np);
return;
}
@@ -518,29 +515,27 @@ static void timer_group_init(struct device_node *np)
priv->regs = of_iomap(np, i++);
if (!priv->regs) {
- pr_err("%s: cannot ioremap timer register address.\n",
- np->full_name);
+ pr_err("%pOF: cannot ioremap timer register address.\n", np);
goto out;
}
if (priv->flags & FSL_GLOBAL_TIMER) {
priv->group_tcr = of_iomap(np, i++);
if (!priv->group_tcr) {
- pr_err("%s: cannot ioremap tcr address.\n",
- np->full_name);
+ pr_err("%pOF: cannot ioremap tcr address.\n", np);
goto out;
}
}
ret = timer_group_get_freq(np, priv);
if (ret < 0) {
- pr_err("%s: cannot get timer frequency.\n", np->full_name);
+ pr_err("%pOF: cannot get timer frequency.\n", np);
goto out;
}
ret = timer_group_get_irq(np, priv);
if (ret < 0) {
- pr_err("%s: cannot get timer irqs.\n", np->full_name);
+ pr_err("%pOF: cannot get timer irqs.\n", np);
goto out;
}
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
index 5ebd3f018295..c4dae27172b3 100644
--- a/arch/powerpc/sysdev/msi_bitmap.c
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -86,13 +86,13 @@ int msi_bitmap_reserve_dt_hwirqs(struct msi_bitmap *bmp)
p = of_get_property(bmp->of_node, "msi-available-ranges", &len);
if (!p) {
pr_debug("msi_bitmap: no msi-available-ranges property " \
- "found on %s\n", bmp->of_node->full_name);
+ "found on %pOF\n", bmp->of_node);
return 1;
}
if (len % (2 * sizeof(u32)) != 0) {
printk(KERN_WARNING "msi_bitmap: Malformed msi-available-ranges"
- " property on %s\n", bmp->of_node->full_name);
+ " property on %pOF\n", bmp->of_node);
return -EINVAL;
}
diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c
index 026bbc3b2c47..185a67e742a6 100644
--- a/arch/powerpc/sysdev/mv64x60_dev.c
+++ b/arch/powerpc/sysdev/mv64x60_dev.c
@@ -452,8 +452,8 @@ static int __init mv64x60_device_setup(void)
err = mv64x60_mpsc_device_setup(np, id++);
if (err)
printk(KERN_ERR "Failed to initialize MV64x60 "
- "serial device %s: error %d.\n",
- np->full_name, err);
+ "serial device %pOF: error %d.\n",
+ np, err);
}
id = 0;
@@ -463,8 +463,8 @@ static int __init mv64x60_device_setup(void)
if (IS_ERR(pdev)) {
err = PTR_ERR(pdev);
printk(KERN_ERR "Failed to initialize MV64x60 "
- "network block %s: error %d.\n",
- np->full_name, err);
+ "network block %pOF: error %d.\n",
+ np, err);
continue;
}
for_each_child_of_node(np, np2) {
@@ -474,9 +474,9 @@ static int __init mv64x60_device_setup(void)
err = mv64x60_eth_device_setup(np2, id2++, pdev);
if (err)
printk(KERN_ERR "Failed to initialize "
- "MV64x60 network device %s: "
+ "MV64x60 network device %pOF: "
"error %d.\n",
- np2->full_name, err);
+ np2, err);
}
}
@@ -485,8 +485,8 @@ static int __init mv64x60_device_setup(void)
err = mv64x60_i2c_device_setup(np, id++);
if (err)
printk(KERN_ERR "Failed to initialize MV64x60 I2C "
- "bus %s: error %d.\n",
- np->full_name, err);
+ "bus %pOF: error %d.\n",
+ np, err);
}
/* support up to one watchdog timer */
@@ -494,8 +494,8 @@ static int __init mv64x60_device_setup(void)
if (np) {
if ((err = mv64x60_wdt_device_setup(np, id)))
printk(KERN_ERR "Failed to initialize MV64x60 "
- "Watchdog %s: error %d.\n",
- np->full_name, err);
+ "Watchdog %pOF: error %d.\n",
+ np, err);
of_node_put(np);
}
diff --git a/arch/powerpc/sysdev/mv64x60_pci.c b/arch/powerpc/sysdev/mv64x60_pci.c
index 330d56613c5a..d52b3b81e05f 100644
--- a/arch/powerpc/sysdev/mv64x60_pci.c
+++ b/arch/powerpc/sysdev/mv64x60_pci.c
@@ -70,7 +70,7 @@ static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj,
return count;
}
-static struct bin_attribute mv64x60_hs_reg_attr = { /* Hotswap register */
+static const struct bin_attribute mv64x60_hs_reg_attr = { /* Hotswap register */
.attr = {
.name = "hs_reg",
.mode = S_IRUGO | S_IWUSR,
@@ -136,8 +136,8 @@ static int __init mv64x60_add_bridge(struct device_node *dev)
/* Get bus range if any */
bus_range = of_get_property(dev, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int))
- printk(KERN_WARNING "Can't get bus-range for %s, assume"
- " bus 0\n", dev->full_name);
+ printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+ " bus 0\n", dev);
hose = pcibios_alloc_controller(dev);
if (!hose)
diff --git a/arch/powerpc/sysdev/of_rtc.c b/arch/powerpc/sysdev/of_rtc.c
index 6f54b54b1328..153fdac4720f 100644
--- a/arch/powerpc/sysdev/of_rtc.c
+++ b/arch/powerpc/sysdev/of_rtc.c
@@ -38,21 +38,21 @@ void __init of_instantiate_rtc(void)
res = kmalloc(sizeof(*res), GFP_KERNEL);
if (!res) {
printk(KERN_ERR "OF RTC: Out of memory "
- "allocating resource structure for %s\n",
- node->full_name);
+ "allocating resource structure for %pOF\n",
+ node);
continue;
}
err = of_address_to_resource(node, 0, res);
if (err) {
printk(KERN_ERR "OF RTC: Error "
- "translating resources for %s\n",
- node->full_name);
+ "translating resources for %pOF\n",
+ node);
continue;
}
- printk(KERN_INFO "OF_RTC: %s is a %s @ 0x%llx-0x%llx\n",
- node->full_name, plat_name,
+ printk(KERN_INFO "OF_RTC: %pOF is a %s @ 0x%llx-0x%llx\n",
+ node, plat_name,
(unsigned long long)res->start,
(unsigned long long)res->end);
platform_device_register_simple(plat_name, -1, res, 1);
diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
index 76ea32c1b664..0f6fd5d04d33 100644
--- a/arch/powerpc/sysdev/scom.c
+++ b/arch/powerpc/sysdev/scom.c
@@ -194,12 +194,13 @@ static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
ent->dn = of_node_get(dn);
snprintf(ent->name, 16, "%08x", i);
- ent->path.data = (void*) dn->full_name;
- ent->path.size = strlen(dn->full_name);
+ ent->path.data = (void*)kasprintf(GFP_KERNEL, "%pOF", dn);
+ ent->path.size = strlen((char *)ent->path.data);
dir = debugfs_create_dir(ent->name, root);
if (!dir) {
of_node_put(dn);
+ kfree(ent->path.data);
kfree(ent);
return -1;
}
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
index 6afddae2fb47..f02d4576138c 100644
--- a/arch/powerpc/sysdev/simple_gpio.c
+++ b/arch/powerpc/sysdev/simple_gpio.c
@@ -142,7 +142,6 @@ void __init simple_gpiochip_init(const char *compatible)
}
continue;
err:
- pr_err("%s: registration failed, status %d\n",
- np->full_name, ret);
+ pr_err("%pOF: registration failed, status %d\n", np, ret);
}
}
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
index 5692dd569b9b..28ff1f53cefc 100644
--- a/arch/powerpc/sysdev/tsi108_pci.c
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -213,8 +213,8 @@ int __init tsi108_setup_pci(struct device_node *dev, u32 cfg_phys, int primary)
/* Get bus range if any */
bus_range = of_get_property(dev, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int)) {
- printk(KERN_WARNING "Can't get bus-range for %s, assume"
- " bus 0\n", dev->full_name);
+ printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+ " bus 0\n", dev);
}
hose = pcibios_alloc_controller(dev);
diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig
index 12ccd7373d2f..3e3e25b5e30d 100644
--- a/arch/powerpc/sysdev/xive/Kconfig
+++ b/arch/powerpc/sysdev/xive/Kconfig
@@ -9,3 +9,8 @@ config PPC_XIVE_NATIVE
default n
select PPC_XIVE
depends on PPC_POWERNV
+
+config PPC_XIVE_SPAPR
+ bool
+ default n
+ select PPC_XIVE
diff --git a/arch/powerpc/sysdev/xive/Makefile b/arch/powerpc/sysdev/xive/Makefile
index 3fab303fc169..536d6e5706e3 100644
--- a/arch/powerpc/sysdev/xive/Makefile
+++ b/arch/powerpc/sysdev/xive/Makefile
@@ -2,3 +2,4 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
obj-y += common.o
obj-$(CONFIG_PPC_XIVE_NATIVE) += native.o
+obj-$(CONFIG_PPC_XIVE_SPAPR) += spapr.o
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 6595462b1fc8..f387318678b9 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -40,7 +40,8 @@
#undef DEBUG_ALL
#ifdef DEBUG_ALL
-#define DBG_VERBOSE(fmt...) pr_devel(fmt)
+#define DBG_VERBOSE(fmt, ...) pr_devel("cpu %d - " fmt, \
+ smp_processor_id(), ## __VA_ARGS__)
#else
#define DBG_VERBOSE(fmt...) do { } while(0)
#endif
@@ -190,7 +191,7 @@ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
* This is used to perform the magic loads from an ESB
* described in xive.h
*/
-static u8 xive_poke_esb(struct xive_irq_data *xd, u32 offset)
+static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset)
{
u64 val;
@@ -198,13 +199,28 @@ static u8 xive_poke_esb(struct xive_irq_data *xd, u32 offset)
if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
offset |= offset << 4;
- val = in_be64(xd->eoi_mmio + offset);
+ if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
+ val = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0);
+ else
+ val = in_be64(xd->eoi_mmio + offset);
return (u8)val;
}
+static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data)
+{
+ /* Handle HW errata */
+ if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
+ offset |= offset << 4;
+
+ if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
+ xive_ops->esb_rw(xd->hw_irq, offset, data, 1);
+ else
+ out_be64(xd->eoi_mmio + offset, data);
+}
+
#ifdef CONFIG_XMON
-static void xive_dump_eq(const char *name, struct xive_q *q)
+static notrace void xive_dump_eq(const char *name, struct xive_q *q)
{
u32 i0, i1, idx;
@@ -218,7 +234,7 @@ static void xive_dump_eq(const char *name, struct xive_q *q)
q->toggle, i0, i1);
}
-void xmon_xive_do_dump(int cpu)
+notrace void xmon_xive_do_dump(int cpu)
{
struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
@@ -227,7 +243,7 @@ void xmon_xive_do_dump(int cpu)
xive_dump_eq("IRQ", &xc->queue[xive_irq_priority]);
#ifdef CONFIG_SMP
{
- u64 val = xive_poke_esb(&xc->ipi_data, XIVE_ESB_GET);
+ u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
xmon_printf(" IPI state: %x:%c%c\n", xc->hw_ipi,
val & XIVE_ESB_VAL_P ? 'P' : 'p',
val & XIVE_ESB_VAL_P ? 'Q' : 'q');
@@ -297,7 +313,7 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
{
/* If the XIVE supports the new "store EOI facility, use it */
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
- out_be64(xd->eoi_mmio + XIVE_ESB_STORE_EOI, 0);
+ xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
/*
* The FW told us to call it. This happens for some
@@ -326,10 +342,10 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
* properly.
*/
if (xd->flags & XIVE_IRQ_FLAG_LSI)
- in_be64(xd->eoi_mmio);
+ xive_esb_read(xd, XIVE_ESB_LOAD_EOI);
else {
- eoi_val = xive_poke_esb(xd, XIVE_ESB_SET_PQ_00);
- DBG_VERBOSE("eoi_val=%x\n", offset, eoi_val);
+ eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
+ DBG_VERBOSE("eoi_val=%x\n", eoi_val);
/* Re-trigger if needed */
if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio)
@@ -383,12 +399,12 @@ static void xive_do_source_set_mask(struct xive_irq_data *xd,
* ESB accordingly on unmask.
*/
if (mask) {
- val = xive_poke_esb(xd, XIVE_ESB_SET_PQ_01);
+ val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
xd->saved_p = !!(val & XIVE_ESB_VAL_P);
} else if (xd->saved_p)
- xive_poke_esb(xd, XIVE_ESB_SET_PQ_10);
+ xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
else
- xive_poke_esb(xd, XIVE_ESB_SET_PQ_00);
+ xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
}
/*
@@ -447,7 +463,7 @@ static int xive_find_target_in_mask(const struct cpumask *mask,
int cpu, first, num, i;
/* Pick up a starting point CPU in the mask based on fuzz */
- num = cpumask_weight(mask);
+ num = min_t(int, cpumask_weight(mask), nr_cpu_ids);
first = fuzz % num;
/* Locate it */
@@ -672,6 +688,10 @@ static int xive_irq_set_affinity(struct irq_data *d,
if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids)
return -EINVAL;
+ /* Don't do anything if the interrupt isn't started */
+ if (!irqd_is_started(d))
+ return IRQ_SET_MASK_OK;
+
/*
* If existing target is already in the new mask, and is
* online then do nothing.
@@ -768,7 +788,7 @@ static int xive_irq_retrigger(struct irq_data *d)
* To perform a retrigger, we first set the PQ bits to
* 11, then perform an EOI.
*/
- xive_poke_esb(xd, XIVE_ESB_SET_PQ_11);
+ xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
/*
* Note: We pass "0" to the hw_irq argument in order to
@@ -803,7 +823,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
irqd_set_forwarded_to_vcpu(d);
/* Set it to PQ=10 state to prevent further sends */
- pq = xive_poke_esb(xd, XIVE_ESB_SET_PQ_10);
+ pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
/* No target ? nothing to do */
if (xd->target == XIVE_INVALID_TARGET) {
@@ -832,7 +852,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
* for sure the queue slot is no longer in use.
*/
if (pq & 2) {
- pq = xive_poke_esb(xd, XIVE_ESB_SET_PQ_11);
+ pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
xd->saved_p = true;
/*
@@ -989,6 +1009,9 @@ static void xive_ipi_eoi(struct irq_data *d)
{
struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+ DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n",
+ d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio);
+
/* Handle possible race with unplug and drop stale IPIs */
if (!xc)
return;
@@ -1368,6 +1391,19 @@ void xive_flush_interrupt(void)
#endif /* CONFIG_SMP */
+void xive_teardown_cpu(void)
+{
+ struct xive_cpu *xc = __this_cpu_read(xive_cpu);
+ unsigned int cpu = smp_processor_id();
+
+ /* Set CPPR to 0 to disable flow of interrupts */
+ xc->cppr = 0;
+ out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
+
+ if (xive_ops->teardown_cpu)
+ xive_ops->teardown_cpu(cpu, xc);
+}
+
void xive_kexec_teardown_cpu(int secondary)
{
struct xive_cpu *xc = __this_cpu_read(xive_cpu);
@@ -1395,8 +1431,8 @@ void xive_shutdown(void)
xive_ops->shutdown();
}
-bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
- u8 max_prio)
+bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
+ u8 max_prio)
{
xive_tima = area;
xive_tima_offset = offset;
@@ -1424,6 +1460,22 @@ bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
return true;
}
+__be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift)
+{
+ unsigned int alloc_order;
+ struct page *pages;
+ __be32 *qpage;
+
+ alloc_order = xive_alloc_order(queue_shift);
+ pages = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, alloc_order);
+ if (!pages)
+ return ERR_PTR(-ENOMEM);
+ qpage = (__be32 *)page_address(pages);
+ memset(qpage, 0, 1 << queue_shift);
+
+ return qpage;
+}
+
static int __init xive_off(char *arg)
{
xive_cmdline_disabled = true;
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 0f95476b01f6..ebc244b08d67 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -82,6 +82,8 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
return -ENOMEM;
}
+ data->hw_irq = hw_irq;
+
if (!data->trig_page)
return 0;
if (data->trig_page == data->eoi_page) {
@@ -202,17 +204,12 @@ EXPORT_SYMBOL_GPL(xive_native_disable_queue);
static int xive_native_setup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
{
struct xive_q *q = &xc->queue[prio];
- unsigned int alloc_order;
- struct page *pages;
__be32 *qpage;
- alloc_order = (xive_queue_shift > PAGE_SHIFT) ?
- (xive_queue_shift - PAGE_SHIFT) : 0;
- pages = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, alloc_order);
- if (!pages)
- return -ENOMEM;
- qpage = (__be32 *)page_address(pages);
- memset(qpage, 0, 1 << xive_queue_shift);
+ qpage = xive_queue_page_alloc(cpu, xive_queue_shift);
+ if (IS_ERR(qpage))
+ return PTR_ERR(qpage);
+
return xive_native_configure_queue(get_hard_smp_processor_id(cpu),
q, prio, qpage, xive_queue_shift, false);
}
@@ -227,8 +224,7 @@ static void xive_native_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, u8
* from an IPI and iounmap isn't safe
*/
__xive_native_disable_queue(get_hard_smp_processor_id(cpu), q, prio);
- alloc_order = (xive_queue_shift > PAGE_SHIFT) ?
- (xive_queue_shift - PAGE_SHIFT) : 0;
+ alloc_order = xive_alloc_order(xive_queue_shift);
free_pages((unsigned long)q->qpage, alloc_order);
q->qpage = NULL;
}
@@ -515,13 +511,13 @@ static bool xive_parse_provisioning(struct device_node *np)
static void xive_native_setup_pools(void)
{
/* Allocate a pool big enough */
- pr_debug("XIVE: Allocating VP block for pool size %d\n", nr_cpu_ids);
+ pr_debug("XIVE: Allocating VP block for pool size %u\n", nr_cpu_ids);
xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids);
if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP))
pr_err("XIVE: Failed to allocate pool VP, KVM might not function\n");
- pr_debug("XIVE: Pool VPs allocated at 0x%x for %d max CPUs\n",
+ pr_debug("XIVE: Pool VPs allocated at 0x%x for %u max CPUs\n",
xive_pool_vps, nr_cpu_ids);
}
@@ -531,7 +527,7 @@ u32 xive_native_default_eq_shift(void)
}
EXPORT_SYMBOL_GPL(xive_native_default_eq_shift);
-bool xive_native_init(void)
+bool __init xive_native_init(void)
{
struct device_node *np;
struct resource r;
@@ -551,7 +547,7 @@ bool xive_native_init(void)
pr_devel("not found !\n");
return false;
}
- pr_devel("Found %s\n", np->full_name);
+ pr_devel("Found %pOF\n", np);
/* Resource 1 is HV window */
if (of_address_to_resource(np, 1, &r)) {
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
new file mode 100644
index 000000000000..f24a70bc6855
--- /dev/null
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -0,0 +1,662 @@
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "xive: " fmt
+
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/cpumask.h>
+#include <linux/mm.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/hvcall.h>
+
+#include "xive-internal.h"
+
+static u32 xive_queue_shift;
+
+/*
+ * One contiguous range of interrupt numbers, tracked with one bit per
+ * number. Ranges are chained on the xive_irq_bitmaps list.
+ */
+struct xive_irq_bitmap {
+ unsigned long *bitmap; /* a set bit marks the matching number as in use */
+ unsigned int base; /* first interrupt number of the range */
+ unsigned int count; /* how many interrupt numbers the range holds */
+ spinlock_t lock; /* taken around bitmap updates */
+ struct list_head list; /* link in xive_irq_bitmaps */
+};
+
+static LIST_HEAD(xive_irq_bitmaps);
+
+/*
+ * Register the interrupt number range [base, base + count - 1] with the
+ * allocator.
+ *
+ * Returns 0 on success, or -ENOMEM if either allocation fails, in which
+ * case nothing is added to xive_irq_bitmaps.
+ */
+static int xive_irq_bitmap_add(int base, int count)
+{
+	struct xive_irq_bitmap *xibm;
+
+	xibm = kzalloc(sizeof(*xibm), GFP_KERNEL);
+	if (!xibm)
+		return -ENOMEM;
+
+	spin_lock_init(&xibm->lock);
+	xibm->base = base;
+	xibm->count = count;
+
+	/*
+	 * The bit helpers access the bitmap in units of unsigned long, so
+	 * the allocation is sized in longs (one bit per interrupt number),
+	 * not in bytes.
+	 */
+	xibm->bitmap = kcalloc(BITS_TO_LONGS(xibm->count),
+			       sizeof(*xibm->bitmap), GFP_KERNEL);
+	if (!xibm->bitmap) {
+		kfree(xibm);
+		return -ENOMEM;
+	}
+	list_add(&xibm->list, &xive_irq_bitmaps);
+
+	pr_info("Using IRQ range [%x-%x]\n", xibm->base,
+		xibm->base + xibm->count - 1);
+	return 0;
+}
+
+/*
+ * Claim the lowest clear bit of @xibm.
+ *
+ * Returns the matching interrupt number (bit index plus the range base),
+ * or -ENOMEM when no bit of the range is clear.
+ */
+static int __xive_irq_bitmap_alloc(struct xive_irq_bitmap *xibm)
+{
+	int bit = find_first_zero_bit(xibm->bitmap, xibm->count);
+
+	if (bit == xibm->count)
+		return -ENOMEM;
+
+	set_bit(bit, xibm->bitmap);
+	return bit + xibm->base;
+}
+
+/*
+ * Allocate an interrupt number from the first registered range that
+ * still has a free one.
+ *
+ * Returns the number, -ENOENT when no range is registered, or the error
+ * reported for the last range tried.
+ */
+static int xive_irq_bitmap_alloc(void)
+{
+	struct xive_irq_bitmap *range;
+	unsigned long irqflags;
+	int hwirq = -ENOENT;
+
+	list_for_each_entry(range, &xive_irq_bitmaps, list) {
+		spin_lock_irqsave(&range->lock, irqflags);
+		hwirq = __xive_irq_bitmap_alloc(range);
+		spin_unlock_irqrestore(&range->lock, irqflags);
+
+		if (hwirq >= 0)
+			return hwirq;
+	}
+
+	return hwirq;
+}
+
+/*
+ * Release interrupt number @irq: clear its bit in the first registered
+ * range that contains it. Numbers outside every range are ignored.
+ */
+static void xive_irq_bitmap_free(int irq)
+{
+	struct xive_irq_bitmap *range;
+	unsigned long irqflags;
+
+	list_for_each_entry(range, &xive_irq_bitmaps, list) {
+		if (irq < range->base || irq >= range->base + range->count)
+			continue;
+
+		spin_lock_irqsave(&range->lock, irqflags);
+		clear_bit(irq - range->base, range->bitmap);
+		spin_unlock_irqrestore(&range->lock, irqflags);
+		return;
+	}
+}
+
+/*
+ * H_INT_GET_SOURCE_INFO wrapper.
+ *
+ * On success the first four words returned by the hcall are stored, in
+ * order, in @src_flags, @eoi_page, @trig_page and @esb_shift, and 0 is
+ * returned. On failure the hcall status is logged and returned and the
+ * output arguments are left untouched.
+ */
+static long plpar_int_get_source_info(unsigned long flags,
+				      unsigned long lisn,
+				      unsigned long *src_flags,
+				      unsigned long *eoi_page,
+				      unsigned long *trig_page,
+				      unsigned long *esb_shift)
+{
+	unsigned long out[PLPAR_HCALL_BUFSIZE];
+	long status = plpar_hcall(H_INT_GET_SOURCE_INFO, out, flags, lisn);
+
+	if (status) {
+		pr_err("H_INT_GET_SOURCE_INFO lisn=%ld failed %ld\n", lisn, status);
+		return status;
+	}
+
+	pr_devel("H_INT_GET_SOURCE_INFO flags=%lx eoi=%lx trig=%lx shift=%lx\n",
+		 out[0], out[1], out[2], out[3]);
+
+	*src_flags = out[0];
+	*eoi_page = out[1];
+	*trig_page = out[2];
+	*esb_shift = out[3];
+
+	return 0;
+}
+
+#define XIVE_SRC_SET_EISN (1ull << (63 - 62))
+#define XIVE_SRC_MASK (1ull << (63 - 63)) /* unused */
+
+/*
+ * H_INT_SET_SOURCE_CONFIG wrapper.
+ *
+ * Passes the arguments straight to the hcall. Returns 0 on success; on
+ * failure the hcall status is logged and returned.
+ */
+static long plpar_int_set_source_config(unsigned long flags,
+					unsigned long lisn,
+					unsigned long target,
+					unsigned long prio,
+					unsigned long sw_irq)
+{
+	long status;
+
+	pr_devel("H_INT_SET_SOURCE_CONFIG flags=%lx lisn=%lx target=%lx prio=%lx sw_irq=%lx\n",
+		 flags, lisn, target, prio, sw_irq);
+
+	status = plpar_hcall_norets(H_INT_SET_SOURCE_CONFIG, flags, lisn,
+				    target, prio, sw_irq);
+	if (!status)
+		return 0;
+
+	pr_err("H_INT_SET_SOURCE_CONFIG lisn=%ld target=%lx prio=%lx failed %ld\n",
+	       lisn, target, prio, status);
+	return status;
+}
+
+/*
+ * H_INT_GET_QUEUE_INFO wrapper.
+ *
+ * On success the first two words returned by the hcall are stored in
+ * @esn_page and @esn_size and 0 is returned. On failure the hcall status
+ * is logged and returned and the output arguments are left untouched.
+ */
+static long plpar_int_get_queue_info(unsigned long flags,
+				     unsigned long target,
+				     unsigned long priority,
+				     unsigned long *esn_page,
+				     unsigned long *esn_size)
+{
+	unsigned long out[PLPAR_HCALL_BUFSIZE];
+	long status = plpar_hcall(H_INT_GET_QUEUE_INFO, out, flags, target,
+				  priority);
+
+	if (status) {
+		pr_err("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld failed %ld\n",
+		       target, priority, status);
+		return status;
+	}
+
+	pr_devel("H_INT_GET_QUEUE_INFO page=%lx size=%lx\n",
+		 out[0], out[1]);
+
+	*esn_page = out[0];
+	*esn_size = out[1];
+
+	return 0;
+}
+
+#define XIVE_EQ_ALWAYS_NOTIFY (1ull << (63 - 63))
+
+/*
+ * H_INT_SET_QUEUE_CONFIG wrapper.
+ *
+ * Passes the arguments straight to the hcall. Returns 0 on success; on
+ * failure the hcall status is logged and returned.
+ */
+static long plpar_int_set_queue_config(unsigned long flags,
+				       unsigned long target,
+				       unsigned long priority,
+				       unsigned long qpage,
+				       unsigned long qsize)
+{
+	long status;
+
+	pr_devel("H_INT_SET_QUEUE_CONFIG flags=%lx target=%lx priority=%lx qpage=%lx qsize=%lx\n",
+		 flags, target, priority, qpage, qsize);
+
+	status = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target,
+				    priority, qpage, qsize);
+	if (!status)
+		return 0;
+
+	pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=%lx returned %ld\n",
+	       target, priority, qpage, status);
+	return status;
+}
+
+/*
+ * H_INT_SYNC wrapper. Returns 0 on success; on failure the hcall status
+ * is logged and returned.
+ */
+static long plpar_int_sync(unsigned long flags, unsigned long lisn)
+{
+	long status = plpar_hcall_norets(H_INT_SYNC, flags, lisn);
+
+	if (!status)
+		return 0;
+
+	pr_err("H_INT_SYNC lisn=%ld returned %ld\n", lisn, status);
+	return status;
+}
+
+#define XIVE_ESB_FLAG_STORE (1ull << (63 - 63))
+
+/*
+ * H_INT_ESB wrapper.
+ *
+ * On success the first word returned by the hcall is stored in @out_data
+ * and 0 is returned. On failure the hcall status is logged and returned
+ * and @out_data is left untouched.
+ */
+static long plpar_int_esb(unsigned long flags,
+			  unsigned long lisn,
+			  unsigned long offset,
+			  unsigned long in_data,
+			  unsigned long *out_data)
+{
+	unsigned long out[PLPAR_HCALL_BUFSIZE];
+	long status;
+
+	pr_devel("H_INT_ESB flags=%lx lisn=%lx offset=%lx in=%lx\n",
+		 flags, lisn, offset, in_data);
+
+	status = plpar_hcall(H_INT_ESB, out, flags, lisn, offset, in_data);
+	if (status) {
+		pr_err("H_INT_ESB lisn=%ld offset=%ld returned %ld\n",
+		       lisn, offset, status);
+		return status;
+	}
+
+	*out_data = out[0];
+	return 0;
+}
+
+/*
+ * esb_rw backend hook: access the ESB of @lisn at @offset via H_INT_ESB.
+ *
+ * A write passes XIVE_ESB_FLAG_STORE and returns 0; a read returns the
+ * value handed back by the hcall. A failed hcall returns -1 (as a u64).
+ */
+static u64 xive_spapr_esb_rw(u32 lisn, u32 offset, u64 data, bool write)
+{
+	unsigned long hflags = write ? XIVE_ESB_FLAG_STORE : 0;
+	unsigned long loaded;
+
+	if (plpar_int_esb(hflags, lisn, offset, data, &loaded))
+		return -1;
+
+	if (write)
+		return 0;
+
+	return loaded;
+}
+
+#define XIVE_SRC_H_INT_ESB (1ull << (63 - 60))
+#define XIVE_SRC_LSI (1ull << (63 - 61))
+#define XIVE_SRC_TRIGGER (1ull << (63 - 62))
+#define XIVE_SRC_STORE_EOI (1ull << (63 - 63))
+
+/*
+ * populate_irq_data backend hook: fill @data for source @hw_irq from
+ * H_INT_GET_SOURCE_INFO and map its ESB page(s).
+ *
+ * Returns 0 on success, -EINVAL if the hcall fails, or -ENOMEM if a page
+ * cannot be mapped. No mapping is left behind when -ENOMEM is returned.
+ */
+static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
+{
+	long rc;
+	unsigned long flags;
+	unsigned long eoi_page;
+	unsigned long trig_page;
+	unsigned long esb_shift;
+
+	memset(data, 0, sizeof(*data));
+
+	rc = plpar_int_get_source_info(0, hw_irq, &flags, &eoi_page, &trig_page,
+				       &esb_shift);
+	if (rc)
+		return -EINVAL;
+
+	/* Translate the hypervisor source flags into XIVE_IRQ_FLAG_* bits */
+	if (flags & XIVE_SRC_H_INT_ESB)
+		data->flags |= XIVE_IRQ_FLAG_H_INT_ESB;
+	if (flags & XIVE_SRC_STORE_EOI)
+		data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
+	if (flags & XIVE_SRC_LSI)
+		data->flags |= XIVE_IRQ_FLAG_LSI;
+	data->eoi_page = eoi_page;
+	data->esb_shift = esb_shift;
+	data->trig_page = trig_page;
+
+	/*
+	 * No chip-id for the sPAPR backend. This has an impact on how we
+	 * pick a target. See xive_pick_irq_target().
+	 */
+	data->src_chip = XIVE_INVALID_CHIP_ID;
+
+	data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
+	if (!data->eoi_mmio) {
+		pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
+		return -ENOMEM;
+	}
+
+	data->hw_irq = hw_irq;
+
+	/* Full function page supports trigger */
+	if (flags & XIVE_SRC_TRIGGER) {
+		data->trig_mmio = data->eoi_mmio;
+		return 0;
+	}
+
+	data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
+	if (!data->trig_mmio) {
+		pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
+		/* Do not leave the EOI mapping behind on failure */
+		iounmap(data->eoi_mmio);
+		data->eoi_mmio = NULL;
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * configure_irq backend hook: route source @hw_irq to @target at
+ * priority @prio with @sw_irq, using H_INT_SET_SOURCE_CONFIG with
+ * XIVE_SRC_SET_EISN. Returns 0 on success or -ENXIO if the hcall fails.
+ */
+static int xive_spapr_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
+{
+	if (plpar_int_set_source_config(XIVE_SRC_SET_EISN, hw_irq, target,
+					prio, sw_irq))
+		return -ENXIO;
+
+	return 0;
+}
+
+/*
+ * Configure the event queue of @target at priority @prio.
+ *
+ * This can be called multiple times to change a queue configuration.
+ * A zero @order configures the queue without a page. Returns 0 on
+ * success, -EINVAL if @order is non-zero but @qpage is NULL, or -EIO if
+ * one of the hcalls fails. q->qpage is only updated on success.
+ */
+static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
+				      __be32 *qpage, u32 order)
+{
+	unsigned long esn_page, esn_size;
+	u64 qpage_phys = 0;
+	s64 rc;
+
+	/* A non-zero order needs a queue page to hand to the hypervisor */
+	if (order) {
+		if (WARN_ON(!qpage))
+			return -EINVAL;
+		qpage_phys = __pa(qpage);
+		q->msk = (1u << (order - 2)) - 1;
+	} else {
+		q->msk = 0;
+	}
+
+	/* Initialize the rest of the fields */
+	q->idx = 0;
+	q->toggle = 0;
+
+	rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
+	if (rc) {
+		pr_err("Error %lld getting queue info prio %d\n", rc, prio);
+		return -EIO;
+	}
+
+	/* TODO: add support for the notification page */
+	q->eoi_phys = esn_page;
+
+	/* Configure and enable the queue in HW; default is to always notify */
+	rc = plpar_int_set_queue_config(XIVE_EQ_ALWAYS_NOTIFY, target, prio,
+					qpage_phys, order);
+	if (rc) {
+		pr_err("Error %lld setting queue for prio %d\n", rc, prio);
+		return -EIO;
+	}
+
+	q->qpage = qpage;
+	return 0;
+}
+
+/*
+ * setup_queue backend hook: allocate a queue page for @cpu and configure
+ * the queue of priority @prio with it.
+ *
+ * Returns 0 on success or a negative errno. The page is released again
+ * if the queue cannot be configured.
+ */
+static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,
+				  u8 prio)
+{
+	struct xive_q *q = &xc->queue[prio];
+	__be32 *qpage;
+	int rc;
+
+	qpage = xive_queue_page_alloc(cpu, xive_queue_shift);
+	if (IS_ERR(qpage))
+		return PTR_ERR(qpage);
+
+	/* The hypervisor identifies the target by its hardware CPU id */
+	rc = xive_spapr_configure_queue(get_hard_smp_processor_id(cpu),
+					q, prio, qpage, xive_queue_shift);
+	if (rc)
+		/* q->qpage was not set, so nothing else would free the page */
+		free_pages((unsigned long)qpage,
+			   xive_alloc_order(xive_queue_shift));
+
+	return rc;
+}
+
+/*
+ * cleanup_queue backend hook: reset the queue of priority @prio on @cpu
+ * with H_INT_SET_QUEUE_CONFIG (no page, zero size), then free the queue
+ * page. A failing hcall is logged and the page is freed anyway.
+ */
+static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
+				     u8 prio)
+{
+	struct xive_q *q = &xc->queue[prio];
+	unsigned int alloc_order;
+	long rc;
+	/* The hypervisor identifies the target by its hardware CPU id */
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+
+	rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);
+	if (rc)
+		pr_err("Error %ld setting queue for prio %d\n", rc, prio);
+
+	alloc_order = xive_alloc_order(xive_queue_shift);
+	free_pages((unsigned long)q->qpage, alloc_order);
+	q->qpage = NULL;
+}
+
+/*
+ * match backend hook: accept every node (cascaded controllers are
+ * ignored for the moment).
+ */
+static bool xive_spapr_match(struct device_node *node)
+{
+	return true;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * get_ipi backend hook: take an interrupt number from the allocator and
+ * record it in xc->hw_ipi. Returns 0 on success or -ENXIO when no number
+ * is available.
+ */
+static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+	int hw_irq = xive_irq_bitmap_alloc();
+
+	if (hw_irq >= 0) {
+		xc->hw_ipi = hw_irq;
+		return 0;
+	}
+
+	pr_err("Failed to allocate IPI on CPU %d\n", cpu);
+	return -ENXIO;
+}
+
+/* put_ipi backend hook: hand xc->hw_ipi back to the interrupt number allocator. */
+static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc)
+{
+ xive_irq_bitmap_free(xc->hw_ipi);
+}
+#endif /* CONFIG_SMP */
+
+/* shutdown backend hook: issue H_INT_RESET and log a failure, if any. */
+static void xive_spapr_shutdown(void)
+{
+	long status = plpar_hcall_norets(H_INT_RESET, 0);
+
+	if (status != 0)
+		pr_err("H_INT_RESET failed %ld\n", status);
+}
+
+/*
+ * Perform an "ack" cycle on the current thread. Grab the pending
+ * active priorities and update the CPPR to the most favored one.
+ *
+ * @xc: per-CPU state; its pending_prio and cppr fields are updated when
+ * the acknowledge reports an interrupt (TM_QW1_NSR_EO set in the NSR).
+ */
+static void xive_spapr_update_pending(struct xive_cpu *xc)
+{
+ u8 nsr, cppr;
+ u16 ack;
+
+ /*
+ * Perform the "Acknowledge O/S to Register" cycle.
+ *
+ * Let's speedup the access to the TIMA using the raw I/O
+ * accessor as we don't need the synchronisation routine of
+ * the higher level ones
+ */
+ ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
+
+ /* Synchronize subsequent queue accesses */
+ mb();
+
+ /*
+ * Grab the CPPR and the "NSR" field which indicates the source
+ * of the interrupt (if any)
+ */
+ /* CPPR is the low byte of the ack value, NSR the high byte */
+ cppr = ack & 0xff;
+ nsr = ack >> 8;
+
+ if (nsr & TM_QW1_NSR_EO) {
+ /* Nothing is recorded when the acknowledged CPPR is 0xff */
+ if (cppr == 0xff)
+ return;
+ /* Mark the priority pending */
+ xc->pending_prio |= 1 << cppr;
+
+ /*
+ * A new interrupt should never have a CPPR less favored
+ * than our current one.
+ */
+ if (cppr >= xc->cppr)
+ pr_err("CPU %d odd ack CPPR, got %d at %d\n",
+ smp_processor_id(), cppr, xc->cppr);
+
+ /* Update our idea of what the CPPR is */
+ xc->cppr = cppr;
+ }
+}
+
+/* eoi backend hook: nothing to do, the hook is not used by this backend. */
+static void xive_spapr_eoi(u32 hw_irq)
+{
+}
+
+/*
+ * setup_cpu backend hook: no configuration is done here, the function
+ * only prints words 0-2 of the TM_QW1_OS area of the TIMA for debugging.
+ */
+static void xive_spapr_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+ /* Only some debug on the TIMA settings */
+ pr_debug("(HW value: %08x %08x %08x)\n",
+ in_be32(xive_tima + TM_QW1_OS + TM_WORD0),
+ in_be32(xive_tima + TM_QW1_OS + TM_WORD1),
+ in_be32(xive_tima + TM_QW1_OS + TM_WORD2));
+}
+
+/* teardown_cpu backend hook: nothing to do for this backend. */
+static void xive_spapr_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
+{
+}
+
+/*
+ * sync_source backend hook: issue H_INT_SYNC for @hw_irq. The result is
+ * not propagated; plpar_int_sync() logs a failure itself.
+ */
+static void xive_spapr_sync_source(u32 hw_irq)
+{
+ /* Specs are unclear on what this is doing */
+ plpar_int_sync(0, hw_irq);
+}
+
+/* Backend operations handed to the XIVE core by xive_spapr_init() */
+static const struct xive_ops xive_spapr_ops = {
+	.name			= "spapr",
+	.match			= xive_spapr_match,
+	.shutdown		= xive_spapr_shutdown,
+
+	/* Per-source operations */
+	.populate_irq_data	= xive_spapr_populate_irq_data,
+	.configure_irq		= xive_spapr_configure_irq,
+	.eoi			= xive_spapr_eoi,
+	.sync_source		= xive_spapr_sync_source,
+	.esb_rw			= xive_spapr_esb_rw,
+
+	/* Per-CPU operations */
+	.setup_queue		= xive_spapr_setup_queue,
+	.cleanup_queue		= xive_spapr_cleanup_queue,
+	.update_pending		= xive_spapr_update_pending,
+	.setup_cpu		= xive_spapr_setup_cpu,
+	.teardown_cpu		= xive_spapr_teardown_cpu,
+#ifdef CONFIG_SMP
+	.get_ipi		= xive_spapr_get_ipi,
+	.put_ipi		= xive_spapr_put_ipi,
+#endif /* CONFIG_SMP */
+};
+
+/*
+ * Get the max priority from "/ibm,plat-res-int-priorities".
+ *
+ * The property is read as a list of (base, range) pairs of reserved
+ * priorities. On success, the highest-numbered priority in [0-7] that
+ * falls in none of the ranges is stored in @max_prio and true is
+ * returned. Returns false if the root node or the property is missing,
+ * the property is malformed, or every priority is reserved.
+ */
+static bool xive_get_max_prio(u8 *max_prio)
+{
+	struct device_node *rootdn;
+	const __be32 *reg;
+	u32 len;
+	int prio, found;
+
+	rootdn = of_find_node_by_path("/");
+	if (!rootdn) {
+		pr_err("not root node found !\n");
+		return false;
+	}
+
+	reg = of_get_property(rootdn, "ibm,plat-res-int-priorities", &len);
+	/* Drop the reference taken by of_find_node_by_path() */
+	of_node_put(rootdn);
+	if (!reg) {
+		pr_err("Failed to read 'ibm,plat-res-int-priorities' property\n");
+		return false;
+	}
+
+	if (len % (2 * sizeof(u32)) != 0) {
+		pr_err("invalid 'ibm,plat-res-int-priorities' property\n");
+		return false;
+	}
+
+	/* HW supports priorities in the range [0-7] and 0xFF is a
+	 * wildcard priority used to mask. We scan the ranges reserved
+	 * by the hypervisor to find the lowest priority we can use.
+	 */
+	found = 0xFF;
+	for (prio = 0; prio < 8; prio++) {
+		int reserved = 0;
+		int i;
+
+		for (i = 0; i < len / (2 * sizeof(u32)); i++) {
+			int base  = be32_to_cpu(reg[2 * i]);
+			int range = be32_to_cpu(reg[2 * i + 1]);
+
+			if (prio >= base && prio < base + range)
+				reserved++;
+		}
+
+		/* Keep the last (highest-numbered) unreserved priority */
+		if (!reserved)
+			found = prio;
+	}
+
+	if (found == 0xFF) {
+		pr_err("no valid priority found in 'ibm,plat-res-int-priorities'\n");
+		return false;
+	}
+
+	*max_prio = found;
+	return true;
+}
+
+/* Unregister and free every range added with xive_irq_bitmap_add() */
+static void __init xive_irq_bitmap_remove_all(void)
+{
+	struct xive_irq_bitmap *xibm, *tmp;
+
+	list_for_each_entry_safe(xibm, tmp, &xive_irq_bitmaps, list) {
+		list_del(&xibm->list);
+		kfree(xibm->bitmap);
+		kfree(xibm);
+	}
+}
+
+/*
+ * Probe the sPAPR XIVE backend and register it with the XIVE core.
+ *
+ * Looks up the "ibm,power-ivpe" node, maps the OS ring TIMA, reads the
+ * usable priority and the interrupt number ranges from the device tree,
+ * picks a queue size and calls xive_core_init().
+ *
+ * Returns true when the backend is in use, false otherwise. On failure
+ * the TIMA mapping, the registered interrupt number ranges and the node
+ * reference are released.
+ */
+bool __init xive_spapr_init(void)
+{
+	struct device_node *np;
+	struct resource r;
+	void __iomem *tima;
+	struct property *prop;
+	u8 max_prio;
+	u32 val;
+	u32 len;
+	const __be32 *reg;
+	int i;
+
+	if (xive_cmdline_disabled)
+		return false;
+
+	pr_devel("%s()\n", __func__);
+	np = of_find_compatible_node(NULL, NULL, "ibm,power-ivpe");
+	if (!np) {
+		pr_devel("not found !\n");
+		return false;
+	}
+	pr_devel("Found %pOF\n", np);
+
+	/* Resource 1 is the OS ring TIMA */
+	if (of_address_to_resource(np, 1, &r)) {
+		pr_err("Failed to get thread mgmnt area resource\n");
+		goto err_put;
+	}
+	tima = ioremap(r.start, resource_size(&r));
+	if (!tima) {
+		pr_err("Failed to map thread mgmnt area\n");
+		goto err_put;
+	}
+
+	if (!xive_get_max_prio(&max_prio))
+		goto err_unmap;
+
+	/* Feed the IRQ number allocator with the ranges given in the DT */
+	reg = of_get_property(np, "ibm,xive-lisn-ranges", &len);
+	if (!reg) {
+		pr_err("Failed to read 'ibm,xive-lisn-ranges' property\n");
+		goto err_unmap;
+	}
+
+	if (len % (2 * sizeof(u32)) != 0) {
+		pr_err("invalid 'ibm,xive-lisn-ranges' property\n");
+		goto err_unmap;
+	}
+
+	for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) {
+		if (xive_irq_bitmap_add(be32_to_cpu(reg[0]),
+					be32_to_cpu(reg[1])) < 0)
+			goto err_free;
+	}
+
+	/* Iterate the EQ sizes and pick one */
+	of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) {
+		xive_queue_shift = val;
+		if (val == PAGE_SHIFT)
+			break;
+	}
+
+	/* Initialize XIVE core with our backend */
+	if (!xive_core_init(&xive_spapr_ops, tima, TM_QW1_OS, max_prio))
+		goto err_free;
+
+	/* The node is not used past this point */
+	of_node_put(np);
+	pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
+	return true;
+
+err_free:
+	xive_irq_bitmap_remove_all();
+err_unmap:
+	iounmap(tima);
+err_put:
+	of_node_put(np);
+	return false;
+}
diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h
index d07ef2d29caf..f34abed0c05f 100644
--- a/arch/powerpc/sysdev/xive/xive-internal.h
+++ b/arch/powerpc/sysdev/xive/xive-internal.h
@@ -47,6 +47,7 @@ struct xive_ops {
void (*update_pending)(struct xive_cpu *xc);
void (*eoi)(u32 hw_irq);
void (*sync_source)(u32 hw_irq);
+ u64 (*esb_rw)(u32 hw_irq, u32 offset, u64 data, bool write);
#ifdef CONFIG_SMP
int (*get_ipi)(unsigned int cpu, struct xive_cpu *xc);
void (*put_ipi)(unsigned int cpu, struct xive_cpu *xc);
@@ -56,6 +57,12 @@ struct xive_ops {
bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
u8 max_prio);
+__be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift);
+
+/*
+ * Page allocation order for a queue of 2^queue_shift bytes: 0 when the
+ * queue fits in one page, otherwise queue_shift - PAGE_SHIFT.
+ */
+static inline u32 xive_alloc_order(u32 queue_shift)
+{
+	if (queue_shift <= PAGE_SHIFT)
+		return 0;
+
+	return queue_shift - PAGE_SHIFT;
+}
extern bool xive_cmdline_disabled;
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index 0b2f771593eb..1dd88315cff4 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -5,6 +5,10 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
GCOV_PROFILE := n
UBSAN_SANITIZE := n
+# Disable ftrace for the entire directory
+ORIG_CFLAGS := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -mno-sched-epilog,,$(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)))
+
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-y += xmon.o nonstdio.o spr_access.o
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 08e367e3e8c3..33351c6704b1 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -89,6 +89,7 @@ static unsigned long nidump = 16;
static unsigned long ncsum = 4096;
static int termch;
static char tmpstr[128];
+static int tracing_enabled;
static long bus_error_jmp[JMP_BUF_LEN];
static int catch_memory_errors;
@@ -234,6 +235,7 @@ Commands:\n\
"\
dr dump stream of raw bytes\n\
dt dump the tracing buffers (uses printk)\n\
+ dtc dump the tracing buffers for current CPU (uses printk)\n\
"
#ifdef CONFIG_PPC_POWERNV
" dx# dump xive on CPU #\n\
@@ -461,6 +463,9 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
local_irq_save(flags);
hard_irq_disable();
+ tracing_enabled = tracing_is_on();
+ tracing_off();
+
bp = in_breakpoint_table(regs->nip, &offset);
if (bp != NULL) {
regs->nip = bp->address + offset;
@@ -981,6 +986,8 @@ cmds(struct pt_regs *excp)
break;
case 'x':
case 'X':
+ if (tracing_enabled)
+ tracing_on();
return cmd;
case EOF:
printf(" <no input ...>\n");
@@ -1732,23 +1739,25 @@ static void dump_206_sprs(void)
/* Actually some of these pre-date 2.06, but whatevs */
- printf("srr0 = %.16x srr1 = %.16x dsisr = %.8x\n",
+ printf("srr0 = %.16lx srr1 = %.16lx dsisr = %.8x\n",
mfspr(SPRN_SRR0), mfspr(SPRN_SRR1), mfspr(SPRN_DSISR));
- printf("dscr = %.16x ppr = %.16x pir = %.8x\n",
+ printf("dscr = %.16lx ppr = %.16lx pir = %.8x\n",
mfspr(SPRN_DSCR), mfspr(SPRN_PPR), mfspr(SPRN_PIR));
+ printf("amr = %.16lx uamor = %.16lx\n",
+ mfspr(SPRN_AMR), mfspr(SPRN_UAMOR));
if (!(mfmsr() & MSR_HV))
return;
- printf("sdr1 = %.16x hdar = %.16x hdsisr = %.8x\n",
+ printf("sdr1 = %.16lx hdar = %.16lx hdsisr = %.8x\n",
mfspr(SPRN_SDR1), mfspr(SPRN_HDAR), mfspr(SPRN_HDSISR));
- printf("hsrr0 = %.16x hsrr1 = %.16x hdec = %.8x\n",
+ printf("hsrr0 = %.16lx hsrr1 = %.16lx hdec = %.16lx\n",
mfspr(SPRN_HSRR0), mfspr(SPRN_HSRR1), mfspr(SPRN_HDEC));
- printf("lpcr = %.16x pcr = %.16x lpidr = %.8x\n",
+ printf("lpcr = %.16lx pcr = %.16lx lpidr = %.8x\n",
mfspr(SPRN_LPCR), mfspr(SPRN_PCR), mfspr(SPRN_LPID));
- printf("hsprg0 = %.16x hsprg1 = %.16x\n",
- mfspr(SPRN_HSPRG0), mfspr(SPRN_HSPRG1));
- printf("dabr = %.16x dabrx = %.16x\n",
+ printf("hsprg0 = %.16lx hsprg1 = %.16lx amor = %.16lx\n",
+ mfspr(SPRN_HSPRG0), mfspr(SPRN_HSPRG1), mfspr(SPRN_AMOR));
+ printf("dabr = %.16lx dabrx = %.16lx\n",
mfspr(SPRN_DABR), mfspr(SPRN_DABRX));
#endif
}
@@ -1761,42 +1770,65 @@ static void dump_207_sprs(void)
if (!cpu_has_feature(CPU_FTR_ARCH_207S))
return;
- printf("dpdes = %.16x tir = %.16x cir = %.8x\n",
+ printf("dpdes = %.16lx tir = %.16lx cir = %.8x\n",
mfspr(SPRN_DPDES), mfspr(SPRN_TIR), mfspr(SPRN_CIR));
- printf("fscr = %.16x tar = %.16x pspb = %.8x\n",
+ printf("fscr = %.16lx tar = %.16lx pspb = %.8x\n",
mfspr(SPRN_FSCR), mfspr(SPRN_TAR), mfspr(SPRN_PSPB));
msr = mfmsr();
if (msr & MSR_TM) {
/* Only if TM has been enabled in the kernel */
- printf("tfhar = %.16x tfiar = %.16x texasr = %.16x\n",
+ printf("tfhar = %.16lx tfiar = %.16lx texasr = %.16lx\n",
mfspr(SPRN_TFHAR), mfspr(SPRN_TFIAR),
mfspr(SPRN_TEXASR));
}
- printf("mmcr0 = %.16x mmcr1 = %.16x mmcr2 = %.16x\n",
+ printf("mmcr0 = %.16lx mmcr1 = %.16lx mmcr2 = %.16lx\n",
mfspr(SPRN_MMCR0), mfspr(SPRN_MMCR1), mfspr(SPRN_MMCR2));
printf("pmc1 = %.8x pmc2 = %.8x pmc3 = %.8x pmc4 = %.8x\n",
mfspr(SPRN_PMC1), mfspr(SPRN_PMC2),
mfspr(SPRN_PMC3), mfspr(SPRN_PMC4));
- printf("mmcra = %.16x siar = %.16x pmc5 = %.8x\n",
+ printf("mmcra = %.16lx siar = %.16lx pmc5 = %.8x\n",
mfspr(SPRN_MMCRA), mfspr(SPRN_SIAR), mfspr(SPRN_PMC5));
- printf("sdar = %.16x sier = %.16x pmc6 = %.8x\n",
+ printf("sdar = %.16lx sier = %.16lx pmc6 = %.8x\n",
mfspr(SPRN_SDAR), mfspr(SPRN_SIER), mfspr(SPRN_PMC6));
- printf("ebbhr = %.16x ebbrr = %.16x bescr = %.16x\n",
+ printf("ebbhr = %.16lx ebbrr = %.16lx bescr = %.16lx\n",
mfspr(SPRN_EBBHR), mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR));
+ printf("iamr = %.16lx\n", mfspr(SPRN_IAMR));
if (!(msr & MSR_HV))
return;
- printf("hfscr = %.16x dhdes = %.16x rpr = %.16x\n",
+ printf("hfscr = %.16lx dhdes = %.16lx rpr = %.16lx\n",
mfspr(SPRN_HFSCR), mfspr(SPRN_DHDES), mfspr(SPRN_RPR));
- printf("dawr = %.16x dawrx = %.16x ciabr = %.16x\n",
+ printf("dawr = %.16lx dawrx = %.16lx ciabr = %.16lx\n",
mfspr(SPRN_DAWR), mfspr(SPRN_DAWRX), mfspr(SPRN_CIABR));
#endif
}
+/*
+ * Print the SPRs that are only dumped when CPU_FTR_ARCH_300 is present:
+ * pidr, tidr, asdr and psscr, plus ptcr when MSR_HV is set.
+ * Does nothing unless CONFIG_PPC64 is enabled.
+ */
+static void dump_300_sprs(void)
+{
+#ifdef CONFIG_PPC64
+	bool hv = mfmsr() & MSR_HV;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return;
+
+	printf("pidr = %.16lx tidr = %.16lx\n",
+		mfspr(SPRN_PID), mfspr(SPRN_TIDR));
+
+	/* psscr is read through a different SPR number depending on MSR_HV */
+	if (hv) {
+		printf("asdr = %.16lx psscr = %.16lx\n",
+			mfspr(SPRN_ASDR), mfspr(SPRN_PSSCR));
+		printf("ptcr = %.16lx\n",
+			mfspr(SPRN_PTCR));
+	} else {
+		printf("asdr = %.16lx psscr = %.16lx\n",
+			mfspr(SPRN_ASDR), mfspr(SPRN_PSSCR_PR));
+	}
+#endif
+}
+
static void dump_one_spr(int spr, bool show_unimplemented)
{
unsigned long val;
@@ -1850,6 +1882,7 @@ static void super_regs(void)
dump_206_sprs();
dump_207_sprs();
+ dump_300_sprs();
return;
}
@@ -2231,6 +2264,17 @@ static void xmon_rawdump (unsigned long adrs, long ndump)
printf("\n");
}
+/*
+ * Handle the "dt" command: read one more input character and dump the
+ * tracing buffers with DUMP_ORIG if it is 'c', with DUMP_ALL otherwise.
+ */
+static void dump_tracing(void)
+{
+	int c = inchar();
+
+	ftrace_dump(c == 'c' ? DUMP_ORIG : DUMP_ALL);
+}
+
#ifdef CONFIG_PPC64
static void dump_one_paca(int cpu)
{
@@ -2507,6 +2551,11 @@ dump(void)
}
#endif
+ if (c == 't') {
+ dump_tracing();
+ return;
+ }
+
if (c == '\n')
termch = c;
@@ -2525,9 +2574,6 @@ dump(void)
dump_log_buf();
} else if (c == 'o') {
dump_opal_msglog();
- } else if (c == 't') {
- ftrace_dump(DUMP_ALL);
- tracing_on();
} else if (c == 'r') {
scanhex(&ndump);
if (ndump == 0)
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 7eeb75d758c1..48af970320cb 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -222,6 +222,10 @@ config HAVE_MARCH_Z13_FEATURES
def_bool n
select HAVE_MARCH_ZEC12_FEATURES
+config HAVE_MARCH_Z14_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z13_FEATURES
+
choice
prompt "Processor type"
default MARCH_Z196
@@ -282,6 +286,14 @@ config MARCH_Z13
2964 series). The kernel will be slightly faster but will not work on
older machines.
+config MARCH_Z14
+ bool "IBM z14"
+ select HAVE_MARCH_Z14_FEATURES
+ help
+ Select this to enable optimizations for IBM z14 (3906 series).
+ The kernel will be slightly faster but will not work on older
+ machines.
+
endchoice
config MARCH_Z900_TUNE
@@ -305,6 +317,9 @@ config MARCH_ZEC12_TUNE
config MARCH_Z13_TUNE
def_bool TUNE_Z13 || MARCH_Z13 && TUNE_DEFAULT
+config MARCH_Z14_TUNE
+ def_bool TUNE_Z14 || MARCH_Z14 && TUNE_DEFAULT
+
choice
prompt "Tune code generation"
default TUNE_DEFAULT
@@ -343,6 +358,9 @@ config TUNE_ZEC12
config TUNE_Z13
bool "IBM z13"
+config TUNE_Z14
+ bool "IBM z14"
+
endchoice
config 64BIT
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 54e00526b8df..dac821cfcd43 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -31,7 +31,8 @@ mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109
mflags-$(CONFIG_MARCH_Z10) := -march=z10
mflags-$(CONFIG_MARCH_Z196) := -march=z196
mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12
-mflags-$(CONFIG_MARCH_Z13) := -march=z13
+mflags-$(CONFIG_MARCH_Z13) := -march=z13
+mflags-$(CONFIG_MARCH_Z14) := -march=z14
export CC_FLAGS_MARCH := $(mflags-y)
@@ -44,7 +45,8 @@ cflags-$(CONFIG_MARCH_Z9_109_TUNE) += -mtune=z9-109
cflags-$(CONFIG_MARCH_Z10_TUNE) += -mtune=z10
cflags-$(CONFIG_MARCH_Z196_TUNE) += -mtune=z196
cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12
-cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13
+cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13
+cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14
cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index b3c88479feba..6e2c9f7e47fa 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -16,4 +16,5 @@ generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += preempt.h
generic-y += trace_clock.h
+generic-y += unaligned.h
generic-y += word-at-a-time.h
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index b9300f8aee10..07a82bc933a7 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -8,11 +8,12 @@
#include <linux/sched/task_stack.h>
#include <linux/thread_info.h>
-#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64))
+#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \
+ typeof(0?(__force t)0:0ULL), u64))
#define __SC_DELOUSE(t,v) ({ \
BUILD_BUG_ON(sizeof(t) > 4 && !__TYPE_IS_PTR(t)); \
- (t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \
+ (__force t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \
})
#define PSW32_MASK_PER 0x40000000UL
diff --git a/arch/s390/include/asm/cpcmd.h b/arch/s390/include/asm/cpcmd.h
index 3dfadb5d648f..ca2b0624ad46 100644
--- a/arch/s390/include/asm/cpcmd.h
+++ b/arch/s390/include/asm/cpcmd.h
@@ -10,9 +10,8 @@
/*
* the lowlevel function for cpcmd
- * the caller of __cpcmd has to ensure that the response buffer is below 2 GB
*/
-extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code);
+int __cpcmd(const char *cmd, char *response, int rlen, int *response_code);
/*
* cpcmd is the in-kernel interface for issuing CP commands
@@ -25,8 +24,8 @@ extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code
* response_code: return pointer for VM's error code
* return value: the size of the response. The caller can check if the buffer
* was large enough by comparing the return value and rlen
- * NOTE: If the response buffer is not below 2 GB, cpcmd can sleep
+ * NOTE: If the response buffer is not in real storage, cpcmd can sleep
*/
-extern int cpcmd(const char *cmd, char *response, int rlen, int *response_code);
+int cpcmd(const char *cmd, char *response, int rlen, int *response_code);
#endif /* _ASM_S390_CPCMD_H */
diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h
index c5befc5a3bf5..b71735eab23f 100644
--- a/arch/s390/include/asm/ebcdic.h
+++ b/arch/s390/include/asm/ebcdic.h
@@ -9,9 +9,7 @@
#ifndef _EBCDIC_H
#define _EBCDIC_H
-#ifndef _S390_TYPES_H
-#include <types.h>
-#endif
+#include <linux/types.h>
extern __u8 _ascebc_500[256]; /* ASCII -> EBCDIC 500 conversion table */
extern __u8 _ebcasc_500[256]; /* EBCDIC 500 -> ASCII conversion table */
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index c92ed0170be2..65998a1f5d43 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -191,7 +191,7 @@ struct arch_elf_state {
} while (0)
#define CORE_DUMP_USE_REGSET
-#define ELF_EXEC_PAGESIZE 4096
+#define ELF_EXEC_PAGESIZE PAGE_SIZE
/*
* This is the base location for PIE (ET_DYN with INTERP) loads. On
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index a4811aa0304d..8f8eec9e1198 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -21,17 +21,12 @@
: "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
"m" (*uaddr) : "cc");
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+ u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, newval, ret;
load_kernel_asce();
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
pagefault_disable();
switch (op) {
@@ -60,17 +55,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
}
pagefault_enable();
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = oldval;
+
return ret;
}
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index edb5161df7e2..6810bd757312 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -81,7 +81,7 @@ struct ipl_parameter_block {
struct ipl_block_fcp fcp;
struct ipl_block_ccw ccw;
} ipl_info;
-} __attribute__((packed,aligned(4096)));
+} __packed __aligned(PAGE_SIZE);
/*
* IPL validity flags
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a409d5991934..51375e766e90 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -226,7 +226,9 @@ struct kvm_s390_sie_block {
#define ECB3_RI 0x01
__u8 ecb3; /* 0x0063 */
__u32 scaol; /* 0x0064 */
- __u8 reserved68[4]; /* 0x0068 */
+ __u8 reserved68; /* 0x0068 */
+ __u8 epdx; /* 0x0069 */
+ __u8 reserved6a[2]; /* 0x006a */
__u32 todpr; /* 0x006c */
__u8 reserved70[16]; /* 0x0070 */
__u64 mso; /* 0x0080 */
@@ -265,6 +267,7 @@ struct kvm_s390_sie_block {
__u64 cbrlo; /* 0x01b8 */
__u8 reserved1c0[8]; /* 0x01c0 */
#define ECD_HOSTREGMGMT 0x20000000
+#define ECD_MEF 0x08000000
__u32 ecd; /* 0x01c8 */
__u8 reserved1cc[18]; /* 0x01cc */
__u64 pp; /* 0x01de */
@@ -739,6 +742,7 @@ struct kvm_arch{
struct kvm_s390_cpu_model model;
struct kvm_s390_crypto crypto;
struct kvm_s390_vsie vsie;
+ u8 epdx;
u64 epoch;
struct kvm_s390_migration_state *migration_state;
/* subset of available cpu features enabled by user space */
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 8a5b082797f8..a6870ea6ea8b 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -95,46 +95,46 @@ struct lowcore {
__u64 int_clock; /* 0x0310 */
__u64 mcck_clock; /* 0x0318 */
__u64 clock_comparator; /* 0x0320 */
+ __u64 boot_clock[2]; /* 0x0328 */
/* Current process. */
- __u64 current_task; /* 0x0328 */
- __u8 pad_0x318[0x320-0x318]; /* 0x0330 */
- __u64 kernel_stack; /* 0x0338 */
+ __u64 current_task; /* 0x0338 */
+ __u64 kernel_stack; /* 0x0340 */
/* Interrupt, panic and restart stack. */
- __u64 async_stack; /* 0x0340 */
- __u64 panic_stack; /* 0x0348 */
- __u64 restart_stack; /* 0x0350 */
+ __u64 async_stack; /* 0x0348 */
+ __u64 panic_stack; /* 0x0350 */
+ __u64 restart_stack; /* 0x0358 */
/* Restart function and parameter. */
- __u64 restart_fn; /* 0x0358 */
- __u64 restart_data; /* 0x0360 */
- __u64 restart_source; /* 0x0368 */
+ __u64 restart_fn; /* 0x0360 */
+ __u64 restart_data; /* 0x0368 */
+ __u64 restart_source; /* 0x0370 */
/* Address space pointer. */
- __u64 kernel_asce; /* 0x0370 */
- __u64 user_asce; /* 0x0378 */
+ __u64 kernel_asce; /* 0x0378 */
+ __u64 user_asce; /* 0x0380 */
/*
* The lpp and current_pid fields form a
* 64-bit value that is set as program
* parameter with the LPP instruction.
*/
- __u32 lpp; /* 0x0380 */
- __u32 current_pid; /* 0x0384 */
+ __u32 lpp; /* 0x0388 */
+ __u32 current_pid; /* 0x038c */
/* SMP info area */
- __u32 cpu_nr; /* 0x0388 */
- __u32 softirq_pending; /* 0x038c */
- __u64 percpu_offset; /* 0x0390 */
- __u64 vdso_per_cpu_data; /* 0x0398 */
- __u64 machine_flags; /* 0x03a0 */
- __u32 preempt_count; /* 0x03a8 */
- __u8 pad_0x03ac[0x03b0-0x03ac]; /* 0x03ac */
- __u64 gmap; /* 0x03b0 */
- __u32 spinlock_lockval; /* 0x03b8 */
- __u32 fpu_flags; /* 0x03bc */
- __u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */
+ __u32 cpu_nr; /* 0x0390 */
+ __u32 softirq_pending; /* 0x0394 */
+ __u64 percpu_offset; /* 0x0398 */
+ __u64 vdso_per_cpu_data; /* 0x03a0 */
+ __u64 machine_flags; /* 0x03a8 */
+ __u32 preempt_count; /* 0x03b0 */
+ __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */
+ __u64 gmap; /* 0x03b8 */
+ __u32 spinlock_lockval; /* 0x03c0 */
+ __u32 fpu_flags; /* 0x03c4 */
+ __u8 pad_0x03c8[0x0400-0x03c8]; /* 0x03c8 */
/* Per cpu primary space access list */
__u32 paste[16]; /* 0x0400 */
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
deleted file mode 100644
index b79813d9cf68..000000000000
--- a/arch/s390/include/asm/mman.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- * S390 version
- *
- * Derived from "include/asm-i386/mman.h"
- */
-#ifndef __S390_MMAN_H__
-#define __S390_MMAN_H__
-
-#include <uapi/asm/mman.h>
-
-#endif /* __S390_MMAN_H__ */
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 4541ac44b35f..72e9ca83a668 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -12,6 +12,7 @@
#include <linux/mm_types.h>
#include <asm/tlbflush.h>
#include <asm/ctl_reg.h>
+#include <asm-generic/mm_hooks.h>
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
@@ -33,7 +34,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.use_cmma = 0;
#endif
switch (mm->context.asce_limit) {
- case 1UL << 42:
+ case _REGION2_SIZE:
/*
* forked 3-level task, fall through to set new asce with new
* mm->pgd
@@ -44,12 +45,17 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION3;
break;
- case 1UL << 53:
+ case -PAGE_SIZE:
+ /* forked 5-level task, set new asce with new mm->pgd */
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+ break;
+ case _REGION1_SIZE:
/* forked 4-level task, set new asce with new mm->pgd */
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
break;
- case 1UL << 31:
+ case _REGION3_SIZE:
/* forked 2-level compat task, set new asce with new mm->pgd */
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
@@ -133,30 +139,4 @@ static inline void activate_mm(struct mm_struct *prev,
set_user_asce(next);
}
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
- struct mm_struct *mm)
-{
-}
-
-static inline void arch_exit_mmap(struct mm_struct *mm)
-{
-}
-
-static inline void arch_unmap(struct mm_struct *mm,
- struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
-}
-
-static inline void arch_bprm_mm_init(struct mm_struct *mm,
- struct vm_area_struct *vma)
-{
-}
-
-static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
- bool write, bool execute, bool foreign)
-{
- /* by default, allow everything */
- return true;
-}
#endif /* __S390_MMU_CONTEXT_H */
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 9d91cf3e427f..c8e211b9a002 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -72,7 +72,7 @@ union mci {
u64 ar : 1; /* 33 access register validity */
u64 da : 1; /* 34 delayed access exception */
u64 : 1; /* 35 */
- u64 gs : 1; /* 36 guarded storage registers */
+ u64 gs : 1; /* 36 guarded storage registers validity */
u64 : 5; /* 37-41 */
u64 pr : 1; /* 42 tod programmable register validity */
u64 fc : 1; /* 43 fp control register validity */
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
index 42267a2fe29e..22b0f49e87c1 100644
--- a/arch/s390/include/asm/page-states.h
+++ b/arch/s390/include/asm/page-states.h
@@ -13,7 +13,8 @@
#define ESSA_SET_POT_VOLATILE 4
#define ESSA_SET_STABLE_RESIDENT 5
#define ESSA_SET_STABLE_IF_RESIDENT 6
+#define ESSA_SET_STABLE_NODAT 7
-#define ESSA_MAX ESSA_SET_STABLE_IF_RESIDENT
+#define ESSA_MAX ESSA_SET_STABLE_NODAT
#endif
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 624deaa44230..5d5c2b3500a4 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -10,10 +10,14 @@
#include <linux/const.h>
#include <asm/types.h>
+#define _PAGE_SHIFT 12
+#define _PAGE_SIZE (_AC(1, UL) << _PAGE_SHIFT)
+#define _PAGE_MASK (~(_PAGE_SIZE - 1))
+
/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#define PAGE_SHIFT _PAGE_SHIFT
+#define PAGE_SIZE _PAGE_SIZE
+#define PAGE_MASK _PAGE_MASK
#define PAGE_DEFAULT_ACC 0
#define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
@@ -133,6 +137,9 @@ static inline int page_reset_referenced(unsigned long addr)
struct page;
void arch_free_page(struct page *page, int order);
void arch_alloc_page(struct page *page, int order);
+void arch_set_page_dat(struct page *page, int order);
+void arch_set_page_nodat(struct page *page, int order);
+int arch_test_page_nodat(struct page *page);
void arch_set_page_states(int make_stable);
static inline int devmem_is_allowed(unsigned long pfn)
@@ -145,16 +152,26 @@ static inline int devmem_is_allowed(unsigned long pfn)
#endif /* !__ASSEMBLY__ */
-#define __PAGE_OFFSET 0x0UL
-#define PAGE_OFFSET 0x0UL
-#define __pa(x) (unsigned long)(x)
-#define __va(x) (void *)(unsigned long)(x)
-#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
-#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define __PAGE_OFFSET 0x0UL
+#define PAGE_OFFSET 0x0UL
+
+#define __pa(x) ((unsigned long)(x))
+#define __va(x) ((void *)(unsigned long)(x))
+
+#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT)
+
+#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
#define page_to_virt(page) pfn_to_virt(page_to_pfn(page))
+#define phys_to_pfn(kaddr) ((kaddr) >> PAGE_SHIFT)
+#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT)
+
+#define phys_to_page(kaddr) pfn_to_page(phys_to_pfn(kaddr))
+#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
+
+#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index bb0ff1bb0c4a..a0d9167519b1 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -15,6 +15,8 @@
#include <linux/gfp.h>
#include <linux/mm.h>
+#define CRST_ALLOC_ORDER 2
+
unsigned long *crst_table_alloc(struct mm_struct *);
void crst_table_free(struct mm_struct *, unsigned long *);
@@ -42,16 +44,16 @@ static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
static inline void crst_table_init(unsigned long *crst, unsigned long entry)
{
- clear_table(crst, entry, sizeof(unsigned long)*2048);
+ clear_table(crst, entry, _CRST_TABLE_SIZE);
}
static inline unsigned long pgd_entry_type(struct mm_struct *mm)
{
- if (mm->context.asce_limit <= (1UL << 31))
+ if (mm->context.asce_limit <= _REGION3_SIZE)
return _SEGMENT_ENTRY_EMPTY;
- if (mm->context.asce_limit <= (1UL << 42))
+ if (mm->context.asce_limit <= _REGION2_SIZE)
return _REGION3_ENTRY_EMPTY;
- if (mm->context.asce_limit <= (1UL << 53))
+ if (mm->context.asce_limit <= _REGION1_SIZE)
return _REGION2_ENTRY_EMPTY;
return _REGION1_ENTRY_EMPTY;
}
@@ -119,7 +121,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
if (!table)
return NULL;
- if (mm->context.asce_limit == (1UL << 31)) {
+ if (mm->context.asce_limit == _REGION3_SIZE) {
/* Forking a compat process with 2 page table levels */
if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
crst_table_free(mm, table);
@@ -131,7 +133,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
- if (mm->context.asce_limit == (1UL << 31))
+ if (mm->context.asce_limit == _REGION3_SIZE)
pgtable_pmd_page_dtor(virt_to_page(pgd));
crst_table_free(mm, (unsigned long *) pgd);
}
@@ -158,4 +160,8 @@ static inline void pmd_populate(struct mm_struct *mm,
extern void rcu_table_freelist_finish(void);
+void vmem_map_init(void);
+void *vmem_crst_alloc(unsigned long val);
+pte_t *vmem_pte_alloc(void);
+
#endif /* _S390_PGALLOC_H */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 57057fb1cc07..dce708e061ea 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -11,19 +11,6 @@
#ifndef _ASM_S390_PGTABLE_H
#define _ASM_S390_PGTABLE_H
-/*
- * The Linux memory management assumes a three-level page table setup.
- * For s390 64 bit we use up to four of the five levels the hardware
- * provides (region first tables are not used).
- *
- * The "pgd_xxx()" functions are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- *
- * This file contains the functions and defines necessary to modify and use
- * the S390 page table tree.
- */
-#ifndef __ASSEMBLY__
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
@@ -34,9 +21,6 @@
extern pgd_t swapper_pg_dir[];
extern void paging_init(void);
-extern void vmem_map_init(void);
-pmd_t *vmem_pmd_alloc(void);
-pte_t *vmem_pte_alloc(void);
enum {
PG_DIRECT_MAP_4K = 0,
@@ -77,38 +61,6 @@ extern unsigned long zero_page_mask;
#define __HAVE_COLOR_ZERO_PAGE
/* TODO: s390 cannot support io_remap_pfn_range... */
-#endif /* !__ASSEMBLY__ */
-
-/*
- * PMD_SHIFT determines the size of the area a second-level page
- * table can map
- * PGDIR_SHIFT determines what a third-level page table entry can map
- */
-#define PMD_SHIFT 20
-#define PUD_SHIFT 31
-#define P4D_SHIFT 42
-#define PGDIR_SHIFT 53
-
-#define PMD_SIZE (1UL << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE-1))
-#define PUD_SIZE (1UL << PUD_SHIFT)
-#define PUD_MASK (~(PUD_SIZE-1))
-#define P4D_SIZE (1UL << P4D_SHIFT)
-#define P4D_MASK (~(P4D_SIZE-1))
-#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
-#define PGDIR_MASK (~(PGDIR_SIZE-1))
-
-/*
- * entries per page directory level: the S390 is two-level, so
- * we don't really have any PMD directory physically.
- * for S390 segment-table entries are combined to one PGD
- * that leads to 1024 pte per pgd
- */
-#define PTRS_PER_PTE 256
-#define PTRS_PER_PMD 2048
-#define PTRS_PER_PUD 2048
-#define PTRS_PER_P4D 2048
-#define PTRS_PER_PGD 2048
#define FIRST_USER_ADDRESS 0UL
@@ -123,7 +75,6 @@ extern unsigned long zero_page_mask;
#define pgd_ERROR(e) \
printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))
-#ifndef __ASSEMBLY__
/*
* The vmalloc and module area will always be on the topmost area of the
* kernel mapping. We reserve 128GB (64bit) for vmalloc and modules.
@@ -269,7 +220,7 @@ static inline int is_module_addr(void *addr)
*/
/* Bits in the segment/region table address-space-control-element */
-#define _ASCE_ORIGIN ~0xfffUL/* segment table origin */
+#define _ASCE_ORIGIN ~0xfffUL/* region/segment table origin */
#define _ASCE_PRIVATE_SPACE 0x100 /* private space control */
#define _ASCE_ALT_EVENT 0x80 /* storage alteration event control */
#define _ASCE_SPACE_SWITCH 0x40 /* space switch event */
@@ -320,9 +271,9 @@ static inline int is_module_addr(void *addr)
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
-#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */
-#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
-#define _SEGMENT_ENTRY_NOEXEC 0x100 /* region no-execute bit */
+#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* page table origin */
+#define _SEGMENT_ENTRY_PROTECT 0x200 /* segment protection bit */
+#define _SEGMENT_ENTRY_NOEXEC 0x100 /* segment no-execute bit */
#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
#define _SEGMENT_ENTRY (0)
@@ -340,6 +291,54 @@ static inline int is_module_addr(void *addr)
#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */
#endif
+#define _CRST_ENTRIES 2048 /* number of region/segment table entries */
+#define _PAGE_ENTRIES 256 /* number of page table entries */
+
+#define _CRST_TABLE_SIZE (_CRST_ENTRIES * 8)
+#define _PAGE_TABLE_SIZE (_PAGE_ENTRIES * 8)
+
+#define _REGION1_SHIFT 53
+#define _REGION2_SHIFT 42
+#define _REGION3_SHIFT 31
+#define _SEGMENT_SHIFT 20
+
+#define _REGION1_INDEX (0x7ffUL << _REGION1_SHIFT)
+#define _REGION2_INDEX (0x7ffUL << _REGION2_SHIFT)
+#define _REGION3_INDEX (0x7ffUL << _REGION3_SHIFT)
+#define _SEGMENT_INDEX (0x7ffUL << _SEGMENT_SHIFT)
+#define _PAGE_INDEX (0xffUL << _PAGE_SHIFT)
+
+#define _REGION1_SIZE (1UL << _REGION1_SHIFT)
+#define _REGION2_SIZE (1UL << _REGION2_SHIFT)
+#define _REGION3_SIZE (1UL << _REGION3_SHIFT)
+#define _SEGMENT_SIZE (1UL << _SEGMENT_SHIFT)
+
+#define _REGION1_MASK (~(_REGION1_SIZE - 1))
+#define _REGION2_MASK (~(_REGION2_SIZE - 1))
+#define _REGION3_MASK (~(_REGION3_SIZE - 1))
+#define _SEGMENT_MASK (~(_SEGMENT_SIZE - 1))
+
+#define PMD_SHIFT _SEGMENT_SHIFT
+#define PUD_SHIFT _REGION3_SHIFT
+#define P4D_SHIFT _REGION2_SHIFT
+#define PGDIR_SHIFT _REGION1_SHIFT
+
+#define PMD_SIZE _SEGMENT_SIZE
+#define PUD_SIZE _REGION3_SIZE
+#define P4D_SIZE _REGION2_SIZE
+#define PGDIR_SIZE _REGION1_SIZE
+
+#define PMD_MASK _SEGMENT_MASK
+#define PUD_MASK _REGION3_MASK
+#define P4D_MASK _REGION2_MASK
+#define PGDIR_MASK _REGION1_MASK
+
+#define PTRS_PER_PTE _PAGE_ENTRIES
+#define PTRS_PER_PMD _CRST_ENTRIES
+#define PTRS_PER_PUD _CRST_ENTRIES
+#define PTRS_PER_P4D _CRST_ENTRIES
+#define PTRS_PER_PGD _CRST_ENTRIES
+
/*
* Segment table and region3 table entry encoding
* (R = read-only, I = invalid, y = young bit):
@@ -376,6 +375,7 @@ static inline int is_module_addr(void *addr)
/* Guest Page State used for virtualization */
#define _PGSTE_GPS_ZERO 0x0000000080000000UL
+#define _PGSTE_GPS_NODAT 0x0000000040000000UL
#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL
#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL
#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL
@@ -505,7 +505,7 @@ static inline int mm_alloc_pgste(struct mm_struct *mm)
* In the case that a guest uses storage keys
* faults should no longer be backed by zero pages
*/
-#define mm_forbids_zeropage mm_use_skey
+#define mm_forbids_zeropage mm_has_pgste
static inline int mm_use_skey(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
@@ -952,15 +952,30 @@ static inline pte_t pte_mkhuge(pte_t pte)
#define IPTE_GLOBAL 0
#define IPTE_LOCAL 1
-static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local)
+#define IPTE_NODAT 0x400
+#define IPTE_GUEST_ASCE 0x800
+
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep,
+ unsigned long opt, unsigned long asce,
+ int local)
{
unsigned long pto = (unsigned long) ptep;
- /* Invalidation + TLB flush for the pte */
+ if (__builtin_constant_p(opt) && opt == 0) {
+ /* Invalidation + TLB flush for the pte */
+ asm volatile(
+ " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
+ : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
+ [m4] "i" (local));
+ return;
+ }
+
+ /* Invalidate ptes with options + TLB flush of the ptes */
+ opt = opt | (asce & _ASCE_ORIGIN);
asm volatile(
- " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
- : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
- [m4] "i" (local));
+ " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]"
+ : [r2] "+a" (address), [r3] "+a" (opt)
+ : [r1] "a" (pto), [m4] "i" (local) : "memory");
}
static inline void __ptep_ipte_range(unsigned long address, int nr,
@@ -1341,31 +1356,61 @@ static inline void __pmdp_csp(pmd_t *pmdp)
#define IDTE_GLOBAL 0
#define IDTE_LOCAL 1
-static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local)
+#define IDTE_PTOA 0x0800
+#define IDTE_NODAT 0x1000
+#define IDTE_GUEST_ASCE 0x2000
+
+static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp,
+ unsigned long opt, unsigned long asce,
+ int local)
{
unsigned long sto;
- sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
- asm volatile(
- " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
- : "+m" (*pmdp)
- : [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)),
- [m4] "i" (local)
- : "cc" );
+ sto = (unsigned long) pmdp - pmd_index(addr) * sizeof(pmd_t);
+ if (__builtin_constant_p(opt) && opt == 0) {
+ /* flush without guest asce */
+ asm volatile(
+ " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+ : "+m" (*pmdp)
+ : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK)),
+ [m4] "i" (local)
+ : "cc" );
+ } else {
+ /* flush with guest asce */
+ asm volatile(
+ " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]"
+ : "+m" (*pmdp)
+ : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt),
+ [r3] "a" (asce), [m4] "i" (local)
+ : "cc" );
+ }
}
-static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local)
+static inline void __pudp_idte(unsigned long addr, pud_t *pudp,
+ unsigned long opt, unsigned long asce,
+ int local)
{
unsigned long r3o;
- r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t);
+ r3o = (unsigned long) pudp - pud_index(addr) * sizeof(pud_t);
r3o |= _ASCE_TYPE_REGION3;
- asm volatile(
- " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
- : "+m" (*pudp)
- : [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)),
- [m4] "i" (local)
- : "cc");
+ if (__builtin_constant_p(opt) && opt == 0) {
+ /* flush without guest asce */
+ asm volatile(
+ " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+ : "+m" (*pudp)
+ : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK)),
+ [m4] "i" (local)
+ : "cc");
+ } else {
+ /* flush with guest asce */
+ asm volatile(
+ " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]"
+ : "+m" (*pudp)
+ : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt),
+ [r3] "a" (asce), [m4] "i" (local)
+ : "cc" );
+ }
}
pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
@@ -1548,8 +1593,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-#endif /* !__ASSEMBLY__ */
-
#define kern_addr_valid(addr) (1)
extern int vmem_add_mapping(unsigned long start, unsigned long size);
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 998b61cd0e56..eaee69e7c42a 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -80,7 +80,7 @@ struct qdr {
u32 qkey : 4;
u32 : 28;
struct qdesfmt0 qdf0[126];
-} __attribute__ ((packed, aligned(4096)));
+} __packed __aligned(PAGE_SIZE);
#define QIB_AC_OUTBOUND_PCI_SUPPORTED 0x40
#define QIB_RFLAGS_ENABLE_QEBSM 0x80
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index cd78155b1829..490e035b3716 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -29,8 +29,10 @@
#define MACHINE_FLAG_TE _BITUL(11)
#define MACHINE_FLAG_TLB_LC _BITUL(12)
#define MACHINE_FLAG_VX _BITUL(13)
-#define MACHINE_FLAG_NX _BITUL(14)
-#define MACHINE_FLAG_GS _BITUL(15)
+#define MACHINE_FLAG_TLB_GUEST _BITUL(14)
+#define MACHINE_FLAG_NX _BITUL(15)
+#define MACHINE_FLAG_GS _BITUL(16)
+#define MACHINE_FLAG_SCC _BITUL(17)
#define LPP_MAGIC _BITUL(31)
#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL)
@@ -68,8 +70,10 @@ extern void detect_memory_memblock(void);
#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX)
+#define MACHINE_HAS_TLB_GUEST (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST)
#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
+#define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC)
/*
* Console mode. Override with conmode=
@@ -104,9 +108,16 @@ extern void pfault_fini(void);
#define pfault_fini() do { } while (0)
#endif /* CONFIG_PFAULT */
+#ifdef CONFIG_VMCP
+void vmcp_cma_reserve(void);
+#else
+static inline void vmcp_cma_reserve(void) { }
+#endif
+
void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
-extern void cmma_init(void);
+void cmma_init(void);
+void cmma_init_nodat(void);
extern void (*_machine_restart)(char *command);
extern void (*_machine_halt)(void);
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index f7838ecd83c6..8182b521c42f 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -92,17 +92,11 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
{
typecheck(int, lp->lock);
asm volatile(
- "st %1,%0\n"
- : "+Q" (lp->lock)
- : "d" (0)
- : "cc", "memory");
-}
-
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- while (arch_spin_is_locked(lock))
- arch_spin_relax(lock);
- smp_acquire__after_ctrl_dep();
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ " .long 0xb2fa0070\n" /* NIAI 7 */
+#endif
+ " st %1,%0\n"
+ : "=Q" (lp->lock) : "d" (0) : "cc", "memory");
}
/*
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 118535123f34..93f2eb3f277c 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -15,6 +15,8 @@
/* The value of the TOD clock for 1.1.1970. */
#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
+extern u64 clock_comparator_max;
+
/* Inline functions for clock register access. */
static inline int set_tod_clock(__u64 time)
{
@@ -126,7 +128,7 @@ static inline unsigned long long local_tick_disable(void)
unsigned long long old;
old = S390_lowcore.clock_comparator;
- S390_lowcore.clock_comparator = -1ULL;
+ S390_lowcore.clock_comparator = clock_comparator_max;
set_clock_comparator(S390_lowcore.clock_comparator);
return old;
}
@@ -174,24 +176,24 @@ static inline cycles_t get_cycles(void)
return (cycles_t) get_tod_clock() >> 2;
}
-int get_phys_clock(unsigned long long *clock);
+int get_phys_clock(unsigned long *clock);
void init_cpu_timer(void);
unsigned long long monotonic_clock(void);
-extern u64 sched_clock_base_cc;
+extern unsigned char tod_clock_base[16] __aligned(8);
/**
* get_clock_monotonic - returns current time in clock rate units
*
* The caller must ensure that preemption is disabled.
- * The clock and sched_clock_base get changed via stop_machine.
+ * The clock and tod_clock_base get changed via stop_machine.
* Therefore preemption must be disabled when calling this
* function, otherwise the returned value is not guaranteed to
* be monotonic.
*/
static inline unsigned long long get_tod_clock_monotonic(void)
{
- return get_tod_clock() - sched_clock_base_cc;
+ return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1];
}
/**
@@ -218,4 +220,32 @@ static inline unsigned long long tod_to_ns(unsigned long long todval)
return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
}
+/**
+ * tod_after - compare two 64 bit TOD values
+ * @a: first 64 bit TOD timestamp
+ * @b: second 64 bit TOD timestamp
+ *
+ * Returns: true if a is later than b
+ */
+static inline int tod_after(unsigned long long a, unsigned long long b)
+{
+ if (MACHINE_HAS_SCC)
+ return (long long) a > (long long) b;
+ return a > b;
+}
+
+/**
+ * tod_after_eq - compare two 64 bit TOD values
+ * @a: first 64 bit TOD timestamp
+ * @b: second 64 bit TOD timestamp
+ *
+ * Returns: true if a is later than or equal to b
+ */
+static inline int tod_after_eq(unsigned long long a, unsigned long long b)
+{
+ if (MACHINE_HAS_SCC)
+ return (long long) a >= (long long) b;
+ return a >= b;
+}
+
#endif
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 7317b3108a88..3a14b864b2e3 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -47,10 +47,9 @@ struct mmu_table_batch {
extern void tlb_table_flush(struct mmu_gather *tlb);
extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
-static inline void tlb_gather_mmu(struct mmu_gather *tlb,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
+static inline void
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
tlb->mm = mm;
tlb->start = start;
@@ -76,9 +75,15 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
tlb_flush_mmu_free(tlb);
}
-static inline void tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end)
+static inline void
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+ unsigned long start, unsigned long end, bool force)
{
+ if (force) {
+ tlb->start = start;
+ tlb->end = end;
+ }
+
tlb_flush_mmu(tlb);
}
@@ -130,7 +135,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
unsigned long address)
{
- if (tlb->mm->context.asce_limit <= (1UL << 31))
+ if (tlb->mm->context.asce_limit <= _REGION3_SIZE)
return;
pgtable_pmd_page_dtor(virt_to_page(pmd));
tlb_remove_table(tlb, pmd);
@@ -146,7 +151,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long address)
{
- if (tlb->mm->context.asce_limit <= (1UL << 53))
+ if (tlb->mm->context.asce_limit <= _REGION1_SIZE)
return;
tlb_remove_table(tlb, p4d);
}
@@ -161,7 +166,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
unsigned long address)
{
- if (tlb->mm->context.asce_limit <= (1UL << 42))
+ if (tlb->mm->context.asce_limit <= _REGION2_SIZE)
return;
tlb_remove_table(tlb, pud);
}
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 39846100682a..4d759f8f4bc7 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -20,10 +20,15 @@ static inline void __tlb_flush_local(void)
*/
static inline void __tlb_flush_idte(unsigned long asce)
{
+ unsigned long opt;
+
+ opt = IDTE_PTOA;
+ if (MACHINE_HAS_TLB_GUEST)
+ opt |= IDTE_GUEST_ASCE;
/* Global TLB flush for the mm */
asm volatile(
" .insn rrf,0xb98e0000,0,%0,%1,0"
- : : "a" (2048), "a" (asce) : "cc");
+ : : "a" (opt), "a" (asce) : "cc");
}
#ifdef CONFIG_SMP
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index fa1bfce10370..5222da162b69 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -77,12 +77,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
return &node_to_cpumask_map[node];
}
-/*
- * Returns the number of the node containing node 'node'. This
- * architecture is flat, so it is a pretty simple function!
- */
-#define parent_node(node) (node)
-
#define pcibus_to_node(bus) __pcibus_to_node(bus)
#define node_distance(a, b) __node_distance(a, b)
diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h
deleted file mode 100644
index 6740f4f9781f..000000000000
--- a/arch/s390/include/asm/types.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- * S390 version
- *
- * Derived from "include/asm-i386/types.h"
- */
-#ifndef _S390_TYPES_H
-#define _S390_TYPES_H
-
-#include <uapi/asm/types.h>
-
-#endif /* _S390_TYPES_H */
diff --git a/arch/s390/include/asm/unaligned.h b/arch/s390/include/asm/unaligned.h
deleted file mode 100644
index da9627afe5d8..000000000000
--- a/arch/s390/include/asm/unaligned.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef _ASM_S390_UNALIGNED_H
-#define _ASM_S390_UNALIGNED_H
-
-/*
- * The S390 can do unaligned accesses itself.
- */
-#include <linux/unaligned/access_ok.h>
-#include <linux/unaligned/generic.h>
-
-#define get_unaligned __get_unaligned_be
-#define put_unaligned __put_unaligned_be
-
-#endif /* _ASM_S390_UNALIGNED_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index ca62066895e0..098f28778a13 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -9,4 +9,5 @@ generic-y += param.h
generic-y += poll.h
generic-y += resource.h
generic-y += sockios.h
+generic-y += swab.h
generic-y += termbits.h
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index 1340311dab77..ab5797cdc1b7 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -72,7 +72,10 @@ typedef struct dasd_information2_t {
* 0x02: use diag discipline (diag)
* 0x04: set the device initially online (internal use only)
* 0x08: enable ERP related logging
- * 0x20: give access to raw eckd data
+ * 0x10: allow I/O to fail on lost paths
+ * 0x20: allow I/O to fail when a lock was stolen
+ * 0x40: give access to raw eckd data
+ * 0x80: enable discard support
*/
#define DASD_FEATURE_DEFAULT 0x00
#define DASD_FEATURE_READONLY 0x01
@@ -82,6 +85,7 @@ typedef struct dasd_information2_t {
#define DASD_FEATURE_FAILFAST 0x10
#define DASD_FEATURE_FAILONSLCK 0x20
#define DASD_FEATURE_USERAW 0x40
+#define DASD_FEATURE_DISCARD 0x80
#define DASD_PARTN_BITS 2
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 69d09c39bbcd..cd7359e23d86 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req {
/* kvm attributes for KVM_S390_VM_TOD */
#define KVM_S390_VM_TOD_LOW 0
#define KVM_S390_VM_TOD_HIGH 1
+#define KVM_S390_VM_TOD_EXT 2
+
+struct kvm_s390_vm_tod_clock {
+ __u8 epoch_idx;
+ __u64 tod;
+};
/* kvm attributes for KVM_S390_VM_CPU_MODEL */
/* processor related attributes are r/w */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 52a63f4175cb..a56916c83565 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -108,4 +108,6 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/swab.h b/arch/s390/include/uapi/asm/swab.h
deleted file mode 100644
index da3bfe5cc161..000000000000
--- a/arch/s390/include/uapi/asm/swab.h
+++ /dev/null
@@ -1,89 +0,0 @@
-#ifndef _S390_SWAB_H
-#define _S390_SWAB_H
-
-/*
- * S390 version
- * Copyright IBM Corp. 1999
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-
-#include <linux/types.h>
-
-#ifndef __s390x__
-# define __SWAB_64_THRU_32__
-#endif
-
-#ifdef __s390x__
-static inline __u64 __arch_swab64p(const __u64 *x)
-{
- __u64 result;
-
- asm volatile("lrvg %0,%1" : "=d" (result) : "m" (*x));
- return result;
-}
-#define __arch_swab64p __arch_swab64p
-
-static inline __u64 __arch_swab64(__u64 x)
-{
- __u64 result;
-
- asm volatile("lrvgr %0,%1" : "=d" (result) : "d" (x));
- return result;
-}
-#define __arch_swab64 __arch_swab64
-
-static inline void __arch_swab64s(__u64 *x)
-{
- *x = __arch_swab64p(x);
-}
-#define __arch_swab64s __arch_swab64s
-#endif /* __s390x__ */
-
-static inline __u32 __arch_swab32p(const __u32 *x)
-{
- __u32 result;
-
- asm volatile(
-#ifndef __s390x__
- " icm %0,8,%O1+3(%R1)\n"
- " icm %0,4,%O1+2(%R1)\n"
- " icm %0,2,%O1+1(%R1)\n"
- " ic %0,%1"
- : "=&d" (result) : "Q" (*x) : "cc");
-#else /* __s390x__ */
- " lrv %0,%1"
- : "=d" (result) : "m" (*x));
-#endif /* __s390x__ */
- return result;
-}
-#define __arch_swab32p __arch_swab32p
-
-#ifdef __s390x__
-static inline __u32 __arch_swab32(__u32 x)
-{
- __u32 result;
-
- asm volatile("lrvr %0,%1" : "=d" (result) : "d" (x));
- return result;
-}
-#define __arch_swab32 __arch_swab32
-#endif /* __s390x__ */
-
-static inline __u16 __arch_swab16p(const __u16 *x)
-{
- __u16 result;
-
- asm volatile(
-#ifndef __s390x__
- " icm %0,2,%O1+1(%R1)\n"
- " ic %0,%1\n"
- : "=&d" (result) : "Q" (*x) : "cc");
-#else /* __s390x__ */
- " lrvh %0,%1"
- : "=d" (result) : "m" (*x));
-#endif /* __s390x__ */
- return result;
-}
-#define __arch_swab16p __arch_swab16p
-
-#endif /* _S390_SWAB_H */
diff --git a/arch/s390/include/uapi/asm/vmcp.h b/arch/s390/include/uapi/asm/vmcp.h
new file mode 100644
index 000000000000..4caf71714a55
--- /dev/null
+++ b/arch/s390/include/uapi/asm/vmcp.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright IBM Corp. 2004, 2005
+ * Interface implementation for communication with the z/VM control program
+ * Version 1.0
+ * Author(s): Christian Borntraeger <cborntra@de.ibm.com>
+ *
+ *
+ * z/VM's CP offers the possibility to issue commands via the diagnose code 8.
+ * This driver implements a character device that issues these commands and
+ * returns the answer of CP.
+ *
+ * The idea of this driver is based on cpint from Neale Ferguson
+ */
+
+#ifndef _UAPI_ASM_VMCP_H
+#define _UAPI_ASM_VMCP_H
+
+#include <linux/ioctl.h>
+
+#define VMCP_GETCODE _IOR(0x10, 1, int)
+#define VMCP_SETBUF _IOW(0x10, 2, int)
+#define VMCP_GETSIZE _IOR(0x10, 3, int)
+
+#endif /* _UAPI_ASM_VMCP_H */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index b65c414b6c0e..3d42f91c95fd 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -158,6 +158,7 @@ int main(void)
OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock);
+ OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock);
OFFSET(__LC_CURRENT, lowcore, current_task);
OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index 9f0e4a2785f7..63bc6603e0ed 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -14,6 +14,7 @@
#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/string.h>
+#include <linux/mm.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/cpcmd.h>
@@ -28,9 +29,7 @@ static int diag8_noresponse(int cmdlen)
register unsigned long reg3 asm ("3") = cmdlen;
asm volatile(
- " sam31\n"
" diag %1,%0,0x8\n"
- " sam64\n"
: "+d" (reg3) : "d" (reg2) : "cc");
return reg3;
}
@@ -43,9 +42,7 @@ static int diag8_response(int cmdlen, char *response, int *rlen)
register unsigned long reg5 asm ("5") = *rlen;
asm volatile(
- " sam31\n"
" diag %2,%0,0x8\n"
- " sam64\n"
" brc 8,1f\n"
" agr %1,%4\n"
"1:\n"
@@ -57,7 +54,6 @@ static int diag8_response(int cmdlen, char *response, int *rlen)
/*
* __cpcmd has some restrictions over cpcmd
- * - the response buffer must reside below 2GB (if any)
* - __cpcmd is unlocked and therefore not SMP-safe
*/
int __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
@@ -88,13 +84,12 @@ EXPORT_SYMBOL(__cpcmd);
int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
{
+ unsigned long flags;
char *lowbuf;
int len;
- unsigned long flags;
- if ((virt_to_phys(response) != (unsigned long) response) ||
- (((unsigned long)response + rlen) >> 31)) {
- lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA);
+ if (is_vmalloc_or_module_addr(response)) {
+ lowbuf = kmalloc(rlen, GFP_KERNEL);
if (!lowbuf) {
pr_warn("The cpcmd kernel function failed to allocate a response buffer\n");
return -ENOMEM;
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 86b3e74f569e..1d9e83c401fc 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -866,7 +866,8 @@ static inline void
debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level,
int exception)
{
- active->id.stck = get_tod_clock_fast() - sched_clock_base_cc;
+ active->id.stck = get_tod_clock_fast() -
+ *(unsigned long long *) &tod_clock_base[1];
active->id.fields.cpuid = smp_processor_id();
active->caller = __builtin_return_address(0);
active->id.fields.exception = exception;
@@ -1455,15 +1456,15 @@ int
debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
int area, debug_entry_t * entry, char *out_buf)
{
- unsigned long sec, usec;
+ unsigned long base, sec, usec;
char *except_str;
unsigned long caller;
int rc = 0;
unsigned int level;
level = entry->id.fields.level;
- sec = (entry->id.stck >> 12) + (sched_clock_base_cc >> 12);
- sec = sec - (TOD_UNIX_EPOCH >> 12);
+ base = (*(unsigned long *) &tod_clock_base[0]) >> 4;
+ sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12);
usec = do_div(sec, USEC_PER_SEC);
if (entry->id.fields.exception)
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index dab78babfab6..2aa545dca4d5 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -76,7 +76,7 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
#ifdef CONFIG_CHECK_STACK
sp = __dump_trace(func, data, sp,
- S390_lowcore.panic_stack + frame_size - 4096,
+ S390_lowcore.panic_stack + frame_size - PAGE_SIZE,
S390_lowcore.panic_stack + frame_size);
#endif
sp = __dump_trace(func, data, sp,
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 5d20182ee8ae..ca8cd80e8feb 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -53,8 +53,9 @@ static void __init reset_tod_clock(void)
if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0)
disabled_wait(0);
- sched_clock_base_cc = TOD_UNIX_EPOCH;
- S390_lowcore.last_update_clock = sched_clock_base_cc;
+ memset(tod_clock_base, 0, 16);
+ *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH;
+ S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
}
#ifdef CONFIG_SHARED_KERNEL
@@ -165,8 +166,8 @@ static noinline __init void create_kernel_nss(void)
}
/* re-initialize cputime accounting. */
- sched_clock_base_cc = get_tod_clock();
- S390_lowcore.last_update_clock = sched_clock_base_cc;
+ get_tod_clock_ext(tod_clock_base);
+ S390_lowcore.last_update_clock = *(__u64 *) &tod_clock_base[1];
S390_lowcore.last_update_timer = 0x7fffffffffffffffULL;
S390_lowcore.user_timer = 0;
S390_lowcore.system_timer = 0;
@@ -387,6 +388,12 @@ static __init void detect_machine_facilities(void)
}
if (test_facility(133))
S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
+ if (test_facility(139) && (tod_clock_base[1] & 0x80)) {
+ /* Enable signed clock comparator comparisons */
+ S390_lowcore.machine_flags |= MACHINE_FLAG_SCC;
+ clock_comparator_max = -1ULL >> 1;
+ __ctl_set_bit(0, 53);
+ }
}
static inline void save_vector_registers(void)
@@ -413,7 +420,7 @@ static int __init disable_vector_extension(char *str)
{
S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
__ctl_clear_bit(0, 17);
- return 1;
+ return 0;
}
early_param("novx", disable_vector_extension);
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index eff5b31671d4..8ed753c72d9b 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -302,7 +302,8 @@ ENTRY(startup_kdump)
xc 0xe00(256),0xe00
xc 0xf00(256),0xf00
lctlg %c0,%c15,0x200(%r0) # initialize control registers
- stck __LC_LAST_UPDATE_CLOCK
+ stcke __LC_BOOT_CLOCK
+ mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
spt 6f-.LPG0(%r13)
mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
l %r15,.Lstack-.LPG0(%r13)
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 31c91f24e562..0d8f2a858ced 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -21,8 +21,8 @@ ENTRY(startup_continue)
xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid
mvi __LC_LPP,0x80 # and set LPP_MAGIC
.insn s,0xb2800000,__LC_LPP # load program parameter
-0: larl %r1,sched_clock_base_cc
- mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK
+0: larl %r1,tod_clock_base
+ mvc 0(16,%r1),__LC_BOOT_CLOCK
larl %r13,.LPG1 # get base
lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 6dca93b29bed..a2fdff0e730b 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -105,7 +105,8 @@ void do_IRQ(struct pt_regs *regs, int irq)
old_regs = set_irq_regs(regs);
irq_enter();
- if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+ if (tod_after_eq(S390_lowcore.int_clock,
+ S390_lowcore.clock_comparator))
/* Serve timer interrupts first. */
clock_comparator_work();
generic_handle_irq(irq);
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
index cfac28330b03..4bdc65636603 100644
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -7,6 +7,7 @@
*/
#include <linux/linkage.h>
+#include <asm/page.h>
#include <asm/sigp.h>
/*
@@ -55,8 +56,8 @@ ENTRY(relocate_kernel)
.back_pgm:
lmg %r0,%r15,gprregs-.base(%r13)
.top:
- lghi %r7,4096 # load PAGE_SIZE in r7
- lghi %r9,4096 # load PAGE_SIZE in r9
+ lghi %r7,PAGE_SIZE # load PAGE_SIZE in r7
+ lghi %r9,PAGE_SIZE # load PAGE_SIZE in r9
lg %r5,0(%r2) # read another word for indirection page
aghi %r2,8 # increment pointer
tml %r5,0x1 # is it a destination page?
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 3d1d808ea8a9..164a1e16b53e 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -305,7 +305,7 @@ static void __init setup_lowcore(void)
/*
* Setup lowcore for boot cpu
*/
- BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096);
+ BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
lc = memblock_virt_alloc_low(sizeof(*lc), sizeof(*lc));
lc->restart_psw.mask = PSW_KERNEL_BITS;
lc->restart_psw.addr = (unsigned long) restart_int_handler;
@@ -323,7 +323,7 @@ static void __init setup_lowcore(void)
lc->io_new_psw.mask = PSW_KERNEL_BITS |
PSW_MASK_DAT | PSW_MASK_MCHECK;
lc->io_new_psw.addr = (unsigned long) io_int_handler;
- lc->clock_comparator = -1ULL;
+ lc->clock_comparator = clock_comparator_max;
lc->kernel_stack = ((unsigned long) &init_thread_union)
+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->async_stack = (unsigned long)
@@ -469,10 +469,10 @@ static void __init setup_memory_end(void)
vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
- if (tmp + vmalloc_size + MODULES_LEN <= (1UL << 42))
- vmax = 1UL << 42; /* 3-level kernel page table */
+ if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
+ vmax = _REGION2_SIZE; /* 3-level kernel page table */
else
- vmax = 1UL << 53; /* 4-level kernel page table */
+ vmax = _REGION1_SIZE; /* 4-level kernel page table */
/* module area is at the end of the kernel address space. */
MODULES_END = vmax;
MODULES_VADDR = MODULES_END - MODULES_LEN;
@@ -818,6 +818,9 @@ static int __init setup_hwcaps(void)
case 0x2965:
strcpy(elf_platform, "z13");
break;
+ case 0x3906:
+ strcpy(elf_platform, "z14");
+ break;
}
/*
@@ -922,6 +925,7 @@ void __init setup_arch(char **cmdline_p)
setup_memory_end();
setup_memory();
dma_contiguous_reserve(memory_end);
+ vmcp_cma_reserve();
check_initrd();
reserve_crashkernel();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 1020a11a24e5..1cee6753d47a 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -1181,6 +1181,7 @@ static int __init s390_smp_init(void)
rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
smp_cpu_online, smp_cpu_pre_down);
+ rc = rc <= 0 ? rc : 0;
out:
return rc;
}
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index 39e2f41b6cf0..c8ea715bfe10 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -98,10 +98,16 @@ int page_key_alloc(unsigned long pages)
*/
void page_key_read(unsigned long *pfn)
{
+ struct page *page;
unsigned long addr;
-
- addr = (unsigned long) page_address(pfn_to_page(*pfn));
- *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr);
+ unsigned char key;
+
+ page = pfn_to_page(*pfn);
+ addr = (unsigned long) page_address(page);
+ key = (unsigned char) page_get_storage_key(addr) & 0x7f;
+ if (arch_test_page_nodat(page))
+ key |= 0x80;
+ *(unsigned char *) pfn = key;
}
/*
@@ -126,8 +132,16 @@ void page_key_memorize(unsigned long *pfn)
*/
void page_key_write(void *address)
{
- page_set_storage_key((unsigned long) address,
- page_key_rp->data[page_key_rx], 0);
+ struct page *page;
+ unsigned char key;
+
+ key = page_key_rp->data[page_key_rx];
+ page_set_storage_key((unsigned long) address, key & 0x7f, 0);
+ page = virt_to_page(address);
+ if (key & 0x80)
+ arch_set_page_nodat(page, 0);
+ else
+ arch_set_page_dat(page, 0);
if (++page_key_rx >= PAGE_KEY_DATA_SIZE)
return;
page_key_rp = page_key_rp->next;
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 192efdfac918..5cbd52169348 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -51,8 +51,15 @@
#include <asm/cio.h>
#include "entry.h"
-u64 sched_clock_base_cc = -1; /* Force to data section. */
-EXPORT_SYMBOL_GPL(sched_clock_base_cc);
+unsigned char tod_clock_base[16] __aligned(8) = {
+ /* Force to data section. */
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+EXPORT_SYMBOL_GPL(tod_clock_base);
+
+u64 clock_comparator_max = -1ULL;
+EXPORT_SYMBOL_GPL(clock_comparator_max);
static DEFINE_PER_CPU(struct clock_event_device, comparators);
@@ -75,7 +82,7 @@ void __init time_early_init(void)
struct ptff_qui qui;
/* Initialize TOD steering parameters */
- tod_steering_end = sched_clock_base_cc;
+ tod_steering_end = *(unsigned long long *) &tod_clock_base[1];
vdso_data->ts_end = tod_steering_end;
if (!test_facility(28))
@@ -111,22 +118,27 @@ unsigned long long monotonic_clock(void)
}
EXPORT_SYMBOL(monotonic_clock);
-static void tod_to_timeval(__u64 todval, struct timespec64 *xt)
+static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt)
{
- unsigned long long sec;
+ unsigned long long high, low, rem, sec, nsec;
+
+ /* Split extended TOD clock to micro-seconds and sub-micro-seconds */
+ high = (*(unsigned long long *) clk) >> 4;
+ low = (*(unsigned long long *)&clk[7]) << 4;
+ /* Calculate seconds and nano-seconds */
+ sec = high;
+ rem = do_div(sec, 1000000);
+ nsec = (((low >> 32) + (rem << 32)) * 1000) >> 32;
- sec = todval >> 12;
- do_div(sec, 1000000);
xt->tv_sec = sec;
- todval -= (sec * 1000000) << 12;
- xt->tv_nsec = ((todval * 1000) >> 12);
+ xt->tv_nsec = nsec;
}
void clock_comparator_work(void)
{
struct clock_event_device *cd;
- S390_lowcore.clock_comparator = -1ULL;
+ S390_lowcore.clock_comparator = clock_comparator_max;
cd = this_cpu_ptr(&comparators);
cd->event_handler(cd);
}
@@ -148,7 +160,7 @@ void init_cpu_timer(void)
struct clock_event_device *cd;
int cpu;
- S390_lowcore.clock_comparator = -1ULL;
+ S390_lowcore.clock_comparator = clock_comparator_max;
set_clock_comparator(S390_lowcore.clock_comparator);
cpu = smp_processor_id();
@@ -179,7 +191,7 @@ static void clock_comparator_interrupt(struct ext_code ext_code,
unsigned long param64)
{
inc_irq_stat(IRQEXT_CLK);
- if (S390_lowcore.clock_comparator == -1ULL)
+ if (S390_lowcore.clock_comparator == clock_comparator_max)
set_clock_comparator(S390_lowcore.clock_comparator);
}
@@ -197,18 +209,28 @@ static void stp_reset(void);
void read_persistent_clock64(struct timespec64 *ts)
{
- __u64 clock;
+ unsigned char clk[STORE_CLOCK_EXT_SIZE];
+ __u64 delta;
- clock = get_tod_clock() - initial_leap_seconds;
- tod_to_timeval(clock - TOD_UNIX_EPOCH, ts);
+ delta = initial_leap_seconds + TOD_UNIX_EPOCH;
+ get_tod_clock_ext(clk);
+ *(__u64 *) &clk[1] -= delta;
+ if (*(__u64 *) &clk[1] > delta)
+ clk[0]--;
+ ext_to_timespec64(clk, ts);
}
void read_boot_clock64(struct timespec64 *ts)
{
- __u64 clock;
+ unsigned char clk[STORE_CLOCK_EXT_SIZE];
+ __u64 delta;
- clock = sched_clock_base_cc - initial_leap_seconds;
- tod_to_timeval(clock - TOD_UNIX_EPOCH, ts);
+ delta = initial_leap_seconds + TOD_UNIX_EPOCH;
+ memcpy(clk, tod_clock_base, 16);
+ *(__u64 *) &clk[1] -= delta;
+ if (*(__u64 *) &clk[1] > delta)
+ clk[0]--;
+ ext_to_timespec64(clk, ts);
}
static u64 read_tod_clock(struct clocksource *cs)
@@ -335,7 +357,7 @@ static unsigned long clock_sync_flags;
* source. If the clock mode is local it will return -EOPNOTSUPP and
* -EAGAIN if the clock is not in sync with the external reference.
*/
-int get_phys_clock(unsigned long long *clock)
+int get_phys_clock(unsigned long *clock)
{
atomic_t *sw_ptr;
unsigned int sw0, sw1;
@@ -406,7 +428,10 @@ static void clock_sync_global(unsigned long long delta)
struct ptff_qto qto;
/* Fixup the monotonic sched clock. */
- sched_clock_base_cc += delta;
+ *(unsigned long long *) &tod_clock_base[1] += delta;
+ if (*(unsigned long long *) &tod_clock_base[1] < delta)
+ /* Epoch overflow */
+ tod_clock_base[0]++;
/* Adjust TOD steering parameters. */
vdso_data->tb_update_count++;
now = get_tod_clock();
@@ -437,7 +462,7 @@ static void clock_sync_global(unsigned long long delta)
static void clock_sync_local(unsigned long long delta)
{
/* Add the delta to the clock comparator. */
- if (S390_lowcore.clock_comparator != -1ULL) {
+ if (S390_lowcore.clock_comparator != clock_comparator_max) {
S390_lowcore.clock_comparator += delta;
set_clock_comparator(S390_lowcore.clock_comparator);
}
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index b89d19f6f2ab..eacda05b45d7 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -157,6 +157,8 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore)
page_frame = get_zeroed_page(GFP_KERNEL);
if (!segment_table || !page_table || !page_frame)
goto out;
+ arch_set_page_dat(virt_to_page(segment_table), SEGMENT_ORDER);
+ arch_set_page_dat(virt_to_page(page_table), 0);
/* Initialize per-cpu vdso data page */
vd = (struct vdso_per_cpu_data *) page_frame;
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
index 8f048c2d6d13..263a7f9eee1e 100644
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -2,6 +2,8 @@
* This is the infamous ld script for the 32 bits vdso
* library
*/
+
+#include <asm/page.h>
#include <asm/vdso.h>
OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
@@ -91,7 +93,7 @@ SECTIONS
.debug_ranges 0 : { *(.debug_ranges) }
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
PROVIDE(_vdso_data = .);
/DISCARD/ : {
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
index f35455d497fe..9e3dbbcc1cfc 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -2,6 +2,8 @@
* This is the infamous ld script for the 64 bits vdso
* library
*/
+
+#include <asm/page.h>
#include <asm/vdso.h>
OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
@@ -91,7 +93,7 @@ SECTIONS
.debug_ranges 0 : { *(.debug_ranges) }
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
- . = ALIGN(4096);
+ . = ALIGN(PAGE_SIZE);
PROVIDE(_vdso_data = .);
/DISCARD/ : {
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index ce865bd4f81d..d93a2c0474bf 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -27,7 +27,7 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
unsigned long prefix = kvm_s390_get_prefix(vcpu);
start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
- end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
+ end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE;
vcpu->stat.diagnose_10++;
if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
@@ -51,9 +51,9 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
*/
gmap_discard(vcpu->arch.gmap, start, prefix);
if (start <= prefix)
- gmap_discard(vcpu->arch.gmap, 0, 4096);
- if (end > prefix + 4096)
- gmap_discard(vcpu->arch.gmap, 4096, 8192);
+ gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
+ if (end > prefix + PAGE_SIZE)
+ gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
}
return 0;
@@ -150,7 +150,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
vcpu->stat.diagnose_44++;
- kvm_vcpu_on_spin(vcpu);
+ kvm_vcpu_on_spin(vcpu, true);
return 0;
}
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 653cae5e1ee1..3cc77391a102 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -629,7 +629,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
if (asce.r)
goto real_address;
- ptr = asce.origin * 4096;
+ ptr = asce.origin * PAGE_SIZE;
switch (asce.dt) {
case ASCE_TYPE_REGION1:
if (vaddr.rfx01 > asce.tl)
@@ -674,7 +674,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
return PGM_REGION_SECOND_TRANS;
if (edat1)
dat_protection |= rfte.p;
- ptr = rfte.rto * 4096 + vaddr.rsx * 8;
+ ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
}
/* fallthrough */
case ASCE_TYPE_REGION2: {
@@ -692,7 +692,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
return PGM_REGION_THIRD_TRANS;
if (edat1)
dat_protection |= rste.p;
- ptr = rste.rto * 4096 + vaddr.rtx * 8;
+ ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
}
/* fallthrough */
case ASCE_TYPE_REGION3: {
@@ -720,7 +720,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
return PGM_SEGMENT_TRANSLATION;
if (edat1)
dat_protection |= rtte.fc0.p;
- ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8;
+ ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
}
/* fallthrough */
case ASCE_TYPE_SEGMENT: {
@@ -743,7 +743,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
goto absolute_address;
}
dat_protection |= ste.fc0.p;
- ptr = ste.fc0.pto * 2048 + vaddr.px * 8;
+ ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
}
}
if (kvm_is_error_gpa(vcpu->kvm, ptr))
@@ -993,7 +993,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
parent = sg->parent;
vaddr.addr = saddr;
asce.val = sg->orig_asce;
- ptr = asce.origin * 4096;
+ ptr = asce.origin * PAGE_SIZE;
if (asce.r) {
*fake = 1;
ptr = 0;
@@ -1029,7 +1029,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
union region1_table_entry rfte;
if (*fake) {
- ptr += (unsigned long) vaddr.rfx << 53;
+ ptr += vaddr.rfx * _REGION1_SIZE;
rfte.val = ptr;
goto shadow_r2t;
}
@@ -1044,7 +1044,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
return PGM_REGION_SECOND_TRANS;
if (sg->edat_level >= 1)
*dat_protection |= rfte.p;
- ptr = rfte.rto << 12UL;
+ ptr = rfte.rto * PAGE_SIZE;
shadow_r2t:
rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
if (rc)
@@ -1055,7 +1055,7 @@ shadow_r2t:
union region2_table_entry rste;
if (*fake) {
- ptr += (unsigned long) vaddr.rsx << 42;
+ ptr += vaddr.rsx * _REGION2_SIZE;
rste.val = ptr;
goto shadow_r3t;
}
@@ -1070,7 +1070,7 @@ shadow_r2t:
return PGM_REGION_THIRD_TRANS;
if (sg->edat_level >= 1)
*dat_protection |= rste.p;
- ptr = rste.rto << 12UL;
+ ptr = rste.rto * PAGE_SIZE;
shadow_r3t:
rste.p |= *dat_protection;
rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
@@ -1082,7 +1082,7 @@ shadow_r3t:
union region3_table_entry rtte;
if (*fake) {
- ptr += (unsigned long) vaddr.rtx << 31;
+ ptr += vaddr.rtx * _REGION3_SIZE;
rtte.val = ptr;
goto shadow_sgt;
}
@@ -1098,7 +1098,7 @@ shadow_r3t:
if (rtte.fc && sg->edat_level >= 2) {
*dat_protection |= rtte.fc0.p;
*fake = 1;
- ptr = rtte.fc1.rfaa << 31UL;
+ ptr = rtte.fc1.rfaa * _REGION3_SIZE;
rtte.val = ptr;
goto shadow_sgt;
}
@@ -1106,7 +1106,7 @@ shadow_r3t:
return PGM_SEGMENT_TRANSLATION;
if (sg->edat_level >= 1)
*dat_protection |= rtte.fc0.p;
- ptr = rtte.fc0.sto << 12UL;
+ ptr = rtte.fc0.sto * PAGE_SIZE;
shadow_sgt:
rtte.fc0.p |= *dat_protection;
rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
@@ -1118,7 +1118,7 @@ shadow_sgt:
union segment_table_entry ste;
if (*fake) {
- ptr += (unsigned long) vaddr.sx << 20;
+ ptr += vaddr.sx * _SEGMENT_SIZE;
ste.val = ptr;
goto shadow_pgt;
}
@@ -1134,11 +1134,11 @@ shadow_sgt:
*dat_protection |= ste.fc0.p;
if (ste.fc && sg->edat_level >= 1) {
*fake = 1;
- ptr = ste.fc1.sfaa << 20UL;
+ ptr = ste.fc1.sfaa * _SEGMENT_SIZE;
ste.val = ptr;
goto shadow_pgt;
}
- ptr = ste.fc0.pto << 11UL;
+ ptr = ste.fc0.pto * (PAGE_SIZE / 2);
shadow_pgt:
ste.fc0.p |= *dat_protection;
rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
@@ -1187,8 +1187,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
vaddr.addr = saddr;
if (fake) {
- /* offset in 1MB guest memory block */
- pte.val = pgt + ((unsigned long) vaddr.px << 12UL);
+ pte.val = pgt + vaddr.px * PAGE_SIZE;
goto shadow_page;
}
if (!rc)
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index c2e0ddc1356e..bcbd86621d01 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -308,7 +308,7 @@ static inline int in_addr_range(u64 addr, u64 a, u64 b)
return (addr >= a) && (addr <= b);
else
/* "overflowing" interval */
- return (addr <= a) && (addr >= b);
+ return (addr >= a) || (addr <= b);
}
#define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a619ddae610d..a832ad031cee 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2479,6 +2479,7 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
struct kvm_s390_mchk_info *mchk;
union mci mci;
__u64 cr14 = 0; /* upper bits are not used */
+ int rc;
mci.val = mcck_info->mcic;
if (mci.sr)
@@ -2496,12 +2497,13 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
if (mci.ck) {
/* Inject the floating machine check */
inti.type = KVM_S390_MCHK;
- WARN_ON_ONCE(__inject_vm(vcpu->kvm, &inti));
+ rc = __inject_vm(vcpu->kvm, &inti);
} else {
/* Inject the machine check to specified vcpu */
irq.type = KVM_S390_MCHK;
- WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
+ rc = kvm_s390_inject_vcpu(vcpu, &irq);
}
+ WARN_ON_ONCE(rc);
}
int kvm_set_routing_entry(struct kvm *kvm,
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index af09d3437631..40d0a1a97889 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -130,6 +130,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
+struct kvm_s390_tod_clock_ext {
+ __u8 epoch_idx;
+ __u64 tod;
+ __u8 reserved[7];
+} __packed;
+
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
@@ -874,6 +880,26 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm,
return 0;
}
+static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_tod_clock gtod;
+
+ if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
+ return -EFAULT;
+
+ if (test_kvm_facility(kvm, 139))
+ kvm_s390_set_tod_clock_ext(kvm, &gtod);
+ else if (gtod.epoch_idx == 0)
+ kvm_s390_set_tod_clock(kvm, gtod.tod);
+ else
+ return -EINVAL;
+
+ VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
+ gtod.epoch_idx, gtod.tod);
+
+ return 0;
+}
+
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
u8 gtod_high;
@@ -909,6 +935,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
return -EINVAL;
switch (attr->attr) {
+ case KVM_S390_VM_TOD_EXT:
+ ret = kvm_s390_set_tod_ext(kvm, attr);
+ break;
case KVM_S390_VM_TOD_HIGH:
ret = kvm_s390_set_tod_high(kvm, attr);
break;
@@ -922,6 +951,43 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
return ret;
}
+static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
+ struct kvm_s390_vm_tod_clock *gtod)
+{
+ struct kvm_s390_tod_clock_ext htod;
+
+ preempt_disable();
+
+ get_tod_clock_ext((char *)&htod);
+
+ gtod->tod = htod.tod + kvm->arch.epoch;
+ gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
+
+ if (gtod->tod < htod.tod)
+ gtod->epoch_idx += 1;
+
+ preempt_enable();
+}
+
+static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_tod_clock gtod;
+
+ memset(&gtod, 0, sizeof(gtod));
+
+ if (test_kvm_facility(kvm, 139))
+ kvm_s390_get_tod_clock_ext(kvm, &gtod);
+ else
+ gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
+
+ if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
+ return -EFAULT;
+
+ VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
+ gtod.epoch_idx, gtod.tod);
+ return 0;
+}
+
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
u8 gtod_high = 0;
@@ -954,6 +1020,9 @@ static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
return -EINVAL;
switch (attr->attr) {
+ case KVM_S390_VM_TOD_EXT:
+ ret = kvm_s390_get_tod_ext(kvm, attr);
+ break;
case KVM_S390_VM_TOD_HIGH:
ret = kvm_s390_get_tod_high(kvm, attr);
break;
@@ -1505,7 +1574,7 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
if (r < 0)
pgstev = 0;
/* save the value */
- res[i++] = (pgstev >> 24) & 0x3;
+ res[i++] = (pgstev >> 24) & 0x43;
/*
* if the next bit is too far away, stop.
* if we reached the previous "next", find the next one
@@ -1583,7 +1652,7 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm,
pgstev = bits[i];
pgstev = pgstev << 24;
- mask &= _PGSTE_GPS_USAGE_MASK;
+ mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
set_pgste_bits(kvm->mm, hva, mask, pgstev);
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
@@ -1858,8 +1927,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
+ /* we are always in czam mode - even on pre z14 machines */
+ set_kvm_facility(kvm->arch.model.fac_mask, 138);
+ set_kvm_facility(kvm->arch.model.fac_list, 138);
+ /* we emulate STHYI in kvm */
set_kvm_facility(kvm->arch.model.fac_mask, 74);
set_kvm_facility(kvm->arch.model.fac_list, 74);
+ if (MACHINE_HAS_TLB_GUEST) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 147);
+ set_kvm_facility(kvm->arch.model.fac_list, 147);
+ }
kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
@@ -2369,6 +2446,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->eca |= ECA_VX;
vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
}
+ if (test_kvm_facility(vcpu->kvm, 139))
+ vcpu->arch.sie_block->ecd |= ECD_MEF;
+
vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
| SDNXC;
vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
@@ -2447,6 +2527,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return kvm_s390_vcpu_has_irq(vcpu, 0);
}
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
+}
+
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
@@ -2855,6 +2940,35 @@ retry:
return 0;
}
+void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
+ const struct kvm_s390_vm_tod_clock *gtod)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_s390_tod_clock_ext htod;
+ int i;
+
+ mutex_lock(&kvm->lock);
+ preempt_disable();
+
+ get_tod_clock_ext((char *)&htod);
+
+ kvm->arch.epoch = gtod->tod - htod.tod;
+ kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
+
+ if (kvm->arch.epoch > gtod->tod)
+ kvm->arch.epdx -= 1;
+
+ kvm_s390_vcpu_block_all(kvm);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ vcpu->arch.sie_block->epoch = kvm->arch.epoch;
+ vcpu->arch.sie_block->epdx = kvm->arch.epdx;
+ }
+
+ kvm_s390_vcpu_unblock_all(kvm);
+ preempt_enable();
+ mutex_unlock(&kvm->lock);
+}
+
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
struct kvm_vcpu *vcpu;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 6fedc8bc7a37..9f8fdd7b2311 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -272,6 +272,8 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
int handle_sthyi(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
+void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
+ const struct kvm_s390_vm_tod_clock *gtod);
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 8a1dac793d6b..c954ac49eee4 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -329,7 +329,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
start = kvm_s390_logical_to_effective(vcpu, start);
if (m3 & SSKE_MB) {
/* start already designates an absolute address */
- end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
+ end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
} else {
start = kvm_s390_real_to_abs(vcpu, start);
end = start + PAGE_SIZE;
@@ -893,10 +893,10 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
case 0x00000000:
/* only 4k frames specify a real address */
start = kvm_s390_real_to_abs(vcpu, start);
- end = (start + (1UL << 12)) & ~((1UL << 12) - 1);
+ end = (start + PAGE_SIZE) & ~(PAGE_SIZE - 1);
break;
case 0x00001000:
- end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
+ end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
break;
case 0x00002000:
/* only support 2G frame size if EDAT2 is available and we are
@@ -904,7 +904,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
if (!test_kvm_facility(vcpu->kvm, 78) ||
psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
+ end = (start + _REGION3_SIZE) & ~(_REGION3_SIZE - 1);
break;
default:
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -988,6 +988,8 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
if (pgstev & _PGSTE_GPS_ZERO)
res |= 1;
}
+ if (pgstev & _PGSTE_GPS_NODAT)
+ res |= 0x20;
vcpu->run->s.regs.gprs[r1] = res;
/*
* It is possible that all the normal 511 slots were full, in which case
@@ -1027,7 +1029,9 @@ static int handle_essa(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
/* Check for invalid operation request code */
orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
- if (orc > ESSA_MAX)
+ /* ORCs 0-6 are always valid */
+ if (orc > (test_kvm_facility(vcpu->kvm, 147) ? ESSA_SET_STABLE_NODAT
+ : ESSA_SET_STABLE_IF_RESIDENT))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (likely(!vcpu->kvm->arch.migration_state)) {
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 1a252f537081..9d592ef4104b 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -155,29 +155,26 @@ static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
return rc;
}
-static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
+static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter,
+ u64 *status_reg)
{
- int rc;
unsigned int i;
struct kvm_vcpu *v;
+ bool all_stopped = true;
- switch (parameter & 0xff) {
- case 0:
- rc = SIGP_CC_NOT_OPERATIONAL;
- break;
- case 1:
- case 2:
- kvm_for_each_vcpu(i, v, vcpu->kvm) {
- v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
- kvm_clear_async_pf_completion_queue(v);
- }
-
- rc = SIGP_CC_ORDER_CODE_ACCEPTED;
- break;
- default:
- rc = -EOPNOTSUPP;
+ kvm_for_each_vcpu(i, v, vcpu->kvm) {
+ if (v == vcpu)
+ continue;
+ if (!is_vcpu_stopped(v))
+ all_stopped = false;
}
- return rc;
+
+ *status_reg &= 0xffffffff00000000UL;
+
+ /* Reject set arch order, with czam we're always in z/Arch mode. */
+ *status_reg |= (all_stopped ? SIGP_STATUS_INVALID_PARAMETER :
+ SIGP_STATUS_INCORRECT_STATE);
+ return SIGP_CC_STATUS_STORED;
}
static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
@@ -446,7 +443,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
switch (order_code) {
case SIGP_SET_ARCHITECTURE:
vcpu->stat.instruction_sigp_arch++;
- rc = __sigp_set_arch(vcpu, parameter);
+ rc = __sigp_set_arch(vcpu, parameter,
+ &vcpu->run->s.regs.gprs[r1]);
break;
default:
rc = handle_sigp_dst(vcpu, order_code, cpu_addr,
diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c
index 926b5244263e..395926b8c1ed 100644
--- a/arch/s390/kvm/sthyi.c
+++ b/arch/s390/kvm/sthyi.c
@@ -394,7 +394,7 @@ static int sthyi(u64 vaddr)
"srl %[cc],28\n"
: [cc] "=d" (cc)
: [code] "d" (code), [addr] "a" (addr)
- : "memory", "cc");
+ : "3", "memory", "cc");
return cc;
}
@@ -425,7 +425,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr);
trace_kvm_s390_handle_sthyi(vcpu, code, addr);
- if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK)
+ if (reg1 == reg2 || reg1 & 1 || reg2 & 1)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (code & 0xffff) {
@@ -433,13 +433,8 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
goto out;
}
- /*
- * If the page has not yet been faulted in, we want to do that
- * now and not after all the expensive calculations.
- */
- r = write_guest(vcpu, addr, reg2, &cc, 1);
- if (r)
- return kvm_s390_inject_prog_cond(vcpu, r);
+ if (addr & ~PAGE_MASK)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
sctns = (void *)get_zeroed_page(GFP_KERNEL);
if (!sctns)
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 715c19c45d9a..b18b5652e5c5 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -349,6 +349,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
scb_s->eca |= scb_o->eca & ECA_IB;
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
scb_s->eca |= scb_o->eca & ECA_CEI;
+ /* Epoch Extension */
+ if (test_kvm_facility(vcpu->kvm, 139))
+ scb_s->ecd |= scb_o->ecd & ECD_MEF;
prepare_ibc(vcpu, vsie_page);
rc = shadow_crycb(vcpu, vsie_page);
@@ -806,8 +809,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
- struct mcck_volatile_info *mcck_info;
- struct sie_page *sie_page;
int rc;
handle_last_fault(vcpu, vsie_page);
@@ -831,9 +832,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (rc == -EINTR) {
VCPU_EVENT(vcpu, 3, "%s", "machine check");
- sie_page = container_of(scb_s, struct sie_page, sie_block);
- mcck_info = &sie_page->mcck_info;
- kvm_s390_reinject_machine_check(vcpu, mcck_info);
+ kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
return 0;
}
@@ -919,6 +918,13 @@ static void register_shadow_scb(struct kvm_vcpu *vcpu,
*/
preempt_disable();
scb_s->epoch += vcpu->kvm->arch.epoch;
+
+ if (scb_s->ecd & ECD_MEF) {
+ scb_s->epdx += vcpu->kvm->arch.epdx;
+ if (scb_s->epoch < vcpu->kvm->arch.epoch)
+ scb_s->epdx += 1;
+ }
+
preempt_enable();
}
@@ -1069,7 +1075,7 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- BUILD_BUG_ON(sizeof(struct vsie_page) != 4096);
+ BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE);
scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);
/* 512 byte alignment */
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 92e90e40b6fb..7f17555ad4d5 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -57,7 +57,7 @@ static void __udelay_enabled(unsigned long long usecs)
end = get_tod_clock_fast() + (usecs << 12);
do {
clock_saved = 0;
- if (end < S390_lowcore.clock_comparator) {
+ if (tod_after(S390_lowcore.clock_comparator, end)) {
clock_saved = local_tick_disable();
set_clock_comparator(end);
}
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index ffb15bd4c593..b12663d653d8 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -32,42 +32,63 @@ static int __init spin_retry_setup(char *str)
}
__setup("spin_retry=", spin_retry_setup);
+static inline int arch_load_niai4(int *lock)
+{
+ int owner;
+
+ asm volatile(
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ " .long 0xb2fa0040\n" /* NIAI 4 */
+#endif
+ " l %0,%1\n"
+ : "=d" (owner) : "Q" (*lock) : "memory");
+ return owner;
+}
+
+static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
+{
+ int expected = old;
+
+ asm volatile(
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ " .long 0xb2fa0080\n" /* NIAI 8 */
+#endif
+ " cs %0,%3,%1\n"
+ : "=d" (old), "=Q" (*lock)
+ : "0" (old), "d" (new), "Q" (*lock)
+ : "cc", "memory");
+ return expected == old;
+}
+
void arch_spin_lock_wait(arch_spinlock_t *lp)
{
int cpu = SPINLOCK_LOCKVAL;
- int owner, count, first_diag;
+ int owner, count;
+
+ /* Pass the virtual CPU to the lock holder if it is not running */
+ owner = arch_load_niai4(&lp->lock);
+ if (owner && arch_vcpu_is_preempted(~owner))
+ smp_yield_cpu(~owner);
- first_diag = 1;
+ count = spin_retry;
while (1) {
- owner = ACCESS_ONCE(lp->lock);
+ owner = arch_load_niai4(&lp->lock);
/* Try to get the lock if it is free. */
if (!owner) {
- if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
+ if (arch_cmpxchg_niai8(&lp->lock, 0, cpu))
return;
continue;
}
- /* First iteration: check if the lock owner is running. */
- if (first_diag && arch_vcpu_is_preempted(~owner)) {
- smp_yield_cpu(~owner);
- first_diag = 0;
+ if (count-- >= 0)
continue;
- }
- /* Loop for a while on the lock value. */
count = spin_retry;
- do {
- owner = ACCESS_ONCE(lp->lock);
- } while (owner && count-- > 0);
- if (!owner)
- continue;
/*
* For multiple layers of hypervisors, e.g. z/VM + LPAR
* yield the CPU unconditionally. For LPAR rely on the
* sense running status.
*/
- if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) {
+ if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner))
smp_yield_cpu(~owner);
- first_diag = 0;
- }
}
}
EXPORT_SYMBOL(arch_spin_lock_wait);
@@ -75,42 +96,36 @@ EXPORT_SYMBOL(arch_spin_lock_wait);
void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
{
int cpu = SPINLOCK_LOCKVAL;
- int owner, count, first_diag;
+ int owner, count;
local_irq_restore(flags);
- first_diag = 1;
+
+ /* Pass the virtual CPU to the lock holder if it is not running */
+ owner = arch_load_niai4(&lp->lock);
+ if (owner && arch_vcpu_is_preempted(~owner))
+ smp_yield_cpu(~owner);
+
+ count = spin_retry;
while (1) {
- owner = ACCESS_ONCE(lp->lock);
+ owner = arch_load_niai4(&lp->lock);
/* Try to get the lock if it is free. */
if (!owner) {
local_irq_disable();
- if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
+ if (arch_cmpxchg_niai8(&lp->lock, 0, cpu))
return;
local_irq_restore(flags);
continue;
}
- /* Check if the lock owner is running. */
- if (first_diag && arch_vcpu_is_preempted(~owner)) {
- smp_yield_cpu(~owner);
- first_diag = 0;
+ if (count-- >= 0)
continue;
- }
- /* Loop for a while on the lock value. */
count = spin_retry;
- do {
- owner = ACCESS_ONCE(lp->lock);
- } while (owner && count-- > 0);
- if (!owner)
- continue;
/*
* For multiple layers of hypervisors, e.g. z/VM + LPAR
* yield the CPU unconditionally. For LPAR rely on the
* sense running status.
*/
- if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) {
+ if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner))
smp_yield_cpu(~owner);
- first_diag = 0;
- }
}
}
EXPORT_SYMBOL(arch_spin_lock_wait_flags);
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index b3bd3f23b8e8..4ea9106417ee 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -15,8 +15,30 @@
#include <asm/mmu_context.h>
#include <asm/facility.h>
+#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES
static DEFINE_STATIC_KEY_FALSE(have_mvcos);
+static int __init uaccess_init(void)
+{
+ if (test_facility(27))
+ static_branch_enable(&have_mvcos);
+ return 0;
+}
+early_initcall(uaccess_init);
+
+static inline int copy_with_mvcos(void)
+{
+ if (static_branch_likely(&have_mvcos))
+ return 1;
+ return 0;
+}
+#else
+static inline int copy_with_mvcos(void)
+{
+ return 1;
+}
+#endif
+
static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
unsigned long size)
{
@@ -84,7 +106,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
{
- if (static_branch_likely(&have_mvcos))
+ if (copy_with_mvcos())
return copy_from_user_mvcos(to, from, n);
return copy_from_user_mvcp(to, from, n);
}
@@ -157,7 +179,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
{
- if (static_branch_likely(&have_mvcos))
+ if (copy_with_mvcos())
return copy_to_user_mvcos(to, from, n);
return copy_to_user_mvcs(to, from, n);
}
@@ -220,7 +242,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user
unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
{
- if (static_branch_likely(&have_mvcos))
+ if (copy_with_mvcos())
return copy_in_user_mvcos(to, from, n);
return copy_in_user_mvc(to, from, n);
}
@@ -292,7 +314,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
unsigned long __clear_user(void __user *to, unsigned long size)
{
- if (static_branch_likely(&have_mvcos))
+ if (copy_with_mvcos())
return clear_user_mvcos(to, size);
return clear_user_xc(to, size);
}
@@ -349,11 +371,3 @@ long __strncpy_from_user(char *dst, const char __user *src, long size)
return done;
}
EXPORT_SYMBOL(__strncpy_from_user);
-
-static int __init uaccess_init(void)
-{
- if (test_facility(27))
- static_branch_enable(&have_mvcos);
- return 0;
-}
-early_initcall(uaccess_init);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 14f25798b001..bdabb013537b 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -135,7 +135,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
pr_alert("AS:%016lx ", asce);
switch (asce & _ASCE_TYPE_MASK) {
case _ASCE_TYPE_REGION1:
- table = table + ((address >> 53) & 0x7ff);
+ table += (address & _REGION1_INDEX) >> _REGION1_SHIFT;
if (bad_address(table))
goto bad;
pr_cont("R1:%016lx ", *table);
@@ -144,7 +144,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* fallthrough */
case _ASCE_TYPE_REGION2:
- table = table + ((address >> 42) & 0x7ff);
+ table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
if (bad_address(table))
goto bad;
pr_cont("R2:%016lx ", *table);
@@ -153,7 +153,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* fallthrough */
case _ASCE_TYPE_REGION3:
- table = table + ((address >> 31) & 0x7ff);
+ table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
if (bad_address(table))
goto bad;
pr_cont("R3:%016lx ", *table);
@@ -162,7 +162,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* fallthrough */
case _ASCE_TYPE_SEGMENT:
- table = table + ((address >> 20) & 0x7ff);
+ table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
if (bad_address(table))
goto bad;
pr_cont("S:%016lx ", *table);
@@ -170,7 +170,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
goto out;
table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
}
- table = table + ((address >> 12) & 0xff);
+ table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
if (bad_address(table))
goto bad;
pr_cont("P:%016lx ", *table);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 4fb3d3cdb370..9e1494e3d849 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -36,16 +36,16 @@ static struct gmap *gmap_alloc(unsigned long limit)
unsigned long *table;
unsigned long etype, atype;
- if (limit < (1UL << 31)) {
- limit = (1UL << 31) - 1;
+ if (limit < _REGION3_SIZE) {
+ limit = _REGION3_SIZE - 1;
atype = _ASCE_TYPE_SEGMENT;
etype = _SEGMENT_ENTRY_EMPTY;
- } else if (limit < (1UL << 42)) {
- limit = (1UL << 42) - 1;
+ } else if (limit < _REGION2_SIZE) {
+ limit = _REGION2_SIZE - 1;
atype = _ASCE_TYPE_REGION3;
etype = _REGION3_ENTRY_EMPTY;
- } else if (limit < (1UL << 53)) {
- limit = (1UL << 53) - 1;
+ } else if (limit < _REGION1_SIZE) {
+ limit = _REGION1_SIZE - 1;
atype = _ASCE_TYPE_REGION2;
etype = _REGION2_ENTRY_EMPTY;
} else {
@@ -65,7 +65,7 @@ static struct gmap *gmap_alloc(unsigned long limit)
spin_lock_init(&gmap->guest_table_lock);
spin_lock_init(&gmap->shadow_lock);
atomic_set(&gmap->ref_count, 1);
- page = alloc_pages(GFP_KERNEL, 2);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
goto out_free;
page->index = 0;
@@ -186,7 +186,7 @@ static void gmap_free(struct gmap *gmap)
gmap_flush_tlb(gmap);
/* Free all segment & region tables. */
list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
gmap_radix_tree_free(&gmap->guest_to_host);
gmap_radix_tree_free(&gmap->host_to_guest);
@@ -306,7 +306,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
unsigned long *new;
/* since we dont free the gmap table until gmap_free we can unlock */
- page = alloc_pages(GFP_KERNEL, 2);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
return -ENOMEM;
new = (unsigned long *) page_to_phys(page);
@@ -321,7 +321,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
}
spin_unlock(&gmap->guest_table_lock);
if (page)
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
return 0;
}
@@ -546,30 +546,30 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
/* Create higher level tables in the gmap page table */
table = gmap->table;
if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
- table += (gaddr >> 53) & 0x7ff;
+ table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
- gaddr & 0xffe0000000000000UL))
+ gaddr & _REGION1_MASK))
return -ENOMEM;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
}
if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
- table += (gaddr >> 42) & 0x7ff;
+ table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
- gaddr & 0xfffffc0000000000UL))
+ gaddr & _REGION2_MASK))
return -ENOMEM;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
}
if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
- table += (gaddr >> 31) & 0x7ff;
+ table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
if ((*table & _REGION_ENTRY_INVALID) &&
gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
- gaddr & 0xffffffff80000000UL))
+ gaddr & _REGION3_MASK))
return -ENOMEM;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
}
- table += (gaddr >> 20) & 0x7ff;
+ table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
/* Walk the parent mm page table */
mm = gmap->mm;
pgd = pgd_offset(mm, vmaddr);
@@ -771,7 +771,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
table = gmap->table;
switch (gmap->asce & _ASCE_TYPE_MASK) {
case _ASCE_TYPE_REGION1:
- table += (gaddr >> 53) & 0x7ff;
+ table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
if (level == 4)
break;
if (*table & _REGION_ENTRY_INVALID)
@@ -779,7 +779,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* Fallthrough */
case _ASCE_TYPE_REGION2:
- table += (gaddr >> 42) & 0x7ff;
+ table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
if (level == 3)
break;
if (*table & _REGION_ENTRY_INVALID)
@@ -787,7 +787,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* Fallthrough */
case _ASCE_TYPE_REGION3:
- table += (gaddr >> 31) & 0x7ff;
+ table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
if (level == 2)
break;
if (*table & _REGION_ENTRY_INVALID)
@@ -795,13 +795,13 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
/* Fallthrough */
case _ASCE_TYPE_SEGMENT:
- table += (gaddr >> 20) & 0x7ff;
+ table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
if (level == 1)
break;
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
- table += (gaddr >> 12) & 0xff;
+ table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT;
}
return table;
}
@@ -1126,7 +1126,7 @@ static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
if (!table || *table & _PAGE_INVALID)
return;
- gmap_call_notifier(sg, raddr, raddr + (1UL << 12) - 1);
+ gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1);
ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
}
@@ -1144,7 +1144,7 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
int i;
BUG_ON(!gmap_is_shadow(sg));
- for (i = 0; i < 256; i++, raddr += 1UL << 12)
+ for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE)
pgt[i] = _PAGE_INVALID;
}
@@ -1164,8 +1164,8 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
return;
- gmap_call_notifier(sg, raddr, raddr + (1UL << 20) - 1);
- sto = (unsigned long) (ste - ((raddr >> 20) & 0x7ff));
+ gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
+ sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN);
*ste = _SEGMENT_ENTRY_EMPTY;
@@ -1193,7 +1193,7 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
BUG_ON(!gmap_is_shadow(sg));
asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT;
- for (i = 0; i < 2048; i++, raddr += 1UL << 20) {
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
continue;
pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN);
@@ -1222,8 +1222,8 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
return;
- gmap_call_notifier(sg, raddr, raddr + (1UL << 31) - 1);
- r3o = (unsigned long) (r3e - ((raddr >> 31) & 0x7ff));
+ gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
+ r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr);
sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN);
*r3e = _REGION3_ENTRY_EMPTY;
@@ -1231,7 +1231,7 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
/* Free segment table */
page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
/**
@@ -1251,7 +1251,7 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
BUG_ON(!gmap_is_shadow(sg));
asce = (unsigned long) r3t | _ASCE_TYPE_REGION3;
- for (i = 0; i < 2048; i++, raddr += 1UL << 31) {
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
continue;
sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN);
@@ -1260,7 +1260,7 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
/* Free segment table */
page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1280,8 +1280,8 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
return;
- gmap_call_notifier(sg, raddr, raddr + (1UL << 42) - 1);
- r2o = (unsigned long) (r2e - ((raddr >> 42) & 0x7ff));
+ gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
+ r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr);
r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN);
*r2e = _REGION2_ENTRY_EMPTY;
@@ -1289,7 +1289,7 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
/* Free region 3 table */
page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
/**
@@ -1309,7 +1309,7 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
BUG_ON(!gmap_is_shadow(sg));
asce = (unsigned long) r2t | _ASCE_TYPE_REGION2;
- for (i = 0; i < 2048; i++, raddr += 1UL << 42) {
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
continue;
r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN);
@@ -1318,7 +1318,7 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
/* Free region 3 table */
page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1338,8 +1338,8 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
return;
- gmap_call_notifier(sg, raddr, raddr + (1UL << 53) - 1);
- r1o = (unsigned long) (r1e - ((raddr >> 53) & 0x7ff));
+ gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
+ r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr);
r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN);
*r1e = _REGION1_ENTRY_EMPTY;
@@ -1347,7 +1347,7 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
/* Free region 2 table */
page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
/**
@@ -1367,7 +1367,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
BUG_ON(!gmap_is_shadow(sg));
asce = (unsigned long) r1t | _ASCE_TYPE_REGION1;
- for (i = 0; i < 2048; i++, raddr += 1UL << 53) {
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
continue;
r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN);
@@ -1378,7 +1378,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
/* Free region 2 table */
page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
list_del(&page->lru);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1535,7 +1535,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
/* protect after insertion, so it will get properly invalidated */
down_read(&parent->mm->mmap_sem);
rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
- ((asce & _ASCE_TABLE_LENGTH) + 1) * 4096,
+ ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
PROT_READ, PGSTE_VSIE_BIT);
up_read(&parent->mm->mmap_sem);
spin_lock(&parent->shadow_lock);
@@ -1578,7 +1578,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
BUG_ON(!gmap_is_shadow(sg));
/* Allocate a shadow region second table */
- page = alloc_pages(GFP_KERNEL, 2);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
return -ENOMEM;
page->index = r2t & _REGION_ENTRY_ORIGIN;
@@ -1614,10 +1614,10 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
}
spin_unlock(&sg->guest_table_lock);
/* Make r2t read-only in parent gmap page table */
- raddr = (saddr & 0xffe0000000000000UL) | _SHADOW_RMAP_REGION1;
+ raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
origin = r2t & _REGION_ENTRY_ORIGIN;
- offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * 4096;
- len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
+ offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+ len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
spin_lock(&sg->guest_table_lock);
if (!rc) {
@@ -1634,7 +1634,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
return rc;
out_free:
spin_unlock(&sg->guest_table_lock);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r2t);
@@ -1662,7 +1662,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
BUG_ON(!gmap_is_shadow(sg));
/* Allocate a shadow region second table */
- page = alloc_pages(GFP_KERNEL, 2);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
return -ENOMEM;
page->index = r3t & _REGION_ENTRY_ORIGIN;
@@ -1697,10 +1697,10 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
}
spin_unlock(&sg->guest_table_lock);
/* Make r3t read-only in parent gmap page table */
- raddr = (saddr & 0xfffffc0000000000UL) | _SHADOW_RMAP_REGION2;
+ raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
origin = r3t & _REGION_ENTRY_ORIGIN;
- offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * 4096;
- len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
+ offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+ len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
spin_lock(&sg->guest_table_lock);
if (!rc) {
@@ -1717,7 +1717,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
return rc;
out_free:
spin_unlock(&sg->guest_table_lock);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r3t);
@@ -1745,7 +1745,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
/* Allocate a shadow segment table */
- page = alloc_pages(GFP_KERNEL, 2);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
return -ENOMEM;
page->index = sgt & _REGION_ENTRY_ORIGIN;
@@ -1781,10 +1781,10 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
}
spin_unlock(&sg->guest_table_lock);
/* Make sgt read-only in parent gmap page table */
- raddr = (saddr & 0xffffffff80000000UL) | _SHADOW_RMAP_REGION3;
+ raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
origin = sgt & _REGION_ENTRY_ORIGIN;
- offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * 4096;
- len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * 4096 - offset;
+ offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+ len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
spin_lock(&sg->guest_table_lock);
if (!rc) {
@@ -1801,7 +1801,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
return rc;
out_free:
spin_unlock(&sg->guest_table_lock);
- __free_pages(page, 2);
+ __free_pages(page, CRST_ALLOC_ORDER);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
@@ -1902,7 +1902,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
}
spin_unlock(&sg->guest_table_lock);
/* Make pgt read-only in parent gmap page table (not the pgste) */
- raddr = (saddr & 0xfffffffffff00000UL) | _SHADOW_RMAP_SEGMENT;
+ raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ);
spin_lock(&sg->guest_table_lock);
@@ -2021,7 +2021,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
}
/* Check for top level table */
start = sg->orig_asce & _ASCE_ORIGIN;
- end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * 4096;
+ end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
gaddr < end) {
/* The complete shadow table has to go */
@@ -2032,7 +2032,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
return;
}
/* Remove the page table tree from on specific entry */
- head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> 12);
+ head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
gmap_for_each_rmap_safe(rmap, rnext, head) {
bits = rmap->raddr & _SHADOW_RMAP_MASK;
raddr = rmap->raddr ^ bits;
@@ -2076,7 +2076,7 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
struct gmap *gmap, *sg, *next;
offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
- offset = offset * (4096 / sizeof(pte_t));
+ offset = offset * (PAGE_SIZE / sizeof(pte_t));
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
@@ -2121,6 +2121,37 @@ static inline void thp_split_mm(struct mm_struct *mm)
}
/*
+ * Remove all empty zero pages from the mapping for lazy refaulting
+ * - This must be called after mm->context.has_pgste is set, to avoid
+ * future creation of zero pages
+ * - This must be called after THP was enabled
+ */
+static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
+ unsigned long end, struct mm_walk *walk)
+{
+ unsigned long addr;
+
+ for (addr = start; addr != end; addr += PAGE_SIZE) {
+ pte_t *ptep;
+ spinlock_t *ptl;
+
+ ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (is_zero_pfn(pte_pfn(*ptep)))
+ ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_unmap_unlock(ptep, ptl);
+ }
+ return 0;
+}
+
+static inline void zap_zero_pages(struct mm_struct *mm)
+{
+ struct mm_walk walk = { .pmd_entry = __zap_zero_pages };
+
+ walk.mm = mm;
+ walk_page_range(0, TASK_SIZE, &walk);
+}
+
+/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
@@ -2137,6 +2168,7 @@ int s390_enable_sie(void)
mm->context.has_pgste = 1;
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
+ zap_zero_pages(mm);
up_write(&mm->mmap_sem);
return 0;
}
@@ -2149,13 +2181,6 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
static int __s390_enable_skey(pte_t *pte, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
- /*
- * Remove all zero page mappings,
- * after establishing a policy to forbid zero page mappings
- * following faults for that page will get fresh anonymous pages
- */
- if (is_zero_pfn(pte_pfn(*pte)))
- ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID));
/* Clear storage key */
ptep_zap_key(walk->mm, addr, pte);
return 0;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 8111694ce55a..3b567838b905 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -26,6 +26,7 @@
#include <linux/poison.h>
#include <linux/initrd.h>
#include <linux/export.h>
+#include <linux/cma.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <asm/processor.h>
@@ -84,7 +85,7 @@ void __init paging_init(void)
psw_t psw;
init_mm.pgd = swapper_pg_dir;
- if (VMALLOC_END > (1UL << 42)) {
+ if (VMALLOC_END > _REGION2_SIZE) {
asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
pgd_type = _REGION2_ENTRY_EMPTY;
} else {
@@ -93,8 +94,7 @@ void __init paging_init(void)
}
init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
S390_lowcore.kernel_asce = init_mm.context.asce;
- clear_table((unsigned long *) init_mm.pgd, pgd_type,
- sizeof(unsigned long)*2048);
+ crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
vmem_map_init();
/* enable virtual mapping in kernel mode */
@@ -137,6 +137,8 @@ void __init mem_init(void)
free_all_bootmem();
setup_zero_pages(); /* Setup zeroed pages. */
+ cmma_init_nodat();
+
mem_init_print_info(NULL);
}
@@ -166,6 +168,58 @@ unsigned long memory_block_size_bytes(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
+
+#ifdef CONFIG_CMA
+
+/* Prevent memory blocks which contain cma regions from going offline */
+
+struct s390_cma_mem_data {
+ unsigned long start;
+ unsigned long end;
+};
+
+static int s390_cma_check_range(struct cma *cma, void *data)
+{
+ struct s390_cma_mem_data *mem_data;
+ unsigned long start, end;
+
+ mem_data = data;
+ start = cma_get_base(cma);
+ end = start + cma_get_size(cma);
+ if (end < mem_data->start)
+ return 0;
+ if (start >= mem_data->end)
+ return 0;
+ return -EBUSY;
+}
+
+static int s390_cma_mem_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct s390_cma_mem_data mem_data;
+ struct memory_notify *arg;
+ int rc = 0;
+
+ arg = data;
+ mem_data.start = arg->start_pfn << PAGE_SHIFT;
+ mem_data.end = mem_data.start + (arg->nr_pages << PAGE_SHIFT);
+ if (action == MEM_GOING_OFFLINE)
+ rc = cma_for_each_area(s390_cma_check_range, &mem_data);
+ return notifier_from_errno(rc);
+}
+
+static struct notifier_block s390_cma_mem_nb = {
+ .notifier_call = s390_cma_mem_notifier,
+};
+
+static int __init s390_cma_mem_init(void)
+{
+ return register_memory_notifier(&s390_cma_mem_nb);
+}
+device_initcall(s390_cma_mem_init);
+
+#endif /* CONFIG_CMA */
+
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
{
unsigned long start_pfn = PFN_DOWN(start);
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 2e10d2b8ad35..5bea139517a2 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -119,7 +119,8 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
return addr;
check_asce_limit:
- if (addr + len > current->mm->context.asce_limit) {
+ if (addr + len > current->mm->context.asce_limit &&
+ addr + len <= TASK_SIZE) {
rc = crst_table_upgrade(mm, addr + len);
if (rc)
return (unsigned long) rc;
@@ -183,7 +184,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
}
check_asce_limit:
- if (addr + len > current->mm->context.asce_limit) {
+ if (addr + len > current->mm->context.asce_limit &&
+ addr + len <= TASK_SIZE) {
rc = crst_table_upgrade(mm, addr + len);
if (rc)
return (unsigned long) rc;
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 69a7b01ae746..07fa7b8ae233 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -10,9 +10,10 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/mm.h>
+#include <linux/memblock.h>
#include <linux/gfp.h>
#include <linux/init.h>
-
+#include <asm/facility.h>
#include <asm/page-states.h>
static int cmma_flag = 1;
@@ -36,14 +37,16 @@ __setup("cmma=", cmma);
static inline int cmma_test_essa(void)
{
register unsigned long tmp asm("0") = 0;
- register int rc asm("1") = -EOPNOTSUPP;
+ register int rc asm("1");
+ /* test ESSA_GET_STATE */
asm volatile(
- " .insn rrf,0xb9ab0000,%1,%1,0,0\n"
+ " .insn rrf,0xb9ab0000,%1,%1,%2,0\n"
"0: la %0,0\n"
"1:\n"
EX_TABLE(0b,1b)
- : "+&d" (rc), "+&d" (tmp));
+ : "=&d" (rc), "+&d" (tmp)
+ : "i" (ESSA_GET_STATE), "0" (-EOPNOTSUPP));
return rc;
}
@@ -51,11 +54,26 @@ void __init cmma_init(void)
{
if (!cmma_flag)
return;
- if (cmma_test_essa())
+ if (cmma_test_essa()) {
cmma_flag = 0;
+ return;
+ }
+ if (test_facility(147))
+ cmma_flag = 2;
}
-static inline void set_page_unstable(struct page *page, int order)
+static inline unsigned char get_page_state(struct page *page)
+{
+ unsigned char state;
+
+ asm volatile(" .insn rrf,0xb9ab0000,%0,%1,%2,0"
+ : "=&d" (state)
+ : "a" (page_to_phys(page)),
+ "i" (ESSA_GET_STATE));
+ return state & 0x3f;
+}
+
+static inline void set_page_unused(struct page *page, int order)
{
int i, rc;
@@ -66,14 +84,18 @@ static inline void set_page_unstable(struct page *page, int order)
"i" (ESSA_SET_UNUSED));
}
-void arch_free_page(struct page *page, int order)
+static inline void set_page_stable_dat(struct page *page, int order)
{
- if (!cmma_flag)
- return;
- set_page_unstable(page, order);
+ int i, rc;
+
+ for (i = 0; i < (1 << order); i++)
+ asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
+ : "=&d" (rc)
+ : "a" (page_to_phys(page + i)),
+ "i" (ESSA_SET_STABLE));
}
-static inline void set_page_stable(struct page *page, int order)
+static inline void set_page_stable_nodat(struct page *page, int order)
{
int i, rc;
@@ -81,14 +103,154 @@ static inline void set_page_stable(struct page *page, int order)
asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
: "=&d" (rc)
: "a" (page_to_phys(page + i)),
- "i" (ESSA_SET_STABLE));
+ "i" (ESSA_SET_STABLE_NODAT));
+}
+
+static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ struct page *page;
+ pmd_t *pmd;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(*pmd) || pmd_large(*pmd))
+ continue;
+ page = virt_to_page(pmd_val(*pmd));
+ set_bit(PG_arch_1, &page->flags);
+ } while (pmd++, addr = next, addr != end);
+}
+
+static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ struct page *page;
+ pud_t *pud;
+ int i;
+
+ pud = pud_offset(p4d, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none(*pud) || pud_large(*pud))
+ continue;
+ if (!pud_folded(*pud)) {
+ page = virt_to_page(pud_val(*pud));
+ for (i = 0; i < 3; i++)
+ set_bit(PG_arch_1, &page[i].flags);
+ }
+ mark_kernel_pmd(pud, addr, next);
+ } while (pud++, addr = next, addr != end);
+}
+
+static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ struct page *page;
+ p4d_t *p4d;
+ int i;
+
+ p4d = p4d_offset(pgd, addr);
+ do {
+ next = p4d_addr_end(addr, end);
+ if (p4d_none(*p4d))
+ continue;
+ if (!p4d_folded(*p4d)) {
+ page = virt_to_page(p4d_val(*p4d));
+ for (i = 0; i < 3; i++)
+ set_bit(PG_arch_1, &page[i].flags);
+ }
+ mark_kernel_pud(p4d, addr, next);
+ } while (p4d++, addr = next, addr != end);
+}
+
+static void mark_kernel_pgd(void)
+{
+ unsigned long addr, next;
+ struct page *page;
+ pgd_t *pgd;
+ int i;
+
+ addr = 0;
+ pgd = pgd_offset_k(addr);
+ do {
+ next = pgd_addr_end(addr, MODULES_END);
+ if (pgd_none(*pgd))
+ continue;
+ if (!pgd_folded(*pgd)) {
+ page = virt_to_page(pgd_val(*pgd));
+ for (i = 0; i < 3; i++)
+ set_bit(PG_arch_1, &page[i].flags);
+ }
+ mark_kernel_p4d(pgd, addr, next);
+ } while (pgd++, addr = next, addr != MODULES_END);
+}
+
+void __init cmma_init_nodat(void)
+{
+ struct memblock_region *reg;
+ struct page *page;
+ unsigned long start, end, ix;
+
+ if (cmma_flag < 2)
+ return;
+ /* Mark pages used in kernel page tables */
+ mark_kernel_pgd();
+
+ /* Set all kernel pages not used for page tables to stable/no-dat */
+ for_each_memblock(memory, reg) {
+ start = memblock_region_memory_base_pfn(reg);
+ end = memblock_region_memory_end_pfn(reg);
+ page = pfn_to_page(start);
+ for (ix = start; ix < end; ix++, page++) {
+ if (__test_and_clear_bit(PG_arch_1, &page->flags))
+ continue; /* skip page table pages */
+ if (!list_empty(&page->lru))
+ continue; /* skip free pages */
+ set_page_stable_nodat(page, 0);
+ }
+ }
+}
+
+void arch_free_page(struct page *page, int order)
+{
+ if (!cmma_flag)
+ return;
+ set_page_unused(page, order);
}
void arch_alloc_page(struct page *page, int order)
{
if (!cmma_flag)
return;
- set_page_stable(page, order);
+ if (cmma_flag < 2)
+ set_page_stable_dat(page, order);
+ else
+ set_page_stable_nodat(page, order);
+}
+
+void arch_set_page_dat(struct page *page, int order)
+{
+ if (!cmma_flag)
+ return;
+ set_page_stable_dat(page, order);
+}
+
+void arch_set_page_nodat(struct page *page, int order)
+{
+ if (cmma_flag < 2)
+ return;
+ set_page_stable_nodat(page, order);
+}
+
+int arch_test_page_nodat(struct page *page)
+{
+ unsigned char state;
+
+ if (cmma_flag < 2)
+ return 0;
+ state = get_page_state(page);
+ return !!(state & 0x20);
}
void arch_set_page_states(int make_stable)
@@ -108,9 +270,9 @@ void arch_set_page_states(int make_stable)
list_for_each(l, &zone->free_area[order].free_list[t]) {
page = list_entry(l, struct page, lru);
if (make_stable)
- set_page_stable(page, order);
+ set_page_stable_dat(page, 0);
else
- set_page_unstable(page, order);
+ set_page_unused(page, order);
}
}
spin_unlock_irqrestore(&zone->lock, flags);
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 180481589246..552f898dfa74 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -7,6 +7,7 @@
#include <asm/cacheflush.h>
#include <asm/facility.h>
#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/set_memory.h>
@@ -191,7 +192,7 @@ static int split_pud_page(pud_t *pudp, unsigned long addr)
pud_t new;
int i, ro, nx;
- pm_dir = vmem_pmd_alloc();
+ pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
if (!pm_dir)
return -ENOMEM;
pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
@@ -328,7 +329,7 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr)
return;
}
for (i = 0; i < nr; i++) {
- __ptep_ipte(address, pte, IPTE_GLOBAL);
+ __ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
address += PAGE_SIZE;
pte++;
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 18918e394ce4..c5b74dd61197 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -57,6 +57,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
if (!page)
return NULL;
+ arch_set_page_dat(page, 2);
return (unsigned long *) page_to_phys(page);
}
@@ -82,7 +83,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
int rc, notify;
/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
- BUG_ON(mm->context.asce_limit < (1UL << 42));
+ BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
if (end >= TASK_SIZE_MAX)
return -ENOMEM;
rc = 0;
@@ -95,11 +96,11 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
}
spin_lock_bh(&mm->page_table_lock);
pgd = (unsigned long *) mm->pgd;
- if (mm->context.asce_limit == (1UL << 42)) {
+ if (mm->context.asce_limit == _REGION2_SIZE) {
crst_table_init(table, _REGION2_ENTRY_EMPTY);
p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
mm->pgd = (pgd_t *) table;
- mm->context.asce_limit = 1UL << 53;
+ mm->context.asce_limit = _REGION1_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
} else {
@@ -123,7 +124,7 @@ void crst_table_downgrade(struct mm_struct *mm)
pgd_t *pgd;
/* downgrade should only happen from 3 to 2 levels (compat only) */
- BUG_ON(mm->context.asce_limit != (1UL << 42));
+ BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
if (current->active_mm == mm) {
clear_user_asce();
@@ -132,7 +133,7 @@ void crst_table_downgrade(struct mm_struct *mm)
pgd = mm->pgd;
mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
- mm->context.asce_limit = 1UL << 31;
+ mm->context.asce_limit = _REGION3_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
crst_table_free(mm, (unsigned long *) pgd);
@@ -214,6 +215,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
__free_page(page);
return NULL;
}
+ arch_set_page_dat(page, 0);
/* Initialize page table */
table = (unsigned long *) page_to_phys(page);
if (mm_alloc_pgste(mm)) {
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4a1f7366b17a..ae677f814bc0 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -25,8 +25,49 @@
#include <asm/mmu_context.h>
#include <asm/page-states.h>
+static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, int nodat)
+{
+ unsigned long opt, asce;
+
+ if (MACHINE_HAS_TLB_GUEST) {
+ opt = 0;
+ asce = READ_ONCE(mm->context.gmap_asce);
+ if (asce == 0UL || nodat)
+ opt |= IPTE_NODAT;
+ if (asce != -1UL) {
+ asce = asce ? : mm->context.asce;
+ opt |= IPTE_GUEST_ASCE;
+ }
+ __ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
+ } else {
+ __ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
+ }
+}
+
+static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, int nodat)
+{
+ unsigned long opt, asce;
+
+ if (MACHINE_HAS_TLB_GUEST) {
+ opt = 0;
+ asce = READ_ONCE(mm->context.gmap_asce);
+ if (asce == 0UL || nodat)
+ opt |= IPTE_NODAT;
+ if (asce != -1UL) {
+ asce = asce ? : mm->context.asce;
+ opt |= IPTE_GUEST_ASCE;
+ }
+ __ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
+ } else {
+ __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+ }
+}
+
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+ unsigned long addr, pte_t *ptep,
+ int nodat)
{
pte_t old;
@@ -36,15 +77,16 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm,
atomic_inc(&mm->context.flush_count);
if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __ptep_ipte(addr, ptep, IPTE_LOCAL);
+ ptep_ipte_local(mm, addr, ptep, nodat);
else
- __ptep_ipte(addr, ptep, IPTE_GLOBAL);
+ ptep_ipte_global(mm, addr, ptep, nodat);
atomic_dec(&mm->context.flush_count);
return old;
}
static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+ unsigned long addr, pte_t *ptep,
+ int nodat)
{
pte_t old;
@@ -57,7 +99,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
pte_val(*ptep) |= _PAGE_INVALID;
mm->context.flush_mm = 1;
} else
- __ptep_ipte(addr, ptep, IPTE_GLOBAL);
+ ptep_ipte_global(mm, addr, ptep, nodat);
atomic_dec(&mm->context.flush_count);
return old;
}
@@ -229,10 +271,12 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
{
pgste_t pgste;
pte_t old;
+ int nodat;
preempt_disable();
pgste = ptep_xchg_start(mm, addr, ptep);
- old = ptep_flush_direct(mm, addr, ptep);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ old = ptep_flush_direct(mm, addr, ptep, nodat);
old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
preempt_enable();
return old;
@@ -244,10 +288,12 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
{
pgste_t pgste;
pte_t old;
+ int nodat;
preempt_disable();
pgste = ptep_xchg_start(mm, addr, ptep);
- old = ptep_flush_lazy(mm, addr, ptep);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ old = ptep_flush_lazy(mm, addr, ptep, nodat);
old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
preempt_enable();
return old;
@@ -259,10 +305,12 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
{
pgste_t pgste;
pte_t old;
+ int nodat;
preempt_disable();
pgste = ptep_xchg_start(mm, addr, ptep);
- old = ptep_flush_lazy(mm, addr, ptep);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ old = ptep_flush_lazy(mm, addr, ptep, nodat);
if (mm_has_pgste(mm)) {
pgste = pgste_update_all(old, pgste, mm);
pgste_set(ptep, pgste);
@@ -290,6 +338,28 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(ptep_modify_prot_commit);
+static inline void pmdp_idte_local(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_LOCAL);
+ else
+ __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
+}
+
+static inline void pmdp_idte_global(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_GLOBAL);
+ else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
+ else
+ __pmdp_csp(pmdp);
+}
+
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
@@ -298,16 +368,12 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
old = *pmdp;
if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
return old;
- if (!MACHINE_HAS_IDTE) {
- __pmdp_csp(pmdp);
- return old;
- }
atomic_inc(&mm->context.flush_count);
if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __pmdp_idte(addr, pmdp, IDTE_LOCAL);
+ pmdp_idte_local(mm, addr, pmdp);
else
- __pmdp_idte(addr, pmdp, IDTE_GLOBAL);
+ pmdp_idte_global(mm, addr, pmdp);
atomic_dec(&mm->context.flush_count);
return old;
}
@@ -325,10 +391,9 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
cpumask_of(smp_processor_id()))) {
pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
mm->context.flush_mm = 1;
- } else if (MACHINE_HAS_IDTE)
- __pmdp_idte(addr, pmdp, IDTE_GLOBAL);
- else
- __pmdp_csp(pmdp);
+ } else {
+ pmdp_idte_global(mm, addr, pmdp);
+ }
atomic_dec(&mm->context.flush_count);
return old;
}
@@ -359,28 +424,46 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(pmdp_xchg_lazy);
-static inline pud_t pudp_flush_direct(struct mm_struct *mm,
- unsigned long addr, pud_t *pudp)
+static inline void pudp_idte_local(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
{
- pud_t old;
+ if (MACHINE_HAS_TLB_GUEST)
+ __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_LOCAL);
+ else
+ __pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
+}
- old = *pudp;
- if (pud_val(old) & _REGION_ENTRY_INVALID)
- return old;
- if (!MACHINE_HAS_IDTE) {
+static inline void pudp_idte_global(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ if (MACHINE_HAS_TLB_GUEST)
+ __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_GLOBAL);
+ else if (MACHINE_HAS_IDTE)
+ __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
+ else
/*
* Invalid bit position is the same for pmd and pud, so we can
* re-use _pmd_csp() here
*/
__pmdp_csp((pmd_t *) pudp);
+}
+
+static inline pud_t pudp_flush_direct(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ pud_t old;
+
+ old = *pudp;
+ if (pud_val(old) & _REGION_ENTRY_INVALID)
return old;
- }
atomic_inc(&mm->context.flush_count);
if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __pudp_idte(addr, pudp, IDTE_LOCAL);
+ pudp_idte_local(mm, addr, pudp);
else
- __pudp_idte(addr, pudp, IDTE_GLOBAL);
+ pudp_idte_global(mm, addr, pudp);
atomic_dec(&mm->context.flush_count);
return old;
}
@@ -482,7 +565,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
{
pte_t entry;
pgste_t pgste;
- int pte_i, pte_p;
+ int pte_i, pte_p, nodat;
pgste = pgste_get_lock(ptep);
entry = *ptep;
@@ -495,13 +578,14 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
return -EAGAIN;
}
/* Change access rights and set pgste bit */
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
if (prot == PROT_NONE && !pte_i) {
- ptep_flush_direct(mm, addr, ptep);
+ ptep_flush_direct(mm, addr, ptep, nodat);
pgste = pgste_update_all(entry, pgste, mm);
pte_val(entry) |= _PAGE_INVALID;
}
if (prot == PROT_READ && !pte_p) {
- ptep_flush_direct(mm, addr, ptep);
+ ptep_flush_direct(mm, addr, ptep, nodat);
pte_val(entry) &= ~_PAGE_INVALID;
pte_val(entry) |= _PAGE_PROTECT;
}
@@ -541,10 +625,12 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
{
pgste_t pgste;
+ int nodat;
pgste = pgste_get_lock(ptep);
/* notifier is called by the caller */
- ptep_flush_direct(mm, saddr, ptep);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ ptep_flush_direct(mm, saddr, ptep, nodat);
/* don't touch the storage key - it belongs to parent pgste */
pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
pgste_set_unlock(ptep, pgste);
@@ -617,6 +703,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
pte_t *ptep;
pte_t pte;
bool dirty;
+ int nodat;
pgd = pgd_offset(mm, addr);
p4d = p4d_alloc(mm, pgd, addr);
@@ -645,7 +732,8 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
pte = *ptep;
if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
pgste = pgste_pte_notify(mm, addr, ptep, pgste);
- __ptep_ipte(addr, ptep, IPTE_GLOBAL);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ ptep_ipte_global(mm, addr, ptep, nodat);
if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
pte_val(pte) |= _PAGE_PROTECT;
else
@@ -831,7 +919,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
case ESSA_GET_STATE:
break;
case ESSA_SET_STABLE:
- pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
pgstev |= _PGSTE_GPS_USAGE_STABLE;
break;
case ESSA_SET_UNUSED:
@@ -877,6 +965,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
pgstev |= _PGSTE_GPS_USAGE_STABLE;
}
break;
+ case ESSA_SET_STABLE_NODAT:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;
+ break;
default:
/* we should never get here! */
break;
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index d8398962a723..c0af0d7b6e5f 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -38,37 +38,14 @@ static void __ref *vmem_alloc_pages(unsigned int order)
return (void *) memblock_alloc(size, size);
}
-static inline p4d_t *vmem_p4d_alloc(void)
+void *vmem_crst_alloc(unsigned long val)
{
- p4d_t *p4d = NULL;
+ unsigned long *table;
- p4d = vmem_alloc_pages(2);
- if (!p4d)
- return NULL;
- clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4);
- return p4d;
-}
-
-static inline pud_t *vmem_pud_alloc(void)
-{
- pud_t *pud = NULL;
-
- pud = vmem_alloc_pages(2);
- if (!pud)
- return NULL;
- clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
- return pud;
-}
-
-pmd_t *vmem_pmd_alloc(void)
-{
- pmd_t *pmd = NULL;
-
- pmd = vmem_alloc_pages(2);
- if (!pmd)
- return NULL;
- clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
- return pmd;
+ table = vmem_alloc_pages(CRST_ALLOC_ORDER);
+ if (table)
+ crst_table_init(table, val);
+ return table;
}
pte_t __ref *vmem_pte_alloc(void)
@@ -114,14 +91,14 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
- p4_dir = vmem_p4d_alloc();
+ p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
if (!p4_dir)
goto out;
pgd_populate(&init_mm, pg_dir, p4_dir);
}
p4_dir = p4d_offset(pg_dir, address);
if (p4d_none(*p4_dir)) {
- pu_dir = vmem_pud_alloc();
+ pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
if (!pu_dir)
goto out;
p4d_populate(&init_mm, p4_dir, pu_dir);
@@ -136,7 +113,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
continue;
}
if (pud_none(*pu_dir)) {
- pm_dir = vmem_pmd_alloc();
+ pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
if (!pm_dir)
goto out;
pud_populate(&init_mm, pu_dir, pm_dir);
@@ -253,7 +230,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
for (address = start; address < end;) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
- p4_dir = vmem_p4d_alloc();
+ p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
if (!p4_dir)
goto out;
pgd_populate(&init_mm, pg_dir, p4_dir);
@@ -261,7 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
p4_dir = p4d_offset(pg_dir, address);
if (p4d_none(*p4_dir)) {
- pu_dir = vmem_pud_alloc();
+ pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
if (!pu_dir)
goto out;
p4d_populate(&init_mm, p4_dir, pu_dir);
@@ -269,7 +246,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
pu_dir = pud_offset(p4_dir, address);
if (pud_none(*pu_dir)) {
- pm_dir = vmem_pmd_alloc();
+ pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
if (!pm_dir)
goto out;
pud_populate(&init_mm, pu_dir, pm_dir);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 01c6fbc3e85b..8ec88497a28d 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1093,15 +1093,27 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
mask = 0x2000; /* jh */
goto branch_ks;
+ case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */
+ mask = 0x4000; /* jl */
+ goto branch_ks;
case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
mask = 0xa000; /* jhe */
goto branch_ks;
+ case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */
+ mask = 0xc000; /* jle */
+ goto branch_ks;
case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
mask = 0x2000; /* jh */
goto branch_ku;
+ case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */
+ mask = 0x4000; /* jl */
+ goto branch_ku;
case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
mask = 0xa000; /* jhe */
goto branch_ku;
+ case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */
+ mask = 0xc000; /* jle */
+ goto branch_ku;
case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
mask = 0x7000; /* jne */
goto branch_ku;
@@ -1119,15 +1131,27 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
mask = 0x2000; /* jh */
goto branch_xs;
+ case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */
+ mask = 0x4000; /* jl */
+ goto branch_xs;
case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
mask = 0xa000; /* jhe */
goto branch_xs;
+ case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */
+ mask = 0xc000; /* jle */
+ goto branch_xs;
case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
mask = 0x2000; /* jh */
goto branch_xu;
+ case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */
+ mask = 0x4000; /* jl */
+ goto branch_xu;
case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
mask = 0xa000; /* jhe */
goto branch_xu;
+ case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */
+ mask = 0xc000; /* jle */
+ goto branch_xu;
case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
mask = 0x7000; /* jne */
goto branch_xu;
@@ -1253,7 +1277,8 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
insn_count = bpf_jit_insn(jit, fp, i);
if (insn_count < 0)
return -1;
- jit->addrs[i + 1] = jit->prg; /* Next instruction address */
+ /* Next instruction address */
+ jit->addrs[i + insn_count] = jit->prg;
}
bpf_jit_epilogue(jit);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 7b30af5da222..ddb9923fb45d 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -262,10 +262,6 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
return rc;
}
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
-}
-
resource_size_t pcibios_align_resource(void *data, const struct resource *res,
resource_size_t size,
resource_size_t align)
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index bd534b4d40e3..0ae3936e266f 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -24,6 +24,14 @@
bool zpci_unique_uid;
+static void update_uid_checking(bool new)
+{
+ if (zpci_unique_uid != new)
+ zpci_dbg(1, "uid checking:%d\n", new);
+
+ zpci_unique_uid = new;
+}
+
static inline void zpci_err_clp(unsigned int rsp, int rc)
{
struct {
@@ -319,7 +327,7 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, void *data,
goto out;
}
- zpci_unique_uid = rrb->response.uid_checking;
+ update_uid_checking(rrb->response.uid_checking);
WARN_ON_ONCE(rrb->response.entry_size !=
sizeof(struct clp_fh_list_entry));
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 025ea20fc4b4..70dd8f17d054 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -41,7 +41,7 @@ static struct facility_def facility_defs[] = {
27, /* mvcos */
32, /* compare and swap and store */
33, /* compare and swap and store 2 */
- 34, /* general extension facility */
+ 34, /* general instructions extension */
35, /* execute extensions */
#endif
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
@@ -54,6 +54,9 @@ static struct facility_def facility_defs[] = {
#ifdef CONFIG_HAVE_MARCH_Z13_FEATURES
53, /* load-and-zero-rightmost-byte, etc. */
#endif
+#ifdef CONFIG_HAVE_MARCH_Z14_FEATURES
+ 58, /* miscellaneous-instruction-extension 2 */
+#endif
-1 /* END */
}
},
@@ -80,6 +83,7 @@ static struct facility_def facility_defs[] = {
78, /* enhanced-DAT 2 */
130, /* instruction-execution-protection */
131, /* enhanced-SOP 2 and side-effect */
+ 139, /* multiple epoch facility */
146, /* msa extension 8 */
-1 /* END */
}
diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig
index e5335123b5e9..72b72e50a92e 100644
--- a/arch/sh/configs/ap325rxa_defconfig
+++ b/arch/sh/configs/ap325rxa_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -28,14 +27,10 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -51,8 +46,6 @@ CONFIG_NETDEVICES=y
CONFIG_SMSC_PHY=y
CONFIG_NET_ETHERNET=y
CONFIG_SMSC911X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -82,7 +75,6 @@ CONFIG_FB=y
CONFIG_FB_SH_MOBILE_LCDC=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
-# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_MMC=y
CONFIG_MMC_SPI=y
@@ -110,8 +102,6 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_932=y
CONFIG_NLS_ISO8859_1=y
# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/apsh4a3a_defconfig b/arch/sh/configs/apsh4a3a_defconfig
index 6cb327977d13..4710df43a5b5 100644
--- a/arch/sh/configs/apsh4a3a_defconfig
+++ b/arch/sh/configs/apsh4a3a_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
@@ -28,15 +27,11 @@ CONFIG_INET=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -46,8 +41,6 @@ CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_NETDEVICES=y
CONFIG_NET_ETHERNET=y
CONFIG_SMSC911X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_WLAN is not set
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
@@ -66,7 +59,6 @@ CONFIG_FONTS=y
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
CONFIG_LOGO=y
-# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
@@ -96,7 +88,6 @@ CONFIG_DEBUG_KERNEL=y
# CONFIG_DEBUG_PREEMPT is not set
# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_FTRACE is not set
# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig
index fe45d2c9b151..825c641726c4 100644
--- a/arch/sh/configs/apsh4ad0a_defconfig
+++ b/arch/sh/configs/apsh4ad0a_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -53,7 +52,6 @@ CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_NET_KEY=y
CONFIG_INET=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
@@ -70,8 +68,6 @@ CONFIG_NETDEVICES=y
CONFIG_MDIO_BITBANG=y
CONFIG_NET_ETHERNET=y
CONFIG_SMSC911X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_WLAN is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_KEYBOARD is not set
@@ -83,7 +79,6 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=y
CONFIG_FB=y
CONFIG_FB_SH7785FB=y
CONFIG_FRAMEBUFFER_CONSOLE=y
@@ -124,6 +119,5 @@ CONFIG_DEBUG_SHIRQ=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_VM=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_DWARF_UNWINDER=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/cayman_defconfig b/arch/sh/configs/cayman_defconfig
index 67e150631ea5..5a90e24aa8a6 100644
--- a/arch/sh/configs/cayman_defconfig
+++ b/arch/sh/configs/cayman_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_POSIX_MQUEUE=y
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
@@ -19,7 +18,6 @@ CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_PNP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
@@ -38,7 +36,6 @@ CONFIG_NET_ETHERNET=y
CONFIG_HW_RANDOM=y
CONFIG_I2C=m
CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_MODE_HELPERS=y
@@ -67,5 +64,4 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_SCHEDSTATS=y
CONFIG_FRAME_POINTER=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/dreamcast_defconfig b/arch/sh/configs/dreamcast_defconfig
index ec243ca29529..3f08dc54480b 100644
--- a/arch/sh/configs/dreamcast_defconfig
+++ b/arch/sh/configs/dreamcast_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_LOG_BUF_SHIFT=14
@@ -32,7 +31,6 @@ CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
@@ -43,8 +41,6 @@ CONFIG_NET_ETHERNET=y
CONFIG_NET_PCI=y
CONFIG_8139TOO=y
# CONFIG_8139TOO_PIO is not set
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_KEYBOARD_ATKBD is not set
CONFIG_KEYBOARD_MAPLE=y
# CONFIG_MOUSE_PS2 is not set
@@ -56,7 +52,6 @@ CONFIG_HW_RANDOM=y
# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
CONFIG_SH_WDT=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_PVR2=y
@@ -74,5 +69,4 @@ CONFIG_LOGO=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/ecovec24-romimage_defconfig b/arch/sh/configs/ecovec24-romimage_defconfig
index 5fcb17bff24a..0c5dfccbfe37 100644
--- a/arch/sh/configs/ecovec24-romimage_defconfig
+++ b/arch/sh/configs/ecovec24-romimage_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -26,19 +25,15 @@ CONFIG_INET=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_MISC_DEVICES is not set
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_NETDEVICES=y
CONFIG_NET_ETHERNET=y
CONFIG_SH_ETH=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -51,7 +46,6 @@ CONFIG_I2C=y
CONFIG_I2C_SH_MOBILE=y
CONFIG_GPIO_SYSFS=y
# CONFIG_HWMON is not set
-# CONFIG_HID_SUPPORT is not set
CONFIG_USB=y
CONFIG_USB_R8A66597_HCD=y
CONFIG_USB_STORAGE=y
@@ -64,4 +58,3 @@ CONFIG_TMPFS=y
# CONFIG_NETWORK_FILESYSTEMS is not set
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig
index 0b364e3b0ff8..3568310c2c2f 100644
--- a/arch/sh/configs/ecovec24_defconfig
+++ b/arch/sh/configs/ecovec24_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -29,16 +28,12 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_IRDA=y
CONFIG_SH_SIR=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -53,8 +48,6 @@ CONFIG_NETDEVICES=y
CONFIG_SMSC_PHY=y
CONFIG_NET_ETHERNET=y
CONFIG_SH_ETH=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT_MOUSEDEV is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_KEYBOARD_ATKBD is not set
@@ -140,8 +133,6 @@ CONFIG_NLS_CODEPAGE_932=y
CONFIG_NLS_ISO8859_1=y
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/edosk7705_defconfig b/arch/sh/configs/edosk7705_defconfig
index 41fa3a7eed96..db756e099052 100644
--- a/arch/sh/configs/edosk7705_defconfig
+++ b/arch/sh/configs/edosk7705_defconfig
@@ -20,7 +20,6 @@ CONFIG_CPU_SUBTYPE_SH7705=y
CONFIG_SH_EDOSK7705=y
CONFIG_SH_PCLK_FREQ=31250000
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-# CONFIG_MISC_DEVICES is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -35,5 +34,4 @@ CONFIG_SH_PCLK_FREQ=31250000
# CONFIG_SYSFS is not set
# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRC32 is not set
diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig
index e1077a041ac3..aab4ff1e247c 100644
--- a/arch/sh/configs/edosk7760_defconfig
+++ b/arch/sh/configs/edosk7760_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_LOCALVERSION="_edosk7760"
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -31,7 +30,6 @@ CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
@@ -39,10 +37,7 @@ CONFIG_DEBUG_DRIVER=y
CONFIG_DEBUG_DEVRES=y
CONFIG_MTD=y
CONFIG_MTD_DEBUG=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_JEDECPROBE=y
@@ -62,12 +57,9 @@ CONFIG_MTD_ABSENT=y
CONFIG_MTD_PHYSMAP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=26000
-# CONFIG_MISC_DEVICES is not set
CONFIG_NETDEVICES=y
CONFIG_NET_ETHERNET=y
CONFIG_SMC91X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -92,7 +84,6 @@ CONFIG_SND=y
# CONFIG_SND_VERBOSE_PROCFS is not set
CONFIG_SND_VERBOSE_PRINTK=y
CONFIG_SND_SOC=y
-# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
@@ -119,8 +110,6 @@ CONFIG_DETECT_HUNG_TASK=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_TIMER_STATS=y
CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig
index 67cb1094a033..2985fe7c6d50 100644
--- a/arch/sh/configs/espt_defconfig
+++ b/arch/sh/configs/espt_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
@@ -26,13 +25,10 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_JEDECPROBE=y
@@ -43,14 +39,11 @@ CONFIG_MTD_CFI_GEOMETRY=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_COMPLEX_MAPPINGS=y
CONFIG_MTD_PHYSMAP=y
-# CONFIG_MISC_DEVICES is not set
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_NETDEVICES=y
CONFIG_NET_ETHERNET=y
CONFIG_SH_ETH=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -65,7 +58,6 @@ CONFIG_FB_FOREIGN_ENDIAN=y
CONFIG_FB_SH7760=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
-# CONFIG_HID_SUPPORT is not set
CONFIG_USB=y
CONFIG_USB_MON=y
CONFIG_USB_OHCI_HCD=y
@@ -73,7 +65,6 @@ CONFIG_USB_STORAGE=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_AUTOFS_FS=y
CONFIG_AUTOFS4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
@@ -123,6 +114,5 @@ CONFIG_NLS_UTF8=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig
index 496edcdf95a3..4dcf7f552582 100644
--- a/arch/sh/configs/hp6xx_defconfig
+++ b/arch/sh/configs/hp6xx_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
@@ -37,7 +36,6 @@ CONFIG_SERIAL_SH_SCI_NR_UARTS=3
CONFIG_SERIAL_SH_SCI_CONSOLE=y
CONFIG_LEGACY_PTY_COUNT=64
# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=y
CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_HIT=y
@@ -46,7 +44,6 @@ CONFIG_BACKLIGHT_LCD_SUPPORT=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FONTS=y
CONFIG_FONT_PEARL_8x8=y
-# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_SH=y
@@ -55,7 +52,6 @@ CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
CONFIG_NLS_CODEPAGE_850=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_ECB=y
diff --git a/arch/sh/configs/kfr2r09-romimage_defconfig b/arch/sh/configs/kfr2r09-romimage_defconfig
index 029a506ca325..9cc37f29e3b4 100644
--- a/arch/sh/configs/kfr2r09-romimage_defconfig
+++ b/arch/sh/configs/kfr2r09-romimage_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -26,12 +25,10 @@ CONFIG_INET=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_MISC_DEVICES is not set
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -44,7 +41,6 @@ CONFIG_I2C=y
CONFIG_I2C_SH_MOBILE=y
CONFIG_GPIO_SYSFS=y
# CONFIG_HWMON is not set
-# CONFIG_HID_SUPPORT is not set
CONFIG_USB_GADGET=y
CONFIG_USB_CDC_COMPOSITE=y
# CONFIG_DNOTIFY is not set
@@ -55,5 +51,4 @@ CONFIG_TMPFS=y
# CONFIG_NETWORK_FILESYSTEMS is not set
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRC32 is not set
diff --git a/arch/sh/configs/kfr2r09_defconfig b/arch/sh/configs/kfr2r09_defconfig
index fac13ded07b2..46693d033644 100644
--- a/arch/sh/configs/kfr2r09_defconfig
+++ b/arch/sh/configs/kfr2r09_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -33,15 +32,12 @@ CONFIG_INET=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_INTELEXT=y
@@ -49,7 +45,6 @@ CONFIG_MTD_PHYSMAP=y
CONFIG_MTD_ONENAND=y
CONFIG_MTD_ONENAND_GENERIC=y
CONFIG_MTD_UBI=y
-# CONFIG_MISC_DEVICES is not set
# CONFIG_INPUT_MOUSEDEV is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_KEYBOARD_ATKBD is not set
@@ -77,7 +72,6 @@ CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_CLUT224 is not set
# CONFIG_LOGO_SUPERH_MONO is not set
# CONFIG_LOGO_SUPERH_CLUT224 is not set
-# CONFIG_HID_SUPPORT is not set
CONFIG_USB_GADGET=y
CONFIG_USB_CDC_COMPOSITE=m
CONFIG_MMC=y
@@ -91,4 +85,3 @@ CONFIG_TMPFS=y
# CONFIG_NETWORK_FILESYSTEMS is not set
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig
index 6783f31315c7..467f4d2d8e87 100644
--- a/arch/sh/configs/landisk_defconfig
+++ b/arch/sh/configs/landisk_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_SYSCTL_SYSCALL is not set
@@ -24,10 +23,8 @@ CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_PNP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
-CONFIG_IP_NF_QUEUE=m
CONFIG_ATALK=m
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_BLK_DEV_LOOP=y
@@ -118,7 +115,6 @@ CONFIG_NFSD_V3=y
CONFIG_SMB_FS=m
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_932=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SH_STANDARD_BIOS=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig
index e3c0894b1bb4..9e3edfdf9b2e 100644
--- a/arch/sh/configs/lboxre2_defconfig
+++ b/arch/sh/configs/lboxre2_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_SYSCTL_SYSCALL is not set
@@ -28,7 +27,6 @@ CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_PNP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
@@ -61,7 +59,6 @@ CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_ROMFS_FS=y
CONFIG_NLS_CODEPAGE_437=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SH_STANDARD_BIOS=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig
index 9479872b1ae6..fb7415dbc102 100644
--- a/arch/sh/configs/magicpanelr2_defconfig
+++ b/arch/sh/configs/magicpanelr2_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -35,16 +34,13 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_REDBOOT_PARTS=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -55,8 +51,6 @@ CONFIG_NETDEVICES=y
CONFIG_SMSC_PHY=y
CONFIG_NET_ETHERNET=y
CONFIG_SMSC911X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_MOUSE_PS2 is not set
CONFIG_SERIAL_8250=y
@@ -68,7 +62,6 @@ CONFIG_SERIAL_SH_SCI=y
CONFIG_SERIAL_SH_SCI_CONSOLE=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
-# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
# CONFIG_RTC_HCTOSYS is not set
@@ -96,7 +89,5 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_KOBJECT=y
CONFIG_DEBUG_INFO=y
CONFIG_FRAME_POINTER=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRC_CCITT=m
CONFIG_CRC16=m
diff --git a/arch/sh/configs/microdev_defconfig b/arch/sh/configs/microdev_defconfig
index f1d2e1b5ee41..c3f7d5899922 100644
--- a/arch/sh/configs/microdev_defconfig
+++ b/arch/sh/configs/microdev_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
@@ -19,7 +18,6 @@ CONFIG_SUPERHYWAY=y
CONFIG_NET=y
CONFIG_INET=y
CONFIG_IP_PNP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
@@ -45,6 +43,5 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_CRYPTO_ECB=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig
index cc61eda44922..e04f21be0756 100644
--- a/arch/sh/configs/migor_defconfig
+++ b/arch/sh/configs/migor_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
@@ -26,15 +25,11 @@ CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_FW_LOADER=m
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -47,8 +42,6 @@ CONFIG_BLK_DEV_SD=y
CONFIG_NETDEVICES=y
CONFIG_NET_ETHERNET=y
CONFIG_SMC91X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT_MOUSEDEV is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_KEYBOARD_ATKBD is not set
@@ -101,7 +94,6 @@ CONFIG_TMPFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_CRYPTO_MANAGER=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/sh/configs/polaris_defconfig b/arch/sh/configs/polaris_defconfig
index f3d5d9f76310..0a432b5f50e7 100644
--- a/arch/sh/configs/polaris_defconfig
+++ b/arch/sh/configs/polaris_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
@@ -37,14 +36,11 @@ CONFIG_IP_MULTICAST=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FIRMWARE_IN_KERNEL is not set
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_ADV_OPTIONS=y
@@ -57,8 +53,6 @@ CONFIG_NETDEVICES=y
CONFIG_SMSC_PHY=y
CONFIG_NET_ETHERNET=y
CONFIG_SMSC911X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -71,7 +65,6 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
-# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_SH=y
@@ -91,5 +84,3 @@ CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_DEBUG_SPINLOCK_SLEEP=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_SG=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig
index 920b8471ceb7..435bcd66c667 100644
--- a/arch/sh/configs/r7780mp_defconfig
+++ b/arch/sh/configs/r7780mp_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
@@ -35,13 +34,11 @@ CONFIG_INET=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_BRIDGE=m
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_FW_LOADER=m
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_COMPLEX_MAPPINGS=y
@@ -110,7 +107,6 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_PREEMPT is not set
CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig
index c77da6be06b8..5877e6d1f285 100644
--- a/arch/sh/configs/r7785rp_defconfig
+++ b/arch/sh/configs/r7785rp_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -42,7 +41,6 @@ CONFIG_INET=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_BRIDGE=m
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
@@ -104,7 +102,6 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SH_STANDARD_BIOS=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_4KSTACKS=y
diff --git a/arch/sh/configs/rsk7201_defconfig b/arch/sh/configs/rsk7201_defconfig
index 5df916d931c5..b195bc01e406 100644
--- a/arch/sh/configs/rsk7201_defconfig
+++ b/arch/sh/configs/rsk7201_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -37,10 +36,7 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_REDBOOT_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -58,8 +54,6 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
CONFIG_THERMAL=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_SH=y
@@ -71,5 +65,3 @@ CONFIG_ROMFS_FS=y
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig
index 3c4f6f4d52b0..8c471959bbc7 100644
--- a/arch/sh/configs/rsk7203_defconfig
+++ b/arch/sh/configs/rsk7203_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -44,7 +43,6 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
@@ -52,10 +50,7 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_REDBOOT_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -64,8 +59,6 @@ CONFIG_NETDEVICES=y
CONFIG_SMSC_PHY=y
CONFIG_NET_ETHERNET=y
CONFIG_SMSC911X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
CONFIG_INPUT_FF_MEMLESS=m
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
@@ -81,7 +74,6 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y
# CONFIG_HWMON is not set
CONFIG_THERMAL=y
CONFIG_REGULATOR=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
CONFIG_HID_A4TECH=y
CONFIG_HID_APPLE=y
CONFIG_HID_BELKIN=y
@@ -130,6 +122,4 @@ CONFIG_DEBUG_VM=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_FRAME_POINTER=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_DEBUG_STACK_USAGE=y
diff --git a/arch/sh/configs/rsk7264_defconfig b/arch/sh/configs/rsk7264_defconfig
index eecdf65bb789..2b9b731fc86b 100644
--- a/arch/sh/configs/rsk7264_defconfig
+++ b/arch/sh/configs/rsk7264_defconfig
@@ -35,7 +35,6 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
@@ -61,11 +60,9 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y
# CONFIG_HWMON is not set
CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_R8A66597_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_USB_STORAGE_DEBUG=y
-CONFIG_USB_LIBUSUAL=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
diff --git a/arch/sh/configs/rsk7269_defconfig b/arch/sh/configs/rsk7269_defconfig
index 8370b10df357..d041f7bcb84c 100644
--- a/arch/sh/configs/rsk7269_defconfig
+++ b/arch/sh/configs/rsk7269_defconfig
@@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
@@ -44,11 +43,9 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y
# CONFIG_HWMON is not set
CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_R8A66597_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_USB_STORAGE_DEBUG=y
-CONFIG_USB_LIBUSUAL=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
@@ -60,5 +57,4 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
# CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
# CONFIG_FTRACE is not set
diff --git a/arch/sh/configs/rts7751r2d1_defconfig b/arch/sh/configs/rts7751r2d1_defconfig
index a3d081095ce2..379d673f5ce8 100644
--- a/arch/sh/configs/rts7751r2d1_defconfig
+++ b/arch/sh/configs/rts7751r2d1_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
@@ -22,7 +21,6 @@ CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_FW_LOADER=m
@@ -48,7 +46,6 @@ CONFIG_HW_RANDOM=y
CONFIG_SPI=y
CONFIG_SPI_SH_SCI=y
CONFIG_MFD_SM501=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_FB=y
CONFIG_FB_SH_MOBILE_LCDC=m
CONFIG_FB_SM501=y
@@ -83,7 +80,6 @@ CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_STORAGE=y
-CONFIG_USB_LIBUSUAL=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_R9701=y
CONFIG_EXT2_FS=y
@@ -94,6 +90,5 @@ CONFIG_TMPFS=y
CONFIG_MINIX_FS=y
CONFIG_NLS_CODEPAGE_932=y
CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig
index b1a04f3c598b..11177bceda83 100644
--- a/arch/sh/configs/rts7751r2dplus_defconfig
+++ b/arch/sh/configs/rts7751r2dplus_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
@@ -22,15 +21,11 @@ CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_FW_LOADER=m
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP=y
@@ -56,7 +51,6 @@ CONFIG_HW_RANDOM=y
CONFIG_SPI=y
CONFIG_SPI_SH_SCI=y
CONFIG_MFD_SM501=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_FB=y
CONFIG_FB_SH_MOBILE_LCDC=m
CONFIG_FB_SM501=y
@@ -91,7 +85,6 @@ CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_STORAGE=y
-CONFIG_USB_LIBUSUAL=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_R9701=y
CONFIG_EXT2_FS=y
@@ -102,6 +95,5 @@ CONFIG_TMPFS=y
CONFIG_MINIX_FS=y
CONFIG_NLS_CODEPAGE_932=y
CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig
index bbd4c2298708..95e5208b8260 100644
--- a/arch/sh/configs/sdk7780_defconfig
+++ b/arch/sh/configs/sdk7780_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_LOCALVERSION="_SDK7780"
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -39,7 +38,6 @@ CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_BOOTP=y
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
CONFIG_IPV6=y
# CONFIG_INET6_XFRM_MODE_BEET is not set
CONFIG_NET_SCHED=y
@@ -47,7 +45,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_PARPORT=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
CONFIG_IDE=y
CONFIG_BLK_DEV_IDECD=y
CONFIG_BLK_DEV_PLATFORM=y
@@ -63,8 +60,6 @@ CONFIG_BLK_DEV_DM=y
CONFIG_NETDEVICES=y
CONFIG_NET_ETHERNET=y
CONFIG_SMC91X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
CONFIG_NETCONSOLE=y
CONFIG_INPUT_FF_MEMLESS=m
CONFIG_INPUT_EVDEV=y
@@ -78,7 +73,6 @@ CONFIG_SSB=y
CONFIG_SSB_DRIVER_PCICORE=y
CONFIG_FB=y
CONFIG_FB_SH_MOBILE_LCDC=m
-CONFIG_DISPLAY_SUPPORT=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
CONFIG_LOGO=y
@@ -101,7 +95,6 @@ CONFIG_HID_SAMSUNG=y
CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
@@ -144,8 +137,6 @@ CONFIG_DETECT_HUNG_TASK=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_TIMER_STATS=y
CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_SH_STANDARD_BIOS=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
index 36642ec2cb97..e9ee0c878ead 100644
--- a/arch/sh/configs/sdk7786_defconfig
+++ b/arch/sh/configs/sdk7786_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_KERNEL_LZO=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -90,13 +89,11 @@ CONFIG_NET_KEY=y
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
CONFIG_FTL=y
@@ -119,7 +116,6 @@ CONFIG_MTD_UBI_GLUEBI=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_CRYPTOLOOP=y
CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
CONFIG_IDE=y
CONFIG_BLK_DEV_IDECD=y
CONFIG_BLK_DEV_PLATFORM=y
@@ -140,8 +136,6 @@ CONFIG_MDIO_BITBANG=y
CONFIG_NET_ETHERNET=y
CONFIG_SMC91X=y
CONFIG_SMSC911X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_WLAN is not set
CONFIG_VT_HW_CONSOLE_BINDING=y
CONFIG_SERIAL_SH_SCI=y
@@ -157,7 +151,6 @@ CONFIG_SPI=y
# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
CONFIG_SH_WDT=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_USB=y
CONFIG_USB_MON=y
CONFIG_USB_OHCI_HCD=y
@@ -223,9 +216,7 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_TIMER_STATS=y
CONFIG_DEBUG_MEMORY_INIT=y
-# CONFIG_RCU_CPU_STALL_VERBOSE is not set
CONFIG_LATENCYTOP=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_FUNCTION_TRACER=y
# CONFIG_FUNCTION_GRAPH_TRACER is not set
CONFIG_DMA_API_DEBUG=y
diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig
index 91853a67ec34..3553acd5edb1 100644
--- a/arch/sh/configs/se7206_defconfig
+++ b/arch/sh/configs/se7206_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -57,7 +56,6 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
@@ -65,9 +63,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -78,8 +73,6 @@ CONFIG_EEPROM_93CX6=y
CONFIG_NETDEVICES=y
CONFIG_NET_ETHERNET=y
CONFIG_SMC91X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -109,7 +102,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y
CONFIG_DEBUG_VM=y
CONFIG_DEBUG_LIST=y
CONFIG_FRAME_POINTER=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_CRYPTO_DEFLATE=y
CONFIG_CRYPTO_LZO=y
diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig
index 201acb4652f7..fc77a67b16e7 100644
--- a/arch/sh/configs/se7343_defconfig
+++ b/arch/sh/configs/se7343_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -27,26 +26,19 @@ CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_RAM=y
CONFIG_MTD_PHYSMAP=y
-# CONFIG_MISC_DEVICES is not set
CONFIG_SCSI=y
CONFIG_SCSI_MULTI_LUN=y
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_NETDEVICES=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
CONFIG_USB_USBNET=y
# CONFIG_USB_NET_AX8817X is not set
CONFIG_USB_NET_DM9601=y
@@ -104,5 +96,4 @@ CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_NFSD=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/se7619_defconfig b/arch/sh/configs/se7619_defconfig
index 9a9ad9adf959..f54722dbc8f5 100644
--- a/arch/sh/configs/se7619_defconfig
+++ b/arch/sh/configs/se7619_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_UID16 is not set
@@ -24,10 +23,7 @@ CONFIG_BINFMT_ZFLAT=y
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_REDBOOT_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -48,4 +44,3 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y
# CONFIG_SYSFS is not set
CONFIG_ROMFS_FS=y
# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
diff --git a/arch/sh/configs/se7705_defconfig b/arch/sh/configs/se7705_defconfig
index 044e0844fda1..ddfc69841955 100644
--- a/arch/sh/configs/se7705_defconfig
+++ b/arch/sh/configs/se7705_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_SWAP is not set
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
@@ -27,11 +26,8 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -58,5 +54,4 @@ CONFIG_PROC_KCORE=y
CONFIG_JFFS2_FS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig
index 1248635e4f88..5a1097641247 100644
--- a/arch/sh/configs/se7712_defconfig
+++ b/arch/sh/configs/se7712_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
@@ -47,7 +46,6 @@ CONFIG_SYN_COOKIES=y
CONFIG_INET_AH=y
CONFIG_INET_ESP=y
CONFIG_INET_IPCOMP=y
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
CONFIG_NET_SCHED=y
@@ -68,9 +66,6 @@ CONFIG_NET_CLS_FW=y
CONFIG_NET_CLS_IND=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -104,8 +99,6 @@ CONFIG_ROOT_NFS=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO=y
CONFIG_FRAME_POINTER=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig
index c3ba6e8a9818..9c0ef13bee10 100644
--- a/arch/sh/configs/se7721_defconfig
+++ b/arch/sh/configs/se7721_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
@@ -46,7 +45,6 @@ CONFIG_SYN_COOKIES=y
CONFIG_INET_AH=y
CONFIG_INET_ESP=y
CONFIG_INET_IPCOMP=y
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
CONFIG_NET_SCHED=y
@@ -67,9 +65,6 @@ CONFIG_NET_CLS_FW=y
CONFIG_NET_CLS_IND=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -132,6 +127,5 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO=y
CONFIG_FRAME_POINTER=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRC_CCITT=y
diff --git a/arch/sh/configs/se7722_defconfig b/arch/sh/configs/se7722_defconfig
index ae998c7e2ee0..ccc7fc423fde 100644
--- a/arch/sh/configs/se7722_defconfig
+++ b/arch/sh/configs/se7722_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
@@ -26,7 +25,6 @@ CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
@@ -57,6 +55,5 @@ CONFIG_PRINTK_TIME=y
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SH_STANDARD_BIOS=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig
index 1faa788aecae..aedb3a2d9a10 100644
--- a/arch/sh/configs/se7724_defconfig
+++ b/arch/sh/configs/se7724_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -30,14 +29,10 @@ CONFIG_IP_PNP_DHCP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -53,8 +48,6 @@ CONFIG_SMSC_PHY=y
CONFIG_NET_ETHERNET=y
CONFIG_SH_ETH=y
CONFIG_SMC91X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT_MOUSEDEV is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_KEYBOARD_ATKBD is not set
@@ -137,8 +130,6 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_932=y
CONFIG_NLS_ISO8859_1=y
# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/se7750_defconfig b/arch/sh/configs/se7750_defconfig
index 912c98590e22..b23f67542728 100644
--- a/arch/sh/configs/se7750_defconfig
+++ b/arch/sh/configs/se7750_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -25,11 +24,8 @@ CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -58,5 +54,4 @@ CONFIG_ROOT_NFS=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_MSDOS_PARTITION is not set
# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/se7751_defconfig b/arch/sh/configs/se7751_defconfig
index 75c92fc1876b..162343683937 100644
--- a/arch/sh/configs/se7751_defconfig
+++ b/arch/sh/configs/se7751_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_LOG_BUF_SHIFT=14
@@ -25,13 +24,9 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
-CONFIG_NETFILTER_DEBUG=y
-CONFIG_IP_NF_QUEUE=y
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -49,5 +44,4 @@ CONFIG_EXT2_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/se7780_defconfig b/arch/sh/configs/se7780_defconfig
index b0ef63ce525a..ec32c82646ed 100644
--- a/arch/sh/configs/se7780_defconfig
+++ b/arch/sh/configs/se7780_defconfig
@@ -24,7 +24,6 @@ CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
-# CONFIG_INET_LRO is not set
CONFIG_IPV6=y
# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
@@ -32,8 +31,6 @@ CONFIG_IPV6=y
# CONFIG_IPV6_SIT is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_ADV_OPTIONS=y
@@ -54,8 +51,6 @@ CONFIG_SMSC_PHY=y
CONFIG_NET_ETHERNET=y
CONFIG_SMC91X=y
CONFIG_NET_PCI=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
CONFIG_INPUT_FF_MEMLESS=m
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
# CONFIG_INPUT_KEYBOARD is not set
@@ -94,7 +89,6 @@ CONFIG_HID_SAMSUNG=y
CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_OHCI_HCD=y
@@ -110,5 +104,4 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/secureedge5410_defconfig b/arch/sh/configs/secureedge5410_defconfig
index 7eae4e59d7f0..360592d63a2f 100644
--- a/arch/sh/configs/secureedge5410_defconfig
+++ b/arch/sh/configs/secureedge5410_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_SWAP is not set
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
@@ -18,12 +17,9 @@ CONFIG_INET=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK_RO=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_ADV_OPTIONS=y
@@ -34,14 +30,11 @@ CONFIG_MTD_CFI_GEOMETRY=y
CONFIG_MTD_CFI_INTELEXT=y
CONFIG_MTD_PLATRAM=y
CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
CONFIG_NETDEVICES=y
CONFIG_NET_ETHERNET=y
CONFIG_NET_PCI=y
CONFIG_8139CP=y
CONFIG_8139TOO=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -51,7 +44,6 @@ CONFIG_SERIAL_SH_SCI=y
CONFIG_SERIAL_SH_SCI_CONSOLE=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
-# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1302=y
@@ -60,4 +52,3 @@ CONFIG_EXT2_FS=y
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_ROMFS_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig
index 0cf4097b71e8..2156223405a1 100644
--- a/arch/sh/configs/sh03_defconfig
+++ b/arch/sh/configs/sh03_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -34,7 +33,6 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
@@ -70,7 +68,6 @@ CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_AUTOFS_FS=y
CONFIG_AUTOFS4_FS=y
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
@@ -126,7 +123,6 @@ CONFIG_NLS_KOI8_R=m
CONFIG_NLS_KOI8_U=m
CONFIG_NLS_UTF8=m
CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SH_STANDARD_BIOS=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_HMAC=y
diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig
index df25ae774ee0..34094e05e892 100644
--- a/arch/sh/configs/sh2007_defconfig
+++ b/arch/sh/configs/sh2007_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -42,7 +41,6 @@ CONFIG_NET_IPIP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_NETWORK_SECMARK=y
CONFIG_NET_PKTGEN=y
@@ -50,7 +48,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_CDROM_PKTCDVD=y
-# CONFIG_MISC_DEVICES is not set
CONFIG_RAID_ATTRS=y
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
@@ -72,8 +69,6 @@ CONFIG_TUN=y
CONFIG_VETH=y
CONFIG_NET_ETHERNET=y
CONFIG_SMSC911X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_WLAN is not set
CONFIG_INPUT_FF_MEMLESS=y
# CONFIG_INPUT_MOUSEDEV is not set
@@ -95,9 +90,7 @@ CONFIG_BACKLIGHT_LCD_SUPPORT=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
CONFIG_LOGO=y
-# CONFIG_HID_SUPPORT is not set
CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_MON=y
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
@@ -172,7 +165,6 @@ CONFIG_DEBUG_KERNEL=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_DEBUG_INFO=y
CONFIG_FRAME_POINTER=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SH_STANDARD_BIOS=y
CONFIG_CRYPTO_NULL=y
CONFIG_CRYPTO_AUTHENC=y
diff --git a/arch/sh/configs/sh7710voipgw_defconfig b/arch/sh/configs/sh7710voipgw_defconfig
index f92ad17cd629..65a1aad899c8 100644
--- a/arch/sh/configs/sh7710voipgw_defconfig
+++ b/arch/sh/configs/sh7710voipgw_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -24,7 +23,6 @@ CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
@@ -36,8 +34,6 @@ CONFIG_NET_CLS_ROUTE4=y
CONFIG_NET_CLS_U32=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -59,5 +55,4 @@ CONFIG_THERMAL=y
# CONFIG_DNOTIFY is not set
CONFIG_JFFS2_FS=y
CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/sh7724_generic_defconfig b/arch/sh/configs/sh7724_generic_defconfig
index f83ac7b0b031..d15e53647983 100644
--- a/arch/sh/configs/sh7724_generic_defconfig
+++ b/arch/sh/configs/sh7724_generic_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_CGROUPS=y
@@ -18,7 +17,6 @@ CONFIG_HIBERNATION=y
CONFIG_CPU_IDLE=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-# CONFIG_MISC_DEVICES is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -44,5 +42,4 @@ CONFIG_UIO_PDRV_GENIRQ=y
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRC32 is not set
diff --git a/arch/sh/configs/sh7757lcr_defconfig b/arch/sh/configs/sh7757lcr_defconfig
index cfde98ddb29d..b0c4bc830fb8 100644
--- a/arch/sh/configs/sh7757lcr_defconfig
+++ b/arch/sh/configs/sh7757lcr_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -32,13 +31,11 @@ CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_LRO is not set
CONFIG_IPV6=y
# CONFIG_WIRELESS is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_M25P80=y
CONFIG_BLK_DEV_RAM=y
@@ -48,7 +45,6 @@ CONFIG_NETDEVICES=y
CONFIG_VITESSE_PHY=y
CONFIG_NET_ETHERNET=y
CONFIG_SH_ETH=y
-# CONFIG_NETDEV_10000 is not set
# CONFIG_WLAN is not set
# CONFIG_KEYBOARD_ATKBD is not set
# CONFIG_MOUSE_PS2 is not set
diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig
index 479536440264..2ef780fb9813 100644
--- a/arch/sh/configs/sh7763rdp_defconfig
+++ b/arch/sh/configs/sh7763rdp_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
@@ -26,11 +25,9 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLKDEVS=y
CONFIG_MTD_CFI=y
@@ -43,14 +40,11 @@ CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_CFI_STAA=y
CONFIG_MTD_COMPLEX_MAPPINGS=y
CONFIG_MTD_PHYSMAP=y
-# CONFIG_MISC_DEVICES is not set
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_NETDEVICES=y
CONFIG_NET_ETHERNET=y
CONFIG_SH_ETH=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -65,7 +59,6 @@ CONFIG_FB_FOREIGN_ENDIAN=y
CONFIG_FB_SH7760=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
-# CONFIG_HID_SUPPORT is not set
CONFIG_USB=y
CONFIG_USB_MON=y
CONFIG_USB_OHCI_HCD=y
@@ -74,7 +67,6 @@ CONFIG_MMC=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_AUTOFS_FS=y
CONFIG_AUTOFS4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
@@ -124,6 +116,5 @@ CONFIG_NLS_UTF8=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_DEBUG_FS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRC_T10DIF=y
diff --git a/arch/sh/configs/sh7770_generic_defconfig b/arch/sh/configs/sh7770_generic_defconfig
index 025bd3ac5ab0..742634b37c0a 100644
--- a/arch/sh/configs/sh7770_generic_defconfig
+++ b/arch/sh/configs/sh7770_generic_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_CGROUPS=y
@@ -20,7 +19,6 @@ CONFIG_HIBERNATION=y
CONFIG_CPU_IDLE=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-# CONFIG_MISC_DEVICES is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -46,5 +44,4 @@ CONFIG_UIO_PDRV_GENIRQ=y
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
# CONFIG_CRC32 is not set
diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig
index 2fce54d9c388..2ddf5ca7094e 100644
--- a/arch/sh/configs/sh7785lcr_32bit_defconfig
+++ b/arch/sh/configs/sh7785lcr_32bit_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -44,13 +43,9 @@ CONFIG_INET=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -58,7 +53,6 @@ CONFIG_MTD_PHYSMAP=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_CRYPTOLOOP=m
CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
# CONFIG_SCSI_PROC_FS is not set
CONFIG_BLK_DEV_SD=y
# CONFIG_SCSI_LOWLEVEL is not set
@@ -69,7 +63,6 @@ CONFIG_NET_ETHERNET=y
CONFIG_NET_VENDOR_3COM=y
CONFIG_VORTEX=y
CONFIG_R8169=y
-# CONFIG_NETDEV_10000 is not set
# CONFIG_WLAN is not set
CONFIG_INPUT_FF_MEMLESS=m
CONFIG_INPUT_EVDEV=y
@@ -113,7 +106,6 @@ CONFIG_SND_CMIPCI=y
CONFIG_SND_EMU10K1=y
# CONFIG_SND_SUPERH is not set
CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_R8A66597_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_MMC=y
@@ -154,9 +146,7 @@ CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_DEBUG_SPINLOCK_SLEEP=y
CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_LATENCYTOP=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
# CONFIG_FTRACE is not set
CONFIG_CRYPTO_HMAC=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/sh7785lcr_defconfig b/arch/sh/configs/sh7785lcr_defconfig
index d29da4a0f6c2..7098828d392e 100644
--- a/arch/sh/configs/sh7785lcr_defconfig
+++ b/arch/sh/configs/sh7785lcr_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
@@ -26,27 +25,21 @@ CONFIG_INET=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP=y
CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
CONFIG_BLK_DEV_SD=y
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_ATA=y
CONFIG_SATA_SIL=y
CONFIG_NETDEVICES=y
CONFIG_R8169=y
-# CONFIG_NETDEV_10000 is not set
CONFIG_INPUT_FF_MEMLESS=m
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
# CONFIG_KEYBOARD_ATKBD is not set
@@ -121,8 +114,6 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO_HMAC=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig
index 4802e14a4649..d589cfdfb7eb 100644
--- a/arch/sh/configs/shmin_defconfig
+++ b/arch/sh/configs/shmin_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_SWAP is not set
CONFIG_LOG_BUF_SHIFT=14
# CONFIG_UID16 is not set
@@ -28,10 +27,8 @@ CONFIG_NET=y
CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_PNP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
@@ -53,6 +50,5 @@ CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SH_STANDARD_BIOS=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig
index 4a4269ad5b04..755c4f73c718 100644
--- a/arch/sh/configs/shx3_defconfig
+++ b/arch/sh/configs/shx3_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -56,7 +55,6 @@ CONFIG_NET=y
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_LRO is not set
CONFIG_CAN=m
CONFIG_CAN_RAW=m
CONFIG_CAN_BCM=m
@@ -70,8 +68,6 @@ CONFIG_PATA_PLATFORM=y
CONFIG_NETDEVICES=y
CONFIG_NET_ETHERNET=y
CONFIG_SMC91X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -82,7 +78,6 @@ CONFIG_I2C=m
CONFIG_SPI=y
# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_USB=y
CONFIG_USB_MON=y
CONFIG_USB_R8A66597_HCD=m
@@ -104,7 +99,6 @@ CONFIG_DEBUG_SHIRQ=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_VM=y
CONFIG_FRAME_POINTER=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SH_STANDARD_BIOS=y
CONFIG_DEBUG_STACK_USAGE=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig
index a77b778c745b..ceb48e9b70f4 100644
--- a/arch/sh/configs/titan_defconfig
+++ b/arch/sh/configs/titan_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -49,7 +48,6 @@ CONFIG_SYN_COOKIES=y
CONFIG_INET_AH=y
CONFIG_INET_ESP=y
CONFIG_INET_IPCOMP=y
-# CONFIG_INET_LRO is not set
CONFIG_INET_DIAG=m
CONFIG_IPV6=y
CONFIG_IPV6_PRIVACY=y
@@ -79,7 +77,6 @@ CONFIG_NETFILTER_XT_MATCH_REALM=m
CONFIG_NETFILTER_XT_MATCH_SCTP=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
-CONFIG_IP_NF_QUEUE=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_ADDRTYPE=m
CONFIG_IP_NF_MATCH_AH=m
@@ -88,7 +85,6 @@ CONFIG_IP_NF_MATCH_TTL=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
@@ -96,7 +92,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_IP6_NF_QUEUE=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -106,7 +101,6 @@ CONFIG_IP6_NF_MATCH_HL=m
CONFIG_IP6_NF_MATCH_IPV6HEADER=m
CONFIG_IP6_NF_MATCH_RT=m
CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_TARGET_LOG=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP6_NF_MANGLE=m
@@ -154,7 +148,6 @@ CONFIG_FW_LOADER=m
CONFIG_CONNECTOR=m
CONFIG_MTD=m
CONFIG_MTD_DEBUG=y
-CONFIG_MTD_CHAR=m
CONFIG_MTD_BLOCK=m
CONFIG_FTL=m
CONFIG_NFTL=m
@@ -261,7 +254,6 @@ CONFIG_NLS_UTF8=m
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_MD4=m
diff --git a/arch/sh/configs/ul2_defconfig b/arch/sh/configs/ul2_defconfig
index 2d288b887fbd..5f2921a85192 100644
--- a/arch/sh/configs/ul2_defconfig
+++ b/arch/sh/configs/ul2_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
@@ -29,7 +28,6 @@ CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_CFG80211=y
CONFIG_MAC80211=y
@@ -37,9 +35,6 @@ CONFIG_MAC80211_RC_PID=y
# CONFIG_MAC80211_RC_MINSTREL is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -50,8 +45,6 @@ CONFIG_ATA=y
CONFIG_PATA_PLATFORM=y
CONFIG_NETDEVICES=y
CONFIG_NET_ETHERNET=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
CONFIG_LIBERTAS=m
CONFIG_LIBERTAS_SDIO=m
CONFIG_LIBERTAS_DEBUG=y
@@ -70,7 +63,6 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y
# CONFIG_UNIX98_PTYS is not set
# CONFIG_LEGACY_PTYS is not set
# CONFIG_HW_RANDOM is not set
-# CONFIG_HID_SUPPORT is not set
CONFIG_USB=y
CONFIG_USB_MON=y
CONFIG_USB_R8A66597_HCD=y
@@ -92,6 +84,5 @@ CONFIG_NLS_CODEPAGE_932=y
CONFIG_NLS_ISO8859_1=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_CRYPTO_MICHAEL_MIC=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig
index 01c9a91ee896..7d5591b7c088 100644
--- a/arch/sh/configs/urquell_defconfig
+++ b/arch/sh/configs/urquell_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
@@ -46,20 +45,15 @@ CONFIG_INET=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP=y
CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
CONFIG_BLK_DEV_SD=y
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_ATA=y
@@ -73,7 +67,6 @@ CONFIG_NET_PCI=y
CONFIG_8139CP=y
CONFIG_SKY2=y
CONFIG_SKY2_DEBUG=y
-# CONFIG_NETDEV_10000 is not set
CONFIG_INPUT_FF_MEMLESS=m
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
# CONFIG_KEYBOARD_ATKBD is not set
@@ -150,8 +143,6 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_INFO=y
CONFIG_FRAME_POINTER=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
# CONFIG_FTRACE is not set
# CONFIG_DUMP_CODE is not set
CONFIG_CRYPTO_HMAC=y
diff --git a/arch/sh/drivers/pci/fixups-cayman.c b/arch/sh/drivers/pci/fixups-cayman.c
index edc2fb7a5bb2..32467884d6f7 100644
--- a/arch/sh/drivers/pci/fixups-cayman.c
+++ b/arch/sh/drivers/pci/fixups-cayman.c
@@ -5,7 +5,7 @@
#include <cpu/irq.h>
#include "pci-sh5.h"
-int __init pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
int result = -1;
diff --git a/arch/sh/drivers/pci/fixups-dreamcast.c b/arch/sh/drivers/pci/fixups-dreamcast.c
index 1d1c5a227e50..9d597f7ab8dd 100644
--- a/arch/sh/drivers/pci/fixups-dreamcast.c
+++ b/arch/sh/drivers/pci/fixups-dreamcast.c
@@ -76,7 +76,7 @@ static void gapspci_fixup_resources(struct pci_dev *dev)
}
DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, gapspci_fixup_resources);
-int __init pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
/*
* The interrupt routing semantics here are quite trivial.
diff --git a/arch/sh/drivers/pci/fixups-r7780rp.c b/arch/sh/drivers/pci/fixups-r7780rp.c
index 57ed3f09d0c2..2c9b58f848dd 100644
--- a/arch/sh/drivers/pci/fixups-r7780rp.c
+++ b/arch/sh/drivers/pci/fixups-r7780rp.c
@@ -15,7 +15,7 @@
#include <linux/sh_intc.h>
#include "pci-sh4.h"
-int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
{
return evt2irq(0xa20) + slot;
}
diff --git a/arch/sh/drivers/pci/fixups-rts7751r2d.c b/arch/sh/drivers/pci/fixups-rts7751r2d.c
index eaddb56c45c6..358ac104f08c 100644
--- a/arch/sh/drivers/pci/fixups-rts7751r2d.c
+++ b/arch/sh/drivers/pci/fixups-rts7751r2d.c
@@ -20,18 +20,18 @@
#define PCIMCR_MRSET_OFF 0xBFFFFFFF
#define PCIMCR_RFSH_OFF 0xFFFFFFFB
-static u8 rts7751r2d_irq_tab[] __initdata = {
+static u8 rts7751r2d_irq_tab[] = {
IRQ_PCI_INTA,
IRQ_PCI_INTB,
IRQ_PCI_INTC,
IRQ_PCI_INTD,
};
-static char lboxre2_irq_tab[] __initdata = {
+static char lboxre2_irq_tab[] = {
IRQ_ETH0, IRQ_ETH1, IRQ_INTA, IRQ_INTD,
};
-int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
{
if (mach_is_lboxre2())
return lboxre2_irq_tab[slot];
diff --git a/arch/sh/drivers/pci/fixups-sdk7780.c b/arch/sh/drivers/pci/fixups-sdk7780.c
index c0a015ae6ecf..24e96dfbdb22 100644
--- a/arch/sh/drivers/pci/fixups-sdk7780.c
+++ b/arch/sh/drivers/pci/fixups-sdk7780.c
@@ -22,7 +22,7 @@
#define IRQ_INTD evt2irq(0xa80)
/* IDSEL [16][17][18][19][20][21][22][23][24][25][26][27][28][29][30][31] */
-static char sdk7780_irq_tab[4][16] __initdata = {
+static char sdk7780_irq_tab[4][16] = {
/* INTA */
{ IRQ_INTA, IRQ_INTD, IRQ_INTC, IRQ_INTD, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1 },
@@ -37,7 +37,7 @@ static char sdk7780_irq_tab[4][16] __initdata = {
-1, -1, -1 },
};
-int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
{
return sdk7780_irq_tab[pin-1][slot];
}
diff --git a/arch/sh/drivers/pci/fixups-se7751.c b/arch/sh/drivers/pci/fixups-se7751.c
index 84a88ca92008..1cb8d0ac4fdb 100644
--- a/arch/sh/drivers/pci/fixups-se7751.c
+++ b/arch/sh/drivers/pci/fixups-se7751.c
@@ -7,7 +7,7 @@
#include <linux/sh_intc.h>
#include "pci-sh4.h"
-int __init pcibios_map_platform_irq(const struct pci_dev *, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *, u8 slot, u8 pin)
{
switch (slot) {
case 0: return evt2irq(0x3a0);
diff --git a/arch/sh/drivers/pci/fixups-sh03.c b/arch/sh/drivers/pci/fixups-sh03.c
index 16207bef9f52..55ac1ba2c74f 100644
--- a/arch/sh/drivers/pci/fixups-sh03.c
+++ b/arch/sh/drivers/pci/fixups-sh03.c
@@ -4,7 +4,7 @@
#include <linux/pci.h>
#include <linux/sh_intc.h>
-int __init pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
int irq;
diff --git a/arch/sh/drivers/pci/fixups-snapgear.c b/arch/sh/drivers/pci/fixups-snapgear.c
index 6e33ba4cd076..a931e5928f58 100644
--- a/arch/sh/drivers/pci/fixups-snapgear.c
+++ b/arch/sh/drivers/pci/fixups-snapgear.c
@@ -19,7 +19,7 @@
#include <linux/sh_intc.h>
#include "pci-sh4.h"
-int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
{
int irq = -1;
diff --git a/arch/sh/drivers/pci/fixups-titan.c b/arch/sh/drivers/pci/fixups-titan.c
index bd1addb1b8be..a9d563e479d5 100644
--- a/arch/sh/drivers/pci/fixups-titan.c
+++ b/arch/sh/drivers/pci/fixups-titan.c
@@ -19,7 +19,7 @@
#include <mach/titan.h>
#include "pci-sh4.h"
-static char titan_irq_tab[] __initdata = {
+static char titan_irq_tab[] = {
TITAN_IRQ_WAN,
TITAN_IRQ_LAN,
TITAN_IRQ_MPCIA,
@@ -27,7 +27,7 @@ static char titan_irq_tab[] __initdata = {
TITAN_IRQ_USB,
};
-int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
{
int irq = titan_irq_tab[slot];
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index c99ee286b69f..5976a2c8a3e3 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -39,8 +39,12 @@ static void pcibios_scanbus(struct pci_channel *hose)
LIST_HEAD(resources);
struct resource *res;
resource_size_t offset;
- int i;
- struct pci_bus *bus;
+ int i, ret;
+ struct pci_host_bridge *bridge;
+
+ bridge = pci_alloc_host_bridge(0);
+ if (!bridge)
+ return;
for (i = 0; i < hose->nr_resources; i++) {
res = hose->resources + i;
@@ -52,19 +56,26 @@ static void pcibios_scanbus(struct pci_channel *hose)
pci_add_resource_offset(&resources, res, offset);
}
- bus = pci_scan_root_bus(NULL, next_busno, hose->pci_ops, hose,
- &resources);
- hose->bus = bus;
+ list_splice_init(&resources, &bridge->windows);
+ bridge->dev.parent = NULL;
+ bridge->sysdata = hose;
+ bridge->busnr = next_busno;
+ bridge->ops = hose->pci_ops;
+ bridge->swizzle_irq = pci_common_swizzle;
+ bridge->map_irq = pcibios_map_platform_irq;
+
+ ret = pci_scan_root_bus_bridge(bridge);
+ if (ret) {
+ pci_free_host_bridge(bridge);
+ return;
+ }
+
+ hose->bus = bridge->bus;
need_domain_info = need_domain_info || hose->index;
hose->need_domain_info = need_domain_info;
- if (!bus) {
- pci_free_resource_list(&resources);
- return;
- }
-
- next_busno = bus->busn_res.end + 1;
+ next_busno = hose->bus->busn_res.end + 1;
/* Don't allow 8-bit bus number overflow inside the hose -
reserve some space for bridges. */
if (next_busno > 224) {
@@ -72,9 +83,9 @@ static void pcibios_scanbus(struct pci_channel *hose)
need_domain_info = 1;
}
- pci_bus_size_bridges(bus);
- pci_bus_assign_resources(bus);
- pci_bus_add_devices(bus);
+ pci_bus_size_bridges(hose->bus);
+ pci_bus_assign_resources(hose->bus);
+ pci_bus_add_devices(hose->bus);
}
/*
@@ -144,8 +155,6 @@ static int __init pcibios_init(void)
for (hose = hose_head; hose; hose = hose->next)
pcibios_scanbus(hose);
- pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq);
-
dma_debug_add_bus(&pci_bus_type);
pci_initialized = 1;
@@ -155,14 +164,6 @@ static int __init pcibios_init(void)
subsys_initcall(pcibios_init);
/*
- * Called after each bus is probed, but before its children
- * are examined.
- */
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
-}
-
-/*
* We need to avoid collisions with `mirrored' VGA ports
* and other strange ISA hardware, so we always want the
* addresses to be allocated in the 0x000-0x0ff region
diff --git a/arch/sh/drivers/pci/pcie-sh7786.c b/arch/sh/drivers/pci/pcie-sh7786.c
index a162a7f86b2e..0167a7352719 100644
--- a/arch/sh/drivers/pci/pcie-sh7786.c
+++ b/arch/sh/drivers/pci/pcie-sh7786.c
@@ -467,7 +467,7 @@ static int __init pcie_init(struct sh7786_pcie_port *port)
return 0;
}
-int __init pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
{
return evt2irq(0xae0);
}
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h
index d0078747d308..8f8cf941a8cd 100644
--- a/arch/sh/include/asm/futex.h
+++ b/arch/sh/include/asm/futex.h
@@ -27,21 +27,12 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval);
}
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
+ u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- u32 oparg = (encoded_op << 8) >> 20;
- u32 cmparg = (encoded_op << 20) >> 20;
u32 oldval, newval, prev;
int ret;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
- return -EFAULT;
-
pagefault_disable();
do {
@@ -80,17 +71,8 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
pagefault_enable();
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = ((int)oldval < (int)cmparg); break;
- case FUTEX_OP_CMP_GE: ret = ((int)oldval >= (int)cmparg); break;
- case FUTEX_OP_CMP_LE: ret = ((int)oldval <= (int)cmparg); break;
- case FUTEX_OP_CMP_GT: ret = ((int)oldval > (int)cmparg); break;
- default: ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = oldval;
return ret;
}
diff --git a/arch/sh/include/asm/spinlock-cas.h b/arch/sh/include/asm/spinlock-cas.h
index c46e8cc7b515..5ed7dbbd94ff 100644
--- a/arch/sh/include/asm/spinlock-cas.h
+++ b/arch/sh/include/asm/spinlock-cas.h
@@ -29,11 +29,6 @@ static inline unsigned __sl_cas(volatile unsigned *p, unsigned old, unsigned new
#define arch_spin_is_locked(x) ((x)->lock <= 0)
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- smp_cond_load_acquire(&lock->lock, VAL > 0);
-}
-
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
while (!__sl_cas(&lock->lock, 1, 0));
diff --git a/arch/sh/include/asm/spinlock-llsc.h b/arch/sh/include/asm/spinlock-llsc.h
index cec78143fa83..f77263aae760 100644
--- a/arch/sh/include/asm/spinlock-llsc.h
+++ b/arch/sh/include/asm/spinlock-llsc.h
@@ -21,11 +21,6 @@
#define arch_spin_is_locked(x) ((x)->lock <= 0)
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- smp_cond_load_acquire(&lock->lock, VAL > 0);
-}
-
/*
* Simple spin lock operations. There are two variants, one clears IRQ's
* on the local processor, one does not.
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 46e0d635e36f..51a8bc967e75 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -36,7 +36,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb)
}
static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
tlb->mm = mm;
tlb->start = start;
@@ -47,9 +48,10 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
}
static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+ unsigned long start, unsigned long end, bool force)
{
- if (tlb->fullmm)
+ if (tlb->fullmm || force)
flush_tlb_mm(tlb->mm);
/* keep the page table cache within bounds */
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index a4a626199c47..0be3828752e5 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -97,6 +97,9 @@ config ARCH_PROC_KCORE_TEXT
config CPU_BIG_ENDIAN
def_bool y
+config CPU_BIG_ENDIAN
+ def_bool y
+
config ARCH_ATU
bool
default y if SPARC64
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
index c90930de76ba..3cd4f6b198b6 100644
--- a/arch/sparc/crypto/aes_glue.c
+++ b/arch/sparc/crypto/aes_glue.c
@@ -344,8 +344,7 @@ static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx,
ctx->ops->ecb_encrypt(&ctx->key[0], (const u64 *)ctrblk,
keystream, AES_BLOCK_SIZE);
- crypto_xor((u8 *) keystream, src, nbytes);
- memcpy(dst, keystream, nbytes);
+ crypto_xor_cpy(dst, (u8 *) keystream, src, nbytes);
crypto_inc(ctrblk, AES_BLOCK_SIZE);
}
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index ee3f11c43cda..7643e979e333 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -29,6 +29,8 @@ int atomic_xchg(atomic_t *, int);
int __atomic_add_unless(atomic_t *, int, int);
void atomic_set(atomic_t *, int);
+#define atomic_set_release(v, i) atomic_set((v), (i))
+
#define atomic_read(v) ACCESS_ONCE((v)->counter)
#define atomic_add(i, v) ((void)atomic_add_return( (int)(i), (v)))
diff --git a/arch/sparc/include/asm/futex_64.h b/arch/sparc/include/asm/futex_64.h
index 4e899b0dabf7..1cfd89d92208 100644
--- a/arch/sparc/include/asm/futex_64.h
+++ b/arch/sparc/include/asm/futex_64.h
@@ -29,22 +29,14 @@
: "r" (uaddr), "r" (oparg), "i" (-EFAULT) \
: "memory")
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+ u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret, tem;
- if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
- return -EFAULT;
if (unlikely((((unsigned long) uaddr) & 0x3UL)))
return -EINVAL;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
pagefault_disable();
switch (op) {
@@ -69,17 +61,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
pagefault_enable();
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = oldval;
+
return ret;
}
diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h
index 0efd0583a8c9..6249214148c2 100644
--- a/arch/sparc/include/asm/page_32.h
+++ b/arch/sparc/include/asm/page_32.h
@@ -68,6 +68,7 @@ typedef struct { unsigned long iopgprot; } iopgprot_t;
#define iopgprot_val(x) ((x).iopgprot)
#define __pte(x) ((pte_t) { (x) } )
+#define __pmd(x) ((pmd_t) { { (x) }, })
#define __iopte(x) ((iopte_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
#define __ctxd(x) ((ctxd_t) { (x) } )
@@ -95,6 +96,7 @@ typedef unsigned long iopgprot_t;
#define iopgprot_val(x) (x)
#define __pte(x) (x)
+#define __pmd(x) ((pmd_t) { { (x) }, })
#define __iopte(x) (x)
#define __pgd(x) (x)
#define __ctxd(x) (x)
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index 8011e79f59c9..67345b2dc408 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -14,11 +14,6 @@
#define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- smp_cond_load_acquire(&lock->lock, !VAL);
-}
-
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
__asm__ __volatile__(
diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h
index 1d8321c827a8..1b1286d05069 100644
--- a/arch/sparc/include/asm/spitfire.h
+++ b/arch/sparc/include/asm/spitfire.h
@@ -47,10 +47,26 @@
#define SUN4V_CHIP_NIAGARA5 0x05
#define SUN4V_CHIP_SPARC_M6 0x06
#define SUN4V_CHIP_SPARC_M7 0x07
+#define SUN4V_CHIP_SPARC_M8 0x08
#define SUN4V_CHIP_SPARC64X 0x8a
#define SUN4V_CHIP_SPARC_SN 0x8b
#define SUN4V_CHIP_UNKNOWN 0xff
+/*
+ * The following CPU_ID_xxx constants are used
+ * to identify the CPU type in the setup phase
+ * (see head_64.S)
+ */
+#define CPU_ID_NIAGARA1 ('1')
+#define CPU_ID_NIAGARA2 ('2')
+#define CPU_ID_NIAGARA3 ('3')
+#define CPU_ID_NIAGARA4 ('4')
+#define CPU_ID_NIAGARA5 ('5')
+#define CPU_ID_M6 ('6')
+#define CPU_ID_M7 ('7')
+#define CPU_ID_M8 ('8')
+#define CPU_ID_SONOMA1 ('N')
+
#ifndef __ASSEMBLY__
enum ultra_tlb_layout {
diff --git a/arch/sparc/include/asm/vga.h b/arch/sparc/include/asm/vga.h
index ec0e9967d93d..f54e8b6fb197 100644
--- a/arch/sparc/include/asm/vga.h
+++ b/arch/sparc/include/asm/vga.h
@@ -8,9 +8,13 @@
#define _LINUX_ASM_VGA_H_
#include <linux/bug.h>
+#include <linux/string.h>
#include <asm/types.h>
#define VT_BUF_HAVE_RW
+#define VT_BUF_HAVE_MEMSETW
+#define VT_BUF_HAVE_MEMCPYW
+#define VT_BUF_HAVE_MEMMOVEW
#undef scr_writew
#undef scr_readw
@@ -29,6 +33,27 @@ static inline u16 scr_readw(const u16 *addr)
return *addr;
}
+static inline void scr_memsetw(u16 *p, u16 v, unsigned int n)
+{
+ BUG_ON((long) p >= 0);
+
+ memset16(p, cpu_to_le16(v), n / 2);
+}
+
+static inline void scr_memcpyw(u16 *d, u16 *s, unsigned int n)
+{
+ BUG_ON((long) d >= 0);
+
+ memcpy(d, s, n);
+}
+
+static inline void scr_memmovew(u16 *d, u16 *s, unsigned int n)
+{
+ BUG_ON((long) d >= 0);
+
+ memmove(d, s, n);
+}
+
#define VGA_MAP_MEM(x,s) (x)
#endif
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 186fd8199f54..b2f5c50d0947 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -98,6 +98,8 @@
#define SO_PEERGROUPS 0x003d
+#define SO_ZEROCOPY 0x003e
+
/* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c
index 493e023a468a..ef4f18f7a674 100644
--- a/arch/sparc/kernel/cpu.c
+++ b/arch/sparc/kernel/cpu.c
@@ -506,6 +506,12 @@ static void __init sun4v_cpu_probe(void)
sparc_pmu_type = "sparc-m7";
break;
+ case SUN4V_CHIP_SPARC_M8:
+ sparc_cpu_type = "SPARC-M8";
+ sparc_fpu_type = "SPARC-M8 integrated FPU";
+ sparc_pmu_type = "sparc-m8";
+ break;
+
case SUN4V_CHIP_SPARC_SN:
sparc_cpu_type = "SPARC-SN";
sparc_fpu_type = "SPARC-SN integrated FPU";
diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c
index 45c820e1cba5..90d550bbfeef 100644
--- a/arch/sparc/kernel/cpumap.c
+++ b/arch/sparc/kernel/cpumap.c
@@ -328,6 +328,7 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index)
case SUN4V_CHIP_NIAGARA5:
case SUN4V_CHIP_SPARC_M6:
case SUN4V_CHIP_SPARC_M7:
+ case SUN4V_CHIP_SPARC_M8:
case SUN4V_CHIP_SPARC_SN:
case SUN4V_CHIP_SPARC64X:
rover_inc_table = niagara_iterate_method;
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 41a407328667..78e0211753d2 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -424,22 +424,25 @@ EXPORT_SYMBOL(sun4v_chip_type)
nop
70: ldub [%g1 + 7], %g2
- cmp %g2, '3'
+ cmp %g2, CPU_ID_NIAGARA3
be,pt %xcc, 5f
mov SUN4V_CHIP_NIAGARA3, %g4
- cmp %g2, '4'
+ cmp %g2, CPU_ID_NIAGARA4
be,pt %xcc, 5f
mov SUN4V_CHIP_NIAGARA4, %g4
- cmp %g2, '5'
+ cmp %g2, CPU_ID_NIAGARA5
be,pt %xcc, 5f
mov SUN4V_CHIP_NIAGARA5, %g4
- cmp %g2, '6'
+ cmp %g2, CPU_ID_M6
be,pt %xcc, 5f
mov SUN4V_CHIP_SPARC_M6, %g4
- cmp %g2, '7'
+ cmp %g2, CPU_ID_M7
be,pt %xcc, 5f
mov SUN4V_CHIP_SPARC_M7, %g4
- cmp %g2, 'N'
+ cmp %g2, CPU_ID_M8
+ be,pt %xcc, 5f
+ mov SUN4V_CHIP_SPARC_M8, %g4
+ cmp %g2, CPU_ID_SONOMA1
be,pt %xcc, 5f
mov SUN4V_CHIP_SPARC_SN, %g4
ba,pt %xcc, 49f
@@ -448,10 +451,10 @@ EXPORT_SYMBOL(sun4v_chip_type)
91: sethi %hi(prom_cpu_compatible), %g1
or %g1, %lo(prom_cpu_compatible), %g1
ldub [%g1 + 17], %g2
- cmp %g2, '1'
+ cmp %g2, CPU_ID_NIAGARA1
be,pt %xcc, 5f
mov SUN4V_CHIP_NIAGARA1, %g4
- cmp %g2, '2'
+ cmp %g2, CPU_ID_NIAGARA2
be,pt %xcc, 5f
mov SUN4V_CHIP_NIAGARA2, %g4
@@ -602,6 +605,9 @@ niagara_tlb_fixup:
cmp %g1, SUN4V_CHIP_SPARC_M7
be,pt %xcc, niagara4_patch
nop
+ cmp %g1, SUN4V_CHIP_SPARC_M8
+ be,pt %xcc, niagara4_patch
+ nop
cmp %g1, SUN4V_CHIP_SPARC_SN
be,pt %xcc, niagara4_patch
nop
diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c
index 4371f72ff025..98c223edac84 100644
--- a/arch/sparc/kernel/leon_pci.c
+++ b/arch/sparc/kernel/leon_pci.c
@@ -25,6 +25,12 @@ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info)
{
LIST_HEAD(resources);
struct pci_bus *root_bus;
+ struct pci_host_bridge *bridge;
+ int ret;
+
+ bridge = pci_alloc_host_bridge(0);
+ if (!bridge)
+ return;
pci_add_resource_offset(&resources, &info->io_space,
info->io_space.start - 0x1000);
@@ -32,15 +38,21 @@ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info)
info->busn.flags = IORESOURCE_BUS;
pci_add_resource(&resources, &info->busn);
- root_bus = pci_scan_root_bus(&ofdev->dev, 0, info->ops, info,
- &resources);
- if (!root_bus) {
- pci_free_resource_list(&resources);
+ list_splice_init(&resources, &bridge->windows);
+ bridge->dev.parent = &ofdev->dev;
+ bridge->sysdata = info;
+ bridge->busnr = 0;
+ bridge->ops = info->ops;
+ bridge->swizzle_irq = pci_common_swizzle;
+ bridge->map_irq = info->map_irq;
+
+ ret = pci_scan_root_bus_bridge(bridge);
+ if (ret) {
+ pci_free_host_bridge(bridge);
return;
}
- /* Setup IRQs of all devices using custom routines */
- pci_fixup_irqs(pci_common_swizzle, info->map_irq);
+ root_bus = bridge->bus;
/* Assign devices with resources */
pci_assign_unassigned_resources();
@@ -94,9 +106,3 @@ void pcibios_fixup_bus(struct pci_bus *pbus)
}
}
}
-
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
- resource_size_t size, resource_size_t align)
-{
- return res->start;
-}
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 7eceaa10836f..3f8670c92951 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -690,16 +690,6 @@ struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm,
return bus;
}
-void pcibios_fixup_bus(struct pci_bus *pbus)
-{
-}
-
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
- resource_size_t size, resource_size_t align)
-{
- return res->start;
-}
-
int pcibios_enable_device(struct pci_dev *dev, int mask)
{
u16 cmd, oldcmd;
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index f10e2f712394..9ebebf1fd93d 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -1266,8 +1266,6 @@ static int pci_sun4v_probe(struct platform_device *op)
* ATU group, but ATU hcalls won't be available.
*/
hv_atu = false;
- pr_err(PFX "Could not register hvapi ATU err=%d\n",
- err);
} else {
pr_info(PFX "Registered hvapi ATU major[%lu] minor[%lu]\n",
vatu_major, vatu_minor);
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index a38787b84322..4a133c052af8 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -602,7 +602,7 @@ void pcibios_fixup_bus(struct pci_bus *bus)
{
struct pci_dev *dev;
int i, has_io, has_mem;
- unsigned int cmd;
+ unsigned int cmd = 0;
struct linux_pcic *pcic;
/* struct linux_pbm_info* pbm = &pcic->pbm; */
int node;
@@ -746,12 +746,6 @@ static void watchdog_reset() {
}
#endif
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
- resource_size_t size, resource_size_t align)
-{
- return res->start;
-}
-
int pcibios_enable_device(struct pci_dev *pdev, int mask)
{
return 0;
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 4d9c3e13c150..150ee7d4b059 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -288,10 +288,17 @@ static void __init sun4v_patch(void)
sun4v_patch_2insn_range(&__sun4v_2insn_patch,
&__sun4v_2insn_patch_end);
- if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
- sun4v_chip_type == SUN4V_CHIP_SPARC_SN)
+
+ switch (sun4v_chip_type) {
+ case SUN4V_CHIP_SPARC_M7:
+ case SUN4V_CHIP_SPARC_M8:
+ case SUN4V_CHIP_SPARC_SN:
sun_m7_patch_2insn_range(&__sun_m7_2insn_patch,
&__sun_m7_2insn_patch_end);
+ break;
+ default:
+ break;
+ }
sun4v_hvapi_init();
}
@@ -529,6 +536,7 @@ static void __init init_sparc64_elf_hwcap(void)
sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
sun4v_chip_type == SUN4V_CHIP_SPARC64X)
cap |= HWCAP_SPARC_BLKINIT;
@@ -538,6 +546,7 @@ static void __init init_sparc64_elf_hwcap(void)
sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
sun4v_chip_type == SUN4V_CHIP_SPARC64X)
cap |= HWCAP_SPARC_N2;
@@ -568,6 +577,7 @@ static void __init init_sparc64_elf_hwcap(void)
sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
sun4v_chip_type == SUN4V_CHIP_SPARC64X)
cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 |
@@ -578,6 +588,7 @@ static void __init init_sparc64_elf_hwcap(void)
sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
sun4v_chip_type == SUN4V_CHIP_SPARC64X)
cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC |
diff --git a/arch/sparc/lib/multi3.S b/arch/sparc/lib/multi3.S
index d6b6c97fe3c7..703127aaf4a5 100644
--- a/arch/sparc/lib/multi3.S
+++ b/arch/sparc/lib/multi3.S
@@ -5,26 +5,26 @@
.align 4
ENTRY(__multi3) /* %o0 = u, %o1 = v */
mov %o1, %g1
- srl %o3, 0, %g4
- mulx %g4, %g1, %o1
+ srl %o3, 0, %o4
+ mulx %o4, %g1, %o1
srlx %g1, 0x20, %g3
- mulx %g3, %g4, %g5
- sllx %g5, 0x20, %o5
- srl %g1, 0, %g4
+ mulx %g3, %o4, %g7
+ sllx %g7, 0x20, %o5
+ srl %g1, 0, %o4
sub %o1, %o5, %o5
srlx %o5, 0x20, %o5
- addcc %g5, %o5, %g5
+ addcc %g7, %o5, %g7
srlx %o3, 0x20, %o5
- mulx %g4, %o5, %g4
+ mulx %o4, %o5, %o4
mulx %g3, %o5, %o5
sethi %hi(0x80000000), %g3
- addcc %g5, %g4, %g5
- srlx %g5, 0x20, %g5
+ addcc %g7, %o4, %g7
+ srlx %g7, 0x20, %g7
add %g3, %g3, %g3
movcc %xcc, %g0, %g3
- addcc %o5, %g5, %o5
- sllx %g4, 0x20, %g4
- add %o1, %g4, %o1
+ addcc %o5, %g7, %o5
+ sllx %o4, 0x20, %o4
+ add %o1, %o4, %o1
add %o5, %g3, %g2
mulx %g1, %o2, %g1
add %g1, %g2, %g1
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index fed73f14aa49..afa0099f3748 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1944,12 +1944,22 @@ static void __init setup_page_offset(void)
break;
case SUN4V_CHIP_SPARC_M7:
case SUN4V_CHIP_SPARC_SN:
- default:
/* M7 and later support 52-bit virtual addresses. */
sparc64_va_hole_top = 0xfff8000000000000UL;
sparc64_va_hole_bottom = 0x0008000000000000UL;
max_phys_bits = 49;
break;
+ case SUN4V_CHIP_SPARC_M8:
+ default:
+ /* M8 and later support 54-bit virtual addresses.
+ * However, restricting M8 and above VA bits to 53
+ * as 4-level page table cannot support more than
+ * 53 VA bits.
+ */
+ sparc64_va_hole_top = 0xfff0000000000000UL;
+ sparc64_va_hole_bottom = 0x0010000000000000UL;
+ max_phys_bits = 51;
+ break;
}
}
@@ -2161,6 +2171,7 @@ static void __init sun4v_linear_pte_xor_finalize(void)
*/
switch (sun4v_chip_type) {
case SUN4V_CHIP_SPARC_M7:
+ case SUN4V_CHIP_SPARC_M8:
case SUN4V_CHIP_SPARC_SN:
pagecv_flag = 0x00;
break;
@@ -2313,6 +2324,7 @@ void __init paging_init(void)
*/
switch (sun4v_chip_type) {
case SUN4V_CHIP_SPARC_M7:
+ case SUN4V_CHIP_SPARC_M8:
case SUN4V_CHIP_SPARC_SN:
page_cache4v_flag = _PAGE_CP_4V;
break;
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 8799ae9a8788..c340af7b1371 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -128,6 +128,8 @@ static u32 WDISP10(u32 off)
#define BA (BRANCH | CONDA)
#define BG (BRANCH | CONDG)
+#define BL (BRANCH | CONDL)
+#define BLE (BRANCH | CONDLE)
#define BGU (BRANCH | CONDGU)
#define BLEU (BRANCH | CONDLEU)
#define BGE (BRANCH | CONDGE)
@@ -715,9 +717,15 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
case BPF_JGT:
br_opcode = BGU;
break;
+ case BPF_JLT:
+ br_opcode = BLU;
+ break;
case BPF_JGE:
br_opcode = BGEU;
break;
+ case BPF_JLE:
+ br_opcode = BLEU;
+ break;
case BPF_JSET:
case BPF_JNE:
br_opcode = BNE;
@@ -725,9 +733,15 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
case BPF_JSGT:
br_opcode = BG;
break;
+ case BPF_JSLT:
+ br_opcode = BL;
+ break;
case BPF_JSGE:
br_opcode = BGE;
break;
+ case BPF_JSLE:
+ br_opcode = BLE;
+ break;
default:
/* Make sure we dont leak kernel information to the
* user.
@@ -746,18 +760,30 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
case BPF_JGT:
cbcond_opcode = CBCONDGU;
break;
+ case BPF_JLT:
+ cbcond_opcode = CBCONDLU;
+ break;
case BPF_JGE:
cbcond_opcode = CBCONDGEU;
break;
+ case BPF_JLE:
+ cbcond_opcode = CBCONDLEU;
+ break;
case BPF_JNE:
cbcond_opcode = CBCONDNE;
break;
case BPF_JSGT:
cbcond_opcode = CBCONDG;
break;
+ case BPF_JSLT:
+ cbcond_opcode = CBCONDL;
+ break;
case BPF_JSGE:
cbcond_opcode = CBCONDGE;
break;
+ case BPF_JSLE:
+ cbcond_opcode = CBCONDLE;
+ break;
default:
/* Make sure we dont leak kernel information to the
* user.
@@ -1176,10 +1202,14 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
/* IF (dst COND src) JUMP off */
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP | BPF_JNE | BPF_X:
case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
case BPF_JMP | BPF_JSET | BPF_X: {
int err;
@@ -1191,10 +1221,14 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
/* IF (dst COND imm) JUMP off */
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP | BPF_JSET | BPF_K: {
int err;
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index a93774255136..53a423e7cb92 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -101,6 +101,8 @@ static inline void atomic_set(atomic_t *v, int n)
_atomic_xchg(&v->counter, n);
}
+#define atomic_set_release(v, i) atomic_set((v), (i))
+
/* A 64bit atomic type */
typedef struct {
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
index e64a1b75fc38..83c1e639b411 100644
--- a/arch/tile/include/asm/futex.h
+++ b/arch/tile/include/asm/futex.h
@@ -106,12 +106,9 @@
lock = __atomic_hashed_lock((int __force *)uaddr)
#endif
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
+ u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int uninitialized_var(val), ret;
__futex_prolog();
@@ -119,12 +116,6 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
/* The 32-bit futex code makes this assumption, so validate it here. */
BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int));
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
- return -EFAULT;
-
pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
@@ -148,30 +139,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
}
pagefault_enable();
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ:
- ret = (val == cmparg);
- break;
- case FUTEX_OP_CMP_NE:
- ret = (val != cmparg);
- break;
- case FUTEX_OP_CMP_LT:
- ret = (val < cmparg);
- break;
- case FUTEX_OP_CMP_GE:
- ret = (val >= cmparg);
- break;
- case FUTEX_OP_CMP_LE:
- ret = (val <= cmparg);
- break;
- case FUTEX_OP_CMP_GT:
- ret = (val > cmparg);
- break;
- default:
- ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = val;
+
return ret;
}
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
index b14b1ba5bf9c..cba8ba9b8da6 100644
--- a/arch/tile/include/asm/spinlock_32.h
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -64,8 +64,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
lock->current_ticket = old_ticket + TICKET_QUANTUM;
}
-void arch_spin_unlock_wait(arch_spinlock_t *lock);
-
/*
* Read-write spinlocks, allowing multiple readers
* but only one writer.
diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h
index b9718fb4e74a..9a2c2d605752 100644
--- a/arch/tile/include/asm/spinlock_64.h
+++ b/arch/tile/include/asm/spinlock_64.h
@@ -58,8 +58,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
__insn_fetchadd4(&lock->lock, 1U << __ARCH_SPIN_CURRENT_SHIFT);
}
-void arch_spin_unlock_wait(arch_spinlock_t *lock);
-
void arch_spin_lock_slow(arch_spinlock_t *lock, u32 val);
/* Grab the "next" ticket number and bump it atomically.
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
index bc6656b5708b..bbf81579b1f8 100644
--- a/arch/tile/kernel/pci.c
+++ b/arch/tile/kernel/pci.c
@@ -67,16 +67,6 @@ static struct pci_ops tile_cfg_ops;
/*
- * We don't need to worry about the alignment of resources.
- */
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
- resource_size_t size, resource_size_t align)
-{
- return res->start;
-}
-EXPORT_SYMBOL(pcibios_align_resource);
-
-/*
* Open a FD to the hypervisor PCI device.
*
* controller_id is the controller number, config type is 0 or 1 for
@@ -274,6 +264,7 @@ static void fixup_read_and_payload_sizes(void)
*/
int __init pcibios_init(void)
{
+ struct pci_host_bridge *bridge;
int i;
pr_info("PCI: Probing PCI hardware\n");
@@ -306,16 +297,26 @@ int __init pcibios_init(void)
pci_add_resource(&resources, &ioport_resource);
pci_add_resource(&resources, &iomem_resource);
- bus = pci_scan_root_bus(NULL, 0, controller->ops,
- controller, &resources);
+
+ bridge = pci_alloc_host_bridge(0);
+ if (!bridge)
+ break;
+
+ list_splice_init(&resources, &bridge->windows);
+ bridge->dev.parent = NULL;
+ bridge->sysdata = controller;
+ bridge->busnr = 0;
+ bridge->ops = controller->ops;
+ bridge->swizzle_irq = pci_common_swizzle;
+ bridge->map_irq = tile_map_irq;
+
+ pci_scan_root_bus_bridge(bridge);
+ bus = bridge->bus;
controller->root_bus = bus;
controller->last_busno = bus->busn_res.end;
}
}
- /* Do machine dependent PCI interrupt routing */
- pci_fixup_irqs(pci_common_swizzle, tile_map_irq);
-
/*
* This comes from the generic Linux PCI driver.
*
@@ -369,14 +370,6 @@ int __init pcibios_init(void)
}
subsys_initcall(pcibios_init);
-/*
- * No bus fixups needed.
- */
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
- /* Nothing needs to be done. */
-}
-
void pcibios_set_master(struct pci_dev *dev)
{
/* No special bus mastering setup handling. */
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index b554a68eea1b..9aa238ac7b35 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -108,15 +108,6 @@ static struct pci_ops tile_cfg_ops;
/* Mask of CPUs that should receive PCIe interrupts. */
static struct cpumask intr_cpus_map;
-/* We don't need to worry about the alignment of resources. */
-resource_size_t pcibios_align_resource(void *data, const struct resource *res,
- resource_size_t size,
- resource_size_t align)
-{
- return res->start;
-}
-EXPORT_SYMBOL(pcibios_align_resource);
-
/*
* Pick a CPU to receive and handle the PCIe interrupts, based on the IRQ #.
* For now, we simply send interrupts to non-dataplane CPUs.
@@ -669,6 +660,7 @@ int __init pcibios_init(void)
resource_size_t offset;
LIST_HEAD(resources);
int next_busno;
+ struct pci_host_bridge *bridge;
int i;
tile_pci_init();
@@ -881,15 +873,25 @@ int __init pcibios_init(void)
controller->mem_offset);
pci_add_resource(&resources, &controller->io_space);
controller->first_busno = next_busno;
- bus = pci_scan_root_bus(NULL, next_busno, controller->ops,
- controller, &resources);
+
+ bridge = pci_alloc_host_bridge(0);
+ if (!bridge)
+ break;
+
+ list_splice_init(&resources, &bridge->windows);
+ bridge->dev.parent = NULL;
+ bridge->sysdata = controller;
+ bridge->busnr = next_busno;
+ bridge->ops = controller->ops;
+ bridge->swizzle_irq = pci_common_swizzle;
+ bridge->map_irq = tile_map_irq;
+
+ pci_scan_root_bus_bridge(bridge);
+ bus = bridge->bus;
controller->root_bus = bus;
next_busno = bus->busn_res.end + 1;
}
- /* Do machine dependent PCI interrupt routing */
- pci_fixup_irqs(pci_common_swizzle, tile_map_irq);
-
/*
* This comes from the generic Linux PCI driver.
*
@@ -1038,11 +1040,6 @@ alloc_mem_map_failed:
}
subsys_initcall(pcibios_init);
-/* No bus fixups needed. */
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
-}
-
/* Process any "pci=" kernel boot arguments. */
char *__init pcibios_setup(char *str)
{
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 443a70bccc1c..6becb96c60a0 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -1200,7 +1200,7 @@ static void __init validate_hv(void)
* We use a struct cpumask for this, so it must be big enough.
*/
if ((smp_height * smp_width) > nr_cpu_ids)
- early_panic("Hypervisor %d x %d grid too big for Linux NR_CPUS %d\n",
+ early_panic("Hypervisor %d x %d grid too big for Linux NR_CPUS %u\n",
smp_height, smp_width, nr_cpu_ids);
#endif
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index 076c6cc43113..db9333f2447c 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -62,29 +62,6 @@ int arch_spin_trylock(arch_spinlock_t *lock)
}
EXPORT_SYMBOL(arch_spin_trylock);
-void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- u32 iterations = 0;
- int curr = READ_ONCE(lock->current_ticket);
- int next = READ_ONCE(lock->next_ticket);
-
- /* Return immediately if unlocked. */
- if (next == curr)
- return;
-
- /* Wait until the current locker has released the lock. */
- do {
- delay_backoff(iterations++);
- } while (READ_ONCE(lock->current_ticket) == curr);
-
- /*
- * The TILE architecture doesn't do read speculation; therefore
- * a control dependency guarantees a LOAD->{LOAD,STORE} order.
- */
- barrier();
-}
-EXPORT_SYMBOL(arch_spin_unlock_wait);
-
/*
* The low byte is always reserved to be the marker for a "tns" operation
* since the low bit is set to "1" by a tns. The next seven bits are
diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c
index a4b5b2cbce93..de414c22892f 100644
--- a/arch/tile/lib/spinlock_64.c
+++ b/arch/tile/lib/spinlock_64.c
@@ -62,28 +62,6 @@ int arch_spin_trylock(arch_spinlock_t *lock)
}
EXPORT_SYMBOL(arch_spin_trylock);
-void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- u32 iterations = 0;
- u32 val = READ_ONCE(lock->lock);
- u32 curr = arch_spin_current(val);
-
- /* Return immediately if unlocked. */
- if (arch_spin_next(val) == curr)
- return;
-
- /* Wait until the current locker has released the lock. */
- do {
- delay_backoff(iterations++);
- } while (arch_spin_current(READ_ONCE(lock->lock)) == curr);
-
- /*
- * The TILE architecture doesn't do read speculation; therefore
- * a control dependency guarantees a LOAD->{LOAD,STORE} order.
- */
- barrier();
-}
-EXPORT_SYMBOL(arch_spin_unlock_wait);
/*
* If the read lock fails due to a writer, we retry periodically
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 600a2e9bfee2..344d95619d03 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -45,7 +45,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb)
}
static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
tlb->mm = mm;
tlb->start = start;
@@ -80,13 +81,19 @@ tlb_flush_mmu(struct mmu_gather *tlb)
tlb_flush_mmu_free(tlb);
}
-/* tlb_finish_mmu
+/* arch_tlb_finish_mmu
* Called at the end of the shootdown operation to free up any resources
* that were required.
*/
static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+ unsigned long start, unsigned long end, bool force)
{
+ if (force) {
+ tlb->start = start;
+ tlb->end = end;
+ tlb->need_flush = 1;
+ }
tlb_flush_mmu(tlb);
/* keep the page table cache within bounds */
diff --git a/arch/um/include/asm/unwind.h b/arch/um/include/asm/unwind.h
new file mode 100644
index 000000000000..7ffa5437b761
--- /dev/null
+++ b/arch/um/include/asm/unwind.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_UML_UNWIND_H
+#define _ASM_UML_UNWIND_H
+
+static inline void
+unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
+ void *orc, size_t orc_size) {}
+
+#endif /* _ASM_UML_UNWIND_H */
diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c
index 1053bca1f8aa..9f26840e41b1 100644
--- a/arch/unicore32/kernel/pci.c
+++ b/arch/unicore32/kernel/pci.c
@@ -101,7 +101,7 @@ void pci_puv3_preinit(void)
writel(readl(PCIBRI_CMD) | PCIBRI_CMD_IO | PCIBRI_CMD_MEM, PCIBRI_CMD);
}
-static int __init pci_puv3_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+static int pci_puv3_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
if (dev->bus->number == 0) {
#ifdef CONFIG_ARCH_FPGA /* 4 pci slots */
@@ -252,19 +252,46 @@ void pcibios_fixup_bus(struct pci_bus *bus)
}
EXPORT_SYMBOL(pcibios_fixup_bus);
+static struct resource busn_resource = {
+ .name = "PCI busn",
+ .start = 0,
+ .end = 255,
+ .flags = IORESOURCE_BUS,
+};
+
static int __init pci_common_init(void)
{
struct pci_bus *puv3_bus;
+ struct pci_host_bridge *bridge;
+ int ret;
+
+ bridge = pci_alloc_host_bridge(0);
+ if (!bridge)
+ return -ENOMEM;
pci_puv3_preinit();
- puv3_bus = pci_scan_bus(0, &pci_puv3_ops, NULL);
+ pci_add_resource(&bridge->windows, &ioport_resource);
+ pci_add_resource(&bridge->windows, &iomem_resource);
+ pci_add_resource(&bridge->windows, &busn_resource);
+ bridge->sysdata = NULL;
+ bridge->busnr = 0;
+ bridge->ops = &pci_puv3_ops;
+ bridge->swizzle_irq = pci_common_swizzle;
+ bridge->map_irq = pci_puv3_map_irq;
+
+ /* Scan our single hose. */
+ ret = pci_scan_root_bus_bridge(bridge);
+ if (ret) {
+ pci_free_host_bridge(bridge);
+ return;
+ }
+
+ puv3_bus = bridge->bus;
if (!puv3_bus)
panic("PCI: unable to scan bus!");
- pci_fixup_irqs(pci_common_swizzle, pci_puv3_map_irq);
-
pci_bus_size_bridges(puv3_bus);
pci_bus_assign_resources(puv3_bus);
pci_bus_add_devices(puv3_bus);
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 586b786b3edf..0038a2d10a7a 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -8,10 +8,7 @@ obj-$(CONFIG_KVM) += kvm/
obj-$(CONFIG_XEN) += xen/
# Hyper-V paravirtualization support
-obj-$(CONFIG_HYPERVISOR_GUEST) += hyperv/
-
-# lguest paravirtualization support
-obj-$(CONFIG_LGUEST_GUEST) += lguest/
+obj-$(subst m,y,$(CONFIG_HYPERV)) += hyperv/
obj-y += realmode/
obj-y += kernel/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 781521b7cf9e..a3e6e6136a47 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -55,6 +55,8 @@ config X86
select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_MMIO_FLUSH
select ARCH_HAS_PMEM_API if X86_64
+ # Causing hangs/crashes, see the commit that added this change for details.
+ select ARCH_HAS_REFCOUNT if BROKEN
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
@@ -73,7 +75,6 @@ config X86
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
- select ARCH_WANT_FRAME_POINTERS
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANTS_THP_SWAP if X86_64
select BUILDTIME_EXTABLE_SORT
@@ -100,6 +101,7 @@ config X86
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
+ select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
select HAVE_ACPI_APEI if ACPI
select HAVE_ACPI_APEI_NMI if ACPI
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
@@ -157,17 +159,19 @@ config X86
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_MIXED_BREAKPOINTS_REGS
+ select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
select HAVE_OPROFILE
select HAVE_OPTPROBES
select HAVE_PCSPKR_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_EVENTS_NMI
- select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI
+ select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
- select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER && STACK_VALIDATION
+ select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
select HAVE_STACK_VALIDATION if X86_64
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
@@ -326,6 +330,7 @@ config FIX_EARLYCON_MEM
config PGTABLE_LEVELS
int
+ default 5 if X86_5LEVEL
default 4 if X86_64
default 3 if X86_PAE
default 2
@@ -424,16 +429,16 @@ config GOLDFISH
def_bool y
depends on X86_GOLDFISH
-config INTEL_RDT_A
- bool "Intel Resource Director Technology Allocation support"
+config INTEL_RDT
+ bool "Intel Resource Director Technology support"
default n
depends on X86 && CPU_SUP_INTEL
select KERNFS
help
- Select to enable resource allocation which is a sub-feature of
- Intel Resource Director Technology(RDT). More information about
- RDT can be found in the Intel x86 Architecture Software
- Developer Manual.
+ Select to enable resource allocation and monitoring which are
+ sub-features of Intel Resource Director Technology(RDT). More
+ information about RDT can be found in the Intel x86
+ Architecture Software Developer Manual.
Say N if unsure.
@@ -777,8 +782,6 @@ config KVM_DEBUG_FS
Statistics are displayed in debugfs filesystem. Enabling this option
may incur significant overhead.
-source "arch/x86/lguest/Kconfig"
-
config PARAVIRT_TIME_ACCOUNTING
bool "Paravirtual steal time accounting"
depends on PARAVIRT
@@ -1398,6 +1401,24 @@ config X86_PAE
has the cost of more pagetable lookup overhead, and also
consumes more pagetable space per process.
+config X86_5LEVEL
+ bool "Enable 5-level page tables support"
+ depends on X86_64
+ ---help---
+ 5-level paging enables access to larger address space:
+ up to 128 PiB of virtual address space and 4 PiB of
+ physical address space.
+
+ It will be supported by future Intel CPUs.
+
+ Note: a kernel with this option enabled can only be booted
+ on machines that support the feature.
+
+ See Documentation/x86/x86_64/5level-paging.txt for more
+ information.
+
+ Say N if unsure.
+
config ARCH_PHYS_ADDR_T_64BIT
def_bool y
depends on X86_64 || X86_PAE
@@ -1415,6 +1436,35 @@ config X86_DIRECT_GBPAGES
supports them), so don't confuse the user by printing
that we have them enabled.
+config ARCH_HAS_MEM_ENCRYPT
+ def_bool y
+
+config AMD_MEM_ENCRYPT
+ bool "AMD Secure Memory Encryption (SME) support"
+ depends on X86_64 && CPU_SUP_AMD
+ ---help---
+ Say yes to enable support for the encryption of system memory.
+ This requires an AMD processor that supports Secure Memory
+ Encryption (SME).
+
+config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT
+ bool "Activate AMD Secure Memory Encryption (SME) by default"
+ default y
+ depends on AMD_MEM_ENCRYPT
+ ---help---
+ Say yes to have system memory encrypted by default if running on
+ an AMD processor that supports Secure Memory Encryption (SME).
+
+ If set to Y, then the encryption of system memory can be
+ deactivated with the mem_encrypt=off command line option.
+
+ If set to N, then the encryption of system memory can be
+ activated with the mem_encrypt=on command line option.
+
+config ARCH_USE_MEMREMAP_PROT
+ def_bool y
+ depends on AMD_MEM_ENCRYPT
+
# Common NUMA Features
config NUMA
bool "Numa Memory Allocation and Scheduler Support"
@@ -1756,7 +1806,9 @@ config X86_SMAP
config X86_INTEL_MPX
prompt "Intel MPX (Memory Protection Extensions)"
def_bool n
- depends on CPU_SUP_INTEL
+ # Note: only available in 64-bit mode due to VMA flags shortage
+ depends on CPU_SUP_INTEL && X86_64
+ select ARCH_USES_HIGH_VMA_FLAGS
---help---
MPX provides hardware features that can be used in
conjunction with compiler-instrumented code to check
@@ -2271,6 +2323,10 @@ source "kernel/livepatch/Kconfig"
endmenu
+config ARCH_HAS_ADD_PAGES
+ def_bool y
+ depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG
+
config ARCH_ENABLE_MEMORY_HOTPLUG
def_bool y
depends on X86_64 || (X86_32 && HIGHMEM)
@@ -2291,6 +2347,10 @@ config ARCH_ENABLE_HUGEPAGE_MIGRATION
def_bool y
depends on X86_64 && HUGETLB_PAGE && MIGRATION
+config ARCH_ENABLE_THP_MIGRATION
+ def_bool y
+ depends on X86_64 && TRANSPARENT_HUGEPAGE
+
menu "Power management and ACPI options"
config ARCH_HIBERNATION_HEADER
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index cd20ca0b4043..71a48a30fc84 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -305,8 +305,6 @@ config DEBUG_ENTRY
Some of these sanity checks may slow down kernel entries and
exits or otherwise impact performance.
- This is currently used to help test NMI code.
-
If unsure, say N.
config DEBUG_NMI_SELFTEST
@@ -358,4 +356,61 @@ config PUNIT_ATOM_DEBUG
The current power state can be read from
/sys/kernel/debug/punit_atom/dev_power_state
+choice
+ prompt "Choose kernel unwinder"
+ default FRAME_POINTER_UNWINDER
+ ---help---
+ This determines which method will be used for unwinding kernel stack
+ traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
+ livepatch, lockdep, and more.
+
+config FRAME_POINTER_UNWINDER
+ bool "Frame pointer unwinder"
+ select FRAME_POINTER
+ ---help---
+ This option enables the frame pointer unwinder for unwinding kernel
+ stack traces.
+
+ The unwinder itself is fast and it uses less RAM than the ORC
+ unwinder, but the kernel text size will grow by ~3% and the kernel's
+ overall performance will degrade by roughly 5-10%.
+
+ This option is recommended if you want to use the livepatch
+ consistency model, as this is currently the only way to get a
+ reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
+
+config ORC_UNWINDER
+ bool "ORC unwinder"
+ depends on X86_64
+ select STACK_VALIDATION
+ ---help---
+ This option enables the ORC (Oops Rewind Capability) unwinder for
+ unwinding kernel stack traces. It uses a custom data format which is
+ a simplified version of the DWARF Call Frame Information standard.
+
+ This unwinder is more accurate across interrupt entry frames than the
+ frame pointer unwinder. It also enables a 5-10% performance
+ improvement across the entire kernel compared to frame pointers.
+
+ Enabling this option will increase the kernel's runtime memory usage
+ by roughly 2-4MB, depending on your kernel config.
+
+config GUESS_UNWINDER
+ bool "Guess unwinder"
+ depends on EXPERT
+ ---help---
+ This option enables the "guess" unwinder for unwinding kernel stack
+ traces. It scans the stack and reports every kernel text address it
+ finds. Some of the addresses it reports may be incorrect.
+
+ While this option often produces false positives, it can still be
+ useful in many cases. Unlike the other unwinders, it has no runtime
+ overhead.
+
+endchoice
+
+config FRAME_POINTER
+ depends on !ORC_UNWINDER && !GUESS_UNWINDER
+ bool
+
endmenu
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1e902f926be3..6276572259c8 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -14,9 +14,11 @@ endif
# For gcc stack alignment is specified with -mpreferred-stack-boundary,
# clang has the option -mstack-alignment for that purpose.
ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align_opt := -mpreferred-stack-boundary
-else ifneq ($(call cc-option, -mstack-alignment=4),)
- cc_stack_align_opt := -mstack-alignment
+ cc_stack_align4 := -mpreferred-stack-boundary=2
+ cc_stack_align8 := -mpreferred-stack-boundary=3
+else ifneq ($(call cc-option, -mstack-alignment=16),)
+ cc_stack_align4 := -mstack-alignment=4
+ cc_stack_align8 := -mstack-alignment=8
endif
# How to compile the 16-bit code. Note we always compile for -march=i386;
@@ -36,7 +38,7 @@ REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -ffreestanding)
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -fno-stack-protector)
-REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align_opt)=2)
+REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4))
export REALMODE_CFLAGS
# BITS is used as extension for files which are available in a 32 bit
@@ -76,7 +78,7 @@ ifeq ($(CONFIG_X86_32),y)
# Align the stack to the register width instead of using the default
# alignment of 16 bytes. This reduces stack usage and the number of
# alignment instructions.
- KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align_opt)=2)
+ KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align4))
# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
# a lot more stack due to the lack of sharing of stacklots:
@@ -115,7 +117,7 @@ else
# default alignment which keep the stack *mis*aligned.
# Furthermore an alignment to the register width reduces stack usage
# and the number of alignment instructions.
- KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align_opt)=3)
+ KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align8))
# Use -mskip-rax-setup if supported.
KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
@@ -232,9 +234,6 @@ KBUILD_CFLAGS += -Wno-sign-compare
#
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
-KBUILD_CFLAGS += $(mflags-y)
-KBUILD_AFLAGS += $(mflags-y)
-
archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index c3e869eaef0c..e56dbc67e837 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -767,7 +767,7 @@ static efi_status_t setup_e820(struct boot_params *params,
m |= (u64)efi->efi_memmap_hi << 32;
#endif
- d = (efi_memory_desc_t *)(m + (i * efi->efi_memdesc_size));
+ d = efi_early_memdesc_ptr(m, efi->efi_memdesc_size, i);
switch (d->type) {
case EFI_RESERVED_TYPE:
case EFI_RUNTIME_SERVICES_CODE:
@@ -997,6 +997,9 @@ struct boot_params *efi_main(struct efi_config *c,
if (boot_params->secure_boot == efi_secureboot_mode_unset)
boot_params->secure_boot = efi_get_secureboot(sys_table);
+ /* Ask the firmware to clear memory on unclean shutdown */
+ efi_enable_reset_attack_mitigation(sys_table);
+
setup_graphics(boot_params);
setup_efi_pci(boot_params);
@@ -1058,7 +1061,7 @@ struct boot_params *efi_main(struct efi_config *c,
desc->s = DESC_TYPE_CODE_DATA;
desc->dpl = 0;
desc->p = 1;
- desc->limit = 0xf;
+ desc->limit1 = 0xf;
desc->avl = 0;
desc->l = 0;
desc->d = SEG_OP_SIZE_32BIT;
@@ -1078,7 +1081,7 @@ struct boot_params *efi_main(struct efi_config *c,
desc->s = DESC_TYPE_CODE_DATA;
desc->dpl = 0;
desc->p = 1;
- desc->limit = 0xf;
+ desc->limit1 = 0xf;
desc->avl = 0;
if (IS_ENABLED(CONFIG_X86_64)) {
desc->l = 1;
@@ -1099,7 +1102,7 @@ struct boot_params *efi_main(struct efi_config *c,
desc->s = DESC_TYPE_CODE_DATA;
desc->dpl = 0;
desc->p = 1;
- desc->limit = 0xf;
+ desc->limit1 = 0xf;
desc->avl = 0;
desc->l = 0;
desc->d = SEG_OP_SIZE_32BIT;
@@ -1116,7 +1119,7 @@ struct boot_params *efi_main(struct efi_config *c,
desc->s = 0;
desc->dpl = 0;
desc->p = 1;
- desc->limit = 0x0;
+ desc->limit1 = 0x0;
desc->avl = 0;
desc->l = 0;
desc->d = 0;
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index d85b9625e836..11c68cf53d4e 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -61,71 +61,6 @@
__HEAD
ENTRY(startup_32)
-#ifdef CONFIG_EFI_STUB
- jmp preferred_addr
-
- /*
- * We don't need the return address, so set up the stack so
- * efi_main() can find its arguments.
- */
-ENTRY(efi_pe_entry)
- add $0x4, %esp
-
- call 1f
-1: popl %esi
- subl $1b, %esi
-
- popl %ecx
- movl %ecx, efi32_config(%esi) /* Handle */
- popl %ecx
- movl %ecx, efi32_config+8(%esi) /* EFI System table pointer */
-
- /* Relocate efi_config->call() */
- leal efi32_config(%esi), %eax
- add %esi, 40(%eax)
- pushl %eax
-
- call make_boot_params
- cmpl $0, %eax
- je fail
- movl %esi, BP_code32_start(%eax)
- popl %ecx
- pushl %eax
- pushl %ecx
- jmp 2f /* Skip efi_config initialization */
-
-ENTRY(efi32_stub_entry)
- add $0x4, %esp
- popl %ecx
- popl %edx
-
- call 1f
-1: popl %esi
- subl $1b, %esi
-
- movl %ecx, efi32_config(%esi) /* Handle */
- movl %edx, efi32_config+8(%esi) /* EFI System table pointer */
-
- /* Relocate efi_config->call() */
- leal efi32_config(%esi), %eax
- add %esi, 40(%eax)
- pushl %eax
-2:
- call efi_main
- cmpl $0, %eax
- movl %eax, %esi
- jne 2f
-fail:
- /* EFI init failed, so hang. */
- hlt
- jmp fail
-2:
- movl BP_code32_start(%esi), %eax
- leal preferred_addr(%eax), %eax
- jmp *%eax
-
-preferred_addr:
-#endif
cld
/*
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
@@ -208,6 +143,70 @@ preferred_addr:
jmp *%eax
ENDPROC(startup_32)
+#ifdef CONFIG_EFI_STUB
+/*
+ * We don't need the return address, so set up the stack so efi_main() can find
+ * its arguments.
+ */
+ENTRY(efi_pe_entry)
+ add $0x4, %esp
+
+ call 1f
+1: popl %esi
+ subl $1b, %esi
+
+ popl %ecx
+ movl %ecx, efi32_config(%esi) /* Handle */
+ popl %ecx
+ movl %ecx, efi32_config+8(%esi) /* EFI System table pointer */
+
+ /* Relocate efi_config->call() */
+ leal efi32_config(%esi), %eax
+ add %esi, 40(%eax)
+ pushl %eax
+
+ call make_boot_params
+ cmpl $0, %eax
+ je fail
+ movl %esi, BP_code32_start(%eax)
+ popl %ecx
+ pushl %eax
+ pushl %ecx
+ jmp 2f /* Skip efi_config initialization */
+ENDPROC(efi_pe_entry)
+
+ENTRY(efi32_stub_entry)
+ add $0x4, %esp
+ popl %ecx
+ popl %edx
+
+ call 1f
+1: popl %esi
+ subl $1b, %esi
+
+ movl %ecx, efi32_config(%esi) /* Handle */
+ movl %edx, efi32_config+8(%esi) /* EFI System table pointer */
+
+ /* Relocate efi_config->call() */
+ leal efi32_config(%esi), %eax
+ add %esi, 40(%eax)
+ pushl %eax
+2:
+ call efi_main
+ cmpl $0, %eax
+ movl %eax, %esi
+ jne 2f
+fail:
+ /* EFI init failed, so hang. */
+ hlt
+ jmp fail
+2:
+ movl BP_code32_start(%esi), %eax
+ leal startup_32(%eax), %eax
+ jmp *%eax
+ENDPROC(efi32_stub_entry)
+#endif
+
.text
relocated:
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fbf4c32d0b62..b4a5d284391c 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -243,65 +243,6 @@ ENTRY(startup_64)
* that maps our entire kernel(text+data+bss+brk), zero page
* and command line.
*/
-#ifdef CONFIG_EFI_STUB
- /*
- * The entry point for the PE/COFF executable is efi_pe_entry, so
- * only legacy boot loaders will execute this jmp.
- */
- jmp preferred_addr
-
-ENTRY(efi_pe_entry)
- movq %rcx, efi64_config(%rip) /* Handle */
- movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */
-
- leaq efi64_config(%rip), %rax
- movq %rax, efi_config(%rip)
-
- call 1f
-1: popq %rbp
- subq $1b, %rbp
-
- /*
- * Relocate efi_config->call().
- */
- addq %rbp, efi64_config+40(%rip)
-
- movq %rax, %rdi
- call make_boot_params
- cmpq $0,%rax
- je fail
- mov %rax, %rsi
- leaq startup_32(%rip), %rax
- movl %eax, BP_code32_start(%rsi)
- jmp 2f /* Skip the relocation */
-
-handover_entry:
- call 1f
-1: popq %rbp
- subq $1b, %rbp
-
- /*
- * Relocate efi_config->call().
- */
- movq efi_config(%rip), %rax
- addq %rbp, 40(%rax)
-2:
- movq efi_config(%rip), %rdi
- call efi_main
- movq %rax,%rsi
- cmpq $0,%rax
- jne 2f
-fail:
- /* EFI init failed, so hang. */
- hlt
- jmp fail
-2:
- movl BP_code32_start(%esi), %eax
- leaq preferred_addr(%rax), %rax
- jmp *%rax
-
-preferred_addr:
-#endif
/* Setup data segments. */
xorl %eax, %eax
@@ -413,6 +354,59 @@ lvl5:
jmp *%rax
#ifdef CONFIG_EFI_STUB
+
+/* The entry point for the PE/COFF executable is efi_pe_entry. */
+ENTRY(efi_pe_entry)
+ movq %rcx, efi64_config(%rip) /* Handle */
+ movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */
+
+ leaq efi64_config(%rip), %rax
+ movq %rax, efi_config(%rip)
+
+ call 1f
+1: popq %rbp
+ subq $1b, %rbp
+
+ /*
+ * Relocate efi_config->call().
+ */
+ addq %rbp, efi64_config+40(%rip)
+
+ movq %rax, %rdi
+ call make_boot_params
+ cmpq $0,%rax
+ je fail
+ mov %rax, %rsi
+ leaq startup_32(%rip), %rax
+ movl %eax, BP_code32_start(%rsi)
+ jmp 2f /* Skip the relocation */
+
+handover_entry:
+ call 1f
+1: popq %rbp
+ subq $1b, %rbp
+
+ /*
+ * Relocate efi_config->call().
+ */
+ movq efi_config(%rip), %rax
+ addq %rbp, 40(%rax)
+2:
+ movq efi_config(%rip), %rdi
+ call efi_main
+ movq %rax,%rsi
+ cmpq $0,%rax
+ jne 2f
+fail:
+ /* EFI init failed, so hang. */
+ hlt
+ jmp fail
+2:
+ movl BP_code32_start(%esi), %eax
+ leaq startup_64(%rax), %rax
+ jmp *%rax
+ENDPROC(efi_pe_entry)
+
.org 0x390
ENTRY(efi64_stub_entry)
movq %rdi, efi64_config(%rip) /* Handle */
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 91f27ab970ef..17818ba6906f 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -37,7 +37,9 @@
#include <linux/uts.h>
#include <linux/utsname.h>
#include <linux/ctype.h>
+#include <linux/efi.h>
#include <generated/utsrelease.h>
+#include <asm/efi.h>
/* Macros used by the included decompressor code below. */
#define STATIC
@@ -479,35 +481,31 @@ static unsigned long slots_fetch_random(void)
return 0;
}
-static void process_e820_entry(struct boot_e820_entry *entry,
+static void process_mem_region(struct mem_vector *entry,
unsigned long minimum,
unsigned long image_size)
{
struct mem_vector region, overlap;
struct slot_area slot_area;
unsigned long start_orig, end;
- struct boot_e820_entry cur_entry;
-
- /* Skip non-RAM entries. */
- if (entry->type != E820_TYPE_RAM)
- return;
+ struct mem_vector cur_entry;
/* On 32-bit, ignore entries entirely above our maximum. */
- if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= KERNEL_IMAGE_SIZE)
+ if (IS_ENABLED(CONFIG_X86_32) && entry->start >= KERNEL_IMAGE_SIZE)
return;
/* Ignore entries entirely below our minimum. */
- if (entry->addr + entry->size < minimum)
+ if (entry->start + entry->size < minimum)
return;
/* Ignore entries above memory limit */
- end = min(entry->size + entry->addr, mem_limit);
- if (entry->addr >= end)
+ end = min(entry->size + entry->start, mem_limit);
+ if (entry->start >= end)
return;
- cur_entry.addr = entry->addr;
- cur_entry.size = end - entry->addr;
+ cur_entry.start = entry->start;
+ cur_entry.size = end - entry->start;
- region.start = cur_entry.addr;
+ region.start = cur_entry.start;
region.size = cur_entry.size;
/* Give up if slot area array is full. */
@@ -521,8 +519,8 @@ static void process_e820_entry(struct boot_e820_entry *entry,
/* Potentially raise address to meet alignment needs. */
region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
- /* Did we raise the address above this e820 region? */
- if (region.start > cur_entry.addr + cur_entry.size)
+ /* Did we raise the address above the passed in memory entry? */
+ if (region.start > cur_entry.start + cur_entry.size)
return;
/* Reduce size by any delta from the original address. */
@@ -562,31 +560,126 @@ static void process_e820_entry(struct boot_e820_entry *entry,
}
}
-static unsigned long find_random_phys_addr(unsigned long minimum,
- unsigned long image_size)
+#ifdef CONFIG_EFI
+/*
+ * Returns true if the EFI memmap was processed (valid EFI loader signature
+ * and a reachable memmap); the caller then skips the e820 table scan.
+ */
+static bool
+process_efi_entries(unsigned long minimum, unsigned long image_size)
{
+ struct efi_info *e = &boot_params->efi_info;
+ bool efi_mirror_found = false;
+ struct mem_vector region;
+ efi_memory_desc_t *md;
+ unsigned long pmap;
+ char *signature;
+ u32 nr_desc;
int i;
- unsigned long addr;
- /* Check if we had too many memmaps. */
- if (memmap_too_large) {
- debug_putstr("Aborted e820 scan (more than 4 memmap= args)!\n");
- return 0;
+ signature = (char *)&e->efi_loader_signature;
+ if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
+ strncmp(signature, EFI64_LOADER_SIGNATURE, 4))
+ return false;
+
+#ifdef CONFIG_X86_32
+ /* Can't handle data above 4GB at this time */
+ if (e->efi_memmap_hi) {
+ warn("EFI memmap is above 4GB, can't be handled now on x86_32. EFI should be disabled.\n");
+ return false;
}
+ pmap = e->efi_memmap;
+#else
+ pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
+#endif
- /* Make sure minimum is aligned. */
- minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+ nr_desc = e->efi_memmap_size / e->efi_memdesc_size;
+ for (i = 0; i < nr_desc; i++) {
+ md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
+ if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
+ efi_mirror_found = true;
+ break;
+ }
+ }
+
+ for (i = 0; i < nr_desc; i++) {
+ md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
+
+ /*
+ * Here we are more conservative in picking free memory than
+ * the EFI spec allows:
+ *
+ * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
+ * free memory and thus available to place the kernel image into,
+ * but in practice there's firmware where using that memory leads
+ * to crashes.
+ *
+ * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
+ */
+ if (md->type != EFI_CONVENTIONAL_MEMORY)
+ continue;
+
+ if (efi_mirror_found &&
+ !(md->attribute & EFI_MEMORY_MORE_RELIABLE))
+ continue;
+
+ region.start = md->phys_addr;
+ region.size = md->num_pages << EFI_PAGE_SHIFT;
+ process_mem_region(&region, minimum, image_size);
+ if (slot_area_index == MAX_SLOT_AREA) {
+ debug_putstr("Aborted EFI scan (slot_areas full)!\n");
+ break;
+ }
+ }
+ return true;
+}
+#else
+static inline bool
+process_efi_entries(unsigned long minimum, unsigned long image_size)
+{
+ return false;
+}
+#endif
+
+static void process_e820_entries(unsigned long minimum,
+ unsigned long image_size)
+{
+ int i;
+ struct mem_vector region;
+ struct boot_e820_entry *entry;
/* Verify potential e820 positions, appending to slots list. */
for (i = 0; i < boot_params->e820_entries; i++) {
- process_e820_entry(&boot_params->e820_table[i], minimum,
- image_size);
+ entry = &boot_params->e820_table[i];
+ /* Skip non-RAM entries. */
+ if (entry->type != E820_TYPE_RAM)
+ continue;
+ region.start = entry->addr;
+ region.size = entry->size;
+ process_mem_region(&region, minimum, image_size);
if (slot_area_index == MAX_SLOT_AREA) {
debug_putstr("Aborted e820 scan (slot_areas full)!\n");
break;
}
}
+}
+
+static unsigned long find_random_phys_addr(unsigned long minimum,
+ unsigned long image_size)
+{
+ /* Check if we had too many memmaps. */
+ if (memmap_too_large) {
+ debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
+ return 0;
+ }
+
+ /* Make sure minimum is aligned. */
+ minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+
+ if (process_efi_entries(minimum, image_size))
+ return slots_fetch_random();
+ process_e820_entries(minimum, image_size);
return slots_fetch_random();
}
@@ -645,7 +738,7 @@ void choose_random_location(unsigned long input,
*/
min_addr = min(*output, 512UL << 20);
- /* Walk e820 and find a random address. */
+ /* Walk available memory entries to find a random address. */
random_addr = find_random_phys_addr(min_addr, output_size);
if (!random_addr) {
warn("Physical KASLR disabled: no suitable memory region!");
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index a0838ab929f2..c14217cd0155 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -116,8 +116,7 @@ void __putstr(const char *s)
}
}
- if (boot_params->screen_info.orig_video_mode == 0 &&
- lines == 0 && cols == 0)
+ if (lines == 0 || cols == 0)
return;
x = boot_params->screen_info.orig_x;
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index 28029be47fbb..f1aa43854bed 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -15,6 +15,13 @@
#define __pa(x) ((unsigned long)(x))
#define __va(x) ((void *)((unsigned long)(x)))
+/*
+ * The pgtable.h and mm/ident_map.c includes make use of the SME related
+ * information which is not used in the compressed image support. Un-define
+ * the SME support to avoid any compile and link errors.
+ */
+#undef CONFIG_AMD_MEM_ENCRYPT
+
#include "misc.h"
/* These actually do the work of building the kernel identity maps. */
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 2ed8f0c25def..1bb08ecffd24 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -520,8 +520,14 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
# the description in lib/decompressor_xxx.c for specific information.
#
# extra_bytes = (uncompressed_size >> 12) + 65536 + 128
+#
+# LZ4 is even worse: data that cannot be further compressed grows by 0.4%,
+# or one byte per 256 bytes. OTOH, we can safely get rid of the +128 as
+# the size-dependent part now grows so fast.
+#
+# extra_bytes = (uncompressed_size >> 8) + 65536
-#define ZO_z_extra_bytes ((ZO_z_output_len >> 12) + 65536 + 128)
+#define ZO_z_extra_bytes ((ZO_z_output_len >> 8) + 65536)
#if ZO_z_output_len > ZO_z_input_len
# define ZO_z_extract_offset (ZO_z_output_len + ZO_z_extra_bytes - \
ZO_z_input_len)
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
index 4b429df40d7a..550cd5012b73 100644
--- a/arch/x86/configs/tiny.config
+++ b/arch/x86/configs/tiny.config
@@ -1,3 +1,5 @@
CONFIG_NOHIGHMEM=y
# CONFIG_HIGHMEM4G is not set
# CONFIG_HIGHMEM64G is not set
+CONFIG_GUESS_UNWINDER=y
+# CONFIG_FRAME_POINTER_UNWINDER is not set
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 4a55cdcdc008..5c15d6b57329 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -475,8 +475,8 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
unsigned int nbytes = walk->nbytes;
aesni_enc(ctx, keystream, ctrblk);
- crypto_xor(keystream, src, nbytes);
- memcpy(dst, keystream, nbytes);
+ crypto_xor_cpy(dst, keystream, src, nbytes);
+
crypto_inc(ctrblk, AES_BLOCK_SIZE);
}
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 17c05531dfd1..f9eca34301e2 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -271,8 +271,7 @@ static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
unsigned int nbytes = walk->nbytes;
blowfish_enc_blk(ctx, keystream, ctrblk);
- crypto_xor(keystream, src, nbytes);
- memcpy(dst, keystream, nbytes);
+ crypto_xor_cpy(dst, keystream, src, nbytes);
crypto_inc(ctrblk, BF_BLOCK_SIZE);
}
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 8648158f3916..dbea6020ffe7 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -256,8 +256,7 @@ static void ctr_crypt_final(struct blkcipher_desc *desc,
unsigned int nbytes = walk->nbytes;
__cast5_encrypt(ctx, keystream, ctrblk);
- crypto_xor(keystream, src, nbytes);
- memcpy(dst, keystream, nbytes);
+ crypto_xor_cpy(dst, keystream, src, nbytes);
crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index d6fc59aaaadf..30c0a37f4882 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -277,8 +277,7 @@ static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
unsigned int nbytes = walk->nbytes;
des3_ede_enc_blk(ctx, keystream, ctrblk);
- crypto_xor(keystream, src, nbytes);
- memcpy(dst, keystream, nbytes);
+ crypto_xor_cpy(dst, keystream, src, nbytes);
crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
}
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 1cd792db15ef..1eab79c9ac48 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -117,11 +117,10 @@
.set T1, REG_T1
.endm
-#define K_BASE %r8
#define HASH_PTR %r9
+#define BLOCKS_CTR %r8
#define BUFFER_PTR %r10
#define BUFFER_PTR2 %r13
-#define BUFFER_END %r11
#define PRECALC_BUF %r14
#define WK_BUF %r15
@@ -205,14 +204,14 @@
* blended AVX2 and ALU instruction scheduling
* 1 vector iteration per 8 rounds
*/
- vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP
+ vmovdqu (i * 2)(BUFFER_PTR), W_TMP
.elseif ((i & 7) == 1)
- vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\
+ vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
WY_TMP, WY_TMP
.elseif ((i & 7) == 2)
vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
.elseif ((i & 7) == 4)
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
.elseif ((i & 7) == 7)
vmovdqu WY_TMP, PRECALC_WK(i&~7)
@@ -255,7 +254,7 @@
vpxor WY, WY_TMP, WY_TMP
.elseif ((i & 7) == 7)
vpxor WY_TMP2, WY_TMP, WY
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
vmovdqu WY_TMP, PRECALC_WK(i&~7)
PRECALC_ROTATE_WY
@@ -291,7 +290,7 @@
vpsrld $30, WY, WY
vpor WY, WY_TMP, WY
.elseif ((i & 7) == 7)
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
vmovdqu WY_TMP, PRECALC_WK(i&~7)
PRECALC_ROTATE_WY
@@ -446,6 +445,16 @@
.endm
+/* Add constant only if (%2 >= %3) condition met (uses RTA as temp)
+ * %1 += (%2 >= %3) ? %4 : 0
+ */
+.macro ADD_IF_GE a, b, c, d
+ mov \a, RTA
+ add $\d, RTA
+ cmp $\c, \b
+ cmovge RTA, \a
+.endm
+
/*
* macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
*/
@@ -463,13 +472,16 @@
lea (2*4*80+32)(%rsp), WK_BUF
# Precalc WK for first 2 blocks
- PRECALC_OFFSET = 0
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64
.set i, 0
.rept 160
PRECALC i
.set i, i + 1
.endr
- PRECALC_OFFSET = 128
+
+ /* Go to next block if needed */
+ ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
xchg WK_BUF, PRECALC_BUF
.align 32
@@ -479,8 +491,8 @@ _loop:
* we use K_BASE value as a signal of a last block,
* it is set below by: cmovae BUFFER_PTR, K_BASE
*/
- cmp K_BASE, BUFFER_PTR
- jne _begin
+ test BLOCKS_CTR, BLOCKS_CTR
+ jnz _begin
.align 32
jmp _end
.align 32
@@ -512,10 +524,10 @@ _loop0:
.set j, j+2
.endr
- add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */
- cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */
- cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
-
+ /* Update Counter */
+ sub $1, BLOCKS_CTR
+ /* Move to the next block only if needed */
+ ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128
/*
* rounds
* 60,62,64,66,68
@@ -532,8 +544,8 @@ _loop0:
UPDATE_HASH 12(HASH_PTR), D
UPDATE_HASH 16(HASH_PTR), E
- cmp K_BASE, BUFFER_PTR /* is current block the last one? */
- je _loop
+ test BLOCKS_CTR, BLOCKS_CTR
+ jz _loop
mov TB, B
@@ -575,10 +587,10 @@ _loop2:
.set j, j+2
.endr
- add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */
-
- cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */
- cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
+ /* update counter */
+ sub $1, BLOCKS_CTR
+ /* Move to the next block only if needed */
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
jmp _loop3
_loop3:
@@ -641,19 +653,12 @@ _loop3:
avx2_zeroupper
- lea K_XMM_AR(%rip), K_BASE
-
+ /* Setup initial values */
mov CTX, HASH_PTR
mov BUF, BUFFER_PTR
- lea 64(BUF), BUFFER_PTR2
-
- shl $6, CNT /* mul by 64 */
- add BUF, CNT
- add $64, CNT
- mov CNT, BUFFER_END
- cmp BUFFER_END, BUFFER_PTR2
- cmovae K_BASE, BUFFER_PTR2
+ mov BUF, BUFFER_PTR2
+ mov CNT, BLOCKS_CTR
xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index f960a043cdeb..fc61739150e7 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
static bool avx2_usable(void)
{
- if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
+ if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
&& boot_cpu_has(X86_FEATURE_BMI1)
&& boot_cpu_has(X86_FEATURE_BMI2))
return true;
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 9976fcecd17e..af28a8a24366 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -2,7 +2,6 @@
# Makefile for the x86 low level entry code
#
-OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 05ed3d393da7..640aafebdc00 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -1,4 +1,5 @@
#include <linux/jump_label.h>
+#include <asm/unwind_hints.h>
/*
@@ -112,6 +113,7 @@ For 32-bit we have the following conventions - kernel is built with
movq %rdx, 12*8+\offset(%rsp)
movq %rsi, 13*8+\offset(%rsp)
movq %rdi, 14*8+\offset(%rsp)
+ UNWIND_HINT_REGS offset=\offset extra=0
.endm
.macro SAVE_C_REGS offset=0
SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
@@ -136,6 +138,7 @@ For 32-bit we have the following conventions - kernel is built with
movq %r12, 3*8+\offset(%rsp)
movq %rbp, 4*8+\offset(%rsp)
movq %rbx, 5*8+\offset(%rsp)
+ UNWIND_HINT_REGS offset=\offset
.endm
.macro RESTORE_EXTRA_REGS offset=0
@@ -145,6 +148,7 @@ For 32-bit we have the following conventions - kernel is built with
movq 3*8+\offset(%rsp), %r12
movq 4*8+\offset(%rsp), %rbp
movq 5*8+\offset(%rsp), %rbx
+ UNWIND_HINT_REGS offset=\offset extra=0
.endm
.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
@@ -167,6 +171,7 @@ For 32-bit we have the following conventions - kernel is built with
.endif
movq 13*8(%rsp), %rsi
movq 14*8(%rsp), %rdi
+ UNWIND_HINT_IRET_REGS offset=16*8
.endm
.macro RESTORE_C_REGS
RESTORE_C_REGS_HELPER 1,1,1,1,1
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index cdefcfdd9e63..03505ffbe1b6 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -23,6 +23,7 @@
#include <linux/user-return-notifier.h>
#include <linux/uprobes.h>
#include <linux/livepatch.h>
+#include <linux/syscalls.h>
#include <asm/desc.h>
#include <asm/traps.h>
@@ -183,6 +184,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
struct thread_info *ti = current_thread_info();
u32 cached_flags;
+ addr_limit_user_check();
+
if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
local_irq_disable();
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 48ef7bb32c42..8a13d468635a 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -673,16 +673,8 @@ ENTRY(name) \
jmp ret_from_intr; \
ENDPROC(name)
-
-#ifdef CONFIG_TRACING
-# define TRACE_BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
-#else
-# define TRACE_BUILD_INTERRUPT(name, nr)
-#endif
-
#define BUILD_INTERRUPT(name, nr) \
BUILD_INTERRUPT3(name, nr, smp_##name); \
- TRACE_BUILD_INTERRUPT(name, nr)
/* The include is where all of the SMP etc. interrupts come from */
#include <asm/entry_arch.h>
@@ -880,25 +872,17 @@ ENTRY(xen_failsafe_callback)
ENDPROC(xen_failsafe_callback)
BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
- xen_evtchn_do_upcall)
+ xen_evtchn_do_upcall)
#endif /* CONFIG_XEN */
#if IS_ENABLED(CONFIG_HYPERV)
BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
- hyperv_vector_handler)
+ hyperv_vector_handler)
#endif /* CONFIG_HYPERV */
-#ifdef CONFIG_TRACING
-ENTRY(trace_page_fault)
- ASM_CLAC
- pushl $trace_do_page_fault
- jmp common_exception
-END(trace_page_fault)
-#endif
-
ENTRY(page_fault)
ASM_CLAC
pushl $do_page_fault
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d271fb79248f..49167258d587 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -36,6 +36,7 @@
#include <asm/smap.h>
#include <asm/pgtable_types.h>
#include <asm/export.h>
+#include <asm/frame.h>
#include <linux/err.h>
.code64
@@ -43,9 +44,10 @@
#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
+ UNWIND_HINT_EMPTY
swapgs
sysretq
-ENDPROC(native_usergs_sysret64)
+END(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */
.macro TRACE_IRQS_IRETQ
@@ -134,19 +136,14 @@ ENDPROC(native_usergs_sysret64)
*/
ENTRY(entry_SYSCALL_64)
+ UNWIND_HINT_EMPTY
/*
* Interrupts are off on entry.
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
* it is too small to ever cause noticeable irq latency.
*/
- SWAPGS_UNSAFE_STACK
- /*
- * A hypervisor implementation might want to use a label
- * after the swapgs, so that it can do the swapgs
- * for the guest and jump here on syscall.
- */
-GLOBAL(entry_SYSCALL_64_after_swapgs)
+ swapgs
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -158,6 +155,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
+GLOBAL(entry_SYSCALL_64_after_hwframe)
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
@@ -169,6 +167,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
pushq %r10 /* pt_regs->r10 */
pushq %r11 /* pt_regs->r11 */
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
+ UNWIND_HINT_REGS extra=0
/*
* If we need to do entry work or if we guess we'll need to do
@@ -223,6 +222,7 @@ entry_SYSCALL_64_fastpath:
movq EFLAGS(%rsp), %r11
RESTORE_C_REGS_EXCEPT_RCX_R11
movq RSP(%rsp), %rsp
+ UNWIND_HINT_EMPTY
USERGS_SYSRET64
1:
@@ -316,6 +316,7 @@ syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
RESTORE_C_REGS_EXCEPT_RCX_R11
movq RSP(%rsp), %rsp
+ UNWIND_HINT_EMPTY
USERGS_SYSRET64
opportunistic_sysret_failed:
@@ -343,6 +344,7 @@ ENTRY(stub_ptregs_64)
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
popq %rax
+ UNWIND_HINT_REGS extra=0
jmp entry_SYSCALL64_slow_path
1:
@@ -351,6 +353,7 @@ END(stub_ptregs_64)
.macro ptregs_stub func
ENTRY(ptregs_\func)
+ UNWIND_HINT_FUNC
leaq \func(%rip), %rax
jmp stub_ptregs_64
END(ptregs_\func)
@@ -367,6 +370,7 @@ END(ptregs_\func)
* %rsi: next task
*/
ENTRY(__switch_to_asm)
+ UNWIND_HINT_FUNC
/*
* Save callee-saved registers
* This must match the order in inactive_task_frame
@@ -406,6 +410,7 @@ END(__switch_to_asm)
* r12: kernel thread arg
*/
ENTRY(ret_from_fork)
+ UNWIND_HINT_EMPTY
movq %rax, %rdi
call schedule_tail /* rdi: 'prev' task parameter */
@@ -413,6 +418,7 @@ ENTRY(ret_from_fork)
jnz 1f /* kernel threads are uncommon */
2:
+ UNWIND_HINT_REGS
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON /* user mode is traced as IRQS on */
@@ -440,13 +446,102 @@ END(ret_from_fork)
ENTRY(irq_entries_start)
vector=FIRST_EXTERNAL_VECTOR
.rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+ UNWIND_HINT_IRET_REGS
pushq $(~vector+0x80) /* Note: always in signed byte range */
- vector=vector+1
jmp common_interrupt
.align 8
+ vector=vector+1
.endr
END(irq_entries_start)
+.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
+#ifdef CONFIG_DEBUG_ENTRY
+ pushfq
+ testl $X86_EFLAGS_IF, (%rsp)
+ jz .Lokay_\@
+ ud2
+.Lokay_\@:
+ addq $8, %rsp
+#endif
+.endm
+
+/*
+ * Enters the IRQ stack if we're not already using it. NMI-safe. Clobbers
+ * flags and puts old RSP into old_rsp, and leaves all other GPRs alone.
+ * Requires kernel GSBASE.
+ *
+ * The invariant is that, if irq_count != -1, then the IRQ stack is in use.
+ */
+.macro ENTER_IRQ_STACK regs=1 old_rsp
+ DEBUG_ENTRY_ASSERT_IRQS_OFF
+ movq %rsp, \old_rsp
+
+ .if \regs
+ UNWIND_HINT_REGS base=\old_rsp
+ .endif
+
+ incl PER_CPU_VAR(irq_count)
+ jnz .Lirq_stack_push_old_rsp_\@
+
+ /*
+ * Right now, if we just incremented irq_count to zero, we've
+ * claimed the IRQ stack but we haven't switched to it yet.
+ *
+ * If anything is added that can interrupt us here without using IST,
+ * it must be *extremely* careful to limit its stack usage. This
+ * could include kprobes and a hypothetical future IST-less #DB
+ * handler.
+ *
+ * The OOPS unwinder relies on the word at the top of the IRQ
+ * stack linking back to the previous RSP for the entire time we're
+ * on the IRQ stack. For this to work reliably, we need to write
+ * it before we actually move ourselves to the IRQ stack.
+ */
+
+ movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
+ movq PER_CPU_VAR(irq_stack_ptr), %rsp
+
+#ifdef CONFIG_DEBUG_ENTRY
+ /*
+ * If the first movq above becomes wrong due to IRQ stack layout
+ * changes, the only way we'll notice is if we try to unwind right
+ * here. Assert that we set up the stack right to catch this type
+ * of bug quickly.
+ */
+ cmpq -8(%rsp), \old_rsp
+ je .Lirq_stack_okay\@
+ ud2
+ .Lirq_stack_okay\@:
+#endif
+
+.Lirq_stack_push_old_rsp_\@:
+ pushq \old_rsp
+
+ .if \regs
+ UNWIND_HINT_REGS indirect=1
+ .endif
+.endm
+
+/*
+ * Undoes ENTER_IRQ_STACK.
+ */
+.macro LEAVE_IRQ_STACK regs=1
+ DEBUG_ENTRY_ASSERT_IRQS_OFF
+ /* We need to be off the IRQ stack before decrementing irq_count. */
+ popq %rsp
+
+ .if \regs
+ UNWIND_HINT_REGS
+ .endif
+
+ /*
+ * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming
+ * the irq stack but we're not on it.
+ */
+
+ decl PER_CPU_VAR(irq_count)
+.endm
+
/*
* Interrupt entry/exit.
*
@@ -485,17 +580,7 @@ END(irq_entries_start)
CALL_enter_from_user_mode
1:
- /*
- * Save previous stack pointer, optionally switch to interrupt stack.
- * irq_count is used to check if a CPU is already on an interrupt stack
- * or not. While this is essentially redundant with preempt_count it is
- * a little cheaper to use a separate counter in the PDA (short of
- * moving irq_enter into assembly, which would be too much work)
- */
- movq %rsp, %rdi
- incl PER_CPU_VAR(irq_count)
- cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
- pushq %rdi
+ ENTER_IRQ_STACK old_rsp=%rdi
/* We entered an interrupt context - irqs are off: */
TRACE_IRQS_OFF
@@ -515,10 +600,8 @@ common_interrupt:
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
- decl PER_CPU_VAR(irq_count)
- /* Restore saved previous stack */
- popq %rsp
+ LEAVE_IRQ_STACK
testb $3, CS(%rsp)
jz retint_kernel
@@ -561,6 +644,7 @@ restore_c_regs_and_iret:
INTERRUPT_RETURN
ENTRY(native_iret)
+ UNWIND_HINT_IRET_REGS
/*
* Are we returning to a stack segment from the LDT? Note: in
* 64-bit mode SS:RSP on the exception stack is always valid.
@@ -633,6 +717,7 @@ native_irq_return_ldt:
orq PER_CPU_VAR(espfix_stack), %rax
SWAPGS
movq %rax, %rsp
+ UNWIND_HINT_IRET_REGS offset=8
/*
* At this point, we cannot write to the stack any more, but we can
@@ -654,6 +739,7 @@ END(common_interrupt)
*/
.macro apicinterrupt3 num sym do_sym
ENTRY(\sym)
+ UNWIND_HINT_IRET_REGS
ASM_CLAC
pushq $~(\num)
.Lcommon_\sym:
@@ -662,31 +748,13 @@ ENTRY(\sym)
END(\sym)
.endm
-#ifdef CONFIG_TRACING
-#define trace(sym) trace_##sym
-#define smp_trace(sym) smp_trace_##sym
-
-.macro trace_apicinterrupt num sym
-apicinterrupt3 \num trace(\sym) smp_trace(\sym)
-.endm
-#else
-.macro trace_apicinterrupt num sym do_sym
-.endm
-#endif
-
/* Make sure APIC interrupt handlers end up in the irqentry section: */
-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
-# define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
-# define POP_SECTION_IRQENTRY .popsection
-#else
-# define PUSH_SECTION_IRQENTRY
-# define POP_SECTION_IRQENTRY
-#endif
+#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
+#define POP_SECTION_IRQENTRY .popsection
.macro apicinterrupt num sym do_sym
PUSH_SECTION_IRQENTRY
apicinterrupt3 \num \sym \do_sym
-trace_apicinterrupt \num \sym
POP_SECTION_IRQENTRY
.endm
@@ -740,13 +808,14 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
+ UNWIND_HINT_IRET_REGS offset=8
+
/* Sanity check */
.if \shift_ist != -1 && \paranoid == 0
.error "using shift_ist requires paranoid=1"
.endif
ASM_CLAC
- PARAVIRT_ADJUST_EXCEPTION_FRAME
.ifeq \has_error_code
pushq $-1 /* ORIG_RAX: no syscall to restart */
@@ -763,6 +832,7 @@ ENTRY(\sym)
.else
call error_entry
.endif
+ UNWIND_HINT_REGS
/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
.if \paranoid
@@ -829,17 +899,6 @@ ENTRY(\sym)
END(\sym)
.endm
-#ifdef CONFIG_TRACING
-.macro trace_idtentry sym do_sym has_error_code:req
-idtentry trace(\sym) trace(\do_sym) has_error_code=\has_error_code
-idtentry \sym \do_sym has_error_code=\has_error_code
-.endm
-#else
-.macro trace_idtentry sym do_sym has_error_code:req
-idtentry \sym \do_sym has_error_code=\has_error_code
-.endm
-#endif
-
idtentry divide_error do_divide_error has_error_code=0
idtentry overflow do_overflow has_error_code=0
idtentry bounds do_bounds has_error_code=0
@@ -860,6 +919,7 @@ idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
* edi: new selector
*/
ENTRY(native_load_gs_index)
+ FRAME_BEGIN
pushfq
DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
SWAPGS
@@ -868,8 +928,9 @@ ENTRY(native_load_gs_index)
2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
SWAPGS
popfq
+ FRAME_END
ret
-END(native_load_gs_index)
+ENDPROC(native_load_gs_index)
EXPORT_SYMBOL(native_load_gs_index)
_ASM_EXTABLE(.Lgs_change, bad_gs)
@@ -892,17 +953,15 @@ bad_gs:
ENTRY(do_softirq_own_stack)
pushq %rbp
mov %rsp, %rbp
- incl PER_CPU_VAR(irq_count)
- cmove PER_CPU_VAR(irq_stack_ptr), %rsp
- push %rbp /* frame pointer backlink */
+ ENTER_IRQ_STACK regs=0 old_rsp=%r11
call __do_softirq
+ LEAVE_IRQ_STACK regs=0
leaveq
- decl PER_CPU_VAR(irq_count)
ret
-END(do_softirq_own_stack)
+ENDPROC(do_softirq_own_stack)
#ifdef CONFIG_XEN
-idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
+idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
/*
* A note on the "critical region" in our callback handler.
@@ -923,14 +982,14 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
* see the correct pointer to the pt_regs
*/
+ UNWIND_HINT_FUNC
movq %rdi, %rsp /* we don't return, adjust the stack frame */
-11: incl PER_CPU_VAR(irq_count)
- movq %rsp, %rbp
- cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp
- pushq %rbp /* frame pointer backlink */
+ UNWIND_HINT_REGS
+
+ ENTER_IRQ_STACK old_rsp=%r10
call xen_evtchn_do_upcall
- popq %rsp
- decl PER_CPU_VAR(irq_count)
+ LEAVE_IRQ_STACK
+
#ifndef CONFIG_PREEMPT
call xen_maybe_preempt_hcall
#endif
@@ -951,6 +1010,7 @@ END(xen_do_hypervisor_callback)
* with its current contents: any discrepancy means we in category 1.
*/
ENTRY(xen_failsafe_callback)
+ UNWIND_HINT_EMPTY
movl %ds, %ecx
cmpw %cx, 0x10(%rsp)
jne 1f
@@ -968,13 +1028,13 @@ ENTRY(xen_failsafe_callback)
movq 8(%rsp), %r11
addq $0x30, %rsp
pushq $0 /* RIP */
- pushq %r11
- pushq %rcx
+ UNWIND_HINT_IRET_REGS offset=8
jmp general_protection
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
movq (%rsp), %rcx
movq 8(%rsp), %r11
addq $0x30, %rsp
+ UNWIND_HINT_IRET_REGS
pushq $-1 /* orig_ax = -1 => not a system call */
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
@@ -998,13 +1058,12 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry stack_segment do_stack_segment has_error_code=1
#ifdef CONFIG_XEN
-idtentry xen_debug do_debug has_error_code=0
-idtentry xen_int3 do_int3 has_error_code=0
-idtentry xen_stack_segment do_stack_segment has_error_code=1
+idtentry xendebug do_debug has_error_code=0
+idtentry xenint3 do_int3 has_error_code=0
#endif
idtentry general_protection do_general_protection has_error_code=1
-trace_idtentry page_fault do_page_fault has_error_code=1
+idtentry page_fault do_page_fault has_error_code=1
#ifdef CONFIG_KVM_GUEST
idtentry async_page_fault do_async_page_fault has_error_code=1
@@ -1020,6 +1079,7 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vec
* Return: ebx=0: need swapgs on exit, ebx=1: otherwise
*/
ENTRY(paranoid_entry)
+ UNWIND_HINT_FUNC
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
@@ -1047,6 +1107,7 @@ END(paranoid_entry)
* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
*/
ENTRY(paranoid_exit)
+ UNWIND_HINT_REGS
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF_DEBUG
testl %ebx, %ebx /* swapgs needed? */
@@ -1068,6 +1129,7 @@ END(paranoid_exit)
* Return: EBX=0: came from user mode; EBX=1: otherwise
*/
ENTRY(error_entry)
+ UNWIND_HINT_FUNC
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
@@ -1152,6 +1214,7 @@ END(error_entry)
* 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
*/
ENTRY(error_exit)
+ UNWIND_HINT_REGS
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
testl %ebx, %ebx
@@ -1160,19 +1223,9 @@ ENTRY(error_exit)
END(error_exit)
/* Runs on exception stack */
+/* XXX: broken on Xen PV */
ENTRY(nmi)
- /*
- * Fix up the exception frame if we're on Xen.
- * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
- * one value to the stack on native, so it may clobber the rdx
- * scratch slot, but it won't clobber any of the important
- * slots past it.
- *
- * Xen is a different story, because the Xen frame itself overlaps
- * the "NMI executing" variable.
- */
- PARAVIRT_ADJUST_EXCEPTION_FRAME
-
+ UNWIND_HINT_IRET_REGS
/*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
* the iretq it performs will take us out of NMI context.
@@ -1211,6 +1264,8 @@ ENTRY(nmi)
* other IST entries.
*/
+ ASM_CLAC
+
/* Use %rdx as our temp variable throughout */
pushq %rdx
@@ -1232,11 +1287,13 @@ ENTRY(nmi)
cld
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ UNWIND_HINT_IRET_REGS base=%rdx offset=8
pushq 5*8(%rdx) /* pt_regs->ss */
pushq 4*8(%rdx) /* pt_regs->rsp */
pushq 3*8(%rdx) /* pt_regs->flags */
pushq 2*8(%rdx) /* pt_regs->cs */
pushq 1*8(%rdx) /* pt_regs->rip */
+ UNWIND_HINT_IRET_REGS
pushq $-1 /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
@@ -1253,6 +1310,7 @@ ENTRY(nmi)
pushq %r13 /* pt_regs->r13 */
pushq %r14 /* pt_regs->r14 */
pushq %r15 /* pt_regs->r15 */
+ UNWIND_HINT_REGS
ENCODE_FRAME_POINTER
/*
@@ -1407,6 +1465,7 @@ first_nmi:
.rept 5
pushq 11*8(%rsp)
.endr
+ UNWIND_HINT_IRET_REGS
/* Everything up to here is safe from nested NMIs */
@@ -1422,6 +1481,7 @@ first_nmi:
pushq $__KERNEL_CS /* CS */
pushq $1f /* RIP */
INTERRUPT_RETURN /* continues at repeat_nmi below */
+ UNWIND_HINT_IRET_REGS
1:
#endif
@@ -1471,6 +1531,7 @@ end_repeat_nmi:
* exceptions might do.
*/
call paranoid_entry
+ UNWIND_HINT_REGS
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
movq %rsp, %rdi
@@ -1508,17 +1569,19 @@ nmi_restore:
END(nmi)
ENTRY(ignore_sysret)
+ UNWIND_HINT_EMPTY
mov $-ENOSYS, %eax
sysret
END(ignore_sysret)
ENTRY(rewind_stack_do_exit)
+ UNWIND_HINT_FUNC
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
- leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp
+ leaq -PTREGS_SIZE(%rax), %rsp
+ UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
call do_exit
-1: jmp 1b
END(rewind_stack_do_exit)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e1721dafbcb1..e26c25ca7756 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -183,21 +183,20 @@ ENDPROC(entry_SYSENTER_compat)
*/
ENTRY(entry_SYSCALL_compat)
/* Interrupts are off on entry. */
- SWAPGS_UNSAFE_STACK
+ swapgs
/* Stash user ESP and switch to the kernel stack. */
movl %esp, %r8d
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- /* Zero-extending 32-bit regs, do not remove */
- movl %eax, %eax
-
/* Construct struct pt_regs on stack */
pushq $__USER32_DS /* pt_regs->ss */
pushq %r8 /* pt_regs->sp */
pushq %r11 /* pt_regs->flags */
pushq $__USER32_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
+GLOBAL(entry_SYSCALL_compat_after_hwframe)
+ movl %eax, %eax /* discard orig_ax high bits */
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
@@ -294,7 +293,6 @@ ENTRY(entry_INT80_compat)
/*
* Interrupts are off on entry.
*/
- PARAVIRT_ADJUST_EXCEPTION_FRAME
ASM_CLAC /* Do this early to minimize exposure */
SWAPGS
@@ -342,8 +340,7 @@ ENTRY(entry_INT80_compat)
jmp restore_regs_and_iret
END(entry_INT80_compat)
- ALIGN
-GLOBAL(stub32_clone)
+ENTRY(stub32_clone)
/*
* The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
* The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
@@ -353,3 +350,4 @@ GLOBAL(stub32_clone)
*/
xchg %r8, %rcx
jmp sys_clone
+ENDPROC(stub32_clone)
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 726355ce8497..1911310959f8 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -351,7 +351,7 @@ static void vgetcpu_cpu_init(void *arg)
* and 8 bits for the node)
*/
d.limit0 = cpu | ((node & 0xf) << 12);
- d.limit = node >> 4;
+ d.limit1 = node >> 4;
d.type = 5; /* RO data, expand down, accessed */
d.dpl = 3; /* Visible to user code */
d.s = 1; /* Not a system segment */
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index ad44af0dd667..f5cbbba99283 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -400,11 +400,24 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
if (amd_uncore_llc) {
unsigned int apicid = cpu_data(cpu).apicid;
- unsigned int nshared;
+ unsigned int nshared, subleaf, prev_eax = 0;
uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
- cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
- nshared = ((eax >> 14) & 0xfff) + 1;
+ /*
+ * Iterate over Cache Topology Definition leaves until no
+ * more cache descriptions are available.
+ */
+ for (subleaf = 0; subleaf < 5; subleaf++) {
+ cpuid_count(0x8000001d, subleaf, &eax, &ebx, &ecx, &edx);
+
+ /* EAX[0:4] gives type of cache */
+ if (!(eax & 0x1f))
+ break;
+
+ prev_eax = eax;
+ }
+ nshared = ((prev_eax >> 14) & 0xfff) + 1;
+
uncore->id = apicid - (apicid % nshared);
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
@@ -555,7 +568,7 @@ static int __init amd_uncore_init(void)
ret = 0;
}
- if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
amd_uncore_llc = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_llc) {
ret = -ENOMEM;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 8e3db8f642a7..80534d3c2480 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -487,22 +487,28 @@ static inline int precise_br_compat(struct perf_event *event)
return m == b;
}
-int x86_pmu_hw_config(struct perf_event *event)
+int x86_pmu_max_precise(void)
{
- if (event->attr.precise_ip) {
- int precise = 0;
+ int precise = 0;
- /* Support for constant skid */
- if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
+ /* Support for constant skid */
+ if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
+ precise++;
+
+ /* Support for IP fixup */
+ if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
precise++;
- /* Support for IP fixup */
- if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
- precise++;
+ if (x86_pmu.pebs_prec_dist)
+ precise++;
+ }
+ return precise;
+}
- if (x86_pmu.pebs_prec_dist)
- precise++;
- }
+int x86_pmu_hw_config(struct perf_event *event)
+{
+ if (event->attr.precise_ip) {
+ int precise = x86_pmu_max_precise();
if (event->attr.precise_ip > precise)
return -EOPNOTSUPP;
@@ -1751,6 +1757,7 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
}
static struct attribute_group x86_pmu_attr_group;
+static struct attribute_group x86_pmu_caps_group;
static int __init init_hw_perf_events(void)
{
@@ -1799,6 +1806,14 @@ static int __init init_hw_perf_events(void)
x86_pmu_format_group.attrs = x86_pmu.format_attrs;
+ if (x86_pmu.caps_attrs) {
+ struct attribute **tmp;
+
+ tmp = merge_attr(x86_pmu_caps_group.attrs, x86_pmu.caps_attrs);
+ if (!WARN_ON(!tmp))
+ x86_pmu_caps_group.attrs = tmp;
+ }
+
if (x86_pmu.event_attrs)
x86_pmu_events_group.attrs = x86_pmu.event_attrs;
@@ -2114,7 +2129,7 @@ static void refresh_pce(void *ignored)
load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
}
-static void x86_pmu_event_mapped(struct perf_event *event)
+static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
{
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return;
@@ -2129,22 +2144,20 @@ static void x86_pmu_event_mapped(struct perf_event *event)
* For now, this can't happen because all callers hold mmap_sem
* for write. If this changes, we'll need a different solution.
*/
- lockdep_assert_held_exclusive(&current->mm->mmap_sem);
+ lockdep_assert_held_exclusive(&mm->mmap_sem);
- if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
- on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+ if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
+ on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
}
-static void x86_pmu_event_unmapped(struct perf_event *event)
+static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
{
- if (!current->mm)
- return;
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return;
- if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
- on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+ if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed))
+ on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
}
static int x86_pmu_event_idx(struct perf_event *event)
@@ -2215,10 +2228,30 @@ static struct attribute_group x86_pmu_attr_group = {
.attrs = x86_pmu_attrs,
};
+static ssize_t max_precise_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise());
+}
+
+static DEVICE_ATTR_RO(max_precise);
+
+static struct attribute *x86_pmu_caps_attrs[] = {
+ &dev_attr_max_precise.attr,
+ NULL
+};
+
+static struct attribute_group x86_pmu_caps_group = {
+ .name = "caps",
+ .attrs = x86_pmu_caps_attrs,
+};
+
static const struct attribute_group *x86_pmu_attr_groups[] = {
&x86_pmu_attr_group,
&x86_pmu_format_group,
&x86_pmu_events_group,
+ &x86_pmu_caps_group,
NULL,
};
@@ -2337,12 +2370,9 @@ static unsigned long get_segment_base(unsigned int segment)
#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct ldt_struct *ldt;
- if (idx > LDT_ENTRIES)
- return 0;
-
/* IRQs are off, so this synchronizes with smp_store_release */
ldt = lockless_dereference(current->active_mm->context.ldt);
- if (!ldt || idx > ldt->nr_entries)
+ if (!ldt || idx >= ldt->nr_entries)
return 0;
desc = &ldt->entries[idx];
@@ -2350,7 +2380,7 @@ static unsigned long get_segment_base(unsigned int segment)
return 0;
#endif
} else {
- if (idx > GDT_ENTRIES)
+ if (idx >= GDT_ENTRIES)
return 0;
desc = raw_cpu_ptr(gdt_page.gdt) + idx;
diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile
index 06c2baa51814..e9d8520a801a 100644
--- a/arch/x86/events/intel/Makefile
+++ b/arch/x86/events/intel/Makefile
@@ -1,4 +1,4 @@
-obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o
obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o
obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o
obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl-perf.o
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 8ae8c5ce3a1f..16076eb34699 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -69,7 +69,7 @@ struct bts_buffer {
struct bts_phys buf[0];
};
-struct pmu bts_pmu;
+static struct pmu bts_pmu;
static size_t buf_size(struct page *page)
{
@@ -268,7 +268,7 @@ static void bts_event_start(struct perf_event *event, int flags)
bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
- event->hw.itrace_started = 1;
+ perf_event_itrace_started(event);
event->hw.state = 0;
__bts_event_start(event);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 98b0f0729527..829e89cfcee2 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3415,12 +3415,26 @@ static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_any.attr,
&format_attr_inv.attr,
&format_attr_cmask.attr,
+ NULL,
+};
+
+static struct attribute *hsw_format_attr[] = {
&format_attr_in_tx.attr,
&format_attr_in_tx_cp.attr,
+ &format_attr_offcore_rsp.attr,
+ &format_attr_ldlat.attr,
+ NULL
+};
- &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
- &format_attr_ldlat.attr, /* PEBS load latency */
- NULL,
+static struct attribute *nhm_format_attr[] = {
+ &format_attr_offcore_rsp.attr,
+ &format_attr_ldlat.attr,
+ NULL
+};
+
+static struct attribute *slm_format_attr[] = {
+ &format_attr_offcore_rsp.attr,
+ NULL
};
static struct attribute *skl_format_attr[] = {
@@ -3781,6 +3795,36 @@ done:
static DEVICE_ATTR_RW(freeze_on_smi);
+static ssize_t branches_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
+}
+
+static DEVICE_ATTR_RO(branches);
+
+static struct attribute *lbr_attrs[] = {
+ &dev_attr_branches.attr,
+ NULL
+};
+
+static char pmu_name_str[30];
+
+static ssize_t pmu_name_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%s\n", pmu_name_str);
+}
+
+static DEVICE_ATTR_RO(pmu_name);
+
+static struct attribute *intel_pmu_caps_attrs[] = {
+ &dev_attr_pmu_name.attr,
+ NULL
+};
+
static struct attribute *intel_pmu_attrs[] = {
&dev_attr_freeze_on_smi.attr,
NULL,
@@ -3795,6 +3839,8 @@ __init int intel_pmu_init(void)
unsigned int unused;
struct extra_reg *er;
int version, i;
+ struct attribute **extra_attr = NULL;
+ char *name;
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
switch (boot_cpu_data.x86) {
@@ -3862,6 +3908,7 @@ __init int intel_pmu_init(void)
switch (boot_cpu_data.x86_model) {
case INTEL_FAM6_CORE_YONAH:
pr_cont("Core events, ");
+ name = "core";
break;
case INTEL_FAM6_CORE2_MEROM:
@@ -3877,6 +3924,7 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_core2_event_constraints;
x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
pr_cont("Core2 events, ");
+ name = "core2";
break;
case INTEL_FAM6_NEHALEM:
@@ -3905,8 +3953,11 @@ __init int intel_pmu_init(void)
intel_pmu_pebs_data_source_nhm();
x86_add_quirk(intel_nehalem_quirk);
+ x86_pmu.pebs_no_tlb = 1;
+ extra_attr = nhm_format_attr;
pr_cont("Nehalem events, ");
+ name = "nehalem";
break;
case INTEL_FAM6_ATOM_PINEVIEW:
@@ -3923,6 +3974,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
pr_cont("Atom events, ");
+ name = "bonnell";
break;
case INTEL_FAM6_ATOM_SILVERMONT1:
@@ -3940,7 +3992,9 @@ __init int intel_pmu_init(void)
x86_pmu.extra_regs = intel_slm_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.cpu_events = slm_events_attrs;
+ extra_attr = slm_format_attr;
pr_cont("Silvermont events, ");
+ name = "silvermont";
break;
case INTEL_FAM6_ATOM_GOLDMONT:
@@ -3965,7 +4019,9 @@ __init int intel_pmu_init(void)
x86_pmu.lbr_pt_coexist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.cpu_events = glm_events_attrs;
+ extra_attr = slm_format_attr;
pr_cont("Goldmont events, ");
+ name = "goldmont";
break;
case INTEL_FAM6_ATOM_GEMINI_LAKE:
@@ -3991,7 +4047,9 @@ __init int intel_pmu_init(void)
x86_pmu.cpu_events = glm_events_attrs;
/* Goldmont Plus has 4-wide pipeline */
event_attr_td_total_slots_scale_glm.event_str = "4";
+ extra_attr = slm_format_attr;
pr_cont("Goldmont plus events, ");
+ name = "goldmont_plus";
break;
case INTEL_FAM6_WESTMERE:
@@ -4020,7 +4078,9 @@ __init int intel_pmu_init(void)
X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
intel_pmu_pebs_data_source_nhm();
+ extra_attr = nhm_format_attr;
pr_cont("Westmere events, ");
+ name = "westmere";
break;
case INTEL_FAM6_SANDYBRIDGE:
@@ -4056,7 +4116,10 @@ __init int intel_pmu_init(void)
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
+ extra_attr = nhm_format_attr;
+
pr_cont("SandyBridge events, ");
+ name = "sandybridge";
break;
case INTEL_FAM6_IVYBRIDGE:
@@ -4090,7 +4153,10 @@ __init int intel_pmu_init(void)
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+ extra_attr = nhm_format_attr;
+
pr_cont("IvyBridge events, ");
+ name = "ivybridge";
break;
@@ -4118,7 +4184,10 @@ __init int intel_pmu_init(void)
x86_pmu.get_event_constraints = hsw_get_event_constraints;
x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.lbr_double_abort = true;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ hsw_format_attr : nhm_format_attr;
pr_cont("Haswell events, ");
+ name = "haswell";
break;
case INTEL_FAM6_BROADWELL_CORE:
@@ -4154,7 +4223,10 @@ __init int intel_pmu_init(void)
x86_pmu.get_event_constraints = hsw_get_event_constraints;
x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.limit_period = bdw_limit_period;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ hsw_format_attr : nhm_format_attr;
pr_cont("Broadwell events, ");
+ name = "broadwell";
break;
case INTEL_FAM6_XEON_PHI_KNL:
@@ -4172,8 +4244,9 @@ __init int intel_pmu_init(void)
/* all extra regs are per-cpu when HT is on */
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
+ extra_attr = slm_format_attr;
pr_cont("Knights Landing/Mill events, ");
+ name = "knights-landing";
break;
case INTEL_FAM6_SKYLAKE_MOBILE:
@@ -4203,11 +4276,14 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
- skl_format_attr);
- WARN_ON(!x86_pmu.format_attrs);
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ hsw_format_attr : nhm_format_attr;
+ extra_attr = merge_attr(extra_attr, skl_format_attr);
x86_pmu.cpu_events = hsw_events_attrs;
+ intel_pmu_pebs_data_source_skl(
+ boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
pr_cont("Skylake events, ");
+ name = "skylake";
break;
default:
@@ -4215,6 +4291,7 @@ __init int intel_pmu_init(void)
case 1:
x86_pmu.event_constraints = intel_v1_event_constraints;
pr_cont("generic architected perfmon v1, ");
+ name = "generic_arch_v1";
break;
default:
/*
@@ -4222,10 +4299,19 @@ __init int intel_pmu_init(void)
*/
x86_pmu.event_constraints = intel_gen_event_constraints;
pr_cont("generic architected perfmon, ");
+ name = "generic_arch_v2+";
break;
}
}
+ snprintf(pmu_name_str, sizeof pmu_name_str, "%s", name);
+
+ if (version >= 2 && extra_attr) {
+ x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
+ extra_attr);
+ WARN_ON(!x86_pmu.format_attrs);
+ }
+
if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
@@ -4272,8 +4358,13 @@ __init int intel_pmu_init(void)
x86_pmu.lbr_nr = 0;
}
- if (x86_pmu.lbr_nr)
+ x86_pmu.caps_attrs = intel_pmu_caps_attrs;
+
+ if (x86_pmu.lbr_nr) {
+ x86_pmu.caps_attrs = merge_attr(x86_pmu.caps_attrs, lbr_attrs);
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
+ }
+
/*
* Access extra MSR may cause #GP under certain circumstances.
* E.g. KVM doesn't support offcore event
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
deleted file mode 100644
index 2521f771f2f5..000000000000
--- a/arch/x86/events/intel/cqm.c
+++ /dev/null
@@ -1,1766 +0,0 @@
-/*
- * Intel Cache Quality-of-Service Monitoring (CQM) support.
- *
- * Based very, very heavily on work by Peter Zijlstra.
- */
-
-#include <linux/perf_event.h>
-#include <linux/slab.h>
-#include <asm/cpu_device_id.h>
-#include <asm/intel_rdt_common.h>
-#include "../perf_event.h"
-
-#define MSR_IA32_QM_CTR 0x0c8e
-#define MSR_IA32_QM_EVTSEL 0x0c8d
-
-#define MBM_CNTR_WIDTH 24
-/*
- * Guaranteed time in ms as per SDM where MBM counters will not overflow.
- */
-#define MBM_CTR_OVERFLOW_TIME 1000
-
-static u32 cqm_max_rmid = -1;
-static unsigned int cqm_l3_scale; /* supposedly cacheline size */
-static bool cqm_enabled, mbm_enabled;
-unsigned int mbm_socket_max;
-
-/*
- * The cached intel_pqr_state is strictly per CPU and can never be
- * updated from a remote CPU. Both functions which modify the state
- * (intel_cqm_event_start and intel_cqm_event_stop) are called with
- * interrupts disabled, which is sufficient for the protection.
- */
-DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
-static struct hrtimer *mbm_timers;
-/**
- * struct sample - mbm event's (local or total) data
- * @total_bytes #bytes since we began monitoring
- * @prev_msr previous value of MSR
- */
-struct sample {
- u64 total_bytes;
- u64 prev_msr;
-};
-
-/*
- * samples profiled for total memory bandwidth type events
- */
-static struct sample *mbm_total;
-/*
- * samples profiled for local memory bandwidth type events
- */
-static struct sample *mbm_local;
-
-#define pkg_id topology_physical_package_id(smp_processor_id())
-/*
- * rmid_2_index returns the index for the rmid in mbm_local/mbm_total array.
- * mbm_total[] and mbm_local[] are linearly indexed by socket# * max number of
- * rmids per socket, an example is given below
- * RMID1 of Socket0: vrmid = 1
- * RMID1 of Socket1: vrmid = 1 * (cqm_max_rmid + 1) + 1
- * RMID1 of Socket2: vrmid = 2 * (cqm_max_rmid + 1) + 1
- */
-#define rmid_2_index(rmid) ((pkg_id * (cqm_max_rmid + 1)) + rmid)
-/*
- * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
- * Also protects event->hw.cqm_rmid
- *
- * Hold either for stability, both for modification of ->hw.cqm_rmid.
- */
-static DEFINE_MUTEX(cache_mutex);
-static DEFINE_RAW_SPINLOCK(cache_lock);
-
-/*
- * Groups of events that have the same target(s), one RMID per group.
- */
-static LIST_HEAD(cache_groups);
-
-/*
- * Mask of CPUs for reading CQM values. We only need one per-socket.
- */
-static cpumask_t cqm_cpumask;
-
-#define RMID_VAL_ERROR (1ULL << 63)
-#define RMID_VAL_UNAVAIL (1ULL << 62)
-
-/*
- * Event IDs are used to program IA32_QM_EVTSEL before reading event
- * counter from IA32_QM_CTR
- */
-#define QOS_L3_OCCUP_EVENT_ID 0x01
-#define QOS_MBM_TOTAL_EVENT_ID 0x02
-#define QOS_MBM_LOCAL_EVENT_ID 0x03
-
-/*
- * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
- *
- * This rmid is always free and is guaranteed to have an associated
- * near-zero occupancy value, i.e. no cachelines are tagged with this
- * RMID, once __intel_cqm_rmid_rotate() returns.
- */
-static u32 intel_cqm_rotation_rmid;
-
-#define INVALID_RMID (-1)
-
-/*
- * Is @rmid valid for programming the hardware?
- *
- * rmid 0 is reserved by the hardware for all non-monitored tasks, which
- * means that we should never come across an rmid with that value.
- * Likewise, an rmid value of -1 is used to indicate "no rmid currently
- * assigned" and is used as part of the rotation code.
- */
-static inline bool __rmid_valid(u32 rmid)
-{
- if (!rmid || rmid == INVALID_RMID)
- return false;
-
- return true;
-}
-
-static u64 __rmid_read(u32 rmid)
-{
- u64 val;
-
- /*
- * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt,
- * it just says that to increase confusion.
- */
- wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
- rdmsrl(MSR_IA32_QM_CTR, val);
-
- /*
- * Aside from the ERROR and UNAVAIL bits, assume this thing returns
- * the number of cachelines tagged with @rmid.
- */
- return val;
-}
-
-enum rmid_recycle_state {
- RMID_YOUNG = 0,
- RMID_AVAILABLE,
- RMID_DIRTY,
-};
-
-struct cqm_rmid_entry {
- u32 rmid;
- enum rmid_recycle_state state;
- struct list_head list;
- unsigned long queue_time;
-};
-
-/*
- * cqm_rmid_free_lru - A least recently used list of RMIDs.
- *
- * Oldest entry at the head, newest (most recently used) entry at the
- * tail. This list is never traversed, it's only used to keep track of
- * the lru order. That is, we only pick entries of the head or insert
- * them on the tail.
- *
- * All entries on the list are 'free', and their RMIDs are not currently
- * in use. To mark an RMID as in use, remove its entry from the lru
- * list.
- *
- *
- * cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs.
- *
- * This list is contains RMIDs that no one is currently using but that
- * may have a non-zero occupancy value associated with them. The
- * rotation worker moves RMIDs from the limbo list to the free list once
- * the occupancy value drops below __intel_cqm_threshold.
- *
- * Both lists are protected by cache_mutex.
- */
-static LIST_HEAD(cqm_rmid_free_lru);
-static LIST_HEAD(cqm_rmid_limbo_lru);
-
-/*
- * We use a simple array of pointers so that we can lookup a struct
- * cqm_rmid_entry in O(1). This alleviates the callers of __get_rmid()
- * and __put_rmid() from having to worry about dealing with struct
- * cqm_rmid_entry - they just deal with rmids, i.e. integers.
- *
- * Once this array is initialized it is read-only. No locks are required
- * to access it.
- *
- * All entries for all RMIDs can be looked up in the this array at all
- * times.
- */
-static struct cqm_rmid_entry **cqm_rmid_ptrs;
-
-static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
-{
- struct cqm_rmid_entry *entry;
-
- entry = cqm_rmid_ptrs[rmid];
- WARN_ON(entry->rmid != rmid);
-
- return entry;
-}
-
-/*
- * Returns < 0 on fail.
- *
- * We expect to be called with cache_mutex held.
- */
-static u32 __get_rmid(void)
-{
- struct cqm_rmid_entry *entry;
-
- lockdep_assert_held(&cache_mutex);
-
- if (list_empty(&cqm_rmid_free_lru))
- return INVALID_RMID;
-
- entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list);
- list_del(&entry->list);
-
- return entry->rmid;
-}
-
-static void __put_rmid(u32 rmid)
-{
- struct cqm_rmid_entry *entry;
-
- lockdep_assert_held(&cache_mutex);
-
- WARN_ON(!__rmid_valid(rmid));
- entry = __rmid_entry(rmid);
-
- entry->queue_time = jiffies;
- entry->state = RMID_YOUNG;
-
- list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
-}
-
-static void cqm_cleanup(void)
-{
- int i;
-
- if (!cqm_rmid_ptrs)
- return;
-
- for (i = 0; i < cqm_max_rmid; i++)
- kfree(cqm_rmid_ptrs[i]);
-
- kfree(cqm_rmid_ptrs);
- cqm_rmid_ptrs = NULL;
- cqm_enabled = false;
-}
-
-static int intel_cqm_setup_rmid_cache(void)
-{
- struct cqm_rmid_entry *entry;
- unsigned int nr_rmids;
- int r = 0;
-
- nr_rmids = cqm_max_rmid + 1;
- cqm_rmid_ptrs = kzalloc(sizeof(struct cqm_rmid_entry *) *
- nr_rmids, GFP_KERNEL);
- if (!cqm_rmid_ptrs)
- return -ENOMEM;
-
- for (; r <= cqm_max_rmid; r++) {
- struct cqm_rmid_entry *entry;
-
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- goto fail;
-
- INIT_LIST_HEAD(&entry->list);
- entry->rmid = r;
- cqm_rmid_ptrs[r] = entry;
-
- list_add_tail(&entry->list, &cqm_rmid_free_lru);
- }
-
- /*
- * RMID 0 is special and is always allocated. It's used for all
- * tasks that are not monitored.
- */
- entry = __rmid_entry(0);
- list_del(&entry->list);
-
- mutex_lock(&cache_mutex);
- intel_cqm_rotation_rmid = __get_rmid();
- mutex_unlock(&cache_mutex);
-
- return 0;
-
-fail:
- cqm_cleanup();
- return -ENOMEM;
-}
-
-/*
- * Determine if @a and @b measure the same set of tasks.
- *
- * If @a and @b measure the same set of tasks then we want to share a
- * single RMID.
- */
-static bool __match_event(struct perf_event *a, struct perf_event *b)
-{
- /* Per-cpu and task events don't mix */
- if ((a->attach_state & PERF_ATTACH_TASK) !=
- (b->attach_state & PERF_ATTACH_TASK))
- return false;
-
-#ifdef CONFIG_CGROUP_PERF
- if (a->cgrp != b->cgrp)
- return false;
-#endif
-
- /* If not task event, we're machine wide */
- if (!(b->attach_state & PERF_ATTACH_TASK))
- return true;
-
- /*
- * Events that target same task are placed into the same cache group.
- * Mark it as a multi event group, so that we update ->count
- * for every event rather than just the group leader later.
- */
- if (a->hw.target == b->hw.target) {
- b->hw.is_group_event = true;
- return true;
- }
-
- /*
- * Are we an inherited event?
- */
- if (b->parent == a)
- return true;
-
- return false;
-}
-
-#ifdef CONFIG_CGROUP_PERF
-static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
-{
- if (event->attach_state & PERF_ATTACH_TASK)
- return perf_cgroup_from_task(event->hw.target, event->ctx);
-
- return event->cgrp;
-}
-#endif
-
-/*
- * Determine if @a's tasks intersect with @b's tasks
- *
- * There are combinations of events that we explicitly prohibit,
- *
- * PROHIBITS
- * system-wide -> cgroup and task
- * cgroup -> system-wide
- * -> task in cgroup
- * task -> system-wide
- * -> task in cgroup
- *
- * Call this function before allocating an RMID.
- */
-static bool __conflict_event(struct perf_event *a, struct perf_event *b)
-{
-#ifdef CONFIG_CGROUP_PERF
- /*
- * We can have any number of cgroups but only one system-wide
- * event at a time.
- */
- if (a->cgrp && b->cgrp) {
- struct perf_cgroup *ac = a->cgrp;
- struct perf_cgroup *bc = b->cgrp;
-
- /*
- * This condition should have been caught in
- * __match_event() and we should be sharing an RMID.
- */
- WARN_ON_ONCE(ac == bc);
-
- if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
- cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
- return true;
-
- return false;
- }
-
- if (a->cgrp || b->cgrp) {
- struct perf_cgroup *ac, *bc;
-
- /*
- * cgroup and system-wide events are mutually exclusive
- */
- if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) ||
- (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK)))
- return true;
-
- /*
- * Ensure neither event is part of the other's cgroup
- */
- ac = event_to_cgroup(a);
- bc = event_to_cgroup(b);
- if (ac == bc)
- return true;
-
- /*
- * Must have cgroup and non-intersecting task events.
- */
- if (!ac || !bc)
- return false;
-
- /*
- * We have cgroup and task events, and the task belongs
- * to a cgroup. Check for for overlap.
- */
- if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
- cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
- return true;
-
- return false;
- }
-#endif
- /*
- * If one of them is not a task, same story as above with cgroups.
- */
- if (!(a->attach_state & PERF_ATTACH_TASK) ||
- !(b->attach_state & PERF_ATTACH_TASK))
- return true;
-
- /*
- * Must be non-overlapping.
- */
- return false;
-}
-
-struct rmid_read {
- u32 rmid;
- u32 evt_type;
- atomic64_t value;
-};
-
-static void __intel_cqm_event_count(void *info);
-static void init_mbm_sample(u32 rmid, u32 evt_type);
-static void __intel_mbm_event_count(void *info);
-
-static bool is_cqm_event(int e)
-{
- return (e == QOS_L3_OCCUP_EVENT_ID);
-}
-
-static bool is_mbm_event(int e)
-{
- return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID);
-}
-
-static void cqm_mask_call(struct rmid_read *rr)
-{
- if (is_mbm_event(rr->evt_type))
- on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_count, rr, 1);
- else
- on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, rr, 1);
-}
-
-/*
- * Exchange the RMID of a group of events.
- */
-static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
-{
- struct perf_event *event;
- struct list_head *head = &group->hw.cqm_group_entry;
- u32 old_rmid = group->hw.cqm_rmid;
-
- lockdep_assert_held(&cache_mutex);
-
- /*
- * If our RMID is being deallocated, perform a read now.
- */
- if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
- struct rmid_read rr = {
- .rmid = old_rmid,
- .evt_type = group->attr.config,
- .value = ATOMIC64_INIT(0),
- };
-
- cqm_mask_call(&rr);
- local64_set(&group->count, atomic64_read(&rr.value));
- }
-
- raw_spin_lock_irq(&cache_lock);
-
- group->hw.cqm_rmid = rmid;
- list_for_each_entry(event, head, hw.cqm_group_entry)
- event->hw.cqm_rmid = rmid;
-
- raw_spin_unlock_irq(&cache_lock);
-
- /*
- * If the allocation is for mbm, init the mbm stats.
- * Need to check if each event in the group is mbm event
- * because there could be multiple type of events in the same group.
- */
- if (__rmid_valid(rmid)) {
- event = group;
- if (is_mbm_event(event->attr.config))
- init_mbm_sample(rmid, event->attr.config);
-
- list_for_each_entry(event, head, hw.cqm_group_entry) {
- if (is_mbm_event(event->attr.config))
- init_mbm_sample(rmid, event->attr.config);
- }
- }
-
- return old_rmid;
-}
-
-/*
- * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
- * cachelines are still tagged with RMIDs in limbo, we progressively
- * increment the threshold until we find an RMID in limbo with <=
- * __intel_cqm_threshold lines tagged. This is designed to mitigate the
- * problem where cachelines tagged with an RMID are not steadily being
- * evicted.
- *
- * On successful rotations we decrease the threshold back towards zero.
- *
- * __intel_cqm_max_threshold provides an upper bound on the threshold,
- * and is measured in bytes because it's exposed to userland.
- */
-static unsigned int __intel_cqm_threshold;
-static unsigned int __intel_cqm_max_threshold;
-
-/*
- * Test whether an RMID has a zero occupancy value on this cpu.
- */
-static void intel_cqm_stable(void *arg)
-{
- struct cqm_rmid_entry *entry;
-
- list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
- if (entry->state != RMID_AVAILABLE)
- break;
-
- if (__rmid_read(entry->rmid) > __intel_cqm_threshold)
- entry->state = RMID_DIRTY;
- }
-}
-
-/*
- * If we have group events waiting for an RMID that don't conflict with
- * events already running, assign @rmid.
- */
-static bool intel_cqm_sched_in_event(u32 rmid)
-{
- struct perf_event *leader, *event;
-
- lockdep_assert_held(&cache_mutex);
-
- leader = list_first_entry(&cache_groups, struct perf_event,
- hw.cqm_groups_entry);
- event = leader;
-
- list_for_each_entry_continue(event, &cache_groups,
- hw.cqm_groups_entry) {
- if (__rmid_valid(event->hw.cqm_rmid))
- continue;
-
- if (__conflict_event(event, leader))
- continue;
-
- intel_cqm_xchg_rmid(event, rmid);
- return true;
- }
-
- return false;
-}
-
-/*
- * Initially use this constant for both the limbo queue time and the
- * rotation timer interval, pmu::hrtimer_interval_ms.
- *
- * They don't need to be the same, but the two are related since if you
- * rotate faster than you recycle RMIDs, you may run out of available
- * RMIDs.
- */
-#define RMID_DEFAULT_QUEUE_TIME 250 /* ms */
-
-static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME;
-
-/*
- * intel_cqm_rmid_stabilize - move RMIDs from limbo to free list
- * @nr_available: number of freeable RMIDs on the limbo list
- *
- * Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no
- * cachelines are tagged with those RMIDs. After this we can reuse them
- * and know that the current set of active RMIDs is stable.
- *
- * Return %true or %false depending on whether stabilization needs to be
- * reattempted.
- *
- * If we return %true then @nr_available is updated to indicate the
- * number of RMIDs on the limbo list that have been queued for the
- * minimum queue time (RMID_AVAILABLE), but whose data occupancy values
- * are above __intel_cqm_threshold.
- */
-static bool intel_cqm_rmid_stabilize(unsigned int *available)
-{
- struct cqm_rmid_entry *entry, *tmp;
-
- lockdep_assert_held(&cache_mutex);
-
- *available = 0;
- list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
- unsigned long min_queue_time;
- unsigned long now = jiffies;
-
- /*
- * We hold RMIDs placed into limbo for a minimum queue
- * time. Before the minimum queue time has elapsed we do
- * not recycle RMIDs.
- *
- * The reasoning is that until a sufficient time has
- * passed since we stopped using an RMID, any RMID
- * placed onto the limbo list will likely still have
- * data tagged in the cache, which means we'll probably
- * fail to recycle it anyway.
- *
- * We can save ourselves an expensive IPI by skipping
- * any RMIDs that have not been queued for the minimum
- * time.
- */
- min_queue_time = entry->queue_time +
- msecs_to_jiffies(__rmid_queue_time_ms);
-
- if (time_after(min_queue_time, now))
- break;
-
- entry->state = RMID_AVAILABLE;
- (*available)++;
- }
-
- /*
- * Fast return if none of the RMIDs on the limbo list have been
- * sitting on the queue for the minimum queue time.
- */
- if (!*available)
- return false;
-
- /*
- * Test whether an RMID is free for each package.
- */
- on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true);
-
- list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) {
- /*
- * Exhausted all RMIDs that have waited min queue time.
- */
- if (entry->state == RMID_YOUNG)
- break;
-
- if (entry->state == RMID_DIRTY)
- continue;
-
- list_del(&entry->list); /* remove from limbo */
-
- /*
- * The rotation RMID gets priority if it's
- * currently invalid. In which case, skip adding
- * the RMID to the the free lru.
- */
- if (!__rmid_valid(intel_cqm_rotation_rmid)) {
- intel_cqm_rotation_rmid = entry->rmid;
- continue;
- }
-
- /*
- * If we have groups waiting for RMIDs, hand
- * them one now provided they don't conflict.
- */
- if (intel_cqm_sched_in_event(entry->rmid))
- continue;
-
- /*
- * Otherwise place it onto the free list.
- */
- list_add_tail(&entry->list, &cqm_rmid_free_lru);
- }
-
-
- return __rmid_valid(intel_cqm_rotation_rmid);
-}
-
-/*
- * Pick a victim group and move it to the tail of the group list.
- * @next: The first group without an RMID
- */
-static void __intel_cqm_pick_and_rotate(struct perf_event *next)
-{
- struct perf_event *rotor;
- u32 rmid;
-
- lockdep_assert_held(&cache_mutex);
-
- rotor = list_first_entry(&cache_groups, struct perf_event,
- hw.cqm_groups_entry);
-
- /*
- * The group at the front of the list should always have a valid
- * RMID. If it doesn't then no groups have RMIDs assigned and we
- * don't need to rotate the list.
- */
- if (next == rotor)
- return;
-
- rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID);
- __put_rmid(rmid);
-
- list_rotate_left(&cache_groups);
-}
-
-/*
- * Deallocate the RMIDs from any events that conflict with @event, and
- * place them on the back of the group list.
- */
-static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
-{
- struct perf_event *group, *g;
- u32 rmid;
-
- lockdep_assert_held(&cache_mutex);
-
- list_for_each_entry_safe(group, g, &cache_groups, hw.cqm_groups_entry) {
- if (group == event)
- continue;
-
- rmid = group->hw.cqm_rmid;
-
- /*
- * Skip events that don't have a valid RMID.
- */
- if (!__rmid_valid(rmid))
- continue;
-
- /*
- * No conflict? No problem! Leave the event alone.
- */
- if (!__conflict_event(group, event))
- continue;
-
- intel_cqm_xchg_rmid(group, INVALID_RMID);
- __put_rmid(rmid);
- }
-}
-
-/*
- * Attempt to rotate the groups and assign new RMIDs.
- *
- * We rotate for two reasons,
- * 1. To handle the scheduling of conflicting events
- * 2. To recycle RMIDs
- *
- * Rotating RMIDs is complicated because the hardware doesn't give us
- * any clues.
- *
- * There's problems with the hardware interface; when you change the
- * task:RMID map cachelines retain their 'old' tags, giving a skewed
- * picture. In order to work around this, we must always keep one free
- * RMID - intel_cqm_rotation_rmid.
- *
- * Rotation works by taking away an RMID from a group (the old RMID),
- * and assigning the free RMID to another group (the new RMID). We must
- * then wait for the old RMID to not be used (no cachelines tagged).
- * This ensure that all cachelines are tagged with 'active' RMIDs. At
- * this point we can start reading values for the new RMID and treat the
- * old RMID as the free RMID for the next rotation.
- *
- * Return %true or %false depending on whether we did any rotating.
- */
-static bool __intel_cqm_rmid_rotate(void)
-{
- struct perf_event *group, *start = NULL;
- unsigned int threshold_limit;
- unsigned int nr_needed = 0;
- unsigned int nr_available;
- bool rotated = false;
-
- mutex_lock(&cache_mutex);
-
-again:
- /*
- * Fast path through this function if there are no groups and no
- * RMIDs that need cleaning.
- */
- if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru))
- goto out;
-
- list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) {
- if (!__rmid_valid(group->hw.cqm_rmid)) {
- if (!start)
- start = group;
- nr_needed++;
- }
- }
-
- /*
- * We have some event groups, but they all have RMIDs assigned
- * and no RMIDs need cleaning.
- */
- if (!nr_needed && list_empty(&cqm_rmid_limbo_lru))
- goto out;
-
- if (!nr_needed)
- goto stabilize;
-
- /*
- * We have more event groups without RMIDs than available RMIDs,
- * or we have event groups that conflict with the ones currently
- * scheduled.
- *
- * We force deallocate the rmid of the group at the head of
- * cache_groups. The first event group without an RMID then gets
- * assigned intel_cqm_rotation_rmid. This ensures we always make
- * forward progress.
- *
- * Rotate the cache_groups list so the previous head is now the
- * tail.
- */
- __intel_cqm_pick_and_rotate(start);
-
- /*
- * If the rotation is going to succeed, reduce the threshold so
- * that we don't needlessly reuse dirty RMIDs.
- */
- if (__rmid_valid(intel_cqm_rotation_rmid)) {
- intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid);
- intel_cqm_rotation_rmid = __get_rmid();
-
- intel_cqm_sched_out_conflicting_events(start);
-
- if (__intel_cqm_threshold)
- __intel_cqm_threshold--;
- }
-
- rotated = true;
-
-stabilize:
- /*
- * We now need to stablize the RMID we freed above (if any) to
- * ensure that the next time we rotate we have an RMID with zero
- * occupancy value.
- *
- * Alternatively, if we didn't need to perform any rotation,
- * we'll have a bunch of RMIDs in limbo that need stabilizing.
- */
- threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale;
-
- while (intel_cqm_rmid_stabilize(&nr_available) &&
- __intel_cqm_threshold < threshold_limit) {
- unsigned int steal_limit;
-
- /*
- * Don't spin if nobody is actively waiting for an RMID,
- * the rotation worker will be kicked as soon as an
- * event needs an RMID anyway.
- */
- if (!nr_needed)
- break;
-
- /* Allow max 25% of RMIDs to be in limbo. */
- steal_limit = (cqm_max_rmid + 1) / 4;
-
- /*
- * We failed to stabilize any RMIDs so our rotation
- * logic is now stuck. In order to make forward progress
- * we have a few options:
- *
- * 1. rotate ("steal") another RMID
- * 2. increase the threshold
- * 3. do nothing
- *
- * We do both of 1. and 2. until we hit the steal limit.
- *
- * The steal limit prevents all RMIDs ending up on the
- * limbo list. This can happen if every RMID has a
- * non-zero occupancy above threshold_limit, and the
- * occupancy values aren't dropping fast enough.
- *
- * Note that there is prioritisation at work here - we'd
- * rather increase the number of RMIDs on the limbo list
- * than increase the threshold, because increasing the
- * threshold skews the event data (because we reuse
- * dirty RMIDs) - threshold bumps are a last resort.
- */
- if (nr_available < steal_limit)
- goto again;
-
- __intel_cqm_threshold++;
- }
-
-out:
- mutex_unlock(&cache_mutex);
- return rotated;
-}
-
-static void intel_cqm_rmid_rotate(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate);
-
-static struct pmu intel_cqm_pmu;
-
-static void intel_cqm_rmid_rotate(struct work_struct *work)
-{
- unsigned long delay;
-
- __intel_cqm_rmid_rotate();
-
- delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms);
- schedule_delayed_work(&intel_cqm_rmid_work, delay);
-}
-
-static u64 update_sample(unsigned int rmid, u32 evt_type, int first)
-{
- struct sample *mbm_current;
- u32 vrmid = rmid_2_index(rmid);
- u64 val, bytes, shift;
- u32 eventid;
-
- if (evt_type == QOS_MBM_LOCAL_EVENT_ID) {
- mbm_current = &mbm_local[vrmid];
- eventid = QOS_MBM_LOCAL_EVENT_ID;
- } else {
- mbm_current = &mbm_total[vrmid];
- eventid = QOS_MBM_TOTAL_EVENT_ID;
- }
-
- wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
- rdmsrl(MSR_IA32_QM_CTR, val);
- if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
- return mbm_current->total_bytes;
-
- if (first) {
- mbm_current->prev_msr = val;
- mbm_current->total_bytes = 0;
- return mbm_current->total_bytes;
- }
-
- /*
- * The h/w guarantees that counters will not overflow
- * so long as we poll them at least once per second.
- */
- shift = 64 - MBM_CNTR_WIDTH;
- bytes = (val << shift) - (mbm_current->prev_msr << shift);
- bytes >>= shift;
-
- bytes *= cqm_l3_scale;
-
- mbm_current->total_bytes += bytes;
- mbm_current->prev_msr = val;
-
- return mbm_current->total_bytes;
-}
-
-static u64 rmid_read_mbm(unsigned int rmid, u32 evt_type)
-{
- return update_sample(rmid, evt_type, 0);
-}
-
-static void __intel_mbm_event_init(void *info)
-{
- struct rmid_read *rr = info;
-
- update_sample(rr->rmid, rr->evt_type, 1);
-}
-
-static void init_mbm_sample(u32 rmid, u32 evt_type)
-{
- struct rmid_read rr = {
- .rmid = rmid,
- .evt_type = evt_type,
- .value = ATOMIC64_INIT(0),
- };
-
- /* on each socket, init sample */
- on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_init, &rr, 1);
-}
-
-/*
- * Find a group and setup RMID.
- *
- * If we're part of a group, we use the group's RMID.
- */
-static void intel_cqm_setup_event(struct perf_event *event,
- struct perf_event **group)
-{
- struct perf_event *iter;
- bool conflict = false;
- u32 rmid;
-
- event->hw.is_group_event = false;
- list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
- rmid = iter->hw.cqm_rmid;
-
- if (__match_event(iter, event)) {
- /* All tasks in a group share an RMID */
- event->hw.cqm_rmid = rmid;
- *group = iter;
- if (is_mbm_event(event->attr.config) && __rmid_valid(rmid))
- init_mbm_sample(rmid, event->attr.config);
- return;
- }
-
- /*
- * We only care about conflicts for events that are
- * actually scheduled in (and hence have a valid RMID).
- */
- if (__conflict_event(iter, event) && __rmid_valid(rmid))
- conflict = true;
- }
-
- if (conflict)
- rmid = INVALID_RMID;
- else
- rmid = __get_rmid();
-
- if (is_mbm_event(event->attr.config) && __rmid_valid(rmid))
- init_mbm_sample(rmid, event->attr.config);
-
- event->hw.cqm_rmid = rmid;
-}
-
-static void intel_cqm_event_read(struct perf_event *event)
-{
- unsigned long flags;
- u32 rmid;
- u64 val;
-
- /*
- * Task events are handled by intel_cqm_event_count().
- */
- if (event->cpu == -1)
- return;
-
- raw_spin_lock_irqsave(&cache_lock, flags);
- rmid = event->hw.cqm_rmid;
-
- if (!__rmid_valid(rmid))
- goto out;
-
- if (is_mbm_event(event->attr.config))
- val = rmid_read_mbm(rmid, event->attr.config);
- else
- val = __rmid_read(rmid);
-
- /*
- * Ignore this reading on error states and do not update the value.
- */
- if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
- goto out;
-
- local64_set(&event->count, val);
-out:
- raw_spin_unlock_irqrestore(&cache_lock, flags);
-}
-
-static void __intel_cqm_event_count(void *info)
-{
- struct rmid_read *rr = info;
- u64 val;
-
- val = __rmid_read(rr->rmid);
-
- if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
- return;
-
- atomic64_add(val, &rr->value);
-}
-
-static inline bool cqm_group_leader(struct perf_event *event)
-{
- return !list_empty(&event->hw.cqm_groups_entry);
-}
-
-static void __intel_mbm_event_count(void *info)
-{
- struct rmid_read *rr = info;
- u64 val;
-
- val = rmid_read_mbm(rr->rmid, rr->evt_type);
- if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
- return;
- atomic64_add(val, &rr->value);
-}
-
-static enum hrtimer_restart mbm_hrtimer_handle(struct hrtimer *hrtimer)
-{
- struct perf_event *iter, *iter1;
- int ret = HRTIMER_RESTART;
- struct list_head *head;
- unsigned long flags;
- u32 grp_rmid;
-
- /*
- * Need to cache_lock as the timer Event Select MSR reads
- * can race with the mbm/cqm count() and mbm_init() reads.
- */
- raw_spin_lock_irqsave(&cache_lock, flags);
-
- if (list_empty(&cache_groups)) {
- ret = HRTIMER_NORESTART;
- goto out;
- }
-
- list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
- grp_rmid = iter->hw.cqm_rmid;
- if (!__rmid_valid(grp_rmid))
- continue;
- if (is_mbm_event(iter->attr.config))
- update_sample(grp_rmid, iter->attr.config, 0);
-
- head = &iter->hw.cqm_group_entry;
- if (list_empty(head))
- continue;
- list_for_each_entry(iter1, head, hw.cqm_group_entry) {
- if (!iter1->hw.is_group_event)
- break;
- if (is_mbm_event(iter1->attr.config))
- update_sample(iter1->hw.cqm_rmid,
- iter1->attr.config, 0);
- }
- }
-
- hrtimer_forward_now(hrtimer, ms_to_ktime(MBM_CTR_OVERFLOW_TIME));
-out:
- raw_spin_unlock_irqrestore(&cache_lock, flags);
-
- return ret;
-}
-
-static void __mbm_start_timer(void *info)
-{
- hrtimer_start(&mbm_timers[pkg_id], ms_to_ktime(MBM_CTR_OVERFLOW_TIME),
- HRTIMER_MODE_REL_PINNED);
-}
-
-static void __mbm_stop_timer(void *info)
-{
- hrtimer_cancel(&mbm_timers[pkg_id]);
-}
-
-static void mbm_start_timers(void)
-{
- on_each_cpu_mask(&cqm_cpumask, __mbm_start_timer, NULL, 1);
-}
-
-static void mbm_stop_timers(void)
-{
- on_each_cpu_mask(&cqm_cpumask, __mbm_stop_timer, NULL, 1);
-}
-
-static void mbm_hrtimer_init(void)
-{
- struct hrtimer *hr;
- int i;
-
- for (i = 0; i < mbm_socket_max; i++) {
- hr = &mbm_timers[i];
- hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- hr->function = mbm_hrtimer_handle;
- }
-}
-
-static u64 intel_cqm_event_count(struct perf_event *event)
-{
- unsigned long flags;
- struct rmid_read rr = {
- .evt_type = event->attr.config,
- .value = ATOMIC64_INIT(0),
- };
-
- /*
- * We only need to worry about task events. System-wide events
- * are handled like usual, i.e. entirely with
- * intel_cqm_event_read().
- */
- if (event->cpu != -1)
- return __perf_event_count(event);
-
- /*
- * Only the group leader gets to report values except in case of
- * multiple events in the same group, we still need to read the
- * other events.This stops us
- * reporting duplicate values to userspace, and gives us a clear
- * rule for which task gets to report the values.
- *
- * Note that it is impossible to attribute these values to
- * specific packages - we forfeit that ability when we create
- * task events.
- */
- if (!cqm_group_leader(event) && !event->hw.is_group_event)
- return 0;
-
- /*
- * Getting up-to-date values requires an SMP IPI which is not
- * possible if we're being called in interrupt context. Return
- * the cached values instead.
- */
- if (unlikely(in_interrupt()))
- goto out;
-
- /*
- * Notice that we don't perform the reading of an RMID
- * atomically, because we can't hold a spin lock across the
- * IPIs.
- *
- * Speculatively perform the read, since @event might be
- * assigned a different (possibly invalid) RMID while we're
- * busying performing the IPI calls. It's therefore necessary to
- * check @event's RMID afterwards, and if it has changed,
- * discard the result of the read.
- */
- rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);
-
- if (!__rmid_valid(rr.rmid))
- goto out;
-
- cqm_mask_call(&rr);
-
- raw_spin_lock_irqsave(&cache_lock, flags);
- if (event->hw.cqm_rmid == rr.rmid)
- local64_set(&event->count, atomic64_read(&rr.value));
- raw_spin_unlock_irqrestore(&cache_lock, flags);
-out:
- return __perf_event_count(event);
-}
-
-static void intel_cqm_event_start(struct perf_event *event, int mode)
-{
- struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
- u32 rmid = event->hw.cqm_rmid;
-
- if (!(event->hw.cqm_state & PERF_HES_STOPPED))
- return;
-
- event->hw.cqm_state &= ~PERF_HES_STOPPED;
-
- if (state->rmid_usecnt++) {
- if (!WARN_ON_ONCE(state->rmid != rmid))
- return;
- } else {
- WARN_ON_ONCE(state->rmid);
- }
-
- state->rmid = rmid;
- wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
-}
-
-static void intel_cqm_event_stop(struct perf_event *event, int mode)
-{
- struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-
- if (event->hw.cqm_state & PERF_HES_STOPPED)
- return;
-
- event->hw.cqm_state |= PERF_HES_STOPPED;
-
- intel_cqm_event_read(event);
-
- if (!--state->rmid_usecnt) {
- state->rmid = 0;
- wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
- } else {
- WARN_ON_ONCE(!state->rmid);
- }
-}
-
-static int intel_cqm_event_add(struct perf_event *event, int mode)
-{
- unsigned long flags;
- u32 rmid;
-
- raw_spin_lock_irqsave(&cache_lock, flags);
-
- event->hw.cqm_state = PERF_HES_STOPPED;
- rmid = event->hw.cqm_rmid;
-
- if (__rmid_valid(rmid) && (mode & PERF_EF_START))
- intel_cqm_event_start(event, mode);
-
- raw_spin_unlock_irqrestore(&cache_lock, flags);
-
- return 0;
-}
-
-static void intel_cqm_event_destroy(struct perf_event *event)
-{
- struct perf_event *group_other = NULL;
- unsigned long flags;
-
- mutex_lock(&cache_mutex);
- /*
- * Hold the cache_lock as mbm timer handlers could be
- * scanning the list of events.
- */
- raw_spin_lock_irqsave(&cache_lock, flags);
-
- /*
- * If there's another event in this group...
- */
- if (!list_empty(&event->hw.cqm_group_entry)) {
- group_other = list_first_entry(&event->hw.cqm_group_entry,
- struct perf_event,
- hw.cqm_group_entry);
- list_del(&event->hw.cqm_group_entry);
- }
-
- /*
- * And we're the group leader..
- */
- if (cqm_group_leader(event)) {
- /*
- * If there was a group_other, make that leader, otherwise
- * destroy the group and return the RMID.
- */
- if (group_other) {
- list_replace(&event->hw.cqm_groups_entry,
- &group_other->hw.cqm_groups_entry);
- } else {
- u32 rmid = event->hw.cqm_rmid;
-
- if (__rmid_valid(rmid))
- __put_rmid(rmid);
- list_del(&event->hw.cqm_groups_entry);
- }
- }
-
- raw_spin_unlock_irqrestore(&cache_lock, flags);
-
- /*
- * Stop the mbm overflow timers when the last event is destroyed.
- */
- if (mbm_enabled && list_empty(&cache_groups))
- mbm_stop_timers();
-
- mutex_unlock(&cache_mutex);
-}
-
-static int intel_cqm_event_init(struct perf_event *event)
-{
- struct perf_event *group = NULL;
- bool rotate = false;
- unsigned long flags;
-
- if (event->attr.type != intel_cqm_pmu.type)
- return -ENOENT;
-
- if ((event->attr.config < QOS_L3_OCCUP_EVENT_ID) ||
- (event->attr.config > QOS_MBM_LOCAL_EVENT_ID))
- return -EINVAL;
-
- if ((is_cqm_event(event->attr.config) && !cqm_enabled) ||
- (is_mbm_event(event->attr.config) && !mbm_enabled))
- return -EINVAL;
-
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest ||
- event->attr.sample_period) /* no sampling */
- return -EINVAL;
-
- INIT_LIST_HEAD(&event->hw.cqm_group_entry);
- INIT_LIST_HEAD(&event->hw.cqm_groups_entry);
-
- event->destroy = intel_cqm_event_destroy;
-
- mutex_lock(&cache_mutex);
-
- /*
- * Start the mbm overflow timers when the first event is created.
- */
- if (mbm_enabled && list_empty(&cache_groups))
- mbm_start_timers();
-
- /* Will also set rmid */
- intel_cqm_setup_event(event, &group);
-
- /*
- * Hold the cache_lock as mbm timer handlers be
- * scanning the list of events.
- */
- raw_spin_lock_irqsave(&cache_lock, flags);
-
- if (group) {
- list_add_tail(&event->hw.cqm_group_entry,
- &group->hw.cqm_group_entry);
- } else {
- list_add_tail(&event->hw.cqm_groups_entry,
- &cache_groups);
-
- /*
- * All RMIDs are either in use or have recently been
- * used. Kick the rotation worker to clean/free some.
- *
- * We only do this for the group leader, rather than for
- * every event in a group to save on needless work.
- */
- if (!__rmid_valid(event->hw.cqm_rmid))
- rotate = true;
- }
-
- raw_spin_unlock_irqrestore(&cache_lock, flags);
- mutex_unlock(&cache_mutex);
-
- if (rotate)
- schedule_delayed_work(&intel_cqm_rmid_work, 0);
-
- return 0;
-}
-
-EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01");
-EVENT_ATTR_STR(llc_occupancy.per-pkg, intel_cqm_llc_pkg, "1");
-EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes");
-EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
-EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");
-
-EVENT_ATTR_STR(total_bytes, intel_cqm_total_bytes, "event=0x02");
-EVENT_ATTR_STR(total_bytes.per-pkg, intel_cqm_total_bytes_pkg, "1");
-EVENT_ATTR_STR(total_bytes.unit, intel_cqm_total_bytes_unit, "MB");
-EVENT_ATTR_STR(total_bytes.scale, intel_cqm_total_bytes_scale, "1e-6");
-
-EVENT_ATTR_STR(local_bytes, intel_cqm_local_bytes, "event=0x03");
-EVENT_ATTR_STR(local_bytes.per-pkg, intel_cqm_local_bytes_pkg, "1");
-EVENT_ATTR_STR(local_bytes.unit, intel_cqm_local_bytes_unit, "MB");
-EVENT_ATTR_STR(local_bytes.scale, intel_cqm_local_bytes_scale, "1e-6");
-
-static struct attribute *intel_cqm_events_attr[] = {
- EVENT_PTR(intel_cqm_llc),
- EVENT_PTR(intel_cqm_llc_pkg),
- EVENT_PTR(intel_cqm_llc_unit),
- EVENT_PTR(intel_cqm_llc_scale),
- EVENT_PTR(intel_cqm_llc_snapshot),
- NULL,
-};
-
-static struct attribute *intel_mbm_events_attr[] = {
- EVENT_PTR(intel_cqm_total_bytes),
- EVENT_PTR(intel_cqm_local_bytes),
- EVENT_PTR(intel_cqm_total_bytes_pkg),
- EVENT_PTR(intel_cqm_local_bytes_pkg),
- EVENT_PTR(intel_cqm_total_bytes_unit),
- EVENT_PTR(intel_cqm_local_bytes_unit),
- EVENT_PTR(intel_cqm_total_bytes_scale),
- EVENT_PTR(intel_cqm_local_bytes_scale),
- NULL,
-};
-
-static struct attribute *intel_cmt_mbm_events_attr[] = {
- EVENT_PTR(intel_cqm_llc),
- EVENT_PTR(intel_cqm_total_bytes),
- EVENT_PTR(intel_cqm_local_bytes),
- EVENT_PTR(intel_cqm_llc_pkg),
- EVENT_PTR(intel_cqm_total_bytes_pkg),
- EVENT_PTR(intel_cqm_local_bytes_pkg),
- EVENT_PTR(intel_cqm_llc_unit),
- EVENT_PTR(intel_cqm_total_bytes_unit),
- EVENT_PTR(intel_cqm_local_bytes_unit),
- EVENT_PTR(intel_cqm_llc_scale),
- EVENT_PTR(intel_cqm_total_bytes_scale),
- EVENT_PTR(intel_cqm_local_bytes_scale),
- EVENT_PTR(intel_cqm_llc_snapshot),
- NULL,
-};
-
-static struct attribute_group intel_cqm_events_group = {
- .name = "events",
- .attrs = NULL,
-};
-
-PMU_FORMAT_ATTR(event, "config:0-7");
-static struct attribute *intel_cqm_formats_attr[] = {
- &format_attr_event.attr,
- NULL,
-};
-
-static struct attribute_group intel_cqm_format_group = {
- .name = "format",
- .attrs = intel_cqm_formats_attr,
-};
-
-static ssize_t
-max_recycle_threshold_show(struct device *dev, struct device_attribute *attr,
- char *page)
-{
- ssize_t rv;
-
- mutex_lock(&cache_mutex);
- rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold);
- mutex_unlock(&cache_mutex);
-
- return rv;
-}
-
-static ssize_t
-max_recycle_threshold_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- unsigned int bytes, cachelines;
- int ret;
-
- ret = kstrtouint(buf, 0, &bytes);
- if (ret)
- return ret;
-
- mutex_lock(&cache_mutex);
-
- __intel_cqm_max_threshold = bytes;
- cachelines = bytes / cqm_l3_scale;
-
- /*
- * The new maximum takes effect immediately.
- */
- if (__intel_cqm_threshold > cachelines)
- __intel_cqm_threshold = cachelines;
-
- mutex_unlock(&cache_mutex);
-
- return count;
-}
-
-static DEVICE_ATTR_RW(max_recycle_threshold);
-
-static struct attribute *intel_cqm_attrs[] = {
- &dev_attr_max_recycle_threshold.attr,
- NULL,
-};
-
-static const struct attribute_group intel_cqm_group = {
- .attrs = intel_cqm_attrs,
-};
-
-static const struct attribute_group *intel_cqm_attr_groups[] = {
- &intel_cqm_events_group,
- &intel_cqm_format_group,
- &intel_cqm_group,
- NULL,
-};
-
-static struct pmu intel_cqm_pmu = {
- .hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME,
- .attr_groups = intel_cqm_attr_groups,
- .task_ctx_nr = perf_sw_context,
- .event_init = intel_cqm_event_init,
- .add = intel_cqm_event_add,
- .del = intel_cqm_event_stop,
- .start = intel_cqm_event_start,
- .stop = intel_cqm_event_stop,
- .read = intel_cqm_event_read,
- .count = intel_cqm_event_count,
-};
-
-static inline void cqm_pick_event_reader(int cpu)
-{
- int reader;
-
- /* First online cpu in package becomes the reader */
- reader = cpumask_any_and(&cqm_cpumask, topology_core_cpumask(cpu));
- if (reader >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cqm_cpumask);
-}
-
-static int intel_cqm_cpu_starting(unsigned int cpu)
-{
- struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-
- state->rmid = 0;
- state->closid = 0;
- state->rmid_usecnt = 0;
-
- WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
- WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
-
- cqm_pick_event_reader(cpu);
- return 0;
-}
-
-static int intel_cqm_cpu_exit(unsigned int cpu)
-{
- int target;
-
- /* Is @cpu the current cqm reader for this package ? */
- if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
- return 0;
-
- /* Find another online reader in this package */
- target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
-
- if (target < nr_cpu_ids)
- cpumask_set_cpu(target, &cqm_cpumask);
-
- return 0;
-}
-
-static const struct x86_cpu_id intel_cqm_match[] = {
- { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_OCCUP_LLC },
- {}
-};
-
-static void mbm_cleanup(void)
-{
- if (!mbm_enabled)
- return;
-
- kfree(mbm_local);
- kfree(mbm_total);
- mbm_enabled = false;
-}
-
-static const struct x86_cpu_id intel_mbm_local_match[] = {
- { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_LOCAL },
- {}
-};
-
-static const struct x86_cpu_id intel_mbm_total_match[] = {
- { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_TOTAL },
- {}
-};
-
-static int intel_mbm_init(void)
-{
- int ret = 0, array_size, maxid = cqm_max_rmid + 1;
-
- mbm_socket_max = topology_max_packages();
- array_size = sizeof(struct sample) * maxid * mbm_socket_max;
- mbm_local = kmalloc(array_size, GFP_KERNEL);
- if (!mbm_local)
- return -ENOMEM;
-
- mbm_total = kmalloc(array_size, GFP_KERNEL);
- if (!mbm_total) {
- ret = -ENOMEM;
- goto out;
- }
-
- array_size = sizeof(struct hrtimer) * mbm_socket_max;
- mbm_timers = kmalloc(array_size, GFP_KERNEL);
- if (!mbm_timers) {
- ret = -ENOMEM;
- goto out;
- }
- mbm_hrtimer_init();
-
-out:
- if (ret)
- mbm_cleanup();
-
- return ret;
-}
-
-static int __init intel_cqm_init(void)
-{
- char *str = NULL, scale[20];
- int cpu, ret;
-
- if (x86_match_cpu(intel_cqm_match))
- cqm_enabled = true;
-
- if (x86_match_cpu(intel_mbm_local_match) &&
- x86_match_cpu(intel_mbm_total_match))
- mbm_enabled = true;
-
- if (!cqm_enabled && !mbm_enabled)
- return -ENODEV;
-
- cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
-
- /*
- * It's possible that not all resources support the same number
- * of RMIDs. Instead of making scheduling much more complicated
- * (where we have to match a task's RMID to a cpu that supports
- * that many RMIDs) just find the minimum RMIDs supported across
- * all cpus.
- *
- * Also, check that the scales match on all cpus.
- */
- cpus_read_lock();
- for_each_online_cpu(cpu) {
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-
- if (c->x86_cache_max_rmid < cqm_max_rmid)
- cqm_max_rmid = c->x86_cache_max_rmid;
-
- if (c->x86_cache_occ_scale != cqm_l3_scale) {
- pr_err("Multiple LLC scale values, disabling\n");
- ret = -EINVAL;
- goto out;
- }
- }
-
- /*
- * A reasonable upper limit on the max threshold is the number
- * of lines tagged per RMID if all RMIDs have the same number of
- * lines tagged in the LLC.
- *
- * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
- */
- __intel_cqm_max_threshold =
- boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1);
-
- snprintf(scale, sizeof(scale), "%u", cqm_l3_scale);
- str = kstrdup(scale, GFP_KERNEL);
- if (!str) {
- ret = -ENOMEM;
- goto out;
- }
-
- event_attr_intel_cqm_llc_scale.event_str = str;
-
- ret = intel_cqm_setup_rmid_cache();
- if (ret)
- goto out;
-
- if (mbm_enabled)
- ret = intel_mbm_init();
- if (ret && !cqm_enabled)
- goto out;
-
- if (cqm_enabled && mbm_enabled)
- intel_cqm_events_group.attrs = intel_cmt_mbm_events_attr;
- else if (!cqm_enabled && mbm_enabled)
- intel_cqm_events_group.attrs = intel_mbm_events_attr;
- else if (cqm_enabled && !mbm_enabled)
- intel_cqm_events_group.attrs = intel_cqm_events_attr;
-
- ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
- if (ret) {
- pr_err("Intel CQM perf registration failed: %d\n", ret);
- goto out;
- }
-
- if (cqm_enabled)
- pr_info("Intel CQM monitoring enabled\n");
- if (mbm_enabled)
- pr_info("Intel MBM enabled\n");
-
- /*
- * Setup the hot cpu notifier once we are sure cqm
- * is enabled to avoid notifier leak.
- */
- cpuhp_setup_state_cpuslocked(CPUHP_AP_PERF_X86_CQM_STARTING,
- "perf/x86/cqm:starting",
- intel_cqm_cpu_starting, NULL);
- cpuhp_setup_state_cpuslocked(CPUHP_AP_PERF_X86_CQM_ONLINE,
- "perf/x86/cqm:online",
- NULL, intel_cqm_cpu_exit);
-out:
- cpus_read_unlock();
-
- if (ret) {
- kfree(str);
- cqm_cleanup();
- mbm_cleanup();
- }
-
- return ret;
-}
-device_initcall(intel_cqm_init);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index a322fed5f8ed..e1965e5ff570 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -49,34 +49,47 @@ union intel_x86_pebs_dse {
*/
#define P(a, b) PERF_MEM_S(a, b)
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define LEVEL(x) P(LVLNUM, x)
+#define REM P(REMOTE, REMOTE)
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
/* Version for Sandy Bridge and later */
static u64 pebs_data_source[] = {
- P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
- OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */
- OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
- OP_LH | P(LVL, L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
- OP_LH | P(LVL, L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
- OP_LH | P(LVL, L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
- OP_LH | P(LVL, L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
- OP_LH | P(LVL, L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
- OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
- OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
- OP_LH | P(LVL, LOC_RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
- OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
- OP_LH | P(LVL, LOC_RAM) | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
- OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
- OP_LH | P(LVL, IO) | P(SNOOP, NONE), /* 0x0e: I/O */
- OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
+ P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
+ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */
+ OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
+ OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
+ OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
+ OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* 0x0c: L3 miss, excl */
+ OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
+ OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
+ OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
};
/* Patch up minor differences in the bits */
void __init intel_pmu_pebs_data_source_nhm(void)
{
- pebs_data_source[0x05] = OP_LH | P(LVL, L3) | P(SNOOP, HIT);
- pebs_data_source[0x06] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
- pebs_data_source[0x07] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
+ pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
+ pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
+ pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
+}
+
+/*
+ * Patch up pebs_data_source[] entries 0x08, 0x09, 0x0b, 0x0c and 0x0d.
+ *
+ * @pmem: when true, entries 0x08 and 0x09 are tagged LEVEL(PMEM);
+ *        otherwise they are tagged LEVEL(L4).
+ */
+void __init intel_pmu_pebs_data_source_skl(bool pmem)
+{
+ u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
+
+ /* 0x08 is the local variant, 0x09 the same level with REM set */
+ pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
+ pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
+ pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
+ pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
+ pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
}
static u64 precise_store_data(u64 status)
@@ -149,8 +162,6 @@ static u64 load_latency_data(u64 status)
{
union intel_x86_pebs_dse dse;
u64 val;
- int model = boot_cpu_data.x86_model;
- int fam = boot_cpu_data.x86;
dse.val = status;
@@ -162,8 +173,7 @@ static u64 load_latency_data(u64 status)
/*
* Nehalem models do not support TLB, Lock infos
*/
- if (fam == 0x6 && (model == 26 || model == 30
- || model == 31 || model == 46)) {
+ if (x86_pmu.pebs_no_tlb) {
val |= P(TLB, NA) | P(LOCK, NA);
return val;
}
@@ -1175,7 +1185,7 @@ static void setup_pebs_sample_data(struct perf_event *event,
else
regs->flags &= ~PERF_EFLAGS_EXACT;
- if ((sample_type & PERF_SAMPLE_ADDR) &&
+ if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
x86_pmu.intel_cap.pebs_format >= 1)
data->addr = pebs->dla;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 955457a30197..8a6bbacd17dc 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -109,6 +109,9 @@ enum {
X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
X86_BR_CALL_STACK = 1 << 16,/* call stack */
X86_BR_IND_JMP = 1 << 17,/* indirect jump */
+
+ X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */
+
};
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -514,6 +517,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
cpuc->lbr_entries[i].in_tx = 0;
cpuc->lbr_entries[i].abort = 0;
cpuc->lbr_entries[i].cycles = 0;
+ cpuc->lbr_entries[i].type = 0;
cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
@@ -600,6 +604,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
cpuc->lbr_entries[out].in_tx = in_tx;
cpuc->lbr_entries[out].abort = abort;
cpuc->lbr_entries[out].cycles = cycles;
+ cpuc->lbr_entries[out].type = 0;
cpuc->lbr_entries[out].reserved = 0;
out++;
}
@@ -677,6 +682,10 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
if (br_type & PERF_SAMPLE_BRANCH_CALL)
mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
+
+ if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
+ mask |= X86_BR_TYPE_SAVE;
+
/*
* stash actual user request into reg, it may
* be used by fixup code for some CPU
@@ -930,6 +939,43 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
return ret;
}
+#define X86_BR_TYPE_MAP_MAX	16
+
+/*
+ * Lookup table from X86_BR_* bit position (after the X86_BR_USER and
+ * X86_BR_KERNEL bits have been shifted out) to the generic PERF_BR_* type.
+ * The table is only ever read, so keep it const.
+ */
+static const int branch_map[X86_BR_TYPE_MAP_MAX] = {
+	PERF_BR_CALL,		/* X86_BR_CALL */
+	PERF_BR_RET,		/* X86_BR_RET */
+	PERF_BR_SYSCALL,	/* X86_BR_SYSCALL */
+	PERF_BR_SYSRET,		/* X86_BR_SYSRET */
+	PERF_BR_UNKNOWN,	/* X86_BR_INT */
+	PERF_BR_UNKNOWN,	/* X86_BR_IRET */
+	PERF_BR_COND,		/* X86_BR_JCC */
+	PERF_BR_UNCOND,		/* X86_BR_JMP */
+	PERF_BR_UNKNOWN,	/* X86_BR_IRQ */
+	PERF_BR_IND_CALL,	/* X86_BR_IND_CALL */
+	PERF_BR_UNKNOWN,	/* X86_BR_ABORT */
+	PERF_BR_UNKNOWN,	/* X86_BR_IN_TX */
+	PERF_BR_UNKNOWN,	/* X86_BR_NO_TX */
+	PERF_BR_CALL,		/* X86_BR_ZERO_CALL */
+	PERF_BR_UNKNOWN,	/* X86_BR_CALL_STACK */
+	PERF_BR_IND,		/* X86_BR_IND_JMP */
+};
+
+/*
+ * Map an X86_BR_* mask to one PERF_BR_* value: the lowest set bit above
+ * the two privilege bits selects the branch_map[] entry. Returns
+ * PERF_BR_UNKNOWN when no such bit is set or it lies outside the table.
+ */
+static int common_branch_type(int type)
+{
+	int bit;
+
+	/* skip X86_BR_USER and X86_BR_KERNEL */
+	type >>= 2;
+	if (!type)
+		return PERF_BR_UNKNOWN;
+
+	bit = __ffs(type);
+	if (bit >= X86_BR_TYPE_MAP_MAX)
+		return PERF_BR_UNKNOWN;
+
+	return branch_map[bit];
+}
+
+
/*
* implement actual branch filter based on user demand.
* Hardware may not exactly satisfy that request, thus
@@ -946,7 +992,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
bool compress = false;
/* if sampling all branches, then nothing to filter */
- if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+ if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
+ ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
return;
for (i = 0; i < cpuc->lbr_stack.nr; i++) {
@@ -967,6 +1014,9 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
cpuc->lbr_entries[i].from = 0;
compress = true;
}
+
+ if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
+ cpuc->lbr_entries[i].type = common_branch_type(type);
}
if (!compress)
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index eb0533558c2b..d32c0eed38ca 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -587,7 +587,7 @@ static __initconst const u64 p4_hw_cache_event_ids
* P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are
* either up to date automatically or not applicable at all.
*/
-struct p4_event_alias {
+static struct p4_event_alias {
u64 original;
u64 alternative;
} p4_event_aliases[] = {
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index ae8324d65e61..81fd41d5a0d9 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -471,8 +471,9 @@ static void pt_config(struct perf_event *event)
struct pt *pt = this_cpu_ptr(&pt_ctx);
u64 reg;
- if (!event->hw.itrace_started) {
- event->hw.itrace_started = 1;
+ /* First round: clear STATUS, in particular the PSB byte counter. */
+ if (!event->hw.config) {
+ perf_event_itrace_started(event);
wrmsrl(MSR_IA32_RTIT_STATUS, 0);
}
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index a45e2114a846..8e2457cb6b4a 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -559,7 +559,7 @@ static struct attribute_group rapl_pmu_format_group = {
.attrs = rapl_formats_attr,
};
-const struct attribute_group *rapl_attr_groups[] = {
+static const struct attribute_group *rapl_attr_groups[] = {
&rapl_pmu_attr_group,
&rapl_pmu_format_group,
&rapl_pmu_events_group,
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 44ec523287f6..1c5390f1cf09 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -721,7 +721,7 @@ static struct attribute *uncore_pmu_attrs[] = {
NULL,
};
-static struct attribute_group uncore_pmu_attr_group = {
+static const struct attribute_group uncore_pmu_attr_group = {
.attrs = uncore_pmu_attrs,
};
diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
index cda569332005..6a5cbe90f859 100644
--- a/arch/x86/events/intel/uncore_nhmex.c
+++ b/arch/x86/events/intel/uncore_nhmex.c
@@ -272,7 +272,7 @@ static struct attribute *nhmex_uncore_ubox_formats_attr[] = {
NULL,
};
-static struct attribute_group nhmex_uncore_ubox_format_group = {
+static const struct attribute_group nhmex_uncore_ubox_format_group = {
.name = "format",
.attrs = nhmex_uncore_ubox_formats_attr,
};
@@ -299,7 +299,7 @@ static struct attribute *nhmex_uncore_cbox_formats_attr[] = {
NULL,
};
-static struct attribute_group nhmex_uncore_cbox_format_group = {
+static const struct attribute_group nhmex_uncore_cbox_format_group = {
.name = "format",
.attrs = nhmex_uncore_cbox_formats_attr,
};
@@ -407,7 +407,7 @@ static struct attribute *nhmex_uncore_bbox_formats_attr[] = {
NULL,
};
-static struct attribute_group nhmex_uncore_bbox_format_group = {
+static const struct attribute_group nhmex_uncore_bbox_format_group = {
.name = "format",
.attrs = nhmex_uncore_bbox_formats_attr,
};
@@ -484,7 +484,7 @@ static struct attribute *nhmex_uncore_sbox_formats_attr[] = {
NULL,
};
-static struct attribute_group nhmex_uncore_sbox_format_group = {
+static const struct attribute_group nhmex_uncore_sbox_format_group = {
.name = "format",
.attrs = nhmex_uncore_sbox_formats_attr,
};
@@ -898,7 +898,7 @@ static struct attribute *nhmex_uncore_mbox_formats_attr[] = {
NULL,
};
-static struct attribute_group nhmex_uncore_mbox_format_group = {
+static const struct attribute_group nhmex_uncore_mbox_format_group = {
.name = "format",
.attrs = nhmex_uncore_mbox_formats_attr,
};
@@ -1163,7 +1163,7 @@ static struct attribute *nhmex_uncore_rbox_formats_attr[] = {
NULL,
};
-static struct attribute_group nhmex_uncore_rbox_format_group = {
+static const struct attribute_group nhmex_uncore_rbox_format_group = {
.name = "format",
.attrs = nhmex_uncore_rbox_formats_attr,
};
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index a3dcc12bef4a..db1127ce685e 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -130,7 +130,7 @@ static struct attribute *snb_uncore_formats_attr[] = {
NULL,
};
-static struct attribute_group snb_uncore_format_group = {
+static const struct attribute_group snb_uncore_format_group = {
.name = "format",
.attrs = snb_uncore_formats_attr,
};
@@ -289,7 +289,7 @@ static struct attribute *snb_uncore_imc_formats_attr[] = {
NULL,
};
-static struct attribute_group snb_uncore_imc_format_group = {
+static const struct attribute_group snb_uncore_imc_format_group = {
.name = "format",
.attrs = snb_uncore_imc_formats_attr,
};
@@ -769,7 +769,7 @@ static struct attribute *nhm_uncore_formats_attr[] = {
NULL,
};
-static struct attribute_group nhm_uncore_format_group = {
+static const struct attribute_group nhm_uncore_format_group = {
.name = "format",
.attrs = nhm_uncore_formats_attr,
};
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 4f9127644b80..db1fe377e6dd 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -602,27 +602,27 @@ static struct uncore_event_desc snbep_uncore_qpi_events[] = {
{ /* end: all zeroes */ },
};
-static struct attribute_group snbep_uncore_format_group = {
+static const struct attribute_group snbep_uncore_format_group = {
.name = "format",
.attrs = snbep_uncore_formats_attr,
};
-static struct attribute_group snbep_uncore_ubox_format_group = {
+static const struct attribute_group snbep_uncore_ubox_format_group = {
.name = "format",
.attrs = snbep_uncore_ubox_formats_attr,
};
-static struct attribute_group snbep_uncore_cbox_format_group = {
+static const struct attribute_group snbep_uncore_cbox_format_group = {
.name = "format",
.attrs = snbep_uncore_cbox_formats_attr,
};
-static struct attribute_group snbep_uncore_pcu_format_group = {
+static const struct attribute_group snbep_uncore_pcu_format_group = {
.name = "format",
.attrs = snbep_uncore_pcu_formats_attr,
};
-static struct attribute_group snbep_uncore_qpi_format_group = {
+static const struct attribute_group snbep_uncore_qpi_format_group = {
.name = "format",
.attrs = snbep_uncore_qpi_formats_attr,
};
@@ -1431,27 +1431,27 @@ static struct attribute *ivbep_uncore_qpi_formats_attr[] = {
NULL,
};
-static struct attribute_group ivbep_uncore_format_group = {
+static const struct attribute_group ivbep_uncore_format_group = {
.name = "format",
.attrs = ivbep_uncore_formats_attr,
};
-static struct attribute_group ivbep_uncore_ubox_format_group = {
+static const struct attribute_group ivbep_uncore_ubox_format_group = {
.name = "format",
.attrs = ivbep_uncore_ubox_formats_attr,
};
-static struct attribute_group ivbep_uncore_cbox_format_group = {
+static const struct attribute_group ivbep_uncore_cbox_format_group = {
.name = "format",
.attrs = ivbep_uncore_cbox_formats_attr,
};
-static struct attribute_group ivbep_uncore_pcu_format_group = {
+static const struct attribute_group ivbep_uncore_pcu_format_group = {
.name = "format",
.attrs = ivbep_uncore_pcu_formats_attr,
};
-static struct attribute_group ivbep_uncore_qpi_format_group = {
+static const struct attribute_group ivbep_uncore_qpi_format_group = {
.name = "format",
.attrs = ivbep_uncore_qpi_formats_attr,
};
@@ -1887,7 +1887,7 @@ static struct attribute *knl_uncore_ubox_formats_attr[] = {
NULL,
};
-static struct attribute_group knl_uncore_ubox_format_group = {
+static const struct attribute_group knl_uncore_ubox_format_group = {
.name = "format",
.attrs = knl_uncore_ubox_formats_attr,
};
@@ -1927,7 +1927,7 @@ static struct attribute *knl_uncore_cha_formats_attr[] = {
NULL,
};
-static struct attribute_group knl_uncore_cha_format_group = {
+static const struct attribute_group knl_uncore_cha_format_group = {
.name = "format",
.attrs = knl_uncore_cha_formats_attr,
};
@@ -2037,7 +2037,7 @@ static struct attribute *knl_uncore_pcu_formats_attr[] = {
NULL,
};
-static struct attribute_group knl_uncore_pcu_format_group = {
+static const struct attribute_group knl_uncore_pcu_format_group = {
.name = "format",
.attrs = knl_uncore_pcu_formats_attr,
};
@@ -2187,7 +2187,7 @@ static struct attribute *knl_uncore_irp_formats_attr[] = {
NULL,
};
-static struct attribute_group knl_uncore_irp_format_group = {
+static const struct attribute_group knl_uncore_irp_format_group = {
.name = "format",
.attrs = knl_uncore_irp_formats_attr,
};
@@ -2385,7 +2385,7 @@ static struct attribute *hswep_uncore_ubox_formats_attr[] = {
NULL,
};
-static struct attribute_group hswep_uncore_ubox_format_group = {
+static const struct attribute_group hswep_uncore_ubox_format_group = {
.name = "format",
.attrs = hswep_uncore_ubox_formats_attr,
};
@@ -2439,7 +2439,7 @@ static struct attribute *hswep_uncore_cbox_formats_attr[] = {
NULL,
};
-static struct attribute_group hswep_uncore_cbox_format_group = {
+static const struct attribute_group hswep_uncore_cbox_format_group = {
.name = "format",
.attrs = hswep_uncore_cbox_formats_attr,
};
@@ -2621,7 +2621,7 @@ static struct attribute *hswep_uncore_sbox_formats_attr[] = {
NULL,
};
-static struct attribute_group hswep_uncore_sbox_format_group = {
+static const struct attribute_group hswep_uncore_sbox_format_group = {
.name = "format",
.attrs = hswep_uncore_sbox_formats_attr,
};
@@ -3314,7 +3314,7 @@ static struct attribute *skx_uncore_cha_formats_attr[] = {
NULL,
};
-static struct attribute_group skx_uncore_chabox_format_group = {
+static const struct attribute_group skx_uncore_chabox_format_group = {
.name = "format",
.attrs = skx_uncore_cha_formats_attr,
};
@@ -3427,7 +3427,7 @@ static struct attribute *skx_uncore_iio_formats_attr[] = {
NULL,
};
-static struct attribute_group skx_uncore_iio_format_group = {
+static const struct attribute_group skx_uncore_iio_format_group = {
.name = "format",
.attrs = skx_uncore_iio_formats_attr,
};
@@ -3484,7 +3484,7 @@ static struct attribute *skx_uncore_formats_attr[] = {
NULL,
};
-static struct attribute_group skx_uncore_format_group = {
+static const struct attribute_group skx_uncore_format_group = {
.name = "format",
.attrs = skx_uncore_formats_attr,
};
@@ -3605,7 +3605,7 @@ static struct attribute *skx_upi_uncore_formats_attr[] = {
NULL,
};
-static struct attribute_group skx_upi_uncore_format_group = {
+static const struct attribute_group skx_upi_uncore_format_group = {
.name = "format",
.attrs = skx_upi_uncore_formats_attr,
};
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 476aec3a4cab..4196f81ec0e1 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -91,7 +91,7 @@ struct amd_nb {
(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
- PERF_SAMPLE_TRANSACTION)
+ PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR)
/*
* A debug store configuration.
@@ -558,6 +558,7 @@ struct x86_pmu {
int attr_rdpmc;
struct attribute **format_attrs;
struct attribute **event_attrs;
+ struct attribute **caps_attrs;
ssize_t (*events_sysfs_show)(char *page, u64 config);
struct attribute **cpu_events;
@@ -591,7 +592,8 @@ struct x86_pmu {
pebs :1,
pebs_active :1,
pebs_broken :1,
- pebs_prec_dist :1;
+ pebs_prec_dist :1,
+ pebs_no_tlb :1;
int pebs_record_size;
int pebs_buffer_size;
void (*drain_pebs)(struct pt_regs *regs);
@@ -741,6 +743,8 @@ int x86_reserve_hardware(void);
void x86_release_hardware(void);
+int x86_pmu_max_precise(void);
+
void hw_perf_lbr_event_destroy(struct perf_event *event);
int x86_setup_perfctr(struct perf_event *event);
@@ -947,6 +951,8 @@ void intel_pmu_lbr_init_knl(void);
void intel_pmu_pebs_data_source_nhm(void);
+void intel_pmu_pebs_data_source_skl(bool pmem);
+
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index 171ae09864d7..367a8203cfcf 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1 +1 @@
-obj-y := hv_init.o
+obj-y := hv_init.o mmu.o
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 5b882cc0c0e9..1a8eb550c40f 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -26,6 +26,8 @@
#include <linux/mm.h>
#include <linux/clockchips.h>
#include <linux/hyperv.h>
+#include <linux/slab.h>
+#include <linux/cpuhotplug.h>
#ifdef CONFIG_HYPERV_TSCPAGE
@@ -75,10 +77,25 @@ static struct clocksource hyperv_cs_msr = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
-static void *hypercall_pg;
+void *hv_hypercall_pg;
+EXPORT_SYMBOL_GPL(hv_hypercall_pg);
struct clocksource *hyperv_cs;
EXPORT_SYMBOL_GPL(hyperv_cs);
+u32 *hv_vp_index;
+EXPORT_SYMBOL_GPL(hv_vp_index);
+
+/*
+ * CPU hotplug "online" callback registered by hyperv_init(): record the
+ * VP index of the executing CPU in hv_vp_index[].
+ *
+ * @cpu: unused; the slot is chosen with smp_processor_id() instead.
+ *
+ * Always returns 0.
+ */
+static int hv_cpu_init(unsigned int cpu)
+{
+ u64 msr_vp_index;
+
+ /* presumably reads the VP index MSR of this CPU -- confirm in mshyperv.h */
+ hv_get_vp_index(msr_vp_index);
+
+ /* u64 value is stored into a u32 slot */
+ hv_vp_index[smp_processor_id()] = msr_vp_index;
+
+ return 0;
+}
+
/*
* This function is to be invoked early in the boot sequence after the
* hypervisor has been detected.
@@ -94,6 +111,16 @@ void hyperv_init(void)
if (x86_hyper != &x86_hyper_ms_hyperv)
return;
+ /* Allocate percpu VP index */
+ hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
+ GFP_KERNEL);
+ if (!hv_vp_index)
+ return;
+
+ if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
+ hv_cpu_init, NULL) < 0)
+ goto free_vp_index;
+
/*
* Setup the hypercall page and enable hypercalls.
* 1. Register the guest ID
@@ -102,17 +129,19 @@ void hyperv_init(void)
guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);
wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
- hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
- if (hypercall_pg == NULL) {
+ hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
+ if (hv_hypercall_pg == NULL) {
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
- return;
+ goto free_vp_index;
}
rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
hypercall_msr.enable = 1;
- hypercall_msr.guest_physical_address = vmalloc_to_pfn(hypercall_pg);
+ hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ hyper_alloc_mmu();
+
/*
* Register Hyper-V specific clocksource.
*/
@@ -148,6 +177,12 @@ register_msr_cs:
hyperv_cs = &hyperv_cs_msr;
if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
+
+ return;
+
+free_vp_index:
+ kfree(hv_vp_index);
+ hv_vp_index = NULL;
}
/*
@@ -170,51 +205,6 @@ void hyperv_cleanup(void)
}
EXPORT_SYMBOL_GPL(hyperv_cleanup);
-/*
- * hv_do_hypercall- Invoke the specified hypercall
- */
-u64 hv_do_hypercall(u64 control, void *input, void *output)
-{
- u64 input_address = (input) ? virt_to_phys(input) : 0;
- u64 output_address = (output) ? virt_to_phys(output) : 0;
-#ifdef CONFIG_X86_64
- u64 hv_status = 0;
-
- if (!hypercall_pg)
- return (u64)ULLONG_MAX;
-
- __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8");
- __asm__ __volatile__("call *%3" : "=a" (hv_status) :
- "c" (control), "d" (input_address),
- "m" (hypercall_pg));
-
- return hv_status;
-
-#else
-
- u32 control_hi = control >> 32;
- u32 control_lo = control & 0xFFFFFFFF;
- u32 hv_status_hi = 1;
- u32 hv_status_lo = 1;
- u32 input_address_hi = input_address >> 32;
- u32 input_address_lo = input_address & 0xFFFFFFFF;
- u32 output_address_hi = output_address >> 32;
- u32 output_address_lo = output_address & 0xFFFFFFFF;
-
- if (!hypercall_pg)
- return (u64)ULLONG_MAX;
-
- __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi),
- "=a"(hv_status_lo) : "d" (control_hi),
- "a" (control_lo), "b" (input_address_hi),
- "c" (input_address_lo), "D"(output_address_hi),
- "S"(output_address_lo), "m" (hypercall_pg));
-
- return hv_status_lo | ((u64)hv_status_hi << 32);
-#endif /* !x86_64 */
-}
-EXPORT_SYMBOL_GPL(hv_do_hypercall);
-
void hyperv_report_panic(struct pt_regs *regs)
{
static bool panic_reported;
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
new file mode 100644
index 000000000000..39e7f6e50919
--- /dev/null
+++ b/arch/x86/hyperv/mmu.c
@@ -0,0 +1,272 @@
+#define pr_fmt(fmt) "Hyper-V: " fmt
+
+#include <linux/hyperv.h>
+#include <linux/log2.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <asm/fpu/api.h>
+#include <asm/mshyperv.h>
+#include <asm/msr.h>
+#include <asm/tlbflush.h>
+
+#define CREATE_TRACE_POINTS
+#include <asm/trace/hyperv.h>
+
+/*
+ * HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls
+ *
+ * Hypercall input built by hyperv_flush_tlb_others() in a per-CPU buffer.
+ */
+struct hv_flush_pcpu {
+ /* physical address of the mm's pgd, or 0 when info->mm is NULL */
+ u64 address_space;
+ /* HV_FLUSH_* flags */
+ u64 flags;
+ /* one bit per target VP number; only VP numbers below 64 fit */
+ u64 processor_mask;
+ /* entries written by fill_gva_list() */
+ u64 gva_list[];
+};
+
+/*
+ * HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls
+ *
+ * Hypercall input built by hyperv_flush_tlb_others_ex() in a per-CPU buffer.
+ */
+struct hv_flush_pcpu_ex {
+ /* physical address of the mm's pgd, or 0 when info->mm is NULL */
+ u64 address_space;
+ /* HV_FLUSH_* flags */
+ u64 flags;
+ struct {
+ /* HV_GENERIC_SET_* value describing how the set is encoded */
+ u64 format;
+ /* bit n set: bank_contents[n] is part of the set */
+ u64 valid_bank_mask;
+ /* 64 VPs per bank, one bit per VP (see cpumask_to_vp_set()) */
+ u64 bank_contents[];
+ } hv_vp_set;
+ /*
+ * The caller passes nr_bank as the starting offset into this array,
+ * so the gva entries are placed after the banks in use.
+ */
+ u64 gva_list[];
+};
+
+/* Each gva in gva_list encodes up to 4096 pages to flush */
+#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
+
+static struct hv_flush_pcpu __percpu *pcpu_flush;
+
+static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex;
+
+/*
+ * Fills in gva_list starting from offset. Returns the number of items added.
+ *
+ * @gva_list: destination array of flush entries
+ * @offset:   index of the first entry to write
+ * @start:    first address of the range to flush
+ * @end:      end of the range to flush
+ *
+ * At least one entry is always written, even when end <= start.
+ */
+static inline int fill_gva_list(u64 gva_list[], int offset,
+				unsigned long start, unsigned long end)
+{
+	int gva_n = offset;
+	unsigned long cur = start, diff;
+
+	do {
+		diff = end > cur ? end - cur : 0;
+
+		gva_list[gva_n] = cur & PAGE_MASK;
+		/*
+		 * Lower 12 bits encode the number of additional
+		 * pages to flush (in addition to the 'cur' page).
+		 */
+		if (diff >= HV_TLB_FLUSH_UNIT) {
+			gva_list[gva_n] |= ~PAGE_MASK;
+			/* diff >= unit means cur + unit <= end: cannot wrap */
+			cur += HV_TLB_FLUSH_UNIT;
+		} else if (diff) {
+			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
+			/*
+			 * Last, partial chunk. Jump to 'end' instead of
+			 * adding a full unit: near the top of the address
+			 * space the addition would wrap 'cur' around, the
+			 * loop condition would stay true and the list would
+			 * be overrun.
+			 */
+			cur = end;
+		}
+
+		gva_n++;
+
+	} while (cur < end);
+
+	return gva_n - offset;
+}
+
+/*
+ * Encode @cpus as a sparse VP set in @flush->hv_vp_set.
+ *
+ * Return the number of banks in the resulting vp_set, or 0 when some vCPU
+ * does not fit into the 64 banks valid_bank_mask can describe.
+ */
+static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
+				    const struct cpumask *cpus)
+{
+	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
+
+	/*
+	 * The flush buffer is reused from call to call and the banks share
+	 * their storage with gva_list, so whatever a previous flush left
+	 * behind must be wiped before bits are OR-ed in. Bank 0 is always
+	 * part of the set.
+	 */
+	flush->hv_vp_set.bank_contents[0] = 0;
+
+	/*
+	 * Some banks may end up being empty but this is acceptable.
+	 */
+	for_each_cpu(cpu, cpus) {
+		vcpu = hv_cpu_number_to_vp_number(cpu);
+		vcpu_bank = vcpu / 64;
+		vcpu_offset = vcpu % 64;
+
+		/* valid_bank_mask can represent up to 64 banks */
+		if (vcpu_bank >= 64)
+			return 0;
+
+		/* Clear every bank the set grows into before its first use */
+		for (; nr_bank <= vcpu_bank; nr_bank++)
+			flush->hv_vp_set.bank_contents[nr_bank] = 0;
+
+		__set_bit(vcpu_offset, (unsigned long *)
+			  &flush->hv_vp_set.bank_contents[vcpu_bank]);
+	}
+	flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
+
+	return nr_bank;
+}
+
+/*
+ * Remote TLB flush through the HvFlushVirtualAddressSpace/List hypercalls.
+ *
+ * @cpus: CPUs whose TLBs must be flushed
+ * @info: address space and range to flush
+ *
+ * Falls back to native_flush_tlb_others() when the per-CPU input buffer or
+ * the hypercall page is missing, when a target VP number does not fit into
+ * the 64-bit processor mask, or when the hypercall reports a failure.
+ */
+static void hyperv_flush_tlb_others(const struct cpumask *cpus,
+				    const struct flush_tlb_info *info)
+{
+	int cpu, vcpu, gva_n, max_gvas;
+	struct hv_flush_pcpu *flush;
+	u64 status = U64_MAX;
+	unsigned long flags;
+
+	trace_hyperv_mmu_flush_tlb_others(cpus, info);
+
+	if (!pcpu_flush || !hv_hypercall_pg)
+		goto do_native;
+
+	if (cpumask_empty(cpus))
+		return;
+
+	/* The per-CPU input buffer is filled with interrupts disabled */
+	local_irq_save(flags);
+
+	flush = this_cpu_ptr(pcpu_flush);
+
+	if (info->mm) {
+		flush->address_space = virt_to_phys(info->mm->pgd);
+		flush->flags = 0;
+	} else {
+		flush->address_space = 0;
+		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+	}
+
+	flush->processor_mask = 0;
+	if (cpumask_equal(cpus, cpu_present_mask)) {
+		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
+	} else {
+		for_each_cpu(cpu, cpus) {
+			vcpu = hv_cpu_number_to_vp_number(cpu);
+			if (vcpu >= 64) {
+				/*
+				 * Undo local_irq_save() before taking the
+				 * fallback, otherwise the native flush runs
+				 * (and we return) with interrupts disabled.
+				 */
+				local_irq_restore(flags);
+				goto do_native;
+			}
+
+			__set_bit(vcpu, (unsigned long *)
+				  &flush->processor_mask);
+		}
+	}
+
+	/*
+	 * We can flush not more than max_gvas with one hypercall. Flush the
+	 * whole address space if we were asked to do more.
+	 */
+	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
+
+	if (info->end == TLB_FLUSH_ALL) {
+		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+					 flush, NULL);
+	} else if (info->end &&
+		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
+		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+					 flush, NULL);
+	} else {
+		gva_n = fill_gva_list(flush->gva_list, 0,
+				      info->start, info->end);
+		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
+					     gva_n, 0, flush, NULL);
+	}
+
+	local_irq_restore(flags);
+
+	/* No result bits set: the hypervisor performed the flush */
+	if (!(status & HV_HYPERCALL_RESULT_MASK))
+		return;
+do_native:
+	native_flush_tlb_others(cpus, info);
+}
+
+/*
+ * Remote TLB flush through the HvFlushVirtualAddressSpaceEx/ListEx
+ * hypercalls, which take a sparse VP set instead of a 64-bit mask.
+ *
+ * @cpus: CPUs whose TLBs must be flushed
+ * @info: address space and range to flush
+ *
+ * Falls back to native_flush_tlb_others() when the per-CPU input buffer or
+ * the hypercall page is missing, or when the hypercall reports a failure.
+ */
+static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
+ const struct flush_tlb_info *info)
+{
+ int nr_bank = 0, max_gvas, gva_n;
+ struct hv_flush_pcpu_ex *flush;
+ u64 status = U64_MAX;
+ unsigned long flags;
+
+ trace_hyperv_mmu_flush_tlb_others(cpus, info);
+
+ if (!pcpu_flush_ex || !hv_hypercall_pg)
+ goto do_native;
+
+ if (cpumask_empty(cpus))
+ return;
+
+ /* The per-CPU input buffer is filled with interrupts disabled */
+ local_irq_save(flags);
+
+ flush = this_cpu_ptr(pcpu_flush_ex);
+
+ if (info->mm) {
+ flush->address_space = virt_to_phys(info->mm->pgd);
+ flush->flags = 0;
+ } else {
+ flush->address_space = 0;
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ }
+
+ flush->hv_vp_set.valid_bank_mask = 0;
+
+ if (!cpumask_equal(cpus, cpu_present_mask)) {
+ flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
+ nr_bank = cpumask_to_vp_set(flush, cpus);
+ }
+
+ /*
+ * nr_bank is 0 both when all present CPUs are targeted and when
+ * cpumask_to_vp_set() could not encode the mask; flush all processors.
+ */
+ if (!nr_bank) {
+ flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ flush->flags |= HV_FLUSH_ALL_PROCESSORS;
+ }
+
+ /*
+ * We can flush not more than max_gvas with one hypercall. Flush the
+ * whole address space if we were asked to do more.
+ */
+ max_gvas =
+ (PAGE_SIZE - sizeof(*flush) - nr_bank *
+ sizeof(flush->hv_vp_set.bank_contents[0])) /
+ sizeof(flush->gva_list[0]);
+
+ if (info->end == TLB_FLUSH_ALL) {
+ flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+ status = hv_do_rep_hypercall(
+ HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+ 0, nr_bank + 2, flush, NULL);
+ } else if (info->end &&
+ ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
+ status = hv_do_rep_hypercall(
+ HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+ 0, nr_bank + 2, flush, NULL);
+ } else {
+ /* gva entries start right after the nr_bank banks in use */
+ gva_n = fill_gva_list(flush->gva_list, nr_bank,
+ info->start, info->end);
+ status = hv_do_rep_hypercall(
+ HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
+ gva_n, nr_bank + 2, flush, NULL);
+ }
+
+ local_irq_restore(flags);
+
+ /* No result bits set: the hypervisor performed the flush */
+ if (!(status & HV_HYPERCALL_RESULT_MASK))
+ return;
+do_native:
+ native_flush_tlb_others(cpus, info);
+}
+
+/*
+ * Route pv_mmu_ops.flush_tlb_others to a Hyper-V hypercall implementation
+ * when the hypervisor recommends remote TLB flush hypercalls; the "ex"
+ * variant is chosen when extended processor masks are recommended.
+ * X86_FEATURE_PCID is cleared whenever one of them is installed.
+ */
+void hyperv_setup_mmu_ops(void)
+{
+	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
+		return;
+
+	setup_clear_cpu_cap(X86_FEATURE_PCID);
+
+	if (ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) {
+		pr_info("Using ext hypercall for remote TLB flush\n");
+		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
+		return;
+	}
+
+	pr_info("Using hypercall for remote TLB flush\n");
+	pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
+}
+
+/*
+ * Allocate the page-sized, page-aligned per-CPU hypercall input buffer for
+ * whichever flush variant the hypervisor hints select. The result is not
+ * checked here; both flush routines test their buffer pointer before use.
+ */
+void hyper_alloc_mmu(void)
+{
+	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
+		return;
+
+	if (ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED) {
+		pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+		return;
+	}
+
+	pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+}
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 724153797209..e0bb46c02857 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -226,7 +226,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
if (ksig->ka.sa.sa_flags & SA_ONSTACK)
sp = sigsp(sp, ksig);
/* This is the legacy signal stack switching. */
- else if ((regs->ss & 0xffff) != __USER32_DS &&
+ else if (regs->ss != __USER32_DS &&
!(ksig->ka.sa.sa_flags & SA_RESTORER) &&
ksig->ka.sa.sa_restorer)
sp = (unsigned long) ksig->ka.sa.sa_restorer;
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 2efc768e4362..72d867f6b518 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -150,8 +150,6 @@ static inline void disable_acpi(void) { }
extern int x86_acpi_numa_init(void);
#endif /* CONFIG_ACPI_NUMA */
-#define acpi_unlazy_tlb(x) leave_mm(x)
-
#ifdef CONFIG_ACPI_APEI
static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
{
@@ -162,12 +160,13 @@ static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
* you call efi_mem_attributes() during boot and at runtime,
* you could theoretically see different attributes.
*
- * Since we are yet to see any x86 platforms that require
- * anything other than PAGE_KERNEL (some arm64 platforms
- * require the equivalent of PAGE_KERNEL_NOCACHE), return that
- * until we know differently.
+ * We are yet to see any x86 platforms that require anything
+ * other than PAGE_KERNEL (some ARM64 platforms require the
+ * equivalent of PAGE_KERNEL_NOCACHE). Additionally, if SME
+ * is active, the ACPI information will not be encrypted,
+ * so return PAGE_KERNEL_NOENC until we know differently.
*/
- return PAGE_KERNEL;
+ return PAGE_KERNEL_NOENC;
}
#endif
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 7a9df3beb89b..676ee5807d86 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -74,6 +74,9 @@
# define _ASM_EXTABLE_EX(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
+# define _ASM_EXTABLE_REFCOUNT(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
+
# define _ASM_NOKPROBE(entry) \
.pushsection "_kprobe_blacklist","aw" ; \
_ASM_ALIGN ; \
@@ -123,6 +126,9 @@
# define _ASM_EXTABLE_EX(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
+# define _ASM_EXTABLE_REFCOUNT(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
+
/* For C file, we already have NOKPROBE_SYMBOL macro */
#endif
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 33380b871463..0874ebda3069 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -197,35 +197,56 @@ static inline int atomic_xchg(atomic_t *v, int new)
return xchg(&v->counter, new);
}
-#define ATOMIC_OP(op) \
-static inline void atomic_##op(int i, atomic_t *v) \
-{ \
- asm volatile(LOCK_PREFIX #op"l %1,%0" \
- : "+m" (v->counter) \
- : "ir" (i) \
- : "memory"); \
+static inline void atomic_and(int i, atomic_t *v)
+{
+ asm volatile(LOCK_PREFIX "andl %1,%0"
+ : "+m" (v->counter)
+ : "ir" (i)
+ : "memory");
+}
+
+static inline int atomic_fetch_and(int i, atomic_t *v)
+{
+ int val = atomic_read(v);
+
+ do { } while (!atomic_try_cmpxchg(v, &val, val & i));
+
+ return val;
}
-#define ATOMIC_FETCH_OP(op, c_op) \
-static inline int atomic_fetch_##op(int i, atomic_t *v) \
-{ \
- int val = atomic_read(v); \
- do { \
- } while (!atomic_try_cmpxchg(v, &val, val c_op i)); \
- return val; \
+static inline void atomic_or(int i, atomic_t *v)
+{
+ asm volatile(LOCK_PREFIX "orl %1,%0"
+ : "+m" (v->counter)
+ : "ir" (i)
+ : "memory");
}
-#define ATOMIC_OPS(op, c_op) \
- ATOMIC_OP(op) \
- ATOMIC_FETCH_OP(op, c_op)
+static inline int atomic_fetch_or(int i, atomic_t *v)
+{
+ int val = atomic_read(v);
-ATOMIC_OPS(and, &)
-ATOMIC_OPS(or , |)
-ATOMIC_OPS(xor, ^)
+ do { } while (!atomic_try_cmpxchg(v, &val, val | i));
-#undef ATOMIC_OPS
-#undef ATOMIC_FETCH_OP
-#undef ATOMIC_OP
+ return val;
+}
+
+static inline void atomic_xor(int i, atomic_t *v)
+{
+ asm volatile(LOCK_PREFIX "xorl %1,%0"
+ : "+m" (v->counter)
+ : "ir" (i)
+ : "memory");
+}
+
+static inline int atomic_fetch_xor(int i, atomic_t *v)
+{
+ int val = atomic_read(v);
+
+ do { } while (!atomic_try_cmpxchg(v, &val, val ^ i));
+
+ return val;
+}
/**
* __atomic_add_unless - add unless the number is already a given value
@@ -239,10 +260,12 @@ ATOMIC_OPS(xor, ^)
static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
{
int c = atomic_read(v);
+
do {
if (unlikely(c == u))
break;
} while (!atomic_try_cmpxchg(v, &c, c + a));
+
return c;
}
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 71d7705fb303..9e206f31ce2a 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -312,37 +312,70 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
#undef alternative_atomic64
#undef __alternative_atomic64
-#define ATOMIC64_OP(op, c_op) \
-static inline void atomic64_##op(long long i, atomic64_t *v) \
-{ \
- long long old, c = 0; \
- while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \
- c = old; \
+static inline void atomic64_and(long long i, atomic64_t *v)
+{
+ long long old, c = 0;
+
+ while ((old = atomic64_cmpxchg(v, c, c & i)) != c)
+ c = old;
}
-#define ATOMIC64_FETCH_OP(op, c_op) \
-static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \
-{ \
- long long old, c = 0; \
- while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \
- c = old; \
- return old; \
+static inline long long atomic64_fetch_and(long long i, atomic64_t *v)
+{
+ long long old, c = 0;
+
+ while ((old = atomic64_cmpxchg(v, c, c & i)) != c)
+ c = old;
+
+ return old;
}
-ATOMIC64_FETCH_OP(add, +)
+static inline void atomic64_or(long long i, atomic64_t *v)
+{
+ long long old, c = 0;
-#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v))
+ while ((old = atomic64_cmpxchg(v, c, c | i)) != c)
+ c = old;
+}
+
+static inline long long atomic64_fetch_or(long long i, atomic64_t *v)
+{
+ long long old, c = 0;
+
+ while ((old = atomic64_cmpxchg(v, c, c | i)) != c)
+ c = old;
+
+ return old;
+}
-#define ATOMIC64_OPS(op, c_op) \
- ATOMIC64_OP(op, c_op) \
- ATOMIC64_FETCH_OP(op, c_op)
+static inline void atomic64_xor(long long i, atomic64_t *v)
+{
+ long long old, c = 0;
+
+ while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c)
+ c = old;
+}
-ATOMIC64_OPS(and, &)
-ATOMIC64_OPS(or, |)
-ATOMIC64_OPS(xor, ^)
+static inline long long atomic64_fetch_xor(long long i, atomic64_t *v)
+{
+ long long old, c = 0;
+
+ while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c)
+ c = old;
+
+ return old;
+}
-#undef ATOMIC64_OPS
-#undef ATOMIC64_FETCH_OP
-#undef ATOMIC64_OP
+static inline long long atomic64_fetch_add(long long i, atomic64_t *v)
+{
+ long long old, c = 0;
+
+ while ((old = atomic64_cmpxchg(v, c, c + i)) != c)
+ c = old;
+
+ return old;
+}
+
+#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v))
#endif /* _ASM_X86_ATOMIC64_32_H */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6189a433c9a9..5d9de36a2f04 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -177,7 +177,7 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
}
#define atomic64_try_cmpxchg atomic64_try_cmpxchg
-static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, long *old, long new)
+static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new)
{
return try_cmpxchg(&v->counter, old, new);
}
@@ -198,7 +198,7 @@ static inline long atomic64_xchg(atomic64_t *v, long new)
*/
static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
{
- long c = atomic64_read(v);
+ s64 c = atomic64_read(v);
do {
if (unlikely(c == u))
return false;
@@ -217,7 +217,7 @@ static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
*/
static inline long atomic64_dec_if_positive(atomic64_t *v)
{
- long dec, c = atomic64_read(v);
+ s64 dec, c = atomic64_read(v);
do {
dec = c - 1;
if (unlikely(dec < 0))
@@ -226,34 +226,55 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
return dec;
}
-#define ATOMIC64_OP(op) \
-static inline void atomic64_##op(long i, atomic64_t *v) \
-{ \
- asm volatile(LOCK_PREFIX #op"q %1,%0" \
- : "+m" (v->counter) \
- : "er" (i) \
- : "memory"); \
+static inline void atomic64_and(long i, atomic64_t *v)
+{
+ asm volatile(LOCK_PREFIX "andq %1,%0"
+ : "+m" (v->counter)
+ : "er" (i)
+ : "memory");
}
-#define ATOMIC64_FETCH_OP(op, c_op) \
-static inline long atomic64_fetch_##op(long i, atomic64_t *v) \
-{ \
- long val = atomic64_read(v); \
- do { \
- } while (!atomic64_try_cmpxchg(v, &val, val c_op i)); \
- return val; \
+static inline long atomic64_fetch_and(long i, atomic64_t *v)
+{
+ s64 val = atomic64_read(v);
+
+ do {
+ } while (!atomic64_try_cmpxchg(v, &val, val & i));
+ return val;
}
-#define ATOMIC64_OPS(op, c_op) \
- ATOMIC64_OP(op) \
- ATOMIC64_FETCH_OP(op, c_op)
+static inline void atomic64_or(long i, atomic64_t *v)
+{
+ asm volatile(LOCK_PREFIX "orq %1,%0"
+ : "+m" (v->counter)
+ : "er" (i)
+ : "memory");
+}
-ATOMIC64_OPS(and, &)
-ATOMIC64_OPS(or, |)
-ATOMIC64_OPS(xor, ^)
+static inline long atomic64_fetch_or(long i, atomic64_t *v)
+{
+ s64 val = atomic64_read(v);
-#undef ATOMIC64_OPS
-#undef ATOMIC64_FETCH_OP
-#undef ATOMIC64_OP
+ do {
+ } while (!atomic64_try_cmpxchg(v, &val, val | i));
+ return val;
+}
+
+static inline void atomic64_xor(long i, atomic64_t *v)
+{
+ asm volatile(LOCK_PREFIX "xorq %1,%0"
+ : "+m" (v->counter)
+ : "er" (i)
+ : "memory");
+}
+
+static inline long atomic64_fetch_xor(long i, atomic64_t *v)
+{
+ s64 val = atomic64_read(v);
+
+ do {
+ } while (!atomic64_try_cmpxchg(v, &val, val ^ i));
+ return val;
+}
#endif /* _ASM_X86_ATOMIC64_64_H */
diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h
index e01f7f7ccb0c..84ae170bc3d0 100644
--- a/arch/x86/include/asm/cmdline.h
+++ b/arch/x86/include/asm/cmdline.h
@@ -2,5 +2,7 @@
#define _ASM_X86_CMDLINE_H
int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
+int cmdline_find_option(const char *cmdline_ptr, const char *option,
+ char *buffer, int bufsize);
#endif /* _ASM_X86_CMDLINE_H */
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index d90296d061e8..b5069e802d5c 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -157,7 +157,7 @@ extern void __add_wrong_size(void)
#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \
({ \
bool success; \
- __typeof__(_ptr) _old = (_pold); \
+ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
switch (size) { \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index ca3c48c0872f..2519c6c801c9 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -177,7 +177,7 @@
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */
-#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
+#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
/*
@@ -196,6 +196,7 @@
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
@@ -286,7 +287,8 @@
#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
-#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */
+#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index d0a21b12dd58..1a2ba368da39 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -5,6 +5,7 @@
#include <asm/ldt.h>
#include <asm/mmu.h>
#include <asm/fixmap.h>
+#include <asm/irq_vectors.h>
#include <linux/smp.h>
#include <linux/percpu.h>
@@ -22,7 +23,7 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
desc->s = 1;
desc->dpl = 0x3;
desc->p = info->seg_not_present ^ 1;
- desc->limit = (info->limit & 0xf0000) >> 16;
+ desc->limit1 = (info->limit & 0xf0000) >> 16;
desc->avl = info->useable;
desc->d = info->seg_32bit;
desc->g = info->limit_in_pages;
@@ -83,33 +84,25 @@ static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
}
-#ifdef CONFIG_X86_64
-
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
unsigned dpl, unsigned ist, unsigned seg)
{
- gate->offset_low = PTR_LOW(func);
+ gate->offset_low = (u16) func;
+ gate->bits.p = 1;
+ gate->bits.dpl = dpl;
+ gate->bits.zero = 0;
+ gate->bits.type = type;
+ gate->offset_middle = (u16) (func >> 16);
+#ifdef CONFIG_X86_64
gate->segment = __KERNEL_CS;
- gate->ist = ist;
- gate->p = 1;
- gate->dpl = dpl;
- gate->zero0 = 0;
- gate->zero1 = 0;
- gate->type = type;
- gate->offset_middle = PTR_MIDDLE(func);
- gate->offset_high = PTR_HIGH(func);
-}
-
+ gate->bits.ist = ist;
+ gate->reserved = 0;
+ gate->offset_high = (u32) (func >> 32);
#else
-static inline void pack_gate(gate_desc *gate, unsigned char type,
- unsigned long base, unsigned dpl, unsigned flags,
- unsigned short seg)
-{
- gate->a = (seg << 16) | (base & 0xffff);
- gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8);
-}
-
+ gate->segment = seg;
+ gate->bits.ist = 0;
#endif
+}
static inline int desc_empty(const void *ptr)
{
@@ -173,35 +166,22 @@ native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int
memcpy(&gdt[entry], desc, size);
}
-static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
- unsigned long limit, unsigned char type,
- unsigned char flags)
-{
- desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
- desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
- (limit & 0x000f0000) | ((type & 0xff) << 8) |
- ((flags & 0xf) << 20);
- desc->p = 1;
-}
-
-
-static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size)
+static inline void set_tssldt_descriptor(void *d, unsigned long addr,
+ unsigned type, unsigned size)
{
-#ifdef CONFIG_X86_64
- struct ldttss_desc64 *desc = d;
+ struct ldttss_desc *desc = d;
memset(desc, 0, sizeof(*desc));
- desc->limit0 = size & 0xFFFF;
- desc->base0 = PTR_LOW(addr);
- desc->base1 = PTR_MIDDLE(addr) & 0xFF;
+ desc->limit0 = (u16) size;
+ desc->base0 = (u16) addr;
+ desc->base1 = (addr >> 16) & 0xFF;
desc->type = type;
desc->p = 1;
desc->limit1 = (size >> 16) & 0xF;
- desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF;
- desc->base3 = PTR_HIGH(addr);
-#else
- pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
+ desc->base2 = (addr >> 24) & 0xFF;
+#ifdef CONFIG_X86_64
+ desc->base3 = (u32) (addr >> 32);
#endif
}
@@ -401,147 +381,20 @@ static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
static inline unsigned long get_desc_limit(const struct desc_struct *desc)
{
- return desc->limit0 | (desc->limit << 16);
+ return desc->limit0 | (desc->limit1 << 16);
}
static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
{
desc->limit0 = limit & 0xffff;
- desc->limit = (limit >> 16) & 0xf;
-}
-
-#ifdef CONFIG_X86_64
-static inline void set_nmi_gate(int gate, void *addr)
-{
- gate_desc s;
-
- pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
- write_idt_entry(debug_idt_table, gate, &s);
+ desc->limit1 = (limit >> 16) & 0xf;
}
-#endif
-#ifdef CONFIG_TRACING
-extern struct desc_ptr trace_idt_descr;
-extern gate_desc trace_idt_table[];
-static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
-{
- write_idt_entry(trace_idt_table, entry, gate);
-}
+void update_intr_gate(unsigned int n, const void *addr);
+void alloc_intr_gate(unsigned int n, const void *addr);
-static inline void _trace_set_gate(int gate, unsigned type, void *addr,
- unsigned dpl, unsigned ist, unsigned seg)
-{
- gate_desc s;
-
- pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
- /*
- * does not need to be atomic because it is only done once at
- * setup time
- */
- write_trace_idt_entry(gate, &s);
-}
-#else
-static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
-{
-}
-
-#define _trace_set_gate(gate, type, addr, dpl, ist, seg)
-#endif
-
-static inline void _set_gate(int gate, unsigned type, void *addr,
- unsigned dpl, unsigned ist, unsigned seg)
-{
- gate_desc s;
-
- pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
- /*
- * does not need to be atomic because it is only done once at
- * setup time
- */
- write_idt_entry(idt_table, gate, &s);
- write_trace_idt_entry(gate, &s);
-}
-
-/*
- * This needs to use 'idt_table' rather than 'idt', and
- * thus use the _nonmapped_ version of the IDT, as the
- * Pentium F0 0F bugfix can have resulted in the mapped
- * IDT being write-protected.
- */
-#define set_intr_gate_notrace(n, addr) \
- do { \
- BUG_ON((unsigned)n > 0xFF); \
- _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \
- __KERNEL_CS); \
- } while (0)
-
-#define set_intr_gate(n, addr) \
- do { \
- set_intr_gate_notrace(n, addr); \
- _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
- 0, 0, __KERNEL_CS); \
- } while (0)
-
-extern int first_system_vector;
-/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */
extern unsigned long used_vectors[];
-static inline void alloc_system_vector(int vector)
-{
- if (!test_bit(vector, used_vectors)) {
- set_bit(vector, used_vectors);
- if (first_system_vector > vector)
- first_system_vector = vector;
- } else {
- BUG();
- }
-}
-
-#define alloc_intr_gate(n, addr) \
- do { \
- alloc_system_vector(n); \
- set_intr_gate(n, addr); \
- } while (0)
-
-/*
- * This routine sets up an interrupt gate at directory privilege level 3.
- */
-static inline void set_system_intr_gate(unsigned int n, void *addr)
-{
- BUG_ON((unsigned)n > 0xFF);
- _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
-}
-
-static inline void set_system_trap_gate(unsigned int n, void *addr)
-{
- BUG_ON((unsigned)n > 0xFF);
- _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS);
-}
-
-static inline void set_trap_gate(unsigned int n, void *addr)
-{
- BUG_ON((unsigned)n > 0xFF);
- _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS);
-}
-
-static inline void set_task_gate(unsigned int n, unsigned int gdt_entry)
-{
- BUG_ON((unsigned)n > 0xFF);
- _set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3));
-}
-
-static inline void set_intr_gate_ist(int n, void *addr, unsigned ist)
-{
- BUG_ON((unsigned)n > 0xFF);
- _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
-}
-
-static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
-{
- BUG_ON((unsigned)n > 0xFF);
- _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
-}
-
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(u32, debug_idt_ctr);
static inline bool is_debug_idt_enabled(void)
@@ -567,31 +420,6 @@ static inline void load_debug_idt(void)
}
#endif
-#ifdef CONFIG_TRACING
-extern atomic_t trace_idt_ctr;
-static inline bool is_trace_idt_enabled(void)
-{
- if (atomic_read(&trace_idt_ctr))
- return true;
-
- return false;
-}
-
-static inline void load_trace_idt(void)
-{
- load_idt((const struct desc_ptr *)&trace_idt_descr);
-}
-#else
-static inline bool is_trace_idt_enabled(void)
-{
- return false;
-}
-
-static inline void load_trace_idt(void)
-{
-}
-#endif
-
/*
* The load_current_idt() must be called with interrupts disabled
* to avoid races. That way the IDT will always be set back to the expected
@@ -603,9 +431,25 @@ static inline void load_current_idt(void)
{
if (is_debug_idt_enabled())
load_debug_idt();
- else if (is_trace_idt_enabled())
- load_trace_idt();
else
load_idt((const struct desc_ptr *)&idt_descr);
}
+
+extern void idt_setup_early_handler(void);
+extern void idt_setup_early_traps(void);
+extern void idt_setup_traps(void);
+extern void idt_setup_apic_and_irq_gates(void);
+
+#ifdef CONFIG_X86_64
+extern void idt_setup_early_pf(void);
+extern void idt_setup_ist_traps(void);
+extern void idt_setup_debugidt_traps(void);
+#else
+static inline void idt_setup_early_pf(void) { }
+static inline void idt_setup_ist_traps(void) { }
+static inline void idt_setup_debugidt_traps(void) { }
+#endif
+
+extern void idt_invalidate(void *addr);
+
#endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index 49265345d4d2..346d252029b7 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -11,34 +11,30 @@
#include <linux/types.h>
-/*
- * FIXME: Accessing the desc_struct through its fields is more elegant,
- * and should be the one valid thing to do. However, a lot of open code
- * still touches the a and b accessors, and doing this allow us to do it
- * incrementally. We keep the signature as a struct, rather than a union,
- * so we can get rid of it transparently in the future -- glommer
- */
/* 8 byte segment descriptor */
struct desc_struct {
- union {
- struct {
- unsigned int a;
- unsigned int b;
- };
- struct {
- u16 limit0;
- u16 base0;
- unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1;
- unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8;
- };
- };
+ u16 limit0;
+ u16 base0;
+ u16 base1: 8, type: 4, s: 1, dpl: 2, p: 1;
+ u16 limit1: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8;
} __attribute__((packed));
-#define GDT_ENTRY_INIT(flags, base, limit) { { { \
- .a = ((limit) & 0xffff) | (((base) & 0xffff) << 16), \
- .b = (((base) & 0xff0000) >> 16) | (((flags) & 0xf0ff) << 8) | \
- ((limit) & 0xf0000) | ((base) & 0xff000000), \
- } } }
+#define GDT_ENTRY_INIT(flags, base, limit) \
+ { \
+ .limit0 = (u16) (limit), \
+ .limit1 = ((limit) >> 16) & 0x0F, \
+ .base0 = (u16) (base), \
+ .base1 = ((base) >> 16) & 0xFF, \
+ .base2 = ((base) >> 24) & 0xFF, \
+ .type = (flags & 0x0f), \
+ .s = (flags >> 4) & 0x01, \
+ .dpl = (flags >> 5) & 0x03, \
+ .p = (flags >> 7) & 0x01, \
+ .avl = (flags >> 12) & 0x01, \
+ .l = (flags >> 13) & 0x01, \
+ .d = (flags >> 14) & 0x01, \
+ .g = (flags >> 15) & 0x01, \
+ }
enum {
GATE_INTERRUPT = 0xE,
@@ -47,49 +43,63 @@ enum {
GATE_TASK = 0x5,
};
-/* 16byte gate */
-struct gate_struct64 {
- u16 offset_low;
- u16 segment;
- unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
- u16 offset_middle;
- u32 offset_high;
- u32 zero1;
-} __attribute__((packed));
-
-#define PTR_LOW(x) ((unsigned long long)(x) & 0xFFFF)
-#define PTR_MIDDLE(x) (((unsigned long long)(x) >> 16) & 0xFFFF)
-#define PTR_HIGH(x) ((unsigned long long)(x) >> 32)
-
enum {
DESC_TSS = 0x9,
DESC_LDT = 0x2,
DESCTYPE_S = 0x10, /* !system */
};
-/* LDT or TSS descriptor in the GDT. 16 bytes. */
-struct ldttss_desc64 {
- u16 limit0;
- u16 base0;
- unsigned base1 : 8, type : 5, dpl : 2, p : 1;
- unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
- u32 base3;
- u32 zero1;
+/* LDT or TSS descriptor in the GDT. */
+struct ldttss_desc {
+ u16 limit0;
+ u16 base0;
+
+ u16 base1 : 8, type : 5, dpl : 2, p : 1;
+ u16 limit1 : 4, zero0 : 3, g : 1, base2 : 8;
+#ifdef CONFIG_X86_64
+ u32 base3;
+ u32 zero1;
+#endif
} __attribute__((packed));
+typedef struct ldttss_desc ldt_desc;
+typedef struct ldttss_desc tss_desc;
+
+struct idt_bits {
+ u16 ist : 3,
+ zero : 5,
+ type : 5,
+ dpl : 2,
+ p : 1;
+} __attribute__((packed));
+
+struct gate_struct {
+ u16 offset_low;
+ u16 segment;
+ struct idt_bits bits;
+ u16 offset_middle;
+#ifdef CONFIG_X86_64
+ u32 offset_high;
+ u32 reserved;
+#endif
+} __attribute__((packed));
+
+typedef struct gate_struct gate_desc;
+
+static inline unsigned long gate_offset(const gate_desc *g)
+{
#ifdef CONFIG_X86_64
-typedef struct gate_struct64 gate_desc;
-typedef struct ldttss_desc64 ldt_desc;
-typedef struct ldttss_desc64 tss_desc;
-#define gate_offset(g) ((g).offset_low | ((unsigned long)(g).offset_middle << 16) | ((unsigned long)(g).offset_high << 32))
-#define gate_segment(g) ((g).segment)
+ return g->offset_low | ((unsigned long)g->offset_middle << 16) |
+ ((unsigned long) g->offset_high << 32);
#else
-typedef struct desc_struct gate_desc;
-typedef struct desc_struct ldt_desc;
-typedef struct desc_struct tss_desc;
-#define gate_offset(g) (((g).b & 0xffff0000) | ((g).a & 0x0000ffff))
-#define gate_segment(g) ((g).a >> 16)
+ return g->offset_low | ((unsigned long)g->offset_middle << 16);
#endif
+}
+
+static inline unsigned long gate_segment(const gate_desc *g)
+{
+ return g->segment;
+}
struct desc_ptr {
unsigned short size;
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 5dff775af7cd..c10c9128f54e 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -21,11 +21,13 @@
# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
+# define DISABLE_PCID 0
#else
# define DISABLE_VME 0
# define DISABLE_K6_MTRR 0
# define DISABLE_CYRIX_ARR 0
# define DISABLE_CENTAUR_MCR 0
+# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
#endif /* CONFIG_X86_64 */
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
@@ -49,7 +51,7 @@
#define DISABLED_MASK1 0
#define DISABLED_MASK2 0
#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4 0
+#define DISABLED_MASK4 (DISABLE_PCID)
#define DISABLED_MASK5 0
#define DISABLED_MASK6 0
#define DISABLED_MASK7 0
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 398c79889f5c..1387dafdba2d 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -12,6 +12,7 @@
#include <asm/io.h>
#include <asm/swiotlb.h>
#include <linux/dma-contiguous.h>
+#include <linux/mem_encrypt.h>
#ifdef CONFIG_ISA
# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24)
@@ -57,12 +58,12 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
{
- return paddr;
+ return __sme_set(paddr);
}
static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
{
- return daddr;
+ return __sme_clr(daddr);
}
#endif /* CONFIG_X86_DMA_REMAP */
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index 3c69fed215c5..a8e15b04565b 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -13,9 +13,9 @@ static __always_inline __init void *dmi_alloc(unsigned len)
}
/* Use early IO mappings for DMI because it's initialized early */
-#define dmi_early_remap early_ioremap
-#define dmi_early_unmap early_iounmap
-#define dmi_remap ioremap_cache
-#define dmi_unmap iounmap
+#define dmi_early_remap early_memremap
+#define dmi_early_unmap early_memunmap
+#define dmi_remap(_x, _l) memremap(_x, _l, MEMREMAP_WB)
+#define dmi_unmap(_x) memunmap(_x)
#endif /* _ASM_X86_DMI_H */
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index a504adc661a4..cd266d830e49 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -39,6 +39,8 @@ extern void e820__setup_pci_gap(void);
extern void e820__reallocate_tables(void);
extern void e820__register_nosave_regions(unsigned long limit_pfn);
+extern int e820__get_entry_type(u64 start, u64 end);
+
/*
* Returns true iff the specified range [start,end) is completely contained inside
* the ISA region.
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1c18d83d3f09..04330c8d9af9 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -126,15 +126,15 @@ do { \
pr_reg[4] = regs->di; \
pr_reg[5] = regs->bp; \
pr_reg[6] = regs->ax; \
- pr_reg[7] = regs->ds & 0xffff; \
- pr_reg[8] = regs->es & 0xffff; \
- pr_reg[9] = regs->fs & 0xffff; \
+ pr_reg[7] = regs->ds; \
+ pr_reg[8] = regs->es; \
+ pr_reg[9] = regs->fs; \
pr_reg[11] = regs->orig_ax; \
pr_reg[12] = regs->ip; \
- pr_reg[13] = regs->cs & 0xffff; \
+ pr_reg[13] = regs->cs; \
pr_reg[14] = regs->flags; \
pr_reg[15] = regs->sp; \
- pr_reg[16] = regs->ss & 0xffff; \
+ pr_reg[16] = regs->ss; \
} while (0);
#define ELF_CORE_COPY_REGS(pr_reg, regs) \
@@ -204,6 +204,7 @@ void set_personality_ia32(bool);
#define ELF_CORE_COPY_REGS(pr_reg, regs) \
do { \
+ unsigned long base; \
unsigned v; \
(pr_reg)[0] = (regs)->r15; \
(pr_reg)[1] = (regs)->r14; \
@@ -226,8 +227,8 @@ do { \
(pr_reg)[18] = (regs)->flags; \
(pr_reg)[19] = (regs)->sp; \
(pr_reg)[20] = (regs)->ss; \
- (pr_reg)[21] = current->thread.fsbase; \
- (pr_reg)[22] = current->thread.gsbase; \
+ rdmsrl(MSR_FS_BASE, base); (pr_reg)[21] = base; \
+ rdmsrl(MSR_KERNEL_GS_BASE, base); (pr_reg)[22] = base; \
asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \
asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \
asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \
@@ -247,11 +248,11 @@ extern int force_personality32;
/*
* This is the base location for PIE (ET_DYN with INTERP) loads. On
- * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * 64-bit, this is above 4GB to leave the entire 32-bit address
* space open for things that want to use the area for 32-bit pointers.
*/
#define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \
- 0x100000000UL)
+ (TASK_SIZE / 3 * 2))
/* This yields a mask that user programs can use to figure out what
instruction set this CPU supports. This could be done in user space,
@@ -304,8 +305,8 @@ static inline int mmap_is_ia32(void)
test_thread_flag(TIF_ADDR32));
}
-extern unsigned long tasksize_32bit(void);
-extern unsigned long tasksize_64bit(void);
+extern unsigned long task_size_32bit(void);
+extern unsigned long task_size_64bit(int full_addr_space);
extern unsigned long get_mmap_base(int is_legacy);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 07b06955a05d..aa15d1f7e530 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -13,20 +13,14 @@
BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
-BUILD_INTERRUPT3(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR,
- smp_irq_move_cleanup_interrupt)
-BUILD_INTERRUPT3(reboot_interrupt, REBOOT_VECTOR, smp_reboot_interrupt)
+BUILD_INTERRUPT(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR)
+BUILD_INTERRUPT(reboot_interrupt, REBOOT_VECTOR)
#endif
-BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
-
#ifdef CONFIG_HAVE_KVM
-BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR,
- smp_kvm_posted_intr_ipi)
-BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR,
- smp_kvm_posted_intr_wakeup_ipi)
-BUILD_INTERRUPT3(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR,
- smp_kvm_posted_intr_nested_ipi)
+BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
+BUILD_INTERRUPT(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR)
+BUILD_INTERRUPT(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR)
#endif
/*
@@ -41,6 +35,7 @@ BUILD_INTERRUPT3(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR,
BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
#ifdef CONFIG_IRQ_WORK
BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index b65155cc3760..dcd9fb55e679 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -157,6 +157,26 @@ static inline void __set_fixmap(enum fixed_addresses idx,
}
#endif
+/*
+ * FIXMAP_PAGE_NOCACHE is used for MMIO. Memory encryption is not
+ * supported for MMIO addresses, so make sure that the memory encryption
+ * mask is not part of the page attributes.
+ */
+#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_IO_NOCACHE
+
+/*
+ * Early memremap routines used for in-place encryption. The mappings created
+ * by these routines are intended to be used as temporary mappings.
+ */
+void __init *early_memremap_encrypted(resource_size_t phys_addr,
+ unsigned long size);
+void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
+ unsigned long size);
+void __init *early_memremap_decrypted(resource_size_t phys_addr,
+ unsigned long size);
+void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
+ unsigned long size);
+
#include <asm-generic/fixmap.h>
#define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags)
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 255645f60ca2..554cdb205d17 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -450,10 +450,10 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
return 0;
}
-static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate)
+static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
{
if (use_xsave()) {
- copy_kernel_to_xregs(&fpstate->xsave, -1);
+ copy_kernel_to_xregs(&fpstate->xsave, mask);
} else {
if (use_fxsr())
copy_kernel_to_fxregs(&fpstate->fxsave);
@@ -477,7 +477,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
: : [addr] "m" (fpstate));
}
- __copy_kernel_to_fpregs(fpstate);
+ __copy_kernel_to_fpregs(fpstate, -1);
}
extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index b4c1f5453436..f4dc9b63bdda 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -41,20 +41,11 @@
"+m" (*uaddr), "=&r" (tem) \
: "r" (oparg), "i" (-EFAULT), "1" (0))
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+ u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret, tem;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
- return -EFAULT;
-
pagefault_disable();
switch (op) {
@@ -80,30 +71,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
pagefault_enable();
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ:
- ret = (oldval == cmparg);
- break;
- case FUTEX_OP_CMP_NE:
- ret = (oldval != cmparg);
- break;
- case FUTEX_OP_CMP_LT:
- ret = (oldval < cmparg);
- break;
- case FUTEX_OP_CMP_GE:
- ret = (oldval >= cmparg);
- break;
- case FUTEX_OP_CMP_LE:
- ret = (oldval <= cmparg);
- break;
- case FUTEX_OP_CMP_GT:
- ret = (oldval > cmparg);
- break;
- default:
- ret = -ENOSYS;
- }
- }
+ if (!ret)
+ *oval = oldval;
+
return ret;
}
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index d6dbafbd4207..6dfe366a8804 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -46,26 +46,6 @@ extern asmlinkage void deferred_error_interrupt(void);
extern asmlinkage void call_function_interrupt(void);
extern asmlinkage void call_function_single_interrupt(void);
-#ifdef CONFIG_TRACING
-/* Interrupt handlers registered during init_IRQ */
-extern void trace_apic_timer_interrupt(void);
-extern void trace_x86_platform_ipi(void);
-extern void trace_error_interrupt(void);
-extern void trace_irq_work_interrupt(void);
-extern void trace_spurious_interrupt(void);
-extern void trace_thermal_interrupt(void);
-extern void trace_reschedule_interrupt(void);
-extern void trace_threshold_interrupt(void);
-extern void trace_deferred_error_interrupt(void);
-extern void trace_call_function_interrupt(void);
-extern void trace_call_function_single_interrupt(void);
-#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt
-#define trace_reboot_interrupt reboot_interrupt
-#define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
-#define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi
-#define trace_kvm_posted_intr_nested_ipi kvm_posted_intr_nested_ipi
-#endif /* CONFIG_TRACING */
-
#ifdef CONFIG_X86_LOCAL_APIC
struct irq_data;
struct pci_dev;
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 21126155a739..0ead9dbb9130 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -43,6 +43,9 @@ struct hypervisor_x86 {
/* pin current vcpu to specified physical cpu (run rarely) */
void (*pin_vcpu)(int);
+
+ /* called during init_mem_mapping() to setup early mappings. */
+ void (*init_mem_mapping)(void);
};
extern const struct hypervisor_x86 *x86_hyper;
@@ -57,8 +60,15 @@ extern const struct hypervisor_x86 x86_hyper_kvm;
extern void init_hypervisor_platform(void);
extern bool hypervisor_x2apic_available(void);
extern void hypervisor_pin_vcpu(int cpu);
+
+static inline void hypervisor_init_mem_mapping(void)
+{
+ if (x86_hyper && x86_hyper->init_mem_mapping)
+ x86_hyper->init_mem_mapping();
+}
#else
static inline void init_hypervisor_platform(void) { }
static inline bool hypervisor_x2apic_available(void) { return false; }
+static inline void hypervisor_init_mem_mapping(void) { }
#endif /* CONFIG_HYPERVISOR_GUEST */
#endif /* _ASM_X86_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 474eb8c66fee..05c4aa00cc86 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -7,6 +7,7 @@ struct x86_mapping_info {
unsigned long page_flag; /* page flag for PMD or PUD entry */
unsigned long offset; /* ident mapping offset */
bool direct_gbpages; /* PUD level 1GB page support */
+ unsigned long kernpg_flag; /* kernel pagetable flag override */
};
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
deleted file mode 100644
index 597dc4995678..000000000000
--- a/arch/x86/include/asm/intel_rdt.h
+++ /dev/null
@@ -1,286 +0,0 @@
-#ifndef _ASM_X86_INTEL_RDT_H
-#define _ASM_X86_INTEL_RDT_H
-
-#ifdef CONFIG_INTEL_RDT_A
-
-#include <linux/sched.h>
-#include <linux/kernfs.h>
-#include <linux/jump_label.h>
-
-#include <asm/intel_rdt_common.h>
-
-#define IA32_L3_QOS_CFG 0xc81
-#define IA32_L3_CBM_BASE 0xc90
-#define IA32_L2_CBM_BASE 0xd10
-#define IA32_MBA_THRTL_BASE 0xd50
-
-#define L3_QOS_CDP_ENABLE 0x01ULL
-
-/**
- * struct rdtgroup - store rdtgroup's data in resctrl file system.
- * @kn: kernfs node
- * @rdtgroup_list: linked list for all rdtgroups
- * @closid: closid for this rdtgroup
- * @cpu_mask: CPUs assigned to this rdtgroup
- * @flags: status bits
- * @waitcount: how many cpus expect to find this
- * group when they acquire rdtgroup_mutex
- */
-struct rdtgroup {
- struct kernfs_node *kn;
- struct list_head rdtgroup_list;
- int closid;
- struct cpumask cpu_mask;
- int flags;
- atomic_t waitcount;
-};
-
-/* rdtgroup.flags */
-#define RDT_DELETED 1
-
-/* rftype.flags */
-#define RFTYPE_FLAGS_CPUS_LIST 1
-
-/* List of all resource groups */
-extern struct list_head rdt_all_groups;
-
-extern int max_name_width, max_data_width;
-
-int __init rdtgroup_init(void);
-
-/**
- * struct rftype - describe each file in the resctrl file system
- * @name: File name
- * @mode: Access mode
- * @kf_ops: File operations
- * @flags: File specific RFTYPE_FLAGS_* flags
- * @seq_show: Show content of the file
- * @write: Write to the file
- */
-struct rftype {
- char *name;
- umode_t mode;
- struct kernfs_ops *kf_ops;
- unsigned long flags;
-
- int (*seq_show)(struct kernfs_open_file *of,
- struct seq_file *sf, void *v);
- /*
- * write() is the generic write callback which maps directly to
- * kernfs write operation and overrides all other operations.
- * Maximum write size is determined by ->max_write_len.
- */
- ssize_t (*write)(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off);
-};
-
-/**
- * struct rdt_domain - group of cpus sharing an RDT resource
- * @list: all instances of this resource
- * @id: unique id for this instance
- * @cpu_mask: which cpus share this resource
- * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
- * @new_ctrl: new ctrl value to be loaded
- * @have_new_ctrl: did user provide new_ctrl for this domain
- */
-struct rdt_domain {
- struct list_head list;
- int id;
- struct cpumask cpu_mask;
- u32 *ctrl_val;
- u32 new_ctrl;
- bool have_new_ctrl;
-};
-
-/**
- * struct msr_param - set a range of MSRs from a domain
- * @res: The resource to use
- * @low: Beginning index from base MSR
- * @high: End index
- */
-struct msr_param {
- struct rdt_resource *res;
- int low;
- int high;
-};
-
-/**
- * struct rdt_cache - Cache allocation related data
- * @cbm_len: Length of the cache bit mask
- * @min_cbm_bits: Minimum number of consecutive bits to be set
- * @cbm_idx_mult: Multiplier of CBM index
- * @cbm_idx_offset: Offset of CBM index. CBM index is computed by:
- * closid * cbm_idx_multi + cbm_idx_offset
- * in a cache bit mask
- */
-struct rdt_cache {
- unsigned int cbm_len;
- unsigned int min_cbm_bits;
- unsigned int cbm_idx_mult;
- unsigned int cbm_idx_offset;
-};
-
-/**
- * struct rdt_membw - Memory bandwidth allocation related data
- * @max_delay: Max throttle delay. Delay is the hardware
- * representation for memory bandwidth.
- * @min_bw: Minimum memory bandwidth percentage user can request
- * @bw_gran: Granularity at which the memory bandwidth is allocated
- * @delay_linear: True if memory B/W delay is in linear scale
- * @mb_map: Mapping of memory B/W percentage to memory B/W delay
- */
-struct rdt_membw {
- u32 max_delay;
- u32 min_bw;
- u32 bw_gran;
- u32 delay_linear;
- u32 *mb_map;
-};
-
-/**
- * struct rdt_resource - attributes of an RDT resource
- * @enabled: Is this feature enabled on this machine
- * @capable: Is this feature available on this machine
- * @name: Name to use in "schemata" file
- * @num_closid: Number of CLOSIDs available
- * @cache_level: Which cache level defines scope of this resource
- * @default_ctrl: Specifies default cache cbm or memory B/W percent.
- * @msr_base: Base MSR address for CBMs
- * @msr_update: Function pointer to update QOS MSRs
- * @data_width: Character width of data when displaying
- * @domains: All domains for this resource
- * @cache: Cache allocation related data
- * @info_files: resctrl info files for the resource
- * @nr_info_files: Number of info files
- * @format_str: Per resource format string to show domain value
- * @parse_ctrlval: Per resource function pointer to parse control values
- */
-struct rdt_resource {
- bool enabled;
- bool capable;
- char *name;
- int num_closid;
- int cache_level;
- u32 default_ctrl;
- unsigned int msr_base;
- void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
- struct rdt_resource *r);
- int data_width;
- struct list_head domains;
- struct rdt_cache cache;
- struct rdt_membw membw;
- struct rftype *info_files;
- int nr_info_files;
- const char *format_str;
- int (*parse_ctrlval) (char *buf, struct rdt_resource *r,
- struct rdt_domain *d);
-};
-
-void rdt_get_cache_infofile(struct rdt_resource *r);
-void rdt_get_mba_infofile(struct rdt_resource *r);
-int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d);
-int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d);
-
-extern struct mutex rdtgroup_mutex;
-
-extern struct rdt_resource rdt_resources_all[];
-extern struct rdtgroup rdtgroup_default;
-DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
-
-int __init rdtgroup_init(void);
-
-enum {
- RDT_RESOURCE_L3,
- RDT_RESOURCE_L3DATA,
- RDT_RESOURCE_L3CODE,
- RDT_RESOURCE_L2,
- RDT_RESOURCE_MBA,
-
- /* Must be the last */
- RDT_NUM_RESOURCES,
-};
-
-#define for_each_capable_rdt_resource(r) \
- for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
- r++) \
- if (r->capable)
-
-#define for_each_enabled_rdt_resource(r) \
- for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
- r++) \
- if (r->enabled)
-
-/* CPUID.(EAX=10H, ECX=ResID=1).EAX */
-union cpuid_0x10_1_eax {
- struct {
- unsigned int cbm_len:5;
- } split;
- unsigned int full;
-};
-
-/* CPUID.(EAX=10H, ECX=ResID=3).EAX */
-union cpuid_0x10_3_eax {
- struct {
- unsigned int max_delay:12;
- } split;
- unsigned int full;
-};
-
-/* CPUID.(EAX=10H, ECX=ResID).EDX */
-union cpuid_0x10_x_edx {
- struct {
- unsigned int cos_max:16;
- } split;
- unsigned int full;
-};
-
-DECLARE_PER_CPU_READ_MOSTLY(int, cpu_closid);
-
-void rdt_ctrl_update(void *arg);
-struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
-void rdtgroup_kn_unlock(struct kernfs_node *kn);
-ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
- char *buf, size_t nbytes, loff_t off);
-int rdtgroup_schemata_show(struct kernfs_open_file *of,
- struct seq_file *s, void *v);
-
-/*
- * intel_rdt_sched_in() - Writes the task's CLOSid to IA32_PQR_MSR
- *
- * Following considerations are made so that this has minimal impact
- * on scheduler hot path:
- * - This will stay as no-op unless we are running on an Intel SKU
- * which supports resource control and we enable by mounting the
- * resctrl file system.
- * - Caches the per cpu CLOSid values and does the MSR write only
- * when a task with a different CLOSid is scheduled in.
- *
- * Must be called with preemption disabled.
- */
-static inline void intel_rdt_sched_in(void)
-{
- if (static_branch_likely(&rdt_enable_key)) {
- struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
- int closid;
-
- /*
- * If this task has a closid assigned, use it.
- * Else use the closid assigned to this cpu.
- */
- closid = current->closid;
- if (closid == 0)
- closid = this_cpu_read(cpu_closid);
-
- if (closid != state->closid) {
- state->closid = closid;
- wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, closid);
- }
- }
-}
-
-#else
-
-static inline void intel_rdt_sched_in(void) {}
-
-#endif /* CONFIG_INTEL_RDT_A */
-#endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/include/asm/intel_rdt_common.h b/arch/x86/include/asm/intel_rdt_common.h
deleted file mode 100644
index b31081b89407..000000000000
--- a/arch/x86/include/asm/intel_rdt_common.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef _ASM_X86_INTEL_RDT_COMMON_H
-#define _ASM_X86_INTEL_RDT_COMMON_H
-
-#define MSR_IA32_PQR_ASSOC 0x0c8f
-
-/**
- * struct intel_pqr_state - State cache for the PQR MSR
- * @rmid: The cached Resource Monitoring ID
- * @closid: The cached Class Of Service ID
- * @rmid_usecnt: The usage counter for rmid
- *
- * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
- * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
- * contains both parts, so we need to cache them.
- *
- * The cache also helps to avoid pointless updates if the value does
- * not change.
- */
-struct intel_pqr_state {
- u32 rmid;
- u32 closid;
- int rmid_usecnt;
-};
-
-DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
-
-#endif /* _ASM_X86_INTEL_RDT_COMMON_H */
diff --git a/arch/x86/include/asm/intel_rdt_sched.h b/arch/x86/include/asm/intel_rdt_sched.h
new file mode 100644
index 000000000000..b4bbf8b21512
--- /dev/null
+++ b/arch/x86/include/asm/intel_rdt_sched.h
@@ -0,0 +1,92 @@
+#ifndef _ASM_X86_INTEL_RDT_SCHED_H
+#define _ASM_X86_INTEL_RDT_SCHED_H
+
+#ifdef CONFIG_INTEL_RDT
+
+#include <linux/sched.h>
+#include <linux/jump_label.h>
+
+#define IA32_PQR_ASSOC 0x0c8f
+
+/**
+ * struct intel_pqr_state - State cache for the PQR MSR
+ * @cur_rmid: The cached Resource Monitoring ID
+ * @cur_closid: The cached Class Of Service ID
+ * @default_rmid: The user assigned Resource Monitoring ID
+ * @default_closid: The user assigned cached Class Of Service ID
+ *
+ * The upper 32 bits of IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them. This also
+ * stores the user configured per cpu CLOSID and RMID.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct intel_pqr_state {
+ u32 cur_rmid;
+ u32 cur_closid;
+ u32 default_rmid;
+ u32 default_closid;
+};
+
+DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
+
+DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
+DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
+DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
+
+/*
+ * __intel_rdt_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
+ *
+ * Following considerations are made so that this has minimal impact
+ * on scheduler hot path:
+ * - This will stay as no-op unless we are running on an Intel SKU
+ * which supports resource control or monitoring and we enable by
+ * mounting the resctrl file system.
+ * - Caches the per cpu CLOSid/RMID values and does the MSR write only
+ * when a task with a different CLOSid/RMID is scheduled in.
+ * - We allocate RMIDs/CLOSids globally in order to keep this as
+ * simple as possible.
+ * Must be called with preemption disabled.
+ */
+static void __intel_rdt_sched_in(void)
+{
+ struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+ u32 closid = state->default_closid;
+ u32 rmid = state->default_rmid;
+
+ /*
+ * If this task has a closid/rmid assigned, use it.
+ * Else use the closid/rmid assigned to this cpu.
+ */
+ if (static_branch_likely(&rdt_alloc_enable_key)) {
+ if (current->closid)
+ closid = current->closid;
+ }
+
+ if (static_branch_likely(&rdt_mon_enable_key)) {
+ if (current->rmid)
+ rmid = current->rmid;
+ }
+
+ if (closid != state->cur_closid || rmid != state->cur_rmid) {
+ state->cur_closid = closid;
+ state->cur_rmid = rmid;
+ wrmsr(IA32_PQR_ASSOC, rmid, closid);
+ }
+}
+
+static inline void intel_rdt_sched_in(void)
+{
+ if (static_branch_likely(&rdt_enable_key))
+ __intel_rdt_sched_in();
+}
+
+#else
+
+static inline void intel_rdt_sched_in(void) {}
+
+#endif /* CONFIG_INTEL_RDT */
+
+#endif /* _ASM_X86_INTEL_RDT_SCHED_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 48febf07e828..c40a95c33bb8 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -69,6 +69,9 @@ build_mmio_write(__writeb, "b", unsigned char, "q", )
build_mmio_write(__writew, "w", unsigned short, "r", )
build_mmio_write(__writel, "l", unsigned int, "r", )
+#define readb readb
+#define readw readw
+#define readl readl
#define readb_relaxed(a) __readb(a)
#define readw_relaxed(a) __readw(a)
#define readl_relaxed(a) __readl(a)
@@ -76,6 +79,9 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
#define __raw_readw __readw
#define __raw_readl __readl
+#define writeb writeb
+#define writew writew
+#define writel writel
#define writeb_relaxed(v, a) __writeb(v, a)
#define writew_relaxed(v, a) __writew(v, a)
#define writel_relaxed(v, a) __writel(v, a)
@@ -88,13 +94,15 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
#ifdef CONFIG_X86_64
build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
+build_mmio_read(__readq, "q", unsigned long, "=r", )
build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
+build_mmio_write(__writeq, "q", unsigned long, "r", )
-#define readq_relaxed(a) readq(a)
-#define writeq_relaxed(v, a) writeq(v, a)
+#define readq_relaxed(a) __readq(a)
+#define writeq_relaxed(v, a) __writeq(v, a)
-#define __raw_readq(a) readq(a)
-#define __raw_writeq(val, addr) writeq(val, addr)
+#define __raw_readq __readq
+#define __raw_writeq __writeq
/* Let people know that we have them */
#define readq readq
@@ -119,6 +127,7 @@ static inline phys_addr_t virt_to_phys(volatile void *address)
{
return __pa(address);
}
+#define virt_to_phys virt_to_phys
/**
* phys_to_virt - map physical address to virtual
@@ -137,6 +146,7 @@ static inline void *phys_to_virt(phys_addr_t address)
{
return __va(address);
}
+#define phys_to_virt phys_to_virt
/*
* Change "struct page" to physical address.
@@ -169,11 +179,14 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
* else, you probably want one of the following.
*/
extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
+#define ioremap_nocache ioremap_nocache
extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
#define ioremap_uc ioremap_uc
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
+#define ioremap_cache ioremap_cache
extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val);
+#define ioremap_prot ioremap_prot
/**
* ioremap - map bus memory into CPU space
@@ -193,8 +206,10 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
{
return ioremap_nocache(offset, size);
}
+#define ioremap ioremap
extern void iounmap(volatile void __iomem *addr);
+#define iounmap iounmap
extern void set_iounmap_nonlazy(void);
@@ -203,53 +218,6 @@ extern void set_iounmap_nonlazy(void);
#include <asm-generic/iomap.h>
/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p) p
-
-/**
- * memset_io Set a range of I/O memory to a constant value
- * @addr: The beginning of the I/O-memory range to set
- * @val: The value to set the memory to
- * @count: The number of bytes to set
- *
- * Set a range of I/O memory to a given value.
- */
-static inline void
-memset_io(volatile void __iomem *addr, unsigned char val, size_t count)
-{
- memset((void __force *)addr, val, count);
-}
-
-/**
- * memcpy_fromio Copy a block of data from I/O memory
- * @dst: The (RAM) destination for the copy
- * @src: The (I/O memory) source for the data
- * @count: The number of bytes to copy
- *
- * Copy a block of data from I/O memory.
- */
-static inline void
-memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count)
-{
- memcpy(dst, (const void __force *)src, count);
-}
-
-/**
- * memcpy_toio Copy a block of data into I/O memory
- * @dst: The (I/O memory) destination for the copy
- * @src: The (RAM) source for the data
- * @count: The number of bytes to copy
- *
- * Copy a block of data to I/O memory.
- */
-static inline void
-memcpy_toio(volatile void __iomem *dst, const void *src, size_t count)
-{
- memcpy((void __force *)dst, src, count);
-}
-
-/*
* ISA space is 'always mapped' on a typical x86 system, no need to
* explicitly ioremap() it. The fact that the ISA IO space is mapped
* to PAGE_OFFSET is pure coincidence - it does not mean ISA values
@@ -341,13 +309,38 @@ BUILDIO(b, b, char)
BUILDIO(w, w, short)
BUILDIO(l, , int)
+#define inb inb
+#define inw inw
+#define inl inl
+#define inb_p inb_p
+#define inw_p inw_p
+#define inl_p inl_p
+#define insb insb
+#define insw insw
+#define insl insl
+
+#define outb outb
+#define outw outw
+#define outl outl
+#define outb_p outb_p
+#define outw_p outw_p
+#define outl_p outl_p
+#define outsb outsb
+#define outsw outsw
+#define outsl outsl
+
extern void *xlate_dev_mem_ptr(phys_addr_t phys);
extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
+#define xlate_dev_mem_ptr xlate_dev_mem_ptr
+#define unxlate_dev_mem_ptr unxlate_dev_mem_ptr
+
extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
enum page_cache_mode pcm);
extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size);
+#define ioremap_wc ioremap_wc
extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size);
+#define ioremap_wt ioremap_wt
extern bool is_early_ioremap_ptep(pte_t *ptep);
@@ -365,6 +358,9 @@ extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
#define IO_SPACE_LIMIT 0xffff
+#include <asm-generic/io.h>
+#undef PCI_IOBASE
+
#ifdef CONFIG_MTRR
extern int __must_check arch_phys_wc_index(int handle);
#define arch_phys_wc_index arch_phys_wc_index
@@ -381,4 +377,12 @@ extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
#endif
+extern bool arch_memremap_can_ram_remap(resource_size_t offset,
+ unsigned long size,
+ unsigned long flags);
+#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
+
+extern bool phys_mem_access_encrypted(unsigned long phys_addr,
+ unsigned long size);
+
#endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 668cca540025..9958ceea2fa3 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -42,10 +42,6 @@ extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs);
extern __visible unsigned int do_IRQ(struct pt_regs *regs);
-/* Interrupt vector management */
-extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
-extern int vector_used_by_percpu_irq(unsigned int vector);
-
extern void init_ISA_irqs(void);
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h
index f70604125286..ddbb8ea0f5a9 100644
--- a/arch/x86/include/asm/irq_work.h
+++ b/arch/x86/include/asm/irq_work.h
@@ -3,9 +3,17 @@
#include <asm/cpufeature.h>
+#ifdef CONFIG_X86_LOCAL_APIC
static inline bool arch_irq_work_has_interrupt(void)
{
return boot_cpu_has(X86_FEATURE_APIC);
}
+extern void arch_irq_work_raise(void);
+#else
+static inline bool arch_irq_work_has_interrupt(void)
+{
+ return false;
+}
+#endif
#endif /* _ASM_IRQ_WORK_H */
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 70ef205489f0..942c1f444da8 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -147,7 +147,8 @@ unsigned long
relocate_kernel(unsigned long indirection_page,
unsigned long page_list,
unsigned long start_address,
- unsigned int preserve_context);
+ unsigned int preserve_context,
+ unsigned int sme_active);
#endif
#define ARCH_HAS_KIMAGE_ARCH
@@ -207,6 +208,14 @@ struct kexec_entry64_regs {
uint64_t r15;
uint64_t rip;
};
+
+extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
+ gfp_t gfp);
+#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages
+
+extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
+#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
+
#endif
typedef void crash_vmclear_fn(void);
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index fde36f189836..fa2558e12024 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -219,8 +219,8 @@ struct x86_emulate_ops {
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
- void (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
- u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+ bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx, bool check_limit);
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 87ac4fba6d8e..8844eee290b2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,15 +79,14 @@
| X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
| X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
-#define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
#define CR3_PCID_INVD BIT_64(63)
#define CR4_RESERVED_BITS \
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
- | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \
- | X86_CR4_PKE))
+ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
+ | X86_CR4_SMAP | X86_CR4_PKE))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -204,7 +203,6 @@ enum {
#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
- PFERR_USER_MASK | \
PFERR_WRITE_MASK | \
PFERR_PRESENT_MASK)
@@ -317,15 +315,17 @@ struct kvm_pio_request {
int size;
};
+#define PT64_ROOT_MAX_LEVEL 5
+
struct rsvd_bits_validate {
- u64 rsvd_bits_mask[2][4];
+ u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL];
u64 bad_mt_xwr;
};
/*
- * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
- * 32-bit). The kvm_mmu structure abstracts the details of the current mmu
- * mode.
+ * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
+ * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
+ * current mmu mode.
*/
struct kvm_mmu {
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
@@ -492,6 +492,7 @@ struct kvm_vcpu_arch {
unsigned long cr4;
unsigned long cr4_guest_owned_bits;
unsigned long cr8;
+ u32 pkru;
u32 hflags;
u64 efer;
u64 apic_base;
@@ -547,8 +548,8 @@ struct kvm_vcpu_arch {
struct kvm_queued_exception {
bool pending;
+ bool injected;
bool has_error_code;
- bool reinject;
u8 nr;
u32 error_code;
u8 nested_apf;
@@ -686,8 +687,12 @@ struct kvm_vcpu_arch {
int pending_ioapic_eoi;
int pending_external_vector;
- /* GPA available (AMD only) */
+ /* GPA available */
bool gpa_available;
+ gpa_t gpa_val;
+
+ /* be preempted when it's in kernel-mode(cpl=0) */
+ bool preempted_in_kernel;
};
struct kvm_lpage_info {
@@ -978,7 +983,7 @@ struct kvm_x86_ops {
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
- int (*get_tdp_level)(void);
+ int (*get_tdp_level)(struct kvm_vcpu *vcpu);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
int (*get_lpage_level)(void);
bool (*rdtscp_supported)(void);
@@ -1078,7 +1083,7 @@ void kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
- u64 acc_track_mask);
+ u64 acc_track_mask, u64 me_mask);
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
@@ -1296,20 +1301,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
}
-static inline u64 get_canonical(u64 la)
-{
- return ((int64_t)la << 16) >> 16;
-}
-
-static inline bool is_noncanonical_address(u64 la)
-{
-#ifdef CONFIG_X86_64
- return get_canonical(la) != la;
-#else
- return false;
-#endif
-}
-
#define TSS_IOPB_BASE_OFFSET 0x66
#define TSS_BASE_SIZE 0x68
#define TSS_IOPB_SIZE (65536 / 8)
@@ -1374,8 +1365,6 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
-void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
- unsigned long address);
void kvm_define_shared_msr(unsigned index, u32 msr);
int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h
deleted file mode 100644
index 73d0c9b92087..000000000000
--- a/arch/x86/include/asm/lguest.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef _ASM_X86_LGUEST_H
-#define _ASM_X86_LGUEST_H
-
-#define GDT_ENTRY_LGUEST_CS 10
-#define GDT_ENTRY_LGUEST_DS 11
-#define LGUEST_CS (GDT_ENTRY_LGUEST_CS * 8)
-#define LGUEST_DS (GDT_ENTRY_LGUEST_DS * 8)
-
-#ifndef __ASSEMBLY__
-#include <asm/desc.h>
-
-#define GUEST_PL 1
-
-/* Page for Switcher text itself, then two pages per cpu */
-#define SWITCHER_TEXT_PAGES (1)
-#define SWITCHER_STACK_PAGES (2 * nr_cpu_ids)
-#define TOTAL_SWITCHER_PAGES (SWITCHER_TEXT_PAGES + SWITCHER_STACK_PAGES)
-
-/* Where we map the Switcher, in both Host and Guest. */
-extern unsigned long switcher_addr;
-
-/* Found in switcher.S */
-extern unsigned long default_idt_entries[];
-
-/* Declarations for definitions in arch/x86/lguest/head_32.S */
-extern char lguest_noirq_iret[];
-extern const char lgstart_cli[], lgend_cli[];
-extern const char lgstart_pushf[], lgend_pushf[];
-
-extern void lguest_iret(void);
-extern void lguest_init(void);
-
-struct lguest_regs {
- /* Manually saved part. */
- unsigned long eax, ebx, ecx, edx;
- unsigned long esi, edi, ebp;
- unsigned long gs;
- unsigned long fs, ds, es;
- unsigned long trapnum, errcode;
- /* Trap pushed part */
- unsigned long eip;
- unsigned long cs;
- unsigned long eflags;
- unsigned long esp;
- unsigned long ss;
-};
-
-/* This is a guest-specific page (mapped ro) into the guest. */
-struct lguest_ro_state {
- /* Host information we need to restore when we switch back. */
- u32 host_cr3;
- struct desc_ptr host_idt_desc;
- struct desc_ptr host_gdt_desc;
- u32 host_sp;
-
- /* Fields which are used when guest is running. */
- struct desc_ptr guest_idt_desc;
- struct desc_ptr guest_gdt_desc;
- struct x86_hw_tss guest_tss;
- struct desc_struct guest_idt[IDT_ENTRIES];
- struct desc_struct guest_gdt[GDT_ENTRIES];
-};
-
-struct lg_cpu_arch {
- /* The GDT entries copied into lguest_ro_state when running. */
- struct desc_struct gdt[GDT_ENTRIES];
-
- /* The IDT entries: some copied into lguest_ro_state when running. */
- struct desc_struct idt[IDT_ENTRIES];
-
- /* The address of the last guest-visible pagefault (ie. cr2). */
- unsigned long last_pagefault;
-};
-
-static inline void lguest_set_ts(void)
-{
- u32 cr0;
-
- cr0 = read_cr0();
- if (!(cr0 & 8))
- write_cr0(cr0 | 8);
-}
-
-/* Full 4G segment descriptors, suitable for CS and DS. */
-#define FULL_EXEC_SEGMENT \
- ((struct desc_struct)GDT_ENTRY_INIT(0xc09b, 0, 0xfffff))
-#define FULL_SEGMENT ((struct desc_struct)GDT_ENTRY_INIT(0xc093, 0, 0xfffff))
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_X86_LGUEST_H */
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
deleted file mode 100644
index 6c119cfae218..000000000000
--- a/arch/x86/include/asm/lguest_hcall.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/* Architecture specific portion of the lguest hypercalls */
-#ifndef _ASM_X86_LGUEST_HCALL_H
-#define _ASM_X86_LGUEST_HCALL_H
-
-#define LHCALL_FLUSH_ASYNC 0
-#define LHCALL_LGUEST_INIT 1
-#define LHCALL_SHUTDOWN 2
-#define LHCALL_NEW_PGTABLE 4
-#define LHCALL_FLUSH_TLB 5
-#define LHCALL_LOAD_IDT_ENTRY 6
-#define LHCALL_SET_STACK 7
-#define LHCALL_SET_CLOCKEVENT 9
-#define LHCALL_HALT 10
-#define LHCALL_SET_PMD 13
-#define LHCALL_SET_PTE 14
-#define LHCALL_SET_PGD 15
-#define LHCALL_LOAD_TLS 16
-#define LHCALL_LOAD_GDT_ENTRY 18
-#define LHCALL_SEND_INTERRUPTS 19
-
-#define LGUEST_TRAP_ENTRY 0x1F
-
-/* Argument number 3 to LHCALL_LGUEST_SHUTDOWN */
-#define LGUEST_SHUTDOWN_POWEROFF 1
-#define LGUEST_SHUTDOWN_RESTART 2
-
-#ifndef __ASSEMBLY__
-#include <asm/hw_irq.h>
-
-/*G:030
- * But first, how does our Guest contact the Host to ask for privileged
- * operations? There are two ways: the direct way is to make a "hypercall",
- * to make requests of the Host Itself.
- *
- * Our hypercall mechanism uses the highest unused trap code (traps 32 and
- * above are used by real hardware interrupts). Seventeen hypercalls are
- * available: the hypercall number is put in the %eax register, and the
- * arguments (when required) are placed in %ebx, %ecx, %edx and %esi.
- * If a return value makes sense, it's returned in %eax.
- *
- * Grossly invalid calls result in Sudden Death at the hands of the vengeful
- * Host, rather than returning failure. This reflects Winston Churchill's
- * definition of a gentleman: "someone who is only rude intentionally".
- */
-static inline unsigned long
-hcall(unsigned long call,
- unsigned long arg1, unsigned long arg2, unsigned long arg3,
- unsigned long arg4)
-{
- /* "int" is the Intel instruction to trigger a trap. */
- asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
- /* The call in %eax (aka "a") might be overwritten */
- : "=a"(call)
- /* The arguments are in %eax, %ebx, %ecx, %edx & %esi */
- : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4)
- /* "memory" means this might write somewhere in memory.
- * This isn't true for all calls, but it's safe to tell
- * gcc that it might happen so it doesn't get clever. */
- : "memory");
- return call;
-}
-/*:*/
-
-/* Can't use our min() macro here: needs to be a constant */
-#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32)
-
-#define LHCALL_RING_SIZE 64
-struct hcall_args {
- /* These map directly onto eax/ebx/ecx/edx/esi in struct lguest_regs */
- unsigned long arg0, arg1, arg2, arg3, arg4;
-};
-
-#endif /* !__ASSEMBLY__ */
-#endif /* _ASM_X86_LGUEST_HCALL_H */
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
new file mode 100644
index 000000000000..8e618fcf1f7c
--- /dev/null
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -0,0 +1,80 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __X86_MEM_ENCRYPT_H__
+#define __X86_MEM_ENCRYPT_H__
+
+#ifndef __ASSEMBLY__
+
+#include <linux/init.h>
+
+#include <asm/bootparam.h>
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+
+extern unsigned long sme_me_mask;
+
+void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
+ unsigned long decrypted_kernel_vaddr,
+ unsigned long kernel_len,
+ unsigned long encryption_wa,
+ unsigned long encryption_pgd);
+
+void __init sme_early_encrypt(resource_size_t paddr,
+ unsigned long size);
+void __init sme_early_decrypt(resource_size_t paddr,
+ unsigned long size);
+
+void __init sme_map_bootdata(char *real_mode_data);
+void __init sme_unmap_bootdata(char *real_mode_data);
+
+void __init sme_early_init(void);
+
+void __init sme_encrypt_kernel(void);
+void __init sme_enable(struct boot_params *bp);
+
+/* Architecture __weak replacement functions */
+void __init mem_encrypt_init(void);
+
+void swiotlb_set_mem_attributes(void *vaddr, unsigned long size);
+
+#else /* !CONFIG_AMD_MEM_ENCRYPT */
+
+#define sme_me_mask 0UL
+
+static inline void __init sme_early_encrypt(resource_size_t paddr,
+ unsigned long size) { }
+static inline void __init sme_early_decrypt(resource_size_t paddr,
+ unsigned long size) { }
+
+static inline void __init sme_map_bootdata(char *real_mode_data) { }
+static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
+
+static inline void __init sme_early_init(void) { }
+
+static inline void __init sme_encrypt_kernel(void) { }
+static inline void __init sme_enable(struct boot_params *bp) { }
+
+#endif /* CONFIG_AMD_MEM_ENCRYPT */
+
+/*
+ * The __sme_pa() and __sme_pa_nodebug() macros are meant for use when
+ * writing to or comparing values from the cr3 register. Having the
+ * encryption mask set in cr3 enables the PGD entry to be encrypted and
+ * avoids special case handling of PGD allocations.
+ */
+#define __sme_pa(x) (__pa(x) | sme_me_mask)
+#define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 79b647a7ebd0..bb8c597c2248 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -3,12 +3,28 @@
#include <linux/spinlock.h>
#include <linux/mutex.h>
+#include <linux/atomic.h>
/*
- * The x86 doesn't have a mmu context, but
- * we put the segment information here.
+ * x86 has arch-specific MMU state beyond what lives in mm_struct.
*/
typedef struct {
+ /*
+ * ctx_id uniquely identifies this mm_struct. A ctx_id will never
+ * be reused, and zero is not a valid ctx_id.
+ */
+ u64 ctx_id;
+
+ /*
+ * Any code that needs to do any sort of TLB flushing for this
+ * mm will first make its changes to the page tables, then
+ * increment tlb_gen, then flush. This lets the low-level
+ * flushing code keep track of what needs flushing.
+ *
+ * This is not used on Xen PV.
+ */
+ atomic64_t tlb_gen;
+
#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct ldt_struct *ldt;
#endif
@@ -37,6 +53,11 @@ typedef struct {
#endif
} mm_context_t;
+#define INIT_MM_CONTEXT(mm) \
+ .context = { \
+ .ctx_id = 1, \
+ }
+
void leave_mm(int cpu);
#endif /* _ASM_X86_MMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 265c907d7d4c..7ae318c340d9 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -12,6 +12,9 @@
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/mpx.h>
+
+extern atomic64_t last_mm_ctx_id;
+
#ifndef CONFIG_PARAVIRT
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
@@ -125,13 +128,18 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
- if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
- this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+ int cpu = smp_processor_id();
+
+ if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
+ cpumask_clear_cpu(cpu, mm_cpumask(mm));
}
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
+ mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
+ atomic64_set(&mm->context.tlb_gen, 0);
+
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
/* pkey 0 is the default and always allocated */
@@ -140,9 +148,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.execute_only_pkey = -1;
}
#endif
- init_new_context_ldt(tsk, mm);
-
- return 0;
+ return init_new_context_ldt(tsk, mm);
}
static inline void destroy_context(struct mm_struct *mm)
{
@@ -292,6 +298,9 @@ static inline unsigned long __get_current_cr3_fast(void)
{
unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
+ if (static_cpu_has(X86_FEATURE_PCID))
+ cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+
/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || preemptible());
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index e3b7819caeef..9eb7c718aaf8 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -2,6 +2,15 @@
#define _ASM_X86_MODULE_H
#include <asm-generic/module.h>
+#include <asm/orc_types.h>
+
+struct mod_arch_specific {
+#ifdef CONFIG_ORC_UNWINDER
+ unsigned int num_orcs;
+ int *orc_unwind_ip;
+ struct orc_entry *orc_unwind;
+#endif
+};
#ifdef CONFIG_X86_64
/* X86_64 does not define MODULE_PROC_FAMILY */
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
index a0d662be4c5b..7d7404756bb4 100644
--- a/arch/x86/include/asm/mpx.h
+++ b/arch/x86/include/asm/mpx.h
@@ -73,6 +73,9 @@ static inline void mpx_mm_init(struct mm_struct *mm)
}
void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long start, unsigned long end);
+
+unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len,
+ unsigned long flags);
#else
static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
{
@@ -94,6 +97,12 @@ static inline void mpx_notify_unmap(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
}
+
+static inline unsigned long mpx_unmapped_area_check(unsigned long addr,
+ unsigned long len, unsigned long flags)
+{
+ return addr;
+}
#endif /* CONFIG_X86_INTEL_MPX */
#endif /* _ASM_X86_MPX_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 2b58c8c1eeaa..63cc96f064dc 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -3,6 +3,8 @@
#include <linux/types.h>
#include <linux/atomic.h>
+#include <linux/nmi.h>
+#include <asm/io.h>
#include <asm/hyperv.h>
/*
@@ -28,6 +30,8 @@ struct ms_hyperv_info {
u32 features;
u32 misc_features;
u32 hints;
+ u32 max_vp_index;
+ u32 max_lp_index;
};
extern struct ms_hyperv_info ms_hyperv;
@@ -168,12 +172,155 @@ void hv_remove_crash_handler(void);
#if IS_ENABLED(CONFIG_HYPERV)
extern struct clocksource *hyperv_cs;
+extern void *hv_hypercall_pg;
+
+static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
+{
+ u64 input_address = input ? virt_to_phys(input) : 0;
+ u64 output_address = output ? virt_to_phys(output) : 0;
+ u64 hv_status;
+ register void *__sp asm(_ASM_SP);
+
+#ifdef CONFIG_X86_64
+ if (!hv_hypercall_pg)
+ return U64_MAX;
+
+ __asm__ __volatile__("mov %4, %%r8\n"
+ "call *%5"
+ : "=a" (hv_status), "+r" (__sp),
+ "+c" (control), "+d" (input_address)
+ : "r" (output_address), "m" (hv_hypercall_pg)
+ : "cc", "memory", "r8", "r9", "r10", "r11");
+#else
+ u32 input_address_hi = upper_32_bits(input_address);
+ u32 input_address_lo = lower_32_bits(input_address);
+ u32 output_address_hi = upper_32_bits(output_address);
+ u32 output_address_lo = lower_32_bits(output_address);
+
+ if (!hv_hypercall_pg)
+ return U64_MAX;
+
+ __asm__ __volatile__("call *%7"
+ : "=A" (hv_status),
+ "+c" (input_address_lo), "+r" (__sp)
+ : "A" (control),
+ "b" (input_address_hi),
+ "D"(output_address_hi), "S"(output_address_lo),
+ "m" (hv_hypercall_pg)
+ : "cc", "memory");
+#endif /* !x86_64 */
+ return hv_status;
+}
+
+#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
+#define HV_HYPERCALL_FAST_BIT BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET 17
+#define HV_HYPERCALL_REP_COMP_OFFSET 32
+#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
+#define HV_HYPERCALL_REP_START_OFFSET 48
+#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
+
+/* Fast hypercall with 8 bytes of input and no output */
+static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+{
+ u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ register void *__sp asm(_ASM_SP);
+
+#ifdef CONFIG_X86_64
+ {
+ __asm__ __volatile__("call *%4"
+ : "=a" (hv_status), "+r" (__sp),
+ "+c" (control), "+d" (input1)
+ : "m" (hv_hypercall_pg)
+ : "cc", "r8", "r9", "r10", "r11");
+ }
+#else
+ {
+ u32 input1_hi = upper_32_bits(input1);
+ u32 input1_lo = lower_32_bits(input1);
+
+ __asm__ __volatile__ ("call *%5"
+ : "=A"(hv_status),
+ "+c"(input1_lo),
+ "+r"(__sp)
+ : "A" (control),
+ "b" (input1_hi),
+ "m" (hv_hypercall_pg)
+ : "cc", "edi", "esi");
+ }
+#endif
+ return hv_status;
+}
+
+/*
+ * Rep hypercalls. Callers of this function are supposed to ensure that
+ * rep_count and varhead_size comply with Hyper-V hypercall definition.
+ */
+static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
+ void *input, void *output)
+{
+ u64 control = code;
+ u64 status;
+ u16 rep_comp;
+
+ control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET;
+ control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET;
+
+ do {
+ status = hv_do_hypercall(control, input, output);
+ if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS)
+ return status;
+
+ /* Bits 32-43 of status have 'Reps completed' data. */
+ rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >>
+ HV_HYPERCALL_REP_COMP_OFFSET;
+
+ control &= ~HV_HYPERCALL_REP_START_MASK;
+ control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET;
+
+ touch_nmi_watchdog();
+ } while (rep_comp < rep_count);
+
+ return status;
+}
+
+/*
+ * Hypervisor's notion of virtual processor ID is different from
+ * Linux's notion of CPU ID. This information can only be retrieved
+ * in the context of the calling CPU. Set up a map for easy access
+ * to this information.
+ */
+extern u32 *hv_vp_index;
+
+/**
+ * hv_cpu_number_to_vp_number() - Map CPU to VP.
+ * @cpu_number: CPU number in Linux terms
+ *
+ * This function returns the mapping between the Linux processor
+ * number and the hypervisor's virtual processor number, useful
+ * in making hypercalls and such that talk about specific
+ * processors.
+ *
+ * Return: Virtual processor number in Hyper-V terms
+ */
+static inline int hv_cpu_number_to_vp_number(int cpu_number)
+{
+ return hv_vp_index[cpu_number];
+}
void hyperv_init(void);
+void hyperv_setup_mmu_ops(void);
+void hyper_alloc_mmu(void);
void hyperv_report_panic(struct pt_regs *regs);
bool hv_is_hypercall_page_setup(void);
void hyperv_cleanup(void);
-#endif
+#else /* CONFIG_HYPERV */
+static inline void hyperv_init(void) {}
+static inline bool hv_is_hypercall_page_setup(void) { return false; }
+static inline void hyperv_cleanup(void) {}
+static inline void hyperv_setup_mmu_ops(void) {}
+#endif /* CONFIG_HYPERV */
+
#ifdef CONFIG_HYPERV_TSCPAGE
struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 5573c75f8e4c..17f5c12e1afd 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -356,6 +356,8 @@
#define MSR_K8_TOP_MEM1 0xc001001a
#define MSR_K8_TOP_MEM2 0xc001001d
#define MSR_K8_SYSCFG 0xc0010010
+#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23
+#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
#define MSR_K8_INT_PENDING_MSG 0xc0010055
/* C1E active bits in int pending message */
#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
diff --git a/arch/x86/include/asm/orc_lookup.h b/arch/x86/include/asm/orc_lookup.h
new file mode 100644
index 000000000000..91c8d868424d
--- /dev/null
+++ b/arch/x86/include/asm/orc_lookup.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ORC_LOOKUP_H
+#define _ORC_LOOKUP_H
+
+/*
+ * This is a lookup table for speeding up access to the .orc_unwind table.
+ * Given an input address offset, the corresponding lookup table entry
+ * specifies a subset of the .orc_unwind table to search.
+ *
+ * Each block represents the end of the previous range and the start of the
+ * next range. An extra block is added to give the last range an end.
+ *
+ * The block size should be a power of 2 to avoid a costly 'div' instruction.
+ *
+ * A block size of 256 was chosen because it roughly doubles unwinder
+ * performance while only adding ~5% to the ORC data footprint.
+ */
+#define LOOKUP_BLOCK_ORDER 8
+#define LOOKUP_BLOCK_SIZE (1 << LOOKUP_BLOCK_ORDER)
+
+#ifndef LINKER_SCRIPT
+
+extern unsigned int orc_lookup[];
+extern unsigned int orc_lookup_end[];
+
+#define LOOKUP_START_IP (unsigned long)_stext
+#define LOOKUP_STOP_IP (unsigned long)_etext
+
+#endif /* LINKER_SCRIPT */
+
+#endif /* _ORC_LOOKUP_H */
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
new file mode 100644
index 000000000000..9c9dc579bd7d
--- /dev/null
+++ b/arch/x86/include/asm/orc_types.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ORC_TYPES_H
+#define _ORC_TYPES_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+/*
+ * The ORC_REG_* registers are base registers which are used to find other
+ * registers on the stack.
+ *
+ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
+ * address of the previous frame: the caller's SP before it called the current
+ * function.
+ *
+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
+ * the current frame.
+ *
+ * The most commonly used base registers are SP and BP -- which the previous SP
+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is
+ * usually based on.
+ *
+ * The rest of the base registers are needed for special cases like entry code
+ * and GCC realigned stacks.
+ */
+#define ORC_REG_UNDEFINED 0
+#define ORC_REG_PREV_SP 1
+#define ORC_REG_DX 2
+#define ORC_REG_DI 3
+#define ORC_REG_BP 4
+#define ORC_REG_SP 5
+#define ORC_REG_R10 6
+#define ORC_REG_R13 7
+#define ORC_REG_BP_INDIRECT 8
+#define ORC_REG_SP_INDIRECT 9
+#define ORC_REG_MAX 15
+
+/*
+ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
+ * caller's SP right before it made the call). Used for all callable
+ * functions, i.e. all C code and all callable asm functions.
+ *
+ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
+ * to a fully populated pt_regs from a syscall, interrupt, or exception.
+ *
+ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
+ * points to the iret return frame.
+ *
+ * The UNWIND_HINT macros are used only for the unwind_hint struct. They
+ * aren't used in struct orc_entry due to size and complexity constraints.
+ * Objtool converts them to real types when it converts the hints to orc
+ * entries.
+ */
+#define ORC_TYPE_CALL 0
+#define ORC_TYPE_REGS 1
+#define ORC_TYPE_REGS_IRET 2
+#define UNWIND_HINT_TYPE_SAVE 3
+#define UNWIND_HINT_TYPE_RESTORE 4
+
+#ifndef __ASSEMBLY__
+/*
+ * This struct is more or less a vastly simplified version of the DWARF Call
+ * Frame Information standard. It contains only the necessary parts of DWARF
+ * CFI, simplified for ease of access by the in-kernel unwinder. It tells the
+ * unwinder how to find the previous SP and BP (and sometimes entry regs) on
+ * the stack for a given code address. Each instance of the struct corresponds
+ * to one or more code locations.
+ */
+struct orc_entry {
+ s16 sp_offset;
+ s16 bp_offset;
+ unsigned sp_reg:4;
+ unsigned bp_reg:4;
+ unsigned type:2;
+} __packed;
+
+/*
+ * This struct is used by asm and inline asm code to manually annotate the
+ * location of registers on the stack for the ORC unwinder.
+ *
+ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
+ */
+struct unwind_hint {
+ u32 ip;
+ s16 sp_offset;
+ u8 sp_reg;
+ u8 type;
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ORC_TYPES_H */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index b4a0d43248cf..b50df06ad251 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -51,6 +51,10 @@ static inline void clear_page(void *page)
void copy_page(void *to, void *from);
+#ifdef CONFIG_X86_MCE
+#define arch_unmap_kpfn arch_unmap_kpfn
+#endif
+
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_VSYSCALL_EMULATION
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 7bd0099384ca..b98ed9d14630 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -3,6 +3,7 @@
#include <linux/const.h>
#include <linux/types.h>
+#include <linux/mem_encrypt.h>
/* PAGE_SHIFT determines the page size */
#define PAGE_SHIFT 12
@@ -15,7 +16,7 @@
#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
-#define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1))
+#define __PHYSICAL_MASK ((phys_addr_t)(__sme_clr((1ULL << __PHYSICAL_MASK_SHIFT) - 1)))
#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
/* Cast *PAGE_MASK to a signed type so that it is sign-extended if
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 9ccac1926587..c25dd22f7c70 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -960,11 +960,6 @@ extern void default_banner(void);
#define GET_CR2_INTO_RAX \
call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
-#define PARAVIRT_ADJUST_EXCEPTION_FRAME \
- PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
- CLBR_NONE, \
- call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
-
#define USERGS_SYSRET64 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
CLBR_NONE, \
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 9ffc36bfe4cd..6b64fc6367f2 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -196,9 +196,6 @@ struct pv_irq_ops {
void (*safe_halt)(void);
void (*halt)(void);
-#ifdef CONFIG_X86_64
- void (*adjust_exception_frame)(void);
-#endif
} __no_randomize_layout;
struct pv_mmu_ops {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 77037b6f1caa..5b4c44d419c5 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1,6 +1,7 @@
#ifndef _ASM_X86_PGTABLE_H
#define _ASM_X86_PGTABLE_H
+#include <linux/mem_encrypt.h>
#include <asm/page.h>
#include <asm/pgtable_types.h>
@@ -13,9 +14,18 @@
cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \
: (prot))
+/*
+ * Macros to add or remove encryption attribute
+ */
+#define pgprot_encrypted(prot) __pgprot(__sme_set(pgprot_val(prot)))
+#define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot)))
+
#ifndef __ASSEMBLY__
#include <asm/x86_init.h>
+extern pgd_t early_top_pgt[PTRS_PER_PGD];
+int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
+
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
void ptdump_walk_pgd_level_checkwx(void);
@@ -38,6 +48,8 @@ extern struct list_head pgd_list;
extern struct mm_struct *pgd_page_get_mm(struct page *page);
+extern pmdval_t early_pmd_flags;
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else /* !CONFIG_PARAVIRT */
@@ -195,6 +207,11 @@ static inline unsigned long p4d_pfn(p4d_t p4d)
return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT;
}
+static inline unsigned long pgd_pfn(pgd_t pgd)
+{
+ return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
static inline int p4d_large(p4d_t p4d)
{
/* No 512 GiB pages yet */
@@ -704,8 +721,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
* Currently stuck as a macro due to indirect forward reference to
* linux/mmzone.h's __section_mem_map_addr() definition:
*/
-#define pmd_page(pmd) \
- pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT)
+#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
/*
* the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
@@ -773,8 +789,7 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
* Currently stuck as a macro due to indirect forward reference to
* linux/mmzone.h's __section_mem_map_addr() definition:
*/
-#define pud_page(pud) \
- pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT)
+#define pud_page(pud) pfn_to_page(pud_pfn(pud))
/* Find an entry in the second-level page table.. */
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
@@ -824,8 +839,7 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d)
* Currently stuck as a macro due to indirect forward reference to
* linux/mmzone.h's __section_mem_map_addr() definition:
*/
-#define p4d_page(p4d) \
- pfn_to_page((p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT)
+#define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d))
/* Find an entry in the third-level page table.. */
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
@@ -859,7 +873,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
* Currently stuck as a macro due to indirect forward reference to
* linux/mmzone.h's __section_mem_map_addr() definition:
*/
-#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
+#define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd))
/* to find an entry in a page-table-directory. */
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
@@ -1158,6 +1172,23 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
+{
+ return pmd_set_flags(pmd, _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline int pmd_swp_soft_dirty(pmd_t pmd)
+{
+ return pmd_flags(pmd) & _PAGE_SWP_SOFT_DIRTY;
+}
+
+static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
+{
+ return pmd_clear_flags(pmd, _PAGE_SWP_SOFT_DIRTY);
+}
+#endif
#endif
#define PKRU_AD_BIT 0x1
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 2160c1fee920..972a4698c530 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -180,15 +180,21 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
/*
* Encode and de-code a swap entry
*
- * | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number
- * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
- * | OFFSET (14->63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| <- swp entry
+ * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number
+ * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
+ * | OFFSET (14->63) | TYPE (9-13) |0|0|X|X| X| X|X|SD|0| <- swp entry
*
* G (8) is aliased and used as a PROT_NONE indicator for
* !present ptes. We need to start storing swap entries above
* there. We also need to avoid using A and D because of an
* erratum where they can be incorrectly set by hardware on
* non-present PTEs.
+ *
+ * SD (1) in swp entry is used to store soft dirty bit, which helps us
+ * remember soft dirty over page migration.
+ *
+ * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
+ * but also L and G.
*/
#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
#define SWP_TYPE_BITS 5
@@ -204,7 +210,9 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
((type) << (SWP_TYPE_FIRST_BIT)) \
| ((offset) << SWP_OFFSET_FIRST_BIT) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
+#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) })
#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
+#define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val })
extern int kern_addr_valid(unsigned long addr);
extern void cleanup_highmap(void);
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index bf9638e1ee42..f1492473f10e 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -2,6 +2,8 @@
#define _ASM_X86_PGTABLE_DEFS_H
#include <linux/const.h>
+#include <linux/mem_encrypt.h>
+
#include <asm/page_types.h>
#define FIRST_USER_ADDRESS 0UL
@@ -97,15 +99,15 @@
/*
* Tracking soft dirty bit when a page goes to a swap is tricky.
* We need a bit which can be stored in pte _and_ not conflict
- * with swap entry format. On x86 bits 6 and 7 are *not* involved
- * into swap entry computation, but bit 6 is used for nonlinear
- * file mapping, so we borrow bit 7 for soft dirty tracking.
+ * with swap entry format. On x86 bits 1-4 are *not* involved
+ * in swap entry computation, but bit 7 is used for thp migration,
+ * so we borrow bit 1 for soft dirty tracking.
*
* Please note that this bit must be treated as swap dirty page
- * mark if and only if the PTE has present bit clear!
+ * mark if and only if the PTE/PMD has present bit clear!
*/
#ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE
+#define _PAGE_SWP_SOFT_DIRTY _PAGE_RW
#else
#define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0))
#endif
@@ -121,10 +123,10 @@
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
-#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
- _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
- _PAGE_DIRTY)
+#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\
+ _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _KERNPG_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | \
+ _PAGE_ACCESSED | _PAGE_DIRTY)
/*
* Set of bits not changed in pte_modify. The pte's
@@ -159,6 +161,7 @@ enum page_cache_mode {
#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)
#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC))
+#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP))
#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
@@ -187,22 +190,42 @@ enum page_cache_mode {
#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
+#define __PAGE_KERNEL_WP (__PAGE_KERNEL | _PAGE_CACHE_WP)
#define __PAGE_KERNEL_IO (__PAGE_KERNEL)
#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE)
-#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
-#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
-#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
-#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
-#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
-#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
-#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL)
-#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR)
+#ifndef __ASSEMBLY__
+
+#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
+
+#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
+ _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC)
+#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
+ _PAGE_DIRTY | _PAGE_ENC)
+
+#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC)
+#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC)
+
+#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL)
+#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP)
+
+#define PAGE_KERNEL __pgprot(__PAGE_KERNEL | _PAGE_ENC)
+#define PAGE_KERNEL_NOENC __pgprot(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC_NOENC __pgprot(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
+#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
+#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
+#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC)
+#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
+
+#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
+#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE)
-#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
-#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE)
+#endif /* __ASSEMBLY__ */
/* xwr */
#define __P000 PAGE_NONE
@@ -287,6 +310,11 @@ static inline p4dval_t native_p4d_val(p4d_t p4d)
#else
#include <asm-generic/pgtable-nop4d.h>
+static inline p4d_t native_make_p4d(pudval_t val)
+{
+ return (p4d_t) { .pgd = native_make_pgd((pgdval_t)val) };
+}
+
static inline p4dval_t native_p4d_val(p4d_t p4d)
{
return native_pgd_val(p4d.pgd);
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 79aa2f98398d..dc723b64acf0 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -2,6 +2,7 @@
#define _ASM_X86_PROCESSOR_FLAGS_H
#include <uapi/asm/processor-flags.h>
+#include <linux/mem_encrypt.h>
#ifdef CONFIG_VM86
#define X86_VM_MASK X86_EFLAGS_VM
@@ -32,16 +33,18 @@
* CR3_ADDR_MASK is the mask used by read_cr3_pa().
*/
#ifdef CONFIG_X86_64
-/* Mask off the address space ID bits. */
-#define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull
-#define CR3_PCID_MASK 0xFFFull
+/* Mask off the address space ID and SME encryption bits. */
+#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull)
+#define CR3_PCID_MASK 0xFFFull
+#define CR3_NOFLUSH BIT_ULL(63)
#else
/*
* CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
* a tiny bit of code size by setting all the bits.
*/
-#define CR3_ADDR_MASK 0xFFFFFFFFull
-#define CR3_PCID_MASK 0ull
+#define CR3_ADDR_MASK 0xFFFFFFFFull
+#define CR3_PCID_MASK 0ull
+#define CR3_NOFLUSH 0
#endif
#endif /* _ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 028245e1c42b..3fa26a61eabc 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -22,6 +22,7 @@ struct vm86;
#include <asm/nops.h>
#include <asm/special_insns.h>
#include <asm/fpu/types.h>
+#include <asm/unwind_hints.h>
#include <linux/personality.h>
#include <linux/cache.h>
@@ -29,6 +30,7 @@ struct vm86;
#include <linux/math64.h>
#include <linux/err.h>
#include <linux/irqflags.h>
+#include <linux/mem_encrypt.h>
/*
* We handle most unaligned accesses in hardware. On the other hand
@@ -239,9 +241,14 @@ static inline unsigned long read_cr3_pa(void)
return __read_cr3() & CR3_ADDR_MASK;
}
+static inline unsigned long native_read_cr3_pa(void)
+{
+ return __native_read_cr3() & CR3_ADDR_MASK;
+}
+
static inline void load_cr3(pgd_t *pgdir)
{
- write_cr3(__pa(pgdir));
+ write_cr3(__sme_pa(pgdir));
}
#ifdef CONFIG_X86_32
@@ -661,7 +668,7 @@ static inline void sync_core(void)
* In case NMI unmasking or performance ever becomes a problem,
* the next best option appears to be MOV-to-CR2 and an
* unconditional jump. That sequence also works on all CPUs,
- * but it will fault at CPL3 (i.e. Xen PV and lguest).
+ * but it will fault at CPL3 (i.e. Xen PV).
*
* CPUID is the conventional way, but it's nasty: it doesn't
* exist on some 486-like CPUs, and it usually exits to a
@@ -684,6 +691,7 @@ static inline void sync_core(void)
unsigned int tmp;
asm volatile (
+ UNWIND_HINT_SAVE
"mov %%ss, %0\n\t"
"pushq %q0\n\t"
"pushq %%rsp\n\t"
@@ -693,6 +701,7 @@ static inline void sync_core(void)
"pushq %q0\n\t"
"pushq $1f\n\t"
"iretq\n\t"
+ UNWIND_HINT_RESTORE
"1:"
: "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
#endif
@@ -802,7 +811,9 @@ static inline void spin_lock_prefetch(const void *x)
*/
#define IA32_PAGE_OFFSET PAGE_OFFSET
#define TASK_SIZE PAGE_OFFSET
+#define TASK_SIZE_LOW TASK_SIZE
#define TASK_SIZE_MAX TASK_SIZE
+#define DEFAULT_MAP_WINDOW TASK_SIZE
#define STACK_TOP TASK_SIZE
#define STACK_TOP_MAX STACK_TOP
@@ -842,7 +853,9 @@ static inline void spin_lock_prefetch(const void *x)
* particular problem by preventing anything from being mapped
* at the maximum canonical address.
*/
-#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
+#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
+
+#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
/* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
@@ -850,12 +863,14 @@ static inline void spin_lock_prefetch(const void *x)
#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
0xc0000000 : 0xFFFFe000)
+#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \
+ IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
-#define STACK_TOP TASK_SIZE
+#define STACK_TOP TASK_SIZE_LOW
#define STACK_TOP_MAX TASK_SIZE_MAX
#define INIT_THREAD { \
@@ -876,7 +891,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
* space during mmap's.
*/
#define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3))
-#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE)
+#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
#define KSTK_EIP(task) (task_pt_regs(task)->ip)
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 8d3964fc5f91..b408b1886195 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -24,6 +24,9 @@ void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void);
void entry_INT80_compat(void);
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+void xen_entry_INT80_compat(void);
+#endif
#endif
void x86_configure_nx(void);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 2b5d686ea9f3..91c04c8e67fa 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -9,6 +9,20 @@
#ifdef __i386__
struct pt_regs {
+ /*
+ * NB: 32-bit x86 CPUs are inconsistent as to what happens in the
+ * following cases (where %seg represents a segment register):
+ *
+ * - pushl %seg: some do a 16-bit write and leave the high
+ * bits alone
+ * - movl %seg, [mem]: some do a 16-bit write despite the movl
+ * - IDT entry: some (e.g. 486) will leave the high bits of CS
+ * and (if applicable) SS undefined.
+ *
+ * Fortunately, x86-32 doesn't read the high bits on POP or IRET,
+ * so we can just treat all of the segment registers as 16-bit
+ * values.
+ */
unsigned long bx;
unsigned long cx;
unsigned long dx;
@@ -16,16 +30,22 @@ struct pt_regs {
unsigned long di;
unsigned long bp;
unsigned long ax;
- unsigned long ds;
- unsigned long es;
- unsigned long fs;
- unsigned long gs;
+ unsigned short ds;
+ unsigned short __dsh;
+ unsigned short es;
+ unsigned short __esh;
+ unsigned short fs;
+ unsigned short __fsh;
+ unsigned short gs;
+ unsigned short __gsh;
unsigned long orig_ax;
unsigned long ip;
- unsigned long cs;
+ unsigned short cs;
+ unsigned short __csh;
unsigned long flags;
unsigned long sp;
- unsigned long ss;
+ unsigned short ss;
+ unsigned short __ssh;
};
#else /* __i386__ */
@@ -176,6 +196,17 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
if (offset == offsetof(struct pt_regs, sp) &&
regs->cs == __KERNEL_CS)
return kernel_stack_pointer(regs);
+
+ /* The selector fields are 16-bit. */
+ if (offset == offsetof(struct pt_regs, cs) ||
+ offset == offsetof(struct pt_regs, ss) ||
+ offset == offsetof(struct pt_regs, ds) ||
+ offset == offsetof(struct pt_regs, es) ||
+ offset == offsetof(struct pt_regs, fs) ||
+ offset == offsetof(struct pt_regs, gs)) {
+ return *(u16 *)((unsigned long)regs + offset);
+
+ }
#endif
return *(unsigned long *)((unsigned long)regs + offset);
}
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 230e1903acf0..90d91520c13a 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -1,6 +1,15 @@
#ifndef _ARCH_X86_REALMODE_H
#define _ARCH_X86_REALMODE_H
+/*
+ * Flag bit definitions for use with the flags field of the trampoline header
+ * in the CONFIG_X86_64 variant.
+ */
+#define TH_FLAGS_SME_ACTIVE_BIT 0
+#define TH_FLAGS_SME_ACTIVE BIT(TH_FLAGS_SME_ACTIVE_BIT)
+
+#ifndef __ASSEMBLY__
+
#include <linux/types.h>
#include <asm/io.h>
@@ -38,6 +47,7 @@ struct trampoline_header {
u64 start;
u64 efer;
u32 cr4;
+ u32 flags;
#endif
};
@@ -69,4 +79,6 @@ static inline size_t real_mode_size_needed(void)
void set_real_mode_mem(phys_addr_t mem, size_t size);
void reserve_real_mode(void);
+#endif /* __ASSEMBLY__ */
+
#endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
new file mode 100644
index 000000000000..ff871210b9f2
--- /dev/null
+++ b/arch/x86/include/asm/refcount.h
@@ -0,0 +1,109 @@
+#ifndef __ASM_X86_REFCOUNT_H
+#define __ASM_X86_REFCOUNT_H
+/*
+ * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from
+ * PaX/grsecurity.
+ */
+#include <linux/refcount.h>
+
+/*
+ * This is the first portion of the refcount error handling, which lives in
+ * .text.unlikely, and is jumped to from the CPU flag check (in the
+ * following macros). This saves the refcount value location into CX for
+ * the exception handler to use (in mm/extable.c), and then triggers the
+ * central refcount exception. The fixup address for the exception points
+ * back to the regular execution flow in .text.
+ *
+ * Local labels used by this fragment:
+ *   111 - entry point targeted by the flag-check jumps; loads the address
+ *         of the [counter] operand into CX
+ *   112 - the ASM_UD0 sequence that raises the exception; first argument
+ *         of the _ASM_EXTABLE_REFCOUNT entry
+ *   113 - first location after .popsection, i.e. back in the caller's
+ *         section; second argument of the _ASM_EXTABLE_REFCOUNT entry
+ *
+ * Any asm statement that embeds this fragment must therefore provide an
+ * operand named [counter]; the users in this file also list "cx" as
+ * clobbered.
+ */
+#define _REFCOUNT_EXCEPTION \
+ ".pushsection .text.unlikely\n" \
+ "111:\tlea %[counter], %%" _ASM_CX "\n" \
+ "112:\t" ASM_UD0 "\n" \
+ ASM_UNREACHABLE \
+ ".popsection\n" \
+ "113:\n" \
+ _ASM_EXTABLE_REFCOUNT(112b, 113b)
+
+/* Trigger refcount exception if refcount result is negative. */
+#define REFCOUNT_CHECK_LT_ZERO \
+ "js 111f\n\t" \
+ _REFCOUNT_EXCEPTION
+
+/* Trigger refcount exception if refcount result is zero or negative. */
+#define REFCOUNT_CHECK_LE_ZERO \
+ "jz 111f\n\t" \
+ REFCOUNT_CHECK_LT_ZERO
+
+/* Trigger refcount exception unconditionally. */
+#define REFCOUNT_ERROR \
+ "jmp 111f\n\t" \
+ _REFCOUNT_EXCEPTION
+
+static __always_inline void refcount_add(unsigned int i, refcount_t *r)
+{
+ asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
+ REFCOUNT_CHECK_LT_ZERO
+ : [counter] "+m" (r->refs.counter)
+ : "ir" (i)
+ : "cc", "cx");
+}
+
+static __always_inline void refcount_inc(refcount_t *r)
+{
+ asm volatile(LOCK_PREFIX "incl %0\n\t"
+ REFCOUNT_CHECK_LT_ZERO
+ : [counter] "+m" (r->refs.counter)
+ : : "cc", "cx");
+}
+
+static __always_inline void refcount_dec(refcount_t *r)
+{
+ asm volatile(LOCK_PREFIX "decl %0\n\t"
+ REFCOUNT_CHECK_LE_ZERO
+ : [counter] "+m" (r->refs.counter)
+ : : "cc", "cx");
+}
+
+static __always_inline __must_check
+bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+{
+ GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
+ r->refs.counter, "er", i, "%0", e);
+}
+
+static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
+{
+ GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
+ r->refs.counter, "%0", e);
+}
+
+static __always_inline __must_check
+bool refcount_add_not_zero(unsigned int i, refcount_t *r)
+{
+ int c, result;
+
+ c = atomic_read(&(r->refs));
+ do {
+ if (unlikely(c == 0)) /* a zero count is never incremented */
+ return false;
+
+ result = c + i;
+
+ /*
+ * Did we try to increment from/to an undesirable state?
+ * That is: the observed count is already negative or equal
+ * to INT_MAX, or the computed sum compares lower than the
+ * observed count. If so, raise the refcount exception
+ * unconditionally (REFCOUNT_ERROR) and leave the loop
+ * without attempting the cmpxchg, so 'result' is not stored.
+ */
+ if (unlikely(c < 0 || c == INT_MAX || result < c)) {
+ asm volatile(REFCOUNT_ERROR
+ : : [counter] "m" (r->refs.counter)
+ : "cc", "cx");
+ break;
+ }
+
+ } while (!atomic_try_cmpxchg(&(r->refs), &c, result));
+
+ return c != 0; /* NOTE(review): c == 0 already returned false above, so this looks always-true provided atomic_try_cmpxchg() only rewrites c on failure */
+}
+
+static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
+{
+ return refcount_add_not_zero(1, r);
+}
+
+#endif
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 661dd305694a..045f99211a99 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -1,45 +1,56 @@
#ifndef _ASM_X86_RMWcc
#define _ASM_X86_RMWcc
+#define __CLOBBERS_MEM "memory"
+#define __CLOBBERS_MEM_CC_CX "memory", "cc", "cx"
+
#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
/* Use asm goto */
-#define __GEN_RMWcc(fullop, var, cc, ...) \
+#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \
do { \
asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \
- : : "m" (var), ## __VA_ARGS__ \
- : "memory" : cc_label); \
+ : : [counter] "m" (var), ## __VA_ARGS__ \
+ : clobbers : cc_label); \
return 0; \
cc_label: \
return 1; \
} while (0)
-#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
- __GEN_RMWcc(op " " arg0, var, cc)
+#define __BINARY_RMWcc_ARG " %1, "
-#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \
- __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val))
#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
/* Use flags output or a set instruction */
-#define __GEN_RMWcc(fullop, var, cc, ...) \
+#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \
do { \
bool c; \
asm volatile (fullop ";" CC_SET(cc) \
- : "+m" (var), CC_OUT(cc) (c) \
- : __VA_ARGS__ : "memory"); \
+ : [counter] "+m" (var), CC_OUT(cc) (c) \
+ : __VA_ARGS__ : clobbers); \
return c; \
} while (0)
+#define __BINARY_RMWcc_ARG " %2, "
+
+#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
+
#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
- __GEN_RMWcc(op " " arg0, var, cc)
+ __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM)
+
+#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc) \
+ __GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \
+ __CLOBBERS_MEM_CC_CX)
#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \
- __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val))
+ __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \
+ __CLOBBERS_MEM, vcon (val))
-#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
+#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \
+ __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \
+ __CLOBBERS_MEM_CC_CX, vcon (val))
#endif /* _ASM_X86_RMWcc */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 1549caa098f0..066aaf813141 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -238,9 +238,7 @@
#ifndef __ASSEMBLY__
extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
-#ifdef CONFIG_TRACING
-# define trace_early_idt_handler_array early_idt_handler_array
-#endif
+extern void early_ignore_irq(void);
/*
* Load a segment. Fall back on loading the zero segment if something goes
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index eaec6c364e42..cd71273ec49d 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -11,6 +11,7 @@
* Executability : eXeutable, NoteXecutable
* Read/Write : ReadOnly, ReadWrite
* Presence : NotPresent
+ * Encryption : Encrypted, Decrypted
*
* Within a category, the attributes are mutually exclusive.
*
@@ -42,6 +43,8 @@ int set_memory_wt(unsigned long addr, int numpages);
int set_memory_wb(unsigned long addr, int numpages);
int set_memory_np(unsigned long addr, int numpages);
int set_memory_4k(unsigned long addr, int numpages);
+int set_memory_encrypted(unsigned long addr, int numpages);
+int set_memory_decrypted(unsigned long addr, int numpages);
int set_memory_array_uc(unsigned long *addr, int addrinarray);
int set_memory_array_wc(unsigned long *addr, int addrinarray);
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index e4585a393965..a65cf544686a 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -39,6 +39,7 @@ static inline void vsmp_init(void) { }
#endif
void setup_bios_corruption_check(void);
+void early_platform_quirks(void);
extern unsigned long saved_video_mode;
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index e9ee84873de5..e371e7229042 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -340,6 +340,30 @@ extern void *memset(void *, int, size_t);
#endif
#endif /* !CONFIG_FORTIFY_SOURCE */
+#define __HAVE_ARCH_MEMSET16
+static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
+{
+ int d0, d1;
+ asm volatile("rep\n\t"
+ "stosw"
+ : "=&c" (d0), "=&D" (d1)
+ : "a" (v), "1" (s), "0" (n)
+ : "memory");
+ return s;
+}
+
+#define __HAVE_ARCH_MEMSET32
+static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
+{
+ int d0, d1;
+ asm volatile("rep\n\t"
+ "stosl"
+ : "=&c" (d0), "=&D" (d1)
+ : "a" (v), "1" (s), "0" (n)
+ : "memory");
+ return s;
+}
+
/*
* find the first occurrence of byte 'c', or 1 past the area if none
*/
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 2a8c822de1fc..f372a70a523f 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -58,6 +58,42 @@ extern void *__memcpy(void *to, const void *from, size_t len);
void *memset(void *s, int c, size_t n);
void *__memset(void *s, int c, size_t n);
+#define __HAVE_ARCH_MEMSET16
+static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
+{
+ long d0, d1;
+ asm volatile("rep\n\t"
+ "stosw"
+ : "=&c" (d0), "=&D" (d1)
+ : "a" (v), "1" (s), "0" (n)
+ : "memory");
+ return s;
+}
+
+#define __HAVE_ARCH_MEMSET32
+static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
+{
+ long d0, d1;
+ asm volatile("rep\n\t"
+ "stosl"
+ : "=&c" (d0), "=&D" (d1)
+ : "a" (v), "1" (s), "0" (n)
+ : "memory");
+ return s;
+}
+
+#define __HAVE_ARCH_MEMSET64
+static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
+{
+ long d0, d1;
+ asm volatile("rep\n\t"
+ "stosq"
+ : "=&c" (d0), "=&D" (d1)
+ : "a" (v), "1" (s), "0" (n)
+ : "memory");
+ return s;
+}
+
#define __HAVE_ARCH_MEMMOVE
void *memmove(void *dest, const void *src, size_t count);
void *__memmove(void *dest, const void *src, size_t count);
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 58fffe79e417..14835dd205a5 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -107,6 +107,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define V_IRQ_SHIFT 8
#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
+#define V_GIF_SHIFT 9
+#define V_GIF_MASK (1 << V_GIF_SHIFT)
+
#define V_INTR_PRIO_SHIFT 16
#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
@@ -116,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define V_INTR_MASKING_SHIFT 24
#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
+#define V_GIF_ENABLE_SHIFT 25
+#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
+
#define AVIC_ENABLE_SHIFT 31
#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e00e1bd6e7b3..5161da1a0fa0 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -98,6 +98,7 @@ struct thread_info {
#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */
#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
#define TIF_X32 30 /* 32-bit native x86-64 binary */
+#define TIF_FSCHECK 31 /* Check FS is USER_DS on return */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -122,6 +123,7 @@ struct thread_info {
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_ADDR32 (1 << TIF_ADDR32)
#define _TIF_X32 (1 << TIF_X32)
+#define _TIF_FSCHECK (1 << TIF_FSCHECK)
/*
* work to do in syscall_trace_enter(). Also includes TIF_NOHZ for
@@ -137,7 +139,8 @@ struct thread_info {
(_TIF_SYSCALL_TRACE | _TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \
_TIF_NEED_RESCHED | _TIF_SINGLESTEP | _TIF_SYSCALL_EMU | \
_TIF_SYSCALL_AUDIT | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE | \
- _TIF_PATCH_PENDING | _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT)
+ _TIF_PATCH_PENDING | _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT | \
+ _TIF_FSCHECK)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index c7797307fc2b..79a4ca6a9606 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -15,4 +15,18 @@
#include <asm-generic/tlb.h>
+/*
+ * While x86 architecture in general requires an IPI to perform TLB
+ * shootdown, enablement code for several hypervisors overrides
+ * .flush_tlb_others hook in pv_mmu_ops and implements it by issuing
+ * a hypercall. To keep software pagetable walkers safe in this case we
+ * switch to RCU based table free (HAVE_RCU_TABLE_FREE). See the comment
+ * below 'ifdef CONFIG_HAVE_RCU_TABLE_FREE' in include/asm-generic/tlb.h
+ * for more details.
+ */
+static inline void __tlb_remove_table(void *table)
+{
+ free_page_and_swap_cache(table);
+}
+
#endif /* _ASM_X86_TLB_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 50ea3482e1d1..4893abf7f74f 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -57,6 +57,23 @@ static inline void invpcid_flush_all_nonglobals(void)
__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
}
+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+{
+ u64 new_tlb_gen;
+
+ /*
+ * Bump the generation count. This also serves as a full barrier
+ * that synchronizes with switch_mm(): callers are required to order
+ * their read of mm_cpumask after their writes to the paging
+ * structures.
+ *
+ * The smp_mb__before_atomic()/smp_mb__after_atomic() pair brackets
+ * the atomic increment of mm->context.tlb_gen. The value obtained
+ * from atomic64_inc_return() is handed back to the caller as the
+ * new generation.
+ */
+ smp_mb__before_atomic();
+ new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
+ smp_mb__after_atomic();
+
+ return new_tlb_gen;
+}
+}
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -65,6 +82,17 @@ static inline void invpcid_flush_all_nonglobals(void)
#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
#endif
+/*
+ * Number of entries in the per-CPU tlb_state.ctxs[] array, i.e. how many
+ * ASIDs are tracked per CPU.
+ *
+ * 6 because 6 should be plenty and struct tlb_state will fit in
+ * two cache lines.
+ */
+#define TLB_NR_DYN_ASIDS 6
+
+struct tlb_context {
+ u64 ctx_id; /* identifies which mm the TLB's user entries came from */
+ u64 tlb_gen; /* generation of that mm the TLB is known to be current with */
+};
+
struct tlb_state {
/*
* cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
@@ -73,13 +101,35 @@ struct tlb_state {
* mode even if we've already switched back to swapper_pg_dir.
*/
struct mm_struct *loaded_mm;
- int state;
+ u16 loaded_mm_asid;
+ u16 next_asid;
/*
* Access to this CR4 shadow and to H/W CR4 is protected by
* disabling interrupts when modifying either one.
*/
unsigned long cr4;
+
+ /*
+ * This is a list of all contexts that might exist in the TLB.
+ * There is one per ASID that we use, and the ASID (what the
+ * CPU calls PCID) is the index into ctxs.
+ *
+ * For each context, ctx_id indicates which mm the TLB's user
+ * entries came from. As an invariant, the TLB will never
+ * contain entries that are out-of-date as of when that mm reached
+ * the tlb_gen in the list.
+ *
+ * To be clear, this means that it's legal for the TLB code to
+ * flush the TLB without updating tlb_gen. This can happen
+ * (for now, at least) due to paravirt remote flushes.
+ *
+ * NB: context 0 is a bit special, since it's also used by
+ * various bits of init code. This is fine -- code that
+ * isn't aware of PCID will end up harmlessly flushing
+ * context 0.
+ */
+ struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
@@ -148,6 +198,8 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
cr4_set_bits(mask);
}
+extern void initialize_tlbstate_and_flush(void);
+
static inline void __native_flush_tlb(void)
{
/*
@@ -207,6 +259,14 @@ static inline void __flush_tlb_all(void)
__flush_tlb_global();
else
__flush_tlb();
+
+ /*
+ * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+ * we'd end up flushing kernel translations for the current ASID but
+ * we might fail to flush kernel translations for other cached ASIDs.
+ *
+ * To avoid this issue, we force PCID off if PGE is off.
+ */
}
static inline void __flush_tlb_one(unsigned long addr)
@@ -231,9 +291,26 @@ static inline void __flush_tlb_one(unsigned long addr)
* and page-granular flushes are available only on i486 and up.
*/
struct flush_tlb_info {
- struct mm_struct *mm;
- unsigned long start;
- unsigned long end;
+ /*
+ * We support several kinds of flushes.
+ *
+ * - Fully flush a single mm. .mm will be set, .end will be
+ * TLB_FLUSH_ALL, and .new_tlb_gen will be the tlb_gen to
+ * which the IPI sender is trying to catch us up.
+ *
+ * - Partially flush a single mm. .mm will be set, .start and
+ * .end will indicate the range, and .new_tlb_gen will be set
+ * such that the changes between generation .new_tlb_gen-1 and
+ * .new_tlb_gen are entirely contained in the indicated range.
+ *
+ * - Fully flush all mms whose tlb_gens have been updated. .mm
+ * will be NULL, .end will be TLB_FLUSH_ALL, and .new_tlb_gen
+ * will be zero.
+ */
+ struct mm_struct *mm;
+ unsigned long start;
+ unsigned long end;
+ u64 new_tlb_gen;
};
#define local_flush_tlb() __flush_tlb()
@@ -256,12 +333,10 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
void native_flush_tlb_others(const struct cpumask *cpumask,
const struct flush_tlb_info *info);
-#define TLBSTATE_OK 1
-#define TLBSTATE_LAZY 2
-
static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
struct mm_struct *mm)
{
+ inc_mm_tlb_gen(mm);
cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
}
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6358a85e2270..c1d2a9892352 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -75,12 +75,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
extern void setup_node_to_cpumask_map(void);
-/*
- * Returns the number of the node containing Node 'node'. This
- * architecture is flat, so it is a pretty simple function!
- */
-#define parent_node(node) (node)
-
#define pcibus_to_node(bus) __pcibus_to_node(bus)
extern int __node_distance(int, int);
diff --git a/arch/x86/include/asm/trace/common.h b/arch/x86/include/asm/trace/common.h
new file mode 100644
index 000000000000..57c8da027d99
--- /dev/null
+++ b/arch/x86/include/asm/trace/common.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_TRACE_COMMON_H
+#define _ASM_TRACE_COMMON_H
+
+#ifdef CONFIG_TRACING
+DECLARE_STATIC_KEY_FALSE(trace_pagefault_key);
+#define trace_pagefault_enabled() \
+ static_branch_unlikely(&trace_pagefault_key)
+DECLARE_STATIC_KEY_FALSE(trace_resched_ipi_key);
+#define trace_resched_ipi_enabled() \
+ static_branch_unlikely(&trace_resched_ipi_key)
+#else
+static inline bool trace_pagefault_enabled(void) { return false; }
+static inline bool trace_resched_ipi_enabled(void) { return false; }
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
index 2422b14c50a7..5665bf205b8d 100644
--- a/arch/x86/include/asm/trace/exceptions.h
+++ b/arch/x86/include/asm/trace/exceptions.h
@@ -5,9 +5,10 @@
#define _TRACE_PAGE_FAULT_H
#include <linux/tracepoint.h>
+#include <asm/trace/common.h>
-extern int trace_irq_vector_regfunc(void);
-extern void trace_irq_vector_unregfunc(void);
+extern int trace_pagefault_reg(void);
+extern void trace_pagefault_unreg(void);
DECLARE_EVENT_CLASS(x86_exceptions,
@@ -37,8 +38,7 @@ DEFINE_EVENT_FN(x86_exceptions, name, \
TP_PROTO(unsigned long address, struct pt_regs *regs, \
unsigned long error_code), \
TP_ARGS(address, regs, error_code), \
- trace_irq_vector_regfunc, \
- trace_irq_vector_unregfunc);
+ trace_pagefault_reg, trace_pagefault_unreg);
DEFINE_PAGE_FAULT_EVENT(page_fault_user);
DEFINE_PAGE_FAULT_EVENT(page_fault_kernel);
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
new file mode 100644
index 000000000000..4253bca99989
--- /dev/null
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -0,0 +1,40 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hyperv
+
+#if !defined(_TRACE_HYPERV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HYPERV_H
+
+#include <linux/tracepoint.h>
+
+#if IS_ENABLED(CONFIG_HYPERV)
+
+TRACE_EVENT(hyperv_mmu_flush_tlb_others,
+ TP_PROTO(const struct cpumask *cpus,
+ const struct flush_tlb_info *info),
+ TP_ARGS(cpus, info),
+ TP_STRUCT__entry(
+ __field(unsigned int, ncpus)
+ __field(struct mm_struct *, mm)
+ __field(unsigned long, addr)
+ __field(unsigned long, end)
+ ),
+ TP_fast_assign(__entry->ncpus = cpumask_weight(cpus);
+ __entry->mm = info->mm;
+ __entry->addr = info->start;
+ __entry->end = info->end;
+ ),
+ TP_printk("ncpus %d mm %p addr %lx, end %lx",
+ __entry->ncpus, __entry->mm,
+ __entry->addr, __entry->end)
+ );
+
+#endif /* CONFIG_HYPERV */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH asm/trace/
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE hyperv
+#endif /* _TRACE_HYPERV_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 32dd6a9e343c..1599d394c8c1 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -5,9 +5,12 @@
#define _TRACE_IRQ_VECTORS_H
#include <linux/tracepoint.h>
+#include <asm/trace/common.h>
-extern int trace_irq_vector_regfunc(void);
-extern void trace_irq_vector_unregfunc(void);
+#ifdef CONFIG_X86_LOCAL_APIC
+
+extern int trace_resched_ipi_reg(void);
+extern void trace_resched_ipi_unreg(void);
DECLARE_EVENT_CLASS(x86_irq_vector,
@@ -28,15 +31,22 @@ DECLARE_EVENT_CLASS(x86_irq_vector,
#define DEFINE_IRQ_VECTOR_EVENT(name) \
DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \
TP_PROTO(int vector), \
+ TP_ARGS(vector), NULL, NULL); \
+DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \
+ TP_PROTO(int vector), \
+ TP_ARGS(vector), NULL, NULL);
+
+#define DEFINE_RESCHED_IPI_EVENT(name) \
+DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \
+ TP_PROTO(int vector), \
TP_ARGS(vector), \
- trace_irq_vector_regfunc, \
- trace_irq_vector_unregfunc); \
+ trace_resched_ipi_reg, \
+ trace_resched_ipi_unreg); \
DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \
TP_PROTO(int vector), \
TP_ARGS(vector), \
- trace_irq_vector_regfunc, \
- trace_irq_vector_unregfunc);
-
+ trace_resched_ipi_reg, \
+ trace_resched_ipi_unreg);
/*
* local_timer - called when entering/exiting a local timer interrupt
@@ -45,11 +55,6 @@ DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \
DEFINE_IRQ_VECTOR_EVENT(local_timer);
/*
- * reschedule - called when entering/exiting a reschedule vector handler
- */
-DEFINE_IRQ_VECTOR_EVENT(reschedule);
-
-/*
* spurious_apic - called when entering/exiting a spurious apic vector handler
*/
DEFINE_IRQ_VECTOR_EVENT(spurious_apic);
@@ -65,6 +70,7 @@ DEFINE_IRQ_VECTOR_EVENT(error_apic);
*/
DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi);
+#ifdef CONFIG_IRQ_WORK
/*
* irq_work - called when entering/exiting a irq work interrupt
* vector handler
@@ -81,6 +87,18 @@ DEFINE_IRQ_VECTOR_EVENT(irq_work);
* 4) goto 1
*/
TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? -EPERM : 0);
+#endif
+
+/*
+ * The ifdef is required because that tracepoint macro hell emits tracepoint
+ * code in files which include this header even if the tracepoint is not
+ * enabled. Brilliant stuff that.
+ */
+#ifdef CONFIG_SMP
+/*
+ * reschedule - called when entering/exiting a reschedule vector handler
+ */
+DEFINE_RESCHED_IPI_EVENT(reschedule);
/*
* call_function - called when entering/exiting a call function interrupt
@@ -93,24 +111,33 @@ DEFINE_IRQ_VECTOR_EVENT(call_function);
* single interrupt vector handler
*/
DEFINE_IRQ_VECTOR_EVENT(call_function_single);
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
/*
* threshold_apic - called when entering/exiting a threshold apic interrupt
* vector handler
*/
DEFINE_IRQ_VECTOR_EVENT(threshold_apic);
+#endif
+#ifdef CONFIG_X86_MCE_AMD
/*
* deferred_error_apic - called when entering/exiting a deferred apic interrupt
* vector handler
*/
DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic);
+#endif
+#ifdef CONFIG_X86_THERMAL_VECTOR
/*
* thermal_apic - called when entering/exiting a thermal apic interrupt
* vector handler
*/
DEFINE_IRQ_VECTOR_EVENT(thermal_apic);
+#endif
+
+#endif /* CONFIG_X86_LOCAL_APIC */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 01fd0a7f48cd..5545f6459bf5 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -13,9 +13,6 @@ asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
asmlinkage void int3(void);
-asmlinkage void xen_debug(void);
-asmlinkage void xen_int3(void);
-asmlinkage void xen_stack_segment(void);
asmlinkage void overflow(void);
asmlinkage void bounds(void);
asmlinkage void invalid_op(void);
@@ -38,22 +35,29 @@ asmlinkage void machine_check(void);
#endif /* CONFIG_X86_MCE */
asmlinkage void simd_coprocessor_error(void);
-#ifdef CONFIG_TRACING
-asmlinkage void trace_page_fault(void);
-#define trace_stack_segment stack_segment
-#define trace_divide_error divide_error
-#define trace_bounds bounds
-#define trace_invalid_op invalid_op
-#define trace_device_not_available device_not_available
-#define trace_coprocessor_segment_overrun coprocessor_segment_overrun
-#define trace_invalid_TSS invalid_TSS
-#define trace_segment_not_present segment_not_present
-#define trace_general_protection general_protection
-#define trace_spurious_interrupt_bug spurious_interrupt_bug
-#define trace_coprocessor_error coprocessor_error
-#define trace_alignment_check alignment_check
-#define trace_simd_coprocessor_error simd_coprocessor_error
-#define trace_async_page_fault async_page_fault
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+asmlinkage void xen_divide_error(void);
+asmlinkage void xen_xendebug(void);
+asmlinkage void xen_xenint3(void);
+asmlinkage void xen_nmi(void);
+asmlinkage void xen_overflow(void);
+asmlinkage void xen_bounds(void);
+asmlinkage void xen_invalid_op(void);
+asmlinkage void xen_device_not_available(void);
+asmlinkage void xen_double_fault(void);
+asmlinkage void xen_coprocessor_segment_overrun(void);
+asmlinkage void xen_invalid_TSS(void);
+asmlinkage void xen_segment_not_present(void);
+asmlinkage void xen_stack_segment(void);
+asmlinkage void xen_general_protection(void);
+asmlinkage void xen_page_fault(void);
+asmlinkage void xen_spurious_interrupt_bug(void);
+asmlinkage void xen_coprocessor_error(void);
+asmlinkage void xen_alignment_check(void);
+#ifdef CONFIG_X86_MCE
+asmlinkage void xen_machine_check(void);
+#endif /* CONFIG_X86_MCE */
+asmlinkage void xen_simd_coprocessor_error(void);
#endif
dotraplinkage void do_divide_error(struct pt_regs *, long);
@@ -74,14 +78,6 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
#endif
dotraplinkage void do_general_protection(struct pt_regs *, long);
dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
-#ifdef CONFIG_TRACING
-dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long);
-#else
-static inline void trace_do_page_fault(struct pt_regs *regs, unsigned long error)
-{
- do_page_fault(regs, error);
-}
-#endif
dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);
dotraplinkage void do_coprocessor_error(struct pt_regs *, long);
dotraplinkage void do_alignment_check(struct pt_regs *, long);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 30269dafec47..184eb9894dae 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -26,7 +26,12 @@
#define get_ds() (KERNEL_DS)
#define get_fs() (current->thread.addr_limit)
-#define set_fs(x) (current->thread.addr_limit = (x))
+static inline void set_fs(mm_segment_t fs)
+{
+ current->thread.addr_limit = fs;
+ /* On user-mode return, check fs is correct */
+ set_thread_flag(TIF_FSCHECK);
+}
#define segment_eq(a, b) ((a).seg == (b).seg)
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index e6676495b125..e9f793e2df7a 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -12,11 +12,14 @@ struct unwind_state {
struct task_struct *task;
int graph_idx;
bool error;
-#ifdef CONFIG_FRAME_POINTER
+#if defined(CONFIG_ORC_UNWINDER)
+ bool signal, full_regs;
+ unsigned long sp, bp, ip;
+ struct pt_regs *regs;
+#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
bool got_irq;
- unsigned long *bp, *orig_sp;
+ unsigned long *bp, *orig_sp, ip;
struct pt_regs *regs;
- unsigned long ip;
#else
unsigned long *sp;
#endif
@@ -24,41 +27,30 @@ struct unwind_state {
void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long *first_frame);
-
bool unwind_next_frame(struct unwind_state *state);
-
unsigned long unwind_get_return_address(struct unwind_state *state);
+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state);
static inline bool unwind_done(struct unwind_state *state)
{
return state->stack_info.type == STACK_TYPE_UNKNOWN;
}
-static inline
-void unwind_start(struct unwind_state *state, struct task_struct *task,
- struct pt_regs *regs, unsigned long *first_frame)
-{
- first_frame = first_frame ? : get_stack_pointer(task, regs);
-
- __unwind_start(state, task, regs, first_frame);
-}
-
static inline bool unwind_error(struct unwind_state *state)
{
return state->error;
}
-#ifdef CONFIG_FRAME_POINTER
-
static inline
-unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+void unwind_start(struct unwind_state *state, struct task_struct *task,
+ struct pt_regs *regs, unsigned long *first_frame)
{
- if (unwind_done(state))
- return NULL;
+ first_frame = first_frame ? : get_stack_pointer(task, regs);
- return state->regs ? &state->regs->ip : state->bp + 1;
+ __unwind_start(state, task, regs, first_frame);
}
+#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
{
if (unwind_done(state))
@@ -66,20 +58,46 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
return state->regs;
}
-
-#else /* !CONFIG_FRAME_POINTER */
-
-static inline
-unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+#else
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
{
return NULL;
}
+#endif
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+#ifdef CONFIG_ORC_UNWINDER
+void unwind_init(void);
+void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
+ void *orc, size_t orc_size);
+#else
+static inline void unwind_init(void) {}
+static inline
+void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
+ void *orc, size_t orc_size) {}
+#endif
+
+/*
+ * This disables KASAN checking when reading a value from another task's stack,
+ * since the other task could be running on another CPU and could have poisoned
+ * the stack in the meantime.
+ */
+#define READ_ONCE_TASK_STACK(task, x) \
+({ \
+ unsigned long val; \
+ if (task == current) \
+ val = READ_ONCE(x); \
+ else \
+ val = READ_ONCE_NOCHECK(x); \
+ val; \
+})
+
+static inline bool task_on_another_cpu(struct task_struct *task)
{
- return NULL;
+#ifdef CONFIG_SMP
+ return task != current && task->on_cpu;
+#else
+ return false;
+#endif
}
-#endif /* CONFIG_FRAME_POINTER */
-
#endif /* _ASM_X86_UNWIND_H */
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
new file mode 100644
index 000000000000..bae46fc6b9de
--- /dev/null
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -0,0 +1,105 @@
+#ifndef _ASM_X86_UNWIND_HINTS_H
+#define _ASM_X86_UNWIND_HINTS_H
+
+#include "orc_types.h"
+
+#ifdef __ASSEMBLY__
+
+/*
+ * In asm, there are two kinds of code: normal C-type callable functions and
+ * the rest. The normal callable functions can be called by other code, and
+ * don't do anything unusual with the stack. Such normal callable functions
+ * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this
+ * category. In this case, no special debugging annotations are needed because
+ * objtool can automatically generate the ORC data for the ORC unwinder to read
+ * at runtime.
+ *
+ * Anything which doesn't fall into the above category, such as syscall and
+ * interrupt handlers, tends to not be called directly by other functions, and
+ * often does unusual non-C-function-type things with the stack pointer. Such
+ * code needs to be annotated such that objtool can understand it. The
+ * following CFI hint macros are for this type of code.
+ *
+ * These macros provide hints to objtool about the state of the stack at each
+ * instruction. Objtool starts from the hints and follows the code flow,
+ * making automatic CFI adjustments when it sees pushes and pops, filling out
+ * the debuginfo as necessary. It will also warn if it sees any
+ * inconsistencies.
+ */
+.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL
+#ifdef CONFIG_STACK_VALIDATION
+.Lunwind_hint_ip_\@:
+ .pushsection .discard.unwind_hints
+ /* struct unwind_hint */
+ .long .Lunwind_hint_ip_\@ - .
+ .short \sp_offset
+ .byte \sp_reg
+ .byte \type
+ .popsection
+#endif
+.endm
+
+.macro UNWIND_HINT_EMPTY
+ UNWIND_HINT sp_reg=ORC_REG_UNDEFINED
+.endm
+
+.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0
+ .if \base == %rsp
+ .if \indirect
+ .set sp_reg, ORC_REG_SP_INDIRECT
+ .else
+ .set sp_reg, ORC_REG_SP
+ .endif
+ .elseif \base == %rbp
+ .set sp_reg, ORC_REG_BP
+ .elseif \base == %rdi
+ .set sp_reg, ORC_REG_DI
+ .elseif \base == %rdx
+ .set sp_reg, ORC_REG_DX
+ .elseif \base == %r10
+ .set sp_reg, ORC_REG_R10
+ .else
+ .error "UNWIND_HINT_REGS: bad base register"
+ .endif
+
+ .set sp_offset, \offset
+
+ .if \iret
+ .set type, ORC_TYPE_REGS_IRET
+ .elseif \extra == 0
+ .set type, ORC_TYPE_REGS_IRET
+ .set sp_offset, \offset + (16*8)
+ .else
+ .set type, ORC_TYPE_REGS
+ .endif
+
+ UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type
+.endm
+
+.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0
+ UNWIND_HINT_REGS base=\base offset=\offset iret=1
+.endm
+
+.macro UNWIND_HINT_FUNC sp_offset=8
+ UNWIND_HINT sp_offset=\sp_offset
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#define UNWIND_HINT(sp_reg, sp_offset, type) \
+ "987: \n\t" \
+ ".pushsection .discard.unwind_hints\n\t" \
+ /* struct unwind_hint */ \
+ ".long 987b - .\n\t" \
+ ".short " __stringify(sp_offset) "\n\t" \
+ ".byte " __stringify(sp_reg) "\n\t" \
+ ".byte " __stringify(type) "\n\t" \
+ ".popsection\n\t"
+
+#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE)
+
+#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_UNWIND_HINTS_H */
diff --git a/arch/x86/include/asm/vga.h b/arch/x86/include/asm/vga.h
index c4b9dc2f67c5..9f42beefc67a 100644
--- a/arch/x86/include/asm/vga.h
+++ b/arch/x86/include/asm/vga.h
@@ -7,12 +7,24 @@
#ifndef _ASM_X86_VGA_H
#define _ASM_X86_VGA_H
+#include <asm/set_memory.h>
+
/*
* On the PC, we can just recalculate addresses and then
* access the videoram directly without any black magic.
+ * To support memory encryption however, we need to access
+ * the videoram as decrypted memory.
*/
-#define VGA_MAP_MEM(x, s) (unsigned long)phys_to_virt(x)
+#define VGA_MAP_MEM(x, s) \
+({ \
+ unsigned long start = (unsigned long)phys_to_virt(x); \
+ \
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) \
+ set_memory_decrypted(start, (s) >> PAGE_SHIFT); \
+ \
+ start; \
+})
#define vga_readb(x) (*(x))
#define vga_writeb(x, y) (*(y) = (x))
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 35cd06f636ab..caec8417539f 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -72,6 +72,7 @@
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_RDRAND 0x00000800
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
+#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
#define SECONDARY_EXEC_RDSEED 0x00010000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
@@ -114,6 +115,10 @@
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
#define VMX_MISC_ACTIVITY_HLT 0x00000040
+/* VMFUNC functions */
+#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001
+#define VMFUNC_EPTP_ENTRIES 512
+
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
{
return vmx_basic & GENMASK_ULL(30, 0);
@@ -187,6 +192,8 @@ enum vmcs_field {
APIC_ACCESS_ADDR_HIGH = 0x00002015,
POSTED_INTR_DESC_ADDR = 0x00002016,
POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
+ VM_FUNCTION_CONTROL = 0x00002018,
+ VM_FUNCTION_CONTROL_HIGH = 0x00002019,
EPT_POINTER = 0x0000201a,
EPT_POINTER_HIGH = 0x0000201b,
EOI_EXIT_BITMAP0 = 0x0000201c,
@@ -197,6 +204,8 @@ enum vmcs_field {
EOI_EXIT_BITMAP2_HIGH = 0x00002021,
EOI_EXIT_BITMAP3 = 0x00002022,
EOI_EXIT_BITMAP3_HIGH = 0x00002023,
+ EPTP_LIST_ADDRESS = 0x00002024,
+ EPTP_LIST_ADDRESS_HIGH = 0x00002025,
VMREAD_BITMAP = 0x00002026,
VMWRITE_BITMAP = 0x00002028,
XSS_EXIT_BITMAP = 0x0000202C,
@@ -444,6 +453,7 @@ enum vmcs_field {
#define VMX_EPT_EXECUTE_ONLY_BIT (1ull)
#define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6)
+#define VMX_EPT_PAGE_WALK_5_BIT (1ull << 7)
#define VMX_EPTP_UC_BIT (1ull << 8)
#define VMX_EPTP_WB_BIT (1ull << 14)
#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
@@ -459,12 +469,14 @@ enum vmcs_field {
#define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */
#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */
-#define VMX_EPT_DEFAULT_GAW 3
-#define VMX_EPT_MAX_GAW 0x4
#define VMX_EPT_MT_EPTE_SHIFT 3
-#define VMX_EPT_GAW_EPTP_SHIFT 3
-#define VMX_EPT_AD_ENABLE_BIT (1ull << 6)
-#define VMX_EPT_DEFAULT_MT 0x6ull
+#define VMX_EPTP_PWL_MASK 0x38ull
+#define VMX_EPTP_PWL_4 0x18ull
+#define VMX_EPTP_PWL_5 0x20ull
+#define VMX_EPTP_AD_ENABLE_BIT (1ull << 6)
+#define VMX_EPTP_MT_MASK 0x7ull
+#define VMX_EPTP_MT_WB 0x6ull
+#define VMX_EPTP_MT_UC 0x0ull
#define VMX_EPT_READABLE_MASK 0x1ull
#define VMX_EPT_WRITABLE_MASK 0x2ull
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 11071fcd630e..9606688caa4b 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -552,6 +552,8 @@ static inline void
MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
struct desc_struct desc)
{
+ u32 *p = (u32 *) &desc;
+
mcl->op = __HYPERVISOR_update_descriptor;
if (sizeof(maddr) == sizeof(long)) {
mcl->args[0] = maddr;
@@ -559,8 +561,8 @@ MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
} else {
mcl->args[0] = maddr;
mcl->args[1] = maddr >> 32;
- mcl->args[2] = desc.a;
- mcl->args[3] = desc.b;
+ mcl->args[2] = *p++;
+ mcl->args[3] = *p;
}
trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4);
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 8417ef7c3885..07b6531813c4 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -158,9 +158,6 @@ static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
unsigned long pfn;
int ret;
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return mfn;
-
if (unlikely(mfn >= machine_to_phys_nr))
return ~0;
@@ -317,8 +314,6 @@ static inline pte_t __pte_ma(pteval_t x)
#define p4d_val_ma(x) ((x).p4d)
#endif
-void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid);
-
xmaddr_t arbitrary_virt_to_machine(void *address);
unsigned long arbitrary_virt_to_mfn(void *vaddr);
void make_lowmem_page_readonly(void *vaddr);
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index ddef37b16af2..66b8f93333d1 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -201,7 +201,7 @@ struct boot_params {
*
* @X86_SUBARCH_PC: Should be used if the hardware is enumerable using standard
* PC mechanisms (PCI, ACPI) and doesn't need a special boot flow.
- * @X86_SUBARCH_LGUEST: Used for x86 hypervisor demo, lguest
+ * @X86_SUBARCH_LGUEST: Used for x86 hypervisor demo, lguest, deprecated
* @X86_SUBARCH_XEN: Used for Xen guest types which follow the PV boot path,
* which start at asm startup_xen() entry point and later jump to the C
* xen_start_kernel() entry point. Both domU and dom0 type of guests are
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 127ddadee1a5..7032f4d8dff3 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -149,6 +149,9 @@
*/
#define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9)
+/* Recommend using the newer ExProcessorMasks interface */
+#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11)
+
/*
* HV_VP_SET available
*/
@@ -242,7 +245,11 @@
(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
/* Declare the various hypercall operations. */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
#define HVCALL_POST_MESSAGE 0x005c
#define HVCALL_SIGNAL_EVENT 0x005d
@@ -259,6 +266,16 @@
#define HV_PROCESSOR_POWER_STATE_C2 2
#define HV_PROCESSOR_POWER_STATE_C3 3
+#define HV_FLUSH_ALL_PROCESSORS BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
+enum HV_GENERIC_SET_FORMAT {
+ HV_GENERIC_SET_SPARCE_4K,
+ HV_GENERIC_SET_ALL,
+};
+
/* hypercall status code */
#define HV_STATUS_SUCCESS 0
#define HV_STATUS_INVALID_HYPERCALL_CODE 2
diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index 39bca7fac087..3be08f07695c 100644
--- a/arch/x86/include/uapi/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -3,9 +3,6 @@
#define MAP_32BIT 0x40 /* only give out 32bit addresses */
-#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
-#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
-
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
/*
* Take the 4 protection key bits out of the vma->vm_flags
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a01892bdd61a..fd0a7895b63f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -42,7 +42,7 @@ CFLAGS_irq.o := -I$(src)/../include/asm/trace
obj-y := process_$(BITS).o signal.o
obj-$(CONFIG_COMPAT) += signal_compat.o
-obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
+obj-y += traps.o idt.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o dumpstack.o nmi.o
obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o
obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
@@ -111,6 +111,7 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
+obj-$(CONFIG_EISA) += eisa.o
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
@@ -126,11 +127,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
-ifdef CONFIG_FRAME_POINTER
-obj-y += unwind_frame.o
-else
-obj-y += unwind_guess.o
-endif
+obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o
+obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o
+obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o
###
# 64 bit specific files
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7491e73d9253..f8ae286c1502 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -115,24 +115,24 @@ static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = {
#define ACPI_INVALID_GSI INT_MIN
/*
- * This is just a simple wrapper around early_ioremap(),
+ * This is just a simple wrapper around early_memremap(),
* with sanity checks for phys == 0 and size == 0.
*/
-char *__init __acpi_map_table(unsigned long phys, unsigned long size)
+void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size)
{
if (!phys || !size)
return NULL;
- return early_ioremap(phys, size);
+ return early_memremap(phys, size);
}
-void __init __acpi_unmap_table(char *map, unsigned long size)
+void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
{
if (!map || !size)
return;
- early_iounmap(map, size);
+ early_memunmap(map, size);
}
#ifdef CONFIG_X86_LOCAL_APIC
@@ -199,8 +199,10 @@ static int __init
acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
{
struct acpi_madt_local_x2apic *processor = NULL;
+#ifdef CONFIG_X86_X2APIC
int apic_id;
u8 enabled;
+#endif
processor = (struct acpi_madt_local_x2apic *)header;
@@ -209,9 +211,10 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
acpi_table_print_madt_entry(header);
+#ifdef CONFIG_X86_X2APIC
apic_id = processor->local_apic_id;
enabled = processor->lapic_flags & ACPI_MADT_ENABLED;
-#ifdef CONFIG_X86_X2APIC
+
/*
* We need to register disabled CPU as well to permit
* counting disabled CPUs. This allows us to size
@@ -1083,7 +1086,7 @@ static void __init mp_config_acpi_legacy_irqs(void)
mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
#endif
set_bit(MP_ISA_BUS, mp_bus_not_pci);
- pr_debug("Bus #%d is ISA\n", MP_ISA_BUS);
+ pr_debug("Bus #%d is ISA (nIRQs: %d)\n", MP_ISA_BUS, nr_legacy_irqs());
/*
* Use the default configuration for the IRQs 0-15. Unless
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 32e14d137416..3344d3382e91 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -742,7 +742,16 @@ static void *bp_int3_handler, *bp_int3_addr;
int poke_int3_handler(struct pt_regs *regs)
{
- /* bp_patching_in_progress */
+ /*
+ * Having observed our INT3 instruction, we now must observe
+ * bp_patching_in_progress.
+ *
+ * in_progress = TRUE INT3
+ * WMB RMB
+ * write INT3 if (in_progress)
+ *
+ * Idem for bp_int3_handler.
+ */
smp_rmb();
if (likely(!bp_patching_in_progress))
@@ -788,9 +797,8 @@ void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
bp_int3_addr = (u8 *)addr + sizeof(int3);
bp_patching_in_progress = true;
/*
- * Corresponding read barrier in int3 notifier for
- * making sure the in_progress flags is correctly ordered wrt.
- * patching
+ * Corresponding read barrier in int3 notifier for making sure the
+ * in_progress and handler are correctly ordered wrt. patching.
*/
smp_wmb();
@@ -815,9 +823,11 @@ void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
text_poke(addr, opcode, sizeof(int3));
on_each_cpu(do_sync_core, NULL, 1);
-
+ /*
+ * sync_core() implies an smp_mb() and orders this store against
+ * the writing of the new instruction.
+ */
bp_patching_in_progress = false;
- smp_wmb();
return addr;
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 98b3dd8cf2bf..8315e2f517a7 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -177,8 +177,6 @@ static int disable_apic_timer __initdata;
int local_apic_timer_c2_ok;
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
-int first_system_vector = FIRST_SYSTEM_VECTOR;
-
/*
* Debug level, exported for io_apic.c
*/
@@ -599,9 +597,13 @@ static const struct x86_cpu_id deadline_match[] = {
static void apic_check_deadline_errata(void)
{
- const struct x86_cpu_id *m = x86_match_cpu(deadline_match);
+ const struct x86_cpu_id *m;
u32 rev;
+ if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return;
+
+ m = x86_match_cpu(deadline_match);
if (!m)
return;
@@ -990,8 +992,7 @@ void setup_secondary_APIC_clock(void)
*/
static void local_apic_timer_interrupt(void)
{
- int cpu = smp_processor_id();
- struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
+ struct clock_event_device *evt = this_cpu_ptr(&lapic_events);
/*
* Normally we should not be here till LAPIC has been initialized but
@@ -1005,7 +1006,8 @@ static void local_apic_timer_interrupt(void)
* spurious.
*/
if (!evt->event_handler) {
- pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+ pr_warning("Spurious LAPIC timer interrupt on cpu %d\n",
+ smp_processor_id());
/* Switch it off */
lapic_timer_shutdown(evt);
return;
@@ -1040,25 +1042,6 @@ __visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
* interrupt lock, which is the WrongThing (tm) to do.
*/
entering_ack_irq();
- local_apic_timer_interrupt();
- exiting_irq();
-
- set_irq_regs(old_regs);
-}
-
-__visible void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
-{
- struct pt_regs *old_regs = set_irq_regs(regs);
-
- /*
- * NOTE! We'd better ACK the irq immediately,
- * because timer handling can be slow.
- *
- * update_process_times() expects us to have done irq_enter().
- * Besides, if we don't timer interrupts ignore the global
- * interrupt lock, which is the WrongThing (tm) to do.
- */
- entering_ack_irq();
trace_local_timer_entry(LOCAL_TIMER_VECTOR);
local_apic_timer_interrupt();
trace_local_timer_exit(LOCAL_TIMER_VECTOR);
@@ -1920,10 +1903,14 @@ void __init register_lapic_address(unsigned long address)
/*
* This interrupt should _never_ happen with our APIC/SMP architecture
*/
-static void __smp_spurious_interrupt(u8 vector)
+__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
{
+ u8 vector = ~regs->orig_ax;
u32 v;
+ entering_irq();
+ trace_spurious_apic_entry(vector);
+
/*
* Check if this really is a spurious interrupt and ACK it
* if it is a vectored one. Just in case...
@@ -1938,22 +1925,7 @@ static void __smp_spurious_interrupt(u8 vector)
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
"should never happen.\n", vector, smp_processor_id());
-}
-__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
-{
- entering_irq();
- __smp_spurious_interrupt(~regs->orig_ax);
- exiting_irq();
-}
-
-__visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs)
-{
- u8 vector = ~regs->orig_ax;
-
- entering_irq();
- trace_spurious_apic_entry(vector);
- __smp_spurious_interrupt(vector);
trace_spurious_apic_exit(vector);
exiting_irq();
}
@@ -1961,10 +1933,8 @@ __visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs)
/*
* This interrupt should never happen with our APIC/SMP architecture
*/
-static void __smp_error_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
{
- u32 v;
- u32 i = 0;
static const char * const error_interrupt_reason[] = {
"Send CS error", /* APIC Error Bit 0 */
"Receive CS error", /* APIC Error Bit 1 */
@@ -1975,6 +1945,10 @@ static void __smp_error_interrupt(struct pt_regs *regs)
"Received illegal vector", /* APIC Error Bit 6 */
"Illegal register address", /* APIC Error Bit 7 */
};
+ u32 v, i = 0;
+
+ entering_irq();
+ trace_error_apic_entry(ERROR_APIC_VECTOR);
/* First tickle the hardware, only then report what went on. -- REW */
if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */
@@ -1996,20 +1970,6 @@ static void __smp_error_interrupt(struct pt_regs *regs)
apic_printk(APIC_DEBUG, KERN_CONT "\n");
-}
-
-__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
-{
- entering_irq();
- __smp_error_interrupt(regs);
- exiting_irq();
-}
-
-__visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs)
-{
- entering_irq();
- trace_error_apic_entry(ERROR_APIC_VECTOR);
- __smp_error_interrupt(regs);
trace_error_apic_exit(ERROR_APIC_VECTOR);
exiting_irq();
}
@@ -2137,7 +2097,7 @@ static int allocate_logical_cpuid(int apicid)
/* Allocate a new cpuid. */
if (nr_logical_cpuids >= nr_cpu_ids) {
- WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %i reached. "
+ WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. "
"Processor %d/0x%x and the rest are ignored.\n",
nr_cpu_ids, nr_logical_cpuids, apicid);
return -EINVAL;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 237e9c2341c7..70e48aa6af98 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1243,7 +1243,7 @@ static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
entry.vector, entry.irr, entry.delivery_status);
if (ir_entry->format)
printk(KERN_DEBUG "%s, remapped, I(%04X), Z(%X)\n",
- buf, (ir_entry->index << 15) | ir_entry->index,
+ buf, (ir_entry->index2 << 15) | ir_entry->index,
ir_entry->zero);
else
printk(KERN_DEBUG "%s, %s, D(%02X), M(%1d)\n",
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index b3af457ed667..88c214e75a6b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -166,7 +166,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
offset = current_offset;
next:
vector += 16;
- if (vector >= first_system_vector) {
+ if (vector >= FIRST_SYSTEM_VECTOR) {
offset = (offset + 1) % 16;
vector = FIRST_EXTERNAL_VECTOR + offset;
}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 880aa093268d..710edab9e644 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -4,9 +4,6 @@
#include <asm/ucontext.h>
-#include <linux/lguest.h>
-#include "../../../drivers/lguest/lg.h"
-
#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls[] = {
#include <asm/syscalls_32.h>
@@ -62,23 +59,6 @@ void foo(void)
OFFSET(stack_canary_offset, stack_canary, canary);
#endif
-#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
- BLANK();
- OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
- OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending);
-
- BLANK();
- OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc);
- OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc);
- OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3);
- OFFSET(LGUEST_PAGES_host_sp, lguest_pages, state.host_sp);
- OFFSET(LGUEST_PAGES_guest_gdt_desc, lguest_pages,state.guest_gdt_desc);
- OFFSET(LGUEST_PAGES_guest_idt_desc, lguest_pages,state.guest_idt_desc);
- OFFSET(LGUEST_PAGES_guest_gdt, lguest_pages, state.guest_gdt);
- OFFSET(LGUEST_PAGES_regs_trapnum, lguest_pages, regs.trapnum);
- OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode);
- OFFSET(LGUEST_PAGES_regs, lguest_pages, regs);
-#endif
BLANK();
DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
DEFINE(NR_syscalls, sizeof(syscalls));
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 99332f550c48..cf42206926af 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -20,7 +20,6 @@ static char syscalls_ia32[] = {
int main(void)
{
#ifdef CONFIG_PARAVIRT
- OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
BLANK();
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index cdf82492b770..e17942c131c8 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -33,7 +33,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
-obj-$(CONFIG_INTEL_RDT_A) += intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_schemata.o
+obj-$(CONFIG_INTEL_RDT) += intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_monitor.o intel_rdt_ctrlmondata.o
obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 3b9e220621f8..9862e2cd6d93 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -297,13 +297,29 @@ static int nearby_node(int apicid)
}
#endif
+#ifdef CONFIG_SMP
+/*
+ * Fix up cpu_core_id for pre-F17h systems to be in the
+ * [0 .. cores_per_node - 1] range. Not really needed but
+ * kept so as not to break existing setups.
+ */
+static void legacy_fixup_core_id(struct cpuinfo_x86 *c)
+{
+ u32 cus_per_node;
+
+ if (c->x86 >= 0x17)
+ return;
+
+ cus_per_node = c->x86_max_cores / nodes_per_socket;
+ c->cpu_core_id %= cus_per_node;
+}
+
/*
* Fixup core topology information for
* (1) AMD multi-node processors
* Assumption: Number of cores in each internal node is the same.
* (2) AMD processors supporting compute units
*/
-#ifdef CONFIG_SMP
static void amd_get_topology(struct cpuinfo_x86 *c)
{
u8 node_id;
@@ -354,15 +370,9 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
} else
return;
- /* fixup multi-node processor information */
if (nodes_per_socket > 1) {
- u32 cus_per_node;
-
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
- cus_per_node = c->x86_max_cores / nodes_per_socket;
-
- /* core id has to be in the [0 .. cores_per_node - 1] range */
- c->cpu_core_id %= cus_per_node;
+ legacy_fixup_core_id(c);
}
}
#endif
@@ -548,8 +558,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
static void early_init_amd(struct cpuinfo_x86 *c)
{
+ u32 dummy;
+
early_init_amd_mc(c);
+ rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
+
/*
* c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
* with P/T states and does not stop in deep C-states
@@ -612,6 +626,27 @@ static void early_init_amd(struct cpuinfo_x86 *c)
*/
if (cpu_has_amd_erratum(c, amd_erratum_400))
set_cpu_bug(c, X86_BUG_AMD_E400);
+
+ /*
+ * BIOS support is required for SME. If BIOS has enabled SME then
+ * adjust x86_phys_bits by the SME physical address space reduction
+ * value. If BIOS has not enabled SME then don't advertise the
+ * feature (set in scattered.c). Also, since the SME support requires
+ * long mode, don't advertise the feature under CONFIG_X86_32.
+ */
+ if (cpu_has(c, X86_FEATURE_SME)) {
+ u64 msr;
+
+ /* Check if SME is enabled */
+ rdmsrl(MSR_K8_SYSCFG, msr);
+ if (msr & MSR_K8_SYSCFG_MEM_ENCRYPT) {
+ c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
+ if (IS_ENABLED(CONFIG_X86_32))
+ clear_cpu_cap(c, X86_FEATURE_SME);
+ } else {
+ clear_cpu_cap(c, X86_FEATURE_SME);
+ }
+ }
}
static void init_amd_k8(struct cpuinfo_x86 *c)
@@ -730,8 +765,6 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
static void init_amd(struct cpuinfo_x86 *c)
{
- u32 dummy;
-
early_init_amd(c);
/*
@@ -793,8 +826,6 @@ static void init_amd(struct cpuinfo_x86 *c)
if (c->x86 > 0x11)
set_cpu_cap(c, X86_FEATURE_ARAT);
- rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
-
/* 3DNow or LM implies PREFETCHW */
if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 7cf7c70b6ef2..0ee83321a313 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -40,13 +40,16 @@ static void aperfmperf_snapshot_khz(void *dummy)
struct aperfmperf_sample *s = this_cpu_ptr(&samples);
ktime_t now = ktime_get();
s64 time_delta = ktime_ms_delta(now, s->time);
+ unsigned long flags;
/* Don't bother re-computing within the cache threshold time. */
if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
return;
+ local_irq_save(flags);
rdmsrl(MSR_IA32_APERF, aperf);
rdmsrl(MSR_IA32_MPERF, mperf);
+ local_irq_restore(flags);
aperf_delta = aperf - s->aperf;
mperf_delta = mperf - s->mperf;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 0af86d9242da..db684880d74a 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -21,6 +21,14 @@
void __init check_bugs(void)
{
+#ifdef CONFIG_X86_32
+ /*
+ * Regardless of whether PCID is enumerated, the SDM says
+ * that it can't be enabled in 32-bit mode.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+#endif
+
identify_boot_cpu();
if (!IS_ENABLED(CONFIG_SMP)) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c8b39870f33e..fb1d3358a4af 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -168,6 +168,24 @@ static int __init x86_mpx_setup(char *s)
}
__setup("nompx", x86_mpx_setup);
+#ifdef CONFIG_X86_64
+static int __init x86_pcid_setup(char *s)
+{
+ /* require an exact match without trailing characters */
+ if (strlen(s))
+ return 0;
+
+ /* do not emit a message if the feature is not present */
+ if (!boot_cpu_has(X86_FEATURE_PCID))
+ return 1;
+
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+ pr_info("nopcid: PCID feature disabled\n");
+ return 1;
+}
+__setup("nopcid", x86_pcid_setup);
+#endif
+
static int __init x86_noinvpcid_setup(char *s)
{
/* noinvpcid doesn't accept parameters */
@@ -311,6 +329,38 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
}
}
+static void setup_pcid(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_PCID)) {
+ if (cpu_has(c, X86_FEATURE_PGE)) {
+ /*
+ * We'd like to use cr4_set_bits_and_update_boot(),
+ * but we can't. CR4.PCIDE is special and can only
+ * be set in long mode, and the early CPU init code
+ * doesn't know this and would try to restore CR4.PCIDE
+ * prior to entering long mode.
+ *
+ * Instead, we rely on the fact that hotplug, resume,
+ * etc all fully restore CR4 before they write anything
+ * that could have nonzero PCID bits to CR3. CR4.PCIDE
+ * has no effect on the page tables themselves, so we
+ * don't need it to be restored early.
+ */
+ cr4_set_bits(X86_CR4_PCIDE);
+ } else {
+ /*
+ * flush_tlb_all(), as currently implemented, won't
+ * work if PCID is on but PGE is not. Since that
+ * combination doesn't exist on real hardware, there's
+ * no reason to try to fully support it, but it's
+ * polite to avoid corrupting data if we're on
+ * an improperly configured VM.
+ */
+ clear_cpu_cap(c, X86_FEATURE_PCID);
+ }
+ }
+}
+
/*
* Protection Keys are not available in 32-bit mode.
*/
@@ -1125,6 +1175,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
setup_smep(c);
setup_smap(c);
+ /* Set up PCID */
+ setup_pcid(c);
+
/*
* The vendor-specific functions might have changed features.
* Now we do "generic changes."
@@ -1289,15 +1342,6 @@ static __init int setup_disablecpuid(char *arg)
__setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
-struct desc_ptr idt_descr __ro_after_init = {
- .size = NR_VECTORS * 16 - 1,
- .address = (unsigned long) idt_table,
-};
-const struct desc_ptr debug_idt_descr = {
- .size = NR_VECTORS * 16 - 1,
- .address = (unsigned long) debug_idt_table,
-};
-
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE) __visible;
@@ -1552,6 +1596,7 @@ void cpu_init(void)
mmgrab(&init_mm);
me->active_mm = &init_mm;
BUG_ON(me->mm);
+ initialize_tlbstate_and_flush();
enter_lazy_tlb(&init_mm, me);
load_sp0(t, &current->thread);
@@ -1606,6 +1651,7 @@ void cpu_init(void)
mmgrab(&init_mm);
curr->active_mm = &init_mm;
BUG_ON(curr->mm);
+ initialize_tlbstate_and_flush();
enter_lazy_tlb(&init_mm, curr);
load_sp0(t, thread);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index c55fb2cb2acc..24f749324c0f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -811,7 +811,24 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
struct cacheinfo *this_leaf;
int i, sibling;
- if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+ /*
+ * For L3, always use the pre-calculated cpu_llc_shared_mask
+ * to derive shared_cpu_map.
+ */
+ if (index == 3) {
+ for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
+ this_cpu_ci = get_cpu_cacheinfo(i);
+ if (!this_cpu_ci->info_list)
+ continue;
+ this_leaf = this_cpu_ci->info_list + index;
+ for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
+ if (!cpu_online(sibling))
+ continue;
+ cpumask_set_cpu(sibling,
+ &this_leaf->shared_cpu_map);
+ }
+ }
+ } else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
unsigned int apicid, nshared, first, last;
this_leaf = this_cpu_ci->info_list + index;
@@ -839,19 +856,6 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
&this_leaf->shared_cpu_map);
}
}
- } else if (index == 3) {
- for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
- this_cpu_ci = get_cpu_cacheinfo(i);
- if (!this_cpu_ci->info_list)
- continue;
- this_leaf = this_cpu_ci->info_list + index;
- for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
- if (!cpu_online(sibling))
- continue;
- cpumask_set_cpu(sibling,
- &this_leaf->shared_cpu_map);
- }
- }
} else
return 0;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 5b366462f579..cd5fc61ba450 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -30,7 +30,8 @@
#include <linux/cpuhotplug.h>
#include <asm/intel-family.h>
-#include <asm/intel_rdt.h>
+#include <asm/intel_rdt_sched.h>
+#include "intel_rdt.h"
#define MAX_MBA_BW 100u
#define MBA_IS_LINEAR 0x4
@@ -38,7 +39,13 @@
/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);
-DEFINE_PER_CPU_READ_MOSTLY(int, cpu_closid);
+/*
+ * The cached intel_pqr_state is strictly per CPU and can never be
+ * updated from a remote CPU. Functions which modify the state
+ * are called with interrupts disabled and no preemption, which
+ * is sufficient for the protection.
+ */
+DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
/*
* Used to store the max resource name width and max resource data width
@@ -46,6 +53,12 @@ DEFINE_PER_CPU_READ_MOSTLY(int, cpu_closid);
*/
int max_name_width, max_data_width;
+/*
+ * Global boolean for rdt_alloc which is true if any
+ * resource allocation is enabled.
+ */
+bool rdt_alloc_capable;
+
static void
mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
static void
@@ -54,7 +67,9 @@ cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
struct rdt_resource rdt_resources_all[] = {
+ [RDT_RESOURCE_L3] =
{
+ .rid = RDT_RESOURCE_L3,
.name = "L3",
.domains = domain_init(RDT_RESOURCE_L3),
.msr_base = IA32_L3_CBM_BASE,
@@ -67,8 +82,11 @@ struct rdt_resource rdt_resources_all[] = {
},
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
+ .fflags = RFTYPE_RES_CACHE,
},
+ [RDT_RESOURCE_L3DATA] =
{
+ .rid = RDT_RESOURCE_L3DATA,
.name = "L3DATA",
.domains = domain_init(RDT_RESOURCE_L3DATA),
.msr_base = IA32_L3_CBM_BASE,
@@ -81,8 +99,11 @@ struct rdt_resource rdt_resources_all[] = {
},
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
+ .fflags = RFTYPE_RES_CACHE,
},
+ [RDT_RESOURCE_L3CODE] =
{
+ .rid = RDT_RESOURCE_L3CODE,
.name = "L3CODE",
.domains = domain_init(RDT_RESOURCE_L3CODE),
.msr_base = IA32_L3_CBM_BASE,
@@ -95,8 +116,11 @@ struct rdt_resource rdt_resources_all[] = {
},
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
+ .fflags = RFTYPE_RES_CACHE,
},
+ [RDT_RESOURCE_L2] =
{
+ .rid = RDT_RESOURCE_L2,
.name = "L2",
.domains = domain_init(RDT_RESOURCE_L2),
.msr_base = IA32_L2_CBM_BASE,
@@ -109,8 +133,11 @@ struct rdt_resource rdt_resources_all[] = {
},
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
+ .fflags = RFTYPE_RES_CACHE,
},
+ [RDT_RESOURCE_MBA] =
{
+ .rid = RDT_RESOURCE_MBA,
.name = "MB",
.domains = domain_init(RDT_RESOURCE_MBA),
.msr_base = IA32_MBA_THRTL_BASE,
@@ -118,6 +145,7 @@ struct rdt_resource rdt_resources_all[] = {
.cache_level = 3,
.parse_ctrlval = parse_bw,
.format_str = "%d=%*d",
+ .fflags = RFTYPE_RES_MB,
},
};
@@ -144,33 +172,28 @@ static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid)
* is always 20 on hsw server parts. The minimum cache bitmask length
* allowed for HSW server is always 2 bits. Hardcode all of them.
*/
-static inline bool cache_alloc_hsw_probe(void)
+static inline void cache_alloc_hsw_probe(void)
{
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
- boot_cpu_data.x86 == 6 &&
- boot_cpu_data.x86_model == INTEL_FAM6_HASWELL_X) {
- struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
- u32 l, h, max_cbm = BIT_MASK(20) - 1;
-
- if (wrmsr_safe(IA32_L3_CBM_BASE, max_cbm, 0))
- return false;
- rdmsr(IA32_L3_CBM_BASE, l, h);
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
+ u32 l, h, max_cbm = BIT_MASK(20) - 1;
- /* If all the bits were set in MSR, return success */
- if (l != max_cbm)
- return false;
+ if (wrmsr_safe(IA32_L3_CBM_BASE, max_cbm, 0))
+ return;
+ rdmsr(IA32_L3_CBM_BASE, l, h);
- r->num_closid = 4;
- r->default_ctrl = max_cbm;
- r->cache.cbm_len = 20;
- r->cache.min_cbm_bits = 2;
- r->capable = true;
- r->enabled = true;
+ /* Bail out unless all the bits were set in the MSR */
+ if (l != max_cbm)
+ return;
- return true;
- }
+ r->num_closid = 4;
+ r->default_ctrl = max_cbm;
+ r->cache.cbm_len = 20;
+ r->cache.shareable_bits = 0xc0000;
+ r->cache.min_cbm_bits = 2;
+ r->alloc_capable = true;
+ r->alloc_enabled = true;
- return false;
+ rdt_alloc_capable = true;
}
/*
@@ -213,15 +236,14 @@ static bool rdt_get_mem_config(struct rdt_resource *r)
return false;
}
r->data_width = 3;
- rdt_get_mba_infofile(r);
- r->capable = true;
- r->enabled = true;
+ r->alloc_capable = true;
+ r->alloc_enabled = true;
return true;
}
-static void rdt_get_cache_config(int idx, struct rdt_resource *r)
+static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
{
union cpuid_0x10_1_eax eax;
union cpuid_0x10_x_edx edx;
@@ -231,10 +253,10 @@ static void rdt_get_cache_config(int idx, struct rdt_resource *r)
r->num_closid = edx.split.cos_max + 1;
r->cache.cbm_len = eax.split.cbm_len + 1;
r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
+ r->cache.shareable_bits = ebx & r->default_ctrl;
r->data_width = (r->cache.cbm_len + 3) / 4;
- rdt_get_cache_infofile(r);
- r->capable = true;
- r->enabled = true;
+ r->alloc_capable = true;
+ r->alloc_enabled = true;
}
static void rdt_get_cdp_l3_config(int type)
@@ -246,12 +268,12 @@ static void rdt_get_cdp_l3_config(int type)
r->cache.cbm_len = r_l3->cache.cbm_len;
r->default_ctrl = r_l3->default_ctrl;
r->data_width = (r->cache.cbm_len + 3) / 4;
- r->capable = true;
+ r->alloc_capable = true;
/*
* By default, CDP is disabled. CDP can be enabled by mount parameter
* "cdp" during resctrl file system mount time.
*/
- r->enabled = false;
+ r->alloc_enabled = false;
}
static int get_cache_id(int cpu, int level)
@@ -300,6 +322,19 @@ cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
wrmsrl(r->msr_base + cbm_idx(r, i), d->ctrl_val[i]);
}
+struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
+{
+ struct rdt_domain *d;
+
+ list_for_each_entry(d, &r->domains, list) {
+ /* Find the domain that contains this CPU */
+ if (cpumask_test_cpu(cpu, &d->cpu_mask))
+ return d;
+ }
+
+ return NULL;
+}
+
void rdt_ctrl_update(void *arg)
{
struct msr_param *m = arg;
@@ -307,12 +342,10 @@ void rdt_ctrl_update(void *arg)
int cpu = smp_processor_id();
struct rdt_domain *d;
- list_for_each_entry(d, &r->domains, list) {
- /* Find the domain that contains this CPU */
- if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
- r->msr_update(d, m, r);
- return;
- }
+ d = get_domain_from_cpu(cpu, r);
+ if (d) {
+ r->msr_update(d, m, r);
+ return;
}
pr_warn_once("cpu %d not found in any domain for resource %s\n",
cpu, r->name);
@@ -326,8 +359,8 @@ void rdt_ctrl_update(void *arg)
* caller, return the first domain whose id is bigger than the input id.
* The domain list is sorted by id in ascending order.
*/
-static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
- struct list_head **pos)
+struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
+ struct list_head **pos)
{
struct rdt_domain *d;
struct list_head *l;
@@ -377,6 +410,44 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
return 0;
}
+static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
+{
+ size_t tsize;
+
+ if (is_llc_occupancy_enabled()) {
+ d->rmid_busy_llc = kcalloc(BITS_TO_LONGS(r->num_rmid),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!d->rmid_busy_llc)
+ return -ENOMEM;
+ INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
+ }
+ if (is_mbm_total_enabled()) {
+ tsize = sizeof(*d->mbm_total);
+ d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+ if (!d->mbm_total) {
+ kfree(d->rmid_busy_llc);
+ return -ENOMEM;
+ }
+ }
+ if (is_mbm_local_enabled()) {
+ tsize = sizeof(*d->mbm_local);
+ d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+ if (!d->mbm_local) {
+ kfree(d->rmid_busy_llc);
+ kfree(d->mbm_total);
+ return -ENOMEM;
+ }
+ }
+
+ if (is_mbm_enabled()) {
+ INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
+ mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL);
+ }
+
+ return 0;
+}
+
/*
* domain_add_cpu - Add a cpu to a resource's domain list.
*
@@ -412,14 +483,26 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
return;
d->id = id;
+ cpumask_set_cpu(cpu, &d->cpu_mask);
- if (domain_setup_ctrlval(r, d)) {
+ if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
+ kfree(d);
+ return;
+ }
+
+ if (r->mon_capable && domain_setup_mon_state(r, d)) {
kfree(d);
return;
}
- cpumask_set_cpu(cpu, &d->cpu_mask);
list_add_tail(&d->list, add_pos);
+
+ /*
+ * If resctrl is mounted, add
+ * per domain monitor data directories.
+ */
+ if (static_branch_unlikely(&rdt_mon_enable_key))
+ mkdir_mondata_subdir_allrdtgrp(r, d);
}
static void domain_remove_cpu(int cpu, struct rdt_resource *r)
@@ -435,19 +518,58 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cpumask_clear_cpu(cpu, &d->cpu_mask);
if (cpumask_empty(&d->cpu_mask)) {
+ /*
+ * If resctrl is mounted, remove all the
+ * per domain monitor data directories.
+ */
+ if (static_branch_unlikely(&rdt_mon_enable_key))
+ rmdir_mondata_subdir_allrdtgrp(r, d->id);
kfree(d->ctrl_val);
+ kfree(d->rmid_busy_llc);
+ kfree(d->mbm_total);
+ kfree(d->mbm_local);
list_del(&d->list);
+ if (is_mbm_enabled())
+ cancel_delayed_work(&d->mbm_over);
+ if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) {
+ /*
+ * When a package is going down, forcefully
+ * decrement rmid->ebusy. There is no way to know
+ * that the L3 was flushed and hence may lead to
+ * incorrect counts in rare scenarios, but leaving
+ * the RMID as busy creates RMID leaks if the
+ * package never comes back.
+ */
+ __check_limbo(d, true);
+ cancel_delayed_work(&d->cqm_limbo);
+ }
+
kfree(d);
+ return;
+ }
+
+ if (r == &rdt_resources_all[RDT_RESOURCE_L3]) {
+ if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
+ cancel_delayed_work(&d->mbm_over);
+ mbm_setup_overflow_handler(d, 0);
+ }
+ if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
+ has_busy_rmid(r, d)) {
+ cancel_delayed_work(&d->cqm_limbo);
+ cqm_setup_limbo_handler(d, 0);
+ }
}
}
-static void clear_closid(int cpu)
+static void clear_closid_rmid(int cpu)
{
struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
- per_cpu(cpu_closid, cpu) = 0;
- state->closid = 0;
- wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, 0);
+ state->default_closid = 0;
+ state->default_rmid = 0;
+ state->cur_closid = 0;
+ state->cur_rmid = 0;
+ wrmsr(IA32_PQR_ASSOC, 0, 0);
}
static int intel_rdt_online_cpu(unsigned int cpu)
@@ -459,12 +581,23 @@ static int intel_rdt_online_cpu(unsigned int cpu)
domain_add_cpu(cpu, r);
/* The cpu is set in default rdtgroup after online. */
cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
- clear_closid(cpu);
+ clear_closid_rmid(cpu);
mutex_unlock(&rdtgroup_mutex);
return 0;
}
+static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
+{
+ struct rdtgroup *cr;
+
+ list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
+ if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) {
+ break;
+ }
+ }
+}
+
static int intel_rdt_offline_cpu(unsigned int cpu)
{
struct rdtgroup *rdtgrp;
@@ -474,10 +607,12 @@ static int intel_rdt_offline_cpu(unsigned int cpu)
for_each_capable_rdt_resource(r)
domain_remove_cpu(cpu, r);
list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
- if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask))
+ if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
+ clear_childcpus(rdtgrp, cpu);
break;
+ }
}
- clear_closid(cpu);
+ clear_closid_rmid(cpu);
mutex_unlock(&rdtgroup_mutex);
return 0;
@@ -492,7 +627,7 @@ static __init void rdt_init_padding(void)
struct rdt_resource *r;
int cl;
- for_each_capable_rdt_resource(r) {
+ for_each_alloc_capable_rdt_resource(r) {
cl = strlen(r->name);
if (cl > max_name_width)
max_name_width = cl;
@@ -502,38 +637,153 @@ static __init void rdt_init_padding(void)
}
}
-static __init bool get_rdt_resources(void)
+enum {
+ RDT_FLAG_CMT,
+ RDT_FLAG_MBM_TOTAL,
+ RDT_FLAG_MBM_LOCAL,
+ RDT_FLAG_L3_CAT,
+ RDT_FLAG_L3_CDP,
+ RDT_FLAG_L2_CAT,
+ RDT_FLAG_MBA,
+};
+
+#define RDT_OPT(idx, n, f) \
+[idx] = { \
+ .name = n, \
+ .flag = f \
+}
+
+struct rdt_options {
+ char *name;
+ int flag;
+ bool force_off, force_on;
+};
+
+static struct rdt_options rdt_options[] __initdata = {
+ RDT_OPT(RDT_FLAG_CMT, "cmt", X86_FEATURE_CQM_OCCUP_LLC),
+ RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL),
+ RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL),
+ RDT_OPT(RDT_FLAG_L3_CAT, "l3cat", X86_FEATURE_CAT_L3),
+ RDT_OPT(RDT_FLAG_L3_CDP, "l3cdp", X86_FEATURE_CDP_L3),
+ RDT_OPT(RDT_FLAG_L2_CAT, "l2cat", X86_FEATURE_CAT_L2),
+ RDT_OPT(RDT_FLAG_MBA, "mba", X86_FEATURE_MBA),
+};
+#define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
+
+static int __init set_rdt_options(char *str)
+{
+ struct rdt_options *o;
+ bool force_off;
+ char *tok;
+
+ if (*str == '=')
+ str++;
+ while ((tok = strsep(&str, ",")) != NULL) {
+ force_off = *tok == '!';
+ if (force_off)
+ tok++;
+ for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
+ if (strcmp(tok, o->name) == 0) {
+ if (force_off)
+ o->force_off = true;
+ else
+ o->force_on = true;
+ break;
+ }
+ }
+ }
+ return 1;
+}
+__setup("rdt", set_rdt_options);
+
+static bool __init rdt_cpu_has(int flag)
+{
+ bool ret = boot_cpu_has(flag);
+ struct rdt_options *o;
+
+ if (!ret)
+ return ret;
+
+ for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
+ if (flag == o->flag) {
+ if (o->force_off)
+ ret = false;
+ if (o->force_on)
+ ret = true;
+ break;
+ }
+ }
+ return ret;
+}
+
+static __init bool get_rdt_alloc_resources(void)
{
bool ret = false;
- if (cache_alloc_hsw_probe())
+ if (rdt_alloc_capable)
return true;
if (!boot_cpu_has(X86_FEATURE_RDT_A))
return false;
- if (boot_cpu_has(X86_FEATURE_CAT_L3)) {
- rdt_get_cache_config(1, &rdt_resources_all[RDT_RESOURCE_L3]);
- if (boot_cpu_has(X86_FEATURE_CDP_L3)) {
+ if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
+ rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]);
+ if (rdt_cpu_has(X86_FEATURE_CDP_L3)) {
rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA);
rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE);
}
ret = true;
}
- if (boot_cpu_has(X86_FEATURE_CAT_L2)) {
+ if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
/* CPUID 0x10.2 fields are same format at 0x10.1 */
- rdt_get_cache_config(2, &rdt_resources_all[RDT_RESOURCE_L2]);
+ rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]);
ret = true;
}
- if (boot_cpu_has(X86_FEATURE_MBA)) {
+ if (rdt_cpu_has(X86_FEATURE_MBA)) {
if (rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
ret = true;
}
-
return ret;
}
+static __init bool get_rdt_mon_resources(void)
+{
+ if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
+ rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
+ if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
+ rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
+ if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
+ rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);
+
+ if (!rdt_mon_features)
+ return false;
+
+ return !rdt_get_mon_l3_config(&rdt_resources_all[RDT_RESOURCE_L3]);
+}
+
+static __init void rdt_quirks(void)
+{
+ switch (boot_cpu_data.x86_model) {
+ case INTEL_FAM6_HASWELL_X:
+ if (!rdt_options[RDT_FLAG_L3_CAT].force_off)
+ cache_alloc_hsw_probe();
+ break;
+ case INTEL_FAM6_SKYLAKE_X:
+ if (boot_cpu_data.x86_mask <= 4)
+ set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
+ }
+}
+
+static __init bool get_rdt_resources(void)
+{
+ rdt_quirks();
+ rdt_alloc_capable = get_rdt_alloc_resources();
+ rdt_mon_capable = get_rdt_mon_resources();
+
+ return (rdt_mon_capable || rdt_alloc_capable);
+}
+
static int __init intel_rdt_late_init(void)
{
struct rdt_resource *r;
@@ -556,9 +806,12 @@ static int __init intel_rdt_late_init(void)
return ret;
}
- for_each_capable_rdt_resource(r)
+ for_each_alloc_capable_rdt_resource(r)
pr_info("Intel RDT %s allocation detected\n", r->name);
+ for_each_mon_capable_rdt_resource(r)
+ pr_info("Intel RDT %s monitoring detected\n", r->name);
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
new file mode 100644
index 000000000000..ebaddaeef023
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -0,0 +1,440 @@
+#ifndef _ASM_X86_INTEL_RDT_H
+#define _ASM_X86_INTEL_RDT_H
+
+#include <linux/sched.h>
+#include <linux/kernfs.h>
+#include <linux/jump_label.h>
+
+#define IA32_L3_QOS_CFG 0xc81
+#define IA32_L3_CBM_BASE 0xc90
+#define IA32_L2_CBM_BASE 0xd10
+#define IA32_MBA_THRTL_BASE 0xd50
+
+#define L3_QOS_CDP_ENABLE 0x01ULL
+
+/*
+ * Event IDs are used to program IA32_QM_EVTSEL before reading event
+ * counter from IA32_QM_CTR
+ */
+#define QOS_L3_OCCUP_EVENT_ID 0x01
+#define QOS_L3_MBM_TOTAL_EVENT_ID 0x02
+#define QOS_L3_MBM_LOCAL_EVENT_ID 0x03
+
+#define CQM_LIMBOCHECK_INTERVAL 1000
+
+#define MBM_CNTR_WIDTH 24
+#define MBM_OVERFLOW_INTERVAL 1000
+
+#define RMID_VAL_ERROR BIT_ULL(63)
+#define RMID_VAL_UNAVAIL BIT_ULL(62)
+
+DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
+
+/**
+ * struct mon_evt - Entry in the event list of a resource
+ * @evtid: event id
+ * @name: name of the event
+ */
+struct mon_evt {
+ u32 evtid;
+ char *name;
+ struct list_head list;
+};
+
+/**
+ * union mon_data_bits - Monitoring details for each event file
+ * @rid: Resource id associated with the event file.
+ * @evtid: Event id associated with the event file
+ * @domid: The domain to which the event file belongs
+ */
+union mon_data_bits {
+ void *priv;
+ struct {
+ unsigned int rid : 10;
+ unsigned int evtid : 8;
+ unsigned int domid : 14;
+ } u;
+};
+
+struct rmid_read {
+ struct rdtgroup *rgrp;
+ struct rdt_domain *d;
+ int evtid;
+ bool first;
+ u64 val;
+};
+
+extern unsigned int intel_cqm_threshold;
+extern bool rdt_alloc_capable;
+extern bool rdt_mon_capable;
+extern unsigned int rdt_mon_features;
+
+enum rdt_group_type {
+ RDTCTRL_GROUP = 0,
+ RDTMON_GROUP,
+ RDT_NUM_GROUP,
+};
+
+/**
+ * struct mongroup - store mon group's data in resctrl fs.
+ * @mon_data_kn: kernfs node for the mon_data directory
+ * @parent: parent rdtgrp
+ * @crdtgrp_list: child rdtgroup node list
+ * @rmid: rmid for this rdtgroup
+ */
+struct mongroup {
+ struct kernfs_node *mon_data_kn;
+ struct rdtgroup *parent;
+ struct list_head crdtgrp_list;
+ u32 rmid;
+};
+
+/**
+ * struct rdtgroup - store rdtgroup's data in resctrl file system.
+ * @kn: kernfs node
+ * @rdtgroup_list: linked list for all rdtgroups
+ * @closid: closid for this rdtgroup
+ * @cpu_mask: CPUs assigned to this rdtgroup
+ * @flags: status bits
+ * @waitcount: how many cpus expect to find this
+ * group when they acquire rdtgroup_mutex
+ * @type: indicates type of this rdtgroup - either
+ * monitor only or ctrl_mon group
+ * @mon: mongroup related data
+ */
+struct rdtgroup {
+ struct kernfs_node *kn;
+ struct list_head rdtgroup_list;
+ u32 closid;
+ struct cpumask cpu_mask;
+ int flags;
+ atomic_t waitcount;
+ enum rdt_group_type type;
+ struct mongroup mon;
+};
+
+/* rdtgroup.flags */
+#define RDT_DELETED 1
+
+/* rftype.flags */
+#define RFTYPE_FLAGS_CPUS_LIST 1
+
+/*
+ * Define the file type flags for base and info directories.
+ */
+#define RFTYPE_INFO BIT(0)
+#define RFTYPE_BASE BIT(1)
+#define RF_CTRLSHIFT 4
+#define RF_MONSHIFT 5
+#define RFTYPE_CTRL BIT(RF_CTRLSHIFT)
+#define RFTYPE_MON BIT(RF_MONSHIFT)
+#define RFTYPE_RES_CACHE BIT(8)
+#define RFTYPE_RES_MB BIT(9)
+#define RF_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL)
+#define RF_MON_INFO (RFTYPE_INFO | RFTYPE_MON)
+#define RF_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL)
+
+/* List of all resource groups */
+extern struct list_head rdt_all_groups;
+
+extern int max_name_width, max_data_width;
+
+int __init rdtgroup_init(void);
+
+/**
+ * struct rftype - describe each file in the resctrl file system
+ * @name: File name
+ * @mode: Access mode
+ * @kf_ops: File operations
+ * @flags: File specific RFTYPE_FLAGS_* flags
+ * @fflags: File specific RF_* or RFTYPE_* flags
+ * @seq_show: Show content of the file
+ * @write: Write to the file
+ */
+struct rftype {
+ char *name;
+ umode_t mode;
+ struct kernfs_ops *kf_ops;
+ unsigned long flags;
+ unsigned long fflags;
+
+ int (*seq_show)(struct kernfs_open_file *of,
+ struct seq_file *sf, void *v);
+ /*
+ * write() is the generic write callback which maps directly to
+ * kernfs write operation and overrides all other operations.
+ * Maximum write size is determined by ->max_write_len.
+ */
+ ssize_t (*write)(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off);
+};
+
+/**
+ * struct mbm_state - status for each MBM counter in each domain
+ * @chunks: Total data moved (multiply by rdt_resource.mon_scale to get bytes)
+ * @prev_msr: Value of IA32_QM_CTR for this RMID last time we read it
+ */
+struct mbm_state {
+ u64 chunks;
+ u64 prev_msr;
+};
+
+/**
+ * struct rdt_domain - group of cpus sharing an RDT resource
+ * @list: all instances of this resource
+ * @id: unique id for this instance
+ * @cpu_mask: which cpus share this resource
+ * @rmid_busy_llc:
+ * bitmap of which limbo RMIDs are above threshold
+ * @mbm_total: saved state for MBM total bandwidth
+ * @mbm_local: saved state for MBM local bandwidth
+ * @mbm_over: worker to periodically read MBM h/w counters
+ * @cqm_limbo: worker to periodically read CQM h/w counters
+ * @mbm_work_cpu:
+ * worker cpu for MBM h/w counters
+ * @cqm_work_cpu:
+ * worker cpu for CQM h/w counters
+ * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
+ * @new_ctrl: new ctrl value to be loaded
+ * @have_new_ctrl: did user provide new_ctrl for this domain
+ */
+struct rdt_domain {
+ struct list_head list;
+ int id;
+ struct cpumask cpu_mask;
+ unsigned long *rmid_busy_llc;
+ struct mbm_state *mbm_total;
+ struct mbm_state *mbm_local;
+ struct delayed_work mbm_over;
+ struct delayed_work cqm_limbo;
+ int mbm_work_cpu;
+ int cqm_work_cpu;
+ u32 *ctrl_val;
+ u32 new_ctrl;
+ bool have_new_ctrl;
+};
+
+/**
+ * struct msr_param - set a range of MSRs from a domain
+ * @res: The resource to use
+ * @low: Beginning index from base MSR
+ * @high: End index
+ */
+struct msr_param {
+ struct rdt_resource *res;
+ int low;
+ int high;
+};
+
+/**
+ * struct rdt_cache - Cache allocation related data
+ * @cbm_len: Length of the cache bit mask
+ * @min_cbm_bits: Minimum number of consecutive bits to be set
+ * @cbm_idx_mult: Multiplier of CBM index
+ * @cbm_idx_offset: Offset of CBM index. CBM index is computed by:
+ * closid * cbm_idx_mult + cbm_idx_offset
+ * in a cache bit mask
+ * @shareable_bits: Bitmask of shareable resource with other
+ * executing entities
+ */
+struct rdt_cache {
+ unsigned int cbm_len;
+ unsigned int min_cbm_bits;
+ unsigned int cbm_idx_mult;
+ unsigned int cbm_idx_offset;
+ unsigned int shareable_bits;
+};
+
+/**
+ * struct rdt_membw - Memory bandwidth allocation related data
+ * @max_delay: Max throttle delay. Delay is the hardware
+ * representation for memory bandwidth.
+ * @min_bw: Minimum memory bandwidth percentage user can request
+ * @bw_gran: Granularity at which the memory bandwidth is allocated
+ * @delay_linear: True if memory B/W delay is in linear scale
+ * @mb_map: Mapping of memory B/W percentage to memory B/W delay
+ */
+struct rdt_membw {
+ u32 max_delay;
+ u32 min_bw;
+ u32 bw_gran;
+ u32 delay_linear;
+ u32 *mb_map;
+};
+
+static inline bool is_llc_occupancy_enabled(void)
+{
+ return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID));
+}
+
+static inline bool is_mbm_total_enabled(void)
+{
+ return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID));
+}
+
+static inline bool is_mbm_local_enabled(void)
+{
+ return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID));
+}
+
+static inline bool is_mbm_enabled(void)
+{
+ return (is_mbm_total_enabled() || is_mbm_local_enabled());
+}
+
+static inline bool is_mbm_event(int e)
+{
+ return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
+ e <= QOS_L3_MBM_LOCAL_EVENT_ID);
+}
+
+/**
+ * struct rdt_resource - attributes of an RDT resource
+ * @rid: The index of the resource
+ * @alloc_enabled: Is allocation enabled on this machine
+ * @mon_enabled: Is monitoring enabled for this feature
+ * @alloc_capable: Is allocation available on this machine
+ * @mon_capable: Is monitor feature available on this machine
+ * @name: Name to use in "schemata" file
+ * @num_closid: Number of CLOSIDs available
+ * @cache_level: Which cache level defines scope of this resource
+ * @default_ctrl: Specifies default cache cbm or memory B/W percent.
+ * @msr_base: Base MSR address for CBMs
+ * @msr_update: Function pointer to update QOS MSRs
+ * @data_width: Character width of data when displaying
+ * @domains: All domains for this resource
+ * @cache: Cache allocation related data
+ * @format_str: Per resource format string to show domain value
+ * @parse_ctrlval: Per resource function pointer to parse control values
+ * @evt_list: List of monitoring events
+ * @num_rmid: Number of RMIDs available
+ * @mon_scale: cqm counter * mon_scale = occupancy in bytes
+ * @fflags: flags to choose base and info files
+ */
+struct rdt_resource {
+ int rid;
+ bool alloc_enabled;
+ bool mon_enabled;
+ bool alloc_capable;
+ bool mon_capable;
+ char *name;
+ int num_closid;
+ int cache_level;
+ u32 default_ctrl;
+ unsigned int msr_base;
+ void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
+ struct rdt_resource *r);
+ int data_width;
+ struct list_head domains;
+ struct rdt_cache cache;
+ struct rdt_membw membw;
+ const char *format_str;
+ int (*parse_ctrlval) (char *buf, struct rdt_resource *r,
+ struct rdt_domain *d);
+ struct list_head evt_list;
+ int num_rmid;
+ unsigned int mon_scale;
+ unsigned long fflags;
+};
+
+int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d);
+int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d);
+
+extern struct mutex rdtgroup_mutex;
+
+extern struct rdt_resource rdt_resources_all[];
+extern struct rdtgroup rdtgroup_default;
+DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
+
+int __init rdtgroup_init(void);
+
+enum {
+ RDT_RESOURCE_L3,
+ RDT_RESOURCE_L3DATA,
+ RDT_RESOURCE_L3CODE,
+ RDT_RESOURCE_L2,
+ RDT_RESOURCE_MBA,
+
+ /* Must be the last */
+ RDT_NUM_RESOURCES,
+};
+
+#define for_each_capable_rdt_resource(r) \
+ for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+ r++) \
+ if (r->alloc_capable || r->mon_capable)
+
+#define for_each_alloc_capable_rdt_resource(r) \
+ for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+ r++) \
+ if (r->alloc_capable)
+
+#define for_each_mon_capable_rdt_resource(r) \
+ for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+ r++) \
+ if (r->mon_capable)
+
+#define for_each_alloc_enabled_rdt_resource(r) \
+ for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+ r++) \
+ if (r->alloc_enabled)
+
+#define for_each_mon_enabled_rdt_resource(r) \
+ for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
+ r++) \
+ if (r->mon_enabled)
+
+/* CPUID.(EAX=10H, ECX=ResID=1).EAX */
+union cpuid_0x10_1_eax {
+ struct {
+ unsigned int cbm_len:5;
+ } split;
+ unsigned int full;
+};
+
+/* CPUID.(EAX=10H, ECX=ResID=3).EAX */
+union cpuid_0x10_3_eax {
+ struct {
+ unsigned int max_delay:12;
+ } split;
+ unsigned int full;
+};
+
+/* CPUID.(EAX=10H, ECX=ResID).EDX */
+union cpuid_0x10_x_edx {
+ struct {
+ unsigned int cos_max:16;
+ } split;
+ unsigned int full;
+};
+
+void rdt_ctrl_update(void *arg);
+struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
+void rdtgroup_kn_unlock(struct kernfs_node *kn);
+struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
+ struct list_head **pos);
+ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off);
+int rdtgroup_schemata_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v);
+struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
+int alloc_rmid(void);
+void free_rmid(u32 rmid);
+int rdt_get_mon_l3_config(struct rdt_resource *r);
+void mon_event_count(void *info);
+int rdtgroup_mondata_show(struct seq_file *m, void *arg);
+void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
+ unsigned int dom_id);
+void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
+ struct rdt_domain *d);
+void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
+ struct rdtgroup *rdtgrp, int evtid, int first);
+void mbm_setup_overflow_handler(struct rdt_domain *dom,
+ unsigned long delay_ms);
+void mbm_handle_overflow(struct work_struct *work);
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
+void cqm_handle_limbo(struct work_struct *work);
+bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
+void __check_limbo(struct rdt_domain *d, bool force_free);
+
+#endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index 406d7a6532f9..f6ea94f8954a 100644
--- a/arch/x86/kernel/cpu/intel_rdt_schemata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -26,7 +26,7 @@
#include <linux/kernfs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
-#include <asm/intel_rdt.h>
+#include "intel_rdt.h"
/*
* Check whether MBA bandwidth percentage value is correct. The value is
@@ -192,7 +192,7 @@ static int rdtgroup_parse_resource(char *resname, char *tok, int closid)
{
struct rdt_resource *r;
- for_each_enabled_rdt_resource(r) {
+ for_each_alloc_enabled_rdt_resource(r) {
if (!strcmp(resname, r->name) && closid < r->num_closid)
return parse_line(tok, r);
}
@@ -221,7 +221,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
closid = rdtgrp->closid;
- for_each_enabled_rdt_resource(r) {
+ for_each_alloc_enabled_rdt_resource(r) {
list_for_each_entry(dom, &r->domains, list)
dom->have_new_ctrl = false;
}
@@ -237,7 +237,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
goto out;
}
- for_each_enabled_rdt_resource(r) {
+ for_each_alloc_enabled_rdt_resource(r) {
ret = update_domains(r, closid);
if (ret)
goto out;
@@ -269,12 +269,13 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
{
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
- int closid, ret = 0;
+ int ret = 0;
+ u32 closid;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (rdtgrp) {
closid = rdtgrp->closid;
- for_each_enabled_rdt_resource(r) {
+ for_each_alloc_enabled_rdt_resource(r) {
if (closid < r->num_closid)
show_doms(s, r, closid);
}
@@ -284,3 +285,57 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
rdtgroup_kn_unlock(of->kn);
return ret;
}
+
+/*
+ * Fill in @rr for one event read and run mon_event_count() on any CPU of
+ * domain @d, waiting for it to complete. The accumulated result is left
+ * in rr->val.
+ */
+void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
+		    struct rdtgroup *rdtgrp, int evtid, int first)
+{
+	/* start from a zeroed accumulator, then describe what to read */
+	rr->val = 0;
+	rr->first = first;
+	rr->d = d;
+	rr->evtid = evtid;
+	rr->rgrp = rdtgrp;
+
+	smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1);
+}
+
+/*
+ * Show the value of one monitoring event for one domain of a resource group.
+ *
+ * The resource id, domain id and event id are decoded from the private data
+ * of the kernfs node. The counter is read through mon_event_read() and
+ * printed multiplied by the resource's mon_scale; "Error" or "Unavailable"
+ * is printed instead when the corresponding bit is set in the value read.
+ *
+ * Returns 0 on success, or -ENOENT when rdtgroup_kn_lock_live() yields no
+ * group or the domain cannot be found.
+ */
+int rdtgroup_mondata_show(struct seq_file *m, void *arg)
+{
+	struct kernfs_open_file *of = m->private;
+	u32 resid, evtid, domid;
+	struct rdtgroup *rdtgrp;
+	struct rdt_resource *r;
+	union mon_data_bits md;
+	struct rdt_domain *d;
+	struct rmid_read rr;
+	int ret = 0;
+
+	rdtgrp = rdtgroup_kn_lock_live(of->kn);
+	if (!rdtgrp) {
+		/*
+		 * mon_event_count() dereferences the group, so do not
+		 * start a read without one. The unlock at "out" is still
+		 * required to balance the lock call above.
+		 */
+		ret = -ENOENT;
+		goto out;
+	}
+
+	md.priv = of->kn->priv;
+	resid = md.u.rid;
+	domid = md.u.domid;
+	evtid = md.u.evtid;
+
+	r = &rdt_resources_all[resid];
+	d = rdt_find_domain(r, domid, NULL);
+	if (!d) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	mon_event_read(&rr, d, rdtgrp, evtid, false);
+
+	if (rr.val & RMID_VAL_ERROR)
+		seq_puts(m, "Error\n");
+	else if (rr.val & RMID_VAL_UNAVAIL)
+		seq_puts(m, "Unavailable\n");
+	else
+		seq_printf(m, "%llu\n", rr.val * r->mon_scale);
+
+out:
+	rdtgroup_kn_unlock(of->kn);
+	return ret;
+}
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c
new file mode 100644
index 000000000000..30827510094b
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c
@@ -0,0 +1,499 @@
+/*
+ * Resource Director Technology(RDT)
+ * - Monitoring code
+ *
+ * Copyright (C) 2017 Intel Corporation
+ *
+ * Author:
+ * Vikas Shivappa <vikas.shivappa@intel.com>
+ *
+ * This replaces the cqm.c based on perf but we reuse a lot of
+ * code and datastructures originally from Peter Zijlstra and Matt Fleming.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * More information about RDT can be found in the Intel (R) x86 Architecture
+ * Software Developer Manual June 2016, volume 3, section 17.17.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/cpu_device_id.h>
+#include "intel_rdt.h"
+
+/* Counter MSR: __rmid_read() reads the monitored value from here */
+#define MSR_IA32_QM_CTR 0x0c8e
+/* Event-select MSR: __rmid_read() writes the event id and the RMID here */
+#define MSR_IA32_QM_EVTSEL 0x0c8d
+
+/*
+ * Bookkeeping for a single RMID. One entry per RMID lives in rmid_ptrs[].
+ */
+struct rmid_entry {
+ /* the RMID number; dom_data_init() sets it to the entry's array index */
+ u32 rmid;
+ /* number of domains whose rmid_busy_llc bitmap still has this RMID set */
+ int busy;
+ /* linkage on rmid_free_lru while the RMID is free */
+ struct list_head list;
+};
+
+/*
+ * rmid_free_lru - A least recently used list of free RMIDs.
+ * These RMIDs are guaranteed to have an occupancy less than the
+ * threshold occupancy.
+ */
+static LIST_HEAD(rmid_free_lru);
+
+/*
+ * rmid_limbo_count - Count of currently unused but (potentially)
+ * dirty RMIDs.
+ * This counts RMIDs that no one is currently using but that
+ * may have an occupancy value > intel_cqm_threshold. The user can change
+ * the threshold occupancy value.
+ */
+unsigned int rmid_limbo_count;
+
+/*
+ * rmid_ptrs - Array of struct rmid_entry, indexed by RMID. It is
+ * allocated in dom_data_init().
+ */
+static struct rmid_entry *rmid_ptrs;
+
+/*
+ * Global boolean for rdt_monitor which is true if any
+ * resource monitoring is enabled.
+ */
+bool rdt_mon_capable;
+
+/*
+ * Global to indicate which monitoring events are enabled.
+ */
+unsigned int rdt_mon_features;
+
+/*
+ * This is the threshold cache occupancy at which we will consider an
+ * RMID available for re-allocation. It is kept in hardware units, i.e.
+ * bytes divided by the resource's mon_scale.
+ */
+unsigned int intel_cqm_threshold;
+
+/*
+ * Look up the bookkeeping entry for @rmid in rmid_ptrs[]. Warns if the
+ * entry found there does not record the same RMID.
+ */
+static inline struct rmid_entry *__rmid_entry(u32 rmid)
+{
+	struct rmid_entry *e = &rmid_ptrs[rmid];
+
+	WARN_ON(e->rmid != rmid);
+	return e;
+}
+
+/*
+ * Read the counter selected by (@rmid, @eventid) on the current CPU.
+ *
+ * Programs IA32_QM_EVTSEL and returns the raw IA32_QM_CTR value. The
+ * Error and Unavailable bits are not interpreted here; they are returned
+ * as part of the value.
+ */
+static u64 __rmid_read(u32 rmid, u32 eventid)
+{
+ u64 val;
+
+ /*
+ * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
+ * with a valid event code for supported resource type and the bits
+ * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID,
+ * IA32_QM_CTR.data (bits 61:0) reports the monitored data.
+ * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
+ * are error bits.
+ */
+ wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+ rdmsrl(MSR_IA32_QM_CTR, val);
+
+ return val;
+}
+
+/*
+ * An RMID is dirty while the value read for its LLC occupancy event is
+ * at or above intel_cqm_threshold.
+ */
+static bool rmid_dirty(struct rmid_entry *entry)
+{
+	return __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID) >=
+	       intel_cqm_threshold;
+}
+
+/*
+ * Walk the RMIDs marked busy in domain @d. An RMID whose LLC occupancy is
+ * no longer dirty (or any busy RMID when @force_free is set) has its busy
+ * bit cleared for this domain and its busy count decremented. Once the
+ * count reaches zero the RMID leaves limbo and is appended to the free
+ * list.
+ */
+void __check_limbo(struct rdt_domain *d, bool force_free)
+{
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
+	struct rmid_entry *entry;
+	u32 idx;
+
+	/* RMID 0 is skipped: the scan of the busy bitmap starts at bit 1 */
+	for (idx = find_next_bit(d->rmid_busy_llc, r->num_rmid, 1);
+	     idx < r->num_rmid;
+	     idx = find_next_bit(d->rmid_busy_llc, r->num_rmid, idx + 1)) {
+		entry = __rmid_entry(idx);
+
+		/* still above the threshold and not forced: leave it busy */
+		if (!force_free && rmid_dirty(entry))
+			continue;
+
+		clear_bit(entry->rmid, d->rmid_busy_llc);
+
+		/* other domains still hold this RMID busy */
+		if (--entry->busy)
+			continue;
+
+		rmid_limbo_count--;
+		list_add_tail(&entry->list, &rmid_free_lru);
+	}
+}
+
+/* True if at least one bit is set in the busy-RMID bitmap of domain @d. */
+bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
+{
+	unsigned long first = find_first_bit(d->rmid_busy_llc, r->num_rmid);
+
+	return first != r->num_rmid;
+}
+
+/*
+ * Take the first RMID off the global free list. RMID allocation is global,
+ * while per-domain busy bitmaps are kept to manage RMIDs in limbo.
+ *
+ * Must be called with rdtgroup_mutex held.
+ *
+ * Returns the RMID, -EBUSY when the free list is empty but some RMIDs are
+ * in limbo, or -ENOSPC when the free list is empty and none are in limbo.
+ */
+int alloc_rmid(void)
+{
+	struct rmid_entry *entry;
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	if (!list_empty(&rmid_free_lru)) {
+		entry = list_first_entry(&rmid_free_lru,
+					 struct rmid_entry, list);
+		list_del(&entry->list);
+		return entry->rmid;
+	}
+
+	if (rmid_limbo_count)
+		return -EBUSY;
+
+	return -ENOSPC;
+}
+
+/*
+ * Put a released RMID into limbo, or straight back on the free list.
+ *
+ * Every L3 domain is visited. For the domain containing the current CPU
+ * the LLC occupancy is read, and the domain is skipped when the value is
+ * at or below intel_cqm_threshold. Every other domain is marked busy
+ * without reading its counter. entry->busy ends up as the number of
+ * domains marked. If it is zero the RMID is appended to the free list,
+ * otherwise rmid_limbo_count is incremented.
+ *
+ * NOTE(review): this path skips on "<= threshold" while rmid_dirty()
+ * treats ">= threshold" as dirty, so a value exactly equal to the
+ * threshold is classified differently by the two -- confirm intent.
+ */
+static void add_rmid_to_limbo(struct rmid_entry *entry)
+{
+ struct rdt_resource *r;
+ struct rdt_domain *d;
+ int cpu;
+ u64 val;
+
+ r = &rdt_resources_all[RDT_RESOURCE_L3];
+
+ entry->busy = 0;
+ /* get_cpu()/put_cpu() bracket the walk that uses the local counter read */
+ cpu = get_cpu();
+ list_for_each_entry(d, &r->domains, list) {
+ if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
+ val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
+ if (val <= intel_cqm_threshold)
+ continue;
+ }
+
+ /*
+ * For the first limbo RMID in the domain,
+ * set up the limbo worker.
+ */
+ if (!has_busy_rmid(r, d))
+ cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
+ set_bit(entry->rmid, d->rmid_busy_llc);
+ entry->busy++;
+ }
+ put_cpu();
+
+ if (entry->busy)
+ rmid_limbo_count++;
+ else
+ list_add_tail(&entry->list, &rmid_free_lru);
+}
+
+/*
+ * Release @rmid. RMID 0 is never released. When LLC occupancy monitoring
+ * is enabled the RMID goes through add_rmid_to_limbo(); otherwise it is
+ * appended directly to the free list.
+ *
+ * Must be called with rdtgroup_mutex held.
+ */
+void free_rmid(u32 rmid)
+{
+	struct rmid_entry *entry;
+
+	if (!rmid)
+		return;
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	entry = __rmid_entry(rmid);
+
+	if (!is_llc_occupancy_enabled()) {
+		list_add_tail(&entry->list, &rmid_free_lru);
+		return;
+	}
+
+	add_rmid_to_limbo(entry);
+}
+
+/*
+ * Read event rr->evtid for @rmid and fold the result into rr->val.
+ *
+ * For the occupancy event the raw counter value is added to rr->val.
+ * For the two MBM events the per-RMID mbm_state of domain rr->d is
+ * updated with the delta since the previous read, and the running total
+ * is added to rr->val. When rr->first is set the state is only reset and
+ * rr->val is left untouched.
+ *
+ * Returns 0 on success. Returns -EINVAL if the value read has an Error or
+ * Unavailable bit set (rr->val is then overwritten with that raw value),
+ * or if the event id is not one of the three handled here.
+ */
+static int __mon_event_count(u32 rmid, struct rmid_read *rr)
+{
+ u64 chunks, shift, tval;
+ struct mbm_state *m;
+
+ tval = __rmid_read(rmid, rr->evtid);
+ if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
+ rr->val = tval;
+ return -EINVAL;
+ }
+ switch (rr->evtid) {
+ case QOS_L3_OCCUP_EVENT_ID:
+ rr->val += tval;
+ return 0;
+ case QOS_L3_MBM_TOTAL_EVENT_ID:
+ m = &rr->d->mbm_total[rmid];
+ break;
+ case QOS_L3_MBM_LOCAL_EVENT_ID:
+ m = &rr->d->mbm_local[rmid];
+ break;
+ default:
+ /*
+ * Code would never reach here because
+ * an invalid event id would fail the __rmid_read.
+ */
+ return -EINVAL;
+ }
+
+ if (rr->first) {
+ m->prev_msr = tval;
+ m->chunks = 0;
+ return 0;
+ }
+
+ /*
+ * Shifting both samples up by (64 - MBM_CNTR_WIDTH) before subtracting
+ * and back down afterwards yields the difference modulo
+ * 2^MBM_CNTR_WIDTH, so a counter that wrapped between the two reads
+ * still produces the right delta.
+ */
+ shift = 64 - MBM_CNTR_WIDTH;
+ chunks = (tval << shift) - (m->prev_msr << shift);
+ chunks >>= shift;
+ m->chunks += chunks;
+ m->prev_msr = tval;
+
+ rr->val += m->chunks;
+ return 0;
+}
+
+/*
+ * IPI callback that reads the CQM/MBM counters on a domain.
+ *
+ * @info is the struct rmid_read describing the request. The group's own
+ * RMID is counted first. For a control group the RMIDs of its child
+ * monitor groups are then added as well. Counting stops at the first
+ * read that fails.
+ */
+void mon_event_count(void *info)
+{
+	struct rmid_read *rr = info;
+	struct rdtgroup *rdtgrp = rr->rgrp;
+	struct rdtgroup *child;
+
+	if (__mon_event_count(rdtgrp->mon.rmid, rr))
+		return;
+
+	/* only Ctrl groups aggregate data from child monitor groups */
+	if (rdtgrp->type != RDTCTRL_GROUP)
+		return;
+
+	list_for_each_entry(child, &rdtgrp->mon.crdtgrp_list,
+			    mon.crdtgrp_list) {
+		if (__mon_event_count(child->mon.rmid, rr))
+			return;
+	}
+}
+
+/*
+ * Refresh the software MBM state of @rmid in domain @d for every enabled
+ * MBM event. Only the side effect on the per-RMID mbm_state matters here;
+ * the value accumulated in the local rmid_read is discarded.
+ */
+static void mbm_update(struct rdt_domain *d, int rmid)
+{
+	struct rmid_read rr;
+
+	rr.first = false;
+	rr.d = d;
+	/*
+	 * __mon_event_count() adds into rr.val, so give it a defined
+	 * starting value instead of uninitialised stack contents.
+	 */
+	rr.val = 0;
+
+	/*
+	 * This is protected from concurrent reads from user
+	 * as both the user and we hold the global mutex.
+	 */
+	if (is_mbm_total_enabled()) {
+		rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
+		__mon_event_count(rmid, &rr);
+	}
+	if (is_mbm_local_enabled()) {
+		rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
+		__mon_event_count(rmid, &rr);
+	}
+}
+
+/*
+ * Delayed-work handler for the limbo RMIDs of one L3 domain.
+ *
+ * Under rdtgroup_mutex, looks up the L3 domain of the CPU the work runs
+ * on and runs __check_limbo() on it without forcing, so only RMIDs whose
+ * occupancy is no longer dirty are released. The work is re-armed on the
+ * same CPU after CQM_LIMBOCHECK_INTERVAL for as long as the domain still
+ * has busy RMIDs.
+ */
+void cqm_handle_limbo(struct work_struct *work)
+{
+ unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
+ int cpu = smp_processor_id();
+ struct rdt_resource *r;
+ struct rdt_domain *d;
+
+ mutex_lock(&rdtgroup_mutex);
+
+ r = &rdt_resources_all[RDT_RESOURCE_L3];
+ d = get_domain_from_cpu(cpu, r);
+
+ if (!d) {
+ pr_warn_once("Failure to get domain for limbo worker\n");
+ goto out_unlock;
+ }
+
+ __check_limbo(d, false);
+
+ if (has_busy_rmid(r, d))
+ schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+}
+
+/*
+ * Schedule the limbo worker of @dom to run after @delay_ms milliseconds on
+ * a CPU picked from the domain's CPU mask. The chosen CPU is recorded in
+ * dom->cqm_work_cpu.
+ */
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
+{
+	unsigned long delay = msecs_to_jiffies(delay_ms);
+	int cpu;
+
+	cpu = cpumask_any(&dom->cpu_mask);
+	dom->cqm_work_cpu = cpu;
+
+	schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
+}
+
+/*
+ * Delayed-work handler that keeps the software MBM counters current.
+ *
+ * Under rdtgroup_mutex, and only while rdt_enable_key is on, it looks up
+ * the L3 domain of the CPU the work runs on and calls mbm_update() for the
+ * RMID of every group on rdt_all_groups and of each of their child
+ * monitor groups. It then re-arms itself on the same CPU after
+ * MBM_OVERFLOW_INTERVAL. Nothing is re-armed when the key is off or no
+ * domain is found.
+ */
+void mbm_handle_overflow(struct work_struct *work)
+{
+ unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
+ struct rdtgroup *prgrp, *crgrp;
+ int cpu = smp_processor_id();
+ struct list_head *head;
+ struct rdt_domain *d;
+
+ mutex_lock(&rdtgroup_mutex);
+
+ if (!static_branch_likely(&rdt_enable_key))
+ goto out_unlock;
+
+ d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]);
+ if (!d)
+ goto out_unlock;
+
+ list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
+ mbm_update(d, prgrp->mon.rmid);
+
+ head = &prgrp->mon.crdtgrp_list;
+ list_for_each_entry(crgrp, head, mon.crdtgrp_list)
+ mbm_update(d, crgrp->mon.rmid);
+ }
+
+ schedule_delayed_work_on(cpu, &d->mbm_over, delay);
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+}
+
+/*
+ * Schedule the MBM overflow worker of @dom to run after @delay_ms
+ * milliseconds on a CPU picked from the domain's CPU mask, recording that
+ * CPU in dom->mbm_work_cpu. Does nothing while rdt_enable_key is off.
+ */
+void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
+{
+	int cpu;
+
+	if (!static_branch_likely(&rdt_enable_key))
+		return;
+
+	cpu = cpumask_any(&dom->cpu_mask);
+	dom->mbm_work_cpu = cpu;
+	schedule_delayed_work_on(cpu, &dom->mbm_over,
+				 msecs_to_jiffies(delay_ms));
+}
+
+/*
+ * Allocate the global RMID table with r->num_rmid entries and put every
+ * RMID except 0 on the free list.
+ *
+ * Returns 0 on success or -ENOMEM if the table cannot be allocated.
+ */
+static int dom_data_init(struct rdt_resource *r)
+{
+	int nr_rmids = r->num_rmid;
+	struct rmid_entry *entry;
+	int idx;
+
+	rmid_ptrs = kcalloc(nr_rmids, sizeof(*rmid_ptrs), GFP_KERNEL);
+	if (!rmid_ptrs)
+		return -ENOMEM;
+
+	for (idx = 0; idx < nr_rmids; idx++) {
+		entry = &rmid_ptrs[idx];
+		entry->rmid = idx;
+		INIT_LIST_HEAD(&entry->list);
+		list_add_tail(&entry->list, &rmid_free_lru);
+	}
+
+	/*
+	 * RMID 0 is special and is always allocated. It's used for all
+	 * tasks that are not monitored.
+	 */
+	entry = __rmid_entry(0);
+	list_del(&entry->list);
+
+	return 0;
+}
+
+/*
+ * Descriptors for the three L3 monitoring events. l3_mon_evt_init() links
+ * the enabled ones into the resource's evt_list.
+ */
+static struct mon_evt llc_occupancy_event = {
+ .name = "llc_occupancy",
+ .evtid = QOS_L3_OCCUP_EVENT_ID,
+};
+
+static struct mon_evt mbm_total_event = {
+ .name = "mbm_total_bytes",
+ .evtid = QOS_L3_MBM_TOTAL_EVENT_ID,
+};
+
+static struct mon_evt mbm_local_event = {
+ .name = "mbm_local_bytes",
+ .evtid = QOS_L3_MBM_LOCAL_EVENT_ID,
+};
+
+/*
+ * Initialize the event list for the resource.
+ *
+ * Note that MBM events are also part of RDT_RESOURCE_L3 resource
+ * because as per the SDM the total and local memory bandwidth
+ * are enumerated as part of L3 monitoring.
+ *
+ * Enabled events are appended in the order occupancy, MBM total, MBM
+ * local, which is therefore the order in which evt_list is walked.
+ */
+static void l3_mon_evt_init(struct rdt_resource *r)
+{
+ INIT_LIST_HEAD(&r->evt_list);
+
+ if (is_llc_occupancy_enabled())
+ list_add_tail(&llc_occupancy_event.list, &r->evt_list);
+ if (is_mbm_total_enabled())
+ list_add_tail(&mbm_total_event.list, &r->evt_list);
+ if (is_mbm_local_enabled())
+ list_add_tail(&mbm_local_event.list, &r->evt_list);
+}
+
+/*
+ * Set up L3 monitoring for resource @r from the boot CPU's data.
+ *
+ * Fills in the occupancy scale and the number of RMIDs, derives the
+ * default intel_cqm_threshold, allocates the RMID table via
+ * dom_data_init(), builds the event list and finally marks the resource
+ * as monitoring capable and enabled.
+ *
+ * Returns 0 on success or the error returned by dom_data_init().
+ */
+int rdt_get_mon_l3_config(struct rdt_resource *r)
+{
+ int ret;
+
+ r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
+ r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
+
+ /*
+ * A reasonable upper limit on the max threshold is the number
+ * of lines tagged per RMID if all RMIDs have the same number of
+ * lines tagged in the LLC.
+ *
+ * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
+ */
+ intel_cqm_threshold = boot_cpu_data.x86_cache_size * 1024 / r->num_rmid;
+
+ /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
+ intel_cqm_threshold /= r->mon_scale;
+
+ ret = dom_data_init(r);
+ if (ret)
+ return ret;
+
+ l3_mon_evt_init(r);
+
+ r->mon_capable = true;
+ r->mon_enabled = true;
+
+ return 0;
+}
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 9257bd9dc664..a869d4a073c5 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -32,17 +32,25 @@
#include <uapi/linux/magic.h>
-#include <asm/intel_rdt.h>
-#include <asm/intel_rdt_common.h>
+#include <asm/intel_rdt_sched.h>
+#include "intel_rdt.h"
DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
-struct kernfs_root *rdt_root;
+DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
+DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
+static struct kernfs_root *rdt_root;
struct rdtgroup rdtgroup_default;
LIST_HEAD(rdt_all_groups);
/* Kernel fs node for "info" directory under root */
static struct kernfs_node *kn_info;
+/* Kernel fs node for "mon_groups" directory under root */
+static struct kernfs_node *kn_mongrp;
+
+/* Kernel fs node for "mon_data" directory under root */
+static struct kernfs_node *kn_mondata;
+
/*
* Trivial allocator for CLOSIDs. Since h/w only supports a small number,
* we can keep a bitmap of free CLOSIDs in a single integer.
@@ -66,7 +74,7 @@ static void closid_init(void)
int rdt_min_closid = 32;
/* Compute rdt_min_closid across all resources */
- for_each_enabled_rdt_resource(r)
+ for_each_alloc_enabled_rdt_resource(r)
rdt_min_closid = min(rdt_min_closid, r->num_closid);
closid_free_map = BIT_MASK(rdt_min_closid) - 1;
@@ -75,9 +83,9 @@ static void closid_init(void)
closid_free_map &= ~1;
}
-int closid_alloc(void)
+static int closid_alloc(void)
{
- int closid = ffs(closid_free_map);
+ u32 closid = ffs(closid_free_map);
if (closid == 0)
return -ENOSPC;
@@ -125,28 +133,6 @@ static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
return 0;
}
-static int rdtgroup_add_files(struct kernfs_node *kn, struct rftype *rfts,
- int len)
-{
- struct rftype *rft;
- int ret;
-
- lockdep_assert_held(&rdtgroup_mutex);
-
- for (rft = rfts; rft < rfts + len; rft++) {
- ret = rdtgroup_add_file(kn, rft);
- if (ret)
- goto error;
- }
-
- return 0;
-error:
- pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
- while (--rft >= rfts)
- kernfs_remove_by_name(kn, rft->name);
- return ret;
-}
-
static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
{
struct kernfs_open_file *of = m->private;
@@ -174,6 +160,11 @@ static struct kernfs_ops rdtgroup_kf_single_ops = {
.seq_show = rdtgroup_seqfile_show,
};
+static struct kernfs_ops kf_mondata_ops = {
+ .atomic_write_len = PAGE_SIZE,
+ .seq_show = rdtgroup_mondata_show,
+};
+
static bool is_cpu_list(struct kernfs_open_file *of)
{
struct rftype *rft = of->kn->priv;
@@ -203,13 +194,18 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
/*
* This is safe against intel_rdt_sched_in() called from __switch_to()
* because __switch_to() is executed with interrupts disabled. A local call
- * from rdt_update_closid() is proteced against __switch_to() because
+ * from update_closid_rmid() is protected against __switch_to() because
* preemption is disabled.
*/
-static void rdt_update_cpu_closid(void *closid)
+static void update_cpu_closid_rmid(void *info)
{
- if (closid)
- this_cpu_write(cpu_closid, *(int *)closid);
+ struct rdtgroup *r = info;
+
+ if (r) {
+ this_cpu_write(pqr_state.default_closid, r->closid);
+ this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
+ }
+
/*
* We cannot unconditionally write the MSR because the current
* executing task might have its own closid selected. Just reuse
@@ -221,28 +217,128 @@ static void rdt_update_cpu_closid(void *closid)
/*
* Update the PGR_ASSOC MSR on all cpus in @cpu_mask,
*
- * Per task closids must have been set up before calling this function.
- *
- * The per cpu closids are updated with the smp function call, when @closid
- * is not NULL. If @closid is NULL then all affected percpu closids must
- * have been set up before calling this function.
+ * Per task closids/rmids must have been set up before calling this function.
*/
static void
-rdt_update_closid(const struct cpumask *cpu_mask, int *closid)
+update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
{
int cpu = get_cpu();
if (cpumask_test_cpu(cpu, cpu_mask))
- rdt_update_cpu_closid(closid);
- smp_call_function_many(cpu_mask, rdt_update_cpu_closid, closid, 1);
+ update_cpu_closid_rmid(r);
+ smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1);
put_cpu();
}
+/*
+ * Apply a new CPU mask to monitor group @rdtgrp.
+ *
+ * @newmask must be a subset of the parent control group's mask, otherwise
+ * -EINVAL is returned and nothing is changed. CPUs dropped from the group
+ * are or-ed into the parent's mask and updated to the parent. CPUs added
+ * are removed from the sibling monitor groups and updated to this group.
+ * @tmpmask is scratch space.
+ *
+ * Returns 0 on success.
+ */
+static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
+ cpumask_var_t tmpmask)
+{
+ struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
+ struct list_head *head;
+
+ /* Check whether cpus belong to parent ctrl group */
+ cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
+ if (cpumask_weight(tmpmask))
+ return -EINVAL;
+
+ /* Check whether cpus are dropped from this group */
+ cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
+ if (cpumask_weight(tmpmask)) {
+ /* Give any dropped cpus to parent rdtgroup */
+ cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
+ update_closid_rmid(tmpmask, prgrp);
+ }
+
+ /*
+ * If we added cpus, remove them from previous group that owned them
+ * and update per-cpu rmid
+ */
+ cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
+ if (cpumask_weight(tmpmask)) {
+ head = &prgrp->mon.crdtgrp_list;
+ list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
+ if (crgrp == rdtgrp)
+ continue;
+ cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
+ tmpmask);
+ }
+ update_closid_rmid(tmpmask, rdtgrp);
+ }
+
+ /* Done pushing/pulling - update this group with new mask */
+ cpumask_copy(&rdtgrp->cpu_mask, newmask);
+
+ return 0;
+}
+
+/*
+ * Remove the CPUs in @m from group @r, then restrict each child monitor
+ * group's mask to what remains in @r.
+ */
+static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
+{
+	struct rdtgroup *child;
+
+	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
+
+	/* update the child mon group masks as well */
+	list_for_each_entry(child, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
+		cpumask_and(&child->cpu_mask, &r->cpu_mask, &child->cpu_mask);
+	}
+}
+
+/*
+ * Apply a new CPU mask to control group @rdtgrp.
+ *
+ * CPUs dropped from the group are handed to rdtgroup_default. Dropping
+ * CPUs from the default group itself is rejected with -EINVAL. CPUs added
+ * are removed from every other group on rdt_all_groups (and, through
+ * cpumask_rdtgrp_clear(), from those groups' child monitor groups) and
+ * updated to this group. Finally the masks of this group's own child
+ * monitor groups are cleared. @tmpmask and @tmpmask1 are scratch space.
+ *
+ * Returns 0 on success.
+ */
+static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
+ cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
+{
+ struct rdtgroup *r, *crgrp;
+ struct list_head *head;
+
+ /* Check whether cpus are dropped from this group */
+ cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
+ if (cpumask_weight(tmpmask)) {
+ /* Can't drop from default group */
+ if (rdtgrp == &rdtgroup_default)
+ return -EINVAL;
+
+ /* Give any dropped cpus to rdtgroup_default */
+ cpumask_or(&rdtgroup_default.cpu_mask,
+ &rdtgroup_default.cpu_mask, tmpmask);
+ update_closid_rmid(tmpmask, &rdtgroup_default);
+ }
+
+ /*
+ * If we added cpus, remove them from previous group and
+ * the prev group's child groups that owned them
+ * and update per-cpu closid/rmid.
+ */
+ cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
+ if (cpumask_weight(tmpmask)) {
+ list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
+ if (r == rdtgrp)
+ continue;
+ cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
+ if (cpumask_weight(tmpmask1))
+ cpumask_rdtgrp_clear(r, tmpmask1);
+ }
+ update_closid_rmid(tmpmask, rdtgrp);
+ }
+
+ /* Done pushing/pulling - update this group with new mask */
+ cpumask_copy(&rdtgrp->cpu_mask, newmask);
+
+ /*
+ * Clear child mon group masks since there is a new parent mask
+ * now and update the rmid for the cpus the child lost.
+ */
+ head = &rdtgrp->mon.crdtgrp_list;
+ list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
+ cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
+ update_closid_rmid(tmpmask, rdtgrp);
+ cpumask_clear(&crgrp->cpu_mask);
+ }
+
+ return 0;
+}
+
static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
- cpumask_var_t tmpmask, newmask;
- struct rdtgroup *rdtgrp, *r;
+ cpumask_var_t tmpmask, newmask, tmpmask1;
+ struct rdtgroup *rdtgrp;
int ret;
if (!buf)
@@ -254,6 +350,11 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
free_cpumask_var(tmpmask);
return -ENOMEM;
}
+ if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
+ free_cpumask_var(tmpmask);
+ free_cpumask_var(newmask);
+ return -ENOMEM;
+ }
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (!rdtgrp) {
@@ -276,41 +377,18 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
goto unlock;
}
- /* Check whether cpus are dropped from this group */
- cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
- if (cpumask_weight(tmpmask)) {
- /* Can't drop from default group */
- if (rdtgrp == &rdtgroup_default) {
- ret = -EINVAL;
- goto unlock;
- }
- /* Give any dropped cpus to rdtgroup_default */
- cpumask_or(&rdtgroup_default.cpu_mask,
- &rdtgroup_default.cpu_mask, tmpmask);
- rdt_update_closid(tmpmask, &rdtgroup_default.closid);
- }
-
- /*
- * If we added cpus, remove them from previous group that owned them
- * and update per-cpu closid
- */
- cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
- if (cpumask_weight(tmpmask)) {
- list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
- if (r == rdtgrp)
- continue;
- cpumask_andnot(&r->cpu_mask, &r->cpu_mask, tmpmask);
- }
- rdt_update_closid(tmpmask, &rdtgrp->closid);
- }
-
- /* Done pushing/pulling - update this group with new mask */
- cpumask_copy(&rdtgrp->cpu_mask, newmask);
+ if (rdtgrp->type == RDTCTRL_GROUP)
+ ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
+ else if (rdtgrp->type == RDTMON_GROUP)
+ ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
+ else
+ ret = -EINVAL;
unlock:
rdtgroup_kn_unlock(of->kn);
free_cpumask_var(tmpmask);
free_cpumask_var(newmask);
+ free_cpumask_var(tmpmask1);
return ret ?: nbytes;
}
@@ -336,6 +414,7 @@ static void move_myself(struct callback_head *head)
if (atomic_dec_and_test(&rdtgrp->waitcount) &&
(rdtgrp->flags & RDT_DELETED)) {
current->closid = 0;
+ current->rmid = 0;
kfree(rdtgrp);
}
@@ -374,7 +453,20 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
atomic_dec(&rdtgrp->waitcount);
kfree(callback);
} else {
- tsk->closid = rdtgrp->closid;
+ /*
+ * For ctrl_mon groups move both closid and rmid.
+ * For monitor groups, can move the tasks only from
+ * their parent CTRL group.
+ */
+ if (rdtgrp->type == RDTCTRL_GROUP) {
+ tsk->closid = rdtgrp->closid;
+ tsk->rmid = rdtgrp->mon.rmid;
+ } else if (rdtgrp->type == RDTMON_GROUP) {
+ if (rdtgrp->mon.parent->closid == tsk->closid)
+ tsk->rmid = rdtgrp->mon.rmid;
+ else
+ ret = -EINVAL;
+ }
}
return ret;
}
@@ -454,7 +546,8 @@ static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
rcu_read_lock();
for_each_process_thread(p, t) {
- if (t->closid == r->closid)
+ if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
+ (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid))
seq_printf(s, "%d\n", t->pid);
}
rcu_read_unlock();
@@ -476,39 +569,6 @@ static int rdtgroup_tasks_show(struct kernfs_open_file *of,
return ret;
}
-/* Files in each rdtgroup */
-static struct rftype rdtgroup_base_files[] = {
- {
- .name = "cpus",
- .mode = 0644,
- .kf_ops = &rdtgroup_kf_single_ops,
- .write = rdtgroup_cpus_write,
- .seq_show = rdtgroup_cpus_show,
- },
- {
- .name = "cpus_list",
- .mode = 0644,
- .kf_ops = &rdtgroup_kf_single_ops,
- .write = rdtgroup_cpus_write,
- .seq_show = rdtgroup_cpus_show,
- .flags = RFTYPE_FLAGS_CPUS_LIST,
- },
- {
- .name = "tasks",
- .mode = 0644,
- .kf_ops = &rdtgroup_kf_single_ops,
- .write = rdtgroup_tasks_write,
- .seq_show = rdtgroup_tasks_show,
- },
- {
- .name = "schemata",
- .mode = 0644,
- .kf_ops = &rdtgroup_kf_single_ops,
- .write = rdtgroup_schemata_write,
- .seq_show = rdtgroup_schemata_show,
- },
-};
-
static int rdt_num_closids_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
@@ -536,6 +596,15 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
return 0;
}
+/*
+ * Print the cache.shareable_bits mask of the resource attached to the
+ * parent directory, in hex.
+ */
+static int rdt_shareable_bits_show(struct kernfs_open_file *of,
+				   struct seq_file *seq, void *v)
+{
+	struct rdt_resource *res = of->kn->parent->priv;
+
+	seq_printf(seq, "%x\n", res->cache.shareable_bits);
+
+	return 0;
+}
+
static int rdt_min_bw_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
@@ -545,6 +614,28 @@ static int rdt_min_bw_show(struct kernfs_open_file *of,
return 0;
}
+/*
+ * Print the number of RMIDs of the resource attached to the parent
+ * directory.
+ */
+static int rdt_num_rmids_show(struct kernfs_open_file *of,
+			      struct seq_file *seq, void *v)
+{
+	struct rdt_resource *res = of->kn->parent->priv;
+
+	seq_printf(seq, "%d\n", res->num_rmid);
+	return 0;
+}
+
+/*
+ * Print the name of every event on the evt_list of the resource attached
+ * to the parent directory, one per line.
+ */
+static int rdt_mon_features_show(struct kernfs_open_file *of,
+				 struct seq_file *seq, void *v)
+{
+	struct rdt_resource *res = of->kn->parent->priv;
+	struct mon_evt *evt;
+
+	list_for_each_entry(evt, &res->evt_list, list) {
+		seq_printf(seq, "%s\n", evt->name);
+	}
+
+	return 0;
+}
+
static int rdt_bw_gran_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
@@ -563,74 +654,200 @@ static int rdt_delay_linear_show(struct kernfs_open_file *of,
return 0;
}
+/*
+ * Print the occupancy threshold converted back from hardware units by
+ * multiplying with the resource's mon_scale.
+ */
+static int max_threshold_occ_show(struct kernfs_open_file *of,
+				  struct seq_file *seq, void *v)
+{
+	struct rdt_resource *res = of->kn->parent->priv;
+
+	seq_printf(seq, "%u\n", intel_cqm_threshold * res->mon_scale);
+	return 0;
+}
+
+/*
+ * Parse a new occupancy threshold from @buf and store it, divided by the
+ * resource's mon_scale, in intel_cqm_threshold.
+ *
+ * Returns @nbytes on success, the kstrtouint() error if @buf does not
+ * parse, or -EINVAL if the value exceeds boot_cpu_data.x86_cache_size * 1024.
+ */
+static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
+				       char *buf, size_t nbytes, loff_t off)
+{
+	struct rdt_resource *res = of->kn->parent->priv;
+	unsigned int bytes;
+	int err;
+
+	err = kstrtouint(buf, 0, &bytes);
+	if (err)
+		return err;
+
+	if (bytes > (boot_cpu_data.x86_cache_size * 1024))
+		return -EINVAL;
+
+	intel_cqm_threshold = bytes / res->mon_scale;
+	return nbytes;
+}
+
/* rdtgroup information files for one cache resource. */
-static struct rftype res_cache_info_files[] = {
+static struct rftype res_common_files[] = {
{
.name = "num_closids",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_num_closids_show,
+ .fflags = RF_CTRL_INFO,
+ },
+ {
+ .name = "mon_features",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_mon_features_show,
+ .fflags = RF_MON_INFO,
+ },
+ {
+ .name = "num_rmids",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_num_rmids_show,
+ .fflags = RF_MON_INFO,
},
{
.name = "cbm_mask",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_default_ctrl_show,
+ .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
},
{
.name = "min_cbm_bits",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_min_cbm_bits_show,
+ .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
},
-};
-
-/* rdtgroup information files for memory bandwidth. */
-static struct rftype res_mba_info_files[] = {
{
- .name = "num_closids",
+ .name = "shareable_bits",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_num_closids_show,
+ .seq_show = rdt_shareable_bits_show,
+ .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
},
{
.name = "min_bandwidth",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_min_bw_show,
+ .fflags = RF_CTRL_INFO | RFTYPE_RES_MB,
},
{
.name = "bandwidth_gran",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_bw_gran_show,
+ .fflags = RF_CTRL_INFO | RFTYPE_RES_MB,
},
{
.name = "delay_linear",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_delay_linear_show,
+ .fflags = RF_CTRL_INFO | RFTYPE_RES_MB,
+ },
+ {
+ .name = "max_threshold_occupancy",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .write = max_threshold_occ_write,
+ .seq_show = max_threshold_occ_show,
+ .fflags = RF_MON_INFO | RFTYPE_RES_CACHE,
+ },
+ {
+ .name = "cpus",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .write = rdtgroup_cpus_write,
+ .seq_show = rdtgroup_cpus_show,
+ .fflags = RFTYPE_BASE,
+ },
+ {
+ .name = "cpus_list",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .write = rdtgroup_cpus_write,
+ .seq_show = rdtgroup_cpus_show,
+ .flags = RFTYPE_FLAGS_CPUS_LIST,
+ .fflags = RFTYPE_BASE,
+ },
+ {
+ .name = "tasks",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .write = rdtgroup_tasks_write,
+ .seq_show = rdtgroup_tasks_show,
+ .fflags = RFTYPE_BASE,
+ },
+ {
+ .name = "schemata",
+ .mode = 0644,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .write = rdtgroup_schemata_write,
+ .seq_show = rdtgroup_schemata_show,
+ .fflags = RF_CTRL_BASE,
},
};
-void rdt_get_mba_infofile(struct rdt_resource *r)
+static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
{
- r->info_files = res_mba_info_files;
- r->nr_info_files = ARRAY_SIZE(res_mba_info_files);
+ struct rftype *rfts, *rft;
+ int ret, len;
+
+ rfts = res_common_files;
+ len = ARRAY_SIZE(res_common_files);
+
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ for (rft = rfts; rft < rfts + len; rft++) {
+ if ((fflags & rft->fflags) == rft->fflags) {
+ ret = rdtgroup_add_file(kn, rft);
+ if (ret)
+ goto error;
+ }
+ }
+
+ return 0;
+error:
+ pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
+ while (--rft >= rfts) {
+ if ((fflags & rft->fflags) == rft->fflags)
+ kernfs_remove_by_name(kn, rft->name);
+ }
+ return ret;
}
-void rdt_get_cache_infofile(struct rdt_resource *r)
+static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
+ unsigned long fflags)
{
- r->info_files = res_cache_info_files;
- r->nr_info_files = ARRAY_SIZE(res_cache_info_files);
+ struct kernfs_node *kn_subdir;
+ int ret;
+
+ kn_subdir = kernfs_create_dir(kn_info, name,
+ kn_info->mode, r);
+ if (IS_ERR(kn_subdir))
+ return PTR_ERR(kn_subdir);
+
+ kernfs_get(kn_subdir);
+ ret = rdtgroup_kn_set_ugid(kn_subdir);
+ if (ret)
+ return ret;
+
+ ret = rdtgroup_add_files(kn_subdir, fflags);
+ if (!ret)
+ kernfs_activate(kn_subdir);
+
+ return ret;
}
static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
{
- struct kernfs_node *kn_subdir;
- struct rftype *res_info_files;
struct rdt_resource *r;
- int ret, len;
+ unsigned long fflags;
+ char name[32];
+ int ret;
/* create the directory */
kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
@@ -638,25 +855,19 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
return PTR_ERR(kn_info);
kernfs_get(kn_info);
- for_each_enabled_rdt_resource(r) {
- kn_subdir = kernfs_create_dir(kn_info, r->name,
- kn_info->mode, r);
- if (IS_ERR(kn_subdir)) {
- ret = PTR_ERR(kn_subdir);
- goto out_destroy;
- }
- kernfs_get(kn_subdir);
- ret = rdtgroup_kn_set_ugid(kn_subdir);
+ for_each_alloc_enabled_rdt_resource(r) {
+ fflags = r->fflags | RF_CTRL_INFO;
+ ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags);
if (ret)
goto out_destroy;
+ }
- res_info_files = r->info_files;
- len = r->nr_info_files;
-
- ret = rdtgroup_add_files(kn_subdir, res_info_files, len);
+ for_each_mon_enabled_rdt_resource(r) {
+ fflags = r->fflags | RF_MON_INFO;
+ sprintf(name, "%s_MON", r->name);
+ ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
if (ret)
goto out_destroy;
- kernfs_activate(kn_subdir);
}
/*
@@ -678,6 +889,39 @@ out_destroy:
return ret;
}
+/*
+ * Create the directory @name below @parent_kn with @prgrp as its private
+ * data, take an extra reference on it, set its owner through
+ * rdtgroup_kn_set_ugid() and activate it.
+ *
+ * Returns 0 on success and, when @dest_kn is non-NULL, stores the new node
+ * in *@dest_kn. On failure the node is removed again, *@dest_kn is left
+ * untouched and the error is returned.
+ */
+static int
+mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
+ char *name, struct kernfs_node **dest_kn)
+{
+ struct kernfs_node *kn;
+ int ret;
+
+ /* create the directory */
+ kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
+ if (IS_ERR(kn))
+ return PTR_ERR(kn);
+
+ /*
+ * This extra ref will be put in kernfs_remove() and guarantees
+ * that @kn is always accessible.
+ */
+ kernfs_get(kn);
+
+ ret = rdtgroup_kn_set_ugid(kn);
+ if (ret)
+ goto out_destroy;
+
+ /*
+ * Hand the node to the caller only after the last step that can
+ * fail, so that *dest_kn never refers to a node removed below.
+ */
+ if (dest_kn)
+ *dest_kn = kn;
+
+ kernfs_activate(kn);
+
+ return 0;
+
+out_destroy:
+ kernfs_remove(kn);
+ return ret;
+}
static void l3_qos_cfg_update(void *arg)
{
bool *enable = arg;
@@ -718,14 +962,15 @@ static int cdp_enable(void)
struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
int ret;
- if (!r_l3->capable || !r_l3data->capable || !r_l3code->capable)
+ if (!r_l3->alloc_capable || !r_l3data->alloc_capable ||
+ !r_l3code->alloc_capable)
return -EINVAL;
ret = set_l3_qos_cfg(r_l3, true);
if (!ret) {
- r_l3->enabled = false;
- r_l3data->enabled = true;
- r_l3code->enabled = true;
+ r_l3->alloc_enabled = false;
+ r_l3data->alloc_enabled = true;
+ r_l3code->alloc_enabled = true;
}
return ret;
}
@@ -734,11 +979,11 @@ static void cdp_disable(void)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
- r->enabled = r->capable;
+ r->alloc_enabled = r->alloc_capable;
- if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled) {
- rdt_resources_all[RDT_RESOURCE_L3DATA].enabled = false;
- rdt_resources_all[RDT_RESOURCE_L3CODE].enabled = false;
+ if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) {
+ rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled = false;
+ rdt_resources_all[RDT_RESOURCE_L3CODE].alloc_enabled = false;
set_l3_qos_cfg(r, false);
}
}
@@ -823,10 +1068,16 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
}
}
+static int mkdir_mondata_all(struct kernfs_node *parent_kn,
+ struct rdtgroup *prgrp,
+ struct kernfs_node **mon_data_kn);
+
static struct dentry *rdt_mount(struct file_system_type *fs_type,
int flags, const char *unused_dev_name,
void *data)
{
+ struct rdt_domain *dom;
+ struct rdt_resource *r;
struct dentry *dentry;
int ret;
@@ -853,15 +1104,54 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
goto out_cdp;
}
+ if (rdt_mon_capable) {
+ ret = mongroup_create_dir(rdtgroup_default.kn,
+ NULL, "mon_groups",
+ &kn_mongrp);
+ if (ret) {
+ dentry = ERR_PTR(ret);
+ goto out_info;
+ }
+ kernfs_get(kn_mongrp);
+
+ ret = mkdir_mondata_all(rdtgroup_default.kn,
+ &rdtgroup_default, &kn_mondata);
+ if (ret) {
+ dentry = ERR_PTR(ret);
+ goto out_mongrp;
+ }
+ kernfs_get(kn_mondata);
+ rdtgroup_default.mon.mon_data_kn = kn_mondata;
+ }
+
dentry = kernfs_mount(fs_type, flags, rdt_root,
RDTGROUP_SUPER_MAGIC, NULL);
if (IS_ERR(dentry))
- goto out_destroy;
+ goto out_mondata;
+
+ if (rdt_alloc_capable)
+ static_branch_enable(&rdt_alloc_enable_key);
+ if (rdt_mon_capable)
+ static_branch_enable(&rdt_mon_enable_key);
+
+ if (rdt_alloc_capable || rdt_mon_capable)
+ static_branch_enable(&rdt_enable_key);
+
+ if (is_mbm_enabled()) {
+ r = &rdt_resources_all[RDT_RESOURCE_L3];
+ list_for_each_entry(dom, &r->domains, list)
+ mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
+ }
- static_branch_enable(&rdt_enable_key);
goto out;
-out_destroy:
+out_mondata:
+ if (rdt_mon_capable)
+ kernfs_remove(kn_mondata);
+out_mongrp:
+ if (rdt_mon_capable)
+ kernfs_remove(kn_mongrp);
+out_info:
kernfs_remove(kn_info);
out_cdp:
cdp_disable();
@@ -909,6 +1199,18 @@ static int reset_all_ctrls(struct rdt_resource *r)
return 0;
}
+/*
+ * True when allocation is supported, @r is a control group and task @t
+ * currently carries that group's closid.
+ */
+static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
+{
+ if (!rdt_alloc_capable || r->type != RDTCTRL_GROUP)
+ return false;
+
+ return t->closid == r->closid;
+}
+
+/*
+ * True when monitoring is supported, @r is a monitor group and task @t
+ * currently carries that group's rmid.
+ */
+static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
+{
+ if (!rdt_mon_capable || r->type != RDTMON_GROUP)
+ return false;
+
+ return t->rmid == r->mon.rmid;
+}
+
/*
* Move tasks from one to the other group. If @from is NULL, then all tasks
* in the systems are moved unconditionally (used for teardown).
@@ -924,8 +1226,11 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
read_lock(&tasklist_lock);
for_each_process_thread(p, t) {
- if (!from || t->closid == from->closid) {
+ if (!from || is_closid_match(t, from) ||
+ is_rmid_match(t, from)) {
t->closid = to->closid;
+ t->rmid = to->mon.rmid;
+
#ifdef CONFIG_SMP
/*
* This is safe on x86 w/o barriers as the ordering
@@ -944,6 +1249,19 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
read_unlock(&tasklist_lock);
}
+/*
+ * Release every child group linked on @rdtgrp's mon.crdtgrp_list: give
+ * back its rmid, unlink it from the list and free the structure.
+ */
+static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
+{
+ struct rdtgroup *child, *next;
+
+ /* The _safe iterator is needed because each entry is freed in the loop. */
+ list_for_each_entry_safe(child, next, &rdtgrp->mon.crdtgrp_list,
+ mon.crdtgrp_list) {
+ free_rmid(child->mon.rmid);
+ list_del(&child->mon.crdtgrp_list);
+ kfree(child);
+ }
+}
+
/*
* Forcibly remove all of subdirectories under root.
*/
@@ -955,6 +1273,9 @@ static void rmdir_all_sub(void)
rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
+ /* Free any child rmids */
+ free_all_child_rdtgrp(rdtgrp);
+
/* Remove each rdtgroup other than root */
if (rdtgrp == &rdtgroup_default)
continue;
@@ -967,16 +1288,20 @@ static void rmdir_all_sub(void)
cpumask_or(&rdtgroup_default.cpu_mask,
&rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
+ free_rmid(rdtgrp->mon.rmid);
+
kernfs_remove(rdtgrp->kn);
list_del(&rdtgrp->rdtgroup_list);
kfree(rdtgrp);
}
/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
get_online_cpus();
- rdt_update_closid(cpu_online_mask, &rdtgroup_default.closid);
+ update_closid_rmid(cpu_online_mask, &rdtgroup_default);
put_online_cpus();
kernfs_remove(kn_info);
+ kernfs_remove(kn_mongrp);
+ kernfs_remove(kn_mondata);
}
static void rdt_kill_sb(struct super_block *sb)
@@ -986,10 +1311,12 @@ static void rdt_kill_sb(struct super_block *sb)
mutex_lock(&rdtgroup_mutex);
/*Put everything back to default values. */
- for_each_enabled_rdt_resource(r)
+ for_each_alloc_enabled_rdt_resource(r)
reset_all_ctrls(r);
cdp_disable();
rmdir_all_sub();
+ static_branch_disable(&rdt_alloc_enable_key);
+ static_branch_disable(&rdt_mon_enable_key);
static_branch_disable(&rdt_enable_key);
kernfs_kill_sb(sb);
mutex_unlock(&rdtgroup_mutex);
@@ -1001,46 +1328,223 @@ static struct file_system_type rdt_fs_type = {
.kill_sb = rdt_kill_sb,
};
-static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
- umode_t mode)
+/*
+ * Create the read-only (0444) file @name below @parent_kn, served by
+ * kf_mondata_ops with @priv as its private data, and set its owner.
+ *
+ * Returns 0 on success. If setting the owner fails the file is removed
+ * again and that error is returned.
+ */
+static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
+ void *priv)
{
- struct rdtgroup *parent, *rdtgrp;
struct kernfs_node *kn;
- int ret, closid;
+ int ret = 0;
- /* Only allow mkdir in the root directory */
- if (parent_kn != rdtgroup_default.kn)
- return -EPERM;
+ kn = __kernfs_create_file(parent_kn, name, 0444, 0,
+ &kf_mondata_ops, priv, NULL, NULL);
+ if (IS_ERR(kn))
+ return PTR_ERR(kn);
- /* Do not accept '\n' to avoid unparsable situation. */
- if (strchr(name, '\n'))
- return -EINVAL;
+ ret = rdtgroup_kn_set_ugid(kn);
+ if (ret) {
+ kernfs_remove(kn);
+ return ret;
+ }
- parent = rdtgroup_kn_lock_live(parent_kn);
- if (!parent) {
- ret = -ENODEV;
- goto out_unlock;
+ return ret;
+}
+
+/*
+ * Remove the subdirectory for domain @dom_id of resource @r from the
+ * mon_data directory of every group on rdt_all_groups and of each of
+ * their child monitor groups. Does nothing when monitoring is not
+ * enabled for @r.
+ */
+void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id)
+{
+ struct rdtgroup *prgrp, *crgrp;
+ char name[32];
+
+ if (!r->mon_enabled)
+ return;
+
+ /*
+ * The directory name is the same for every group, so build it once.
+ * It must stay in the format used when the directory was created.
+ */
+ snprintf(name, sizeof(name), "mon_%s_%02d", r->name, dom_id);
+
+ list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
+ kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
+
+ list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
+ kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
}
+}
- ret = closid_alloc();
- if (ret < 0)
+/*
+ * Create the directory "mon_<r->name>_<d->id>" below @parent_kn with
+ * @prgrp as its private data and add one file per event on @r->evt_list.
+ * Each file's private value packs the resource id, domain id and event id.
+ *
+ * Returns 0 on success. On any failure, including an empty event list
+ * (-EPERM), the directory is removed again and the error is returned.
+ */
+static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
+ struct rdt_domain *d,
+ struct rdt_resource *r, struct rdtgroup *prgrp)
+{
+ union mon_data_bits priv;
+ struct kernfs_node *kn;
+ struct mon_evt *mevt;
+ struct rmid_read rr;
+ char name[32];
+ int ret;
+
+ sprintf(name, "mon_%s_%02d", r->name, d->id);
+ /* create the directory */
+ kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
+ if (IS_ERR(kn))
+ return PTR_ERR(kn);
+
+ /*
+ * This extra ref will be put in kernfs_remove() and guarantees
+ * that kn is always accessible.
+ */
+ kernfs_get(kn);
+ ret = rdtgroup_kn_set_ugid(kn);
+ if (ret)
+ goto out_destroy;
+
+ /* A monitoring resource without events is treated as an error. */
+ if (WARN_ON(list_empty(&r->evt_list))) {
+ ret = -EPERM;
+ goto out_destroy;
+ }
+
+ /* rid and domid are common to all files; only evtid varies. */
+ priv.u.rid = r->rid;
+ priv.u.domid = d->id;
+ list_for_each_entry(mevt, &r->evt_list, list) {
+ priv.u.evtid = mevt->evtid;
+ ret = mon_addfile(kn, mevt->name, priv.priv);
+ if (ret)
+ goto out_destroy;
+
+ /*
+ * NOTE(review): the trailing 'true' presumably requests an
+ * initial read to seed the MBM state - confirm against
+ * mon_event_read().
+ */
+ if (is_mbm_event(mevt->evtid))
+ mon_event_read(&rr, d, prgrp, mevt->evtid, true);
+ }
+ kernfs_activate(kn);
+ return 0;
+
+out_destroy:
+ kernfs_remove(kn);
+ return ret;
+}
+
+/*
+ * Create the subdirectory for domain @d of resource @r in the mon_data
+ * directory of every group on rdt_all_groups and of each of their child
+ * monitor groups. Does nothing when monitoring is not enabled for @r.
+ * Failures of individual subdirectories are not reported.
+ */
+void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
+ struct rdt_domain *d)
+{
+ struct rdtgroup *grp, *child;
+
+ if (!r->mon_enabled)
+ return;
+
+ list_for_each_entry(grp, &rdt_all_groups, rdtgroup_list) {
+ /* The group itself first ... */
+ mkdir_mondata_subdir(grp->mon.mon_data_kn, d, r, grp);
+
+ /* ... then every monitor group hanging off it. */
+ list_for_each_entry(child, &grp->mon.crdtgrp_list,
+ mon.crdtgrp_list)
+ mkdir_mondata_subdir(child->mon.mon_data_kn, d, r,
+ child);
+ }
+}
+
+/*
+ * Create one mon_data subdirectory below @parent_kn for every domain of
+ * @r. Stops at, and returns, the first error; returns 0 otherwise.
+ */
+static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
+ struct rdt_resource *r,
+ struct rdtgroup *prgrp)
+{
+ struct rdt_domain *d;
+ int err = 0;
+
+ list_for_each_entry(d, &r->domains, list) {
+ err = mkdir_mondata_subdir(parent_kn, d, r, prgrp);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/*
+ * This creates a directory mon_data which contains the monitored data.
+ *
+ * mon_data has one directory for each domain, named in the format
+ * mon_<resource_name>_<domain_id>. For ex: A mon_data
+ * with L3 domain looks as below:
+ * ./mon_data:
+ * mon_L3_00
+ * mon_L3_01
+ * mon_L3_02
+ * ...
+ *
+ * Each domain directory has one file per event:
+ * ./mon_L3_00/:
+ * llc_occupancy
+ *
+ * Returns 0 on success. On failure the mon_data directory is removed
+ * again and the error is returned; if @dest_kn was given it has already
+ * been written by then and must not be used by the caller.
+ */
+static int mkdir_mondata_all(struct kernfs_node *parent_kn,
+ struct rdtgroup *prgrp,
+ struct kernfs_node **dest_kn)
+{
+ struct rdt_resource *r;
+ struct kernfs_node *kn;
+ int ret;
+
+ /*
+ * Create the mon_data directory first.
+ */
+ ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
+ if (ret)
+ return ret;
+
+ if (dest_kn)
+ *dest_kn = kn;
+
+ /*
+ * Create the subdirectories for each domain. Note that all events
+ * in a domain like L3 are grouped into a resource whose domain is L3
+ */
+ for_each_mon_enabled_rdt_resource(r) {
+ ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
+ if (ret)
+ goto out_destroy;
+ }
+
+ return 0;
+
+out_destroy:
+ kernfs_remove(kn);
+ return ret;
+}
+
+static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
+ struct kernfs_node *prgrp_kn,
+ const char *name, umode_t mode,
+ enum rdt_group_type rtype, struct rdtgroup **r)
+{
+ struct rdtgroup *prdtgrp, *rdtgrp;
+ struct kernfs_node *kn;
+ uint files = 0;
+ int ret;
+
+ prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
+ if (!prdtgrp) {
+ ret = -ENODEV;
goto out_unlock;
- closid = ret;
+ }
/* allocate the rdtgroup. */
rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
if (!rdtgrp) {
ret = -ENOSPC;
- goto out_closid_free;
+ goto out_unlock;
}
- rdtgrp->closid = closid;
- list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
+ *r = rdtgrp;
+ rdtgrp->mon.parent = prdtgrp;
+ rdtgrp->type = rtype;
+ INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
/* kernfs creates the directory for rdtgrp */
- kn = kernfs_create_dir(parent->kn, name, mode, rdtgrp);
+ kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
if (IS_ERR(kn)) {
ret = PTR_ERR(kn);
- goto out_cancel_ref;
+ goto out_free_rgrp;
}
rdtgrp->kn = kn;
@@ -1056,43 +1560,211 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
if (ret)
goto out_destroy;
- ret = rdtgroup_add_files(kn, rdtgroup_base_files,
- ARRAY_SIZE(rdtgroup_base_files));
+ files = RFTYPE_BASE | RFTYPE_CTRL;
+ files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype);
+ ret = rdtgroup_add_files(kn, files);
if (ret)
goto out_destroy;
+ if (rdt_mon_capable) {
+ ret = alloc_rmid();
+ if (ret < 0)
+ goto out_destroy;
+ rdtgrp->mon.rmid = ret;
+
+ ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
+ if (ret)
+ goto out_idfree;
+ }
kernfs_activate(kn);
- ret = 0;
- goto out_unlock;
+ /*
+ * The caller unlocks the prgrp_kn upon success.
+ */
+ return 0;
+out_idfree:
+ free_rmid(rdtgrp->mon.rmid);
out_destroy:
kernfs_remove(rdtgrp->kn);
-out_cancel_ref:
- list_del(&rdtgrp->rdtgroup_list);
+out_free_rgrp:
kfree(rdtgrp);
-out_closid_free:
- closid_free(closid);
out_unlock:
- rdtgroup_kn_unlock(parent_kn);
+ rdtgroup_kn_unlock(prgrp_kn);
return ret;
}
-static int rdtgroup_rmdir(struct kernfs_node *kn)
+/*
+ * Tear down a group set up by mkdir_rdt_prepare(): remove its kernfs
+ * directory, give back its rmid and free the structure.
+ */
+static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
+{
+ kernfs_remove(rgrp->kn);
+ free_rmid(rgrp->mon.rmid);
+ kfree(rgrp);
+}
+
+/*
+ * Create a monitor group inside the "mon_groups" directory of a
+ * control-and-monitor (ctrl_mon) group. The new group monitors a subset
+ * of the tasks and cpus of its parent and shares the parent's closid.
+ *
+ * Returns 0 on success or the error from mkdir_rdt_prepare().
+ */
+static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
+ struct kernfs_node *prgrp_kn,
+ const char *name,
+ umode_t mode)
+{
+ struct rdtgroup *mon_grp, *ctrl_grp;
+ int err;
+
+ err = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTMON_GROUP,
+ &mon_grp);
+ if (err)
+ return err;
+
+ /* A monitor group inherits the closid of its parent. */
+ ctrl_grp = mon_grp->mon.parent;
+ mon_grp->closid = ctrl_grp->closid;
+
+ /* Let the parent ctrl_mon group keep track of its new child. */
+ list_add_tail(&mon_grp->mon.crdtgrp_list, &ctrl_grp->mon.crdtgrp_list);
+
+ /* mkdir_rdt_prepare() succeeded, so unlocking is left to us. */
+ rdtgroup_kn_unlock(prgrp_kn);
+ return 0;
+}
+
+/*
+ * These are rdtgroups created under the root directory. Can be used
+ * to allocate and monitor resources.
+ *
+ * Allocates a closid for the new group, links the group on
+ * rdt_all_groups and, when monitoring is supported, creates its empty
+ * "mon_groups" directory.
+ *
+ * Returns 0 on success. On failure everything set up so far is undone
+ * and the error is returned.
+ */
+static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
+ struct kernfs_node *prgrp_kn,
+ const char *name, umode_t mode)
{
- int ret, cpu, closid = rdtgroup_default.closid;
struct rdtgroup *rdtgrp;
- cpumask_var_t tmpmask;
+ struct kernfs_node *kn;
+ u32 closid;
+ int ret;
- if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
- return -ENOMEM;
+ ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTCTRL_GROUP,
+ &rdtgrp);
+ if (ret)
+ return ret;
- rdtgrp = rdtgroup_kn_lock_live(kn);
- if (!rdtgrp) {
- ret = -EPERM;
- goto out;
+ kn = rdtgrp->kn;
+ ret = closid_alloc();
+ if (ret < 0)
+ goto out_common_fail;
+ closid = ret;
+ /*
+ * ret still holds the closid here. Reset it so that the success
+ * path returns 0 even when the mon_groups step below is skipped.
+ */
+ ret = 0;
+
+ rdtgrp->closid = closid;
+ list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
+
+ if (rdt_mon_capable) {
+ /*
+ * Create an empty mon_groups directory to hold the subset
+ * of tasks and cpus to monitor.
+ */
+ ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
+ if (ret)
+ goto out_id_free;
}
+ goto out_unlock;
+
+out_id_free:
+ closid_free(closid);
+ list_del(&rdtgrp->rdtgroup_list);
+out_common_fail:
+ mkdir_rdt_prepare_clean(rdtgrp);
+out_unlock:
+ rdtgroup_kn_unlock(prgrp_kn);
+ return ret;
+}
+
+/*
+ * Monitor groups may only live in a directory called "mon_groups",
+ * which every ctrl_mon group has. This checks both halves of that rule:
+ *
+ * 1. The directory @kn is named "mon_groups".
+ * 2. The group @name is itself "not" called "mon_groups", so that a
+ * "mon_groups" directory always has a ctrl_mon group as parent.
+ */
+static bool is_mon_groups(struct kernfs_node *kn, const char *name)
+{
+ if (strcmp(kn->name, "mon_groups") != 0)
+ return false;
+
+ return strcmp(name, "mon_groups") != 0;
+}
+
+/*
+ * mkdir handler: decide from the parent directory which kind of group
+ * is being created and hand over to the matching helper.
+ *
+ * Returns -EINVAL for a name containing '\n', -EPERM when the parent
+ * directory allows neither kind of group, otherwise the helper's result.
+ */
+static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
+ umode_t mode)
+{
+ /* A '\n' in the name would lead to an unparsable situation. */
+ if (strchr(name, '\n') != NULL)
+ return -EINVAL;
+
+ /*
+ * Directly below the root, with RDT allocation supported: create a
+ * control and monitoring group. The root is also the parent group.
+ */
+ if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn)
+ return rdtgroup_mkdir_ctrl_mon(parent_kn, parent_kn, name, mode);
+
+ /*
+ * Anything else must be a monitoring group inside a valid
+ * "mon_groups" directory, and needs RDT monitoring support.
+ */
+ if (!rdt_mon_capable || !is_mon_groups(parent_kn, name))
+ return -EPERM;
+
+ return rdtgroup_mkdir_mon(parent_kn, parent_kn->parent, name, mode);
+}
+
+/*
+ * Remove the monitor group @rdtgrp whose directory is @kn: move its
+ * tasks and cpus back to the parent group, release its rmid, unlink it
+ * from the parent's child list and remove its directory. @tmpmask is
+ * scratch space used to collect the cpus that need updating.
+ *
+ * Always returns 0.
+ */
+static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
+ cpumask_var_t tmpmask)
+{
+ struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
+ int cpu;
+
+ /* Give any tasks back to the parent group */
+ rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
+
+ /* Update per cpu rmid of the moved CPUs first */
+ for_each_cpu(cpu, &rdtgrp->cpu_mask)
+ per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
+ /*
+ * Update the MSR on moved CPUs and CPUs which have moved
+ * task running on them.
+ */
+ cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
+ update_closid_rmid(tmpmask, NULL);
+
+ /* Mark the group deleted before its rmid is given back. */
+ rdtgrp->flags = RDT_DELETED;
+ free_rmid(rdtgrp->mon.rmid);
+
+ /*
+ * Remove the rdtgrp from the parent ctrl_mon group's list
+ */
+ WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
+ list_del(&rdtgrp->mon.crdtgrp_list);
+
+ /*
+ * one extra hold on this, will drop when we kfree(rdtgrp)
+ * in rdtgroup_kn_unlock()
+ */
+ kernfs_get(kn);
+ kernfs_remove(rdtgrp->kn);
+
+ return 0;
+}
+
+static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
+ cpumask_var_t tmpmask)
+{
+ int cpu;
+
/* Give any tasks back to the default group */
rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
@@ -1100,18 +1772,28 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
cpumask_or(&rdtgroup_default.cpu_mask,
&rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
- /* Update per cpu closid of the moved CPUs first */
- for_each_cpu(cpu, &rdtgrp->cpu_mask)
- per_cpu(cpu_closid, cpu) = closid;
+ /* Update per cpu closid and rmid of the moved CPUs first */
+ for_each_cpu(cpu, &rdtgrp->cpu_mask) {
+ per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
+ per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
+ }
+
/*
* Update the MSR on moved CPUs and CPUs which have moved
* task running on them.
*/
cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
- rdt_update_closid(tmpmask, NULL);
+ update_closid_rmid(tmpmask, NULL);
rdtgrp->flags = RDT_DELETED;
closid_free(rdtgrp->closid);
+ free_rmid(rdtgrp->mon.rmid);
+
+ /*
+ * Free all the child monitor group rmids.
+ */
+ free_all_child_rdtgrp(rdtgrp);
+
list_del(&rdtgrp->rdtgroup_list);
/*
@@ -1120,7 +1802,41 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
*/
kernfs_get(kn);
kernfs_remove(rdtgrp->kn);
- ret = 0;
+
+ return 0;
+}
+
+static int rdtgroup_rmdir(struct kernfs_node *kn)
+{
+ struct kernfs_node *parent_kn = kn->parent;
+ struct rdtgroup *rdtgrp;
+ cpumask_var_t tmpmask;
+ int ret = 0;
+
+ if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ return -ENOMEM;
+
+ rdtgrp = rdtgroup_kn_lock_live(kn);
+ if (!rdtgrp) {
+ ret = -EPERM;
+ goto out;
+ }
+
+ /*
+ * If the rdtgroup is a ctrl_mon group and parent directory
+ * is the root directory, remove the ctrl_mon group.
+ *
+ * If the rdtgroup is a mon group and parent directory
+ * is a valid "mon_groups" directory, remove the mon group.
+ */
+ if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn)
+ ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask);
+ else if (rdtgrp->type == RDTMON_GROUP &&
+ is_mon_groups(parent_kn, kn->name))
+ ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask);
+ else
+ ret = -EPERM;
+
out:
rdtgroup_kn_unlock(kn);
free_cpumask_var(tmpmask);
@@ -1129,7 +1845,7 @@ out:
+/* Emit ",cdp" when the L3DATA resource has allocation enabled. Returns 0. */
static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
{
- if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled)
+ if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
seq_puts(seq, ",cdp");
return 0;
}
@@ -1153,10 +1869,13 @@ static int __init rdtgroup_setup_root(void)
mutex_lock(&rdtgroup_mutex);
rdtgroup_default.closid = 0;
+ rdtgroup_default.mon.rmid = 0;
+ rdtgroup_default.type = RDTCTRL_GROUP;
+ INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
+
list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
- ret = rdtgroup_add_files(rdt_root->kn, rdtgroup_base_files,
- ARRAY_SIZE(rdtgroup_base_files));
+ ret = rdtgroup_add_files(rdt_root->kn, RF_CTRL_BASE);
if (ret) {
kernfs_destroy_root(rdt_root);
goto out;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 6dde0497efc7..3b413065c613 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -51,6 +51,7 @@
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/reboot.h>
+#include <asm/set_memory.h>
#include "mce-internal.h"
@@ -1051,6 +1052,48 @@ static int do_memory_failure(struct mce *m)
return ret;
}
+#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE)
+
+/*
+ * Mark the kernel 1:1 mapping of page frame @pfn not-present through
+ * set_memory_np(), using a decoy address. A failure is only logged.
+ */
+void arch_unmap_kpfn(unsigned long pfn)
+{
+ unsigned long decoy_addr;
+
+ /*
+ * Unmap this page from the kernel 1:1 mappings to make sure
+ * we don't log more errors because of speculative access to
+ * the page.
+ * We would like to just call:
+ * set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
+ * but doing that would radically increase the odds of a
+ * speculative access to the poison page because we'd have
+ * the virtual address of the kernel 1:1 mapping sitting
+ * around in registers.
+ * Instead we get tricky. We create a non-canonical address
+ * that looks just like the one we want, but has bit 63 flipped.
+ * This relies on set_memory_np() not checking whether we passed
+ * a legal address.
+ */
+
+/*
+ * Build time check to see if we have a spare virtual bit. Don't want
+ * to leave this until run time because most developers don't have a
+ * system that can exercise this code path. This will only become a
+ * problem if/when we move beyond 5-level page tables.
+ *
+ * Hard code "9" here because cpp doesn't grok ilog2(PTRS_PER_PGD)
+ */
+#if PGDIR_SHIFT + 9 < 63
+ decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
+#else
+#error "no unused virtual bit available"
+#endif
+
+ if (set_memory_np(decoy_addr, 1))
+ pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
+
+}
+#endif
+
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9e314bcf67cc..40e28ed77fbf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -201,8 +201,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
wrmsr(smca_config, low, high);
}
- /* Collect bank_info using CPU 0 for now. */
- if (cpu)
+ /* Return early if this bank was already initialized. */
+ if (smca_banks[bank].hwid)
return;
if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
@@ -216,11 +216,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
s_hwid = &smca_hwid_mcatypes[i];
if (hwid_mcatype == s_hwid->hwid_mcatype) {
-
- WARN(smca_banks[bank].hwid,
- "Bank %s already initialized!\n",
- smca_get_name(s_hwid->bank_type));
-
smca_banks[bank].hwid = s_hwid;
smca_banks[bank].id = low;
smca_banks[bank].sysfs_id = s_hwid->count++;
@@ -776,24 +771,12 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
mce_log(&m);
}
-static inline void __smp_deferred_error_interrupt(void)
-{
- inc_irq_stat(irq_deferred_error_count);
- deferred_error_int_vector();
-}
-
+/*
+ * Deferred error interrupt entry point: counts the interrupt and calls
+ * deferred_error_int_vector(), wrapped in the apic entry/exit tracepoints
+ * and in entering_irq()/exiting_ack_irq().
+ */
asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void)
{
entering_irq();
- __smp_deferred_error_interrupt();
- exiting_ack_irq();
-}
-
-asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
-{
- entering_irq();
trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
- __smp_deferred_error_interrupt();
+ inc_irq_stat(irq_deferred_error_count);
+ deferred_error_int_vector();
trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
exiting_ack_irq();
}
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index d7cc190ae457..2da67b70ba98 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -122,7 +122,7 @@ static struct attribute *thermal_throttle_attrs[] = {
NULL
};
-static struct attribute_group thermal_attr_group = {
+/* Attribute group "thermal_throttle" made up of thermal_throttle_attrs. */
+static const struct attribute_group thermal_attr_group = {
.attrs = thermal_throttle_attrs,
.name = "thermal_throttle"
};
@@ -390,26 +390,12 @@ static void unexpected_thermal_interrupt(void)
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
-static inline void __smp_thermal_interrupt(void)
-{
- inc_irq_stat(irq_thermal_count);
- smp_thermal_vector();
-}
-
-asmlinkage __visible void __irq_entry
-smp_thermal_interrupt(struct pt_regs *regs)
-{
- entering_irq();
- __smp_thermal_interrupt();
- exiting_ack_irq();
-}
-
-asmlinkage __visible void __irq_entry
-smp_trace_thermal_interrupt(struct pt_regs *regs)
+/*
+ * Thermal interrupt entry point: counts the interrupt and calls the
+ * handler installed in smp_thermal_vector, wrapped in the apic
+ * entry/exit tracepoints and in entering_irq()/exiting_ack_irq().
+ * @r is not used.
+ */
+asmlinkage __visible void __irq_entry smp_thermal_interrupt(struct pt_regs *r)
{
entering_irq();
trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
- __smp_thermal_interrupt();
+ inc_irq_stat(irq_thermal_count);
+ smp_thermal_vector();
trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
exiting_ack_irq();
}
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index bb0e75eed10a..5e7249e42f8f 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -17,24 +17,12 @@ static void default_threshold_interrupt(void)
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
-static inline void __smp_threshold_interrupt(void)
-{
- inc_irq_stat(irq_threshold_count);
- mce_threshold_vector();
-}
-
+/*
+ * Threshold interrupt entry point: counts the interrupt and calls the
+ * handler installed in mce_threshold_vector, wrapped in the apic
+ * entry/exit tracepoints and in entering_irq()/exiting_ack_irq().
+ */
asmlinkage __visible void __irq_entry smp_threshold_interrupt(void)
{
entering_irq();
- __smp_threshold_interrupt();
- exiting_ack_irq();
-}
-
-asmlinkage __visible void __irq_entry smp_trace_threshold_interrupt(void)
-{
- entering_irq();
trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
- __smp_threshold_interrupt();
+ inc_irq_stat(irq_threshold_count);
+ mce_threshold_vector();
trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
exiting_ack_irq();
}
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 21b185793c80..c6daec4bdba5 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -400,9 +400,12 @@ static void update_cache(struct ucode_patch *new_patch)
list_for_each_entry(p, &microcode_cache, plist) {
if (p->equiv_cpu == new_patch->equiv_cpu) {
- if (p->patch_id >= new_patch->patch_id)
+ if (p->patch_id >= new_patch->patch_id) {
/* we already have the latest patch */
+ kfree(new_patch->data);
+ kfree(new_patch);
return;
+ }
list_replace(&p->plist, &new_patch->plist);
kfree(p->data);
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 9cb98ee103db..86e8f0b2537b 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -561,7 +561,7 @@ static struct attribute *mc_default_attrs[] = {
NULL
};
-static struct attribute_group mc_attr_group = {
+/* Attribute group "microcode" made up of mc_default_attrs. */
+static const struct attribute_group mc_attr_group = {
.attrs = mc_default_attrs,
.name = "microcode",
};
@@ -707,7 +707,7 @@ static struct attribute *cpu_root_microcode_attrs[] = {
NULL
};
-static struct attribute_group cpu_root_microcode_group = {
+/* Attribute group "microcode" made up of cpu_root_microcode_attrs. */
+static const struct attribute_group cpu_root_microcode_group = {
.name = "microcode",
.attrs = cpu_root_microcode_attrs,
};
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 59edbe9d4ccb..8f7a9bbad514 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -146,18 +146,18 @@ static bool microcode_matches(struct microcode_header_intel *mc_header,
return false;
}
-static struct ucode_patch *__alloc_microcode_buf(void *data, unsigned int size)
+static struct ucode_patch *memdup_patch(void *data, unsigned int size)
{
struct ucode_patch *p;
p = kzalloc(sizeof(struct ucode_patch), GFP_KERNEL);
if (!p)
- return ERR_PTR(-ENOMEM);
+ return NULL;
p->data = kmemdup(data, size, GFP_KERNEL);
if (!p->data) {
kfree(p);
- return ERR_PTR(-ENOMEM);
+ return NULL;
}
return p;
@@ -183,8 +183,8 @@ static void save_microcode_patch(void *data, unsigned int size)
if (mc_hdr->rev <= mc_saved_hdr->rev)
continue;
- p = __alloc_microcode_buf(data, size);
- if (IS_ERR(p))
+ p = memdup_patch(data, size);
+ if (!p)
pr_err("Error allocating buffer %p\n", data);
else
list_replace(&iter->plist, &p->plist);
@@ -196,24 +196,25 @@ static void save_microcode_patch(void *data, unsigned int size)
* newly found.
*/
if (!prev_found) {
- p = __alloc_microcode_buf(data, size);
- if (IS_ERR(p))
+ p = memdup_patch(data, size);
+ if (!p)
pr_err("Error allocating buffer for %p\n", data);
else
list_add_tail(&p->plist, &microcode_cache);
}
+ if (!p)
+ return;
+
/*
* Save for early loading. On 32-bit, that needs to be a physical
* address as the APs are running from physical addresses, before
* paging has been enabled.
*/
- if (p) {
- if (IS_ENABLED(CONFIG_X86_32))
- intel_ucode_patch = (struct microcode_intel *)__pa_nodebug(p->data);
- else
- intel_ucode_patch = p->data;
- }
+ if (IS_ENABLED(CONFIG_X86_32))
+ intel_ucode_patch = (struct microcode_intel *)__pa_nodebug(p->data);
+ else
+ intel_ucode_patch = p->data;
}
static int microcode_sanity_check(void *mc, int print_err)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 70e717fccdd6..3b3f713e15e5 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -59,13 +59,8 @@ void hyperv_vector_handler(struct pt_regs *regs)
+/*
+ * Record @handler in vmbus_handler and install hyperv_callback_vector as
+ * the interrupt gate for HYPERVISOR_CALLBACK_VECTOR.
+ */
void hv_setup_vmbus_irq(void (*handler)(void))
{
vmbus_handler = handler;
- /*
- * Setup the IDT for hypervisor callback. Prevent reallocation
- * at module reload.
- */
- if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors))
- alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
- hyperv_callback_vector);
+ /* Setup the IDT for hypervisor callback */
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
}
void hv_remove_vmbus_irq(void)
@@ -184,9 +179,15 @@ static void __init ms_hyperv_init_platform(void)
ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
- pr_info("HyperV: features 0x%x, hints 0x%x\n",
+ pr_info("Hyper-V: features 0x%x, hints 0x%x\n",
ms_hyperv.features, ms_hyperv.hints);
+ ms_hyperv.max_vp_index = cpuid_eax(HVCPUID_IMPLEMENTATION_LIMITS);
+ ms_hyperv.max_lp_index = cpuid_ebx(HVCPUID_IMPLEMENTATION_LIMITS);
+
+ pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n",
+ ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);
+
/*
* Extract host information.
*/
@@ -219,7 +220,7 @@ static void __init ms_hyperv_init_platform(void)
rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
lapic_timer_frequency = hv_lapic_frequency;
- pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
+ pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
lapic_timer_frequency);
}
@@ -249,11 +250,12 @@ static void __init ms_hyperv_init_platform(void)
* Setup the hook to get control post apic initialization.
*/
x86_platform.apic_post_init = hyperv_init;
+ hyperv_setup_mmu_ops();
#endif
}
const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
- .name = "Microsoft HyperV",
+ .name = "Microsoft Hyper-V",
.detect = ms_hyperv_platform,
.init_platform = ms_hyperv_init_platform,
};
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index c5bb63be4ba1..40d5a8a75212 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -237,6 +237,18 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask);
}
+/*
+ * Counterpart of set_mtrr() that runs the MTRR rendezvous through
+ * stop_machine_cpuslocked() rather than stop_machine().
+ */
+static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base,
+				 unsigned long size, mtrr_type type)
+{
+	struct set_mtrr_data args = {
+		.smp_type	= type,
+		.smp_size	= size,
+		.smp_base	= base,
+		.smp_reg	= reg,
+	};
+
+	stop_machine_cpuslocked(mtrr_rendezvous_handler, &args, cpu_online_mask);
+}
+
static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type)
{
@@ -370,7 +382,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
/* Search for an empty MTRR */
i = mtrr_if->get_free_region(base, size, replace);
if (i >= 0) {
- set_mtrr(i, base, size, type);
+ set_mtrr_cpuslocked(i, base, size, type);
if (likely(replace < 0)) {
mtrr_usage_table[i] = 1;
} else {
@@ -378,7 +390,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
if (increment)
mtrr_usage_table[i]++;
if (unlikely(replace != i)) {
- set_mtrr(replace, 0, 0, 0);
+ set_mtrr_cpuslocked(replace, 0, 0, 0);
mtrr_usage_table[replace] = 0;
}
}
@@ -506,7 +518,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
goto out;
}
if (--mtrr_usage_table[reg] < 1)
- set_mtrr(reg, 0, 0, 0);
+ set_mtrr_cpuslocked(reg, 0, 0, 0);
error = reg;
out:
mutex_unlock(&mtrr_mutex);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 23c23508c012..05459ad3db46 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -31,6 +31,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
+ { X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 }
};
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index dbce3cca94cb..f13b4c00a5de 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -94,6 +94,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (stack_name)
printk("%s <%s>\n", log_lvl, stack_name);
+ if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
+ __show_regs(regs, 0);
+
/*
* Scan the stack, printing any text addresses we find. At the
* same time, follow proper stack frames with the unwinder.
@@ -118,10 +121,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
* Don't print regs->ip again if it was already printed
* by __show_regs() below.
*/
- if (regs && stack == &regs->ip) {
- unwind_next_frame(&state);
- continue;
- }
+ if (regs && stack == &regs->ip)
+ goto next;
if (stack == ret_addr_p)
reliable = 1;
@@ -144,6 +145,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (!reliable)
continue;
+next:
/*
* Get the next frame from the unwinder. No need to
* check for an error: if anything goes wrong, the rest
@@ -153,7 +155,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
/* if the frame has entry regs, print them */
regs = unwind_get_entry_regs(&state);
- if (regs)
+ if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
__show_regs(regs, 0);
}
@@ -265,7 +267,7 @@ int __die(const char *str, struct pt_regs *regs, long err)
#ifdef CONFIG_X86_32
if (user_mode(regs)) {
sp = regs->sp;
- ss = regs->ss & 0xffff;
+ ss = regs->ss;
} else {
sp = kernel_stack_pointer(regs);
savesegment(ss, ss);
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index e5f0b40e66d2..4f0481474903 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -37,7 +37,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
- if (stack < begin || stack > end)
+ if (stack <= begin || stack > end)
return false;
info->type = STACK_TYPE_IRQ;
@@ -62,7 +62,7 @@ static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
- if (stack < begin || stack > end)
+ if (stack <= begin || stack > end)
return false;
info->type = STACK_TYPE_SOFTIRQ;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 3e1471d57487..225af4184f06 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -55,7 +55,7 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
begin = end - (exception_stack_sizes[k] / sizeof(long));
regs = (struct pt_regs *)end - 1;
- if (stack < begin || stack >= end)
+ if (stack <= begin || stack >= end)
continue;
info->type = STACK_TYPE_EXCEPTION + k;
@@ -78,7 +78,7 @@ static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
- if (stack < begin || stack > end)
+ if (stack <= begin || stack > end)
return false;
info->type = STACK_TYPE_IRQ;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 532da61d605c..71c11ad5643e 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -96,7 +96,8 @@ EXPORT_SYMBOL_GPL(e820__mapped_any);
* Note: this function only works correctly once the E820 table is sorted and
* not-overlapping (at least for the range specified), which is the case normally.
*/
-bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type)
+static struct e820_entry *__e820__mapped_all(u64 start, u64 end,
+ enum e820_type type)
{
int i;
@@ -122,9 +123,28 @@ bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type)
* coverage of the desired range exists:
*/
if (start >= end)
- return 1;
+ return entry;
}
- return 0;
+
+ return NULL;
+}
+
+/*
+ * Check whether the entire range <start,end> is mapped with @type: true
+ * when the __e820__mapped_all() lookup yields an entry, false when it
+ * yields NULL.
+ */
+bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type)
+{
+	return __e820__mapped_all(start, end, type) != NULL;
+}
+
+/*
+ * Look up the range <start,end> with a lookup type of 0 and return the
+ * type of the entry that __e820__mapped_all() hands back, or -EINVAL when
+ * it hands back NULL.
+ */
+int e820__get_entry_type(u64 start, u64 end)
+{
+	struct e820_entry *match = __e820__mapped_all(start, end, 0);
+
+	return !match ? -EINVAL : match->type;
}
/*
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index d907c3d8633f..927abeaf63e2 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -12,10 +12,10 @@
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/delay.h>
-#include <linux/dmi.h>
#include <linux/pci_ids.h>
#include <linux/bcma/bcma.h>
#include <linux/bcma/bcma_regs.h>
+#include <linux/platform_data/x86/apple.h>
#include <drm/i915_drm.h>
#include <asm/pci-direct.h>
#include <asm/dma.h>
@@ -527,6 +527,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_BXT_IDS(&gen9_early_ops),
INTEL_KBL_IDS(&gen9_early_ops),
INTEL_GLK_IDS(&gen9_early_ops),
+ INTEL_CNL_IDS(&gen9_early_ops),
};
static void __init
@@ -593,7 +594,7 @@ static void __init apple_airport_reset(int bus, int slot, int func)
u64 addr;
int i;
- if (!dmi_match(DMI_SYS_VENDOR, "Apple Inc."))
+ if (!x86_apple_machine)
return;
/* Card may have been put into PCI_D3hot by grub quirk */
diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c
new file mode 100644
index 000000000000..f260e452e4f8
--- /dev/null
+++ b/arch/x86/kernel/eisa.c
@@ -0,0 +1,19 @@
+/*
+ * EISA specific code
+ *
+ * This file is licensed under the GPL V2
+ */
+#include <linux/ioport.h>
+#include <linux/eisa.h>
+#include <linux/io.h>
+
+/*
+ * Probe for an EISA bus: map 4 bytes at physical address 0x0FFFD9 and set
+ * EISA_bus when they read back as the "EISA" signature. Always returns 0,
+ * so the initcall never reports a failure.
+ */
+static __init int eisa_bus_probe(void)
+{
+	void __iomem *p = ioremap(0x0FFFD9, 4);
+
+	/* ioremap() can fail; never read through a failed mapping */
+	if (!p)
+		return 0;
+
+	if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
+		EISA_bus = 1;
+	iounmap(p);
+	return 0;
+}
+subsys_initcall(eisa_bus_probe);
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 6b91e2eb8d3f..9c4e7ba6870c 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -195,7 +195,7 @@ void init_espfix_ap(int cpu)
pte_p = pte_offset_kernel(&pmd, addr);
stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
- pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
+ pte = __pte(__pa(stack_page) | ((__PAGE_KERNEL_RO | _PAGE_ENC) & ptemask));
for (n = 0; n < ESPFIX_PTE_CLONES; n++)
set_pte(&pte_p[n*PTE_STRIDE], pte);
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 538ec012b371..cf2ce063f65a 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -10,6 +10,7 @@
#include <linux/mm.h>
#include <linux/memblock.h>
+#include <asm/desc.h>
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/e820/api.h>
@@ -30,6 +31,9 @@ static void __init i386_default_early_setup(void)
asmlinkage __visible void __init i386_start_kernel(void)
{
cr4_init_shadow();
+
+ idt_setup_early_handler();
+
sanitize_boot_params(&boot_params);
x86_early_init_platform_quirks();
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 46c3c73e7f43..bab4fa579450 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -14,6 +14,7 @@
#include <linux/start_kernel.h>
#include <linux/io.h>
#include <linux/memblock.h>
+#include <linux/mem_encrypt.h>
#include <asm/processor.h>
#include <asm/proto.h>
@@ -33,7 +34,6 @@
/*
* Manage page tables very early on.
*/
-extern pgd_t early_top_pgt[PTRS_PER_PGD];
extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
@@ -45,14 +45,17 @@ static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
return ptr - (void *)_text + (void *)physaddr;
}
-void __head __startup_64(unsigned long physaddr)
+unsigned long __head __startup_64(unsigned long physaddr,
+ struct boot_params *bp)
{
unsigned long load_delta, *p;
+ unsigned long pgtable_flags;
pgdval_t *pgd;
p4dval_t *p4d;
pudval_t *pud;
pmdval_t *pmd, pmd_entry;
int i;
+ unsigned int *next_pgt_ptr;
/* Is the address too large? */
if (physaddr >> MAX_PHYSMEM_BITS)
@@ -68,6 +71,12 @@ void __head __startup_64(unsigned long physaddr)
if (load_delta & ~PMD_PAGE_MASK)
for (;;);
+ /* Activate Secure Memory Encryption (SME) if supported and enabled */
+ sme_enable(bp);
+
+ /* Include the SME encryption mask in the fixup value */
+ load_delta += sme_get_me_mask();
+
/* Fixup the physical addresses in the page table */
pgd = fixup_pointer(&early_top_pgt, physaddr);
@@ -92,30 +101,34 @@ void __head __startup_64(unsigned long physaddr)
* it avoids problems around wraparound.
*/
- pud = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
- pmd = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+ next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr);
+ pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
+ pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
+
+ pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
- pgd[i + 0] = (pgdval_t)p4d + _KERNPG_TABLE;
- pgd[i + 1] = (pgdval_t)p4d + _KERNPG_TABLE;
+ pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
+ pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
- p4d[i + 0] = (pgdval_t)pud + _KERNPG_TABLE;
- p4d[i + 1] = (pgdval_t)pud + _KERNPG_TABLE;
+ p4d[i + 0] = (pgdval_t)pud + pgtable_flags;
+ p4d[i + 1] = (pgdval_t)pud + pgtable_flags;
} else {
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
- pgd[i + 0] = (pgdval_t)pud + _KERNPG_TABLE;
- pgd[i + 1] = (pgdval_t)pud + _KERNPG_TABLE;
+ pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
+ pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
}
i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
- pud[i + 0] = (pudval_t)pmd + _KERNPG_TABLE;
- pud[i + 1] = (pudval_t)pmd + _KERNPG_TABLE;
+ pud[i + 0] = (pudval_t)pmd + pgtable_flags;
+ pud[i + 1] = (pudval_t)pmd + pgtable_flags;
pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
+ pmd_entry += sme_get_me_mask();
pmd_entry += physaddr;
for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
@@ -136,9 +149,30 @@ void __head __startup_64(unsigned long physaddr)
pmd[i] += load_delta;
}
- /* Fixup phys_base */
+ /*
+ * Fixup phys_base - remove the memory encryption mask to obtain
+ * the true physical address.
+ */
p = fixup_pointer(&phys_base, physaddr);
- *p += load_delta;
+ *p += load_delta - sme_get_me_mask();
+
+ /* Encrypt the kernel (if SME is active) */
+ sme_encrypt_kernel();
+
+ /*
+ * Return the SME encryption mask (if SME is active) to be used as a
+ * modifier for the initial pgdir entry programmed into CR3.
+ */
+ return sme_get_me_mask();
+}
+
+unsigned long __startup_secondary_64(void)
+{
+ /*
+ * Called from secondary_startup_64, which adds the returned value to
+ * the init_top_pgt address. No page table fixups are done here.
+ *
+ * Return the SME encryption mask (if SME is active) to be used as a
+ * modifier for the initial pgdir entry programmed into CR3.
+ */
+ return sme_get_me_mask();
}
/* Wipe all early page tables except for the kernel symbol map */
@@ -146,17 +180,17 @@ static void __init reset_early_page_tables(void)
{
memset(early_top_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
next_early_pgt = 0;
- write_cr3(__pa_nodebug(early_top_pgt));
+ write_cr3(__sme_pa_nodebug(early_top_pgt));
}
/* Create a new PMD entry */
-int __init early_make_pgtable(unsigned long address)
+int __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
{
unsigned long physaddr = address - __PAGE_OFFSET;
pgdval_t pgd, *pgd_p;
p4dval_t p4d, *p4d_p;
pudval_t pud, *pud_p;
- pmdval_t pmd, *pmd_p;
+ pmdval_t *pmd_p;
/* Invalid address or early pgt is done ? */
if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt))
@@ -215,12 +249,21 @@ again:
memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
}
- pmd = (physaddr & PMD_MASK) + early_pmd_flags;
pmd_p[pmd_index(address)] = pmd;
return 0;
}
+/*
+ * Build the PMD value for @address from (address - __PAGE_OFFSET) masked
+ * with PMD_MASK plus early_pmd_flags, then hand it to
+ * __early_make_pgtable() and return its result.
+ */
+int __init early_make_pgtable(unsigned long address)
+{
+	unsigned long phys = address - __PAGE_OFFSET;
+	pmdval_t entry = (phys & PMD_MASK) + early_pmd_flags;
+
+	return __early_make_pgtable(address, entry);
+}
+
/* Don't add a printk in there. printk relies on the PDA which is not initialized
yet. */
static void __init clear_bss(void)
@@ -243,6 +286,12 @@ static void __init copy_bootdata(char *real_mode_data)
char * command_line;
unsigned long cmd_line_ptr;
+ /*
+ * If SME is active, this will create decrypted mappings of the
+ * boot data in advance of the copy operations.
+ */
+ sme_map_bootdata(real_mode_data);
+
memcpy(&boot_params, real_mode_data, sizeof boot_params);
sanitize_boot_params(&boot_params);
cmd_line_ptr = get_cmd_line_ptr();
@@ -250,12 +299,18 @@ static void __init copy_bootdata(char *real_mode_data)
command_line = __va(cmd_line_ptr);
memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
}
+
+ /*
+ * The old boot data is no longer needed and won't be reserved,
+ * freeing up that memory for use by the system. If SME is active,
+ * we need to remove the mappings that were created so that the
+ * memory doesn't remain mapped as decrypted.
+ */
+ sme_unmap_bootdata(real_mode_data);
}
asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
{
- int i;
-
/*
* Build-time sanity checks on the kernel image and module
* area mappings. (these are purely build-time and produce no code)
@@ -279,11 +334,16 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
clear_page(init_top_pgt);
+ /*
+ * SME support may update early_pmd_flags to include the memory
+ * encryption mask, so it needs to be called before anything
+ * that may generate a page fault.
+ */
+ sme_early_init();
+
kasan_early_init();
- for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
- set_intr_gate(i, early_idt_handler_array[i]);
- load_idt((const struct desc_ptr *)&idt_descr);
+ idt_setup_early_handler();
copy_bootdata(__va(real_mode_data));
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 1f85ee8f9439..9ed3074d0d27 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -155,7 +155,6 @@ ENTRY(startup_32)
jmp *%eax
.Lbad_subarch:
-WEAK(lguest_entry)
WEAK(xen_entry)
/* Unknown implementation; there's really
nothing we can do at this point. */
@@ -165,7 +164,6 @@ WEAK(xen_entry)
subarch_entries:
.long .Ldefault_entry /* normal x86/PC */
- .long lguest_entry /* lguest hypervisor */
.long xen_entry /* Xen hypervisor */
.long .Ldefault_entry /* Moorestown MID */
num_subarch_entries = (. - subarch_entries) / 4
@@ -347,7 +345,6 @@ ENTRY(startup_32_smp)
movl %eax,%cr0
lgdt early_gdt_descr
- lidt idt_descr
ljmp $(__KERNEL_CS),$1f
1: movl $(__KERNEL_DS),%eax # reload all the segment registers
movl %eax,%ss # after changing gdt.
@@ -380,37 +377,6 @@ ENDPROC(startup_32_smp)
*/
__INIT
setup_once:
- /*
- * Set up a idt with 256 interrupt gates that push zero if there
- * is no error code and then jump to early_idt_handler_common.
- * It doesn't actually load the idt - that needs to be done on
- * each CPU. Interrupts are enabled elsewhere, when we can be
- * relatively sure everything is ok.
- */
-
- movl $idt_table,%edi
- movl $early_idt_handler_array,%eax
- movl $NUM_EXCEPTION_VECTORS,%ecx
-1:
- movl %eax,(%edi)
- movl %eax,4(%edi)
- /* interrupt gate, dpl=0, present */
- movl $(0x8E000000 + __KERNEL_CS),2(%edi)
- addl $EARLY_IDT_HANDLER_SIZE,%eax
- addl $8,%edi
- loop 1b
-
- movl $256 - NUM_EXCEPTION_VECTORS,%ecx
- movl $ignore_int,%edx
- movl $(__KERNEL_CS << 16),%eax
- movw %dx,%ax /* selector = 0x0010 = cs */
- movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
-2:
- movl %eax,(%edi)
- movl %edx,4(%edi)
- addl $8,%edi
- loop 2b
-
#ifdef CONFIG_CC_STACKPROTECTOR
/*
* Configure the stack canary. The linker can't handle this by
@@ -457,12 +423,9 @@ early_idt_handler_common:
/* The vector number is in pt_regs->gs */
cld
- pushl %fs /* pt_regs->fs */
- movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */
- pushl %es /* pt_regs->es */
- movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */
- pushl %ds /* pt_regs->ds */
- movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */
+ pushl %fs /* pt_regs->fs (__fsh varies by model) */
+ pushl %es /* pt_regs->es (__esh varies by model) */
+ pushl %ds /* pt_regs->ds (__dsh varies by model) */
pushl %eax /* pt_regs->ax */
pushl %ebp /* pt_regs->bp */
pushl %edi /* pt_regs->di */
@@ -479,9 +442,8 @@ early_idt_handler_common:
/* Load the vector number into EDX */
movl PT_GS(%esp), %edx
- /* Load GS into pt_regs->gs and clear high bits */
+ /* Load GS into pt_regs->gs (and maybe clobber __gsh) */
movw %gs, PT_GS(%esp)
- movw $0, PT_GS+2(%esp)
movl %esp, %eax /* args are pt_regs (EAX), trapnr (EDX) */
call early_fixup_exception
@@ -493,18 +455,17 @@ early_idt_handler_common:
popl %edi /* pt_regs->di */
popl %ebp /* pt_regs->bp */
popl %eax /* pt_regs->ax */
- popl %ds /* pt_regs->ds */
- popl %es /* pt_regs->es */
- popl %fs /* pt_regs->fs */
- popl %gs /* pt_regs->gs */
+ popl %ds /* pt_regs->ds (always ignores __dsh) */
+ popl %es /* pt_regs->es (always ignores __esh) */
+ popl %fs /* pt_regs->fs (always ignores __fsh) */
+ popl %gs /* pt_regs->gs (always ignores __gsh) */
decl %ss:early_recursion_flag
addl $4, %esp /* pop pt_regs->orig_ax */
iret
ENDPROC(early_idt_handler_common)
/* This is the default interrupt "handler" :-) */
- ALIGN
-ignore_int:
+ENTRY(early_ignore_irq)
cld
#ifdef CONFIG_PRINTK
pushl %eax
@@ -539,7 +500,8 @@ ignore_int:
hlt_loop:
hlt
jmp hlt_loop
-ENDPROC(ignore_int)
+ENDPROC(early_ignore_irq)
+
__INITDATA
.align 4
GLOBAL(early_recursion_flag)
@@ -628,7 +590,6 @@ int_msg:
.data
.globl boot_gdt_descr
-.globl idt_descr
ALIGN
# early boot GDT descriptor (must use 1:1 address mapping)
@@ -637,11 +598,6 @@ boot_gdt_descr:
.word __BOOT_DS+7
.long boot_gdt - __PAGE_OFFSET
- .word 0 # 32-bit align idt_desc.address
-idt_descr:
- .word IDT_ENTRIES*8-1 # idt contains 256 entries
- .long idt_table
-
# boot GDT descriptor (later on used by CPU#0):
.word 0 # 32 bit align gdt_desc.address
ENTRY(early_gdt_descr)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 6225550883df..513cbb012ecc 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -73,12 +73,19 @@ startup_64:
/* Sanitize CPU configuration */
call verify_cpu
+ /*
+ * Perform pagetable fixups. Additionally, if SME is active, encrypt
+ * the kernel and retrieve the modifier (SME encryption mask if SME
+ * is active) to be added to the initial pgdir entry that will be
+ * programmed into CR3.
+ */
leaq _text(%rip), %rdi
pushq %rsi
call __startup_64
popq %rsi
- movq $(early_top_pgt - __START_KERNEL_map), %rax
+ /* Form the CR3 value being sure to include the CR3 modifier */
+ addq $(early_top_pgt - __START_KERNEL_map), %rax
jmp 1f
ENTRY(secondary_startup_64)
/*
@@ -98,7 +105,16 @@ ENTRY(secondary_startup_64)
/* Sanitize CPU configuration */
call verify_cpu
- movq $(init_top_pgt - __START_KERNEL_map), %rax
+ /*
+ * Retrieve the modifier (SME encryption mask if SME is active) to be
+ * added to the initial pgdir entry that will be programmed into CR3.
+ */
+ pushq %rsi
+ call __startup_secondary_64
+ popq %rsi
+
+ /* Form the CR3 value being sure to include the CR3 modifier */
+ addq $(init_top_pgt - __START_KERNEL_map), %rax
1:
/* Enable PAE mode, PGE and LA57 */
@@ -335,9 +351,9 @@ GLOBAL(name)
NEXT_PAGE(early_top_pgt)
.fill 511,8,0
#ifdef CONFIG_X86_5LEVEL
- .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+ .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
#else
- .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
#endif
NEXT_PAGE(early_dynamic_pgts)
@@ -350,15 +366,15 @@ NEXT_PAGE(init_top_pgt)
.fill 512,8,0
#else
NEXT_PAGE(init_top_pgt)
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_PAGE_OFFSET*8, 0
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
- .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
NEXT_PAGE(level3_ident_pgt)
- .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.fill 511, 8, 0
NEXT_PAGE(level2_ident_pgt)
/* Since I easily can, map the first 1G.
@@ -370,14 +386,14 @@ NEXT_PAGE(level2_ident_pgt)
#ifdef CONFIG_X86_5LEVEL
NEXT_PAGE(level4_kernel_pgt)
.fill 511,8,0
- .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
#endif
NEXT_PAGE(level3_kernel_pgt)
.fill L3_START_KERNEL,8,0
/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
- .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
+ .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+ .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
NEXT_PAGE(level2_kernel_pgt)
/*
@@ -395,7 +411,7 @@ NEXT_PAGE(level2_kernel_pgt)
NEXT_PAGE(level2_fixmap_pgt)
.fill 506,8,0
- .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
+ .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
/* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
.fill 5,8,0
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
new file mode 100644
index 000000000000..6107ee1cb8d5
--- /dev/null
+++ b/arch/x86/kernel/idt.c
@@ -0,0 +1,371 @@
+/*
+ * Interrupt descriptor table related code
+ *
+ * This file is licensed under the GPL V2
+ */
+#include <linux/interrupt.h>
+
+#include <asm/traps.h>
+#include <asm/proto.h>
+#include <asm/desc.h>
+
+struct idt_data { /* one IDT entry in table form, consumed by idt_init_desc() */
+ unsigned int vector; /* IDT index the gate is written to */
+ unsigned int segment; /* truncated to u16 into the gate's segment field */
+ struct idt_bits bits; /* copied as-is into the gate's bits field */
+ const void *addr; /* handler address, split across the gate's offset fields */
+};
+
+/* Descriptor privilege levels used by the gate macros below */
+#define DPL0		0x0
+#define DPL3		0x3
+
+/* IST value used for gates that do not use an interrupt stack */
+#define DEFAULT_STACK	0
+
+/*
+ * Generic initializer for one struct idt_data entry. The present bit is
+ * always set; everything else comes from the arguments.
+ */
+#define G(_vector, _addr, _ist, _type, _dpl, _segment)	\
+	{						\
+		.vector		= _vector,		\
+		.bits.ist	= _ist,			\
+		.bits.type	= _type,		\
+		.bits.dpl	= _dpl,			\
+		.bits.p		= 1,			\
+		.addr		= _addr,		\
+		.segment	= _segment,		\
+	}
+
+/* Interrupt gate */
+#define INTG(_vector, _addr)				\
+	G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL0, __KERNEL_CS)
+
+/* System interrupt gate */
+#define SYSG(_vector, _addr)				\
+	G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS)
+
+/* Interrupt gate with interrupt stack */
+#define ISTG(_vector, _addr, _ist)			\
+	G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS)
+
+/* System interrupt gate with interrupt stack */
+#define SISTG(_vector, _addr, _ist)			\
+	G(_vector, _addr, _ist, GATE_INTERRUPT, DPL3, __KERNEL_CS)
+
+/*
+ * Task gate: no handler address; the segment value is _gdt shifted left
+ * by 3. The argument is parenthesised so that an expression argument
+ * cannot change the meaning of the shift.
+ */
+#define TSKG(_vector, _gdt)				\
+	G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, (_gdt) << 3)
+
+/*
+ * Early traps running on the DEFAULT_STACK because the other interrupt
+ * stacks work only after cpu_init().
+ *
+ * The table is const and only needed during init, hence __initconst.
+ */
+static const __initconst struct idt_data early_idts[] = {
+	INTG(X86_TRAP_DB,		debug),
+	SYSG(X86_TRAP_BP,		int3),
+#ifdef CONFIG_X86_32
+	INTG(X86_TRAP_PF,		page_fault),
+#endif
+};
+
+/*
+ * The default IDT entries which are set up in trap_init() before
+ * cpu_init() is invoked. Interrupt stacks cannot be used at that point and
+ * the traps which use them are reinitialized with IST after cpu_init() has
+ * set up TSS.
+ *
+ * The table is const and only needed during init, hence __initconst.
+ */
+static const __initconst struct idt_data def_idts[] = {
+	INTG(X86_TRAP_DE,		divide_error),
+	INTG(X86_TRAP_NMI,		nmi),
+	INTG(X86_TRAP_BR,		bounds),
+	INTG(X86_TRAP_UD,		invalid_op),
+	INTG(X86_TRAP_NM,		device_not_available),
+	INTG(X86_TRAP_OLD_MF,		coprocessor_segment_overrun),
+	INTG(X86_TRAP_TS,		invalid_TSS),
+	INTG(X86_TRAP_NP,		segment_not_present),
+	INTG(X86_TRAP_SS,		stack_segment),
+	INTG(X86_TRAP_GP,		general_protection),
+	INTG(X86_TRAP_SPURIOUS,		spurious_interrupt_bug),
+	INTG(X86_TRAP_MF,		coprocessor_error),
+	INTG(X86_TRAP_AC,		alignment_check),
+	INTG(X86_TRAP_XF,		simd_coprocessor_error),
+
+#ifdef CONFIG_X86_32
+	TSKG(X86_TRAP_DF,		GDT_ENTRY_DOUBLEFAULT_TSS),
+#else
+	INTG(X86_TRAP_DF,		double_fault),
+#endif
+	INTG(X86_TRAP_DB,		debug),
+	/*
+	 * NOTE(review): early_idts installs X86_TRAP_BP as a DPL3 gate (SYSG)
+	 * while this entry writes a DPL0 gate; a DPL3 gate appears again only
+	 * in the CONFIG_X86_64-only ist_idts table. Confirm that this is the
+	 * intended result on 32-bit.
+	 */
+	INTG(X86_TRAP_BP,		int3),
+
+#ifdef CONFIG_X86_MCE
+	INTG(X86_TRAP_MC,		&machine_check),
+#endif
+
+	SYSG(X86_TRAP_OF,		overflow),
+#if defined(CONFIG_IA32_EMULATION)
+	SYSG(IA32_SYSCALL_VECTOR,	entry_INT80_compat),
+#elif defined(CONFIG_X86_32)
+	SYSG(IA32_SYSCALL_VECTOR,	entry_INT80_32),
+#endif
+};
+
+/*
+ * The APIC and SMP idt entries
+ *
+ * The table is const and only needed during init, hence __initconst.
+ */
+static const __initconst struct idt_data apic_idts[] = {
+#ifdef CONFIG_SMP
+	INTG(RESCHEDULE_VECTOR,		reschedule_interrupt),
+	INTG(CALL_FUNCTION_VECTOR,	call_function_interrupt),
+	INTG(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt),
+	INTG(IRQ_MOVE_CLEANUP_VECTOR,	irq_move_cleanup_interrupt),
+	INTG(REBOOT_VECTOR,		reboot_interrupt),
+#endif
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
+	INTG(THERMAL_APIC_VECTOR,	thermal_interrupt),
+#endif
+
+#ifdef CONFIG_X86_MCE_THRESHOLD
+	INTG(THRESHOLD_APIC_VECTOR,	threshold_interrupt),
+#endif
+
+#ifdef CONFIG_X86_MCE_AMD
+	INTG(DEFERRED_ERROR_VECTOR,	deferred_error_interrupt),
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	INTG(LOCAL_TIMER_VECTOR,	apic_timer_interrupt),
+	INTG(X86_PLATFORM_IPI_VECTOR,	x86_platform_ipi),
+# ifdef CONFIG_HAVE_KVM
+	INTG(POSTED_INTR_VECTOR,	kvm_posted_intr_ipi),
+	INTG(POSTED_INTR_WAKEUP_VECTOR,	kvm_posted_intr_wakeup_ipi),
+	INTG(POSTED_INTR_NESTED_VECTOR,	kvm_posted_intr_nested_ipi),
+# endif
+# ifdef CONFIG_IRQ_WORK
+	INTG(IRQ_WORK_VECTOR,		irq_work_interrupt),
+# endif
+	INTG(SPURIOUS_APIC_VECTOR,	spurious_interrupt),
+	INTG(ERROR_APIC_VECTOR,		error_interrupt),
+#endif
+};
+
+#ifdef CONFIG_X86_64
+/*
+ * Early page fault gate running on the DEFAULT_STACK because the other
+ * interrupt stacks work only after cpu_init().
+ *
+ * The table is const and only needed during init, hence __initconst.
+ */
+static const __initconst struct idt_data early_pf_idts[] = {
+	INTG(X86_TRAP_PF,		page_fault),
+};
+
+/*
+ * Override for the debug_idt. Same as the default, but with interrupt
+ * stack set to DEFAULT_STACK (0). Required for NMI trap handling.
+ *
+ * The table is const and only needed during init, hence __initconst.
+ */
+static const __initconst struct idt_data dbg_idts[] = {
+	INTG(X86_TRAP_DB,	debug),
+	INTG(X86_TRAP_BP,	int3),
+};
+#endif
+
+/* Must be page-aligned because the real IDT is used in a fixmap. */
+gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss;
+
+struct desc_ptr idt_descr __ro_after_init = {
+ .size = (IDT_ENTRIES * 2 * sizeof(unsigned long)) - 1,
+ .address = (unsigned long) idt_table,
+};
+
+#ifdef CONFIG_X86_64
+/* No need to be aligned, but done to keep all IDTs defined the same way. */
+gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
+
+/*
+ * The exceptions which use Interrupt stacks. They are setup after
+ * cpu_init() when the TSS has been initialized.
+ *
+ * The table is const and only needed during init, hence __initconst.
+ */
+static const __initconst struct idt_data ist_idts[] = {
+	ISTG(X86_TRAP_DB,	debug,		DEBUG_STACK),
+	ISTG(X86_TRAP_NMI,	nmi,		NMI_STACK),
+	SISTG(X86_TRAP_BP,	int3,		DEBUG_STACK),
+	ISTG(X86_TRAP_DF,	double_fault,	DOUBLEFAULT_STACK),
+#ifdef CONFIG_X86_MCE
+	ISTG(X86_TRAP_MC,	&machine_check,	MCE_STACK),
+#endif
+};
+
+/*
+ * Descriptor for debug_idt_table, the copy of the IDT whose debug traps
+ * run on DEFAULT_STACK (0). Required for NMI trap handling.
+ */
+const struct desc_ptr debug_idt_descr = {
+ .size = IDT_ENTRIES * 16 - 1,
+ .address = (unsigned long) debug_idt_table,
+};
+#endif
+
+/*
+ * Encode the table entry @d into the gate descriptor @gate: segment and
+ * bits are copied over and the handler address is split across the
+ * descriptor's offset fields.
+ */
+static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d)
+{
+	const unsigned long handler = (unsigned long) d->addr;
+
+	gate->bits		= d->bits;
+	gate->segment		= (u16) d->segment;
+	gate->offset_low	= (u16) handler;
+	gate->offset_middle	= (u16) (handler >> 16);
+#ifdef CONFIG_X86_64
+	gate->reserved		= 0;
+	gate->offset_high	= (u32) (handler >> 32);
+#endif
+}
+
+/*
+ * Write the first @size entries of table @t into @idt. When @sys is true
+ * each written vector is also marked in used_vectors.
+ */
+static void
+idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sys)
+{
+	int i;
+
+	for (i = 0; i < size; i++) {
+		const struct idt_data *entry = &t[i];
+		gate_desc gate;
+
+		idt_init_desc(&gate, entry);
+		write_idt_entry(idt, entry->vector, &gate);
+		if (sys)
+			set_bit(entry->vector, used_vectors);
+	}
+}
+
+/*
+ * Install a present GATE_INTERRUPT gate in __KERNEL_CS for vector @n in
+ * idt_table, pointing at @addr. All other gate bits stay zero and
+ * used_vectors is not touched. BUGs when @n is above 0xFF.
+ */
+static void set_intr_gate(unsigned int n, const void *addr)
+{
+	struct idt_data entry;
+
+	BUG_ON(n > 0xFF);
+
+	/* Start from an all-zero entry so unset bits (ist, dpl) are 0 */
+	memset(&entry, 0, sizeof(entry));
+	entry.bits.p	= 1;
+	entry.bits.type	= GATE_INTERRUPT;
+	entry.segment	= __KERNEL_CS;
+	entry.addr	= addr;
+	entry.vector	= n;
+
+	idt_setup_from_table(idt_table, &entry, 1, false);
+}
+
+/**
+ * idt_setup_early_traps - Initialize the idt table with early traps
+ *
+ * On X86_64 these traps do not use interrupt stacks as they can't work
+ * before cpu_init() is invoked and sets up TSS. The IST variants are
+ * installed after that.
+ *
+ * Writes the early_idts entries into idt_table, marks their vectors in
+ * used_vectors and then loads idt_descr.
+ */
+void __init idt_setup_early_traps(void)
+{
+ idt_setup_from_table(idt_table, early_idts, ARRAY_SIZE(early_idts),
+ true);
+ load_idt(&idt_descr);
+}
+
+/**
+ * idt_setup_traps - Initialize the idt table with default traps
+ *
+ * Writes the def_idts entries into idt_table and marks their vectors in
+ * used_vectors. The IDT is not reloaded here.
+ */
+void __init idt_setup_traps(void)
+{
+ idt_setup_from_table(idt_table, def_idts, ARRAY_SIZE(def_idts), true);
+}
+
+#ifdef CONFIG_X86_64
+/**
+ * idt_setup_early_pf - Initialize the idt table with early pagefault handler
+ *
+ * On X86_64 this does not use interrupt stacks as they can't work before
+ * cpu_init() is invoked and sets up TSS. The IST variant is installed
+ * after that.
+ *
+ * Writes the early_pf_idts entries into idt_table and marks their vectors
+ * in used_vectors.
+ *
+ * FIXME: Why is 32bit and 64bit installing the PF handler at different
+ * places in the early setup code?
+ */
+void __init idt_setup_early_pf(void)
+{
+ idt_setup_from_table(idt_table, early_pf_idts,
+ ARRAY_SIZE(early_pf_idts), true);
+}
+
+/**
+ * idt_setup_ist_traps - Initialize the idt table with traps using IST
+ *
+ * Writes the ist_idts entries into idt_table, replacing whatever gates
+ * were installed for those vectors before, and marks the vectors in
+ * used_vectors.
+ */
+void __init idt_setup_ist_traps(void)
+{
+ idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts), true);
+}
+
+/**
+ * idt_setup_debugidt_traps - Initialize the debug idt table with debug traps
+ *
+ * Copies IDT_ENTRIES * 16 bytes of idt_table into debug_idt_table, then
+ * overwrites the dbg_idts vectors in the copy. used_vectors is left
+ * untouched.
+ */
+void __init idt_setup_debugidt_traps(void)
+{
+ memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
+
+ idt_setup_from_table(debug_idt_table, dbg_idts, ARRAY_SIZE(dbg_idts), false);
+}
+#endif
+
+/**
+ * idt_setup_apic_and_irq_gates - Setup APIC/SMP and normal interrupt gates
+ *
+ * Installs apic_idts first and marks those vectors in used_vectors. The
+ * first loop then points every still-clear vector from
+ * FIRST_EXTERNAL_VECTOR up to FIRST_SYSTEM_VECTOR at its stub in
+ * irq_entries_start, without marking it used. The second loop continues
+ * with the same index up to NR_VECTORS: with CONFIG_X86_LOCAL_APIC each
+ * still-clear vector is marked used and pointed at spurious_interrupt,
+ * otherwise it gets an irq_entries_start stub as well.
+ *
+ * NOTE(review): the factor 8 is presumably the size of one stub in
+ * irq_entries_start - confirm against the entry code.
+ */
+void __init idt_setup_apic_and_irq_gates(void)
+{
+ int i = FIRST_EXTERNAL_VECTOR;
+ void *entry;
+
+ idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true);
+
+ for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) {
+ entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
+ set_intr_gate(i, entry);
+ }
+
+ for_each_clear_bit_from(i, used_vectors, NR_VECTORS) {
+#ifdef CONFIG_X86_LOCAL_APIC
+ set_bit(i, used_vectors);
+ set_intr_gate(i, spurious_interrupt);
+#else
+ entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
+ set_intr_gate(i, entry);
+#endif
+ }
+}
+
+/**
+ * idt_setup_early_handler - Initializes the idt table with early handlers
+ *
+ * Points the first NUM_EXCEPTION_VECTORS vectors at their slot in
+ * early_idt_handler_array. On 32-bit the remaining vectors up to
+ * NR_VECTORS are pointed at early_ignore_irq. Finally loads idt_descr.
+ */
+void __init idt_setup_early_handler(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
+ set_intr_gate(i, early_idt_handler_array[i]);
+#ifdef CONFIG_X86_32
+ for ( ; i < NR_VECTORS; i++)
+ set_intr_gate(i, early_ignore_irq);
+#endif
+ load_idt(&idt_descr);
+}
+
+/**
+ * idt_invalidate - Invalidate interrupt descriptor table
+ * @addr: The virtual address of the 'invalid' IDT
+ *
+ * Loads an IDT descriptor that points at @addr with a size field of 0.
+ */
+void idt_invalidate(void *addr)
+{
+ struct desc_ptr idt = { .address = (unsigned long) addr, .size = 0 };
+
+ load_idt(&idt);
+}
+
+/*
+ * Replace the handler of vector @n with @addr. The vector must already be
+ * marked in used_vectors; otherwise warn once and leave the gate alone.
+ */
+void __init update_intr_gate(unsigned int n, const void *addr)
+{
+	if (!WARN_ON_ONCE(!test_bit(n, used_vectors)))
+		set_intr_gate(n, addr);
+}
+
+/*
+ * Claim system vector @n and point its gate at @addr. BUGs when @n is
+ * below FIRST_SYSTEM_VECTOR. A vector that is already marked in
+ * used_vectors keeps its current gate.
+ */
+void alloc_intr_gate(unsigned int n, const void *addr)
+{
+	BUG_ON(n < FIRST_SYSTEM_VECTOR);
+
+	/* Already claimed: leave the existing gate alone */
+	if (test_and_set_bit(n, used_vectors))
+		return;
+
+	set_intr_gate(n, addr);
+}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 4ed0aba8dbc8..52089c043160 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -29,9 +29,6 @@ EXPORT_PER_CPU_SYMBOL(irq_regs);
atomic_t irq_err_count;
-/* Function pointer for generic interrupt vector handling */
-void (*x86_platform_ipi_callback)(void) = NULL;
-
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
* each architecture has to answer this themselves.
@@ -87,13 +84,13 @@ int arch_show_interrupts(struct seq_file *p, int prec)
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
seq_puts(p, " APIC ICR read retries\n");
-#endif
if (x86_platform_ipi_callback) {
seq_printf(p, "%*s: ", prec, "PLT");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
seq_puts(p, " Platform interrupts\n");
}
+#endif
#ifdef CONFIG_SMP
seq_printf(p, "%*s: ", prec, "RES");
for_each_online_cpu(j)
@@ -183,9 +180,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->apic_perf_irqs;
sum += irq_stats(cpu)->apic_irq_work_irqs;
sum += irq_stats(cpu)->icr_read_retry_count;
-#endif
if (x86_platform_ipi_callback)
sum += irq_stats(cpu)->x86_platform_ipis;
+#endif
#ifdef CONFIG_SMP
sum += irq_stats(cpu)->irq_resched_count;
sum += irq_stats(cpu)->irq_call_count;
@@ -259,26 +256,26 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
return 1;
}
+#ifdef CONFIG_X86_LOCAL_APIC
+/* Function pointer for generic interrupt vector handling */
+void (*x86_platform_ipi_callback)(void) = NULL;
/*
* Handler for X86_PLATFORM_IPI_VECTOR.
*/
-void __smp_x86_platform_ipi(void)
-{
- inc_irq_stat(x86_platform_ipis);
-
- if (x86_platform_ipi_callback)
- x86_platform_ipi_callback();
-}
-
__visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
entering_ack_irq();
- __smp_x86_platform_ipi();
+ trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
+ inc_irq_stat(x86_platform_ipis);
+ if (x86_platform_ipi_callback)
+ x86_platform_ipi_callback();
+ trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
exiting_irq();
set_irq_regs(old_regs);
}
+#endif
#ifdef CONFIG_HAVE_KVM
static void dummy_handler(void) {}
@@ -334,19 +331,6 @@ __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs)
}
#endif
-__visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs)
-{
- struct pt_regs *old_regs = set_irq_regs(regs);
-
- entering_ack_irq();
- trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
- __smp_x86_platform_ipi();
- trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
- exiting_irq();
- set_irq_regs(old_regs);
-}
-
-EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
#ifdef CONFIG_HOTPLUG_CPU
@@ -431,7 +415,7 @@ int check_irq_vectors_for_cpu_disable(void)
* this w/o holding vector_lock.
*/
for (vector = FIRST_EXTERNAL_VECTOR;
- vector < first_system_vector; vector++) {
+ vector < FIRST_SYSTEM_VECTOR; vector++) {
if (!test_bit(vector, used_vectors) &&
IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector])) {
if (++count == this_count)
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index 275487872be2..70dee056f92b 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -11,35 +11,23 @@
#include <asm/trace/irq_vectors.h>
#include <linux/interrupt.h>
-static inline void __smp_irq_work_interrupt(void)
-{
- inc_irq_stat(apic_irq_work_irqs);
- irq_work_run();
-}
-
+#ifdef CONFIG_X86_LOCAL_APIC
__visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs)
{
ipi_entering_ack_irq();
- __smp_irq_work_interrupt();
- exiting_irq();
-}
-
-__visible void __irq_entry smp_trace_irq_work_interrupt(struct pt_regs *regs)
-{
- ipi_entering_ack_irq();
trace_irq_work_entry(IRQ_WORK_VECTOR);
- __smp_irq_work_interrupt();
+ inc_irq_stat(apic_irq_work_irqs);
+ irq_work_run();
trace_irq_work_exit(IRQ_WORK_VECTOR);
exiting_irq();
}
void arch_irq_work_raise(void)
{
-#ifdef CONFIG_X86_LOCAL_APIC
if (!arch_irq_work_has_interrupt())
return;
apic->send_IPI_self(IRQ_WORK_VECTOR);
apic_wait_icr_idle();
-#endif
}
+#endif
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index c7fd18526c3e..1add9e08e83e 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -55,18 +55,6 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
[0 ... NR_VECTORS - 1] = VECTOR_UNUSED,
};
-int vector_used_by_percpu_irq(unsigned int vector)
-{
- int cpu;
-
- for_each_online_cpu(cpu) {
- if (!IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector]))
- return 1;
- }
-
- return 0;
-}
-
void __init init_ISA_irqs(void)
{
struct irq_chip *chip = legacy_pic->chip;
@@ -99,100 +87,12 @@ void __init init_IRQ(void)
x86_init.irqs.intr_init();
}
-static void __init smp_intr_init(void)
-{
-#ifdef CONFIG_SMP
- /*
- * The reschedule interrupt is a CPU-to-CPU reschedule-helper
- * IPI, driven by wakeup.
- */
- alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
- /* IPI for generic function call */
- alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-
- /* IPI for generic single function call */
- alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
- call_function_single_interrupt);
-
- /* Low priority IPI to cleanup after moving an irq */
- set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
- set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
-
- /* IPI used for rebooting/stopping */
- alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt);
-#endif /* CONFIG_SMP */
-}
-
-static void __init apic_intr_init(void)
-{
- smp_intr_init();
-
-#ifdef CONFIG_X86_THERMAL_VECTOR
- alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-#endif
-#ifdef CONFIG_X86_MCE_THRESHOLD
- alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
-#endif
-
-#ifdef CONFIG_X86_MCE_AMD
- alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt);
-#endif
-
-#ifdef CONFIG_X86_LOCAL_APIC
- /* self generated IPI for local APIC timer */
- alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
- /* IPI for X86 platform specific use */
- alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
-#ifdef CONFIG_HAVE_KVM
- /* IPI for KVM to deliver posted interrupt */
- alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
- /* IPI for KVM to deliver interrupt to wake up tasks */
- alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi);
- /* IPI for KVM to deliver nested posted interrupt */
- alloc_intr_gate(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi);
-#endif
-
- /* IPI vectors for APIC spurious and error interrupts */
- alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
- alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-
- /* IRQ work interrupts: */
-# ifdef CONFIG_IRQ_WORK
- alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt);
-# endif
-
-#endif
-}
-
void __init native_init_IRQ(void)
{
- int i;
-
/* Execute any quirks before the call gates are initialised: */
x86_init.irqs.pre_vector_init();
- apic_intr_init();
-
- /*
- * Cover the whole vector space, no vector can escape
- * us. (some of these will be overridden and become
- * 'special' SMP interrupts)
- */
- i = FIRST_EXTERNAL_VECTOR;
-#ifndef CONFIG_X86_LOCAL_APIC
-#define first_system_vector NR_VECTORS
-#endif
- for_each_clear_bit_from(i, used_vectors, first_system_vector) {
- /* IA32_SYSCALL_VECTOR could be used in trap_init already. */
- set_intr_gate(i, irq_entries_start +
- 8 * (i - FIRST_EXTERNAL_VECTOR));
- }
-#ifdef CONFIG_X86_LOCAL_APIC
- for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
- set_intr_gate(i, spurious_interrupt);
-#endif
+ idt_setup_apic_and_irq_gates();
if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
setup_irq(2, &irq2);
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 38b64587b31b..fd6f8fbbe6f2 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -33,7 +33,6 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf,
struct setup_data_node *node = file->private_data;
unsigned long remain;
loff_t pos = *ppos;
- struct page *pg;
void *p;
u64 pa;
@@ -47,18 +46,13 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf,
count = node->len - pos;
pa = node->paddr + sizeof(struct setup_data) + pos;
- pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT);
- if (PageHighMem(pg)) {
- p = ioremap_cache(pa, count);
- if (!p)
- return -ENXIO;
- } else
- p = __va(pa);
+ p = memremap(pa, count, MEMREMAP_WB);
+ if (!p)
+ return -ENOMEM;
remain = copy_to_user(user_buf, p, count);
- if (PageHighMem(pg))
- iounmap(p);
+ memunmap(p);
if (remain)
return -EFAULT;
@@ -109,7 +103,6 @@ static int __init create_setup_data_nodes(struct dentry *parent)
struct setup_data *data;
int error;
struct dentry *d;
- struct page *pg;
u64 pa_data;
int no = 0;
@@ -126,16 +119,12 @@ static int __init create_setup_data_nodes(struct dentry *parent)
goto err_dir;
}
- pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
- if (PageHighMem(pg)) {
- data = ioremap_cache(pa_data, sizeof(*data));
- if (!data) {
- kfree(node);
- error = -ENXIO;
- goto err_dir;
- }
- } else
- data = __va(pa_data);
+ data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
+ if (!data) {
+ kfree(node);
+ error = -ENOMEM;
+ goto err_dir;
+ }
node->paddr = pa_data;
node->type = data->type;
@@ -143,8 +132,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
error = create_setup_data_node(d, no, node);
pa_data = data->next;
- if (PageHighMem(pg))
- iounmap(data);
+ memunmap(data);
if (error)
goto err_dir;
no++;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 69ea0bc1cfa3..4f98aad38237 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -39,6 +39,7 @@
#include <asm/insn.h>
#include <asm/debugreg.h>
#include <asm/set_memory.h>
+#include <asm/sections.h>
#include "common.h"
@@ -251,10 +252,12 @@ static int can_optimize(unsigned long paddr)
/*
* Do not optimize in the entry code due to the unstable
- * stack handling.
+ * stack handling and register setup.
*/
- if ((paddr >= (unsigned long)__entry_text_start) &&
- (paddr < (unsigned long)__entry_text_end))
+ if (((paddr >= (unsigned long)__entry_text_start) &&
+ (paddr < (unsigned long)__entry_text_end)) ||
+ ((paddr >= (unsigned long)__irqentry_text_start) &&
+ (paddr < (unsigned long)__irqentry_text_end)))
return 0;
/* Check there is enough space for a relative jump. */
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 4afc67f5facc..4b0592ca9e47 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -16,8 +16,8 @@
#include <linux/stat.h>
#include <linux/slab.h>
#include <linux/mm.h>
+#include <linux/io.h>
-#include <asm/io.h>
#include <asm/setup.h>
static ssize_t version_show(struct kobject *kobj,
@@ -55,7 +55,7 @@ static struct bin_attribute *boot_params_data_attrs[] = {
NULL,
};
-static struct attribute_group boot_params_attr_group = {
+static const struct attribute_group boot_params_attr_group = {
.attrs = boot_params_version_attrs,
.bin_attrs = boot_params_data_attrs,
};
@@ -79,12 +79,12 @@ static int get_setup_data_paddr(int nr, u64 *paddr)
*paddr = pa_data;
return 0;
}
- data = ioremap_cache(pa_data, sizeof(*data));
+ data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
if (!data)
return -ENOMEM;
pa_data = data->next;
- iounmap(data);
+ memunmap(data);
i++;
}
return -EINVAL;
@@ -97,17 +97,17 @@ static int __init get_setup_data_size(int nr, size_t *size)
u64 pa_data = boot_params.hdr.setup_data;
while (pa_data) {
- data = ioremap_cache(pa_data, sizeof(*data));
+ data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
if (!data)
return -ENOMEM;
if (nr == i) {
*size = data->len;
- iounmap(data);
+ memunmap(data);
return 0;
}
pa_data = data->next;
- iounmap(data);
+ memunmap(data);
i++;
}
return -EINVAL;
@@ -127,12 +127,12 @@ static ssize_t type_show(struct kobject *kobj,
ret = get_setup_data_paddr(nr, &paddr);
if (ret)
return ret;
- data = ioremap_cache(paddr, sizeof(*data));
+ data = memremap(paddr, sizeof(*data), MEMREMAP_WB);
if (!data)
return -ENOMEM;
ret = sprintf(buf, "0x%x\n", data->type);
- iounmap(data);
+ memunmap(data);
return ret;
}
@@ -154,7 +154,7 @@ static ssize_t setup_data_data_read(struct file *fp,
ret = get_setup_data_paddr(nr, &paddr);
if (ret)
return ret;
- data = ioremap_cache(paddr, sizeof(*data));
+ data = memremap(paddr, sizeof(*data), MEMREMAP_WB);
if (!data)
return -ENOMEM;
@@ -170,15 +170,15 @@ static ssize_t setup_data_data_read(struct file *fp,
goto out;
ret = count;
- p = ioremap_cache(paddr + sizeof(*data), data->len);
+ p = memremap(paddr + sizeof(*data), data->len, MEMREMAP_WB);
if (!p) {
ret = -ENOMEM;
goto out;
}
memcpy(buf, p + off, count);
- iounmap(p);
+ memunmap(p);
out:
- iounmap(data);
+ memunmap(data);
return ret;
}
@@ -202,7 +202,7 @@ static struct bin_attribute *setup_data_data_attrs[] = {
NULL,
};
-static struct attribute_group setup_data_attr_group = {
+static const struct attribute_group setup_data_attr_group = {
.attrs = setup_data_type_attrs,
.bin_attrs = setup_data_data_attrs,
};
@@ -250,13 +250,13 @@ static int __init get_setup_data_total_num(u64 pa_data, int *nr)
*nr = 0;
while (pa_data) {
*nr += 1;
- data = ioremap_cache(pa_data, sizeof(*data));
+ data = memremap(pa_data, sizeof(*data), MEMREMAP_WB);
if (!data) {
ret = -ENOMEM;
goto out;
}
pa_data = data->next;
- iounmap(data);
+ memunmap(data);
}
out:
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d04e30e3c0ff..874827b0d7ca 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -263,7 +263,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
switch (kvm_read_and_reset_pf_reason()) {
default:
- trace_do_page_fault(regs, error_code);
+ do_page_fault(regs, error_code);
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
@@ -455,7 +455,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
static void __init kvm_apf_trap_init(void)
{
- set_intr_gate(14, async_page_fault);
+ update_intr_gate(X86_TRAP_PF, async_page_fault);
}
void __init kvm_guest_init(void)
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index a870910c8565..f0e64db18ac8 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -21,6 +21,25 @@
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
+/*
+ * Re-read DS and ES after an LDT update. Each selector is saved and, when
+ * its SEGMENT_TI_MASK bits equal SEGMENT_LDT, loaded again unchanged. The
+ * body is compiled only with CONFIG_X86_64; otherwise the function is empty.
+ */
+static void refresh_ldt_segments(void)
+{
+#ifdef CONFIG_X86_64
+ unsigned short sel;
+
+ /*
+ * Make sure that the cached DS and ES descriptors match the updated
+ * LDT.
+ */
+ savesegment(ds, sel);
+ if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
+ loadsegment(ds, sel);
+
+ savesegment(es, sel);
+ if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
+ loadsegment(es, sel);
+#endif
+}
+
/* context.lock is held for us, so we don't need any locking. */
static void flush_ldt(void *__mm)
{
@@ -32,6 +51,8 @@ static void flush_ldt(void *__mm)
pc = &mm->context;
set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
+
+ refresh_ldt_segments();
}
/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 8c53c5d7a1bc..00bc751c861c 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -26,18 +26,6 @@
#include <asm/set_memory.h>
#include <asm/debugreg.h>
-static void set_idt(void *newidt, __u16 limit)
-{
- struct desc_ptr curidt;
-
- /* ia32 supports unaliged loads & stores */
- curidt.size = limit;
- curidt.address = (unsigned long)newidt;
-
- load_idt(&curidt);
-}
-
-
static void set_gdt(void *newgdt, __u16 limit)
{
struct desc_ptr curgdt;
@@ -245,7 +233,7 @@ void machine_kexec(struct kimage *image)
* If you want to load them you must set up your own idt & gdt.
*/
set_gdt(phys_to_virt(0), 0);
- set_idt(phys_to_virt(0), 0);
+ idt_invalidate(phys_to_virt(0));
/* now call it */
image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index cb0a30473c23..1f790cf9d38f 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -87,7 +87,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
}
pte = pte_offset_kernel(pmd, vaddr);
- set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+ set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
return 0;
err:
free_transition_pgtable(image);
@@ -115,6 +115,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
.alloc_pgt_page = alloc_pgt_page,
.context = image,
.page_flag = __PAGE_KERNEL_LARGE_EXEC,
+ .kernpg_flag = _KERNPG_TABLE_NOENC,
};
unsigned long mstart, mend;
pgd_t *level4p;
@@ -334,7 +335,8 @@ void machine_kexec(struct kimage *image)
image->start = relocate_kernel((unsigned long)image->head,
(unsigned long)page_list,
image->start,
- image->preserve_context);
+ image->preserve_context,
+ sme_active());
#ifdef CONFIG_KEXEC_JUMP
if (image->preserve_context)
@@ -602,3 +604,22 @@ void arch_kexec_unprotect_crashkres(void)
{
kexec_mark_crashkres(false);
}
+
+int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
+{
+ /*
+ * If SME is active we need to be sure that kexec pages are
+ * not encrypted because when we boot to the new kernel the
+ * pages won't be accessed encrypted (initially).
+ *
+ * The set_memory_decrypted() result is handed back to the caller
+ * unchanged; the gfp argument is not used here.
+ */
+ return set_memory_decrypted((unsigned long)vaddr, pages);
+}
+
+void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
+{
+ /*
+ * If SME is active we need to reset the pages back to being
+ * an encrypted mapping before freeing them.
+ *
+ * This function returns void, so any status that
+ * set_memory_encrypted() may report is dropped here.
+ */
+ set_memory_encrypted((unsigned long)vaddr, pages);
+}
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index f67bd3205df7..62e7d70aadd5 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -35,6 +35,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
+#include <asm/unwind.h>
#if 0
#define DEBUGP(fmt, ...) \
@@ -213,7 +214,7 @@ int module_finalize(const Elf_Ehdr *hdr,
struct module *me)
{
const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
- *para = NULL;
+ *para = NULL, *orc = NULL, *orc_ip = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -225,6 +226,10 @@ int module_finalize(const Elf_Ehdr *hdr,
locks = s;
if (!strcmp(".parainstructions", secstrings + s->sh_name))
para = s;
+ if (!strcmp(".orc_unwind", secstrings + s->sh_name))
+ orc = s;
+ if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
+ orc_ip = s;
}
if (alt) {
@@ -248,6 +253,10 @@ int module_finalize(const Elf_Ehdr *hdr,
/* make jump label nops */
jump_label_apply_nops(me);
+ if (orc && orc_ip)
+ unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size,
+ (void *)orc->sh_addr, orc->sh_size);
+
return 0;
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 0d904d759ff1..5cbb3177ed17 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -429,16 +429,16 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
}
}
-static struct mpf_intel *mpf_found;
+static unsigned long mpf_base;
static unsigned long __init get_mpc_size(unsigned long physptr)
{
struct mpc_table *mpc;
unsigned long size;
- mpc = early_ioremap(physptr, PAGE_SIZE);
+ mpc = early_memremap(physptr, PAGE_SIZE);
size = mpc->length;
- early_iounmap(mpc, PAGE_SIZE);
+ early_memunmap(mpc, PAGE_SIZE);
apic_printk(APIC_VERBOSE, " mpc: %lx-%lx\n", physptr, physptr + size);
return size;
@@ -450,7 +450,8 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
unsigned long size;
size = get_mpc_size(mpf->physptr);
- mpc = early_ioremap(mpf->physptr, size);
+ mpc = early_memremap(mpf->physptr, size);
+
/*
* Read the physical hardware table. Anything here will
* override the defaults.
@@ -461,10 +462,10 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
#endif
pr_err("BIOS bug, MP table errors detected!...\n");
pr_cont("... disabling SMP support. (tell your hw vendor)\n");
- early_iounmap(mpc, size);
+ early_memunmap(mpc, size);
return -1;
}
- early_iounmap(mpc, size);
+ early_memunmap(mpc, size);
if (early)
return -1;
@@ -497,12 +498,12 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
*/
void __init default_get_smp_config(unsigned int early)
{
- struct mpf_intel *mpf = mpf_found;
+ struct mpf_intel *mpf;
if (!smp_found_config)
return;
- if (!mpf)
+ if (!mpf_base)
return;
if (acpi_lapic && early)
@@ -515,6 +516,12 @@ void __init default_get_smp_config(unsigned int early)
if (acpi_lapic && acpi_ioapic)
return;
+ mpf = early_memremap(mpf_base, sizeof(*mpf));
+ if (!mpf) {
+ pr_err("MPTABLE: error mapping MP table\n");
+ return;
+ }
+
pr_info("Intel MultiProcessor Specification v1.%d\n",
mpf->specification);
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
@@ -529,7 +536,7 @@ void __init default_get_smp_config(unsigned int early)
/*
* Now see if we need to read further.
*/
- if (mpf->feature1 != 0) {
+ if (mpf->feature1) {
if (early) {
/*
* local APIC has default address
@@ -542,8 +549,10 @@ void __init default_get_smp_config(unsigned int early)
construct_default_ISA_mptable(mpf->feature1);
} else if (mpf->physptr) {
- if (check_physptr(mpf, early))
+ if (check_physptr(mpf, early)) {
+ early_memunmap(mpf, sizeof(*mpf));
return;
+ }
} else
BUG();
@@ -552,6 +561,8 @@ void __init default_get_smp_config(unsigned int early)
/*
* Only use the first configuration found.
*/
+
+ early_memunmap(mpf, sizeof(*mpf));
}
static void __init smp_reserve_memory(struct mpf_intel *mpf)
@@ -561,15 +572,16 @@ static void __init smp_reserve_memory(struct mpf_intel *mpf)
static int __init smp_scan_config(unsigned long base, unsigned long length)
{
- unsigned int *bp = phys_to_virt(base);
+ unsigned int *bp;
struct mpf_intel *mpf;
- unsigned long mem;
+ int ret = 0;
apic_printk(APIC_VERBOSE, "Scan for SMP in [mem %#010lx-%#010lx]\n",
base, base + length - 1);
BUILD_BUG_ON(sizeof(*mpf) != 16);
while (length > 0) {
+ bp = early_memremap(base, length);
mpf = (struct mpf_intel *)bp;
if ((*bp == SMP_MAGIC_IDENT) &&
(mpf->length == 1) &&
@@ -579,24 +591,26 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
#ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 1;
#endif
- mpf_found = mpf;
+ mpf_base = base;
- pr_info("found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n",
- (unsigned long long) virt_to_phys(mpf),
- (unsigned long long) virt_to_phys(mpf) +
- sizeof(*mpf) - 1, mpf);
+ pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n",
+ base, base + sizeof(*mpf) - 1, mpf);
- mem = virt_to_phys(mpf);
- memblock_reserve(mem, sizeof(*mpf));
+ memblock_reserve(base, sizeof(*mpf));
if (mpf->physptr)
smp_reserve_memory(mpf);
- return 1;
+ ret = 1;
}
- bp += 4;
+ early_memunmap(bp, length);
+
+ if (ret)
+ break;
+
+ base += 16;
length -= 16;
}
- return 0;
+ return ret;
}
void __init default_find_smp_config(void)
@@ -838,29 +852,40 @@ static int __init update_mp_table(void)
char oem[10];
struct mpf_intel *mpf;
struct mpc_table *mpc, *mpc_new;
+ unsigned long size;
if (!enable_update_mptable)
return 0;
- mpf = mpf_found;
- if (!mpf)
+ if (!mpf_base)
return 0;
+ mpf = early_memremap(mpf_base, sizeof(*mpf));
+ if (!mpf) {
+ pr_err("MPTABLE: mpf early_memremap() failed\n");
+ return 0;
+ }
+
/*
* Now see if we need to go further.
*/
- if (mpf->feature1 != 0)
- return 0;
+ if (mpf->feature1)
+ goto do_unmap_mpf;
if (!mpf->physptr)
- return 0;
+ goto do_unmap_mpf;
- mpc = phys_to_virt(mpf->physptr);
+ size = get_mpc_size(mpf->physptr);
+ mpc = early_memremap(mpf->physptr, size);
+ if (!mpc) {
+ pr_err("MPTABLE: mpc early_memremap() failed\n");
+ goto do_unmap_mpf;
+ }
if (!smp_check_mpc(mpc, oem, str))
- return 0;
+ goto do_unmap_mpc;
- pr_info("mpf: %llx\n", (u64)virt_to_phys(mpf));
+ pr_info("mpf: %llx\n", (u64)mpf_base);
pr_info("physptr: %x\n", mpf->physptr);
if (mpc_new_phys && mpc->length > mpc_new_length) {
@@ -878,21 +903,32 @@ static int __init update_mp_table(void)
new = mpf_checksum((unsigned char *)mpc, mpc->length);
if (old == new) {
pr_info("mpc is readonly, please try alloc_mptable instead\n");
- return 0;
+ goto do_unmap_mpc;
}
pr_info("use in-position replacing\n");
} else {
+ mpc_new = early_memremap(mpc_new_phys, mpc_new_length);
+ if (!mpc_new) {
+ pr_err("MPTABLE: new mpc early_memremap() failed\n");
+ goto do_unmap_mpc;
+ }
mpf->physptr = mpc_new_phys;
- mpc_new = phys_to_virt(mpc_new_phys);
memcpy(mpc_new, mpc, mpc->length);
+ early_memunmap(mpc, size);
mpc = mpc_new;
+ size = mpc_new_length;
/* check if we can modify that */
if (mpc_new_phys - mpf->physptr) {
struct mpf_intel *mpf_new;
/* steal 16 bytes from [0, 1k) */
+ mpf_new = early_memremap(0x400 - 16, sizeof(*mpf_new));
+ if (!mpf_new) {
+ pr_err("MPTABLE: new mpf early_memremap() failed\n");
+ goto do_unmap_mpc;
+ }
pr_info("mpf new: %x\n", 0x400 - 16);
- mpf_new = phys_to_virt(0x400 - 16);
memcpy(mpf_new, mpf, 16);
+ early_memunmap(mpf, sizeof(*mpf));
mpf = mpf_new;
mpf->physptr = mpc_new_phys;
}
@@ -909,6 +945,12 @@ static int __init update_mp_table(void)
*/
replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length);
+do_unmap_mpc:
+ early_memunmap(mpc, size);
+
+do_unmap_mpf:
+ early_memunmap(mpf, sizeof(*mpf));
+
return 0;
}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 446c8aa09b9b..35aafc95e4b8 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -39,26 +39,26 @@
#include <trace/events/nmi.h>
struct nmi_desc {
- spinlock_t lock;
+ raw_spinlock_t lock;
struct list_head head;
};
static struct nmi_desc nmi_desc[NMI_MAX] =
{
{
- .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
.head = LIST_HEAD_INIT(nmi_desc[0].head),
},
{
- .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
.head = LIST_HEAD_INIT(nmi_desc[1].head),
},
{
- .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
.head = LIST_HEAD_INIT(nmi_desc[2].head),
},
{
- .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
.head = LIST_HEAD_INIT(nmi_desc[3].head),
},
@@ -163,7 +163,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
init_irq_work(&action->irq_work, nmi_max_handler);
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
/*
* Indicate if there are multiple registrations on the
@@ -181,7 +181,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
else
list_add_tail_rcu(&action->list, &desc->head);
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
EXPORT_SYMBOL(__register_nmi_handler);
@@ -192,7 +192,7 @@ void unregister_nmi_handler(unsigned int type, const char *name)
struct nmiaction *n;
unsigned long flags;
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
list_for_each_entry_rcu(n, &desc->head, list) {
/*
@@ -207,7 +207,7 @@ void unregister_nmi_handler(unsigned int type, const char *name)
}
}
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
synchronize_rcu();
}
EXPORT_SYMBOL_GPL(unregister_nmi_handler);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bc0a849589bb..a14df9eecfed 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -319,9 +319,6 @@ __visible struct pv_irq_ops pv_irq_ops = {
.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
.safe_halt = native_safe_halt,
.halt = native_halt,
-#ifdef CONFIG_X86_64
- .adjust_exception_frame = paravirt_nop,
-#endif
};
__visible struct pv_cpu_ops pv_cpu_ops = {
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 5e16d3f29594..0accc2404b92 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -93,9 +93,12 @@ again:
if (gfpflags_allow_blocking(flag)) {
page = dma_alloc_from_contiguous(dev, count, get_order(size),
flag);
- if (page && page_to_phys(page) + size > dma_mask) {
- dma_release_from_contiguous(dev, page, count);
- page = NULL;
+ if (page) {
+ addr = phys_to_dma(dev, page_to_phys(page));
+ if (addr + size > dma_mask) {
+ dma_release_from_contiguous(dev, page, count);
+ page = NULL;
+ }
}
}
/* fallback */
@@ -104,7 +107,7 @@ again:
if (!page)
return NULL;
- addr = page_to_phys(page);
+ addr = phys_to_dma(dev, page_to_phys(page));
if (addr + size > dma_mask) {
__free_pages(page, get_order(size));
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index a6d404087fe3..4fc3cb60ea11 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -32,7 +32,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
enum dma_data_direction dir,
unsigned long attrs)
{
- dma_addr_t bus = page_to_phys(page) + offset;
+ dma_addr_t bus = phys_to_dma(dev, page_to_phys(page)) + offset;
WARN_ON(size == 0);
if (!check_addr("map_single", dev, bus, size))
return NOMMU_MAPPING_ERROR;
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 1e23577e17cf..677077510e30 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -6,12 +6,14 @@
#include <linux/swiotlb.h>
#include <linux/bootmem.h>
#include <linux/dma-mapping.h>
+#include <linux/mem_encrypt.h>
#include <asm/iommu.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
#include <asm/xen/swiotlb-xen.h>
#include <asm/iommu_table.h>
+
int swiotlb __read_mostly;
void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -79,8 +81,8 @@ IOMMU_INIT_FINISH(pci_swiotlb_detect_override,
pci_swiotlb_late_init);
/*
- * if 4GB or more detected (and iommu=off not set) return 1
- * and set swiotlb to 1.
+ * If 4GB or more detected (and iommu=off not set) or if SME is active
+ * then set swiotlb to 1 and return 1.
*/
int __init pci_swiotlb_detect_4gb(void)
{
@@ -89,6 +91,15 @@ int __init pci_swiotlb_detect_4gb(void)
if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN)
swiotlb = 1;
#endif
+
+ /*
+ * If SME is active then swiotlb will be set to 1 so that bounce
+ * buffers are allocated and used for devices that do not support
+ * the addressing range required for the encryption mask.
+ */
+ if (sme_active())
+ swiotlb = 1;
+
return swiotlb;
}
IOMMU_INIT(pci_swiotlb_detect_4gb,
diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c
index 91271122f0df..502a77d0adb0 100644
--- a/arch/x86/kernel/platform-quirks.c
+++ b/arch/x86/kernel/platform-quirks.c
@@ -16,7 +16,6 @@ void __init x86_early_init_platform_quirks(void)
x86_platform.legacy.reserve_bios_regions = 1;
break;
case X86_SUBARCH_XEN:
- case X86_SUBARCH_LGUEST:
x86_platform.legacy.devices.pnpbios = 0;
x86_platform.legacy.rtc = 0;
break;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3ca198080ea9..bd6b85fac666 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -355,6 +355,7 @@ bool xen_set_default_idle(void)
return ret;
}
#endif
+
void stop_this_cpu(void *dummy)
{
local_irq_disable();
@@ -365,8 +366,20 @@ void stop_this_cpu(void *dummy)
disable_local_APIC();
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
- for (;;)
- halt();
+ for (;;) {
+ /*
+ * Use wbinvd followed by hlt to stop the processor. This
+ * provides support for kexec on a processor that supports
+ * SME. With kexec, going from SME inactive to SME active
+ * requires clearing cache entries so that addresses without
+ * the encryption bit set don't corrupt the same physical
+ * address that has the encryption bit set when caches are
+ * flushed. To achieve this a wbinvd is performed followed by
+ * a hlt. Even if the processor is not in the kexec/SME
+ * scenario this only adds a wbinvd to a halting processor.
+ */
+ asm volatile("wbinvd; hlt" : : : "memory");
+ }
}
/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c6d6dc5f8bb2..11966251cd42 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -56,7 +56,7 @@
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/vm86.h>
-#include <asm/intel_rdt.h>
+#include <asm/intel_rdt_sched.h>
#include <asm/proto.h>
void __show_regs(struct pt_regs *regs, int all)
@@ -68,7 +68,7 @@ void __show_regs(struct pt_regs *regs, int all)
if (user_mode(regs)) {
sp = regs->sp;
- ss = regs->ss & 0xffff;
+ ss = regs->ss;
gs = get_user_gs(regs);
} else {
sp = kernel_stack_pointer(regs);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c3169be4c596..302e7b2572d1 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,7 +52,7 @@
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>
-#include <asm/intel_rdt.h>
+#include <asm/intel_rdt_sched.h>
#include <asm/unistd.h>
#ifdef CONFIG_IA32_EMULATION
/* Not included via unistd.h */
@@ -69,8 +69,7 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned int fsindex, gsindex;
unsigned int ds, cs, es;
- printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff,
- (void *)regs->ip);
+ printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
regs->sp, regs->flags);
if (regs->orig_ax != -1)
@@ -149,6 +148,123 @@ void release_thread(struct task_struct *dead_task)
}
}
+enum which_selector {
+ FS,
+ GS
+};
+
+/*
+ * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
+ * not available. The goal is to be reasonably fast on non-FSGSBASE systems.
+ * It's forcibly inlined because it'll generate better code and this function
+ * is hot.
+ */
+static __always_inline void save_base_legacy(struct task_struct *prev_p,
+ unsigned short selector,
+ enum which_selector which)
+{
+ if (likely(selector == 0)) {
+ /*
+ * On Intel (without X86_BUG_NULL_SEG), the segment base could
+ * be the pre-existing saved base or it could be zero. On AMD
+ * (with X86_BUG_NULL_SEG), the segment base could be almost
+ * anything.
+ *
+ * This branch is very hot (it's hit twice on almost every
+ * context switch between 64-bit programs), and avoiding
+ * the RDMSR helps a lot, so we just assume that whatever
+ * value is already saved is correct. This matches historical
+ * Linux behavior, so it won't break existing applications.
+ *
+ * To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we
+ * report that the base is zero, it needs to actually be zero:
+ * see the corresponding logic in load_seg_legacy.
+ */
+ } else {
+ /*
+ * If the selector is 1, 2, or 3, then the base is zero on
+ * !X86_BUG_NULL_SEG CPUs and could be anything on
+ * X86_BUG_NULL_SEG CPUs. In the latter case, Linux
+ * has never attempted to preserve the base across context
+ * switches.
+ *
+ * If selector > 3, then it refers to a real segment, and
+ * saving the base isn't necessary.
+ */
+ if (which == FS)
+ prev_p->thread.fsbase = 0;
+ else
+ prev_p->thread.gsbase = 0;
+ }
+}
+
+static __always_inline void save_fsgs(struct task_struct *task)
+{
+ savesegment(fs, task->thread.fsindex);
+ savesegment(gs, task->thread.gsindex);
+ save_base_legacy(task, task->thread.fsindex, FS);
+ save_base_legacy(task, task->thread.gsindex, GS);
+}
+
+static __always_inline void loadseg(enum which_selector which,
+ unsigned short sel)
+{
+ if (which == FS)
+ loadsegment(fs, sel);
+ else
+ load_gs_index(sel);
+}
+
+static __always_inline void load_seg_legacy(unsigned short prev_index,
+ unsigned long prev_base,
+ unsigned short next_index,
+ unsigned long next_base,
+ enum which_selector which)
+{
+ if (likely(next_index <= 3)) {
+ /*
+ * The next task is using 64-bit TLS, is not using this
+ * segment at all, or is having fun with arcane CPU features.
+ */
+ if (next_base == 0) {
+ /*
+ * Nasty case: on AMD CPUs, we need to forcibly zero
+ * the base.
+ */
+ if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+ loadseg(which, __USER_DS);
+ loadseg(which, next_index);
+ } else {
+ /*
+ * We could try to exhaustively detect cases
+ * under which we can skip the segment load,
+ * but there's really only one case that matters
+ * for performance: if both the previous and
+ * next states are fully zeroed, we can skip
+ * the load.
+ *
+ * (This assumes that prev_base == 0 has no
+ * false positives. This is the case on
+ * Intel-style CPUs.)
+ */
+ if (likely(prev_index | next_index | prev_base))
+ loadseg(which, next_index);
+ }
+ } else {
+ if (prev_index != next_index)
+ loadseg(which, next_index);
+ wrmsrl(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE,
+ next_base);
+ }
+ } else {
+ /*
+ * The next task is using a real segment. Loading the selector
+ * is sufficient.
+ */
+ loadseg(which, next_index);
+ }
+}
+
int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
unsigned long arg, struct task_struct *p, unsigned long tls)
{
@@ -229,10 +345,19 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
unsigned long new_sp,
unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
+ WARN_ON_ONCE(regs != current_pt_regs());
+
+ if (static_cpu_has(X86_BUG_NULL_SEG)) {
+ /* Loading zero below won't clear the base. */
+ loadsegment(fs, __USER_DS);
+ load_gs_index(__USER_DS);
+ }
+
loadsegment(fs, 0);
loadsegment(es, _ds);
loadsegment(ds, _ds);
load_gs_index(0);
+
regs->ip = new_ip;
regs->sp = new_sp;
regs->cs = _cs;
@@ -277,7 +402,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
- unsigned prev_fsindex, prev_gsindex;
+
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
+ this_cpu_read(irq_count) != -1);
switch_fpu_prepare(prev_fpu, cpu);
@@ -286,8 +413,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*
* (e.g. xen_load_tls())
*/
- savesegment(fs, prev_fsindex);
- savesegment(gs, prev_gsindex);
+ save_fsgs(prev_p);
/*
* Load TLS before restoring any segments so that segment loads
@@ -326,108 +452,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (unlikely(next->ds | prev->ds))
loadsegment(ds, next->ds);
- /*
- * Switch FS and GS.
- *
- * These are even more complicated than DS and ES: they have
- * 64-bit bases are that controlled by arch_prctl. The bases
- * don't necessarily match the selectors, as user code can do
- * any number of things to cause them to be inconsistent.
- *
- * We don't promise to preserve the bases if the selectors are
- * nonzero. We also don't promise to preserve the base if the
- * selector is zero and the base doesn't match whatever was
- * most recently passed to ARCH_SET_FS/GS. (If/when the
- * FSGSBASE instructions are enabled, we'll need to offer
- * stronger guarantees.)
- *
- * As an invariant,
- * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
- * impossible.
- */
- if (next->fsindex) {
- /* Loading a nonzero value into FS sets the index and base. */
- loadsegment(fs, next->fsindex);
- } else {
- if (next->fsbase) {
- /* Next index is zero but next base is nonzero. */
- if (prev_fsindex)
- loadsegment(fs, 0);
- wrmsrl(MSR_FS_BASE, next->fsbase);
- } else {
- /* Next base and index are both zero. */
- if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
- /*
- * We don't know the previous base and can't
- * find out without RDMSR. Forcibly clear it.
- */
- loadsegment(fs, __USER_DS);
- loadsegment(fs, 0);
- } else {
- /*
- * If the previous index is zero and ARCH_SET_FS
- * didn't change the base, then the base is
- * also zero and we don't need to do anything.
- */
- if (prev->fsbase || prev_fsindex)
- loadsegment(fs, 0);
- }
- }
- }
- /*
- * Save the old state and preserve the invariant.
- * NB: if prev_fsindex == 0, then we can't reliably learn the base
- * without RDMSR because Intel user code can zero it without telling
- * us and AMD user code can program any 32-bit value without telling
- * us.
- */
- if (prev_fsindex)
- prev->fsbase = 0;
- prev->fsindex = prev_fsindex;
-
- if (next->gsindex) {
- /* Loading a nonzero value into GS sets the index and base. */
- load_gs_index(next->gsindex);
- } else {
- if (next->gsbase) {
- /* Next index is zero but next base is nonzero. */
- if (prev_gsindex)
- load_gs_index(0);
- wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
- } else {
- /* Next base and index are both zero. */
- if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
- /*
- * We don't know the previous base and can't
- * find out without RDMSR. Forcibly clear it.
- *
- * This contains a pointless SWAPGS pair.
- * Fixing it would involve an explicit check
- * for Xen or a new pvop.
- */
- load_gs_index(__USER_DS);
- load_gs_index(0);
- } else {
- /*
- * If the previous index is zero and ARCH_SET_GS
- * didn't change the base, then the base is
- * also zero and we don't need to do anything.
- */
- if (prev->gsbase || prev_gsindex)
- load_gs_index(0);
- }
- }
- }
- /*
- * Save the old state and preserve the invariant.
- * NB: if prev_gsindex == 0, then we can't reliably learn the base
- * without RDMSR because Intel user code can zero it without telling
- * us and AMD user code can program any 32-bit value without telling
- * us.
- */
- if (prev_gsindex)
- prev->gsbase = 0;
- prev->gsindex = prev_gsindex;
+ load_seg_legacy(prev->fsindex, prev->fsbase,
+ next->fsindex, next->fsbase, FS);
+ load_seg_legacy(prev->gsindex, prev->gsbase,
+ next->gsindex, next->gsbase, GS);
switch_fpu_finish(next_fpu, cpu);
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 0bee04d41bed..eaa591cfd98b 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -1,6 +1,7 @@
/*
* This file contains work-arounds for x86 and x86_64 platform bugs.
*/
+#include <linux/dmi.h>
#include <linux/pci.h>
#include <linux/irq.h>
@@ -656,3 +657,12 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
#endif
#endif
+
+bool x86_apple_machine;
+EXPORT_SYMBOL(x86_apple_machine);
+
+void __init early_platform_quirks(void)
+{
+ x86_apple_machine = dmi_match(DMI_SYS_VENDOR, "Apple Inc.") ||
+ dmi_match(DMI_SYS_VENDOR, "Apple Computer, Inc.");
+}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index a56bf6051f4e..54984b142641 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -38,8 +38,6 @@
void (*pm_power_off)(void);
EXPORT_SYMBOL(pm_power_off);
-static const struct desc_ptr no_idt = {};
-
/*
* This is set if we need to go through the 'emergency' path.
* When machine_emergency_restart() is called, we may be on
@@ -638,7 +636,7 @@ static void native_machine_emergency_restart(void)
break;
case BOOT_TRIPLE:
- load_idt(&no_idt);
+ idt_invalidate(NULL);
__asm__ __volatile__("int3");
/* We're probably dead after this, but... */
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 98111b38ebfd..307d3bac5f04 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -47,6 +47,7 @@ relocate_kernel:
* %rsi page_list
* %rdx start address
* %rcx preserve_context
+ * %r8 sme_active
*/
/* Save the CPU context, used for jumping back */
@@ -71,6 +72,9 @@ relocate_kernel:
pushq $0
popfq
+ /* Save SME active flag */
+ movq %r8, %r12
+
/*
* get physical address of control page now
* this is impossible after page table switch
@@ -132,6 +136,16 @@ identity_mapped:
/* Flush the TLB (needed?) */
movq %r9, %cr3
+ /*
+ * If SME is active, there could be old encrypted cache line
+ * entries that will conflict with the now unencrypted memory
+ * used by kexec. Flush the caches before copying the kernel.
+ */
+ testq %r12, %r12
+ jz 1f
+ wbinvd
+1:
+
movq %rcx, %r11
call swap_pages
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 3486d0498800..d84afb0a322d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -69,6 +69,7 @@
#include <linux/crash_dump.h>
#include <linux/tboot.h>
#include <linux/jiffies.h>
+#include <linux/mem_encrypt.h>
#include <linux/usb/xhci-dbgp.h>
#include <video/edid.h>
@@ -115,6 +116,7 @@
#include <asm/microcode.h>
#include <asm/mmu_context.h>
#include <asm/kaslr.h>
+#include <asm/unwind.h>
/*
* max_low_pfn_mapped: highest direct mapped pfn under 4GB
@@ -374,6 +376,14 @@ static void __init reserve_initrd(void)
!ramdisk_image || !ramdisk_size)
return; /* No initrd provided by bootloader */
+ /*
+ * If SME is active, this memory will be marked encrypted by the
+ * kernel when it is accessed (including relocation). However, the
+ * ramdisk image was loaded decrypted by the bootloader, so make
+ * sure that it is encrypted before accessing it.
+ */
+ sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
+
initrd_start = 0;
mapped_size = memblock_mem_size(max_pfn_mapped);
@@ -890,7 +900,7 @@ void __init setup_arch(char **cmdline_p)
*/
olpc_ofw_detect();
- early_trap_init();
+ idt_setup_early_traps();
early_cpu_init();
early_ioremap_init();
@@ -1161,7 +1171,7 @@ void __init setup_arch(char **cmdline_p)
init_mem_mapping();
- early_trap_pf_init();
+ idt_setup_early_pf();
/*
* Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
@@ -1206,6 +1216,8 @@ void __init setup_arch(char **cmdline_p)
io_delay_init();
+ early_platform_quirks();
+
/*
* Parse the ACPI tables for possible boot-time SMP configuration.
*/
@@ -1310,6 +1322,8 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled(EFI_BOOT))
efi_apply_memmap_quirks();
#endif
+
+ unwind_init();
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 10edd1e69a68..28dafed6c682 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -155,13 +155,10 @@ static void __init pcpup_populate_pte(unsigned long addr)
static inline void setup_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
- struct desc_struct gdt;
+ struct desc_struct d = GDT_ENTRY_INIT(0x8092, per_cpu_offset(cpu),
+ 0xFFFFF);
- pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
- 0x2 | DESCTYPE_S, 0x8);
- gdt.s = 1;
- write_gdt_entry(get_cpu_gdt_rw(cpu),
- GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
+ write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PERCPU, &d, DESCTYPE_S);
#endif
}
@@ -171,7 +168,7 @@ void __init setup_per_cpu_areas(void)
unsigned long delta;
int rc;
- pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
+ pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%u nr_node_ids:%d\n",
NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
/*
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index cc30a74e4adb..e04442345fc0 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -256,7 +256,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
sp = current->sas_ss_sp + current->sas_ss_size;
} else if (IS_ENABLED(CONFIG_X86_32) &&
!onsigstack &&
- (regs->ss & 0xffff) != __USER_DS &&
+ regs->ss != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) &&
ka->sa.sa_restorer) {
/* This is the legacy signal stack switching. */
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index d798c0da451c..5c574dff4c1a 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -254,84 +254,45 @@ finish:
}
/*
- * Reschedule call back.
+ * Reschedule call back. KVM uses this interrupt to force a cpu out of
+ * guest mode
*/
-static inline void __smp_reschedule_interrupt(void)
-{
- inc_irq_stat(irq_resched_count);
- scheduler_ipi();
-}
-
__visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs)
{
ack_APIC_irq();
- __smp_reschedule_interrupt();
- /*
- * KVM uses this interrupt to force a cpu out of guest mode
- */
-}
-
-__visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs)
-{
- /*
- * Need to call irq_enter() before calling the trace point.
- * __smp_reschedule_interrupt() calls irq_enter/exit() too (in
- * scheduler_ipi(). This is OK, since those functions are allowed
- * to nest.
- */
- ipi_entering_ack_irq();
- trace_reschedule_entry(RESCHEDULE_VECTOR);
- __smp_reschedule_interrupt();
- trace_reschedule_exit(RESCHEDULE_VECTOR);
- exiting_irq();
- /*
- * KVM uses this interrupt to force a cpu out of guest mode
- */
-}
+ inc_irq_stat(irq_resched_count);
-static inline void __smp_call_function_interrupt(void)
-{
- generic_smp_call_function_interrupt();
- inc_irq_stat(irq_call_count);
+ if (trace_resched_ipi_enabled()) {
+ /*
+ * scheduler_ipi() might call irq_enter() as well, but
+ * nested calls are fine.
+ */
+ irq_enter();
+ trace_reschedule_entry(RESCHEDULE_VECTOR);
+ scheduler_ipi();
+ trace_reschedule_exit(RESCHEDULE_VECTOR);
+ irq_exit();
+ return;
+ }
+ scheduler_ipi();
}
__visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs)
{
ipi_entering_ack_irq();
- __smp_call_function_interrupt();
- exiting_irq();
-}
-
-__visible void __irq_entry
-smp_trace_call_function_interrupt(struct pt_regs *regs)
-{
- ipi_entering_ack_irq();
trace_call_function_entry(CALL_FUNCTION_VECTOR);
- __smp_call_function_interrupt();
- trace_call_function_exit(CALL_FUNCTION_VECTOR);
- exiting_irq();
-}
-
-static inline void __smp_call_function_single_interrupt(void)
-{
- generic_smp_call_function_single_interrupt();
inc_irq_stat(irq_call_count);
-}
-
-__visible void __irq_entry
-smp_call_function_single_interrupt(struct pt_regs *regs)
-{
- ipi_entering_ack_irq();
- __smp_call_function_single_interrupt();
+ generic_smp_call_function_interrupt();
+ trace_call_function_exit(CALL_FUNCTION_VECTOR);
exiting_irq();
}
-__visible void __irq_entry
-smp_trace_call_function_single_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_call_function_single_interrupt(struct pt_regs *r)
{
ipi_entering_ack_irq();
trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
- __smp_call_function_single_interrupt();
+ inc_irq_stat(irq_call_count);
+ generic_smp_call_function_single_interrupt();
trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
exiting_irq();
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index b474c8de7fba..cd6622c3204e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -971,7 +971,8 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
* Returns zero if CPU booted OK, else error code from
* ->wakeup_secondary_cpu.
*/
-static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
+static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
+ int *cpu0_nmi_registered)
{
volatile u32 *trampoline_status =
(volatile u32 *) __va(real_mode_header->trampoline_status);
@@ -979,7 +980,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
unsigned long start_ip = real_mode_header->trampoline_start;
unsigned long boot_error = 0;
- int cpu0_nmi_registered = 0;
unsigned long timeout;
idle->thread.sp = (unsigned long)task_pt_regs(idle);
@@ -1035,7 +1035,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
else
boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
- &cpu0_nmi_registered);
+ cpu0_nmi_registered);
if (!boot_error) {
/*
@@ -1080,12 +1080,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
*/
smpboot_restore_warm_reset_vector();
}
- /*
- * Clean up the nmi handler. Do this after the callin and callout sync
- * to avoid impact of possible long unregister time.
- */
- if (cpu0_nmi_registered)
- unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
return boot_error;
}
@@ -1093,8 +1087,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int apicid = apic->cpu_present_to_apicid(cpu);
+ int cpu0_nmi_registered = 0;
unsigned long flags;
- int err;
+ int err, ret = 0;
WARN_ON(irqs_disabled());
@@ -1131,10 +1126,11 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
common_cpu_up(cpu, tidle);
- err = do_boot_cpu(apicid, cpu, tidle);
+ err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
if (err) {
pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
- return -EIO;
+ ret = -EIO;
+ goto unreg_nmi;
}
/*
@@ -1150,7 +1146,15 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
touch_nmi_watchdog();
}
- return 0;
+unreg_nmi:
+ /*
+ * Clean up the nmi handler. Do this after the callin and callout sync
+ * to avoid impact of possible long unregister time.
+ */
+ if (cpu0_nmi_registered)
+ unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
+
+ return ret;
}
/**
@@ -1457,7 +1461,7 @@ __init void prefill_possible_map(void)
/* nr_cpu_ids could be reduced via nr_cpus= */
if (possible > nr_cpu_ids) {
- pr_warn("%d Processors exceeds NR_CPUS limit of %d\n",
+ pr_warn("%d Processors exceeds NR_CPUS limit of %u\n",
possible, nr_cpu_ids);
possible = nr_cpu_ids;
}
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 5f25cfbd952e..5ee663836c08 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -13,7 +13,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
unsigned long addr, seg;
addr = regs->ip;
- seg = regs->cs & 0xffff;
+ seg = regs->cs;
if (v8086_mode(regs)) {
addr = (addr & 0xffff) + (seg << 4);
return addr;
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 213ddf3e937d..73e4d28112f8 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -21,6 +21,7 @@
#include <asm/compat.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
+#include <asm/mpx.h>
/*
* Align a virtual address to avoid aliasing in the I$ on AMD F15h.
@@ -100,8 +101,8 @@ out:
return error;
}
-static void find_start_end(unsigned long flags, unsigned long *begin,
- unsigned long *end)
+static void find_start_end(unsigned long addr, unsigned long flags,
+ unsigned long *begin, unsigned long *end)
{
if (!in_compat_syscall() && (flags & MAP_32BIT)) {
/* This is usually used needed to map code in small
@@ -120,7 +121,10 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
}
*begin = get_mmap_base(1);
- *end = in_compat_syscall() ? tasksize_32bit() : tasksize_64bit();
+ if (in_compat_syscall())
+ *end = task_size_32bit();
+ else
+ *end = task_size_64bit(addr > DEFAULT_MAP_WINDOW);
}
unsigned long
@@ -132,10 +136,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct vm_unmapped_area_info info;
unsigned long begin, end;
+ addr = mpx_unmapped_area_check(addr, len, flags);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+
if (flags & MAP_FIXED)
return addr;
- find_start_end(flags, &begin, &end);
+ find_start_end(addr, flags, &begin, &end);
if (len > end)
return -ENOMEM;
@@ -171,6 +179,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
+ addr = mpx_unmapped_area_check(addr, len, flags);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+
/* requested length too big for entire address space */
if (len > TASK_SIZE)
return -ENOMEM;
@@ -195,6 +207,16 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
info.length = len;
info.low_limit = PAGE_SIZE;
info.high_limit = get_mmap_base(0);
+
+ /*
+ * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
+ * in the full address space.
+ *
+ * !in_compat_syscall() check to avoid high addresses for x32.
+ */
+ if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall())
+ info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
+
info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
if (filp) {
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index dcd699baea1b..a106b9719c58 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -93,7 +93,7 @@ static void set_tls_desc(struct task_struct *p, int idx,
while (n-- > 0) {
if (LDT_empty(info) || LDT_zero(info)) {
- desc->a = desc->b = 0;
+ memset(desc, 0, sizeof(*desc));
} else {
fill_ldt(desc, info);
diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
index 15515132bf0d..c6636d1f60b9 100644
--- a/arch/x86/kernel/tracepoint.c
+++ b/arch/x86/kernel/tracepoint.c
@@ -4,57 +4,38 @@
* Copyright (C) 2013 Seiji Aguchi <seiji.aguchi@hds.com>
*
*/
-#include <asm/hw_irq.h>
-#include <asm/desc.h>
+#include <linux/jump_label.h>
#include <linux/atomic.h>
-atomic_t trace_idt_ctr = ATOMIC_INIT(0);
-struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
- (unsigned long) trace_idt_table };
-
-/* No need to be aligned, but done to keep all IDTs defined the same way. */
-gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;
+#include <asm/hw_irq.h>
+#include <asm/desc.h>
-static int trace_irq_vector_refcount;
-static DEFINE_MUTEX(irq_vector_mutex);
+DEFINE_STATIC_KEY_FALSE(trace_pagefault_key);
-static void set_trace_idt_ctr(int val)
+int trace_pagefault_reg(void)
{
- atomic_set(&trace_idt_ctr, val);
- /* Ensure the trace_idt_ctr is set before sending IPI */
- wmb();
+ static_branch_inc(&trace_pagefault_key);
+ return 0;
}
-static void switch_idt(void *arg)
+void trace_pagefault_unreg(void)
{
- unsigned long flags;
-
- local_irq_save(flags);
- load_current_idt();
- local_irq_restore(flags);
+ static_branch_dec(&trace_pagefault_key);
}
-int trace_irq_vector_regfunc(void)
+#ifdef CONFIG_SMP
+
+DEFINE_STATIC_KEY_FALSE(trace_resched_ipi_key);
+
+int trace_resched_ipi_reg(void)
{
- mutex_lock(&irq_vector_mutex);
- if (!trace_irq_vector_refcount) {
- set_trace_idt_ctr(1);
- smp_call_function(switch_idt, NULL, 0);
- switch_idt(NULL);
- }
- trace_irq_vector_refcount++;
- mutex_unlock(&irq_vector_mutex);
+ static_branch_inc(&trace_resched_ipi_key);
return 0;
}
-void trace_irq_vector_unregfunc(void)
+void trace_resched_ipi_unreg(void)
{
- mutex_lock(&irq_vector_mutex);
- trace_irq_vector_refcount--;
- if (!trace_irq_vector_refcount) {
- set_trace_idt_ctr(0);
- smp_call_function(switch_idt, NULL, 0);
- switch_idt(NULL);
- }
- mutex_unlock(&irq_vector_mutex);
+ static_branch_dec(&trace_resched_ipi_key);
}
+
+#endif
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index bf54309b85da..34ea3651362e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -38,11 +38,6 @@
#include <linux/smp.h>
#include <linux/io.h>
-#ifdef CONFIG_EISA
-#include <linux/ioport.h>
-#include <linux/eisa.h>
-#endif
-
#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif
@@ -70,20 +65,13 @@
#include <asm/x86_init.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>
-
-/* No need to be aligned, but done to keep all IDTs defined the same way. */
-gate_desc debug_idt_table[NR_VECTORS] __page_aligned_bss;
#else
#include <asm/processor-flags.h>
#include <asm/setup.h>
#include <asm/proto.h>
#endif
-/* Must be page-aligned because the real IDT is used in a fixmap. */
-gate_desc idt_table[NR_VECTORS] __page_aligned_bss;
-
DECLARE_BITMAP(used_vectors, NR_VECTORS);
-EXPORT_SYMBOL_GPL(used_vectors);
static inline void cond_local_irq_enable(struct pt_regs *regs)
{
@@ -935,87 +923,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
}
#endif
-/* Set of traps needed for early debugging. */
-void __init early_trap_init(void)
-{
- /*
- * Don't use IST to set DEBUG_STACK as it doesn't work until TSS
- * is ready in cpu_init() <-- trap_init(). Before trap_init(),
- * CPU runs at ring 0 so it is impossible to hit an invalid
- * stack. Using the original stack works well enough at this
- * early stage. DEBUG_STACK will be equipped after cpu_init() in
- * trap_init().
- *
- * We don't need to set trace_idt_table like set_intr_gate(),
- * since we don't have trace_debug and it will be reset to
- * 'debug' in trap_init() by set_intr_gate_ist().
- */
- set_intr_gate_notrace(X86_TRAP_DB, debug);
- /* int3 can be called from all */
- set_system_intr_gate(X86_TRAP_BP, &int3);
-#ifdef CONFIG_X86_32
- set_intr_gate(X86_TRAP_PF, page_fault);
-#endif
- load_idt(&idt_descr);
-}
-
-void __init early_trap_pf_init(void)
-{
-#ifdef CONFIG_X86_64
- set_intr_gate(X86_TRAP_PF, page_fault);
-#endif
-}
-
void __init trap_init(void)
{
- int i;
-
-#ifdef CONFIG_EISA
- void __iomem *p = early_ioremap(0x0FFFD9, 4);
-
- if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
- EISA_bus = 1;
- early_iounmap(p, 4);
-#endif
-
- set_intr_gate(X86_TRAP_DE, divide_error);
- set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);
- /* int4 can be called from all */
- set_system_intr_gate(X86_TRAP_OF, &overflow);
- set_intr_gate(X86_TRAP_BR, bounds);
- set_intr_gate(X86_TRAP_UD, invalid_op);
- set_intr_gate(X86_TRAP_NM, device_not_available);
-#ifdef CONFIG_X86_32
- set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS);
-#else
- set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK);
-#endif
- set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
- set_intr_gate(X86_TRAP_TS, invalid_TSS);
- set_intr_gate(X86_TRAP_NP, segment_not_present);
- set_intr_gate(X86_TRAP_SS, stack_segment);
- set_intr_gate(X86_TRAP_GP, general_protection);
- set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
- set_intr_gate(X86_TRAP_MF, coprocessor_error);
- set_intr_gate(X86_TRAP_AC, alignment_check);
-#ifdef CONFIG_X86_MCE
- set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK);
-#endif
- set_intr_gate(X86_TRAP_XF, simd_coprocessor_error);
-
- /* Reserve all the builtin and the syscall vector: */
- for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
- set_bit(i, used_vectors);
-
-#ifdef CONFIG_IA32_EMULATION
- set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_compat);
- set_bit(IA32_SYSCALL_VECTOR, used_vectors);
-#endif
-
-#ifdef CONFIG_X86_32
- set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
- set_bit(IA32_SYSCALL_VECTOR, used_vectors);
-#endif
+ idt_setup_traps();
/*
* Set the IDT descriptor to a fixed read-only location, so that the
@@ -1030,20 +940,9 @@ void __init trap_init(void)
*/
cpu_init();
- /*
- * X86_TRAP_DB and X86_TRAP_BP have been set
- * in early_trap_init(). However, ITS works only after
- * cpu_init() loads TSS. See comments in early_trap_init().
- */
- set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
- /* int3 can be called from all */
- set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+ idt_setup_ist_traps();
x86_init.irqs.trap_init();
-#ifdef CONFIG_X86_64
- memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
- set_nmi_gate(X86_TRAP_DB, &debug);
- set_nmi_gate(X86_TRAP_BP, &int3);
-#endif
+ idt_setup_debugidt_traps();
}
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index b9389d72b2f7..d145a0b1f529 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -10,20 +10,22 @@
#define FRAME_HEADER_SIZE (sizeof(long) * 2)
-/*
- * This disables KASAN checking when reading a value from another task's stack,
- * since the other task could be running on another CPU and could have poisoned
- * the stack in the meantime.
- */
-#define READ_ONCE_TASK_STACK(task, x) \
-({ \
- unsigned long val; \
- if (task == current) \
- val = READ_ONCE(x); \
- else \
- val = READ_ONCE_NOCHECK(x); \
- val; \
-})
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+ if (unwind_done(state))
+ return 0;
+
+ return __kernel_text_address(state->ip) ? state->ip : 0;
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+{
+ if (unwind_done(state))
+ return NULL;
+
+ return state->regs ? &state->regs->ip : state->bp + 1;
+}
static void unwind_dump(struct unwind_state *state)
{
@@ -66,15 +68,6 @@ static void unwind_dump(struct unwind_state *state)
}
}
-unsigned long unwind_get_return_address(struct unwind_state *state)
-{
- if (unwind_done(state))
- return 0;
-
- return __kernel_text_address(state->ip) ? state->ip : 0;
-}
-EXPORT_SYMBOL_GPL(unwind_get_return_address);
-
static size_t regs_size(struct pt_regs *regs)
{
/* x86_32 regs from kernel mode are two words shorter: */
@@ -91,10 +84,8 @@ static bool in_entry_code(unsigned long ip)
if (addr >= __entry_text_start && addr < __entry_text_end)
return true;
-#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
if (addr >= __irqentry_text_start && addr < __irqentry_text_end)
return true;
-#endif
return false;
}
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
index 039f36738e49..4f0e17b90463 100644
--- a/arch/x86/kernel/unwind_guess.c
+++ b/arch/x86/kernel/unwind_guess.c
@@ -19,6 +19,11 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
}
EXPORT_SYMBOL_GPL(unwind_get_return_address);
+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+{
+ return NULL;
+}
+
bool unwind_next_frame(struct unwind_state *state)
{
struct stack_info *info = &state->stack_info;
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
new file mode 100644
index 000000000000..570b70d3f604
--- /dev/null
+++ b/arch/x86/kernel/unwind_orc.c
@@ -0,0 +1,582 @@
+#include <linux/module.h>
+#include <linux/sort.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+#include <asm/orc_types.h>
+#include <asm/orc_lookup.h>
+#include <asm/sections.h>
+
+#define orc_warn(fmt, ...) \
+ printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
+
+extern int __start_orc_unwind_ip[];
+extern int __stop_orc_unwind_ip[];
+extern struct orc_entry __start_orc_unwind[];
+extern struct orc_entry __stop_orc_unwind[];
+
+static DEFINE_MUTEX(sort_mutex);
+int *cur_orc_ip_table = __start_orc_unwind_ip;
+struct orc_entry *cur_orc_table = __start_orc_unwind;
+
+unsigned int lookup_num_blocks;
+bool orc_init;
+
+static inline unsigned long orc_ip(const int *ip)
+{
+ return (unsigned long)ip + *ip;
+}
+
+static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table,
+ unsigned int num_entries, unsigned long ip)
+{
+ int *first = ip_table;
+ int *last = ip_table + num_entries - 1;
+ int *mid = first, *found = first;
+
+ if (!num_entries)
+ return NULL;
+
+ /*
+ * Do a binary range search to find the rightmost duplicate of a given
+ * starting address. Some entries are section terminators which are
+ * "weak" entries for ensuring there are no gaps. They should be
+ * ignored when they conflict with a real entry.
+ */
+ while (first <= last) {
+ mid = first + ((last - first) / 2);
+
+ if (orc_ip(mid) <= ip) {
+ found = mid;
+ first = mid + 1;
+ } else
+ last = mid - 1;
+ }
+
+ return u_table + (found - ip_table);
+}
+
+#ifdef CONFIG_MODULES
+static struct orc_entry *orc_module_find(unsigned long ip)
+{
+ struct module *mod;
+
+ mod = __module_address(ip);
+ if (!mod || !mod->arch.orc_unwind || !mod->arch.orc_unwind_ip)
+ return NULL;
+ return __orc_find(mod->arch.orc_unwind_ip, mod->arch.orc_unwind,
+ mod->arch.num_orcs, ip);
+}
+#else
+static struct orc_entry *orc_module_find(unsigned long ip)
+{
+ return NULL;
+}
+#endif
+
+static struct orc_entry *orc_find(unsigned long ip)
+{
+ if (!orc_init)
+ return NULL;
+
+ /* For non-init vmlinux addresses, use the fast lookup table: */
+ if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) {
+ unsigned int idx, start, stop;
+
+ idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
+
+ if (unlikely((idx >= lookup_num_blocks-1))) {
+ orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%lx\n",
+ idx, lookup_num_blocks, ip);
+ return NULL;
+ }
+
+ start = orc_lookup[idx];
+ stop = orc_lookup[idx + 1] + 1;
+
+ if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
+ (__start_orc_unwind + stop > __stop_orc_unwind))) {
+ orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%lx\n",
+ idx, lookup_num_blocks, start, stop, ip);
+ return NULL;
+ }
+
+ return __orc_find(__start_orc_unwind_ip + start,
+ __start_orc_unwind + start, stop - start, ip);
+ }
+
+ /* vmlinux .init slow lookup: */
+ if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext)
+ return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
+ __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
+
+ /* Module lookup: */
+ return orc_module_find(ip);
+}
+
+static void orc_sort_swap(void *_a, void *_b, int size)
+{
+ struct orc_entry *orc_a, *orc_b;
+ struct orc_entry orc_tmp;
+ int *a = _a, *b = _b, tmp;
+ int delta = _b - _a;
+
+ /* Swap the .orc_unwind_ip entries: */
+ tmp = *a;
+ *a = *b + delta;
+ *b = tmp - delta;
+
+ /* Swap the corresponding .orc_unwind entries: */
+ orc_a = cur_orc_table + (a - cur_orc_ip_table);
+ orc_b = cur_orc_table + (b - cur_orc_ip_table);
+ orc_tmp = *orc_a;
+ *orc_a = *orc_b;
+ *orc_b = orc_tmp;
+}
+
+static int orc_sort_cmp(const void *_a, const void *_b)
+{
+ struct orc_entry *orc_a;
+ const int *a = _a, *b = _b;
+ unsigned long a_val = orc_ip(a);
+ unsigned long b_val = orc_ip(b);
+
+ if (a_val > b_val)
+ return 1;
+ if (a_val < b_val)
+ return -1;
+
+ /*
+ * The "weak" section terminator entries need to always be on the left
+ * to ensure the lookup code skips them in favor of real entries.
+ * These terminator entries exist to handle any gaps created by
+ * whitelisted .o files which didn't get objtool generation.
+ */
+ orc_a = cur_orc_table + (a - cur_orc_ip_table);
+ return orc_a->sp_reg == ORC_REG_UNDEFINED ? -1 : 1;
+}
+
+#ifdef CONFIG_MODULES
+void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size,
+ void *_orc, size_t orc_size)
+{
+ int *orc_ip = _orc_ip;
+ struct orc_entry *orc = _orc;
+ unsigned int num_entries = orc_ip_size / sizeof(int);
+
+ WARN_ON_ONCE(orc_ip_size % sizeof(int) != 0 ||
+ orc_size % sizeof(*orc) != 0 ||
+ num_entries != orc_size / sizeof(*orc));
+
+ /*
+ * The 'cur_orc_*' globals allow the orc_sort_swap() callback to
+ * associate an .orc_unwind_ip table entry with its corresponding
+ * .orc_unwind entry so they can both be swapped.
+ */
+ mutex_lock(&sort_mutex);
+ cur_orc_ip_table = orc_ip;
+ cur_orc_table = orc;
+ sort(orc_ip, num_entries, sizeof(int), orc_sort_cmp, orc_sort_swap);
+ mutex_unlock(&sort_mutex);
+
+ mod->arch.orc_unwind_ip = orc_ip;
+ mod->arch.orc_unwind = orc;
+ mod->arch.num_orcs = num_entries;
+}
+#endif
+
+void __init unwind_init(void)
+{
+ size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip;
+ size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind;
+ size_t num_entries = orc_ip_size / sizeof(int);
+ struct orc_entry *orc;
+ int i;
+
+ if (!num_entries || orc_ip_size % sizeof(int) != 0 ||
+ orc_size % sizeof(struct orc_entry) != 0 ||
+ num_entries != orc_size / sizeof(struct orc_entry)) {
+ orc_warn("WARNING: Bad or missing .orc_unwind table. Disabling unwinder.\n");
+ return;
+ }
+
+ /* Sort the .orc_unwind and .orc_unwind_ip tables: */
+ sort(__start_orc_unwind_ip, num_entries, sizeof(int), orc_sort_cmp,
+ orc_sort_swap);
+
+ /* Initialize the fast lookup table: */
+ lookup_num_blocks = orc_lookup_end - orc_lookup;
+ for (i = 0; i < lookup_num_blocks-1; i++) {
+ orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
+ num_entries,
+ LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i));
+ if (!orc) {
+ orc_warn("WARNING: Corrupt .orc_unwind table. Disabling unwinder.\n");
+ return;
+ }
+
+ orc_lookup[i] = orc - __start_orc_unwind;
+ }
+
+ /* Initialize the ending block: */
+ orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries,
+ LOOKUP_STOP_IP);
+ if (!orc) {
+ orc_warn("WARNING: Corrupt .orc_unwind table. Disabling unwinder.\n");
+ return;
+ }
+ orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind;
+
+ orc_init = true;
+}
+
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+ if (unwind_done(state))
+ return 0;
+
+ return __kernel_text_address(state->ip) ? state->ip : 0;
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+{
+ if (unwind_done(state))
+ return NULL;
+
+ if (state->regs)
+ return &state->regs->ip;
+
+ if (state->sp)
+ return (unsigned long *)state->sp - 1;
+
+ return NULL;
+}
+
+static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
+ size_t len)
+{
+ struct stack_info *info = &state->stack_info;
+
+ /*
+ * If the address isn't on the current stack, switch to the next one.
+ *
+ * We may have to traverse multiple stacks to deal with the possibility
+ * that info->next_sp could point to an empty stack and the address
+ * could be on a subsequent stack.
+ */
+ while (!on_stack(info, (void *)addr, len))
+ if (get_stack_info(info->next_sp, state->task, info,
+ &state->stack_mask))
+ return false;
+
+ return true;
+}
+
+static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
+ unsigned long *val)
+{
+ if (!stack_access_ok(state, addr, sizeof(long)))
+ return false;
+
+ *val = READ_ONCE_TASK_STACK(state->task, *(unsigned long *)addr);
+ return true;
+}
+
+#define REGS_SIZE (sizeof(struct pt_regs))
+#define SP_OFFSET (offsetof(struct pt_regs, sp))
+#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
+#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
+
+static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
+ unsigned long *ip, unsigned long *sp, bool full)
+{
+ size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
+ size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
+ struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
+
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ if (!stack_access_ok(state, addr, regs_size))
+ return false;
+
+ *ip = regs->ip;
+ *sp = regs->sp;
+
+ return true;
+ }
+
+ if (!stack_access_ok(state, addr, sp_offset))
+ return false;
+
+ *ip = regs->ip;
+
+ if (user_mode(regs)) {
+ if (!stack_access_ok(state, addr + sp_offset,
+ REGS_SIZE - SP_OFFSET))
+ return false;
+
+ *sp = regs->sp;
+ } else
+ *sp = (unsigned long)&regs->sp;
+
+ return true;
+}
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+ unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
+ enum stack_type prev_type = state->stack_info.type;
+ struct orc_entry *orc;
+ struct pt_regs *ptregs;
+ bool indirect = false;
+
+ if (unwind_done(state))
+ return false;
+
+ /* Don't let modules unload while we're reading their ORC data. */
+ preempt_disable();
+
+ /* Have we reached the end? */
+ if (state->regs && user_mode(state->regs))
+ goto done;
+
+ /*
+ * Find the orc_entry associated with the text address.
+ *
+ * Decrement call return addresses by one so they work for sibling
+ * calls and calls to noreturn functions.
+ */
+ orc = orc_find(state->signal ? state->ip : state->ip - 1);
+ if (!orc || orc->sp_reg == ORC_REG_UNDEFINED)
+ goto done;
+ orig_ip = state->ip;
+
+ /* Find the previous frame's stack: */
+ switch (orc->sp_reg) {
+ case ORC_REG_SP:
+ sp = state->sp + orc->sp_offset;
+ break;
+
+ case ORC_REG_BP:
+ sp = state->bp + orc->sp_offset;
+ break;
+
+ case ORC_REG_SP_INDIRECT:
+ sp = state->sp + orc->sp_offset;
+ indirect = true;
+ break;
+
+ case ORC_REG_BP_INDIRECT:
+ sp = state->bp + orc->sp_offset;
+ indirect = true;
+ break;
+
+ case ORC_REG_R10:
+ if (!state->regs || !state->full_regs) {
+ orc_warn("missing regs for base reg R10 at ip %p\n",
+ (void *)state->ip);
+ goto done;
+ }
+ sp = state->regs->r10;
+ break;
+
+ case ORC_REG_R13:
+ if (!state->regs || !state->full_regs) {
+ orc_warn("missing regs for base reg R13 at ip %p\n",
+ (void *)state->ip);
+ goto done;
+ }
+ sp = state->regs->r13;
+ break;
+
+ case ORC_REG_DI:
+ if (!state->regs || !state->full_regs) {
+ orc_warn("missing regs for base reg DI at ip %p\n",
+ (void *)state->ip);
+ goto done;
+ }
+ sp = state->regs->di;
+ break;
+
+ case ORC_REG_DX:
+ if (!state->regs || !state->full_regs) {
+ orc_warn("missing regs for base reg DX at ip %p\n",
+ (void *)state->ip);
+ goto done;
+ }
+ sp = state->regs->dx;
+ break;
+
+ default:
+ orc_warn("unknown SP base reg %d for ip %p\n",
+ orc->sp_reg, (void *)state->ip);
+ goto done;
+ }
+
+ if (indirect) {
+ if (!deref_stack_reg(state, sp, &sp))
+ goto done;
+ }
+
+ /* Find IP, SP and possibly regs: */
+ switch (orc->type) {
+ case ORC_TYPE_CALL:
+ ip_p = sp - sizeof(long);
+
+ if (!deref_stack_reg(state, ip_p, &state->ip))
+ goto done;
+
+ state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+ state->ip, (void *)ip_p);
+
+ state->sp = sp;
+ state->regs = NULL;
+ state->signal = false;
+ break;
+
+ case ORC_TYPE_REGS:
+ if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
+ orc_warn("can't dereference registers at %p for ip %p\n",
+ (void *)sp, (void *)orig_ip);
+ goto done;
+ }
+
+ state->regs = (struct pt_regs *)sp;
+ state->full_regs = true;
+ state->signal = true;
+ break;
+
+ case ORC_TYPE_REGS_IRET:
+ if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
+ orc_warn("can't dereference iret registers at %p for ip %p\n",
+ (void *)sp, (void *)orig_ip);
+ goto done;
+ }
+
+ ptregs = container_of((void *)sp, struct pt_regs, ip);
+ if ((unsigned long)ptregs >= prev_sp &&
+ on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
+ state->regs = ptregs;
+ state->full_regs = false;
+ } else
+ state->regs = NULL;
+
+ state->signal = true;
+ break;
+
+ default:
+ orc_warn("unknown .orc_unwind entry type %d\n", orc->type);
+ break;
+ }
+
+ /* Find BP: */
+ switch (orc->bp_reg) {
+ case ORC_REG_UNDEFINED:
+ if (state->regs && state->full_regs)
+ state->bp = state->regs->bp;
+ break;
+
+ case ORC_REG_PREV_SP:
+ if (!deref_stack_reg(state, sp + orc->bp_offset, &state->bp))
+ goto done;
+ break;
+
+ case ORC_REG_BP:
+ if (!deref_stack_reg(state, state->bp + orc->bp_offset, &state->bp))
+ goto done;
+ break;
+
+ default:
+ orc_warn("unknown BP base reg %d for ip %p\n",
+ orc->bp_reg, (void *)orig_ip);
+ goto done;
+ }
+
+ /* Prevent a recursive loop due to bad ORC data: */
+ if (state->stack_info.type == prev_type &&
+ on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
+ state->sp <= prev_sp) {
+ orc_warn("stack going in the wrong direction? ip=%p\n",
+ (void *)orig_ip);
+ goto done;
+ }
+
+ preempt_enable();
+ return true;
+
+done:
+ preempt_enable();
+ state->stack_info.type = STACK_TYPE_UNKNOWN;
+ return false;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+ struct pt_regs *regs, unsigned long *first_frame)
+{
+ memset(state, 0, sizeof(*state));
+ state->task = task;
+
+ /*
+ * Refuse to unwind the stack of a task while it's executing on another
+ * CPU. This check is racy, but that's ok: the unwinder has other
+ * checks to prevent it from going off the rails.
+ */
+ if (task_on_another_cpu(task))
+ goto done;
+
+ if (regs) {
+ if (user_mode(regs))
+ goto done;
+
+ state->ip = regs->ip;
+ state->sp = kernel_stack_pointer(regs);
+ state->bp = regs->bp;
+ state->regs = regs;
+ state->full_regs = true;
+ state->signal = true;
+
+ } else if (task == current) {
+ asm volatile("lea (%%rip), %0\n\t"
+ "mov %%rsp, %1\n\t"
+ "mov %%rbp, %2\n\t"
+ : "=r" (state->ip), "=r" (state->sp),
+ "=r" (state->bp));
+
+ } else {
+ struct inactive_task_frame *frame = (void *)task->thread.sp;
+
+ state->sp = task->thread.sp;
+ state->bp = READ_ONCE_NOCHECK(frame->bp);
+ state->ip = READ_ONCE_NOCHECK(frame->ret_addr);
+ }
+
+ if (get_stack_info((unsigned long *)state->sp, state->task,
+ &state->stack_info, &state->stack_mask))
+ return;
+
+ /*
+ * The caller can provide the address of the first frame directly
+ * (first_frame) or indirectly (regs->sp) to indicate which stack frame
+ * to start unwinding at. Skip ahead until we reach it.
+ */
+
+ /* When starting from regs, skip the regs frame: */
+ if (regs) {
+ unwind_next_frame(state);
+ return;
+ }
+
+ /* Otherwise, skip ahead to the user-specified starting frame: */
+ while (!unwind_done(state) &&
+ (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
+ state->sp <= (unsigned long)first_frame))
+ unwind_next_frame(state);
+
+ return;
+
+done:
+ state->stack_info.type = STACK_TYPE_UNKNOWN;
+ return;
+}
+EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index c8a3b61be0aa..f05f00acac89 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -24,6 +24,7 @@
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/page_types.h>
+#include <asm/orc_lookup.h>
#include <asm/cache.h>
#include <asm/boot.h>
@@ -148,6 +149,8 @@ SECTIONS
BUG_TABLE
+ ORC_UNWIND_TABLE
+
. = ALIGN(PAGE_SIZE);
__vvar_page = .;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 2688c7dc5323..3ea624452f93 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -89,6 +89,5 @@ config KVM_MMU_AUDIT
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source drivers/vhost/Kconfig
-source drivers/lguest/Kconfig
endif # VIRTUALIZATION
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 59ca2eea522c..0099e10eb045 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -126,16 +126,20 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
/*
- * The existing code assumes virtual address is 48-bit in the canonical
- * address checks; exit if it is ever changed.
+ * The existing code assumes virtual address is 48-bit or 57-bit in the
+ * canonical address checks; exit if it is ever changed.
*/
best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
- if (best && ((best->eax & 0xff00) >> 8) != 48 &&
- ((best->eax & 0xff00) >> 8) != 0)
- return -EINVAL;
+ if (best) {
+ int vaddr_bits = (best->eax & 0xff00) >> 8;
+
+ if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0)
+ return -EINVAL;
+ }
/* Update physical-address width */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+ kvm_mmu_reset_context(vcpu);
kvm_pmu_refresh(vcpu);
return 0;
@@ -383,7 +387,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features =
- F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ);
+ F(AVX512VBMI) | F(LA57) | F(PKU) |
+ 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ);
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
@@ -469,7 +474,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
cpuid_mask(&entry->ecx, CPUID_7_ECX);
/* PKU is not yet implemented for shadow paging. */
- if (!tdp_enabled)
+ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
entry->ecx &= ~F(PKU);
entry->edx &= kvm_cpuid_7_0_edx_x86_features;
entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX);
@@ -853,16 +858,24 @@ static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu,
return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index);
}
-void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx, bool check_limit)
{
u32 function = *eax, index = *ecx;
struct kvm_cpuid_entry2 *best;
+ bool entry_found = true;
best = kvm_find_cpuid_entry(vcpu, function, index);
- if (!best)
+ if (!best) {
+ entry_found = false;
+ if (!check_limit)
+ goto out;
+
best = check_cpuid_limit(vcpu, function, index);
+ }
+out:
if (best) {
*eax = best->eax;
*ebx = best->ebx;
@@ -870,7 +883,8 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
*edx = best->edx;
} else
*eax = *ebx = *ecx = *edx = 0;
- trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx);
+ trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found);
+ return entry_found;
}
EXPORT_SYMBOL_GPL(kvm_cpuid);
@@ -883,7 +897,7 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
- kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx);
+ kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true);
kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index da6728383052..1ea3c0e1e3a9 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -3,6 +3,7 @@
#include "x86.h"
#include <asm/cpu.h>
+#include <asm/processor.h>
int kvm_update_cpuid(struct kvm_vcpu *vcpu);
bool kvm_mpx_supported(void);
@@ -20,7 +21,8 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries);
-void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx, bool check_limit);
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
@@ -29,95 +31,87 @@ static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
return vcpu->arch.maxphyaddr;
}
-static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- if (!static_cpu_has(X86_FEATURE_XSAVE))
- return false;
-
- best = kvm_find_cpuid_entry(vcpu, 1, 0);
- return best && (best->ecx & bit(X86_FEATURE_XSAVE));
-}
-
-static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 1, 0);
- return best && (best->edx & bit(X86_FEATURE_MTRR));
-}
-
-static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 7, 0);
- return best && (best->ebx & bit(X86_FEATURE_TSC_ADJUST));
-}
+struct cpuid_reg {
+ u32 function;
+ u32 index;
+ int reg;
+};
-static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 7, 0);
- return best && (best->ebx & bit(X86_FEATURE_SMEP));
-}
+static const struct cpuid_reg reverse_cpuid[] = {
+ [CPUID_1_EDX] = { 1, 0, CPUID_EDX},
+ [CPUID_8000_0001_EDX] = {0x80000001, 0, CPUID_EDX},
+ [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX},
+ [CPUID_1_ECX] = { 1, 0, CPUID_ECX},
+ [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX},
+ [CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX},
+ [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX},
+ [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX},
+ [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX},
+ [CPUID_F_1_EDX] = { 0xf, 1, CPUID_EDX},
+ [CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX},
+ [CPUID_6_EAX] = { 6, 0, CPUID_EAX},
+ [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX},
+ [CPUID_7_ECX] = { 7, 0, CPUID_ECX},
+ [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX},
+};
-static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
+static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature)
{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 7, 0);
- return best && (best->ebx & bit(X86_FEATURE_SMAP));
-}
+ unsigned x86_leaf = x86_feature / 32;
-static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
+ BUILD_BUG_ON(!__builtin_constant_p(x86_leaf));
+ BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
+ BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
- best = kvm_find_cpuid_entry(vcpu, 7, 0);
- return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
+ return reverse_cpuid[x86_leaf];
}
-static inline bool guest_cpuid_has_pku(struct kvm_vcpu *vcpu)
+static __always_inline int *guest_cpuid_get_register(struct kvm_vcpu *vcpu, unsigned x86_feature)
{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 7, 0);
- return best && (best->ecx & bit(X86_FEATURE_PKU));
-}
+ struct kvm_cpuid_entry2 *entry;
+ const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature);
-static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
+ entry = kvm_find_cpuid_entry(vcpu, cpuid.function, cpuid.index);
+ if (!entry)
+ return NULL;
- best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
- return best && (best->edx & bit(X86_FEATURE_LM));
+ switch (cpuid.reg) {
+ case CPUID_EAX:
+ return &entry->eax;
+ case CPUID_EBX:
+ return &entry->ebx;
+ case CPUID_ECX:
+ return &entry->ecx;
+ case CPUID_EDX:
+ return &entry->edx;
+ default:
+ BUILD_BUG();
+ return NULL;
+ }
}
-static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
+static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, unsigned x86_feature)
{
- struct kvm_cpuid_entry2 *best;
+ int *reg;
- best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
- return best && (best->ecx & bit(X86_FEATURE_OSVW));
-}
+ if (x86_feature == X86_FEATURE_XSAVE &&
+ !static_cpu_has(X86_FEATURE_XSAVE))
+ return false;
-static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
+ reg = guest_cpuid_get_register(vcpu, x86_feature);
+ if (!reg)
+ return false;
- best = kvm_find_cpuid_entry(vcpu, 1, 0);
- return best && (best->ecx & bit(X86_FEATURE_PCID));
+ return *reg & bit(x86_feature);
}
-static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
+static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned x86_feature)
{
- struct kvm_cpuid_entry2 *best;
+ int *reg;
- best = kvm_find_cpuid_entry(vcpu, 1, 0);
- return best && (best->ecx & bit(X86_FEATURE_X2APIC));
+ reg = guest_cpuid_get_register(vcpu, x86_feature);
+ if (reg)
+ *reg &= ~bit(x86_feature);
}
static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
@@ -128,58 +122,6 @@ static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx;
}
-static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
- return best && (best->edx & bit(X86_FEATURE_GBPAGES));
-}
-
-static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 7, 0);
- return best && (best->ebx & bit(X86_FEATURE_RTM));
-}
-
-static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 7, 0);
- return best && (best->ebx & bit(X86_FEATURE_MPX));
-}
-
-static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
- return best && (best->edx & bit(X86_FEATURE_RDTSCP));
-}
-
-/*
- * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3
- */
-#define BIT_NRIPS 3
-
-static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 0x8000000a, 0);
-
- /*
- * NRIPS is a scattered cpuid feature, so we can't use
- * X86_FEATURE_NRIPS here (X86_FEATURE_NRIPS would be bit
- * position 8, not 3).
- */
- return best && (best->edx & bit(BIT_NRIPS));
-}
-#undef BIT_NRIPS
-
static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index fb0055953fbc..16bf6655aa85 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -28,6 +28,7 @@
#include "x86.h"
#include "tss.h"
+#include "mmu.h"
/*
* Operand types
@@ -688,16 +689,18 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
ulong la;
u32 lim;
u16 sel;
+ u8 va_bits;
la = seg_base(ctxt, addr.seg) + addr.ea;
*max_size = 0;
switch (mode) {
case X86EMUL_MODE_PROT64:
*linear = la;
- if (is_noncanonical_address(la))
+ va_bits = ctxt_virt_addr_bits(ctxt);
+ if (get_canonical(la, va_bits) != la)
goto bad;
- *max_size = min_t(u64, ~0u, (1ull << 48) - la);
+ *max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
if (size > *max_size)
goto bad;
break;
@@ -1748,8 +1751,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
sizeof(base3), &ctxt->exception);
if (ret != X86EMUL_CONTINUE)
return ret;
- if (is_noncanonical_address(get_desc_base(&seg_desc) |
- ((u64)base3 << 32)))
+ if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
+ ((u64)base3 << 32), ctxt))
return emulate_gp(ctxt, 0);
}
load:
@@ -2333,7 +2336,7 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
eax = 0x80000001;
ecx = 0;
- ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+ ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
return edx & bit(X86_FEATURE_LM);
}
@@ -2636,7 +2639,7 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
u32 eax, ebx, ecx, edx;
eax = ecx = 0;
- ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+ ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
&& ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
&& edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
@@ -2656,7 +2659,7 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
eax = 0x00000000;
ecx = 0x00000000;
- ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+ ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
/*
* Intel ("GenuineIntel")
* remark: Intel CPUs only support "syscall" in 64bit
@@ -2840,8 +2843,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ss_sel = cs_sel + 8;
cs.d = 0;
cs.l = 1;
- if (is_noncanonical_address(rcx) ||
- is_noncanonical_address(rdx))
+ if (emul_is_noncanonical_address(rcx, ctxt) ||
+ emul_is_noncanonical_address(rdx, ctxt))
return emulate_gp(ctxt, 0);
break;
}
@@ -3551,7 +3554,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
/*
* Check MOVBE is set in the guest-visible CPUID leaf.
*/
- ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+ ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
if (!(ecx & FFL(MOVBE)))
return emulate_ud(ctxt);
@@ -3756,7 +3759,7 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
if (rc != X86EMUL_CONTINUE)
return rc;
if (ctxt->mode == X86EMUL_MODE_PROT64 &&
- is_noncanonical_address(desc_ptr.address))
+ emul_is_noncanonical_address(desc_ptr.address, ctxt))
return emulate_gp(ctxt, 0);
if (lgdt)
ctxt->ops->set_gdt(ctxt, &desc_ptr);
@@ -3865,7 +3868,7 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt)
eax = reg_read(ctxt, VCPU_REGS_RAX);
ecx = reg_read(ctxt, VCPU_REGS_RCX);
- ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+ ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
*reg_write(ctxt, VCPU_REGS_RAX) = eax;
*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
@@ -3924,7 +3927,7 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt)
{
u32 eax = 1, ebx, ecx = 0, edx;
- ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+ ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
if (!(edx & FFL(FXSR)))
return emulate_ud(ctxt);
@@ -4097,8 +4100,17 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
u64 rsvd = 0;
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
- if (efer & EFER_LMA)
- rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
+ if (efer & EFER_LMA) {
+ u64 maxphyaddr;
+ u32 eax = 0x80000008;
+
+ if (ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL,
+ NULL, false))
+ maxphyaddr = eax & 0xff;
+ else
+ maxphyaddr = 36;
+ rsvd = rsvd_bits(maxphyaddr, 62);
+ }
if (new_val & rsvd)
return emulate_gp(ctxt, 0);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 337b6d2730fa..dc97f2544b6f 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1160,6 +1160,12 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
return stimer_get_count(vcpu_to_stimer(vcpu, timer_index),
pdata);
}
+ case HV_X64_MSR_TSC_FREQUENCY:
+ data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
+ break;
+ case HV_X64_MSR_APIC_FREQUENCY:
+ data = APIC_BUS_FREQUENCY;
+ break;
default:
vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
return 1;
@@ -1268,7 +1274,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
switch (code) {
case HVCALL_NOTIFY_LONG_SPIN_WAIT:
- kvm_vcpu_on_spin(vcpu);
+ kvm_vcpu_on_spin(vcpu, true);
break;
case HVCALL_POST_MESSAGE:
case HVCALL_SIGNAL_EVENT:
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 762cdf2595f9..9add410f195f 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -4,7 +4,7 @@
#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
#define KVM_POSSIBLE_CR4_GUEST_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
- | X86_CR4_OSXMMEXCPT | X86_CR4_PGE)
+ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE)
static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
@@ -84,11 +84,6 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
}
-static inline u32 kvm_read_pkru(struct kvm_vcpu *vcpu)
-{
- return kvm_x86_ops->get_pkru(vcpu);
-}
-
static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
{
vcpu->arch.hflags |= HF_GUEST_MASK;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 589dcc117086..aaf10b6f5380 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -54,8 +54,6 @@
#define PRIu64 "u"
#define PRIo64 "o"
-#define APIC_BUS_CYCLE_NS 1
-
/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
#define apic_debug(fmt, arg...)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 29caa2c3dff9..215721e1426a 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -12,6 +12,9 @@
#define KVM_APIC_SHORT_MASK 0xc0000
#define KVM_APIC_DEST_MASK 0x800
+#define APIC_BUS_CYCLE_NS 1
+#define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS)
+
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9b1dd114956a..eca30c1eb1d9 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -108,7 +108,7 @@ module_param(dbg, bool, 0644);
(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
-#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+#define PT64_BASE_ADDR_MASK __sme_clr((((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)))
#define PT64_DIR_BASE_ADDR_MASK \
(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))
#define PT64_LVL_ADDR_MASK(level) \
@@ -126,7 +126,7 @@ module_param(dbg, bool, 0644);
* PT32_LEVEL_BITS))) - 1))
#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \
- | shadow_x_mask | shadow_nx_mask)
+ | shadow_x_mask | shadow_nx_mask | shadow_me_mask)
#define ACC_EXEC_MASK 1
#define ACC_WRITE_MASK PT_WRITABLE_MASK
@@ -186,6 +186,7 @@ static u64 __read_mostly shadow_dirty_mask;
static u64 __read_mostly shadow_mmio_mask;
static u64 __read_mostly shadow_mmio_value;
static u64 __read_mostly shadow_present_mask;
+static u64 __read_mostly shadow_me_mask;
/*
* SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value.
@@ -349,7 +350,7 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
*/
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
- u64 acc_track_mask)
+ u64 acc_track_mask, u64 me_mask)
{
BUG_ON(!dirty_mask != !accessed_mask);
BUG_ON(!accessed_mask && !acc_track_mask);
@@ -362,6 +363,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
shadow_x_mask = x_mask;
shadow_present_mask = p_mask;
shadow_acc_track_mask = acc_track_mask;
+ shadow_me_mask = me_mask;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
@@ -2167,8 +2169,8 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn,
}
struct mmu_page_path {
- struct kvm_mmu_page *parent[PT64_ROOT_LEVEL];
- unsigned int idx[PT64_ROOT_LEVEL];
+ struct kvm_mmu_page *parent[PT64_ROOT_MAX_LEVEL];
+ unsigned int idx[PT64_ROOT_MAX_LEVEL];
};
#define for_each_sp(pvec, sp, parents, i) \
@@ -2383,8 +2385,8 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
iterator->level = vcpu->arch.mmu.shadow_root_level;
- if (iterator->level == PT64_ROOT_LEVEL &&
- vcpu->arch.mmu.root_level < PT64_ROOT_LEVEL &&
+ if (iterator->level == PT64_ROOT_4LEVEL &&
+ vcpu->arch.mmu.root_level < PT64_ROOT_4LEVEL &&
!vcpu->arch.mmu.direct_map)
--iterator->level;
@@ -2433,7 +2435,7 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK |
- shadow_user_mask | shadow_x_mask;
+ shadow_user_mask | shadow_x_mask | shadow_me_mask;
if (sp_ad_disabled(sp))
spte |= shadow_acc_track_value;
@@ -2608,9 +2610,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
sp = list_last_entry(&kvm->arch.active_mmu_pages,
struct kvm_mmu_page, link);
- kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
-
- return true;
+ return kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
}
/*
@@ -2745,6 +2745,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
pte_access &= ~ACC_WRITE_MASK;
spte |= (u64)pfn << PAGE_SHIFT;
+ spte |= shadow_me_mask;
if (pte_access & ACC_WRITE_MASK) {
@@ -3259,7 +3260,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable);
-static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
+static int make_mmu_pages_available(struct kvm_vcpu *vcpu);
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
gfn_t gfn, bool prefault)
@@ -3299,7 +3300,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
- make_mmu_pages_available(vcpu);
+ if (make_mmu_pages_available(vcpu) < 0)
+ goto out_unlock;
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
@@ -3323,8 +3325,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
- if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL &&
- (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL ||
+ if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL &&
+ (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL ||
vcpu->arch.mmu.direct_map)) {
hpa_t root = vcpu->arch.mmu.root_hpa;
@@ -3376,10 +3378,14 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
struct kvm_mmu_page *sp;
unsigned i;
- if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+ if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) {
spin_lock(&vcpu->kvm->mmu_lock);
- make_mmu_pages_available(vcpu);
- sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL);
+ if(make_mmu_pages_available(vcpu) < 0) {
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ return 1;
+ }
+ sp = kvm_mmu_get_page(vcpu, 0, 0,
+ vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.root_hpa = __pa(sp->spt);
@@ -3389,7 +3395,10 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
MMU_WARN_ON(VALID_PAGE(root));
spin_lock(&vcpu->kvm->mmu_lock);
- make_mmu_pages_available(vcpu);
+ if (make_mmu_pages_available(vcpu) < 0) {
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ return 1;
+ }
sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
root = __pa(sp->spt);
@@ -3420,15 +3429,18 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
* Do we shadow a long mode page table? If so we need to
* write-protect the guests page table root.
*/
- if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
+ if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;
MMU_WARN_ON(VALID_PAGE(root));
spin_lock(&vcpu->kvm->mmu_lock);
- make_mmu_pages_available(vcpu);
- sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
- 0, ACC_ALL);
+ if (make_mmu_pages_available(vcpu) < 0) {
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ return 1;
+ }
+ sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
+ vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
root = __pa(sp->spt);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
@@ -3442,7 +3454,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
* the shadow page table may be a PAE or a long mode page table.
*/
pm_mask = PT_PRESENT_MASK;
- if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL)
+ if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL)
pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
for (i = 0; i < 4; ++i) {
@@ -3460,7 +3472,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
return 1;
}
spin_lock(&vcpu->kvm->mmu_lock);
- make_mmu_pages_available(vcpu);
+ if (make_mmu_pages_available(vcpu) < 0) {
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ return 1;
+ }
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
0, ACC_ALL);
root = __pa(sp->spt);
@@ -3475,7 +3490,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
* If we shadow a 32 bit page table with a long mode page
* table we enter this path.
*/
- if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+ if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) {
if (vcpu->arch.mmu.lm_root == NULL) {
/*
* The additional page necessary for this is only
@@ -3520,7 +3535,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
- if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
+ if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;
sp = page_header(root);
mmu_sync_children(vcpu, sp);
@@ -3585,6 +3600,13 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
{
+ /*
+ * A nested guest cannot use the MMIO cache if it is using nested
+ * page tables, because cr2 is a nGPA while the cache stores GPAs.
+ */
+ if (mmu_is_nested(vcpu))
+ return false;
+
if (direct)
return vcpu_match_mmio_gpa(vcpu, addr);
@@ -3596,7 +3618,7 @@ static bool
walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
{
struct kvm_shadow_walk_iterator iterator;
- u64 sptes[PT64_ROOT_LEVEL], spte = 0ull;
+ u64 sptes[PT64_ROOT_MAX_LEVEL], spte = 0ull;
int root, leaf;
bool reserved = false;
@@ -3637,7 +3659,23 @@ exit:
return reserved;
}
-int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+/*
+ * Return values of handle_mmio_page_fault:
+ * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
+ * directly.
+ * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
+ * fault path update the mmio spte.
+ * RET_MMIO_PF_RETRY: let CPU fault again on the address.
+ * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
+ */
+enum {
+ RET_MMIO_PF_EMULATE = 1,
+ RET_MMIO_PF_INVALID = 2,
+ RET_MMIO_PF_RETRY = 0,
+ RET_MMIO_PF_BUG = -1
+};
+
+static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
{
u64 spte;
bool reserved;
@@ -3869,7 +3907,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
- make_mmu_pages_available(vcpu);
+ if (make_mmu_pages_available(vcpu) < 0)
+ goto out_unlock;
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
@@ -4022,7 +4061,13 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
rsvd_check->rsvd_bits_mask[1][0] =
rsvd_check->rsvd_bits_mask[0][0];
break;
- case PT64_ROOT_LEVEL:
+ case PT64_ROOT_5LEVEL:
+ rsvd_check->rsvd_bits_mask[0][4] = exb_bit_rsvd |
+ nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
+ rsvd_bits(maxphyaddr, 51);
+ rsvd_check->rsvd_bits_mask[1][4] =
+ rsvd_check->rsvd_bits_mask[0][4];
+ case PT64_ROOT_4LEVEL:
rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
rsvd_bits(maxphyaddr, 51);
@@ -4052,7 +4097,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
{
__reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check,
cpuid_maxphyaddr(vcpu), context->root_level,
- context->nx, guest_cpuid_has_gbpages(vcpu),
+ context->nx,
+ guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
is_pse(vcpu), guest_cpuid_is_amd(vcpu));
}
@@ -4062,6 +4108,8 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
{
u64 bad_mt_xwr;
+ rsvd_check->rsvd_bits_mask[0][4] =
+ rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
rsvd_check->rsvd_bits_mask[0][3] =
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
rsvd_check->rsvd_bits_mask[0][2] =
@@ -4071,6 +4119,7 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
/* large page */
+ rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4];
rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
rsvd_check->rsvd_bits_mask[1][2] =
rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
@@ -4106,16 +4155,28 @@ void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
{
bool uses_nx = context->nx || context->base_role.smep_andnot_wp;
+ struct rsvd_bits_validate *shadow_zero_check;
+ int i;
/*
* Passing "true" to the last argument is okay; it adds a check
* on bit 8 of the SPTEs which KVM doesn't use anyway.
*/
- __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+ shadow_zero_check = &context->shadow_zero_check;
+ __reset_rsvds_bits_mask(vcpu, shadow_zero_check,
boot_cpu_data.x86_phys_bits,
context->shadow_root_level, uses_nx,
- guest_cpuid_has_gbpages(vcpu), is_pse(vcpu),
- true);
+ guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
+ is_pse(vcpu), true);
+
+ if (!shadow_me_mask)
+ return;
+
+ for (i = context->shadow_root_level; --i >= 0;) {
+ shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask;
+ shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask;
+ }
+
}
EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
@@ -4133,17 +4194,29 @@ static void
reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
struct kvm_mmu *context)
{
+ struct rsvd_bits_validate *shadow_zero_check;
+ int i;
+
+ shadow_zero_check = &context->shadow_zero_check;
+
if (boot_cpu_is_amd())
- __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+ __reset_rsvds_bits_mask(vcpu, shadow_zero_check,
boot_cpu_data.x86_phys_bits,
context->shadow_root_level, false,
boot_cpu_has(X86_FEATURE_GBPAGES),
true, true);
else
- __reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+ __reset_rsvds_bits_mask_ept(shadow_zero_check,
boot_cpu_data.x86_phys_bits,
false);
+ if (!shadow_me_mask)
+ return;
+
+ for (i = context->shadow_root_level; --i >= 0;) {
+ shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask;
+ shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask;
+ }
}
/*
@@ -4158,66 +4231,85 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
boot_cpu_data.x86_phys_bits, execonly);
}
+#define BYTE_MASK(access) \
+ ((1 & (access) ? 2 : 0) | \
+ (2 & (access) ? 4 : 0) | \
+ (3 & (access) ? 8 : 0) | \
+ (4 & (access) ? 16 : 0) | \
+ (5 & (access) ? 32 : 0) | \
+ (6 & (access) ? 64 : 0) | \
+ (7 & (access) ? 128 : 0))
+
+
static void update_permission_bitmask(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu, bool ept)
{
- unsigned bit, byte, pfec;
- u8 map;
- bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0;
+ unsigned byte;
+
+ const u8 x = BYTE_MASK(ACC_EXEC_MASK);
+ const u8 w = BYTE_MASK(ACC_WRITE_MASK);
+ const u8 u = BYTE_MASK(ACC_USER_MASK);
+
+ bool cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) != 0;
+ bool cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP) != 0;
+ bool cr0_wp = is_write_protection(vcpu);
- cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
- cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
- pfec = byte << 1;
- map = 0;
- wf = pfec & PFERR_WRITE_MASK;
- uf = pfec & PFERR_USER_MASK;
- ff = pfec & PFERR_FETCH_MASK;
+ unsigned pfec = byte << 1;
+
/*
- * PFERR_RSVD_MASK bit is set in PFEC if the access is not
- * subject to SMAP restrictions, and cleared otherwise. The
- * bit is only meaningful if the SMAP bit is set in CR4.
+ * Each "*f" variable has a 1 bit for each UWX value
+ * that causes a fault with the given PFEC.
*/
- smapf = !(pfec & PFERR_RSVD_MASK);
- for (bit = 0; bit < 8; ++bit) {
- x = bit & ACC_EXEC_MASK;
- w = bit & ACC_WRITE_MASK;
- u = bit & ACC_USER_MASK;
-
- if (!ept) {
- /* Not really needed: !nx will cause pte.nx to fault */
- x |= !mmu->nx;
- /* Allow supervisor writes if !cr0.wp */
- w |= !is_write_protection(vcpu) && !uf;
- /* Disallow supervisor fetches of user code if cr4.smep */
- x &= !(cr4_smep && u && !uf);
-
- /*
- * SMAP:kernel-mode data accesses from user-mode
- * mappings should fault. A fault is considered
- * as a SMAP violation if all of the following
- * conditions are ture:
- * - X86_CR4_SMAP is set in CR4
- * - A user page is accessed
- * - Page fault in kernel mode
- * - if CPL = 3 or X86_EFLAGS_AC is clear
- *
- * Here, we cover the first three conditions.
- * The fourth is computed dynamically in
- * permission_fault() and is in smapf.
- *
- * Also, SMAP does not affect instruction
- * fetches, add the !ff check here to make it
- * clearer.
- */
- smap = cr4_smap && u && !uf && !ff;
- }
- fault = (ff && !x) || (uf && !u) || (wf && !w) ||
- (smapf && smap);
- map |= fault << bit;
+ /* Faults from writes to non-writable pages */
+ u8 wf = (pfec & PFERR_WRITE_MASK) ? ~w : 0;
+ /* Faults from user mode accesses to supervisor pages */
+ u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0;
+ /* Faults from fetches of non-executable pages */
+ u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0;
+ /* Faults from kernel mode fetches of user pages */
+ u8 smepf = 0;
+ /* Faults from kernel mode accesses of user pages */
+ u8 smapf = 0;
+
+ if (!ept) {
+ /* Faults from kernel mode accesses to user pages */
+ u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u;
+
+ /* Not really needed: !nx will cause pte.nx to fault */
+ if (!mmu->nx)
+ ff = 0;
+
+ /* Allow supervisor writes if !cr0.wp */
+ if (!cr0_wp)
+ wf = (pfec & PFERR_USER_MASK) ? wf : 0;
+
+ /* Disallow supervisor fetches of user code if cr4.smep */
+ if (cr4_smep)
+ smepf = (pfec & PFERR_FETCH_MASK) ? kf : 0;
+
+ /*
+ * SMAP:kernel-mode data accesses from user-mode
+ * mappings should fault. A fault is considered
+ * as a SMAP violation if all of the following
+ * conditions are true:
+ * - X86_CR4_SMAP is set in CR4
+ * - A user page is accessed
+ * - The access is not a fetch
+ * - Page fault in kernel mode
+ * - if CPL = 3 or X86_EFLAGS_AC is clear
+ *
+ * Here, we cover the first four conditions.
+ * The fifth is computed dynamically in permission_fault();
+ * PFERR_RSVD_MASK bit will be set in PFEC if the access is
+ * *not* subject to SMAP restrictions.
+ */
+ if (cr4_smap)
+ smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf;
}
- mmu->permissions[byte] = map;
+
+ mmu->permissions[byte] = ff | uf | wf | smepf | smapf;
}
}
@@ -4331,7 +4423,10 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
static void paging64_init_context(struct kvm_vcpu *vcpu,
struct kvm_mmu *context)
{
- paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
+ int root_level = is_la57_mode(vcpu) ?
+ PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
+
+ paging64_init_context_common(vcpu, context, root_level);
}
static void paging32_init_context(struct kvm_vcpu *vcpu,
@@ -4372,7 +4467,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
context->update_pte = nonpaging_update_pte;
- context->shadow_root_level = kvm_x86_ops->get_tdp_level();
+ context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
context->root_hpa = INVALID_PAGE;
context->direct_map = true;
context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
@@ -4386,7 +4481,8 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->root_level = 0;
} else if (is_long_mode(vcpu)) {
context->nx = is_nx(vcpu);
- context->root_level = PT64_ROOT_LEVEL;
+ context->root_level = is_la57_mode(vcpu) ?
+ PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
reset_rsvds_bits_mask(vcpu, context);
context->gva_to_gpa = paging64_gva_to_gpa;
} else if (is_pae(vcpu)) {
@@ -4443,7 +4539,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
MMU_WARN_ON(VALID_PAGE(context->root_hpa));
- context->shadow_root_level = kvm_x86_ops->get_tdp_level();
+ context->shadow_root_level = PT64_ROOT_4LEVEL;
context->nx = true;
context->ept_ad = accessed_dirty;
@@ -4452,7 +4548,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->sync_page = ept_sync_page;
context->invlpg = ept_invlpg;
context->update_pte = ept_update_pte;
- context->root_level = context->shadow_root_level;
+ context->root_level = PT64_ROOT_4LEVEL;
context->root_hpa = INVALID_PAGE;
context->direct_map = false;
context->base_role.ad_disabled = !accessed_dirty;
@@ -4497,7 +4593,8 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
} else if (is_long_mode(vcpu)) {
g_context->nx = is_nx(vcpu);
- g_context->root_level = PT64_ROOT_LEVEL;
+ g_context->root_level = is_la57_mode(vcpu) ?
+ PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
reset_rsvds_bits_mask(vcpu, g_context);
g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
} else if (is_pae(vcpu)) {
@@ -4787,12 +4884,12 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
}
EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
-static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
+static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
{
LIST_HEAD(invalid_list);
if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES))
- return;
+ return 0;
while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) {
if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list))
@@ -4801,6 +4898,10 @@ static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
++vcpu->kvm->stat.mmu_recycled;
}
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
+
+ if (!kvm_mmu_available_pages(vcpu->kvm))
+ return -ENOSPC;
+ return 0;
}
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
@@ -4808,7 +4909,13 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
{
int r, emulation_type = EMULTYPE_RETRY;
enum emulation_result er;
- bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu);
+ bool direct = vcpu->arch.mmu.direct_map;
+
+ /* With shadow page tables, fault_address contains a GVA or nGPA. */
+ if (vcpu->arch.mmu.direct_map) {
+ vcpu->arch.gpa_available = true;
+ vcpu->arch.gpa_val = cr2;
+ }
if (unlikely(error_code & PFERR_RSVD_MASK)) {
r = handle_mmio_page_fault(vcpu, cr2, direct);
@@ -4820,6 +4927,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
return 1;
if (r < 0)
return r;
+ /* Must be RET_MMIO_PF_INVALID. */
}
r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
@@ -4835,11 +4943,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
* This can occur when using nested virtualization with nested
* paging in both guests. If true, we simply unprotect the page
* and resume the guest.
- *
- * Note: AMD only (since it supports the PFERR_GUEST_PAGE_MASK used
- * in PFERR_NEXT_GUEST_PAGE)
*/
- if (error_code == PFERR_NESTED_GUEST_PAGE) {
+ if (vcpu->arch.mmu.direct_map &&
+ (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
return 1;
}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index d7d248a000dd..64a2dbd2b1af 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -37,7 +37,8 @@
#define PT32_DIR_PSE36_MASK \
(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
-#define PT64_ROOT_LEVEL 4
+#define PT64_ROOT_5LEVEL 5
+#define PT64_ROOT_4LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3
@@ -48,6 +49,9 @@
static inline u64 rsvd_bits(int s, int e)
{
+ if (e < s)
+ return 0;
+
return ((1ULL << (e - s + 1)) - 1) << s;
}
@@ -56,23 +60,6 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value);
void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
-/*
- * Return values of handle_mmio_page_fault:
- * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
- * directly.
- * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
- * fault path update the mmio spte.
- * RET_MMIO_PF_RETRY: let CPU fault again on the address.
- * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
- */
-enum {
- RET_MMIO_PF_EMULATE = 1,
- RET_MMIO_PF_INVALID = 2,
- RET_MMIO_PF_RETRY = 0,
- RET_MMIO_PF_BUG = -1
-};
-
-int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty);
@@ -185,7 +172,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
* index of the protection domain, so pte_pkey * 2 is
* is the index of the first bit for the domain.
*/
- pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
+ pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3;
/* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
offset = (pfec & ~1) +
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index dcce533d420c..d22ddbdf5e6e 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -62,11 +62,11 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
- if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
+ if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;
sp = page_header(root);
- __mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL);
+ __mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu.root_level);
return;
}
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 0149ac59c273..e9ea2d45ae66 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -130,7 +130,7 @@ static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu)
* enable MTRRs and it is obviously undesirable to run the
* guest entirely with UC memory and we use WB.
*/
- if (guest_cpuid_has_mtrr(vcpu))
+ if (guest_cpuid_has(vcpu, X86_FEATURE_MTRR))
return MTRR_TYPE_UNCACHABLE;
else
return MTRR_TYPE_WRBACK;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index b0454c7e4cff..86b68dc5a649 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -790,8 +790,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
&map_writable))
return 0;
- if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr,
- walker.gfn, pfn, walker.pte_access, &r))
+ if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
return r;
/*
@@ -819,7 +818,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
goto out_unlock;
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
- make_mmu_pages_available(vcpu);
+ if (make_mmu_pages_available(vcpu) < 0)
+ goto out_unlock;
if (!force_pt_level)
transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1107626938cc..2c1cfe68a9af 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -280,9 +280,9 @@ module_param(avic, int, S_IRUGO);
static int vls = true;
module_param(vls, int, 0444);
-/* AVIC VM ID bit masks and lock */
-static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR);
-static DEFINE_SPINLOCK(avic_vm_id_lock);
+/* enable/disable Virtual GIF */
+static int vgif = true;
+module_param(vgif, int, 0444);
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
@@ -479,19 +479,33 @@ static inline void clr_intercept(struct vcpu_svm *svm, int bit)
recalc_intercepts(svm);
}
+static inline bool vgif_enabled(struct vcpu_svm *svm)
+{
+ return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK);
+}
+
static inline void enable_gif(struct vcpu_svm *svm)
{
- svm->vcpu.arch.hflags |= HF_GIF_MASK;
+ if (vgif_enabled(svm))
+ svm->vmcb->control.int_ctl |= V_GIF_MASK;
+ else
+ svm->vcpu.arch.hflags |= HF_GIF_MASK;
}
static inline void disable_gif(struct vcpu_svm *svm)
{
- svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+ if (vgif_enabled(svm))
+ svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
+ else
+ svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
}
static inline bool gif_set(struct vcpu_svm *svm)
{
- return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
+ if (vgif_enabled(svm))
+ return !!(svm->vmcb->control.int_ctl & V_GIF_MASK);
+ else
+ return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
}
static unsigned long iopm_base;
@@ -567,10 +581,10 @@ static inline void invlpga(unsigned long addr, u32 asid)
asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
}
-static int get_npt_level(void)
+static int get_npt_level(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
- return PT64_ROOT_LEVEL;
+ return PT64_ROOT_4LEVEL;
#else
return PT32E_ROOT_LEVEL;
#endif
@@ -641,7 +655,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
unsigned nr = vcpu->arch.exception.nr;
bool has_error_code = vcpu->arch.exception.has_error_code;
- bool reinject = vcpu->arch.exception.reinject;
+ bool reinject = vcpu->arch.exception.injected;
u32 error_code = vcpu->arch.exception.error_code;
/*
@@ -973,6 +987,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
static void disable_nmi_singlestep(struct vcpu_svm *svm)
{
svm->nmi_singlestep = false;
+
if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
/* Clear our flags if they were not set by the guest */
if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
@@ -989,6 +1004,8 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm)
*/
#define SVM_VM_DATA_HASH_BITS 8
static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
+static u32 next_vm_id = 0;
+static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
/* Note:
@@ -1100,7 +1117,7 @@ static __init int svm_hardware_setup(void)
if (vls) {
if (!npt_enabled ||
- !boot_cpu_has(X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE) ||
+ !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
!IS_ENABLED(CONFIG_X86_64)) {
vls = false;
} else {
@@ -1108,6 +1125,13 @@ static __init int svm_hardware_setup(void)
}
}
+ if (vgif) {
+ if (!boot_cpu_has(X86_FEATURE_VGIF))
+ vgif = false;
+ else
+ pr_info("Virtual GIF supported\n");
+ }
+
return 0;
err:
@@ -1167,9 +1191,9 @@ static void avic_init_vmcb(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb;
struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
- phys_addr_t bpa = page_to_phys(svm->avic_backing_page);
- phys_addr_t lpa = page_to_phys(vm_data->avic_logical_id_table_page);
- phys_addr_t ppa = page_to_phys(vm_data->avic_physical_id_table_page);
+ phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
+ phys_addr_t lpa = __sme_set(page_to_phys(vm_data->avic_logical_id_table_page));
+ phys_addr_t ppa = __sme_set(page_to_phys(vm_data->avic_physical_id_table_page));
vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
@@ -1232,8 +1256,8 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_MWAIT);
}
- control->iopm_base_pa = iopm_base;
- control->msrpm_base_pa = __pa(svm->msrpm);
+ control->iopm_base_pa = __sme_set(iopm_base);
+ control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
control->int_ctl = V_INTR_MASKING_MASK;
init_seg(&save->es);
@@ -1305,6 +1329,12 @@ static void init_vmcb(struct vcpu_svm *svm)
svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
}
+ if (vgif) {
+ clr_intercept(svm, INTERCEPT_STGI);
+ clr_intercept(svm, INTERCEPT_CLGI);
+ svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
+ }
+
mark_all_dirty(svm->vmcb);
enable_gif(svm);
@@ -1377,9 +1407,9 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
return -EINVAL;
new_entry = READ_ONCE(*entry);
- new_entry = (page_to_phys(svm->avic_backing_page) &
- AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
- AVIC_PHYSICAL_ID_ENTRY_VALID_MASK;
+ new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
+ AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
+ AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
WRITE_ONCE(*entry, new_entry);
svm->avic_physical_id_cache = entry;
@@ -1387,34 +1417,6 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
return 0;
}
-static inline int avic_get_next_vm_id(void)
-{
- int id;
-
- spin_lock(&avic_vm_id_lock);
-
- /* AVIC VM ID is one-based. */
- id = find_next_zero_bit(avic_vm_id_bitmap, AVIC_VM_ID_NR, 1);
- if (id <= AVIC_VM_ID_MASK)
- __set_bit(id, avic_vm_id_bitmap);
- else
- id = -EAGAIN;
-
- spin_unlock(&avic_vm_id_lock);
- return id;
-}
-
-static inline int avic_free_vm_id(int id)
-{
- if (id <= 0 || id > AVIC_VM_ID_MASK)
- return -EINVAL;
-
- spin_lock(&avic_vm_id_lock);
- __clear_bit(id, avic_vm_id_bitmap);
- spin_unlock(&avic_vm_id_lock);
- return 0;
-}
-
static void avic_vm_destroy(struct kvm *kvm)
{
unsigned long flags;
@@ -1423,8 +1425,6 @@ static void avic_vm_destroy(struct kvm *kvm)
if (!avic)
return;
- avic_free_vm_id(vm_data->avic_vm_id);
-
if (vm_data->avic_logical_id_table_page)
__free_page(vm_data->avic_logical_id_table_page);
if (vm_data->avic_physical_id_table_page)
@@ -1438,19 +1438,16 @@ static void avic_vm_destroy(struct kvm *kvm)
static int avic_vm_init(struct kvm *kvm)
{
unsigned long flags;
- int vm_id, err = -ENOMEM;
+ int err = -ENOMEM;
struct kvm_arch *vm_data = &kvm->arch;
struct page *p_page;
struct page *l_page;
+ struct kvm_arch *ka;
+ u32 vm_id;
if (!avic)
return 0;
- vm_id = avic_get_next_vm_id();
- if (vm_id < 0)
- return vm_id;
- vm_data->avic_vm_id = (u32)vm_id;
-
/* Allocating physical APIC ID table (4KB) */
p_page = alloc_page(GFP_KERNEL);
if (!p_page)
@@ -1468,6 +1465,22 @@ static int avic_vm_init(struct kvm *kvm)
clear_page(page_address(l_page));
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
+ again:
+ vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
+ if (vm_id == 0) { /* id is 1-based, zero is not okay */
+ next_vm_id_wrapped = 1;
+ goto again;
+ }
+ /* Is it still in use? Only possible if wrapped at least once */
+ if (next_vm_id_wrapped) {
+ hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
+ struct kvm *k2 = container_of(ka, struct kvm, arch);
+ struct kvm_arch *vd2 = &k2->arch;
+ if (vd2->avic_vm_id == vm_id)
+ goto again;
+ }
+ }
+ vm_data->avic_vm_id = vm_id;
hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id);
spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
@@ -1580,7 +1593,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
}
init_vmcb(svm);
- kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
+ kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true);
kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
if (kvm_vcpu_apicv_active(vcpu) && !init_event)
@@ -1647,7 +1660,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
svm->vmcb = page_address(page);
clear_page(svm->vmcb);
- svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
+ svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
svm->asid_generation = 0;
init_vmcb(svm);
@@ -1675,7 +1688,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
+ __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
__free_page(virt_to_page(svm->nested.hsave));
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
@@ -1777,11 +1790,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
to_svm(vcpu)->vmcb->save.rflags = rflags;
}
-static u32 svm_get_pkru(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{
switch (reg) {
@@ -2335,7 +2343,7 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
u64 pdpte;
int ret;
- ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
+ ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
offset_in_page(cr3) + index * 8, 8);
if (ret)
return 0;
@@ -2347,7 +2355,7 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
{
struct vcpu_svm *svm = to_svm(vcpu);
- svm->vmcb->control.nested_cr3 = root;
+ svm->vmcb->control.nested_cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_NPT);
svm_flush_tlb(vcpu);
}
@@ -2389,7 +2397,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
- vcpu->arch.mmu.shadow_root_level = get_npt_level();
+ vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu);
reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}
@@ -2878,7 +2886,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
svm->nested.msrpm[p] = svm->msrpm[p] | value;
}
- svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
+ svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));
return true;
}
@@ -3152,6 +3160,13 @@ static int stgi_interception(struct vcpu_svm *svm)
if (nested_svm_check_permissions(svm))
return 1;
+ /*
+ * If VGIF is enabled, the STGI intercept is only added to
+ * detect the opening of the NMI window; remove it now.
+ */
+ if (vgif_enabled(svm))
+ clr_intercept(svm, INTERCEPT_STGI);
+
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
@@ -3749,7 +3764,10 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
static int pause_interception(struct vcpu_svm *svm)
{
- kvm_vcpu_on_spin(&(svm->vcpu));
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ bool in_kernel = (svm_get_cpl(vcpu) == 0);
+
+ kvm_vcpu_on_spin(vcpu, in_kernel);
return 1;
}
@@ -4233,8 +4251,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
- vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF);
-
if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
vcpu->arch.cr0 = svm->vmcb->save.cr0;
if (npt_enabled)
@@ -4511,7 +4527,7 @@ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
irq.vector);
*svm = to_svm(vcpu);
- vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page);
+ vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
vcpu_info->vector = irq.vector;
return 0;
@@ -4562,7 +4578,8 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
struct amd_iommu_pi_data pi;
/* Try to enable guest_mode in IRTE */
- pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK;
+ pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
+ AVIC_HPA_MASK);
pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
svm->vcpu.vcpu_id);
pi.is_guest_mode = true;
@@ -4686,9 +4703,11 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
* In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
* 1, because that's a separate STGI/VMRUN intercept. The next time we
* get that intercept, this function will be called again though and
- * we'll get the vintr intercept.
+ * we'll get the vintr intercept. However, if the vGIF feature is
+ * enabled, the STGI interception will not occur. Enable the irq
+ * window under the assumption that the hardware will set the GIF.
*/
- if (gif_set(svm) && nested_svm_intr(svm)) {
+ if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) {
svm_set_vintr(svm);
svm_inject_irq(svm, 0x0);
}
@@ -4702,8 +4721,11 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
== HF_NMI_MASK)
return; /* IRET will cause a vm exit */
- if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0)
+ if (!gif_set(svm)) {
+ if (vgif_enabled(svm))
+ set_intercept(svm, INTERCEPT_STGI);
return; /* STGI will cause a vm exit */
+ }
if (svm->nested.exit_required)
return; /* we're not going to run the guest yet */
@@ -5011,7 +5033,7 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
{
struct vcpu_svm *svm = to_svm(vcpu);
- svm->vmcb->save.cr3 = root;
+ svm->vmcb->save.cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_CR);
svm_flush_tlb(vcpu);
}
@@ -5020,7 +5042,7 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
{
struct vcpu_svm *svm = to_svm(vcpu);
- svm->vmcb->control.nested_cr3 = root;
+ svm->vmcb->control.nested_cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_NPT);
/* Also sync guest cr3 here in case we live migrate */
@@ -5075,17 +5097,14 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct kvm_cpuid_entry2 *entry;
/* Update nrips enabled cache */
- svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu);
+ svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
if (!kvm_vcpu_apicv_active(vcpu))
return;
- entry = kvm_find_cpuid_entry(vcpu, 1, 0);
- if (entry)
- entry->ecx &= ~bit(X86_FEATURE_X2APIC);
+ guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
}
static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -5413,8 +5432,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
- .get_pkru = svm_get_pkru,
-
.tlb_flush = svm_flush_tlb,
.run = svm_vcpu_run,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 0a6cc6754ec5..8a202c49e2a0 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -151,8 +151,8 @@ TRACE_EVENT(kvm_fast_mmio,
*/
TRACE_EVENT(kvm_cpuid,
TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx,
- unsigned long rcx, unsigned long rdx),
- TP_ARGS(function, rax, rbx, rcx, rdx),
+ unsigned long rcx, unsigned long rdx, bool found),
+ TP_ARGS(function, rax, rbx, rcx, rdx, found),
TP_STRUCT__entry(
__field( unsigned int, function )
@@ -160,6 +160,7 @@ TRACE_EVENT(kvm_cpuid,
__field( unsigned long, rbx )
__field( unsigned long, rcx )
__field( unsigned long, rdx )
+ __field( bool, found )
),
TP_fast_assign(
@@ -168,11 +169,13 @@ TRACE_EVENT(kvm_cpuid,
__entry->rbx = rbx;
__entry->rcx = rcx;
__entry->rdx = rdx;
+ __entry->found = found;
),
- TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx",
+ TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx, cpuid entry %s",
__entry->function, __entry->rax,
- __entry->rbx, __entry->rcx, __entry->rdx)
+ __entry->rbx, __entry->rcx, __entry->rdx,
+ __entry->found ? "found" : "not found")
);
#define AREG(x) { APIC_##x, "APIC_" #x }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9b21b1223035..4253adef9044 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -122,7 +122,7 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
#define KVM_CR4_GUEST_OWNED_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
- | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
+ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
@@ -243,11 +243,13 @@ struct __packed vmcs12 {
u64 virtual_apic_page_addr;
u64 apic_access_addr;
u64 posted_intr_desc_addr;
+ u64 vm_function_control;
u64 ept_pointer;
u64 eoi_exit_bitmap0;
u64 eoi_exit_bitmap1;
u64 eoi_exit_bitmap2;
u64 eoi_exit_bitmap3;
+ u64 eptp_list_address;
u64 xss_exit_bitmap;
u64 guest_physical_address;
u64 vmcs_link_pointer;
@@ -481,6 +483,7 @@ struct nested_vmx {
u64 nested_vmx_cr4_fixed0;
u64 nested_vmx_cr4_fixed1;
u64 nested_vmx_vmcs_enum;
+ u64 nested_vmx_vmfunc_controls;
};
#define POSTED_INTR_ON 0
@@ -573,6 +576,8 @@ struct vcpu_vmx {
#endif
u32 vm_entry_controls_shadow;
u32 vm_exit_controls_shadow;
+ u32 secondary_exec_control;
+
/*
* loaded_vmcs points to the VMCS currently used in this vcpu. For a
* non-nested (L1) guest, it always points to vmcs01. For a nested
@@ -636,8 +641,6 @@ struct vcpu_vmx {
u64 current_tsc_ratio;
- bool guest_pkru_valid;
- u32 guest_pkru;
u32 host_pkru;
/*
@@ -763,11 +766,13 @@ static const unsigned short vmcs_field_to_offset_table[] = {
FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
+ FIELD64(VM_FUNCTION_CONTROL, vm_function_control),
FIELD64(EPT_POINTER, ept_pointer),
FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
+ FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
@@ -891,25 +896,6 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
return to_vmx(vcpu)->nested.cached_vmcs12;
}
-static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
-{
- struct page *page = kvm_vcpu_gfn_to_page(vcpu, addr >> PAGE_SHIFT);
- if (is_error_page(page))
- return NULL;
-
- return page;
-}
-
-static void nested_release_page(struct page *page)
-{
- kvm_release_page_dirty(page);
-}
-
-static void nested_release_page_clean(struct page *page)
-{
- kvm_release_page_clean(page);
-}
-
static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu);
static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
@@ -1214,6 +1200,16 @@ static inline bool cpu_has_vmx_ept_4levels(void)
return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
}
+static inline bool cpu_has_vmx_ept_mt_wb(void)
+{
+ return vmx_capability.ept & VMX_EPTP_WB_BIT;
+}
+
+static inline bool cpu_has_vmx_ept_5levels(void)
+{
+ return vmx_capability.ept & VMX_EPT_PAGE_WALK_5_BIT;
+}
+
static inline bool cpu_has_vmx_ept_ad_bits(void)
{
return vmx_capability.ept & VMX_EPT_AD_BIT;
@@ -1319,6 +1315,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void)
SECONDARY_EXEC_TSC_SCALING;
}
+static inline bool cpu_has_vmx_vmfunc(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_ENABLE_VMFUNC;
+}
+
static inline bool report_flexpriority(void)
{
return flexpriority_enabled;
@@ -1359,8 +1361,7 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
{
- return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) &&
- vmx_xsaves_supported();
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
}
static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
@@ -1393,6 +1394,18 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
}
+static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
+{
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
+}
+
+static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
+{
+ return nested_cpu_has_vmfunc(vmcs12) &&
+ (vmcs12->vm_function_control &
+ VMX_VMFUNC_EPTP_SWITCHING);
+}
+
static inline bool is_nmi(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -2383,11 +2396,6 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
}
-static u32 vmx_get_pkru(struct kvm_vcpu *vcpu)
-{
- return to_vmx(vcpu)->guest_pkru;
-}
-
static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
{
u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
@@ -2457,15 +2465,14 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
* KVM wants to inject page-faults which it got to the guest. This function
* checks whether in a nested guest, we need to inject them to L1 or L2.
*/
-static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
unsigned int nr = vcpu->arch.exception.nr;
if (nr == PF_VECTOR) {
if (vcpu->arch.exception.nested_apf) {
- nested_vmx_inject_exception_vmexit(vcpu,
- vcpu->arch.apf.nested_apf_token);
+ *exit_qual = vcpu->arch.apf.nested_apf_token;
return 1;
}
/*
@@ -2479,16 +2486,15 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
*/
if (nested_vmx_is_page_fault_vmexit(vmcs12,
vcpu->arch.exception.error_code)) {
- nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2);
+ *exit_qual = vcpu->arch.cr2;
return 1;
}
} else {
- unsigned long exit_qual = 0;
- if (nr == DB_VECTOR)
- exit_qual = vcpu->arch.dr6;
-
if (vmcs12->exception_bitmap & (1u << nr)) {
- nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+ if (nr == DB_VECTOR)
+ *exit_qual = vcpu->arch.dr6;
+ else
+ *exit_qual = 0;
return 1;
}
}
@@ -2501,14 +2507,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned nr = vcpu->arch.exception.nr;
bool has_error_code = vcpu->arch.exception.has_error_code;
- bool reinject = vcpu->arch.exception.reinject;
u32 error_code = vcpu->arch.exception.error_code;
u32 intr_info = nr | INTR_INFO_VALID_MASK;
- if (!reinject && is_guest_mode(vcpu) &&
- nested_vmx_check_exception(vcpu))
- return;
-
if (has_error_code) {
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
intr_info |= INTR_INFO_DELIVER_CODE_MASK;
@@ -2607,7 +2608,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
if (index >= 0)
move_msr_up(vmx, index, save_nmsrs++);
index = __find_msr_index(vmx, MSR_TSC_AUX);
- if (index >= 0 && guest_cpuid_has_rdtscp(&vmx->vcpu))
+ if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
move_msr_up(vmx, index, save_nmsrs++);
/*
* MSR_STAR is only needed on long mode guests, and only
@@ -2667,12 +2668,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
}
}
-static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0);
- return best && (best->ecx & (1 << (X86_FEATURE_VMX & 31)));
-}
-
/*
* nested_vmx_allowed() checks whether a guest should be allowed to use VMX
* instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
@@ -2681,7 +2676,7 @@ static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
*/
static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
{
- return nested && guest_cpuid_has_vmx(vcpu);
+ return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
}
/*
@@ -2804,21 +2799,21 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_procbased_ctls_low &=
~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
- /* secondary cpu-based controls */
+ /*
+ * secondary cpu-based controls. Do not include those that
+ * depend on CPUID bits, they are added later by vmx_cpuid_update.
+ */
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
vmx->nested.nested_vmx_secondary_ctls_low,
vmx->nested.nested_vmx_secondary_ctls_high);
vmx->nested.nested_vmx_secondary_ctls_low = 0;
vmx->nested.nested_vmx_secondary_ctls_high &=
- SECONDARY_EXEC_RDRAND | SECONDARY_EXEC_RDSEED |
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
- SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_DESC |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
- SECONDARY_EXEC_WBINVD_EXITING |
- SECONDARY_EXEC_XSAVES;
+ SECONDARY_EXEC_WBINVD_EXITING;
if (enable_ept) {
/* nested EPT: emulate EPT also to L1 */
@@ -2841,6 +2836,17 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
} else
vmx->nested.nested_vmx_ept_caps = 0;
+ if (cpu_has_vmx_vmfunc()) {
+ vmx->nested.nested_vmx_secondary_ctls_high |=
+ SECONDARY_EXEC_ENABLE_VMFUNC;
+ /*
+ * Advertise EPTP switching unconditionally
+ * since we emulate it
+ */
+ vmx->nested.nested_vmx_vmfunc_controls =
+ VMX_VMFUNC_EPTP_SWITCHING;
+ }
+
/*
* Old versions of KVM use the single-context version without
* checking for support, so declare that it is supported even
@@ -3210,6 +3216,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
*pdata = vmx->nested.nested_vmx_ept_caps |
((u64)vmx->nested.nested_vmx_vpid_caps << 32);
break;
+ case MSR_IA32_VMX_VMFUNC:
+ *pdata = vmx->nested.nested_vmx_vmfunc_controls;
+ break;
default:
return 1;
}
@@ -3263,7 +3272,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
- (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
+ (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
return 1;
msr_info->data = vmcs_read64(GUEST_BNDCFGS);
break;
@@ -3287,7 +3297,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.ia32_xss;
break;
case MSR_TSC_AUX:
- if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated)
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
return 1;
/* Otherwise falls through */
default:
@@ -3346,9 +3357,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
- (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
+ (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
return 1;
- if (is_noncanonical_address(data & PAGE_MASK) ||
+ if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
(data & MSR_IA32_BNDCFGS_RSVD))
return 1;
vmcs_write64(GUEST_BNDCFGS, data);
@@ -3409,7 +3421,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
break;
case MSR_TSC_AUX:
- if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated)
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
return 1;
/* Check reserved bit, higher 32 bits should be zero */
if ((data >> 32) != 0)
@@ -3646,8 +3659,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_SHADOW_VMCS |
SECONDARY_EXEC_XSAVES |
+ SECONDARY_EXEC_RDSEED |
+ SECONDARY_EXEC_RDRAND |
SECONDARY_EXEC_ENABLE_PML |
- SECONDARY_EXEC_TSC_SCALING;
+ SECONDARY_EXEC_TSC_SCALING |
+ SECONDARY_EXEC_ENABLE_VMFUNC;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
@@ -4279,16 +4295,22 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
vmx->emulation_required = emulation_required(vcpu);
}
+static int get_ept_level(struct kvm_vcpu *vcpu)
+{
+ if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
+ return 5;
+ return 4;
+}
+
static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
{
- u64 eptp;
+ u64 eptp = VMX_EPTP_MT_WB;
+
+ eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
- /* TODO write the value reading from MSR */
- eptp = VMX_EPT_DEFAULT_MT |
- VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
if (enable_ept_ad_bits &&
(!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
- eptp |= VMX_EPT_AD_ENABLE_BIT;
+ eptp |= VMX_EPTP_AD_ENABLE_BIT;
eptp |= (root_hpa & PAGE_MASK);
return eptp;
@@ -5250,10 +5272,24 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
return exec_control;
}
-static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
+static bool vmx_rdrand_supported(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_RDRAND;
+}
+
+static bool vmx_rdseed_supported(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_RDSEED;
+}
+
+static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
{
+ struct kvm_vcpu *vcpu = &vmx->vcpu;
+
u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
- if (!cpu_need_virtualize_apic_accesses(&vmx->vcpu))
+ if (!cpu_need_virtualize_apic_accesses(vcpu))
exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
if (vmx->vpid == 0)
exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
@@ -5267,7 +5303,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
if (!ple_gap)
exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
- if (!kvm_vcpu_apicv_active(&vmx->vcpu))
+ if (!kvm_vcpu_apicv_active(vcpu))
exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
@@ -5281,7 +5317,92 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
if (!enable_pml)
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
- return exec_control;
+ if (vmx_xsaves_supported()) {
+ /* Exposing XSAVES only when XSAVE is exposed */
+ bool xsaves_enabled =
+ guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+ guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
+
+ if (!xsaves_enabled)
+ exec_control &= ~SECONDARY_EXEC_XSAVES;
+
+ if (nested) {
+ if (xsaves_enabled)
+ vmx->nested.nested_vmx_secondary_ctls_high |=
+ SECONDARY_EXEC_XSAVES;
+ else
+ vmx->nested.nested_vmx_secondary_ctls_high &=
+ ~SECONDARY_EXEC_XSAVES;
+ }
+ }
+
+ if (vmx_rdtscp_supported()) {
+ bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP);
+ if (!rdtscp_enabled)
+ exec_control &= ~SECONDARY_EXEC_RDTSCP;
+
+ if (nested) {
+ if (rdtscp_enabled)
+ vmx->nested.nested_vmx_secondary_ctls_high |=
+ SECONDARY_EXEC_RDTSCP;
+ else
+ vmx->nested.nested_vmx_secondary_ctls_high &=
+ ~SECONDARY_EXEC_RDTSCP;
+ }
+ }
+
+ if (vmx_invpcid_supported()) {
+ /* Exposing INVPCID only when PCID is exposed */
+ bool invpcid_enabled =
+ guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) &&
+ guest_cpuid_has(vcpu, X86_FEATURE_PCID);
+
+ if (!invpcid_enabled) {
+ exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
+ guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
+ }
+
+ if (nested) {
+ if (invpcid_enabled)
+ vmx->nested.nested_vmx_secondary_ctls_high |=
+ SECONDARY_EXEC_ENABLE_INVPCID;
+ else
+ vmx->nested.nested_vmx_secondary_ctls_high &=
+ ~SECONDARY_EXEC_ENABLE_INVPCID;
+ }
+ }
+
+ if (vmx_rdrand_supported()) {
+ bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND);
+ if (rdrand_enabled)
+ exec_control &= ~SECONDARY_EXEC_RDRAND;
+
+ if (nested) {
+ if (rdrand_enabled)
+ vmx->nested.nested_vmx_secondary_ctls_high |=
+ SECONDARY_EXEC_RDRAND;
+ else
+ vmx->nested.nested_vmx_secondary_ctls_high &=
+ ~SECONDARY_EXEC_RDRAND;
+ }
+ }
+
+ if (vmx_rdseed_supported()) {
+ bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED);
+ if (rdseed_enabled)
+ exec_control &= ~SECONDARY_EXEC_RDSEED;
+
+ if (nested) {
+ if (rdseed_enabled)
+ vmx->nested.nested_vmx_secondary_ctls_high |=
+ SECONDARY_EXEC_RDSEED;
+ else
+ vmx->nested.nested_vmx_secondary_ctls_high &=
+ ~SECONDARY_EXEC_RDSEED;
+ }
+ }
+
+ vmx->secondary_exec_control = exec_control;
}
static void ept_set_mmio_spte_mask(void)
@@ -5325,8 +5446,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
if (cpu_has_secondary_exec_ctrls()) {
+ vmx_compute_secondary_exec_control(vmx);
vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
- vmx_secondary_exec_control(vmx));
+ vmx->secondary_exec_control);
}
if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
@@ -5364,6 +5486,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
#endif
+ if (cpu_has_vmx_vmfunc())
+ vmcs_write64(VM_FUNCTION_CONTROL, 0);
+
vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
@@ -5842,6 +5967,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
static int handle_triple_fault(struct kvm_vcpu *vcpu)
{
vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+ vcpu->mmio_needed = 0;
return 0;
}
@@ -6337,7 +6463,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
gpa_t gpa;
- u32 error_code;
+ u64 error_code;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -6369,9 +6495,10 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
EPT_VIOLATION_EXECUTABLE))
? PFERR_PRESENT_MASK : 0;
- vcpu->arch.gpa_available = true;
- vcpu->arch.exit_qualification = exit_qualification;
+ error_code |= (exit_qualification & 0x100) != 0 ?
+ PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
+ vcpu->arch.exit_qualification = exit_qualification;
return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
}
@@ -6380,23 +6507,20 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
int ret;
gpa_t gpa;
+ /*
+ * A nested guest cannot optimize MMIO vmexits, because we have an
+ * nGPA here instead of the required GPA.
+ */
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
- if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
+ if (!is_guest_mode(vcpu) &&
+ !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
trace_kvm_fast_mmio(gpa);
return kvm_skip_emulated_instruction(vcpu);
}
- ret = handle_mmio_page_fault(vcpu, gpa, true);
- vcpu->arch.gpa_available = true;
- if (likely(ret == RET_MMIO_PF_EMULATE))
- return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
- EMULATE_DONE;
-
- if (unlikely(ret == RET_MMIO_PF_INVALID))
- return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0);
-
- if (unlikely(ret == RET_MMIO_PF_RETRY))
- return 1;
+ ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
+ if (ret >= 0)
+ return ret;
/* It is the real ept misconfig */
WARN_ON(1);
@@ -6563,7 +6687,7 @@ void vmx_enable_tdp(void)
enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
0ull, VMX_EPT_EXECUTABLE_MASK,
cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
- VMX_EPT_RWX_MASK);
+ VMX_EPT_RWX_MASK, 0ull);
ept_set_mmio_spte_mask();
kvm_enable_tdp();
@@ -6618,7 +6742,8 @@ static __init int hardware_setup(void)
init_vmcs_shadow_fields();
if (!cpu_has_vmx_ept() ||
- !cpu_has_vmx_ept_4levels()) {
+ !cpu_has_vmx_ept_4levels() ||
+ !cpu_has_vmx_ept_mt_wb()) {
enable_ept = 0;
enable_unrestricted_guest = 0;
enable_ept_ad_bits = 0;
@@ -6761,7 +6886,13 @@ static int handle_pause(struct kvm_vcpu *vcpu)
if (ple_gap)
grow_ple_window(vcpu);
- kvm_vcpu_on_spin(vcpu);
+ /*
+ * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting"
+ * VM-execution control is ignored if CPL > 0. OTOH, KVM
+ * never set PAUSE_EXITING and just set PLE if supported,
+ * so the vcpu must be CPL=0 if it gets a PAUSE exit.
+ */
+ kvm_vcpu_on_spin(vcpu, true);
return kvm_skip_emulated_instruction(vcpu);
}
@@ -6776,6 +6907,12 @@ static int handle_mwait(struct kvm_vcpu *vcpu)
return handle_nop(vcpu);
}
+static int handle_invalid_op(struct kvm_vcpu *vcpu)
+{
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+}
+
static int handle_monitor_trap(struct kvm_vcpu *vcpu)
{
return 1;
@@ -6992,7 +7129,7 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
* non-canonical form. This is the only check on the memory
* destination for long mode!
*/
- exn = is_noncanonical_address(*ret);
+ exn = is_noncanonical_address(*ret, vcpu);
} else if (is_protmode(vcpu)) {
/* Protected mode: apply checks for segment validity in the
* following order:
@@ -7156,19 +7293,19 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
- page = nested_get_page(vcpu, vmptr);
- if (page == NULL) {
+ page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
+ if (is_error_page(page)) {
nested_vmx_failInvalid(vcpu);
return kvm_skip_emulated_instruction(vcpu);
}
if (*(u32 *)kmap(page) != VMCS12_REVISION) {
kunmap(page);
- nested_release_page_clean(page);
+ kvm_release_page_clean(page);
nested_vmx_failInvalid(vcpu);
return kvm_skip_emulated_instruction(vcpu);
}
kunmap(page);
- nested_release_page_clean(page);
+ kvm_release_page_clean(page);
vmx->nested.vmxon_ptr = vmptr;
ret = enter_vmx_operation(vcpu);
@@ -7249,16 +7386,16 @@ static void free_nested(struct vcpu_vmx *vmx)
kfree(vmx->nested.cached_vmcs12);
/* Unpin physical memory we referred to in current vmcs02 */
if (vmx->nested.apic_access_page) {
- nested_release_page(vmx->nested.apic_access_page);
+ kvm_release_page_dirty(vmx->nested.apic_access_page);
vmx->nested.apic_access_page = NULL;
}
if (vmx->nested.virtual_apic_page) {
- nested_release_page(vmx->nested.virtual_apic_page);
+ kvm_release_page_dirty(vmx->nested.virtual_apic_page);
vmx->nested.virtual_apic_page = NULL;
}
if (vmx->nested.pi_desc_page) {
kunmap(vmx->nested.pi_desc_page);
- nested_release_page(vmx->nested.pi_desc_page);
+ kvm_release_page_dirty(vmx->nested.pi_desc_page);
vmx->nested.pi_desc_page = NULL;
vmx->nested.pi_desc = NULL;
}
@@ -7625,15 +7762,15 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
if (vmx->nested.current_vmptr != vmptr) {
struct vmcs12 *new_vmcs12;
struct page *page;
- page = nested_get_page(vcpu, vmptr);
- if (page == NULL) {
+ page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
+ if (is_error_page(page)) {
nested_vmx_failInvalid(vcpu);
return kvm_skip_emulated_instruction(vcpu);
}
new_vmcs12 = kmap(page);
if (new_vmcs12->revision_id != VMCS12_REVISION) {
kunmap(page);
- nested_release_page_clean(page);
+ kvm_release_page_clean(page);
nested_vmx_failValid(vcpu,
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
return kvm_skip_emulated_instruction(vcpu);
@@ -7646,7 +7783,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
*/
memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
kunmap(page);
- nested_release_page_clean(page);
+ kvm_release_page_clean(page);
set_current_vmptr(vmx, vmptr);
}
@@ -7797,7 +7934,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
switch (type) {
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
- if (is_noncanonical_address(operand.gla)) {
+ if (is_noncanonical_address(operand.gla, vcpu)) {
nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
return kvm_skip_emulated_instruction(vcpu);
@@ -7854,6 +7991,124 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
return 1;
}
+/*
+ * Decide whether @address is an acceptable EPT pointer value for this vcpu,
+ * judged against the nested EPT capabilities stored in
+ * vmx->nested.nested_vmx_ept_caps and the guest's physical address width.
+ *
+ * Returns true when every check passes, false as soon as one fails.
+ */
+static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 mt_cap;
+
+	/* Map the requested memory type to the capability bit it requires. */
+	switch (address & VMX_EPTP_MT_MASK) {
+	case VMX_EPTP_MT_UC:
+		mt_cap = VMX_EPTP_UC_BIT;
+		break;
+	case VMX_EPTP_MT_WB:
+		mt_cap = VMX_EPTP_WB_BIT;
+		break;
+	default:
+		/* Any other memory type is rejected outright. */
+		return false;
+	}
+
+	/* The selected memory type must be advertised in the nested caps. */
+	if (!(vmx->nested.nested_vmx_ept_caps & mt_cap))
+		return false;
+
+	/* Only a 4-level page-walk length is accepted. */
+	if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4)
+		return false;
+
+	/* No bits may be set at or above the guest's physical address width. */
+	if (address >> maxphyaddr)
+		return false;
+
+	/* Bits 11:7 are treated as reserved and must be clear. */
+	if ((address >> 7) & 0x1f)
+		return false;
+
+	/* Accessed/dirty flags may only be requested when they are supported. */
+	if ((address & VMX_EPTP_AD_ENABLE_BIT) &&
+	    !(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT))
+		return false;
+
+	return true;
+}
+
+/*
+ * Emulate the EPTP-switching VM function for a nested guest.
+ *
+ * The list index is taken from RCX; the 8-byte entry at that index is read
+ * from the guest page named by vmcs12->eptp_list_address. If the entry
+ * differs from the current vmcs12->ept_pointer and passes
+ * valid_ept_address(), the MMU is unloaded, switched to the new pointer and
+ * reloaded.
+ *
+ * Returns 0 on success (including the "already active" case) and 1 on any
+ * failure; the caller treats a non-zero result as a failed VMFUNC.
+ */
+static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
+				     struct vmcs12 *vmcs12)
+{
+	struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+	u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
+	u64 new_eptp;
+	bool ad_enabled;
+
+	/* Both EPTP switching and EPT itself must be enabled in vmcs12. */
+	if (!(nested_cpu_has_eptp_switching(vmcs12) &&
+	      nested_cpu_has_ept(vmcs12)))
+		return 1;
+
+	/* The index must fall inside the EPTP list. */
+	if (index >= VMFUNC_EPTP_ENTRIES)
+		return 1;
+
+	/* Fetch the selected 8-byte list entry from guest memory. */
+	if (kvm_vcpu_read_guest_page(vcpu,
+				     vmcs12->eptp_list_address >> PAGE_SHIFT,
+				     &new_eptp, index * 8, 8))
+		return 1;
+
+	ad_enabled = !!(new_eptp & VMX_EPTP_AD_ENABLE_BIT);
+
+	/*
+	 * If the (L2) guest does a vmfunc to the currently
+	 * active ept pointer, we don't have to do anything else
+	 */
+	if (vmcs12->ept_pointer == new_eptp)
+		return 0;
+
+	if (!valid_ept_address(vcpu, new_eptp))
+		return 1;
+
+	kvm_mmu_unload(vcpu);
+	mmu->ept_ad = ad_enabled;
+	mmu->base_role.ad_disabled = !ad_enabled;
+	vmcs12->ept_pointer = new_eptp;
+	/*
+	 * TODO: Check what's the correct approach in case
+	 * mmu reload fails. Currently, we just let the next
+	 * reload potentially fail
+	 */
+	kvm_mmu_reload(vcpu);
+
+	return 0;
+}
+
+/*
+ * Exit handler for VMFUNC.
+ *
+ * The VM function number is taken from RAX. Outside guest (nested) mode the
+ * instruction is answered with #UD. In guest mode the function must be
+ * enabled in vmcs12->vm_function_control; function 0 is forwarded to
+ * nested_vmx_eptp_switching(), every other function fails. A failed VMFUNC
+ * is turned into a nested VM exit carrying the current exit reason,
+ * interruption info and exit qualification.
+ *
+ * Returns 1 when an exception or nested VM exit was queued, otherwise the
+ * result of kvm_skip_emulated_instruction().
+ */
+static int handle_vmfunc(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs12 *vmcs12;
+	u32 function = vcpu->arch.regs[VCPU_REGS_RAX];
+
+	/*
+	 * VMFUNC is only supported for nested guests, but we always enable the
+	 * secondary control for simplicity; for non-nested mode, fake that we
+	 * didn't by injecting #UD.
+	 */
+	if (!is_guest_mode(vcpu)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	/*
+	 * The VM-function controls form a 64-bit bitmap (the hardware field is
+	 * programmed with vmcs_write64), so only functions 0..63 can be named.
+	 * Reject anything larger with #UD instead of feeding it to the shift
+	 * below.
+	 * NOTE(review): the architecture is expected to raise #UD itself for
+	 * EAX > 63 before any VM exit, which would make this purely
+	 * defensive - confirm against the SDM.
+	 */
+	if (function > 63) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	vmcs12 = get_vmcs12(vcpu);
+	/*
+	 * Shift a 64-bit one: "1 << function" is an int shift, which is
+	 * undefined for counts >= 32 and would alias functions 32..63 onto
+	 * bits 0..31 of the control word.
+	 */
+	if ((vmcs12->vm_function_control & (1ULL << function)) == 0)
+		goto fail;
+
+	switch (function) {
+	case 0:
+		if (nested_vmx_eptp_switching(vcpu, vmcs12))
+			goto fail;
+		break;
+	default:
+		goto fail;
+	}
+	return kvm_skip_emulated_instruction(vcpu);
+
+fail:
+	/* Report the failed VMFUNC through a nested VM exit. */
+	nested_vmx_vmexit(vcpu, vmx->exit_reason,
+			  vmcs_read32(VM_EXIT_INTR_INFO),
+			  vmcs_readl(EXIT_QUALIFICATION));
+	return 1;
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -7901,9 +8156,12 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
[EXIT_REASON_INVEPT] = handle_invept,
[EXIT_REASON_INVVPID] = handle_invvpid,
+ [EXIT_REASON_RDRAND] = handle_invalid_op,
+ [EXIT_REASON_RDSEED] = handle_invalid_op,
[EXIT_REASON_XSAVES] = handle_xsaves,
[EXIT_REASON_XRSTORS] = handle_xrstors,
[EXIT_REASON_PML_FULL] = handle_pml_full,
+ [EXIT_REASON_VMFUNC] = handle_vmfunc,
[EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
};
@@ -8219,6 +8477,10 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
* table is L0's fault.
*/
return false;
+ case EXIT_REASON_INVPCID:
+ return
+ nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
+ nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
case EXIT_REASON_WBINVD:
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
case EXIT_REASON_XSETBV:
@@ -8236,6 +8498,9 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
case EXIT_REASON_PML_FULL:
/* We emulate PML support to L1. */
return false;
+ case EXIT_REASON_VMFUNC:
+ /* VM functions are emulated through L2->L0 vmexits. */
+ return false;
default:
return true;
}
@@ -8494,7 +8759,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
u32 vectoring_info = vmx->idt_vectoring_info;
trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
- vcpu->arch.gpa_available = false;
/*
* Flush logged GPAs PML buffer, this will make dirty_bitmap more
@@ -8786,7 +9050,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
desc = (gate_desc *)vmx->host_idt_base + vector;
- entry = gate_offset(*desc);
+ entry = gate_offset(desc);
asm volatile(
#ifdef CONFIG_X86_64
"mov %%" _ASM_SP ", %[sp]\n\t"
@@ -9020,8 +9284,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vmx_set_interrupt_shadow(vcpu, 0);
- if (vmx->guest_pkru_valid)
- __write_pkru(vmx->guest_pkru);
+ if (static_cpu_has(X86_FEATURE_PKU) &&
+ kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
+ vcpu->arch.pkru != vmx->host_pkru)
+ __write_pkru(vcpu->arch.pkru);
atomic_switch_perf_msrs(vmx);
debugctlmsr = get_debugctlmsr();
@@ -9169,13 +9435,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* back on host, so it is safe to read guest PKRU from current
* XSAVE.
*/
- if (boot_cpu_has(X86_FEATURE_OSPKE)) {
- vmx->guest_pkru = __read_pkru();
- if (vmx->guest_pkru != vmx->host_pkru) {
- vmx->guest_pkru_valid = true;
+ if (static_cpu_has(X86_FEATURE_PKU) &&
+ kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
+ vcpu->arch.pkru = __read_pkru();
+ if (vcpu->arch.pkru != vmx->host_pkru)
__write_pkru(vmx->host_pkru);
- } else
- vmx->guest_pkru_valid = false;
}
/*
@@ -9348,11 +9612,6 @@ static void __init vmx_check_processor_compat(void *rtn)
}
}
-static int get_ept_level(void)
-{
- return VMX_EPT_DEFAULT_GAW + 1;
-}
-
static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
u8 cache;
@@ -9469,39 +9728,13 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u32 secondary_exec_ctl = vmx_secondary_exec_control(vmx);
-
- if (vmx_rdtscp_supported()) {
- bool rdtscp_enabled = guest_cpuid_has_rdtscp(vcpu);
- if (!rdtscp_enabled)
- secondary_exec_ctl &= ~SECONDARY_EXEC_RDTSCP;
-
- if (nested) {
- if (rdtscp_enabled)
- vmx->nested.nested_vmx_secondary_ctls_high |=
- SECONDARY_EXEC_RDTSCP;
- else
- vmx->nested.nested_vmx_secondary_ctls_high &=
- ~SECONDARY_EXEC_RDTSCP;
- }
- }
- /* Exposing INVPCID only when PCID is exposed */
- best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
- if (vmx_invpcid_supported() &&
- (!best || !(best->ebx & bit(X86_FEATURE_INVPCID)) ||
- !guest_cpuid_has_pcid(vcpu))) {
- secondary_exec_ctl &= ~SECONDARY_EXEC_ENABLE_INVPCID;
-
- if (best)
- best->ebx &= ~bit(X86_FEATURE_INVPCID);
+ if (cpu_has_secondary_exec_ctrls()) {
+ vmx_compute_secondary_exec_control(vmx);
+ vmcs_set_secondary_exec_control(vmx->secondary_exec_control);
}
- if (cpu_has_secondary_exec_ctrls())
- vmcs_set_secondary_exec_control(secondary_exec_ctl);
-
if (nested_vmx_allowed(vcpu))
to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
@@ -9542,7 +9775,7 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu)
{
- return nested_ept_get_cr3(vcpu) & VMX_EPT_AD_ENABLE_BIT;
+ return nested_ept_get_cr3(vcpu) & VMX_EPTP_AD_ENABLE_BIT;
}
/* Callbacks for nested_ept_init_mmu_context: */
@@ -9555,18 +9788,15 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
- bool wants_ad;
-
WARN_ON(mmu_is_nested(vcpu));
- wants_ad = nested_ept_ad_enabled(vcpu);
- if (wants_ad && !enable_ept_ad_bits)
+ if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu)))
return 1;
kvm_mmu_unload(vcpu);
kvm_init_shadow_ept_mmu(vcpu,
to_vmx(vcpu)->nested.nested_vmx_ept_caps &
VMX_EPT_EXECUTE_ONLY_BIT,
- wants_ad);
+ nested_ept_ad_enabled(vcpu));
vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3;
vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
@@ -9617,6 +9847,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct page *page;
u64 hpa;
if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
@@ -9626,17 +9857,19 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
* physical address remains valid. We keep a reference
* to it so we can release it later.
*/
- if (vmx->nested.apic_access_page) /* shouldn't happen */
- nested_release_page(vmx->nested.apic_access_page);
- vmx->nested.apic_access_page =
- nested_get_page(vcpu, vmcs12->apic_access_addr);
+ if (vmx->nested.apic_access_page) { /* shouldn't happen */
+ kvm_release_page_dirty(vmx->nested.apic_access_page);
+ vmx->nested.apic_access_page = NULL;
+ }
+ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
/*
* If translation failed, no matter: This feature asks
* to exit when accessing the given address, and if it
* can never be accessed, this feature won't do
* anything anyway.
*/
- if (vmx->nested.apic_access_page) {
+ if (!is_error_page(page)) {
+ vmx->nested.apic_access_page = page;
hpa = page_to_phys(vmx->nested.apic_access_page);
vmcs_write64(APIC_ACCESS_ADDR, hpa);
} else {
@@ -9651,10 +9884,11 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
}
if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
- if (vmx->nested.virtual_apic_page) /* shouldn't happen */
- nested_release_page(vmx->nested.virtual_apic_page);
- vmx->nested.virtual_apic_page =
- nested_get_page(vcpu, vmcs12->virtual_apic_page_addr);
+ if (vmx->nested.virtual_apic_page) { /* shouldn't happen */
+ kvm_release_page_dirty(vmx->nested.virtual_apic_page);
+ vmx->nested.virtual_apic_page = NULL;
+ }
+ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr);
/*
* If translation failed, VM entry will fail because
@@ -9669,7 +9903,8 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
* control. But such a configuration is useless, so
* let's keep the code simple.
*/
- if (vmx->nested.virtual_apic_page) {
+ if (!is_error_page(page)) {
+ vmx->nested.virtual_apic_page = page;
hpa = page_to_phys(vmx->nested.virtual_apic_page);
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa);
}
@@ -9678,16 +9913,14 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
if (nested_cpu_has_posted_intr(vmcs12)) {
if (vmx->nested.pi_desc_page) { /* shouldn't happen */
kunmap(vmx->nested.pi_desc_page);
- nested_release_page(vmx->nested.pi_desc_page);
+ kvm_release_page_dirty(vmx->nested.pi_desc_page);
+ vmx->nested.pi_desc_page = NULL;
}
- vmx->nested.pi_desc_page =
- nested_get_page(vcpu, vmcs12->posted_intr_desc_addr);
- vmx->nested.pi_desc =
- (struct pi_desc *)kmap(vmx->nested.pi_desc_page);
- if (!vmx->nested.pi_desc) {
- nested_release_page_clean(vmx->nested.pi_desc_page);
+ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr);
+ if (is_error_page(page))
return;
- }
+ vmx->nested.pi_desc_page = page;
+ vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page);
vmx->nested.pi_desc =
(struct pi_desc *)((void *)vmx->nested.pi_desc +
(unsigned long)(vmcs12->posted_intr_desc_addr &
@@ -9753,6 +9986,18 @@ static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
return 0;
}
+/*
+ * Validate the TPR-shadow related fields of @vmcs12.
+ *
+ * When CPU_BASED_TPR_SHADOW is enabled in vmcs12, the virtual-APIC page
+ * address must pass page_address_valid(); otherwise there is nothing to
+ * check.
+ *
+ * Returns 0 when the controls are acceptable, -EINVAL otherwise.
+ */
+static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
+						struct vmcs12 *vmcs12)
+{
+	/* The address is only examined when the TPR shadow is in use. */
+	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW) &&
+	    !page_address_valid(vcpu, vmcs12->virtual_apic_page_addr))
+		return -EINVAL;
+
+	return 0;
+}
+
/*
* Merge L0's and L1's MSR bitmap, return false to indicate that
* we do not use the hardware.
@@ -9769,8 +10014,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
return false;
- page = nested_get_page(vcpu, vmcs12->msr_bitmap);
- if (!page)
+ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
+ if (is_error_page(page))
return false;
msr_bitmap_l1 = (unsigned long *)kmap(page);
@@ -9800,7 +10045,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
}
}
kunmap(page);
- nested_release_page_clean(page);
+ kvm_release_page_clean(page);
return true;
}
@@ -10194,13 +10439,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
enable_ept ? vmcs12->page_fault_error_code_match : 0);
if (cpu_has_secondary_exec_ctrls()) {
- exec_control = vmx_secondary_exec_control(vmx);
+ exec_control = vmx->secondary_exec_control;
/* Take the following fields only from vmcs12 */
exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_ENABLE_INVPCID |
SECONDARY_EXEC_RDTSCP |
+ SECONDARY_EXEC_XSAVES |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
- SECONDARY_EXEC_APIC_REGISTER_VIRT);
+ SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_ENABLE_VMFUNC);
if (nested_cpu_has(vmcs12,
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
@@ -10208,6 +10456,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
exec_control |= vmcs12_exec_ctrl;
}
+ /* All VMFUNCs are currently emulated through L0 vmexits. */
+ if (exec_control & SECONDARY_EXEC_ENABLE_VMFUNC)
+ vmcs_write64(VM_FUNCTION_CONTROL, 0);
+
if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
vmcs_write64(EOI_EXIT_BITMAP0,
vmcs12->eoi_exit_bitmap0);
@@ -10433,6 +10685,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
if (nested_vmx_check_apicv_controls(vcpu, vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
@@ -10460,6 +10715,18 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx->nested.nested_vmx_entry_ctls_high))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ if (nested_cpu_has_vmfunc(vmcs12)) {
+ if (vmcs12->vm_function_control &
+ ~vmx->nested.nested_vmx_vmfunc_controls)
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ if (nested_cpu_has_eptp_switching(vmcs12)) {
+ if (!nested_cpu_has_ept(vmcs12) ||
+ !page_address_valid(vcpu, vmcs12->eptp_list_address))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ }
+ }
+
if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
@@ -10706,7 +10973,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
u32 idt_vectoring;
unsigned int nr;
- if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
+ if (vcpu->arch.exception.injected) {
nr = vcpu->arch.exception.nr;
idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
@@ -10745,12 +11012,20 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long exit_qual;
- if (vcpu->arch.exception.pending ||
- vcpu->arch.nmi_injected ||
- vcpu->arch.interrupt.pending)
+ if (kvm_event_needs_reinjection(vcpu))
return -EBUSY;
+ if (vcpu->arch.exception.pending &&
+ nested_vmx_check_exception(vcpu, &exit_qual)) {
+ if (vmx->nested.nested_run_pending)
+ return -EBUSY;
+ nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+ vcpu->arch.exception.pending = false;
+ return 0;
+ }
+
if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
vmx->nested.preemption_timer_expired) {
if (vmx->nested.nested_run_pending)
@@ -11191,16 +11466,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
/* Unpin physical memory we referred to in vmcs02 */
if (vmx->nested.apic_access_page) {
- nested_release_page(vmx->nested.apic_access_page);
+ kvm_release_page_dirty(vmx->nested.apic_access_page);
vmx->nested.apic_access_page = NULL;
}
if (vmx->nested.virtual_apic_page) {
- nested_release_page(vmx->nested.virtual_apic_page);
+ kvm_release_page_dirty(vmx->nested.virtual_apic_page);
vmx->nested.virtual_apic_page = NULL;
}
if (vmx->nested.pi_desc_page) {
kunmap(vmx->nested.pi_desc_page);
- nested_release_page(vmx->nested.pi_desc_page);
+ kvm_release_page_dirty(vmx->nested.pi_desc_page);
vmx->nested.pi_desc_page = NULL;
vmx->nested.pi_desc = NULL;
}
@@ -11376,14 +11651,14 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
- page = nested_get_page(vcpu, vmcs12->pml_address);
- if (!page)
+ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address);
+ if (is_error_page(page))
return 0;
pml_address = kmap(page);
pml_address[vmcs12->guest_pml_index--] = gpa;
kunmap(page);
- nested_release_page_clean(page);
+ kvm_release_page_clean(page);
}
return 0;
@@ -11682,8 +11957,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
- .get_pkru = vmx_get_pkru,
-
.tlb_flush = vmx_flush_tlb,
.run = vmx_vcpu_run,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d734aa8c5b4f..6069af86da3b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -54,6 +54,7 @@
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
+#include <linux/mem_encrypt.h>
#include <trace/events/kvm.h>
@@ -310,13 +311,13 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
u64 new_state = msr_info->data &
(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
- u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) |
- 0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE);
+ u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
+ (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
+ if ((msr_info->data & reserved_bits) || new_state == X2APIC_ENABLE)
+ return 1;
if (!msr_info->host_initiated &&
- ((msr_info->data & reserved_bits) != 0 ||
- new_state == X2APIC_ENABLE ||
- (new_state == MSR_IA32_APICBASE_ENABLE &&
+ ((new_state == MSR_IA32_APICBASE_ENABLE &&
old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
(new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
old_state == 0)))
@@ -389,15 +390,28 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
kvm_make_request(KVM_REQ_EVENT, vcpu);
- if (!vcpu->arch.exception.pending) {
+ if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
queue:
if (has_error && !is_protmode(vcpu))
has_error = false;
- vcpu->arch.exception.pending = true;
+ if (reinject) {
+ /*
+ * On vmentry, vcpu->arch.exception.pending is only
+ * true if an event injection was blocked by
+ * nested_run_pending. In that case, however,
+ * vcpu_enter_guest requests an immediate exit,
+ * and the guest shouldn't proceed far enough to
+ * need reinjection.
+ */
+ WARN_ON_ONCE(vcpu->arch.exception.pending);
+ vcpu->arch.exception.injected = true;
+ } else {
+ vcpu->arch.exception.pending = true;
+ vcpu->arch.exception.injected = false;
+ }
vcpu->arch.exception.has_error_code = has_error;
vcpu->arch.exception.nr = nr;
vcpu->arch.exception.error_code = error_code;
- vcpu->arch.exception.reinject = reinject;
return;
}
@@ -412,8 +426,13 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
class2 = exception_class(nr);
if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
|| (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
- /* generate double fault per SDM Table 5-5 */
+ /*
+ * Generate double fault per SDM Table 5-5. Set
+ * exception.pending = true so that the double fault
+ * can trigger a nested vmexit.
+ */
vcpu->arch.exception.pending = true;
+ vcpu->arch.exception.injected = false;
vcpu->arch.exception.has_error_code = true;
vcpu->arch.exception.nr = DF_VECTOR;
vcpu->arch.exception.error_code = 0;
@@ -754,19 +773,22 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (cr4 & CR4_RESERVED_BITS)
return 1;
- if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
return 1;
- if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP))
return 1;
- if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP))
return 1;
- if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE))
return 1;
- if (!guest_cpuid_has_pku(vcpu) && (cr4 & X86_CR4_PKE))
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE))
+ return 1;
+
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
return 1;
if (is_long_mode(vcpu)) {
@@ -779,7 +801,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
- if (!guest_cpuid_has_pcid(vcpu))
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
return 1;
/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
@@ -813,10 +835,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
return 0;
}
- if (is_long_mode(vcpu)) {
- if (cr3 & CR3_L_MODE_RESERVED_BITS)
- return 1;
- } else if (is_pae(vcpu) && is_paging(vcpu) &&
+ if (is_long_mode(vcpu) &&
+ (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 62)))
+ return 1;
+ else if (is_pae(vcpu) && is_paging(vcpu) &&
!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
return 1;
@@ -883,7 +905,7 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
{
u64 fixed = DR6_FIXED_1;
- if (!guest_cpuid_has_rtm(vcpu))
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
fixed |= DR6_RTM;
return fixed;
}
@@ -993,6 +1015,7 @@ static u32 emulated_msrs[] = {
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
+ HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
HV_X64_MSR_RESET,
@@ -1021,21 +1044,11 @@ bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
if (efer & efer_reserved_bits)
return false;
- if (efer & EFER_FFXSR) {
- struct kvm_cpuid_entry2 *feat;
-
- feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
- if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
+ if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
return false;
- }
-
- if (efer & EFER_SVME) {
- struct kvm_cpuid_entry2 *feat;
- feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
- if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
+ if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
return false;
- }
return true;
}
@@ -1083,7 +1096,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_KERNEL_GS_BASE:
case MSR_CSTAR:
case MSR_LSTAR:
- if (is_noncanonical_address(msr->data))
+ if (is_noncanonical_address(msr->data, vcpu))
return 1;
break;
case MSR_IA32_SYSENTER_EIP:
@@ -1100,7 +1113,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
* value, and that something deterministic happens if the guest
* invokes 64-bit SYSENTER.
*/
- msr->data = get_canonical(msr->data);
+ msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu));
}
return kvm_x86_ops->set_msr(vcpu, msr);
}
@@ -1533,8 +1546,9 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
- if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
+ if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST))
update_ia32_tsc_adjust_msr(vcpu, offset);
+
kvm_vcpu_write_tsc_offset(vcpu, offset);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
@@ -2184,7 +2198,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
kvm_set_lapic_tscdeadline_msr(vcpu, data);
break;
case MSR_IA32_TSC_ADJUST:
- if (guest_cpuid_has_tsc_adjust(vcpu)) {
+ if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
if (!msr_info->host_initiated) {
s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
adjust_tsc_offset_guest(vcpu, adj);
@@ -2306,12 +2320,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
break;
case MSR_AMD64_OSVW_ID_LENGTH:
- if (!guest_cpuid_has_osvw(vcpu))
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
return 1;
vcpu->arch.osvw.length = data;
break;
case MSR_AMD64_OSVW_STATUS:
- if (!guest_cpuid_has_osvw(vcpu))
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
return 1;
vcpu->arch.osvw.status = data;
break;
@@ -2536,12 +2550,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0xbe702111;
break;
case MSR_AMD64_OSVW_ID_LENGTH:
- if (!guest_cpuid_has_osvw(vcpu))
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
return 1;
msr_info->data = vcpu->arch.osvw.length;
break;
case MSR_AMD64_OSVW_STATUS:
- if (!guest_cpuid_has_osvw(vcpu))
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
return 1;
msr_info->data = vcpu->arch.osvw.status;
break;
@@ -2881,6 +2895,10 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
int idx;
+
+ if (vcpu->preempted)
+ vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu);
+
/*
* Disable page faults because we're in atomic context here.
* kvm_write_guest_offset_cached() would call might_fault()
@@ -3073,8 +3091,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
process_nmi(vcpu);
+ /*
+ * FIXME: pass injected and pending separately. This is only
+ * needed for nested virtualization, whose state cannot be
+ * migrated yet. For now we can combine them.
+ */
events->exception.injected =
- vcpu->arch.exception.pending &&
+ (vcpu->arch.exception.pending ||
+ vcpu->arch.exception.injected) &&
!kvm_exception_is_soft(vcpu->arch.exception.nr);
events->exception.nr = vcpu->arch.exception.nr;
events->exception.has_error_code = vcpu->arch.exception.has_error_code;
@@ -3129,6 +3153,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
return -EINVAL;
process_nmi(vcpu);
+ vcpu->arch.exception.injected = false;
vcpu->arch.exception.pending = events->exception.injected;
vcpu->arch.exception.nr = events->exception.nr;
vcpu->arch.exception.has_error_code = events->exception.has_error_code;
@@ -3245,7 +3270,12 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
u32 size, offset, ecx, edx;
cpuid_count(XSTATE_CPUID, index,
&size, &offset, &ecx, &edx);
- memcpy(dest + offset, src, size);
+ if (feature == XFEATURE_MASK_PKRU)
+ memcpy(dest + offset, &vcpu->arch.pkru,
+ sizeof(vcpu->arch.pkru));
+ else
+ memcpy(dest + offset, src, size);
+
}
valid -= feature;
@@ -3283,7 +3313,11 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
u32 size, offset, ecx, edx;
cpuid_count(XSTATE_CPUID, index,
&size, &offset, &ecx, &edx);
- memcpy(dest, src + offset, size);
+ if (feature == XFEATURE_MASK_PKRU)
+ memcpy(&vcpu->arch.pkru, src + offset,
+ sizeof(vcpu->arch.pkru));
+ else
+ memcpy(dest, src + offset, size);
}
valid -= feature;
@@ -4661,25 +4695,18 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
*/
if (vcpu->arch.gpa_available &&
emulator_can_use_gpa(ctxt) &&
- vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) &&
- (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
- gpa = exception->address;
- goto mmio;
+ (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) {
+ gpa = vcpu->arch.gpa_val;
+ ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
+ } else {
+ ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
+ if (ret < 0)
+ return X86EMUL_PROPAGATE_FAULT;
}
- ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
-
- if (ret < 0)
- return X86EMUL_PROPAGATE_FAULT;
-
- /* For APIC access vmexit */
- if (ret)
- goto mmio;
-
- if (ops->read_write_emulate(vcpu, gpa, val, bytes))
+ if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
return X86EMUL_CONTINUE;
-mmio:
/*
* Is this MMIO handled locally?
*/
@@ -5217,10 +5244,10 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
}
-static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
- u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
+ u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit)
{
- kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
+ return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit);
}
static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
@@ -6116,7 +6143,7 @@ int kvm_arch_init(void *opaque)
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0,
- PT_PRESENT_MASK, 0);
+ PT_PRESENT_MASK, 0, sme_me_mask);
kvm_timer_init();
perf_register_guest_info_callbacks(&kvm_guest_cbs);
@@ -6352,11 +6379,42 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
int r;
/* try to reinject previous events if any */
+ if (vcpu->arch.exception.injected) {
+ kvm_x86_ops->queue_exception(vcpu);
+ return 0;
+ }
+
+ /*
+ * Exceptions must be injected immediately, or the exception
+ * frame will have the address of the NMI or interrupt handler.
+ */
+ if (!vcpu->arch.exception.pending) {
+ if (vcpu->arch.nmi_injected) {
+ kvm_x86_ops->set_nmi(vcpu);
+ return 0;
+ }
+
+ if (vcpu->arch.interrupt.pending) {
+ kvm_x86_ops->set_irq(vcpu);
+ return 0;
+ }
+ }
+
+ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
+ r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
+ if (r != 0)
+ return r;
+ }
+
+ /* try to inject new event if pending */
if (vcpu->arch.exception.pending) {
trace_kvm_inj_exception(vcpu->arch.exception.nr,
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code);
+ vcpu->arch.exception.pending = false;
+ vcpu->arch.exception.injected = true;
+
if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
X86_EFLAGS_RF);
@@ -6368,27 +6426,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
}
kvm_x86_ops->queue_exception(vcpu);
- return 0;
- }
-
- if (vcpu->arch.nmi_injected) {
- kvm_x86_ops->set_nmi(vcpu);
- return 0;
- }
-
- if (vcpu->arch.interrupt.pending) {
- kvm_x86_ops->set_irq(vcpu);
- return 0;
- }
-
- if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
- r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
- if (r != 0)
- return r;
- }
-
- /* try to inject new event if pending */
- if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
+ } else if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
vcpu->arch.smi_pending = false;
enter_smm(vcpu);
} else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
@@ -6605,7 +6643,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
vcpu->arch.hflags |= HF_SMM_MASK;
memset(buf, 0, 512);
- if (guest_cpuid_has_longmode(vcpu))
+ if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
enter_smm_save_state_64(vcpu, buf);
else
enter_smm_save_state_32(vcpu, buf);
@@ -6657,7 +6695,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
- if (guest_cpuid_has_longmode(vcpu))
+ if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
kvm_x86_ops->set_efer(vcpu, 0);
kvm_update_cpuid(vcpu);
@@ -6725,17 +6763,6 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
-void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
- unsigned long address)
-{
- /*
- * The physical address of apic access page is stored in the VMCS.
- * Update it when it becomes invalid.
- */
- if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
- kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
-}
-
/*
* Returns 1 to let vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the
@@ -6775,6 +6802,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+ vcpu->mmio_needed = 0;
r = 0;
goto out;
}
@@ -6863,6 +6891,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_ops->enable_nmi_window(vcpu);
if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
kvm_x86_ops->enable_irq_window(vcpu);
+ WARN_ON(vcpu->arch.exception.pending);
}
if (kvm_lapic_enabled(vcpu)) {
@@ -7005,6 +7034,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (vcpu->arch.apic_attention)
kvm_lapic_sync_from_vapic(vcpu);
+ vcpu->arch.gpa_available = false;
r = kvm_x86_ops->handle_exit(vcpu);
return r;
@@ -7423,7 +7453,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int pending_vec, max_bits, idx;
struct desc_ptr dt;
- if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE))
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+ (sregs->cr4 & X86_CR4_OSXSAVE))
+ return -EINVAL;
+
+ apic_base_msr.data = sregs->apic_base;
+ apic_base_msr.host_initiated = true;
+ if (kvm_set_apic_base(vcpu, &apic_base_msr))
return -EINVAL;
dt.size = sregs->idt.limit;
@@ -7442,9 +7478,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
kvm_x86_ops->set_efer(vcpu, sregs->efer);
- apic_base_msr.data = sregs->apic_base;
- apic_base_msr.host_initiated = true;
- kvm_set_apic_base(vcpu, &apic_base_msr);
mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
@@ -7633,7 +7666,9 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
*/
vcpu->guest_fpu_loaded = 1;
__kernel_fpu_begin();
- __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
+ /* PKRU is separately restored in kvm_x86_ops->run. */
+ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
+ ~XFEATURE_MASK_PKRU);
trace_kvm_fpu(1);
}
@@ -7733,6 +7768,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.nmi_injected = false;
kvm_clear_interrupt_queue(vcpu);
kvm_clear_exception_queue(vcpu);
+ vcpu->arch.exception.pending = false;
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
kvm_update_dr0123(vcpu);
@@ -7992,6 +8028,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
kvm_pmu_init(vcpu);
vcpu->arch.pending_external_vector = -1;
+ vcpu->arch.preempted_in_kernel = false;
kvm_hv_vcpu_init(vcpu);
@@ -8439,6 +8476,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
}
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.preempted_in_kernel;
+}
+
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 612067074905..51e349cf5f45 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -11,7 +11,7 @@
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
{
- vcpu->arch.exception.pending = false;
+ vcpu->arch.exception.injected = false;
}
static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
@@ -29,7 +29,7 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
+ return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending ||
vcpu->arch.nmi_injected;
}
@@ -62,6 +62,16 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu)
return cs_l;
}
+static inline bool is_la57_mode(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_X86_64
+ return (vcpu->arch.efer & EFER_LMA) &&
+ kvm_read_cr4_bits(vcpu, X86_CR4_LA57);
+#else
+ return 0;
+#endif
+}
+
static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
{
return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
@@ -87,10 +97,48 @@ static inline u32 bit(int bitno)
return 1 << (bitno & 31);
}
+static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
+{
+ return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
+}
+
+static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
+{
+ return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
+}
+
+static inline u64 get_canonical(u64 la, u8 vaddr_bits)
+{
+ return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
+}
+
+static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_X86_64
+ return get_canonical(la, vcpu_virt_addr_bits(vcpu)) != la;
+#else
+ return false;
+#endif
+}
+
+static inline bool emul_is_noncanonical_address(u64 la,
+ struct x86_emulate_ctxt *ctxt)
+{
+#ifdef CONFIG_X86_64
+ return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la;
+#else
+ return false;
+#endif
+}
+
static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
gva_t gva, gfn_t gfn, unsigned access)
{
- vcpu->arch.mmio_gva = gva & PAGE_MASK;
+ /*
+ * If this is a shadow nested page table, the "GVA" is
+ * actually a nGPA.
+ */
+ vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK;
vcpu->arch.access = access;
vcpu->arch.mmio_gfn = gfn;
vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
deleted file mode 100644
index 08f41caada45..000000000000
--- a/arch/x86/lguest/Kconfig
+++ /dev/null
@@ -1,14 +0,0 @@
-config LGUEST_GUEST
- bool "Lguest guest support"
- depends on X86_32 && PARAVIRT && PCI
- select TTY
- select VIRTUALIZATION
- select VIRTIO
- select VIRTIO_CONSOLE
- help
- Lguest is a tiny in-kernel hypervisor. Selecting this will
- allow your kernel to boot under lguest. This option will increase
- your kernel size by about 10k. If in doubt, say N.
-
- If you say Y here, make sure you say Y (or M) to the virtio block
- and net drivers which lguest needs.
diff --git a/arch/x86/lguest/Makefile b/arch/x86/lguest/Makefile
deleted file mode 100644
index 8f38d577a2fa..000000000000
--- a/arch/x86/lguest/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-obj-y := head_32.o boot.o
-CFLAGS_boot.o := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
deleted file mode 100644
index 99472698c931..000000000000
--- a/arch/x86/lguest/boot.c
+++ /dev/null
@@ -1,1558 +0,0 @@
-/*P:010
- * A hypervisor allows multiple Operating Systems to run on a single machine.
- * To quote David Wheeler: "Any problem in computer science can be solved with
- * another layer of indirection."
- *
- * We keep things simple in two ways. First, we start with a normal Linux
- * kernel and insert a module (lg.ko) which allows us to run other Linux
- * kernels the same way we'd run processes. We call the first kernel the Host,
- * and the others the Guests. The program which sets up and configures Guests
- * (such as the example in tools/lguest/lguest.c) is called the Launcher.
- *
- * Secondly, we only run specially modified Guests, not normal kernels: setting
- * CONFIG_LGUEST_GUEST to "y" compiles this file into the kernel so it knows
- * how to be a Guest at boot time. This means that you can use the same kernel
- * you boot normally (ie. as a Host) as a Guest.
- *
- * These Guests know that they cannot do privileged operations, such as disable
- * interrupts, and that they have to ask the Host to do such things explicitly.
- * This file consists of all the replacements for such low-level native
- * hardware operations: these special Guest versions call the Host.
- *
- * So how does the kernel know it's a Guest? We'll see that later, but let's
- * just say that we end up here where we replace the native functions in various
- * "paravirt" structures with our Guest versions, then boot like normal.
-:*/
-
-/*
- * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#include <linux/kernel.h>
-#include <linux/start_kernel.h>
-#include <linux/string.h>
-#include <linux/console.h>
-#include <linux/screen_info.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
-#include <linux/clockchips.h>
-#include <linux/lguest.h>
-#include <linux/lguest_launcher.h>
-#include <linux/virtio_console.h>
-#include <linux/pm.h>
-#include <linux/export.h>
-#include <linux/pci.h>
-#include <linux/virtio_pci.h>
-#include <asm/acpi.h>
-#include <asm/apic.h>
-#include <asm/lguest.h>
-#include <asm/paravirt.h>
-#include <asm/param.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-#include <asm/setup.h>
-#include <asm/e820/api.h>
-#include <asm/mce.h>
-#include <asm/io.h>
-#include <asm/fpu/api.h>
-#include <asm/stackprotector.h>
-#include <asm/reboot.h> /* for struct machine_ops */
-#include <asm/kvm_para.h>
-#include <asm/pci_x86.h>
-#include <asm/pci-direct.h>
-
-/*G:010
- * Welcome to the Guest!
- *
- * The Guest in our tale is a simple creature: identical to the Host but
- * behaving in simplified but equivalent ways. In particular, the Guest is the
- * same kernel as the Host (or at least, built from the same source code).
-:*/
-
-struct lguest_data lguest_data = {
- .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
- .noirq_iret = (u32)lguest_noirq_iret,
- .kernel_address = PAGE_OFFSET,
- .blocked_interrupts = { 1 }, /* Block timer interrupts */
- .syscall_vec = IA32_SYSCALL_VECTOR,
-};
-
-/*G:037
- * async_hcall() is pretty simple: I'm quite proud of it really. We have a
- * ring buffer of stored hypercalls which the Host will run through next time we
- * do a normal hypercall. Each entry in the ring has 5 slots for the hypercall
- * arguments, and a "hcall_status" word which is 0 if the call is ready to go,
- * and 255 once the Host has finished with it.
- *
- * If we come around to a slot which hasn't been finished, then the table is
- * full and we just make the hypercall directly. This has the nice side
- * effect of causing the Host to run all the stored calls in the ring buffer
- * which empties it for next time!
- */
-static void async_hcall(unsigned long call, unsigned long arg1,
- unsigned long arg2, unsigned long arg3,
- unsigned long arg4)
-{
- /* Note: This code assumes we're uniprocessor. */
- static unsigned int next_call;
- unsigned long flags;
-
- /*
- * Disable interrupts if not already disabled: we don't want an
- * interrupt handler making a hypercall while we're already doing
- * one!
- */
- local_irq_save(flags);
- if (lguest_data.hcall_status[next_call] != 0xFF) {
- /* Table full, so do normal hcall which will flush table. */
- hcall(call, arg1, arg2, arg3, arg4);
- } else {
- lguest_data.hcalls[next_call].arg0 = call;
- lguest_data.hcalls[next_call].arg1 = arg1;
- lguest_data.hcalls[next_call].arg2 = arg2;
- lguest_data.hcalls[next_call].arg3 = arg3;
- lguest_data.hcalls[next_call].arg4 = arg4;
- /* Arguments must all be written before we mark it to go */
- wmb();
- lguest_data.hcall_status[next_call] = 0;
- if (++next_call == LHCALL_RING_SIZE)
- next_call = 0;
- }
- local_irq_restore(flags);
-}
-
-/*G:035
- * Notice the lazy_hcall() above, rather than hcall(). This is our first real
- * optimization trick!
- *
- * When lazy_mode is set, it means we're allowed to defer all hypercalls and do
- * them as a batch when lazy_mode is eventually turned off. Because hypercalls
- * are reasonably expensive, batching them up makes sense. For example, a
- * large munmap might update dozens of page table entries: that code calls
- * paravirt_enter_lazy_mmu(), does the dozen updates, then calls
- * lguest_leave_lazy_mmu_mode().
- *
- * So, when we're in lazy mode, we call async_hcall() to store the call for
- * future processing:
- */
-static void lazy_hcall1(unsigned long call, unsigned long arg1)
-{
- if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
- hcall(call, arg1, 0, 0, 0);
- else
- async_hcall(call, arg1, 0, 0, 0);
-}
-
-/* You can imagine what lazy_hcall2, 3 and 4 look like. :*/
-static void lazy_hcall2(unsigned long call,
- unsigned long arg1,
- unsigned long arg2)
-{
- if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
- hcall(call, arg1, arg2, 0, 0);
- else
- async_hcall(call, arg1, arg2, 0, 0);
-}
-
-static void lazy_hcall3(unsigned long call,
- unsigned long arg1,
- unsigned long arg2,
- unsigned long arg3)
-{
- if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
- hcall(call, arg1, arg2, arg3, 0);
- else
- async_hcall(call, arg1, arg2, arg3, 0);
-}
-
-#ifdef CONFIG_X86_PAE
-static void lazy_hcall4(unsigned long call,
- unsigned long arg1,
- unsigned long arg2,
- unsigned long arg3,
- unsigned long arg4)
-{
- if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
- hcall(call, arg1, arg2, arg3, arg4);
- else
- async_hcall(call, arg1, arg2, arg3, arg4);
-}
-#endif
-
-/*G:036
- * When lazy mode is turned off, we issue the do-nothing hypercall to
- * flush any stored calls, and call the generic helper to reset the
- * per-cpu lazy mode variable.
- */
-static void lguest_leave_lazy_mmu_mode(void)
-{
- hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
- paravirt_leave_lazy_mmu();
-}
-
-/*
- * We also catch the end of context switch; we enter lazy mode for much of
- * that too, so again we need to flush here.
- *
- * (Technically, this is lazy CPU mode, and normally we're in lazy MMU
- * mode, but unlike Xen, lguest doesn't care about the difference).
- */
-static void lguest_end_context_switch(struct task_struct *next)
-{
- hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
- paravirt_end_context_switch(next);
-}
-
-/*G:032
- * After that diversion we return to our first native-instruction
- * replacements: four functions for interrupt control.
- *
- * The simplest way of implementing these would be to have "turn interrupts
- * off" and "turn interrupts on" hypercalls. Unfortunately, this is too slow:
- * these are by far the most commonly called functions of those we override.
- *
- * So instead we keep an "irq_enabled" field inside our "struct lguest_data",
- * which the Guest can update with a single instruction. The Host knows to
- * check there before it tries to deliver an interrupt.
- */
-
-/*
- * save_flags() is expected to return the processor state (ie. "flags"). The
- * flags word contains all kinds of stuff, but in practice Linux only cares
- * about the interrupt flag. Our "save_flags()" just returns that.
- */
-asmlinkage __visible unsigned long lguest_save_fl(void)
-{
- return lguest_data.irq_enabled;
-}
-
-/* Interrupts go off... */
-asmlinkage __visible void lguest_irq_disable(void)
-{
- lguest_data.irq_enabled = 0;
-}
-
-/*
- * Let's pause a moment. Remember how I said these are called so often?
- * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to
- * break some rules. In particular, these functions are assumed to save their
- * own registers if they need to: normal C functions assume they can trash the
- * eax register. To use normal C functions, we use
- * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the
- * C function, then restores it.
- */
-PV_CALLEE_SAVE_REGS_THUNK(lguest_save_fl);
-PV_CALLEE_SAVE_REGS_THUNK(lguest_irq_disable);
-/*:*/
-
-/* These are in head_32.S */
-extern void lg_irq_enable(void);
-extern void lg_restore_fl(unsigned long flags);
-
-/*M:003
- * We could be more efficient in our checking of outstanding interrupts, rather
- * than using a branch. One way would be to put the "irq_enabled" field in a
- * page by itself, and have the Host write-protect it when an interrupt comes
- * in when irqs are disabled. There will then be a page fault as soon as
- * interrupts are re-enabled.
- *
- * A better method is to implement soft interrupt disable generally for x86:
- * instead of disabling interrupts, we set a flag. If an interrupt does come
- * in, we then disable them for real. This is uncommon, so we could simply use
- * a hypercall for interrupt control and not worry about efficiency.
-:*/
-
-/*G:034
- * The Interrupt Descriptor Table (IDT).
- *
- * The IDT tells the processor what to do when an interrupt comes in. Each
- * entry in the table is a 64-bit descriptor: this holds the privilege level,
- * address of the handler, and... well, who cares? The Guest just asks the
- * Host to make the change anyway, because the Host controls the real IDT.
- */
-static void lguest_write_idt_entry(gate_desc *dt,
- int entrynum, const gate_desc *g)
-{
- /*
- * The gate_desc structure is 8 bytes long: we hand it to the Host in
- * two 32-bit chunks. The whole 32-bit kernel used to hand descriptors
- * around like this; typesafety wasn't a big concern in Linux's early
- * years.
- */
- u32 *desc = (u32 *)g;
- /* Keep the local copy up to date. */
- native_write_idt_entry(dt, entrynum, g);
- /* Tell Host about this new entry. */
- hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1], 0);
-}
-
-/*
- * Changing to a different IDT is very rare: we keep the IDT up-to-date every
- * time it is written, so we can simply loop through all entries and tell the
- * Host about them.
- */
-static void lguest_load_idt(const struct desc_ptr *desc)
-{
- unsigned int i;
- struct desc_struct *idt = (void *)desc->address;
-
- for (i = 0; i < (desc->size+1)/8; i++)
- hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b, 0);
-}
-
-/*
- * The Global Descriptor Table.
- *
- * The Intel architecture defines another table, called the Global Descriptor
- * Table (GDT). You tell the CPU where it is (and its size) using the "lgdt"
- * instruction, and then several other instructions refer to entries in the
- * table. There are three entries which the Switcher needs, so the Host simply
- * controls the entire thing and the Guest asks it to make changes using the
- * LOAD_GDT hypercall.
- *
- * This is exactly like the IDT code.
- */
-static void lguest_load_gdt(const struct desc_ptr *desc)
-{
- unsigned int i;
- struct desc_struct *gdt = (void *)desc->address;
-
- for (i = 0; i < (desc->size+1)/8; i++)
- hcall(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b, 0);
-}
-
-/*
- * For a single GDT entry which changes, we simply change our copy and
- * then tell the host about it.
- */
-static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
- const void *desc, int type)
-{
- native_write_gdt_entry(dt, entrynum, desc, type);
- /* Tell Host about this new entry. */
- hcall(LHCALL_LOAD_GDT_ENTRY, entrynum,
- dt[entrynum].a, dt[entrynum].b, 0);
-}
-
-/*
- * There are three "thread local storage" GDT entries which change
- * on every context switch (these three entries are how glibc implements
- * __thread variables). As an optimization, we have a hypercall
- * specifically for this case.
- *
- * Wouldn't it be nicer to have a general LOAD_GDT_ENTRIES hypercall
- * which took a range of entries?
- */
-static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
-{
- /*
- * There's one problem which normal hardware doesn't have: the Host
- * can't handle us removing entries we're currently using. So we clear
- * the GS register here: if it's needed it'll be reloaded anyway.
- */
- lazy_load_gs(0);
- lazy_hcall2(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu);
-}
-
-/*G:038
- * That's enough excitement for now, back to ploughing through each of the
- * different pv_ops structures (we're about 1/3 of the way through).
- *
- * This is the Local Descriptor Table, another weird Intel thingy. Linux only
- * uses this for some strange applications like Wine. We don't do anything
- * here, so they'll get an informative and friendly Segmentation Fault.
- */
-static void lguest_set_ldt(const void *addr, unsigned entries)
-{
-}
-
-/*
- * This loads a GDT entry into the "Task Register": that entry points to a
- * structure called the Task State Segment. Some comments scattered through the
- * kernel code indicate that this was used for task switching in ages past, along
- * with blood sacrifice and astrology.
- *
- * Now there's nothing interesting in here that we don't get told elsewhere.
- * But the native version uses the "ltr" instruction, which makes the Host
- * complain to the Guest about a Segmentation Fault and it'll oops. So we
- * override the native version with a do-nothing version.
- */
-static void lguest_load_tr_desc(void)
-{
-}
-
-/*
- * The "cpuid" instruction is a way of querying both the CPU identity
- * (manufacturer, model, etc) and its features. It was introduced before the
- * Pentium in 1993 and keeps getting extended by Intel, AMD and others.
- * As you might imagine, after a decade and a half of this treatment, it is now a
- * giant ball of hair. Its entry in the current Intel manual runs to 28 pages.
- *
- * This instruction even has its own Wikipedia entry. The Wikipedia entry
- * has been translated into 6 languages. I am not making this up!
- *
- * We could get funky here and identify ourselves as "GenuineLguest", but
- * instead we just use the real "cpuid" instruction. Then I pretty much turned
- * off feature bits until the Guest booted. (Don't say that: you'll damage
- * lguest sales!) Shut up, inner voice! (Hey, just pointing out that this is
- * hardly future proof.) No one's listening! They don't like you anyway,
- * parenthetic weirdo!
- *
- * Replacing the cpuid so we can turn features off is great for the kernel, but
- * anyone (including userspace) can just use the raw "cpuid" instruction and
- * the Host won't even notice since it isn't privileged. So we try not to get
- * too worked up about it.
- */
-static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
- unsigned int *cx, unsigned int *dx)
-{
- int function = *ax;
-
- native_cpuid(ax, bx, cx, dx);
- switch (function) {
- /*
- * CPUID 0 gives the highest legal CPUID number (and the ID string).
- * We futureproof our code a little by sticking to known CPUID values.
- */
- case 0:
- if (*ax > 5)
- *ax = 5;
- break;
-
- /*
- * CPUID 1 is a basic feature request.
- *
- * CX: we only allow kernel to see SSE3, CMPXCHG16B and SSSE3
- * DX: SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU and PAE.
- */
- case 1:
- *cx &= 0x00002201;
- *dx &= 0x07808151;
- /*
- * The Host can do a nice optimization if it knows that the
- * kernel mappings (addresses above 0xC0000000 or whatever
- * PAGE_OFFSET is set to) haven't changed. But Linux calls
- * flush_tlb_user() for both user and kernel mappings unless
- * the Page Global Enable (PGE) feature bit is set.
- */
- *dx |= 0x00002000;
- /*
- * We also lie, and say we're family id 5. 6 or greater
- * leads to a rdmsr in early_init_intel which we can't handle.
- * Family ID is returned as bits 8-11 in ax.
- */
- *ax &= 0xFFFFF0FF;
- *ax |= 0x00000500;
- break;
-
- /*
- * This is used to detect if we're running under KVM. We might be,
- * but that's a Host matter, not us. So say we're not.
- */
- case KVM_CPUID_SIGNATURE:
- *bx = *cx = *dx = 0;
- break;
-
- /*
- * 0x80000000 returns the highest Extended Function, so we futureproof
- * like we do above by limiting it to known fields.
- */
- case 0x80000000:
- if (*ax > 0x80000008)
- *ax = 0x80000008;
- break;
-
- /*
- * PAE systems can mark pages as non-executable. Linux calls this the
- * NX bit. Intel calls it XD (eXecute Disable), AMD EVP (Enhanced
- * Virus Protection). We just switch it off here, since we don't
- * support it.
- */
- case 0x80000001:
- *dx &= ~(1 << 20);
- break;
- }
-}
-
-/*
- * Intel has four control registers, imaginatively named cr0, cr2, cr3 and cr4.
- * I assume there's a cr1, but it hasn't bothered us yet, so we'll not bother
- * it. The Host needs to know when the Guest wants to change them, so we have
- * a whole series of functions like read_cr0() and write_cr0().
- *
- * We start with cr0. cr0 allows you to turn on and off all kinds of basic
- * features, but the only cr0 bit that Linux ever used at runtime was the
- * horrifically-named Task Switched (TS) bit at bit 3 (ie. 8)
- *
- * What does the TS bit do? Well, it causes the CPU to trap (interrupt 7) if
- * the floating point unit is used. Which allows us to restore FPU state
- * lazily after a task switch if we wanted to, but wouldn't a name like
- * "FPUTRAP bit" be a little less cryptic?
- *
- * Fortunately, Linux keeps it simple and doesn't use TS, so we can ignore
- * cr0.
- */
-static void lguest_write_cr0(unsigned long val)
-{
-}
-
-static unsigned long lguest_read_cr0(void)
-{
- return 0;
-}
-
-/*
- * cr2 is the virtual address of the last page fault, which the Guest only ever
- * reads. The Host kindly writes this into our "struct lguest_data", so we
- * just read it out of there.
- */
-static unsigned long lguest_read_cr2(void)
-{
- return lguest_data.cr2;
-}
-
-/* See lguest_set_pte() below. */
-static bool cr3_changed = false;
-static unsigned long current_cr3;
-
-/*
- * cr3 is the current toplevel pagetable page: the principle is the same as
- * cr0. Keep a local copy, and tell the Host when it changes.
- */
-static void lguest_write_cr3(unsigned long cr3)
-{
- lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
- current_cr3 = cr3;
-
- /* These two page tables are simple, linear, and used during boot */
- if (cr3 != __pa_symbol(swapper_pg_dir) &&
- cr3 != __pa_symbol(initial_page_table))
- cr3_changed = true;
-}
-
-static unsigned long lguest_read_cr3(void)
-{
- return current_cr3;
-}
-
-/* cr4 is used to enable and disable PGE, but we don't care. */
-static unsigned long lguest_read_cr4(void)
-{
- return 0;
-}
-
-static void lguest_write_cr4(unsigned long val)
-{
-}
-
-/*
- * Page Table Handling.
- *
- * Now would be a good time to take a rest and grab a coffee or similarly
- * relaxing stimulant. The easy parts are behind us, and the trek gradually
- * winds uphill from here.
- *
- * Quick refresher: memory is divided into "pages" of 4096 bytes each. The CPU
- * maps virtual addresses to physical addresses using "page tables". We could
- * use one huge index of 1 million entries: each address is 4 bytes, so that's
- * 1024 pages just to hold the page tables. But since most virtual addresses
- * are unused, we use a two level index which saves space. The cr3 register
- * contains the physical address of the top level "page directory" page, which
- * contains physical addresses of up to 1024 second-level pages. Each of these
- * second level pages contains up to 1024 physical addresses of actual pages,
- * or Page Table Entries (PTEs).
- *
- * Here's a diagram, where arrows indicate physical addresses:
- *
- * cr3 ---> +---------+
- * | --------->+---------+
- * | | | PADDR1 |
- * Mid-level | | PADDR2 |
- * (PMD) page | | |
- * | | Lower-level |
- * | | (PTE) page |
- * | | | |
- * .... ....
- *
- * So to convert a virtual address to a physical address, we look up the top
- * level, which points us to the second level, which gives us the physical
- * address of that page. If the top level entry was not present, or the second
- * level entry was not present, then the virtual address is invalid (we
- * say "the page was not mapped").
- *
- * Put another way, a 32-bit virtual address is divided up like so:
- *
- * 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- * |<---- 10 bits ---->|<---- 10 bits ---->|<------ 12 bits ------>|
- * Index into top Index into second Offset within page
- * page directory page pagetable page
- *
- * Now, unfortunately, this isn't the whole story: Intel added Physical Address
- * Extension (PAE) to allow 32 bit systems to use 64GB of memory (ie. 36 bits).
- * These are held in 64-bit page table entries, so we can now only fit 512
- * entries in a page, and the neat three-level tree breaks down.
- *
- * The result is a four level page table:
- *
- * cr3 --> [ 4 Upper ]
- * [ Level ]
- * [ Entries ]
- * [(PUD Page)]---> +---------+
- * | --------->+---------+
- * | | | PADDR1 |
- * Mid-level | | PADDR2 |
- * (PMD) page | | |
- * | | Lower-level |
- * | | (PTE) page |
- * | | | |
- * .... ....
- *
- *
- * And the virtual address is decoded as:
- *
- * 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
- * |<-2->|<--- 9 bits ---->|<---- 9 bits --->|<------ 12 bits ------>|
- * Index into Index into mid Index into lower Offset within page
- * top entries directory page pagetable page
- *
- * It's too hard to switch between these two formats at runtime, so Linux only
- * supports one or the other depending on whether CONFIG_X86_PAE is set. Many
- * distributions turn it on, and not just for people with silly amounts of
- * memory: the larger PTE entries allow room for the NX bit, which lets the
- * kernel disable execution of pages and increase security.
- *
- * This was a problem for lguest, which couldn't run on these distributions;
- * then Matias Zabaljauregui figured it all out and implemented it, and only a
- * handful of puppies were crushed in the process!
- *
- * Back to our point: the kernel spends a lot of time changing both the
- * top-level page directory and lower-level pagetable pages. The Guest doesn't
- * know physical addresses, so while it maintains these page tables exactly
- * like normal, it also needs to keep the Host informed whenever it makes a
- * change: the Host will create the real page tables based on the Guests'.
- */
-
-/*
- * The Guest calls this after it has set a second-level entry (pte), ie. to map
- * a page into a process' address space. We tell the Host the toplevel and
- * address this corresponds to. The Guest uses one pagetable per process, so
- * we need to tell the Host which one we're changing (mm->pgd).
- */
-static void lguest_pte_update(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
-#ifdef CONFIG_X86_PAE
- /* PAE needs to hand a 64 bit page table entry, so it uses two args. */
- lazy_hcall4(LHCALL_SET_PTE, __pa(mm->pgd), addr,
- ptep->pte_low, ptep->pte_high);
-#else
- lazy_hcall3(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low);
-#endif
-}
-
-/* This is the "set and update" combo-meal-deal version. */
-static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval)
-{
- native_set_pte(ptep, pteval);
- lguest_pte_update(mm, addr, ptep);
-}
-
-/*
- * The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd
- * to set a middle-level entry when PAE is activated.
- *
- * Again, we set the entry then tell the Host which page we changed,
- * and the index of the entry we changed.
- */
-#ifdef CONFIG_X86_PAE
-static void lguest_set_pud(pud_t *pudp, pud_t pudval)
-{
- native_set_pud(pudp, pudval);
-
- /* 32 bytes aligned pdpt address and the index. */
- lazy_hcall2(LHCALL_SET_PGD, __pa(pudp) & 0xFFFFFFE0,
- (__pa(pudp) & 0x1F) / sizeof(pud_t));
-}
-
-static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
- native_set_pmd(pmdp, pmdval);
- lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK,
- (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
-}
-#else
-
-/* The Guest calls lguest_set_pmd to set a top-level entry when !PAE. */
-static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
- native_set_pmd(pmdp, pmdval);
- lazy_hcall2(LHCALL_SET_PGD, __pa(pmdp) & PAGE_MASK,
- (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t));
-}
-#endif
-
-/*
- * There are a couple of legacy places where the kernel sets a PTE, but we
- * don't know the top level any more. This is useless for us, since we don't
- * know which pagetable is changing or what address, so we just tell the Host
- * to forget all of them. Fortunately, this is very rare.
- *
- * ... except in early boot when the kernel sets up the initial pagetables,
- * which makes booting astonishingly slow: 48 seconds! So we don't even tell
- * the Host anything changed until we've done the first real page table switch,
- * which brings boot back to 4.3 seconds.
- */
-static void lguest_set_pte(pte_t *ptep, pte_t pteval)
-{
- native_set_pte(ptep, pteval);
- if (cr3_changed)
- lazy_hcall1(LHCALL_FLUSH_TLB, 1);
-}
-
-#ifdef CONFIG_X86_PAE
-/*
- * With 64-bit PTE values, we need to be careful setting them: if we set 32
- * bits at a time, the hardware could see a weird half-set entry. These
- * versions ensure we update all 64 bits at once.
- */
-static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte)
-{
- native_set_pte_atomic(ptep, pte);
- if (cr3_changed)
- lazy_hcall1(LHCALL_FLUSH_TLB, 1);
-}
-
-static void lguest_pte_clear(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
- native_pte_clear(mm, addr, ptep);
- lguest_pte_update(mm, addr, ptep);
-}
-
-static void lguest_pmd_clear(pmd_t *pmdp)
-{
- lguest_set_pmd(pmdp, __pmd(0));
-}
-#endif
-
-/*
- * Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
- * native page table operations. On native hardware you can set a new page
- * table entry whenever you want, but if you want to remove one you have to do
- * a TLB flush (a TLB is a little cache of page table entries kept by the CPU).
- *
- * So the lguest_set_pte_at() and lguest_set_pmd() functions above are only
- * called when a valid entry is written, not when it's removed (ie. marked not
- * present). Instead, this is where we come when the Guest wants to remove a
- * page table entry: we tell the Host to set that entry to 0 (ie. the present
- * bit is zero).
- */
-static void lguest_flush_tlb_single(unsigned long addr)
-{
- /* Simply set it to zero: if it was not, it will fault back in. */
- lazy_hcall3(LHCALL_SET_PTE, current_cr3, addr, 0);
-}
-
-/*
- * This is what happens after the Guest has removed a large number of entries.
- * This tells the Host that any of the page table entries for userspace might
- * have changed, ie. virtual addresses below PAGE_OFFSET.
- */
-static void lguest_flush_tlb_user(void)
-{
- lazy_hcall1(LHCALL_FLUSH_TLB, 0);
-}
-
-/*
- * This is called when the kernel page tables have changed. That's not very
- * common (unless the Guest is using highmem, which makes the Guest extremely
- * slow), so it's worth separating this from the user flushing above.
- */
-static void lguest_flush_tlb_kernel(void)
-{
- lazy_hcall1(LHCALL_FLUSH_TLB, 1);
-}
-
-/*
- * The Unadvanced Programmable Interrupt Controller.
- *
- * This is an attempt to implement the simplest possible interrupt controller.
- * I spent some time looking though routines like set_irq_chip_and_handler,
- * set_irq_chip_and_handler_name, set_irq_chip_data and set_phasers_to_stun and
- * I *think* this is as simple as it gets.
- *
- * We can tell the Host what interrupts we want blocked ready for using the
- * lguest_data.interrupts bitmap, so disabling (aka "masking") them is as
- * simple as setting a bit. We don't actually "ack" interrupts as such, we
- * just mask and unmask them. I wonder if we should be cleverer?
- */
-static void disable_lguest_irq(struct irq_data *data)
-{
- set_bit(data->irq, lguest_data.blocked_interrupts);
-}
-
-static void enable_lguest_irq(struct irq_data *data)
-{
- clear_bit(data->irq, lguest_data.blocked_interrupts);
-}
-
-/* This structure describes the lguest IRQ controller. */
-static struct irq_chip lguest_irq_controller = {
- .name = "lguest",
- .irq_mask = disable_lguest_irq,
- .irq_mask_ack = disable_lguest_irq,
- .irq_unmask = enable_lguest_irq,
-};
-
-/*
- * Interrupt descriptors are allocated as-needed, but low-numbered ones are
- * reserved by the generic x86 code. So we ignore irq_alloc_desc_at if it
- * tells us the irq is already used: other errors (ie. ENOMEM) we take
- * seriously.
- */
-static int lguest_setup_irq(unsigned int irq)
-{
- struct irq_desc *desc;
- int err;
-
- /* Returns -ve error or vector number. */
- err = irq_alloc_desc_at(irq, 0);
- if (err < 0 && err != -EEXIST)
- return err;
-
- /*
- * Tell the Linux infrastructure that the interrupt is
- * controlled by our level-based lguest interrupt controller.
- */
- irq_set_chip_and_handler_name(irq, &lguest_irq_controller,
- handle_level_irq, "level");
-
- /* Some systems map "vectors" to interrupts weirdly. Not us! */
- desc = irq_to_desc(irq);
- __this_cpu_write(vector_irq[FIRST_EXTERNAL_VECTOR + irq], desc);
- return 0;
-}
-
-static int lguest_enable_irq(struct pci_dev *dev)
-{
- int err;
- u8 line = 0;
-
- /* We literally use the PCI interrupt line as the irq number. */
- pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &line);
- err = lguest_setup_irq(line);
- if (!err)
- dev->irq = line;
- return err;
-}
-
-/* We don't do hotplug PCI, so this shouldn't be called. */
-static void lguest_disable_irq(struct pci_dev *dev)
-{
- WARN_ON(1);
-}
-
-/*
- * This sets up the Interrupt Descriptor Table (IDT) entry for each hardware
- * interrupt (except 128, which is used for system calls).
- */
-static void __init lguest_init_IRQ(void)
-{
- unsigned int i;
-
- for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) {
- if (i != IA32_SYSCALL_VECTOR)
- set_intr_gate(i, irq_entries_start +
- 8 * (i - FIRST_EXTERNAL_VECTOR));
- }
-
- /*
- * This call is required to set up for 4k stacks, where we have
- * separate stacks for hard and soft interrupts.
- */
- irq_ctx_init(smp_processor_id());
-}
-
-/*
- * Time.
- *
- * It would be far better for everyone if the Guest had its own clock, but
- * until then the Host gives us the time on every interrupt.
- */
-static void lguest_get_wallclock(struct timespec *now)
-{
- *now = lguest_data.time;
-}
-
-/*
- * The TSC is an Intel thing called the Time Stamp Counter. The Host tells us
- * what speed it runs at, or 0 if it's unusable as a reliable clock source.
- * This matches what we want here: if we return 0 from this function, the x86
- * TSC clock will give up and not register itself.
- */
-static unsigned long lguest_tsc_khz(void)
-{
- return lguest_data.tsc_khz;
-}
-
-/*
- * If we can't use the TSC, the kernel falls back to our lower-priority
- * "lguest_clock", where we read the time value given to us by the Host.
- */
-static u64 lguest_clock_read(struct clocksource *cs)
-{
- unsigned long sec, nsec;
-
- /*
- * Since the time is in two parts (seconds and nanoseconds), we risk
- * reading it just as it's changing from 99 & 0.999999999 to 100 and 0,
- * and getting 99 and 0. As Linux tends to come apart under the stress
- * of time travel, we must be careful:
- */
- do {
- /* First we read the seconds part. */
- sec = lguest_data.time.tv_sec;
- /*
- * This read memory barrier tells the compiler and the CPU that
- * this can't be reordered: we have to complete the above
- * before going on.
- */
- rmb();
- /* Now we read the nanoseconds part. */
- nsec = lguest_data.time.tv_nsec;
- /* Make sure we've done that. */
- rmb();
- /* Now if the seconds part has changed, try again. */
- } while (unlikely(lguest_data.time.tv_sec != sec));
-
- /* Our lguest clock is in real nanoseconds. */
- return sec*1000000000ULL + nsec;
-}
-
-/* This is the fallback clocksource: lower priority than the TSC clocksource. */
-static struct clocksource lguest_clock = {
- .name = "lguest",
- .rating = 200,
- .read = lguest_clock_read,
- .mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-/*
- * We also need a "struct clock_event_device": Linux asks us to set it to go
- * off some time in the future. Actually, James Morris figured all this out, I
- * just applied the patch.
- */
-static int lguest_clockevent_set_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- /* FIXME: I don't think this can ever happen, but James tells me he had
- * to put this code in. Maybe we should remove it now. Anyone? */
- if (delta < LG_CLOCK_MIN_DELTA) {
- if (printk_ratelimit())
- printk(KERN_DEBUG "%s: small delta %lu ns\n",
- __func__, delta);
- return -ETIME;
- }
-
- /* Please wake us this far in the future. */
- hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0, 0);
- return 0;
-}
-
-static int lguest_clockevent_shutdown(struct clock_event_device *evt)
-{
- /* A 0 argument shuts the clock down. */
- hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0, 0);
- return 0;
-}
-
-/* This describes our primitive timer chip. */
-static struct clock_event_device lguest_clockevent = {
- .name = "lguest",
- .features = CLOCK_EVT_FEAT_ONESHOT,
- .set_next_event = lguest_clockevent_set_next_event,
- .set_state_shutdown = lguest_clockevent_shutdown,
- .rating = INT_MAX,
- .mult = 1,
- .shift = 0,
- .min_delta_ns = LG_CLOCK_MIN_DELTA,
- .min_delta_ticks = LG_CLOCK_MIN_DELTA,
- .max_delta_ns = LG_CLOCK_MAX_DELTA,
- .max_delta_ticks = LG_CLOCK_MAX_DELTA,
-};
-
-/*
- * This is the Guest timer interrupt handler (hardware interrupt 0). We just
- * call the clockevent infrastructure and it does whatever needs doing.
- */
-static void lguest_time_irq(struct irq_desc *desc)
-{
- unsigned long flags;
-
- /* Don't interrupt us while this is running. */
- local_irq_save(flags);
- lguest_clockevent.event_handler(&lguest_clockevent);
- local_irq_restore(flags);
-}
-
-/*
- * At some point in the boot process, we get asked to set up our timing
- * infrastructure. The kernel doesn't expect timer interrupts before this, but
- * we cleverly initialized the "blocked_interrupts" field of "struct
- * lguest_data" so that timer interrupts were blocked until now.
- */
-static void lguest_time_init(void)
-{
- /* Set up the timer interrupt (0) to go to our simple timer routine */
- if (lguest_setup_irq(0) != 0)
- panic("Could not set up timer irq");
- irq_set_handler(0, lguest_time_irq);
-
- clocksource_register_hz(&lguest_clock, NSEC_PER_SEC);
-
- /* We can't set cpumask in the initializer: damn C limitations! Set it
- * here and register our timer device. */
- lguest_clockevent.cpumask = cpumask_of(0);
- clockevents_register_device(&lguest_clockevent);
-
- /* Finally, we unblock the timer interrupt. */
- clear_bit(0, lguest_data.blocked_interrupts);
-}
-
-/*
- * Miscellaneous bits and pieces.
- *
- * Here is an oddball collection of functions which the Guest needs for things
- * to work. They're pretty simple.
- */
-
-/*
- * The Guest needs to tell the Host what stack it expects traps to use. For
- * native hardware, this is part of the Task State Segment mentioned above in
- * lguest_load_tr_desc(), but to help hypervisors there's this special call.
- *
- * We tell the Host the segment we want to use (__KERNEL_DS is the kernel data
- * segment), the privilege level (we're privilege level 1, the Host is 0 and
- * will not tolerate us trying to use that), the stack pointer, and the number
- * of pages in the stack.
- */
-static void lguest_load_sp0(struct tss_struct *tss,
- struct thread_struct *thread)
-{
- lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS | 0x1, thread->sp0,
- THREAD_SIZE / PAGE_SIZE);
- tss->x86_tss.sp0 = thread->sp0;
-}
-
-/* Let's just say, I wouldn't do debugging under a Guest. */
-static unsigned long lguest_get_debugreg(int regno)
-{
- /* FIXME: Implement */
- return 0;
-}
-
-static void lguest_set_debugreg(int regno, unsigned long value)
-{
- /* FIXME: Implement */
-}
-
-/*
- * There are times when the kernel wants to make sure that no memory writes are
- * caught in the cache (that they've all reached real hardware devices). This
- * doesn't matter for the Guest which has virtual hardware.
- *
- * On the Pentium 4 and above, cpuid() indicates that the Cache Line Flush
- * (clflush) instruction is available and the kernel uses that. Otherwise, it
- * uses the older "Write Back and Invalidate Cache" (wbinvd) instruction.
- * Unlike clflush, wbinvd can only be run at privilege level 0. So we can
- * ignore clflush, but replace wbinvd.
- */
-static void lguest_wbinvd(void)
-{
-}
-
-/*
- * If the Guest expects to have an Advanced Programmable Interrupt Controller,
- * we play dumb by ignoring writes and returning 0 for reads. So it's no
- * longer Programmable nor Controlling anything, and I don't think 8 lines of
- * code qualifies for Advanced. It will also never interrupt anything. It
- * does, however, allow us to get through the Linux boot code.
- */
-#ifdef CONFIG_X86_LOCAL_APIC
-static void lguest_apic_write(u32 reg, u32 v)
-{
-}
-
-static u32 lguest_apic_read(u32 reg)
-{
- return 0;
-}
-
-static u64 lguest_apic_icr_read(void)
-{
- return 0;
-}
-
-static void lguest_apic_icr_write(u32 low, u32 id)
-{
- /* Warn to see if there's any stray references */
- WARN_ON(1);
-}
-
-static void lguest_apic_wait_icr_idle(void)
-{
- return;
-}
-
-static u32 lguest_apic_safe_wait_icr_idle(void)
-{
- return 0;
-}
-
-static void set_lguest_basic_apic_ops(void)
-{
- apic->read = lguest_apic_read;
- apic->write = lguest_apic_write;
- apic->icr_read = lguest_apic_icr_read;
- apic->icr_write = lguest_apic_icr_write;
- apic->wait_icr_idle = lguest_apic_wait_icr_idle;
- apic->safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle;
-};
-#endif
-
-/* STOP! Until an interrupt comes in. */
-static void lguest_safe_halt(void)
-{
- hcall(LHCALL_HALT, 0, 0, 0, 0);
-}
-
-/*
- * The SHUTDOWN hypercall takes a string to describe what's happening, and
- * an argument which says whether this to restart (reboot) the Guest or not.
- *
- * Note that the Host always prefers that the Guest speak in physical addresses
- * rather than virtual addresses, so we use __pa() here.
- */
-static void lguest_power_off(void)
-{
- hcall(LHCALL_SHUTDOWN, __pa("Power down"),
- LGUEST_SHUTDOWN_POWEROFF, 0, 0);
-}
-
-/*
- * Panicing.
- *
- * Don't. But if you did, this is what happens.
- */
-static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
-{
- hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0, 0);
- /* The hcall won't return, but to keep gcc happy, we're "done". */
- return NOTIFY_DONE;
-}
-
-static struct notifier_block paniced = {
- .notifier_call = lguest_panic
-};
-
-/* Setting up memory is fairly easy. */
-static __init char *lguest_memory_setup(void)
-{
- /*
- * The Linux bootloader header contains an "e820" memory map: the
- * Launcher populated the first entry with our memory limit.
- */
- e820__range_add(boot_params.e820_table[0].addr,
- boot_params.e820_table[0].size,
- boot_params.e820_table[0].type);
-
- /* This string is for the boot messages. */
- return "LGUEST";
-}
-
-/* Offset within PCI config space of BAR access capability. */
-static int console_cfg_offset = 0;
-static int console_access_cap;
-
-/* Set up so that we access off in bar0 (on bus 0, device 1, function 0) */
-static void set_cfg_window(u32 cfg_offset, u32 off)
-{
- write_pci_config_byte(0, 1, 0,
- cfg_offset + offsetof(struct virtio_pci_cap, bar),
- 0);
- write_pci_config(0, 1, 0,
- cfg_offset + offsetof(struct virtio_pci_cap, length),
- 4);
- write_pci_config(0, 1, 0,
- cfg_offset + offsetof(struct virtio_pci_cap, offset),
- off);
-}
-
-static void write_bar_via_cfg(u32 cfg_offset, u32 off, u32 val)
-{
- /*
- * We could set this up once, then leave it; nothing else in the *
- * kernel should touch these registers. But if it went wrong, that
- * would be a horrible bug to find.
- */
- set_cfg_window(cfg_offset, off);
- write_pci_config(0, 1, 0,
- cfg_offset + sizeof(struct virtio_pci_cap), val);
-}
-
-static void probe_pci_console(void)
-{
- u8 cap, common_cap = 0, device_cap = 0;
- u32 device_len;
-
- /* Avoid recursive printk into here. */
- console_cfg_offset = -1;
-
- if (!early_pci_allowed()) {
- printk(KERN_ERR "lguest: early PCI access not allowed!\n");
- return;
- }
-
- /* We expect a console PCI device at BUS0, slot 1. */
- if (read_pci_config(0, 1, 0, 0) != 0x10431AF4) {
- printk(KERN_ERR "lguest: PCI device is %#x!\n",
- read_pci_config(0, 1, 0, 0));
- return;
- }
-
- /* Find the capabilities we need (must be in bar0) */
- cap = read_pci_config_byte(0, 1, 0, PCI_CAPABILITY_LIST);
- while (cap) {
- u8 vndr = read_pci_config_byte(0, 1, 0, cap);
- if (vndr == PCI_CAP_ID_VNDR) {
- u8 type, bar;
-
- type = read_pci_config_byte(0, 1, 0,
- cap + offsetof(struct virtio_pci_cap, cfg_type));
- bar = read_pci_config_byte(0, 1, 0,
- cap + offsetof(struct virtio_pci_cap, bar));
-
- switch (type) {
- case VIRTIO_PCI_CAP_DEVICE_CFG:
- if (bar == 0)
- device_cap = cap;
- break;
- case VIRTIO_PCI_CAP_PCI_CFG:
- console_access_cap = cap;
- break;
- }
- }
- cap = read_pci_config_byte(0, 1, 0, cap + PCI_CAP_LIST_NEXT);
- }
- if (!device_cap || !console_access_cap) {
- printk(KERN_ERR "lguest: No caps (%u/%u/%u) in console!\n",
- common_cap, device_cap, console_access_cap);
- return;
- }
-
- /*
- * Note that we can't check features, until we've set the DRIVER
- * status bit. We don't want to do that until we have a real driver,
- * so we just check that the device-specific config has room for
- * emerg_wr. If it doesn't support VIRTIO_CONSOLE_F_EMERG_WRITE
- * it should ignore the access.
- */
- device_len = read_pci_config(0, 1, 0,
- device_cap + offsetof(struct virtio_pci_cap, length));
- if (device_len < (offsetof(struct virtio_console_config, emerg_wr)
- + sizeof(u32))) {
- printk(KERN_ERR "lguest: console missing emerg_wr field\n");
- return;
- }
-
- console_cfg_offset = read_pci_config(0, 1, 0,
- device_cap + offsetof(struct virtio_pci_cap, offset));
- printk(KERN_INFO "lguest: Console via virtio-pci emerg_wr\n");
-}
-
-/*
- * We will eventually use the virtio console device to produce console output,
- * but before that is set up we use the virtio PCI console's backdoor mmio
- * access and the "emergency" write facility (which is legal even before the
- * device is configured).
- */
-static __init int early_put_chars(u32 vtermno, const char *buf, int count)
-{
- /* If we couldn't find PCI console, forget it. */
- if (console_cfg_offset < 0)
- return count;
-
- if (unlikely(!console_cfg_offset)) {
- probe_pci_console();
- if (console_cfg_offset < 0)
- return count;
- }
-
- write_bar_via_cfg(console_access_cap,
- console_cfg_offset
- + offsetof(struct virtio_console_config, emerg_wr),
- buf[0]);
- return 1;
-}
-
-/*
- * Rebooting also tells the Host we're finished, but the RESTART flag tells the
- * Launcher to reboot us.
- */
-static void lguest_restart(char *reason)
-{
- hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0, 0);
-}
-
-/*G:050
- * Patching (Powerfully Placating Performance Pedants)
- *
- * We have already seen that pv_ops structures let us replace simple native
- * instructions with calls to the appropriate back end all throughout the
- * kernel. This allows the same kernel to run as a Guest and as a native
- * kernel, but it's slow because of all the indirect branches.
- *
- * Remember that David Wheeler quote about "Any problem in computer science can
- * be solved with another layer of indirection"? The rest of that quote is
- * "... But that usually will create another problem." This is the first of
- * those problems.
- *
- * Our current solution is to allow the paravirt back end to optionally patch
- * over the indirect calls to replace them with something more efficient. We
- * patch two of the simplest of the most commonly called functions: disable
- * interrupts and save interrupts. We usually have 6 or 10 bytes to patch
- * into: the Guest versions of these operations are small enough that we can
- * fit comfortably.
- *
- * First we need assembly templates of each of the patchable Guest operations,
- * and these are in head_32.S.
- */
-
-/*G:060 We construct a table from the assembler templates: */
-static const struct lguest_insns
-{
- const char *start, *end;
-} lguest_insns[] = {
- [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },
- [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },
-};
-
-/*
- * Now our patch routine is fairly simple (based on the native one in
- * paravirt.c). If we have a replacement, we copy it in and return how much of
- * the available space we used.
- */
-static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
- unsigned long addr, unsigned len)
-{
- unsigned int insn_len;
-
- /* Don't do anything special if we don't have a replacement */
- if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start)
- return paravirt_patch_default(type, clobber, ibuf, addr, len);
-
- insn_len = lguest_insns[type].end - lguest_insns[type].start;
-
- /* Similarly if it can't fit (doesn't happen, but let's be thorough). */
- if (len < insn_len)
- return paravirt_patch_default(type, clobber, ibuf, addr, len);
-
- /* Copy in our instructions. */
- memcpy(ibuf, lguest_insns[type].start, insn_len);
- return insn_len;
-}
-
-/*G:029
- * Once we get to lguest_init(), we know we're a Guest. The various
- * pv_ops structures in the kernel provide points for (almost) every routine we
- * have to override to avoid privileged instructions.
- */
-__init void lguest_init(void)
-{
- /* We're under lguest. */
- pv_info.name = "lguest";
- /* We're running at privilege level 1, not 0 as normal. */
- pv_info.kernel_rpl = 1;
- /* Everyone except Xen runs with this set. */
- pv_info.shared_kernel_pmd = 1;
-
- /*
- * We set up all the lguest overrides for sensitive operations. These
- * are detailed with the operations themselves.
- */
-
- /* Interrupt-related operations */
- pv_irq_ops.save_fl = PV_CALLEE_SAVE(lguest_save_fl);
- pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl);
- pv_irq_ops.irq_disable = PV_CALLEE_SAVE(lguest_irq_disable);
- pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable);
- pv_irq_ops.safe_halt = lguest_safe_halt;
-
- /* Setup operations */
- pv_init_ops.patch = lguest_patch;
-
- /* Intercepts of various CPU instructions */
- pv_cpu_ops.load_gdt = lguest_load_gdt;
- pv_cpu_ops.cpuid = lguest_cpuid;
- pv_cpu_ops.load_idt = lguest_load_idt;
- pv_cpu_ops.iret = lguest_iret;
- pv_cpu_ops.load_sp0 = lguest_load_sp0;
- pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
- pv_cpu_ops.set_ldt = lguest_set_ldt;
- pv_cpu_ops.load_tls = lguest_load_tls;
- pv_cpu_ops.get_debugreg = lguest_get_debugreg;
- pv_cpu_ops.set_debugreg = lguest_set_debugreg;
- pv_cpu_ops.read_cr0 = lguest_read_cr0;
- pv_cpu_ops.write_cr0 = lguest_write_cr0;
- pv_cpu_ops.read_cr4 = lguest_read_cr4;
- pv_cpu_ops.write_cr4 = lguest_write_cr4;
- pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
- pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
- pv_cpu_ops.wbinvd = lguest_wbinvd;
- pv_cpu_ops.start_context_switch = paravirt_start_context_switch;
- pv_cpu_ops.end_context_switch = lguest_end_context_switch;
-
- /* Pagetable management */
- pv_mmu_ops.write_cr3 = lguest_write_cr3;
- pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user;
- pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single;
- pv_mmu_ops.flush_tlb_kernel = lguest_flush_tlb_kernel;
- pv_mmu_ops.set_pte = lguest_set_pte;
- pv_mmu_ops.set_pte_at = lguest_set_pte_at;
- pv_mmu_ops.set_pmd = lguest_set_pmd;
-#ifdef CONFIG_X86_PAE
- pv_mmu_ops.set_pte_atomic = lguest_set_pte_atomic;
- pv_mmu_ops.pte_clear = lguest_pte_clear;
- pv_mmu_ops.pmd_clear = lguest_pmd_clear;
- pv_mmu_ops.set_pud = lguest_set_pud;
-#endif
- pv_mmu_ops.read_cr2 = lguest_read_cr2;
- pv_mmu_ops.read_cr3 = lguest_read_cr3;
- pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
- pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
- pv_mmu_ops.lazy_mode.flush = paravirt_flush_lazy_mmu;
- pv_mmu_ops.pte_update = lguest_pte_update;
-
-#ifdef CONFIG_X86_LOCAL_APIC
- /* APIC read/write intercepts */
- set_lguest_basic_apic_ops();
-#endif
-
- x86_init.resources.memory_setup = lguest_memory_setup;
- x86_init.irqs.intr_init = lguest_init_IRQ;
- x86_init.timers.timer_init = lguest_time_init;
- x86_platform.calibrate_tsc = lguest_tsc_khz;
- x86_platform.get_wallclock = lguest_get_wallclock;
-
- /*
- * Now is a good time to look at the implementations of these functions
- * before returning to the rest of lguest_init().
- */
-
- /*G:070
- * Now we've seen all the paravirt_ops, we return to
- * lguest_init() where the rest of the fairly chaotic boot setup
- * occurs.
- */
-
- /*
- * The stack protector is a weird thing where gcc places a canary
- * value on the stack and then checks it on return. This file is
- * compiled with -fno-stack-protector it, so we got this far without
- * problems. The value of the canary is kept at offset 20 from the
- * %gs register, so we need to set that up before calling C functions
- * in other files.
- */
- setup_stack_canary_segment(0);
-
- /*
- * We could just call load_stack_canary_segment(), but we might as well
- * call switch_to_new_gdt() which loads the whole table and sets up the
- * per-cpu segment descriptor register %fs as well.
- */
- switch_to_new_gdt(0);
-
- /*
- * The Host<->Guest Switcher lives at the top of our address space, and
- * the Host told us how big it is when we made LGUEST_INIT hypercall:
- * it put the answer in lguest_data.reserve_mem
- */
- reserve_top_address(lguest_data.reserve_mem);
-
- /* Hook in our special panic hypercall code. */
- atomic_notifier_chain_register(&panic_notifier_list, &paniced);
-
- /*
- * This is messy CPU setup stuff which the native boot code does before
- * start_kernel, so we have to do, too:
- */
- cpu_detect(&new_cpu_data);
- /* head.S usually sets up the first capability word, so do it here. */
- new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
-
- /* Math is always hard! */
- set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
-
- /* We don't have features. We have puppies! Puppies! */
-#ifdef CONFIG_X86_MCE
- mca_cfg.disabled = true;
-#endif
-#ifdef CONFIG_ACPI
- acpi_disabled = 1;
-#endif
-
- /*
- * We set the preferred console to "hvc". This is the "hypervisor
- * virtual console" driver written by the PowerPC people, which we also
- * adapted for lguest's use.
- */
- add_preferred_console("hvc", 0, NULL);
-
- /* Register our very early console. */
- virtio_cons_early_init(early_put_chars);
-
- /* Don't let ACPI try to control our PCI interrupts. */
- disable_acpi();
-
- /* We control them ourselves, by overriding these two hooks. */
- pcibios_enable_irq = lguest_enable_irq;
- pcibios_disable_irq = lguest_disable_irq;
-
- /*
- * Last of all, we set the power management poweroff hook to point to
- * the Guest routine to power off, and the reboot hook to our restart
- * routine.
- */
- pm_power_off = lguest_power_off;
- machine_ops.restart = lguest_restart;
-
- /*
- * Now we're set up, call i386_start_kernel() in head32.c and we proceed
- * to boot as normal. It never returns.
- */
- i386_start_kernel();
-}
-/*
- * This marks the end of stage II of our journey, The Guest.
- *
- * It is now time for us to explore the layer of virtual drivers and complete
- * our understanding of the Guest in "make Drivers".
- */
diff --git a/arch/x86/lguest/head_32.S b/arch/x86/lguest/head_32.S
deleted file mode 100644
index d5ae63f5ec5d..000000000000
--- a/arch/x86/lguest/head_32.S
+++ /dev/null
@@ -1,192 +0,0 @@
-#include <linux/linkage.h>
-#include <linux/lguest.h>
-#include <asm/lguest_hcall.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/processor-flags.h>
-
-/*G:020
-
- * Our story starts with the bzImage: booting starts at startup_32 in
- * arch/x86/boot/compressed/head_32.S. This merely uncompresses the real
- * kernel in place and then jumps into it: startup_32 in
- * arch/x86/kernel/head_32.S. Both routines expects a boot header in the %esi
- * register, which is created by the bootloader (the Launcher in our case).
- *
- * The startup_32 function does very little: it clears the uninitialized global
- * C variables which we expect to be zero (ie. BSS) and then copies the boot
- * header and kernel command line somewhere safe, and populates some initial
- * page tables. Finally it checks the 'hardware_subarch' field. This was
- * introduced in 2.6.24 for lguest and Xen: if it's set to '1' (lguest's
- * assigned number), then it calls us here.
- *
- * WARNING: be very careful here! We're running at addresses equal to physical
- * addresses (around 0), not above PAGE_OFFSET as most code expects
- * (eg. 0xC0000000). Jumps are relative, so they're OK, but we can't touch any
- * data without remembering to subtract __PAGE_OFFSET!
- *
- * The .section line puts this code in .init.text so it will be discarded after
- * boot.
- */
-.section .init.text, "ax", @progbits
-ENTRY(lguest_entry)
- /*
- * We make the "initialization" hypercall now to tell the Host where
- * our lguest_data struct is.
- */
- movl $LHCALL_LGUEST_INIT, %eax
- movl $lguest_data - __PAGE_OFFSET, %ebx
- int $LGUEST_TRAP_ENTRY
-
- /* Now turn our pagetables on; setup by arch/x86/kernel/head_32.S. */
- movl $LHCALL_NEW_PGTABLE, %eax
- movl $(initial_page_table - __PAGE_OFFSET), %ebx
- int $LGUEST_TRAP_ENTRY
-
- /* Set up the initial stack so we can run C code. */
- movl $(init_thread_union+THREAD_SIZE),%esp
-
- /* Jumps are relative: we're running __PAGE_OFFSET too low. */
- jmp lguest_init+__PAGE_OFFSET
-
-/*G:055
- * We create a macro which puts the assembler code between lgstart_ and lgend_
- * markers. These templates are put in the .text section: they can't be
- * discarded after boot as we may need to patch modules, too.
- */
-.text
-#define LGUEST_PATCH(name, insns...) \
- lgstart_##name: insns; lgend_##name:; \
- .globl lgstart_##name; .globl lgend_##name
-
-LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
-LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
-
-/*G:033
- * But using those wrappers is inefficient (we'll see why that doesn't matter
- * for save_fl and irq_disable later). If we write our routines carefully in
- * assembler, we can avoid clobbering any registers and avoid jumping through
- * the wrapper functions.
- *
- * I skipped over our first piece of assembler, but this one is worth studying
- * in a bit more detail so I'll describe in easy stages. First, the routine to
- * enable interrupts:
- */
-ENTRY(lg_irq_enable)
- /*
- * The reverse of irq_disable, this sets lguest_data.irq_enabled to
- * X86_EFLAGS_IF (ie. "Interrupts enabled").
- */
- movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled
- /*
- * But now we need to check if the Host wants to know: there might have
- * been interrupts waiting to be delivered, in which case it will have
- * set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we
- * jump to send_interrupts, otherwise we're done.
- */
- cmpl $0, lguest_data+LGUEST_DATA_irq_pending
- jnz send_interrupts
- /*
- * One cool thing about x86 is that you can do many things without using
- * a register. In this case, the normal path hasn't needed to save or
- * restore any registers at all!
- */
- ret
-send_interrupts:
- /*
- * OK, now we need a register: eax is used for the hypercall number,
- * which is LHCALL_SEND_INTERRUPTS.
- *
- * We used not to bother with this pending detection at all, which was
- * much simpler. Sooner or later the Host would realize it had to
- * send us an interrupt. But that turns out to make performance 7
- * times worse on a simple tcp benchmark. So now we do this the hard
- * way.
- */
- pushl %eax
- movl $LHCALL_SEND_INTERRUPTS, %eax
- /* This is the actual hypercall trap. */
- int $LGUEST_TRAP_ENTRY
- /* Put eax back the way we found it. */
- popl %eax
- ret
-
-/*
- * Finally, the "popf" or "restore flags" routine. The %eax register holds the
- * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're
- * enabling interrupts again, if it's 0 we're leaving them off.
- */
-ENTRY(lg_restore_fl)
- /* This is just "lguest_data.irq_enabled = flags;" */
- movl %eax, lguest_data+LGUEST_DATA_irq_enabled
- /*
- * Now, if the %eax value has enabled interrupts and
- * lguest_data.irq_pending is set, we want to tell the Host so it can
- * deliver any outstanding interrupts. Fortunately, both values will
- * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl"
- * instruction will AND them together for us. If both are set, we
- * jump to send_interrupts.
- */
- testl lguest_data+LGUEST_DATA_irq_pending, %eax
- jnz send_interrupts
- /* Again, the normal path has used no extra registers. Clever, huh? */
- ret
-/*:*/
-
-/* These demark the EIP where host should never deliver interrupts. */
-.global lguest_noirq_iret
-
-/*M:004
- * When the Host reflects a trap or injects an interrupt into the Guest, it
- * sets the eflags interrupt bit on the stack based on lguest_data.irq_enabled,
- * so the Guest iret logic does the right thing when restoring it. However,
- * when the Host sets the Guest up for direct traps, such as system calls, the
- * processor is the one to push eflags onto the stack, and the interrupt bit
- * will be 1 (in reality, interrupts are always enabled in the Guest).
- *
- * This turns out to be harmless: the only trap which should happen under Linux
- * with interrupts disabled is Page Fault (due to our lazy mapping of vmalloc
- * regions), which has to be reflected through the Host anyway. If another
- * trap *does* go off when interrupts are disabled, the Guest will panic, and
- * we'll never get to this iret!
-:*/
-
-/*G:045
- * There is one final paravirt_op that the Guest implements, and glancing at it
- * you can see why I left it to last. It's *cool*! It's in *assembler*!
- *
- * The "iret" instruction is used to return from an interrupt or trap. The
- * stack looks like this:
- * old address
- * old code segment & privilege level
- * old processor flags ("eflags")
- *
- * The "iret" instruction pops those values off the stack and restores them all
- * at once. The only problem is that eflags includes the Interrupt Flag which
- * the Guest can't change: the CPU will simply ignore it when we do an "iret".
- * So we have to copy eflags from the stack to lguest_data.irq_enabled before
- * we do the "iret".
- *
- * There are two problems with this: firstly, we can't clobber any registers
- * and secondly, the whole thing needs to be atomic. The first problem
- * is solved by using "push memory"/"pop memory" instruction pair for copying.
- *
- * The second is harder: copying eflags to lguest_data.irq_enabled will turn
- * interrupts on before we're finished, so we could be interrupted before we
- * return to userspace or wherever. Our solution to this is to tell the
- * Host that it is *never* to interrupt us there, even if interrupts seem to be
- * enabled. (It's not necessary to protect pop instruction, since
- * data gets updated only after it completes, so we only need to protect
- * one instruction, iret).
- */
-ENTRY(lguest_iret)
- pushl 2*4(%esp)
- /*
- * Note the %ss: segment prefix here. Normal data accesses use the
- * "ds" segment, but that will have already been restored for whatever
- * we're returning to (such as userspace): we can't trust it. The %ss:
- * prefix makes sure we use the stack segment, which is still valid.
- */
- popl %ss:lguest_data+LGUEST_DATA_irq_enabled
-lguest_noirq_iret:
- iret
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index 5cc78bf57232..3261abb21ef4 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c
@@ -104,7 +104,112 @@ __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size,
return 0; /* Buffer overrun */
}
+/*
+ * Find a non-boolean option (i.e. option=argument). In accordance with
+ * standard Linux practice, if this option is repeated, this returns the
+ * last instance on the command line.
+ *
+ * @cmdline: the cmdline string
+ * @max_cmdline_size: the maximum size of cmdline
+ * @option: option string to look for
+ * @buffer: memory buffer to return the option argument
+ * @bufsize: size of the supplied memory buffer
+ *
+ * Returns the length of the argument (regardless of whether it was
+ * truncated to fit in the buffer), or -1 if the option was not found.
+ */
+static int
+__cmdline_find_option(const char *cmdline, int max_cmdline_size,
+ const char *option, char *buffer, int bufsize)
+{
+ char c;
+ int pos = 0, len = -1;
+ const char *opptr = NULL;
+ char *bufptr = buffer;
+ enum {
+ st_wordstart = 0, /* Start of word/after whitespace */
+ st_wordcmp, /* Comparing this word */
+ st_wordskip, /* Miscompare, skip */
+ st_bufcpy, /* Copying this to buffer */
+ } state = st_wordstart;
+
+ if (!cmdline)
+ return -1; /* No command line */
+
+ /*
+ * This 'pos' check ensures we do not overrun
+ * a non-NULL-terminated 'cmdline'
+ */
+ while (pos++ < max_cmdline_size) {
+ c = *(char *)cmdline++;
+ if (!c)
+ break;
+
+ switch (state) {
+ case st_wordstart:
+ if (myisspace(c))
+ break;
+
+ state = st_wordcmp;
+ opptr = option;
+ /* fall through */
+
+ case st_wordcmp:
+ if ((c == '=') && !*opptr) {
+ /*
+ * We matched all the way to the end of the
+ * option we were looking for, prepare to
+ * copy the argument.
+ */
+ len = 0;
+ bufptr = buffer;
+ state = st_bufcpy;
+ break;
+ } else if (c == *opptr++) {
+ /*
+ * We are currently matching, so continue
+ * to the next character on the cmdline.
+ */
+ break;
+ }
+ state = st_wordskip;
+ /* fall through */
+
+ case st_wordskip:
+ if (myisspace(c))
+ state = st_wordstart;
+ break;
+
+ case st_bufcpy:
+ if (myisspace(c)) {
+ state = st_wordstart;
+ } else {
+ /*
+ * Increment len, but don't overrun the
+ * supplied buffer and leave room for the
+ * NULL terminator.
+ */
+ if (++len < bufsize)
+ *bufptr++ = c;
+ }
+ break;
+ }
+ }
+
+ if (bufsize)
+ *bufptr = '\0';
+
+ return len;
+}
+
int cmdline_find_option_bool(const char *cmdline, const char *option)
{
return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
}
+
+int cmdline_find_option(const char *cmdline, const char *option, char *buffer,
+ int bufsize)
+{
+ return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option,
+ buffer, bufsize);
+}
diff --git a/arch/x86/math-emu/div_Xsig.S b/arch/x86/math-emu/div_Xsig.S
index f77ba3058b31..066996dba6a2 100644
--- a/arch/x86/math-emu/div_Xsig.S
+++ b/arch/x86/math-emu/div_Xsig.S
@@ -363,3 +363,4 @@ L_bugged_2:
pop %ebx
jmp L_exit
#endif /* PARANOID */
+ENDPROC(div_Xsig)
diff --git a/arch/x86/math-emu/div_small.S b/arch/x86/math-emu/div_small.S
index 47099628fa4c..2c71527bd917 100644
--- a/arch/x86/math-emu/div_small.S
+++ b/arch/x86/math-emu/div_small.S
@@ -44,4 +44,4 @@ ENTRY(FPU_div_small)
leave
ret
-
+ENDPROC(FPU_div_small)
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 0203baefb5c0..d4a7df2205b8 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -147,7 +147,7 @@ void math_emulate(struct math_emu_info *info)
}
code_descriptor = FPU_get_ldt_descriptor(FPU_CS);
- if (SEG_D_SIZE(code_descriptor)) {
+ if (code_descriptor.d) {
/* The above test may be wrong, the book is not clear */
/* Segmented 32 bit protected mode */
addr_modes.default_mode = SEG32;
@@ -155,11 +155,10 @@ void math_emulate(struct math_emu_info *info)
/* 16 bit protected mode */
addr_modes.default_mode = PM16;
}
- FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor);
- code_limit = code_base
- + (SEG_LIMIT(code_descriptor) +
- 1) * SEG_GRANULARITY(code_descriptor)
- - 1;
+ FPU_EIP += code_base = seg_get_base(&code_descriptor);
+ code_limit = seg_get_limit(&code_descriptor) + 1;
+ code_limit *= seg_get_granularity(&code_descriptor);
+ code_limit += code_base - 1;
if (code_limit < code_base)
code_limit = 0xffffffff;
}
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index a179254a5122..699f329f1d40 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -34,17 +34,43 @@ static inline struct desc_struct FPU_get_ldt_descriptor(unsigned seg)
return ret;
}
-#define SEG_D_SIZE(x) ((x).b & (3 << 21))
-#define SEG_G_BIT(x) ((x).b & (1 << 23))
-#define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1)
-#define SEG_286_MODE(x) ((x).b & ( 0xff000000 | 0xf0000 | (1 << 23)))
-#define SEG_BASE_ADDR(s) (((s).b & 0xff000000) \
- | (((s).b & 0xff) << 16) | ((s).a >> 16))
-#define SEG_LIMIT(s) (((s).b & 0xff0000) | ((s).a & 0xffff))
-#define SEG_EXECUTE_ONLY(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 11))
-#define SEG_WRITE_PERM(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 9))
-#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \
- == (1 << 10))
+#define SEG_TYPE_WRITABLE (1U << 1)
+#define SEG_TYPE_EXPANDS_DOWN (1U << 2)
+#define SEG_TYPE_EXECUTE (1U << 3)
+#define SEG_TYPE_EXPAND_MASK (SEG_TYPE_EXPANDS_DOWN | SEG_TYPE_EXECUTE)
+#define SEG_TYPE_EXECUTE_MASK (SEG_TYPE_WRITABLE | SEG_TYPE_EXECUTE)
+
+static inline unsigned long seg_get_base(struct desc_struct *d)
+{
+ unsigned long base = (unsigned long)d->base2 << 24;
+
+ return base | ((unsigned long)d->base1 << 16) | d->base0;
+}
+
+static inline unsigned long seg_get_limit(struct desc_struct *d)
+{
+ return ((unsigned long)d->limit1 << 16) | d->limit0;
+}
+
+static inline unsigned long seg_get_granularity(struct desc_struct *d)
+{
+ return d->g ? 4096 : 1;
+}
+
+static inline bool seg_expands_down(struct desc_struct *d)
+{
+ return (d->type & SEG_TYPE_EXPAND_MASK) == SEG_TYPE_EXPANDS_DOWN;
+}
+
+static inline bool seg_execute_only(struct desc_struct *d)
+{
+ return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_EXECUTE;
+}
+
+static inline bool seg_writable(struct desc_struct *d)
+{
+ return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_WRITABLE;
+}
#define I387 (&current->thread.fpu.state)
#define FPU_info (I387->soft.info)
diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c
index b8ef9f9d2ffc..c48967c6a0e2 100644
--- a/arch/x86/math-emu/get_address.c
+++ b/arch/x86/math-emu/get_address.c
@@ -159,17 +159,18 @@ static long pm_address(u_char FPU_modrm, u_char segment,
}
descriptor = FPU_get_ldt_descriptor(addr->selector);
- base_address = SEG_BASE_ADDR(descriptor);
+ base_address = seg_get_base(&descriptor);
address = base_address + offset;
- limit = base_address
- + (SEG_LIMIT(descriptor) + 1) * SEG_GRANULARITY(descriptor) - 1;
+ limit = seg_get_limit(&descriptor) + 1;
+ limit *= seg_get_granularity(&descriptor);
+ limit += base_address - 1;
if (limit < base_address)
limit = 0xffffffff;
- if (SEG_EXPAND_DOWN(descriptor)) {
- if (SEG_G_BIT(descriptor))
+ if (seg_expands_down(&descriptor)) {
+ if (descriptor.g) {
seg_top = 0xffffffff;
- else {
+ } else {
seg_top = base_address + (1 << 20);
if (seg_top < base_address)
seg_top = 0xffffffff;
@@ -182,8 +183,8 @@ static long pm_address(u_char FPU_modrm, u_char segment,
(address > limit) || (address < base_address) ? 0 :
((limit - address) >= 254 ? 255 : limit - address + 1);
}
- if (SEG_EXECUTE_ONLY(descriptor) ||
- (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT))) {
+ if (seg_execute_only(&descriptor) ||
+ (!seg_writable(&descriptor) && (FPU_modrm & FPU_WRITE_BIT))) {
access_limit = 0;
}
return address;
diff --git a/arch/x86/math-emu/mul_Xsig.S b/arch/x86/math-emu/mul_Xsig.S
index 717785a53eb4..22e0631bb85a 100644
--- a/arch/x86/math-emu/mul_Xsig.S
+++ b/arch/x86/math-emu/mul_Xsig.S
@@ -62,6 +62,7 @@ ENTRY(mul32_Xsig)
popl %esi
leave
ret
+ENDPROC(mul32_Xsig)
ENTRY(mul64_Xsig)
@@ -114,6 +115,7 @@ ENTRY(mul64_Xsig)
popl %esi
leave
ret
+ENDPROC(mul64_Xsig)
@@ -173,4 +175,4 @@ ENTRY(mul_Xsig_Xsig)
popl %esi
leave
ret
-
+ENDPROC(mul_Xsig_Xsig)
diff --git a/arch/x86/math-emu/polynom_Xsig.S b/arch/x86/math-emu/polynom_Xsig.S
index 17315c89ff3d..a9aaf414135d 100644
--- a/arch/x86/math-emu/polynom_Xsig.S
+++ b/arch/x86/math-emu/polynom_Xsig.S
@@ -133,3 +133,4 @@ L_accum_done:
popl %esi
leave
ret
+ENDPROC(polynomial_Xsig)
diff --git a/arch/x86/math-emu/reg_norm.S b/arch/x86/math-emu/reg_norm.S
index 8b6352efceef..53ac1a343c69 100644
--- a/arch/x86/math-emu/reg_norm.S
+++ b/arch/x86/math-emu/reg_norm.S
@@ -94,6 +94,7 @@ L_overflow:
call arith_overflow
pop %ebx
jmp L_exit
+ENDPROC(FPU_normalize)
@@ -145,3 +146,4 @@ L_exit_nuo_zero:
popl %ebx
leave
ret
+ENDPROC(FPU_normalize_nuo)
diff --git a/arch/x86/math-emu/reg_round.S b/arch/x86/math-emu/reg_round.S
index d1d4e48b4f67..41af5b208d88 100644
--- a/arch/x86/math-emu/reg_round.S
+++ b/arch/x86/math-emu/reg_round.S
@@ -706,3 +706,5 @@ L_exception_exit:
mov $-1,%eax
jmp fpu_reg_round_special_exit
#endif /* PARANOID */
+
+ENDPROC(FPU_round)
diff --git a/arch/x86/math-emu/reg_u_add.S b/arch/x86/math-emu/reg_u_add.S
index 47c4c2434d85..3b1bc5e9b2f6 100644
--- a/arch/x86/math-emu/reg_u_add.S
+++ b/arch/x86/math-emu/reg_u_add.S
@@ -165,3 +165,4 @@ L_exit:
leave
ret
#endif /* PARANOID */
+ENDPROC(FPU_u_add)
diff --git a/arch/x86/math-emu/reg_u_div.S b/arch/x86/math-emu/reg_u_div.S
index cc00654b6f9a..796eb5ab921b 100644
--- a/arch/x86/math-emu/reg_u_div.S
+++ b/arch/x86/math-emu/reg_u_div.S
@@ -469,3 +469,5 @@ L_exit:
leave
ret
#endif /* PARANOID */
+
+ENDPROC(FPU_u_div)
diff --git a/arch/x86/math-emu/reg_u_mul.S b/arch/x86/math-emu/reg_u_mul.S
index 973f12af97df..6196f68cf3c1 100644
--- a/arch/x86/math-emu/reg_u_mul.S
+++ b/arch/x86/math-emu/reg_u_mul.S
@@ -146,3 +146,4 @@ L_exit:
ret
#endif /* PARANOID */
+ENDPROC(FPU_u_mul)
diff --git a/arch/x86/math-emu/reg_u_sub.S b/arch/x86/math-emu/reg_u_sub.S
index 1b6c24801d22..d115b900919a 100644
--- a/arch/x86/math-emu/reg_u_sub.S
+++ b/arch/x86/math-emu/reg_u_sub.S
@@ -270,3 +270,4 @@ L_exit:
popl %esi
leave
ret
+ENDPROC(FPU_u_sub)
diff --git a/arch/x86/math-emu/round_Xsig.S b/arch/x86/math-emu/round_Xsig.S
index bbe0e87718e4..87c99749a495 100644
--- a/arch/x86/math-emu/round_Xsig.S
+++ b/arch/x86/math-emu/round_Xsig.S
@@ -78,7 +78,7 @@ L_exit:
popl %ebx
leave
ret
-
+ENDPROC(round_Xsig)
@@ -138,4 +138,4 @@ L_n_exit:
popl %ebx
leave
ret
-
+ENDPROC(norm_Xsig)
diff --git a/arch/x86/math-emu/shr_Xsig.S b/arch/x86/math-emu/shr_Xsig.S
index 31cdd118e918..c8552edeec75 100644
--- a/arch/x86/math-emu/shr_Xsig.S
+++ b/arch/x86/math-emu/shr_Xsig.S
@@ -85,3 +85,4 @@ L_more_than_95:
popl %esi
leave
ret
+ENDPROC(shr_Xsig)
diff --git a/arch/x86/math-emu/wm_shrx.S b/arch/x86/math-emu/wm_shrx.S
index 518428317985..340dd6897f85 100644
--- a/arch/x86/math-emu/wm_shrx.S
+++ b/arch/x86/math-emu/wm_shrx.S
@@ -92,6 +92,7 @@ L_more_than_95:
popl %esi
leave
ret
+ENDPROC(FPU_shrx)
/*---------------------------------------------------------------------------+
@@ -202,3 +203,4 @@ Ls_more_than_95:
popl %esi
leave
ret
+ENDPROC(FPU_shrxs)
diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S
index d258f59564e1..695afae38fdf 100644
--- a/arch/x86/math-emu/wm_sqrt.S
+++ b/arch/x86/math-emu/wm_sqrt.S
@@ -468,3 +468,4 @@ sqrt_more_prec_large:
/* Our estimate is too large */
movl $0x7fffff00,%eax
jmp sqrt_round_result
+ENDPROC(wm_sqrt)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 0fbdcb64f9f8..72bf8c01c6e3 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -39,3 +39,5 @@ obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 0470826d2bdc..5e3ac6fe6c9e 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -13,12 +13,12 @@
*/
#include <linux/debugfs.h>
+#include <linux/kasan.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
-#include <asm/kasan.h>
#include <asm/pgtable.h>
/*
@@ -138,7 +138,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
{
pgprotval_t pr = pgprot_val(prot);
static const char * const level_name[] =
- { "cr3", "pgd", "pud", "pmd", "pte" };
+ { "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
if (!pgprot_val(prot)) {
/* Not present */
@@ -162,12 +162,12 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
pt_dump_cont_printf(m, dmsg, " ");
/* Bit 7 has a different meaning on level 3 vs 4 */
- if (level <= 3 && pr & _PAGE_PSE)
+ if (level <= 4 && pr & _PAGE_PSE)
pt_dump_cont_printf(m, dmsg, "PSE ");
else
pt_dump_cont_printf(m, dmsg, " ");
- if ((level == 4 && pr & _PAGE_PAT) ||
- ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
+ if ((level == 5 && pr & _PAGE_PAT) ||
+ ((level == 4 || level == 3) && pr & _PAGE_PAT_LARGE))
pt_dump_cont_printf(m, dmsg, "PAT ");
else
pt_dump_cont_printf(m, dmsg, " ");
@@ -188,11 +188,12 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
*/
static unsigned long normalize_addr(unsigned long u)
{
-#ifdef CONFIG_X86_64
- return (signed long)(u << 16) >> 16;
-#else
- return u;
-#endif
+ int shift;
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return u;
+
+ shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
+ return (signed long)(u << shift) >> shift;
}
/*
@@ -297,32 +298,62 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
for (i = 0; i < PTRS_PER_PTE; i++) {
prot = pte_flags(*start);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
- note_page(m, st, __pgprot(prot), 4);
+ note_page(m, st, __pgprot(prot), 5);
start++;
}
}
+#ifdef CONFIG_KASAN
+
+/*
+ * This is an optimization for the KASAN=y case. Since all kasan page tables
+ * eventually point to the kasan_zero_page, we can call note_page()
+ * right away without walking through the lower-level page tables. This saves
+ * us dozens of seconds (minutes for a 5-level config) while checking for
+ * W+X mappings or reading the kernel_page_tables debugfs file.
+ */
+static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
+ void *pt)
+{
+ if (__pa(pt) == __pa(kasan_zero_pmd) ||
+#ifdef CONFIG_X86_5LEVEL
+ __pa(pt) == __pa(kasan_zero_p4d) ||
+#endif
+ __pa(pt) == __pa(kasan_zero_pud)) {
+ pgprotval_t prot = pte_flags(kasan_zero_pte[0]);
+ note_page(m, st, __pgprot(prot), 5);
+ return true;
+ }
+ return false;
+}
+#else
+static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
+ void *pt)
+{
+ return false;
+}
+#endif
#if PTRS_PER_PMD > 1
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P)
{
int i;
- pmd_t *start;
+ pmd_t *start, *pmd_start;
pgprotval_t prot;
- start = (pmd_t *)pud_page_vaddr(addr);
+ pmd_start = start = (pmd_t *)pud_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PMD; i++) {
st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
if (!pmd_none(*start)) {
if (pmd_large(*start) || !pmd_present(*start)) {
prot = pmd_flags(*start);
- note_page(m, st, __pgprot(prot), 3);
- } else {
+ note_page(m, st, __pgprot(prot), 4);
+ } else if (!kasan_page_table(m, st, pmd_start)) {
walk_pte_level(m, st, *start,
P + i * PMD_LEVEL_MULT);
}
} else
- note_page(m, st, __pgprot(0), 3);
+ note_page(m, st, __pgprot(0), 4);
start++;
}
}
@@ -335,39 +366,27 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
#if PTRS_PER_PUD > 1
-/*
- * This is an optimization for CONFIG_DEBUG_WX=y + CONFIG_KASAN=y
- * KASAN fills page tables with the same values. Since there is no
- * point in checking page table more than once we just skip repeated
- * entries. This saves us dozens of seconds during boot.
- */
-static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx)
-{
- return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud));
-}
-
static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P)
{
int i;
- pud_t *start;
+ pud_t *start, *pud_start;
pgprotval_t prot;
pud_t *prev_pud = NULL;
- start = (pud_t *)p4d_page_vaddr(addr);
+ pud_start = start = (pud_t *)p4d_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PUD; i++) {
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
- if (!pud_none(*start) &&
- !pud_already_checked(prev_pud, start, st->check_wx)) {
+ if (!pud_none(*start)) {
if (pud_large(*start) || !pud_present(*start)) {
prot = pud_flags(*start);
- note_page(m, st, __pgprot(prot), 2);
- } else {
+ note_page(m, st, __pgprot(prot), 3);
+ } else if (!kasan_page_table(m, st, pud_start)) {
walk_pmd_level(m, st, *start,
P + i * PUD_LEVEL_MULT);
}
} else
- note_page(m, st, __pgprot(0), 2);
+ note_page(m, st, __pgprot(0), 3);
prev_pud = start;
start++;
@@ -385,10 +404,10 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
{
int i;
- p4d_t *start;
+ p4d_t *start, *p4d_start;
pgprotval_t prot;
- start = (p4d_t *)pgd_page_vaddr(addr);
+ p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_P4D; i++) {
st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT);
@@ -396,7 +415,7 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
if (p4d_large(*start) || !p4d_present(*start)) {
prot = p4d_flags(*start);
note_page(m, st, __pgprot(prot), 2);
- } else {
+ } else if (!kasan_page_table(m, st, p4d_start)) {
walk_pud_level(m, st, *start,
P + i * P4D_LEVEL_MULT);
}
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 0ea8afcb929c..c076f710de4c 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -36,6 +36,48 @@ bool ex_handler_fault(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL_GPL(ex_handler_fault);
+/*
+ * Handler for UD0 exception following a failed test against the
+ * result of a refcount inc/dec/add/sub.
+ */
+bool ex_handler_refcount(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ /* First unconditionally saturate the refcount. */
+ *(int *)regs->cx = INT_MIN / 2;
+
+ /*
+ * Strictly speaking, this reports the fixup destination, not
+ * the fault location, and not the actually overflowing
+ * instruction, which is the instruction before the "js", but
+ * since that instruction could be a variety of lengths, just
+ * report the location after the overflow, which should be close
+ * enough for finding the overflow, as it's at least back in
+ * the function, having returned from .text.unlikely.
+ */
+ regs->ip = ex_fixup_addr(fixup);
+
+ /*
+ * This function has been called because either a negative refcount
+ * value was seen by any of the refcount functions, or a zero
+ * refcount value was seen by refcount_dec().
+ *
+ * If we crossed from INT_MAX to INT_MIN, OF (Overflow Flag: result
+ * wrapped around) will be set. Additionally, seeing the refcount
+ * reach 0 will set ZF (Zero Flag: result was zero). In each of
+ * these cases we want a report, since it's a boundary condition.
+ *
+ */
+ if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) {
+ bool zero = regs->flags & X86_EFLAGS_ZF;
+
+ refcount_error_report(regs, zero ? "hit zero" : "overflow");
+ }
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_refcount);
+
bool ex_handler_ext(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
@@ -142,7 +184,7 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
* undefined. I'm not sure which CPUs do this, but at least
* the 486 DX works this way.
*/
- if ((regs->cs & 0xFFFF) != __KERNEL_CS)
+ if (regs->cs != __KERNEL_CS)
goto fail;
/*
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2a1fa10c6a98..b836a7274e12 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -396,14 +396,18 @@ static void dump_pagetable(unsigned long address)
pte_t *pte;
#ifdef CONFIG_X86_PAE
- printk("*pdpt = %016Lx ", pgd_val(*pgd));
+ pr_info("*pdpt = %016Lx ", pgd_val(*pgd));
if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
goto out;
+#define pr_pde pr_cont
+#else
+#define pr_pde pr_info
#endif
p4d = p4d_offset(pgd, address);
pud = pud_offset(p4d, address);
pmd = pmd_offset(pud, address);
- printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
+ pr_pde("*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
+#undef pr_pde
/*
* We must not directly access the pte in the highpte
@@ -415,9 +419,9 @@ static void dump_pagetable(unsigned long address)
goto out;
pte = pte_offset_kernel(pmd, address);
- printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
+ pr_cont("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
out:
- printk("\n");
+ pr_cont("\n");
}
#else /* CONFIG_X86_64: */
@@ -565,7 +569,7 @@ static void dump_pagetable(unsigned long address)
if (bad_address(pgd))
goto bad;
- printk("PGD %lx ", pgd_val(*pgd));
+ pr_info("PGD %lx ", pgd_val(*pgd));
if (!pgd_present(*pgd))
goto out;
@@ -574,7 +578,7 @@ static void dump_pagetable(unsigned long address)
if (bad_address(p4d))
goto bad;
- printk("P4D %lx ", p4d_val(*p4d));
+ pr_cont("P4D %lx ", p4d_val(*p4d));
if (!p4d_present(*p4d) || p4d_large(*p4d))
goto out;
@@ -582,7 +586,7 @@ static void dump_pagetable(unsigned long address)
if (bad_address(pud))
goto bad;
- printk("PUD %lx ", pud_val(*pud));
+ pr_cont("PUD %lx ", pud_val(*pud));
if (!pud_present(*pud) || pud_large(*pud))
goto out;
@@ -590,7 +594,7 @@ static void dump_pagetable(unsigned long address)
if (bad_address(pmd))
goto bad;
- printk("PMD %lx ", pmd_val(*pmd));
+ pr_cont("PMD %lx ", pmd_val(*pmd));
if (!pmd_present(*pmd) || pmd_large(*pmd))
goto out;
@@ -598,12 +602,12 @@ static void dump_pagetable(unsigned long address)
if (bad_address(pte))
goto bad;
- printk("PTE %lx", pte_val(*pte));
+ pr_cont("PTE %lx", pte_val(*pte));
out:
- printk("\n");
+ pr_cont("\n");
return;
bad:
- printk("BAD\n");
+ pr_info("BAD\n");
}
#endif /* CONFIG_X86_64 */
@@ -1254,10 +1258,6 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
* routines.
- *
- * This function must have noinline because both callers
- * {,trace_}do_page_fault() have notrace on. Having this an actual function
- * guarantees there's a function trace entry.
*/
static noinline void
__do_page_fault(struct pt_regs *regs, unsigned long error_code,
@@ -1490,27 +1490,6 @@ good_area:
}
NOKPROBE_SYMBOL(__do_page_fault);
-dotraplinkage void notrace
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
-{
- unsigned long address = read_cr2(); /* Get the faulting address */
- enum ctx_state prev_state;
-
- /*
- * We must have this function tagged with __kprobes, notrace and call
- * read_cr2() before calling anything else. To avoid calling any kind
- * of tracing machinery before we've observed the CR2 value.
- *
- * exception_{enter,exit}() contain all sorts of tracepoints.
- */
-
- prev_state = exception_enter();
- __do_page_fault(regs, error_code, address);
- exception_exit(prev_state);
-}
-NOKPROBE_SYMBOL(do_page_fault);
-
-#ifdef CONFIG_TRACING
static nokprobe_inline void
trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
unsigned long error_code)
@@ -1521,22 +1500,24 @@ trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
trace_page_fault_kernel(address, regs, error_code);
}
+/*
+ * This function must be blacklisted from kprobes, tagged with notrace, and
+ * must call read_cr2() before calling anything else, to avoid invoking any
+ * kind of tracing machinery before we've observed the CR2 value.
+ *
+ * exception_{enter,exit}() contain all sorts of tracepoints.
+ */
dotraplinkage void notrace
-trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
- /*
- * The exception_enter and tracepoint processing could
- * trigger another page faults (user space callchain
- * reading) and destroy the original cr2 value, so read
- * the faulting address now.
- */
- unsigned long address = read_cr2();
+ unsigned long address = read_cr2(); /* Get the faulting address */
enum ctx_state prev_state;
prev_state = exception_enter();
- trace_page_fault_entries(address, regs, error_code);
+ if (trace_pagefault_enabled())
+ trace_page_fault_entries(address, regs, error_code);
+
__do_page_fault(regs, error_code, address);
exception_exit(prev_state);
}
-NOKPROBE_SYMBOL(trace_do_page_fault);
-#endif /* CONFIG_TRACING */
+NOKPROBE_SYMBOL(do_page_fault);
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 2824607df108..6d06cf33e3de 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -18,6 +18,7 @@
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/elf.h>
+#include <asm/mpx.h>
#if 0 /* This is just for testing */
struct page *
@@ -85,25 +86,38 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
info.flags = 0;
info.length = len;
info.low_limit = get_mmap_base(1);
+
+ /*
+ * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
+ * in the full address space.
+ */
info.high_limit = in_compat_syscall() ?
- tasksize_32bit() : tasksize_64bit();
+ task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW);
+
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
return vm_unmapped_area(&info);
}
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
- unsigned long addr0, unsigned long len,
+ unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
struct vm_unmapped_area_info info;
- unsigned long addr;
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = PAGE_SIZE;
info.high_limit = get_mmap_base(0);
+
+ /*
+ * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
+ * in the full address space.
+ */
+ if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall())
+ info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
+
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
addr = vm_unmapped_area(&info);
@@ -118,7 +132,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
VM_BUG_ON(addr != -ENOMEM);
info.flags = 0;
info.low_limit = TASK_UNMAPPED_BASE;
- info.high_limit = TASK_SIZE;
+ info.high_limit = TASK_SIZE_LOW;
addr = vm_unmapped_area(&info);
}
@@ -135,6 +149,11 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (len & ~huge_page_mask(h))
return -EINVAL;
+
+ addr = mpx_unmapped_area_check(addr, len, flags);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+
if (len > TASK_SIZE)
return -ENOMEM;
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index adab1595f4bd..31cea988fa36 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -51,7 +51,7 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
if (!pmd)
return -ENOMEM;
ident_pmd_init(info, pmd, addr, next);
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ set_pud(pud, __pud(__pa(pmd) | info->kernpg_flag));
}
return 0;
@@ -79,7 +79,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
if (!pud)
return -ENOMEM;
ident_pud_init(info, pud, addr, next);
- set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
+ set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag));
}
return 0;
@@ -93,6 +93,10 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
unsigned long next;
int result;
+ /* Set the default pagetable flags if not supplied */
+ if (!info->kernpg_flag)
+ info->kernpg_flag = _KERNPG_TABLE;
+
for (; addr < end; addr = next) {
pgd_t *pgd = pgd_page + pgd_index(addr);
p4d_t *p4d;
@@ -116,14 +120,14 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
if (result)
return result;
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+ set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
} else {
/*
* With p4d folded, pgd is equal to p4d.
* The pgd entry has to point to the pud page table in this case.
*/
pud_t *pud = pud_offset(p4d, 0);
- set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag));
}
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 673541eb3b3f..7777ccc0e9f9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -18,6 +18,7 @@
#include <asm/dma.h> /* for MAX_DMA_PFN */
#include <asm/microcode.h>
#include <asm/kaslr.h>
+#include <asm/hypervisor.h>
/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -636,6 +637,8 @@ void __init init_mem_mapping(void)
load_cr3(swapper_pg_dir);
__flush_tlb_all();
+ hypervisor_init_mem_mapping();
+
early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
}
@@ -812,7 +815,7 @@ void __init zone_sizes_init(void)
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
.loaded_mm = &init_mm,
- .state = 0,
+ .next_asid = 1,
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
};
EXPORT_SYMBOL_GPL(cpu_tlbstate);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 136422d7d539..048fbe8fc274 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -761,7 +761,7 @@ void __init paging_init(void)
* After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
* updating.
*/
-static void update_end_of_memory_vars(u64 start, u64 size)
+static void update_end_of_memory_vars(u64 start, u64 size)
{
unsigned long end_pfn = PFN_UP(start + size);
@@ -772,22 +772,30 @@ static void update_end_of_memory_vars(u64 start, u64 size)
}
}
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int add_pages(int nid, unsigned long start_pfn,
+ unsigned long nr_pages, bool want_memblock)
{
- unsigned long start_pfn = start >> PAGE_SHIFT;
- unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
- init_memory_mapping(start, start + size);
-
ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
WARN_ON_ONCE(ret);
/* update max_pfn, max_low_pfn and high_memory */
- update_end_of_memory_vars(start, size);
+ update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
+ nr_pages << PAGE_SHIFT);
return ret;
}
+
+int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+{
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+
+ init_memory_mapping(start, start + size);
+
+ return add_pages(nid, start_pfn, nr_pages, want_memblock);
+}
EXPORT_SYMBOL_GPL(arch_add_memory);
#define PAGE_INUSE 0xFD
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 4c1b5fd0c7ad..34f0e1847dd6 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -13,6 +13,8 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>
+#include <linux/mem_encrypt.h>
+#include <linux/efi.h>
#include <asm/set_memory.h>
#include <asm/e820/api.h>
@@ -21,6 +23,7 @@
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/pat.h>
+#include <asm/setup.h>
#include "physaddr.h"
@@ -106,12 +109,6 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
}
/*
- * Don't remap the low PCI/ISA area, it's always mapped..
- */
- if (is_ISA_range(phys_addr, last_addr))
- return (__force void __iomem *)phys_to_virt(phys_addr);
-
- /*
* Don't allow anybody to remap normal RAM that we're using..
*/
pfn = phys_addr >> PAGE_SHIFT;
@@ -340,13 +337,17 @@ void iounmap(volatile void __iomem *addr)
return;
/*
- * __ioremap special-cases the PCI/ISA range by not instantiating a
- * vm_area and by simply returning an address into the kernel mapping
- * of ISA space. So handle that here.
+ * The PCI/ISA range special-casing was removed from __ioremap()
+ * so this check, in theory, can be removed. However, there are
+ * cases where iounmap() is called for addresses not obtained via
+ * ioremap() (vga16fb for example). Add a warning so that these
+ * cases can be caught and fixed.
*/
if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
- (void __force *)addr < phys_to_virt(ISA_END_ADDRESS))
+ (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
+ WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
return;
+ }
addr = (volatile void __iomem *)
(PAGE_MASK & (unsigned long __force)addr);
@@ -399,12 +400,10 @@ void *xlate_dev_mem_ptr(phys_addr_t phys)
unsigned long offset = phys & ~PAGE_MASK;
void *vaddr;
- /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
- if (page_is_ram(start >> PAGE_SHIFT))
- return __va(phys);
+ /* memremap() maps if RAM, otherwise falls back to ioremap() */
+ vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);
- vaddr = ioremap_cache(start, PAGE_SIZE);
- /* Only add the offset on success and return NULL if the ioremap() failed: */
+ /* Only add the offset on success and return NULL if memremap() failed */
if (vaddr)
vaddr += offset;
@@ -413,11 +412,263 @@ void *xlate_dev_mem_ptr(phys_addr_t phys)
void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
{
- if (page_is_ram(phys >> PAGE_SHIFT))
- return;
+ memunmap((void *)((unsigned long)addr & PAGE_MASK));
+}
+
+/*
+ * Examine the physical address to determine if it is an area of memory
+ * that should be mapped decrypted. If the memory is not part of the
+ * kernel usable area it was accessed and created decrypted, so these
+ * areas should be mapped decrypted. And since the encryption key can
+ * change across reboots, persistent memory should also be mapped
+ * decrypted.
+ */
+static bool memremap_should_map_decrypted(resource_size_t phys_addr,
+ unsigned long size)
+{
+ int is_pmem;
+
+ /*
+ * Check if the address is part of a persistent memory region.
+ * This check covers areas added by E820, EFI and ACPI.
+ */
+ is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
+ IORES_DESC_PERSISTENT_MEMORY);
+ if (is_pmem != REGION_DISJOINT)
+ return true;
+
+ /*
+ * Check if the non-volatile attribute is set for an EFI
+ * reserved area.
+ */
+ if (efi_enabled(EFI_BOOT)) {
+ switch (efi_mem_type(phys_addr)) {
+ case EFI_RESERVED_TYPE:
+ if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
+ return true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Check if the address is outside kernel usable area */
+ switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
+ case E820_TYPE_RESERVED:
+ case E820_TYPE_ACPI:
+ case E820_TYPE_NVS:
+ case E820_TYPE_UNUSABLE:
+ case E820_TYPE_PRAM:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/*
+ * Examine the physical address to determine if it is EFI data. Check
+ * it against the boot params structure and EFI tables and memory types.
+ */
+static bool memremap_is_efi_data(resource_size_t phys_addr,
+ unsigned long size)
+{
+ u64 paddr;
+
+ /* Check if the address is part of EFI boot/runtime data */
+ if (!efi_enabled(EFI_BOOT))
+ return false;
+
+ paddr = boot_params.efi_info.efi_memmap_hi;
+ paddr <<= 32;
+ paddr |= boot_params.efi_info.efi_memmap;
+ if (phys_addr == paddr)
+ return true;
+
+ paddr = boot_params.efi_info.efi_systab_hi;
+ paddr <<= 32;
+ paddr |= boot_params.efi_info.efi_systab;
+ if (phys_addr == paddr)
+ return true;
+
+ if (efi_is_table_address(phys_addr))
+ return true;
+
+ switch (efi_mem_type(phys_addr)) {
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_RUNTIME_SERVICES_DATA:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/*
+ * Examine the physical address to determine if it is boot data by checking
+ * it against the boot params setup_data chain.
+ */
+static bool memremap_is_setup_data(resource_size_t phys_addr,
+ unsigned long size)
+{
+ struct setup_data *data;
+ u64 paddr, paddr_next;
+
+ paddr = boot_params.hdr.setup_data;
+ while (paddr) {
+ unsigned int len;
+
+ if (phys_addr == paddr)
+ return true;
+
+ data = memremap(paddr, sizeof(*data),
+ MEMREMAP_WB | MEMREMAP_DEC);
+
+ paddr_next = data->next;
+ len = data->len;
+
+ memunmap(data);
+
+ if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
+ return true;
+
+ paddr = paddr_next;
+ }
+
+ return false;
+}
+
+/*
+ * Examine the physical address to determine if it is boot data by checking
+ * it against the boot params setup_data chain (early boot version).
+ */
+static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
+ unsigned long size)
+{
+ struct setup_data *data;
+ u64 paddr, paddr_next;
+
+ paddr = boot_params.hdr.setup_data;
+ while (paddr) {
+ unsigned int len;
+
+ if (phys_addr == paddr)
+ return true;
+
+ data = early_memremap_decrypted(paddr, sizeof(*data));
+
+ paddr_next = data->next;
+ len = data->len;
+
+ early_memunmap(data, sizeof(*data));
+
+ if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
+ return true;
+
+ paddr = paddr_next;
+ }
+
+ return false;
+}
+
+/*
+ * Architecture function to determine if RAM remap is allowed. By default, a
+ * RAM remap will map the data as encrypted. Determine if a RAM remap should
+ * not be done so that the data will be mapped decrypted.
+ */
+bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
+ unsigned long flags)
+{
+ if (!sme_active())
+ return true;
+
+ if (flags & MEMREMAP_ENC)
+ return true;
+
+ if (flags & MEMREMAP_DEC)
+ return false;
+
+ if (memremap_is_setup_data(phys_addr, size) ||
+ memremap_is_efi_data(phys_addr, size) ||
+ memremap_should_map_decrypted(phys_addr, size))
+ return false;
+
+ return true;
+}
+
+/*
+ * Architecture override of __weak function to adjust the protection attributes
+ * used when remapping memory. By default, early_memremap() will map the data
+ * as encrypted. Determine if an encrypted mapping should not be done and set
+ * the appropriate protection attributes.
+ */
+pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
+ unsigned long size,
+ pgprot_t prot)
+{
+ if (!sme_active())
+ return prot;
+
+ if (early_memremap_is_setup_data(phys_addr, size) ||
+ memremap_is_efi_data(phys_addr, size) ||
+ memremap_should_map_decrypted(phys_addr, size))
+ prot = pgprot_decrypted(prot);
+ else
+ prot = pgprot_encrypted(prot);
+
+ return prot;
+}
+
+bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
+{
+ return arch_memremap_can_ram_remap(phys_addr, size, 0);
+}
+
+#ifdef CONFIG_ARCH_USE_MEMREMAP_PROT
+/* Remap memory with encryption */
+void __init *early_memremap_encrypted(resource_size_t phys_addr,
+ unsigned long size)
+{
+ return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
+}
+
+/*
+ * Remap memory with encryption and write-protected - cannot be called
+ * before pat_init() is called
+ */
+void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
+ unsigned long size)
+{
+ /* Be sure the write-protect PAT entry is set for write-protect */
+ if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP)
+ return NULL;
+
+ return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
+}
+
+/* Remap memory without encryption */
+void __init *early_memremap_decrypted(resource_size_t phys_addr,
+ unsigned long size)
+{
+ return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
+}
+
+/*
+ * Remap memory without encryption and write-protected - cannot be called
+ * before pat_init() is called
+ */
+void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
+ unsigned long size)
+{
+ /* Be sure the write-protect PAT entry is set for write-protect */
+ if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP)
+ return NULL;
- iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
+ return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
}
+#endif /* CONFIG_ARCH_USE_MEMREMAP_PROT */
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 02c9d7553409..bc84b73684b7 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -11,8 +11,8 @@
#include <asm/e820/types.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
+#include <asm/pgtable.h>
-extern pgd_t early_top_pgt[PTRS_PER_PGD];
extern struct range pfn_mapped[E820_MAX_ENTRIES];
static int __init map_range(struct range *range)
@@ -87,7 +87,7 @@ static struct notifier_block kasan_die_notifier = {
void __init kasan_early_init(void)
{
int i;
- pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL;
+ pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL | _PAGE_ENC;
pmdval_t pmd_val = __pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE;
pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE;
p4dval_t p4d_val = __pa_nodebug(kasan_zero_pud) | _KERNPG_TABLE;
@@ -153,7 +153,7 @@ void __init kasan_init(void)
*/
memset(kasan_zero_page, 0, PAGE_SIZE);
for (i = 0; i < PTRS_PER_PTE; i++) {
- pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO);
+ pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO | _PAGE_ENC);
set_pte(&kasan_zero_pte[i], pte);
}
/* Flush TLBs again to be sure that write protection applied. */
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
new file mode 100644
index 000000000000..0fbd09269757
--- /dev/null
+++ b/arch/x86/mm/mem_encrypt.c
@@ -0,0 +1,593 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/swiotlb.h>
+#include <linux/mem_encrypt.h>
+
+#include <asm/tlbflush.h>
+#include <asm/fixmap.h>
+#include <asm/setup.h>
+#include <asm/bootparam.h>
+#include <asm/set_memory.h>
+#include <asm/cacheflush.h>
+#include <asm/sections.h>
+#include <asm/processor-flags.h>
+#include <asm/msr.h>
+#include <asm/cmdline.h>
+
+static char sme_cmdline_arg[] __initdata = "mem_encrypt";
+static char sme_cmdline_on[] __initdata = "on";
+static char sme_cmdline_off[] __initdata = "off";
+
+/*
+ * Since SME related variables are set early in the boot process they must
+ * reside in the .data section so as not to be zeroed out when the .bss
+ * section is later cleared.
+ */
+unsigned long sme_me_mask __section(.data) = 0;
+EXPORT_SYMBOL_GPL(sme_me_mask);
+
+/* Buffer used for early in-place encryption by BSP, no locking needed */
+static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+/*
+ * This routine does not change the underlying encryption setting of the
+ * page(s) that map this memory. It assumes that eventually the memory is
+ * meant to be accessed as either encrypted or decrypted but the contents
+ * are currently not in the desired state.
+ *
+ * This routine follows the steps outlined in the AMD64 Architecture
+ * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place.
+ */
+static void __init __sme_early_enc_dec(resource_size_t paddr,
+ unsigned long size, bool enc)
+{
+ void *src, *dst;
+ size_t len;
+
+ if (!sme_me_mask)
+ return;
+
+ local_flush_tlb();
+ wbinvd();
+
+ /*
+ * There are a limited number of early mapping slots, so map (at most)
+ * one page at a time.
+ */
+ while (size) {
+ len = min_t(size_t, sizeof(sme_early_buffer), size);
+
+ /*
+ * Create mappings for the current and desired format of
+ * the memory. Use a write-protected mapping for the source.
+ */
+ src = enc ? early_memremap_decrypted_wp(paddr, len) :
+ early_memremap_encrypted_wp(paddr, len);
+
+ dst = enc ? early_memremap_encrypted(paddr, len) :
+ early_memremap_decrypted(paddr, len);
+
+ /*
+ * If a mapping can't be obtained to perform the operation,
+ * then eventual access of that area in the desired mode
+ * will cause a crash.
+ */
+ BUG_ON(!src || !dst);
+
+ /*
+ * Use a temporary buffer, of cache-line multiple size, to
+ * avoid data corruption as documented in the APM.
+ */
+ memcpy(sme_early_buffer, src, len);
+ memcpy(dst, sme_early_buffer, len);
+
+ early_memunmap(dst, len);
+ early_memunmap(src, len);
+
+ paddr += len;
+ size -= len;
+ }
+}
+
+void __init sme_early_encrypt(resource_size_t paddr, unsigned long size)
+{
+ __sme_early_enc_dec(paddr, size, true);
+}
+
+void __init sme_early_decrypt(resource_size_t paddr, unsigned long size)
+{
+ __sme_early_enc_dec(paddr, size, false);
+}
+
+static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size,
+ bool map)
+{
+ unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET;
+ pmdval_t pmd_flags, pmd;
+
+ /* Use early_pmd_flags but remove the encryption mask */
+ pmd_flags = __sme_clr(early_pmd_flags);
+
+ do {
+ pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0;
+ __early_make_pgtable((unsigned long)vaddr, pmd);
+
+ vaddr += PMD_SIZE;
+ paddr += PMD_SIZE;
+ size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE;
+ } while (size);
+
+ __native_flush_tlb();
+}
+
+void __init sme_unmap_bootdata(char *real_mode_data)
+{
+ struct boot_params *boot_data;
+ unsigned long cmdline_paddr;
+
+ if (!sme_active())
+ return;
+
+ /* Get the command line address before unmapping the real_mode_data */
+ boot_data = (struct boot_params *)real_mode_data;
+ cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);
+
+ __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false);
+
+ if (!cmdline_paddr)
+ return;
+
+ __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false);
+}
+
+void __init sme_map_bootdata(char *real_mode_data)
+{
+ struct boot_params *boot_data;
+ unsigned long cmdline_paddr;
+
+ if (!sme_active())
+ return;
+
+ __sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true);
+
+ /* Get the command line address after mapping the real_mode_data */
+ boot_data = (struct boot_params *)real_mode_data;
+ cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);
+
+ if (!cmdline_paddr)
+ return;
+
+ __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
+}
+
+void __init sme_early_init(void)
+{
+ unsigned int i;
+
+ if (!sme_me_mask)
+ return;
+
+ early_pmd_flags = __sme_set(early_pmd_flags);
+
+ __supported_pte_mask = __sme_set(__supported_pte_mask);
+
+ /* Update the protection map with memory encryption mask */
+ for (i = 0; i < ARRAY_SIZE(protection_map); i++)
+ protection_map[i] = pgprot_encrypted(protection_map[i]);
+}
+
+/* Architecture __weak replacement functions */
+void __init mem_encrypt_init(void)
+{
+ if (!sme_me_mask)
+ return;
+
+ /* Call into SWIOTLB to update the SWIOTLB DMA buffers */
+ swiotlb_update_mem_attributes();
+
+ pr_info("AMD Secure Memory Encryption (SME) active\n");
+}
+
+void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
+{
+ WARN(PAGE_ALIGN(size) != size,
+ "size is not page-aligned (%#lx)\n", size);
+
+ /* Make the SWIOTLB buffer area decrypted */
+ set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
+}
+
+static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
+ unsigned long end)
+{
+ unsigned long pgd_start, pgd_end, pgd_size;
+ pgd_t *pgd_p;
+
+ pgd_start = start & PGDIR_MASK;
+ pgd_end = end & PGDIR_MASK;
+
+ pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
+ pgd_size *= sizeof(pgd_t);
+
+ pgd_p = pgd_base + pgd_index(start);
+
+ memset(pgd_p, 0, pgd_size);
+}
+
+#define PGD_FLAGS _KERNPG_TABLE_NOENC
+#define P4D_FLAGS _KERNPG_TABLE_NOENC
+#define PUD_FLAGS _KERNPG_TABLE_NOENC
+#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
+ unsigned long vaddr, pmdval_t pmd_val)
+{
+ pgd_t *pgd_p;
+ p4d_t *p4d_p;
+ pud_t *pud_p;
+ pmd_t *pmd_p;
+
+ pgd_p = pgd_base + pgd_index(vaddr);
+ if (native_pgd_val(*pgd_p)) {
+ if (IS_ENABLED(CONFIG_X86_5LEVEL))
+ p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
+ else
+ pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
+ } else {
+ pgd_t pgd;
+
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ p4d_p = pgtable_area;
+ memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
+ pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
+
+ pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
+ } else {
+ pud_p = pgtable_area;
+ memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
+ pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+
+ pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
+ }
+ native_set_pgd(pgd_p, pgd);
+ }
+
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ p4d_p += p4d_index(vaddr);
+ if (native_p4d_val(*p4d_p)) {
+ pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
+ } else {
+ p4d_t p4d;
+
+ pud_p = pgtable_area;
+ memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
+ pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+
+ p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
+ native_set_p4d(p4d_p, p4d);
+ }
+ }
+
+ pud_p += pud_index(vaddr);
+ if (native_pud_val(*pud_p)) {
+ if (native_pud_val(*pud_p) & _PAGE_PSE)
+ goto out;
+
+ pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
+ } else {
+ pud_t pud;
+
+ pmd_p = pgtable_area;
+ memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
+ pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
+
+ pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
+ native_set_pud(pud_p, pud);
+ }
+
+ pmd_p += pmd_index(vaddr);
+ if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
+ native_set_pmd(pmd_p, native_make_pmd(pmd_val));
+
+out:
+ return pgtable_area;
+}
+
+static unsigned long __init sme_pgtable_calc(unsigned long len)
+{
+ unsigned long p4d_size, pud_size, pmd_size;
+ unsigned long total;
+
+ /*
+ * Perform a relatively simplistic calculation of the pagetable
+ * entries that are needed. The mappings will be covered by 2MB
+ * PMD entries so we can conservatively calculate the required
+ * number of P4D, PUD and PMD structures needed to perform the
+ * mappings. Incrementing the count for each covers the case where
+ * the addresses cross entries.
+ */
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
+ p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
+ pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
+ pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+ } else {
+ p4d_size = 0;
+ pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
+ pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+ }
+ pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
+ pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+
+ total = p4d_size + pud_size + pmd_size;
+
+ /*
+ * Now calculate the added pagetable structures needed to populate
+ * the new pagetables.
+ */
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
+ p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
+ pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
+ pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+ } else {
+ p4d_size = 0;
+ pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
+ pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+ }
+ pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
+ pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+
+ total += p4d_size + pud_size + pmd_size;
+
+ return total;
+}
+
+void __init sme_encrypt_kernel(void)
+{
+ unsigned long workarea_start, workarea_end, workarea_len;
+ unsigned long execute_start, execute_end, execute_len;
+ unsigned long kernel_start, kernel_end, kernel_len;
+ unsigned long pgtable_area_len;
+ unsigned long paddr, pmd_flags;
+ unsigned long decrypted_base;
+ void *pgtable_area;
+ pgd_t *pgd;
+
+ if (!sme_active())
+ return;
+
+ /*
+ * Prepare for encrypting the kernel by building new pagetables with
+ * the necessary attributes needed to encrypt the kernel in place.
+ *
+ * One range of virtual addresses will map the memory occupied
+ * by the kernel as encrypted.
+ *
+ * Another range of virtual addresses will map the memory occupied
+ * by the kernel as decrypted and write-protected.
+ *
+ * The use of the write-protect attribute will prevent any of the
+ * memory from being cached.
+ */
+
+ /* Physical addresses give us the identity-mapped virtual addresses */
+ kernel_start = __pa_symbol(_text);
+ kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
+ kernel_len = kernel_end - kernel_start;
+
+ /* Set the encryption workarea to be immediately after the kernel */
+ workarea_start = kernel_end;
+
+ /*
+ * Calculate required number of workarea bytes needed:
+ * executable encryption area size:
+ * stack page (PAGE_SIZE)
+ * encryption routine page (PAGE_SIZE)
+ * intermediate copy buffer (PMD_PAGE_SIZE)
+ * pagetable structures for the encryption of the kernel
+ * pagetable structures for workarea (in case not currently mapped)
+ */
+ execute_start = workarea_start;
+ execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
+ execute_len = execute_end - execute_start;
+
+ /*
+ * One PGD for both encrypted and decrypted mappings and a set of
+ * PUDs and PMDs for each of the encrypted and decrypted mappings.
+ */
+ pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
+ pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
+
+ /* PUDs and PMDs needed in the current pagetables for the workarea */
+ pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
+
+ /*
+ * The total workarea includes the executable encryption area and
+ * the pagetable area.
+ */
+ workarea_len = execute_len + pgtable_area_len;
+ workarea_end = workarea_start + workarea_len;
+
+ /*
+ * Set the address to the start of where newly created pagetable
+ * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
+ * structures are created when the workarea is added to the current
+ * pagetables and when the new encrypted and decrypted kernel
+ * mappings are populated.
+ */
+ pgtable_area = (void *)execute_end;
+
+ /*
+ * Make sure the current pagetable structure has entries for
+ * addressing the workarea.
+ */
+ pgd = (pgd_t *)native_read_cr3_pa();
+ paddr = workarea_start;
+ while (paddr < workarea_end) {
+ pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+ paddr,
+ paddr + PMD_FLAGS);
+
+ paddr += PMD_PAGE_SIZE;
+ }
+
+ /* Flush the TLB - no globals so cr3 is enough */
+ native_write_cr3(__native_read_cr3());
+
+ /*
+ * A new pagetable structure is being built to allow for the kernel
+ * to be encrypted. It starts with an empty PGD that will then be
+ * populated with new PUDs and PMDs as the encrypted and decrypted
+ * kernel mappings are created.
+ */
+ pgd = pgtable_area;
+ memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
+ pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;
+
+ /* Add encrypted kernel (identity) mappings */
+ pmd_flags = PMD_FLAGS | _PAGE_ENC;
+ paddr = kernel_start;
+ while (paddr < kernel_end) {
+ pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+ paddr,
+ paddr + pmd_flags);
+
+ paddr += PMD_PAGE_SIZE;
+ }
+
+ /*
+ * A different PGD index/entry must be used to get different
+ * pagetable entries for the decrypted mapping. Choose the next
+ * PGD index and convert it to a virtual address to be used as
+ * the base of the mapping.
+ */
+ decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
+ decrypted_base <<= PGDIR_SHIFT;
+
+ /* Add decrypted, write-protected kernel (non-identity) mappings */
+ pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
+ paddr = kernel_start;
+ while (paddr < kernel_end) {
+ pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+ paddr + decrypted_base,
+ paddr + pmd_flags);
+
+ paddr += PMD_PAGE_SIZE;
+ }
+
+ /* Add decrypted workarea mappings to both kernel mappings */
+ paddr = workarea_start;
+ while (paddr < workarea_end) {
+ pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+ paddr,
+ paddr + PMD_FLAGS);
+
+ pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+ paddr + decrypted_base,
+ paddr + PMD_FLAGS);
+
+ paddr += PMD_PAGE_SIZE;
+ }
+
+ /* Perform the encryption */
+ sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
+ kernel_len, workarea_start, (unsigned long)pgd);
+
+ /*
+ * At this point we are running encrypted. Remove the mappings for
+ * the decrypted areas - all that is needed for this is to remove
+ * the PGD entry/entries.
+ */
+ sme_clear_pgd(pgd, kernel_start + decrypted_base,
+ kernel_end + decrypted_base);
+
+ sme_clear_pgd(pgd, workarea_start + decrypted_base,
+ workarea_end + decrypted_base);
+
+ /* Flush the TLB - no globals so cr3 is enough */
+ native_write_cr3(__native_read_cr3());
+}
+
+void __init __nostackprotector sme_enable(struct boot_params *bp)
+{
+ const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
+ unsigned int eax, ebx, ecx, edx;
+ bool active_by_default;
+ unsigned long me_mask;
+ char buffer[16];
+ u64 msr;
+
+ /* Check for the SME support leaf */
+ eax = 0x80000000;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ if (eax < 0x8000001f)
+ return;
+
+ /*
+ * Check for the SME feature:
+ * CPUID Fn8000_001F[EAX] - Bit 0
+ * Secure Memory Encryption support
+ * CPUID Fn8000_001F[EBX] - Bits 5:0
+ * Pagetable bit position used to indicate encryption
+ */
+ eax = 0x8000001f;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ if (!(eax & 1))
+ return;
+
+ me_mask = 1UL << (ebx & 0x3f);
+
+ /* Check if SME is enabled */
+ msr = __rdmsr(MSR_K8_SYSCFG);
+ if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+ return;
+
+ /*
+ * Fixups have not been applied to phys_base yet and we're running
+ * identity mapped, so we must obtain the address to the SME command
+ * line argument data using rip-relative addressing.
+ */
+ asm ("lea sme_cmdline_arg(%%rip), %0"
+ : "=r" (cmdline_arg)
+ : "p" (sme_cmdline_arg));
+ asm ("lea sme_cmdline_on(%%rip), %0"
+ : "=r" (cmdline_on)
+ : "p" (sme_cmdline_on));
+ asm ("lea sme_cmdline_off(%%rip), %0"
+ : "=r" (cmdline_off)
+ : "p" (sme_cmdline_off));
+
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
+ active_by_default = true;
+ else
+ active_by_default = false;
+
+ cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
+ ((u64)bp->ext_cmd_line_ptr << 32));
+
+ cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
+
+ if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
+ sme_me_mask = me_mask;
+ else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
+ sme_me_mask = 0;
+ else
+ sme_me_mask = active_by_default ? me_mask : 0;
+}
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
new file mode 100644
index 000000000000..730e6d541df1
--- /dev/null
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -0,0 +1,149 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/processor-flags.h>
+#include <asm/msr-index.h>
+
+ .text
+ .code64
+/*
+ * sme_encrypt_execute: switch to a private stack inside the workarea, copy
+ * the __enc_copy routine into the workarea and call that copy, then restore
+ * the caller's stack and return.
+ */
+ENTRY(sme_encrypt_execute)
+
+ /*
+ * Entry parameters:
+ * RDI - virtual address for the encrypted kernel mapping
+ * RSI - virtual address for the decrypted kernel mapping
+ * RDX - length of kernel
+ * RCX - virtual address of the encryption workarea, including:
+ * - stack page (PAGE_SIZE)
+ * - encryption routine page (PAGE_SIZE)
+ * - intermediate copy buffer (PMD_PAGE_SIZE)
+ * R8 - physical address of the pagetables to use for encryption
+ */
+
+ push %rbp
+ movq %rsp, %rbp /* RBP now has original stack pointer */
+
+ /* Set up a one page stack in the non-encrypted memory area */
+ movq %rcx, %rax /* Workarea stack page */
+ leaq PAGE_SIZE(%rax), %rsp /* Set new stack pointer */
+ addq $PAGE_SIZE, %rax /* Workarea encryption routine */
+
+ /*
+ * R12 holds the kernel length below, so the caller's value is saved
+ * first. RSP has already been switched, so it is saved on the
+ * workarea stack. RDI/RSI/RCX are about to be consumed by the
+ * "rep movsb" copy, hence the arguments are parked in R10-R12.
+ */
+ push %r12
+ movq %rdi, %r10 /* Encrypted kernel */
+ movq %rsi, %r11 /* Decrypted kernel */
+ movq %rdx, %r12 /* Kernel length */
+
+ /* Copy encryption routine into the workarea */
+ movq %rax, %rdi /* Workarea encryption routine */
+ leaq __enc_copy(%rip), %rsi /* Encryption routine */
+ movq $(.L__enc_copy_end - __enc_copy), %rcx /* Encryption routine length */
+ rep movsb
+
+ /* Setup registers for call */
+ movq %r10, %rdi /* Encrypted kernel */
+ movq %r11, %rsi /* Decrypted kernel */
+ movq %r8, %rdx /* Pagetables used for encryption */
+ movq %r12, %rcx /* Kernel length */
+ movq %rax, %r8 /* Workarea encryption routine */
+ addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */
+
+ /* RAX still points at the copied routine; the copy did not modify it */
+ call *%rax /* Call the encryption routine */
+
+ pop %r12
+
+ movq %rbp, %rsp /* Restore original stack pointer */
+ pop %rbp
+
+ ret
+ENDPROC(sme_encrypt_execute)
+
+ENTRY(__enc_copy)
+/*
+ * Routine used to encrypt kernel.
+ * This routine must be run outside of the kernel proper since
+ * the kernel will be encrypted during the process. So this
+ * routine is defined here and then copied to an area outside
+ * of the kernel where it will remain and run decrypted
+ * during execution.
+ *
+ * On entry the registers must be:
+ * RDI - virtual address for the encrypted kernel mapping
+ * RSI - virtual address for the decrypted kernel mapping
+ * RDX - address of the pagetables to use for encryption
+ * RCX - length of kernel
+ * R8 - intermediate copy buffer
+ *
+ * RAX - points to this routine
+ *
+ * The kernel will be encrypted by copying from the non-encrypted
+ * kernel space to an intermediate buffer and then copying from the
+ * intermediate buffer back to the encrypted kernel space. The physical
+ * addresses of the two kernel space mappings are the same which
+ * results in the kernel being encrypted "in place".
+ *
+ * The copy loop below only terminates when the remaining length reaches
+ * exactly zero after subtracting PMD_PAGE_SIZE, so the length in RCX is
+ * expected to be a non-zero multiple of PMD_PAGE_SIZE.
+ */
+ /* Enable the new page tables */
+ mov %rdx, %cr3
+
+ /* Flush any global TLBs */
+ mov %cr4, %rdx
+ andq $~X86_CR4_PGE, %rdx
+ mov %rdx, %cr4
+ orq $X86_CR4_PGE, %rdx
+ mov %rdx, %cr4
+
+ /* Set the PAT register PA5 entry to write-protect */
+ /*
+ * RCX is needed as the MSR index for rdmsr/wrmsr, so the kernel length
+ * is kept on the stack meanwhile. rdmsr returns the MSR in EDX:EAX; the
+ * PA5 entry is modified in EDX (the high half).
+ */
+ push %rcx
+ movl $MSR_IA32_CR_PAT, %ecx
+ rdmsr
+ push %rdx /* Save original PAT value */
+ andl $0xffff00ff, %edx /* Clear PA5 */
+ orl $0x00000500, %edx /* Set PA5 to WP */
+ wrmsr
+ pop %rdx /* RDX contains original PAT value */
+ pop %rcx
+
+ movq %rcx, %r9 /* Save kernel length */
+ movq %rdi, %r10 /* Save encrypted kernel address */
+ movq %rsi, %r11 /* Save decrypted kernel address */
+
+ wbinvd /* Invalidate any cache entries */
+
+ /* Copy/encrypt 2MB at a time */
+1:
+ movq %r11, %rsi /* Source - decrypted kernel */
+ movq %r8, %rdi /* Dest - intermediate copy buffer */
+ movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
+ rep movsb
+
+ movq %r8, %rsi /* Source - intermediate copy buffer */
+ movq %r10, %rdi /* Dest - encrypted kernel */
+ movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
+ rep movsb
+
+ addq $PMD_PAGE_SIZE, %r11
+ addq $PMD_PAGE_SIZE, %r10
+ subq $PMD_PAGE_SIZE, %r9 /* Kernel length decrement */
+ jnz 1b /* Kernel length not zero? */
+
+ /* Restore PAT register */
+ /*
+ * rdmsr overwrites EDX:EAX, so the saved original EDX is parked on the
+ * stack across it. EAX is reloaded with the current low half, and the
+ * original high half is then written back.
+ */
+ push %rdx /* Save original PAT value */
+ movl $MSR_IA32_CR_PAT, %ecx
+ rdmsr
+ pop %rdx /* Restore original PAT value */
+ wrmsr
+
+ ret
+.L__enc_copy_end:
+ENDPROC(__enc_copy)
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 229d04a83f85..a99679826846 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -37,22 +37,21 @@ struct va_alignment __read_mostly va_align = {
.flags = -1,
};
-unsigned long tasksize_32bit(void)
+unsigned long task_size_32bit(void)
{
return IA32_PAGE_OFFSET;
}
-unsigned long tasksize_64bit(void)
+unsigned long task_size_64bit(int full_addr_space)
{
- return TASK_SIZE_MAX;
+ return full_addr_space ? TASK_SIZE_MAX : DEFAULT_MAP_WINDOW;
}
static unsigned long stack_maxrandom_size(unsigned long task_size)
{
unsigned long max = 0;
- if ((current->flags & PF_RANDOMIZE) &&
- !(current->personality & ADDR_NO_RANDOMIZE)) {
- max = (-1UL) & __STACK_RND_MASK(task_size == tasksize_32bit());
+ if (current->flags & PF_RANDOMIZE) {
+ max = (-1UL) & __STACK_RND_MASK(task_size == task_size_32bit());
max <<= PAGE_SHIFT;
}
@@ -79,13 +78,13 @@ static int mmap_is_legacy(void)
static unsigned long arch_rnd(unsigned int rndbits)
{
+ if (!(current->flags & PF_RANDOMIZE))
+ return 0;
return (get_random_long() & ((1UL << rndbits) - 1)) << PAGE_SHIFT;
}
unsigned long arch_mmap_rnd(void)
{
- if (!(current->flags & PF_RANDOMIZE))
- return 0;
return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
}
@@ -142,7 +141,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
- arch_rnd(mmap64_rnd_bits), tasksize_64bit());
+ arch_rnd(mmap64_rnd_bits), task_size_64bit(0));
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
/*
@@ -152,7 +151,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* mmap_base, the compat syscall uses mmap_compat_base.
*/
arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
- arch_rnd(mmap32_rnd_bits), tasksize_32bit());
+ arch_rnd(mmap32_rnd_bits), task_size_32bit());
#endif
}
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 1c34b767c84c..9ceaa955d2ba 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -355,10 +355,19 @@ int mpx_enable_management(void)
*/
bd_base = mpx_get_bounds_dir();
down_write(&mm->mmap_sem);
+
+ /* MPX doesn't support addresses above 47 bits yet. */
+ if (find_vma(mm, DEFAULT_MAP_WINDOW)) {
+ pr_warn_once("%s (%d): MPX cannot handle addresses "
+ "above 47-bits. Disabling.",
+ current->comm, current->pid);
+ ret = -ENXIO;
+ goto out;
+ }
mm->context.bd_addr = bd_base;
if (mm->context.bd_addr == MPX_INVALID_BOUNDS_DIR)
ret = -ENXIO;
-
+out:
up_write(&mm->mmap_sem);
return ret;
}
@@ -1030,3 +1039,25 @@ void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
if (ret)
force_sig(SIGSEGV, current);
}
+
+/*
+ * mpx_unmapped_area_check - keep MPX-managed mms below DEFAULT_MAP_WINDOW
+ * @addr:  requested (hint or fixed) address
+ * @len:   length of the requested mapping
+ * @flags: mmap flags; only MAP_FIXED is examined
+ *
+ * MPX cannot handle addresses above 47 bits yet.
+ *
+ * Returns @addr unchanged when the kernel is not managing MPX tables for the
+ * current mm or when the request ends at or below DEFAULT_MAP_WINDOW.
+ * Otherwise returns -ENOMEM for a MAP_FIXED request or for a length larger
+ * than the whole window, and 0 to drop the hint so that the search happens
+ * within DEFAULT_MAP_WINDOW.
+ */
+unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len,
+		unsigned long flags)
+{
+	/* Nothing to restrict: not an MPX mm, or the range already fits. */
+	if (!kernel_managing_mpx_tables(current->mm) ||
+	    addr + len <= DEFAULT_MAP_WINDOW)
+		return addr;
+
+	/*
+	 * A fixed mapping cannot be moved, and a request larger than the
+	 * whole allowed area cannot be satisfied by resetting the hint --
+	 * fail early in both cases.
+	 */
+	if ((flags & MAP_FIXED) || len > DEFAULT_MAP_WINDOW)
+		return -ENOMEM;
+
+	/* Discard the hint; look for an area within DEFAULT_MAP_WINDOW. */
+	return 0;
+}
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index a8f90ce3dedf..d805162e6045 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -75,13 +75,15 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
/*
* Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
- * to max_addr. The return value is the number of nodes allocated.
+ * to max_addr.
+ *
+ * Returns zero on success or negative on error.
*/
static int __init split_nodes_interleave(struct numa_meminfo *ei,
struct numa_meminfo *pi,
u64 addr, u64 max_addr, int nr_nodes)
{
- nodemask_t physnode_mask = NODE_MASK_NONE;
+ nodemask_t physnode_mask = numa_nodes_parsed;
u64 size;
int big;
int nid = 0;
@@ -116,9 +118,6 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
return -1;
}
- for (i = 0; i < pi->nr_blks; i++)
- node_set(pi->blk[i].nid, physnode_mask);
-
/*
* Continue to fill physical nodes with fake nodes until there is no
* memory left on any of them.
@@ -200,13 +199,15 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
/*
* Sets up fake nodes of `size' interleaved over physical nodes ranging from
- * `addr' to `max_addr'. The return value is the number of nodes allocated.
+ * `addr' to `max_addr'.
+ *
+ * Returns zero on success or negative on error.
*/
static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
struct numa_meminfo *pi,
u64 addr, u64 max_addr, u64 size)
{
- nodemask_t physnode_mask = NODE_MASK_NONE;
+ nodemask_t physnode_mask = numa_nodes_parsed;
u64 min_size;
int nid = 0;
int i, ret;
@@ -231,9 +232,6 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
}
size &= FAKE_NODE_MIN_HASH_MASK;
- for (i = 0; i < pi->nr_blks; i++)
- node_set(pi->blk[i].nid, physnode_mask);
-
/*
* Fill physical nodes with fake nodes of size until there is no memory
* left on any of them.
@@ -280,6 +278,22 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
return 0;
}
+/*
+ * Scan emu_nid_to_phys[] for mapped entries.
+ *
+ * Stores in @dfl_phys_nid the physical nid of the first mapped entry, or
+ * NUMA_NO_NODE when no entry is mapped. Returns the index of the last mapped
+ * entry (0 when none is mapped).
+ */
+int __init setup_emu2phys_nid(int *dfl_phys_nid)
+{
+	int first_phys = NUMA_NO_NODE;
+	int last_emu = 0;
+	int nid;
+
+	for (nid = 0; nid < ARRAY_SIZE(emu_nid_to_phys); nid++) {
+		if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
+			continue;
+
+		last_emu = nid;
+		if (first_phys == NUMA_NO_NODE)
+			first_phys = emu_nid_to_phys[nid];
+	}
+
+	*dfl_phys_nid = first_phys;
+	return last_emu;
+}
+
/**
* numa_emulation - Emulate NUMA nodes
* @numa_meminfo: NUMA configuration to massage
@@ -376,23 +390,18 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
* Determine the max emulated nid and the default phys nid to use
* for unmapped nodes.
*/
- max_emu_nid = 0;
- dfl_phys_nid = NUMA_NO_NODE;
- for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) {
- if (emu_nid_to_phys[i] != NUMA_NO_NODE) {
- max_emu_nid = i;
- if (dfl_phys_nid == NUMA_NO_NODE)
- dfl_phys_nid = emu_nid_to_phys[i];
- }
- }
- if (dfl_phys_nid == NUMA_NO_NODE) {
- pr_warning("NUMA: Warning: can't determine default physical node, disabling emulation\n");
- goto no_emu;
- }
+ max_emu_nid = setup_emu2phys_nid(&dfl_phys_nid);
/* commit */
*numa_meminfo = ei;
+ /* Make sure numa_nodes_parsed only contains emulated nodes */
+ nodes_clear(numa_nodes_parsed);
+ for (i = 0; i < ARRAY_SIZE(ei.blk); i++)
+ if (ei.blk[i].start != ei.blk[i].end &&
+ ei.blk[i].nid != NUMA_NO_NODE)
+ node_set(ei.blk[i].nid, numa_nodes_parsed);
+
/*
* Transform __apicid_to_node table to use emulated nids by
* reverse-mapping phys_nid. The maps should always exist but fall
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 757b0bcdf712..dfb7d657cf43 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1775,6 +1775,70 @@ int set_memory_4k(unsigned long addr, int numpages)
__pgprot(0), 1, 0, NULL);
}
+/*
+ * Set (@enc == true) or clear (@enc == false) _PAGE_ENC on @numpages pages
+ * starting at @addr, using the init_mm page tables.
+ *
+ * Returns 0 without doing anything when SME is not active, otherwise the
+ * result of __change_page_attr_set_clr().
+ */
+static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
+{
+	struct cpa_data cpa;
+	unsigned long base;
+	int rc;
+
+	/* Nothing to do if the SME is not active */
+	if (!sme_active())
+		return 0;
+
+	/* Should not be working on unaligned addresses */
+	if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr))
+		addr &= PAGE_MASK;
+
+	/*
+	 * addr is handed to the CPA code by pointer below, so keep a private
+	 * copy of the (aligned) start for the flush calls.
+	 */
+	base = addr;
+
+	memset(&cpa, 0, sizeof(cpa));
+	cpa.vaddr = &addr;
+	cpa.numpages = numpages;
+	cpa.pgd = init_mm.pgd;
+	if (enc) {
+		cpa.mask_set = __pgprot(_PAGE_ENC);
+		cpa.mask_clr = __pgprot(0);
+	} else {
+		cpa.mask_set = __pgprot(0);
+		cpa.mask_clr = __pgprot(_PAGE_ENC);
+	}
+
+	/* Must avoid aliasing mappings in the highmem code */
+	kmap_flush_unused();
+	vm_unmap_aliases();
+
+	/* Caches must be flushed before the encryption attribute changes. */
+	if (static_cpu_has(X86_FEATURE_CLFLUSH))
+		cpa_flush_range(base, numpages, 1);
+	else
+		cpa_flush_all(1);
+
+	rc = __change_page_attr_set_clr(&cpa, 1);
+
+	/*
+	 * Flush TLBs once more after the change in case any speculative TLB
+	 * caching occurred; the caches do not need flushing again. The same
+	 * range/all selection as above is used rather than always calling
+	 * cpa_flush_all(), in case TLB flushing gets optimized in the
+	 * cpa_flush_range() path.
+	 */
+	if (static_cpu_has(X86_FEATURE_CLFLUSH))
+		cpa_flush_range(base, numpages, 0);
+	else
+		cpa_flush_all(0);
+
+	return rc;
+}
+
+/*
+ * Request the encryption attribute for @numpages pages starting at @addr.
+ * Returns whatever __set_memory_enc_dec() returns.
+ */
+int set_memory_encrypted(unsigned long addr, int numpages)
+{
+	const bool encrypt = true;
+
+	return __set_memory_enc_dec(addr, numpages, encrypt);
+}
+EXPORT_SYMBOL_GPL(set_memory_encrypted);
+
+/*
+ * Request removal of the encryption attribute for @numpages pages starting
+ * at @addr. Returns whatever __set_memory_enc_dec() returns.
+ */
+int set_memory_decrypted(unsigned long addr, int numpages)
+{
+	const bool encrypt = false;
+
+	return __set_memory_enc_dec(addr, numpages, encrypt);
+}
+EXPORT_SYMBOL_GPL(set_memory_decrypted);
+
int set_pages_uc(struct page *page, int numpages)
{
unsigned long addr = (unsigned long)page_address(page);
@@ -2020,6 +2084,9 @@ int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
if (!(page_flags & _PAGE_RW))
cpa.mask_clr = __pgprot(_PAGE_RW);
+ if (!(page_flags & _PAGE_ENC))
+ cpa.mask_clr = pgprot_encrypted(cpa.mask_clr);
+
cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
retval = __change_page_attr_set_clr(&cpa, 0);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 45979502f64b..fe7d57a8fb60 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -293,7 +293,7 @@ void init_cache_modes(void)
* pat_init - Initialize PAT MSR and PAT table
*
* This function initializes PAT MSR and PAT table with an OS-defined value
- * to enable additional cache attributes, WC and WT.
+ * to enable additional cache attributes, WC, WT and WP.
*
* This function must be called on all CPUs using the specific sequence of
* operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
@@ -352,7 +352,7 @@ void pat_init(void)
* 010 2 UC-: _PAGE_CACHE_MODE_UC_MINUS
* 011 3 UC : _PAGE_CACHE_MODE_UC
* 100 4 WB : Reserved
- * 101 5 WC : Reserved
+ * 101 5 WP : _PAGE_CACHE_MODE_WP
* 110 6 UC-: Reserved
* 111 7 WT : _PAGE_CACHE_MODE_WT
*
@@ -360,7 +360,7 @@ void pat_init(void)
* corresponding types in the presence of PAT errata.
*/
pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
- PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, WT);
+ PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT);
}
if (!boot_cpu_done) {
@@ -744,6 +744,9 @@ EXPORT_SYMBOL(arch_io_free_memtype_wc);
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
+ if (!phys_mem_access_encrypted(pfn << PAGE_SHIFT, size))
+ vma_prot = pgprot_decrypted(vma_prot);
+
return vma_prot;
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 508a708eb9a6..218834a3e9ad 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -56,7 +56,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
pgtable_page_dtor(pte);
paravirt_release_pte(page_to_pfn(pte));
- tlb_remove_page(tlb, pte);
+ tlb_remove_table(tlb, pte);
}
#if CONFIG_PGTABLE_LEVELS > 2
@@ -72,21 +72,21 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
tlb->need_flush_all = 1;
#endif
pgtable_pmd_page_dtor(page);
- tlb_remove_page(tlb, page);
+ tlb_remove_table(tlb, page);
}
#if CONFIG_PGTABLE_LEVELS > 3
void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
{
paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
- tlb_remove_page(tlb, virt_to_page(pud));
+ tlb_remove_table(tlb, virt_to_page(pud));
}
#if CONFIG_PGTABLE_LEVELS > 4
void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
{
paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
- tlb_remove_page(tlb, virt_to_page(p4d));
+ tlb_remove_table(tlb, virt_to_page(p4d));
}
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 014d07a80053..dbbcfd59726a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -28,6 +28,42 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/
+atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
+
+/*
+ * Pick the ASID slot to use on this CPU for @next.
+ *
+ * @next:         mm being switched to
+ * @next_tlb_gen: TLB generation of @next as read by the caller
+ * @new_asid:     out - chosen slot
+ * @need_flush:   out - true when the chosen slot must be flushed
+ *
+ * Without PCID, slot 0 is always used and always flushed. With PCID, a slot
+ * whose recorded ctx_id matches @next is reused and only flushed when its
+ * recorded tlb_gen is behind @next_tlb_gen; otherwise the next slot in
+ * sequence is taken (wrapping back to 0) and flushed.
+ */
+static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
+			    u16 *new_asid, bool *need_flush)
+{
+	u16 slot;
+
+	if (!static_cpu_has(X86_FEATURE_PCID)) {
+		*new_asid = 0;
+		*need_flush = true;
+		return;
+	}
+
+	/* Does this CPU already hold a slot for this mm? */
+	for (slot = 0; slot < TLB_NR_DYN_ASIDS; slot++) {
+		if (this_cpu_read(cpu_tlbstate.ctxs[slot].ctx_id) ==
+		    next->context.ctx_id) {
+			*new_asid = slot;
+			*need_flush =
+				this_cpu_read(cpu_tlbstate.ctxs[slot].tlb_gen) <
+				next_tlb_gen;
+			return;
+		}
+	}
+
+	/*
+	 * We don't currently own an ASID slot on this CPU.
+	 * Allocate a slot, wrapping around once the sequence runs out.
+	 */
+	slot = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
+	if (slot >= TLB_NR_DYN_ASIDS) {
+		slot = 0;
+		this_cpu_write(cpu_tlbstate.next_asid, 1);
+	}
+
+	*new_asid = slot;
+	*need_flush = true;
+}
+
void leave_mm(int cpu)
{
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@@ -43,12 +79,11 @@ void leave_mm(int cpu)
if (loaded_mm == &init_mm)
return;
- if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
- BUG();
+ /* Warn if we're not lazy. */
+ WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));
switch_mm(NULL, &init_mm, NULL);
}
-EXPORT_SYMBOL_GPL(leave_mm);
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
@@ -63,115 +98,263 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
- unsigned cpu = smp_processor_id();
struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
+ u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+ unsigned cpu = smp_processor_id();
+ u64 next_tlb_gen;
/*
- * NB: The scheduler will call us with prev == next when
- * switching from lazy TLB mode to normal mode if active_mm
- * isn't changing. When this happens, there is no guarantee
- * that CR3 (and hence cpu_tlbstate.loaded_mm) matches next.
+ * NB: The scheduler will call us with prev == next when switching
+ * from lazy TLB mode to normal mode if active_mm isn't changing.
+ * When this happens, we don't assume that CR3 (and hence
+ * cpu_tlbstate.loaded_mm) matches next.
*
* NB: leave_mm() calls us with prev == NULL and tsk == NULL.
*/
- this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ /* We don't want flush_tlb_func_* to run concurrently with us. */
+ if (IS_ENABLED(CONFIG_PROVE_LOCKING))
+ WARN_ON_ONCE(!irqs_disabled());
+
+ /*
+ * Verify that CR3 is what we think it is. This will catch
+ * hypothetical buggy code that directly switches to swapper_pg_dir
+ * without going through leave_mm() / switch_mm_irqs_off() or that
+ * does something like write_cr3(read_cr3_pa()).
+ */
+ VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
if (real_prev == next) {
+ VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
+ next->context.ctx_id);
+
+ if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
+ /*
+ * There's nothing to do: we weren't lazy, and we
+ * aren't changing our mm. We don't need to flush
+ * anything, nor do we need to update CR3, CR4, or
+ * LDTR.
+ */
+ return;
+ }
+
+ /* Resume remote flushes and then read tlb_gen. */
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+ next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+
+ if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) <
+ next_tlb_gen) {
+ /*
+ * Ideally, we'd have a flush_tlb() variant that
+ * takes the known CR3 value as input. This would
+ * be faster on Xen PV and on hypothetical CPUs
+ * on which INVPCID is fast.
+ */
+ this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
+ next_tlb_gen);
+ write_cr3(__sme_pa(next->pgd) | prev_asid);
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
+ TLB_FLUSH_ALL);
+ }
+
/*
- * There's nothing to do: we always keep the per-mm control
- * regs in sync with cpu_tlbstate.loaded_mm. Just
- * sanity-check mm_cpumask.
+ * We just exited lazy mode, which means that CR4 and/or LDTR
+ * may be stale. (Changes to the required CR4 and LDTR states
+ * are not reflected in tlb_gen.)
*/
- if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(next))))
- cpumask_set_cpu(cpu, mm_cpumask(next));
- return;
- }
+ } else {
+ u16 new_asid;
+ bool need_flush;
+
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ /*
+ * If our current stack is in vmalloc space and isn't
+ * mapped in the new pgd, we'll double-fault. Forcibly
+ * map it.
+ */
+ unsigned int index = pgd_index(current_stack_pointer());
+ pgd_t *pgd = next->pgd + index;
+
+ if (unlikely(pgd_none(*pgd)))
+ set_pgd(pgd, init_mm.pgd[index]);
+ }
+
+ /* Stop remote flushes for the previous mm */
+ if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
+ cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+
+ VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
- if (IS_ENABLED(CONFIG_VMAP_STACK)) {
/*
- * If our current stack is in vmalloc space and isn't
- * mapped in the new pgd, we'll double-fault. Forcibly
- * map it.
+ * Start remote flushes and then read tlb_gen.
*/
- unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
-
- pgd_t *pgd = next->pgd + stack_pgd_index;
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+ next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+
+ choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
+
+ if (need_flush) {
+ this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
+ write_cr3(__sme_pa(next->pgd) | new_asid);
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
+ TLB_FLUSH_ALL);
+ } else {
+ /* The new ASID is already up to date. */
+ write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
+ }
- if (unlikely(pgd_none(*pgd)))
- set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
+ this_cpu_write(cpu_tlbstate.loaded_mm, next);
+ this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
}
- this_cpu_write(cpu_tlbstate.loaded_mm, next);
+ load_mm_cr4(next);
+ switch_ldt(real_prev, next);
+}
- WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
- cpumask_set_cpu(cpu, mm_cpumask(next));
+/*
+ * Call this when reinitializing a CPU. It fixes the following potential
+ * problems:
+ *
+ * - The ASID changed from what cpu_tlbstate thinks it is (most likely
+ * because the CPU was taken down and came back up with CR3's PCID
+ * bits clear). CPU hotplug can do this.
+ *
+ * - The TLB contains junk in slots corresponding to inactive ASIDs.
+ *
+ * - The CPU went so far out to lunch that it may have missed a TLB
+ * flush.
+ *
+ * On return CR3 uses ASID 0, slot 0 of cpu_tlbstate.ctxs describes the
+ * loaded mm, and the ctx_id of every other slot is cleared.
+ */
+void initialize_tlbstate_and_flush(void)
+{
+ int i;
+ struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
+ unsigned long cr3 = __read_cr3();
- /*
- * Re-load page tables.
- *
- * This logic has an ordering constraint:
- *
- * CPU 0: Write to a PTE for 'next'
- * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
- * CPU 1: set bit 1 in next's mm_cpumask
- * CPU 1: load from the PTE that CPU 0 writes (implicit)
- *
- * We need to prevent an outcome in which CPU 1 observes
- * the new PTE value and CPU 0 observes bit 1 clear in
- * mm_cpumask. (If that occurs, then the IPI will never
- * be sent, and CPU 0's TLB will contain a stale entry.)
- *
- * The bad outcome can occur if either CPU's load is
- * reordered before that CPU's store, so both CPUs must
- * execute full barriers to prevent this from happening.
- *
- * Thus, switch_mm needs a full barrier between the
- * store to mm_cpumask and any operation that could load
- * from next->pgd. TLB fills are special and can happen
- * due to instruction fetches or for no reason at all,
- * and neither LOCK nor MFENCE orders them.
- * Fortunately, load_cr3() is serializing and gives the
- * ordering guarantee we need.
- */
- load_cr3(next->pgd);
+ /* Assert that CR3 already references the right mm. */
+ WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
 /*
- * This gets called via leave_mm() in the idle path where RCU
- * functions differently. Tracing normally uses RCU, so we have to
- * call the tracepoint specially here.
+ * Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
+ * doesn't work like other CR4 bits because it can only be set from
+ * long mode.)
+ *
+ * The CPU capability has to be tested with the feature flag
+ * (X86_FEATURE_PCID); X86_CR4_PCIDE is a CR4 bit mask and is only
+ * meaningful when applied to the CR4 value.
 */
- trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
+ !(cr4_read_shadow() & X86_CR4_PCIDE));
- /* Stop flush ipis for the previous mm */
- WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
- real_prev != &init_mm);
- cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+ /* Force ASID 0 and force a TLB flush. */
+ write_cr3(cr3 & ~CR3_PCID_MASK);
- /* Load per-mm CR4 and LDTR state */
- load_mm_cr4(next);
- switch_ldt(real_prev, next);
+ /* Reinitialize tlbstate. */
+ this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
+ this_cpu_write(cpu_tlbstate.next_asid, 1);
+ this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
+ this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
+
+ /* Forget every other slot so stale ASIDs are never matched again. */
+ for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
+ this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
 }
+/*
+ * flush_tlb_func_common()'s memory ordering requirement is that any
+ * TLB fills that happen after we flush the TLB are ordered after we
+ * read active_mm's tlb_gen. We don't need any explicit barriers
+ * because all x86 flush operations are serializing and the
+ * atomic64_read operation won't be reordered by the compiler.
+ */
static void flush_tlb_func_common(const struct flush_tlb_info *f,
bool local, enum tlb_flush_reason reason)
{
+ /*
+ * We have three different tlb_gen values in here. They are:
+ *
+ * - mm_tlb_gen: the latest generation.
+ * - local_tlb_gen: the generation that this CPU has already caught
+ * up to.
+ * - f->new_tlb_gen: the generation that the requester of the flush
+ * wants us to catch up to.
+ */
+ struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+ u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
+ u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
+
/* This code cannot presently handle being reentered. */
VM_WARN_ON(!irqs_disabled());
- if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
- leave_mm(smp_processor_id());
+ VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
+ loaded_mm->context.ctx_id);
+
+ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
+ /*
+ * We're in lazy mode -- don't flush. We can get here on
+ * remote flushes due to races and on local flushes if a
+ * kernel thread coincidentally flushes the mm it's lazily
+ * still using.
+ */
return;
}
- if (f->end == TLB_FLUSH_ALL) {
- local_flush_tlb();
- if (local)
- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
- trace_tlb_flush(reason, TLB_FLUSH_ALL);
- } else {
+ if (unlikely(local_tlb_gen == mm_tlb_gen)) {
+ /*
+ * There's nothing to do: we're already up to date. This can
+ * happen if two concurrent flushes happen -- the first flush to
+ * be handled can catch us all the way up, leaving no work for
+ * the second flush.
+ */
+ trace_tlb_flush(reason, 0);
+ return;
+ }
+
+ WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
+ WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);
+
+ /*
+ * If we get to this point, we know that our TLB is out of date.
+ * This does not strictly imply that we need to flush (it's
+ * possible that f->new_tlb_gen <= local_tlb_gen), but we're
+ * going to need to flush in the very near future, so we might
+ * as well get it over with.
+ *
+ * The only question is whether to do a full or partial flush.
+ *
+ * We do a partial flush if requested and two extra conditions
+ * are met:
+ *
+ * 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that
+ * we've always done all needed flushes to catch up to
+ * local_tlb_gen. If, for example, local_tlb_gen == 2 and
+ * f->new_tlb_gen == 3, then we know that the flush needed to bring
+ * us up to date for tlb_gen 3 is the partial flush we're
+ * processing.
+ *
+ * As an example of why this check is needed, suppose that there
+ * are two concurrent flushes. The first is a full flush that
+ * changes context.tlb_gen from 1 to 2. The second is a partial
+ * flush that changes context.tlb_gen from 2 to 3. If they get
+ * processed on this CPU in reverse order, we'll see
+ * local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
+ * If we were to use __flush_tlb_single() and set local_tlb_gen to
+ * 3, we'd break the invariant: we'd update local_tlb_gen above
+ * 1 without the full flush that's needed for tlb_gen 2.
+ *
+ * 2. f->new_tlb_gen == mm_tlb_gen. This is purely an optimization.
+ * Partial TLB flushes are not all that much cheaper than full TLB
+ * flushes, so it seems unlikely that it would be a performance win
+ * to do a partial flush if that won't bring our TLB fully up to
+ * date. By doing a full flush instead, we can increase
+ * local_tlb_gen all the way to mm_tlb_gen and we can probably
+ * avoid another flush in the very near future.
+ */
+ if (f->end != TLB_FLUSH_ALL &&
+ f->new_tlb_gen == local_tlb_gen + 1 &&
+ f->new_tlb_gen == mm_tlb_gen) {
+ /* Partial flush */
unsigned long addr;
unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
+
addr = f->start;
while (addr < f->end) {
__flush_tlb_single(addr);
@@ -180,7 +363,16 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
if (local)
count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
trace_tlb_flush(reason, nr_pages);
+ } else {
+ /* Full flush. */
+ local_flush_tlb();
+ if (local)
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ trace_tlb_flush(reason, TLB_FLUSH_ALL);
}
+
+ /* Both paths above update our state to mm_tlb_gen. */
+ this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
}
static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
@@ -214,6 +406,21 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
(info->end - info->start) >> PAGE_SHIFT);
if (is_uv_system()) {
+ /*
+ * This whole special case is confused. UV has a "Broadcast
+ * Assist Unit", which seems to be a fancy way to send IPIs.
+ * Back when x86 used an explicit TLB flush IPI, UV was
+ * optimized to use its own mechanism. These days, x86 uses
+ * smp_call_function_many(), but UV still uses a manual IPI,
+ * and that IPI's action is out of date -- it does a manual
+ * flush instead of calling flush_tlb_func_remote(). This
+ * means that the percpu tlb_gen variables won't be updated
+ * and we'll do pointless flushes on future context switches.
+ *
+ * Rather than hooking native_flush_tlb_others() here, I think
+ * that UV should be updated so that smp_call_function_many(),
+ * etc, are optimal on UV.
+ */
unsigned int cpu;
cpu = smp_processor_id();
@@ -250,8 +457,8 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
cpu = get_cpu();
- /* Synchronize with switch_mm. */
- smp_mb();
+ /* This is also a barrier that synchronizes with switch_mm(). */
+ info.new_tlb_gen = inc_mm_tlb_gen(mm);
/* Should we flush just the requested range? */
if ((end != TLB_FLUSH_ALL) &&
@@ -273,6 +480,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
flush_tlb_others(mm_cpumask(mm), &info);
+
put_cpu();
}
@@ -281,8 +489,6 @@ static void do_flush_tlb_all(void *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
__flush_tlb_all();
- if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
- leave_mm(smp_processor_id());
}
void flush_tlb_all(void)
@@ -335,6 +541,7 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
flush_tlb_others(&batch->cpumask, &info);
+
cpumask_clear(&batch->cpumask);
put_cpu();
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e1324f280e06..8c9573660d51 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -94,7 +94,9 @@ static int bpf_size_to_x86_bytes(int bpf_size)
#define X86_JNE 0x75
#define X86_JBE 0x76
#define X86_JA 0x77
+#define X86_JL 0x7C
#define X86_JGE 0x7D
+#define X86_JLE 0x7E
#define X86_JG 0x7F
static void bpf_flush_icache(void *start, void *end)
@@ -285,7 +287,7 @@ static void emit_bpf_tail_call(u8 **pprog)
EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */
offsetof(struct bpf_array, map.max_entries));
EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */
-#define OFFSET1 47 /* number of bytes to jump */
+#define OFFSET1 43 /* number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
label1 = cnt;
@@ -294,21 +296,20 @@ static void emit_bpf_tail_call(u8 **pprog)
*/
EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 36
+#define OFFSET2 32
EMIT2(X86_JA, OFFSET2); /* ja out */
label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, 36); /* mov dword ptr [rbp + 36], eax */
/* prog = array->ptrs[index]; */
- EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */
+ EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
offsetof(struct bpf_array, ptrs));
- EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */
/* if (prog == NULL)
* goto out;
*/
- EMIT4(0x48, 0x83, 0xF8, 0x00); /* cmp rax, 0 */
+ EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
#define OFFSET3 10
EMIT2(X86_JE, OFFSET3); /* je out */
label3 = cnt;
@@ -888,9 +889,13 @@ xadd: if (is_imm8(insn->off))
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP | BPF_JNE | BPF_X:
case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
/* cmp dst_reg, src_reg */
EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39,
add_2reg(0xC0, dst_reg, src_reg));
@@ -911,9 +916,13 @@ xadd: if (is_imm8(insn->off))
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
/* cmp dst_reg, imm8/32 */
EMIT1(add_1mod(0x48, dst_reg));
@@ -935,18 +944,34 @@ emit_cond_jmp: /* convert BPF opcode to x86 */
/* GT is unsigned '>', JA in x86 */
jmp_cond = X86_JA;
break;
+ case BPF_JLT:
+ /* LT is unsigned '<', JB in x86 */
+ jmp_cond = X86_JB;
+ break;
case BPF_JGE:
/* GE is unsigned '>=', JAE in x86 */
jmp_cond = X86_JAE;
break;
+ case BPF_JLE:
+ /* LE is unsigned '<=', JBE in x86 */
+ jmp_cond = X86_JBE;
+ break;
case BPF_JSGT:
/* signed '>', GT in x86 */
jmp_cond = X86_JG;
break;
+ case BPF_JSLT:
+ /* signed '<', LT in x86 */
+ jmp_cond = X86_JL;
+ break;
case BPF_JSGE:
/* signed '>=', GE in x86 */
jmp_cond = X86_JGE;
break;
+ case BPF_JSLE:
+ /* signed '<=', LE in x86 */
+ jmp_cond = X86_JLE;
+ break;
default: /* to silence gcc warning */
return -EFAULT;
}
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index dbe2132b0ed4..7a5350d08cef 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -674,7 +674,7 @@ int pcibios_add_device(struct pci_dev *dev)
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
- data = ioremap(pa_data, sizeof(*rom));
+ data = memremap(pa_data, sizeof(*rom), MEMREMAP_WB);
if (!data)
return -ENOMEM;
@@ -693,7 +693,7 @@ int pcibios_add_device(struct pci_dev *dev)
}
}
pa_data = data->next;
- iounmap(data);
+ memunmap(data);
}
set_dma_domain_ops(dev);
set_dev_domain_options(dev);
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 11e407489db0..f2228b150faa 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -618,3 +618,20 @@ static void quirk_apple_mbp_poweroff(struct pci_dev *pdev)
dev_info(dev, "can't work around MacBook Pro poweroff issue\n");
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8c10, quirk_apple_mbp_poweroff);
+
+/*
+ * VMD-enabled root ports will change the source ID for all messages
+ * to the VMD device. Rather than doing device matching with the source
+ * ID, the AER driver should traverse the child device tree, reading
+ * AER registers to find the faulting device.
+ */
+static void quirk_no_aersid(struct pci_dev *pdev)
+{
+ /* VMD Domain */
+ if (is_vmd(pdev->bus))
+ pdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_AERSID;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2030, quirk_no_aersid);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2031, quirk_no_aersid);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2032, quirk_no_aersid);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 5a18aedcb341..b901ece278dd 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -215,16 +215,23 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
struct irq_alloc_info info;
int polarity;
int ret;
+ u8 gsi;
if (dev->irq_managed && dev->irq > 0)
return 0;
+ ret = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
+ if (ret < 0) {
+ dev_warn(&dev->dev, "Failed to read interrupt line: %d\n", ret);
+ return ret;
+ }
+
switch (intel_mid_identify_cpu()) {
case INTEL_MID_CPU_CHIP_TANGIER:
polarity = IOAPIC_POL_HIGH;
/* Special treatment for IRQ0 */
- if (dev->irq == 0) {
+ if (gsi == 0) {
/*
* Skip HS UART common registers device since it has
* IRQ0 assigned and not used by the kernel.
@@ -253,10 +260,11 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
* MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to
* IOAPIC RTE entries, so we just enable RTE for the device.
*/
- ret = mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC, &info);
+ ret = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info);
if (ret < 0)
return ret;
+ dev->irq = ret;
dev->irq_managed = 1;
return 0;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index f084d8718ac4..928b6dceeca0 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -1032,25 +1032,6 @@ void __init efi_enter_virtual_mode(void)
efi_dump_pagetable();
}
-/*
- * Convenience functions to obtain memory types and attributes
- */
-u32 efi_mem_type(unsigned long phys_addr)
-{
- efi_memory_desc_t *md;
-
- if (!efi_enabled(EFI_MEMMAP))
- return 0;
-
- for_each_efi_memory_desc(md) {
- if ((md->phys_addr <= phys_addr) &&
- (phys_addr < (md->phys_addr +
- (md->num_pages << EFI_PAGE_SHIFT))))
- return md->type;
- }
- return 0;
-}
-
static int __init arch_parse_efi_cmdline(char *str)
{
if (!str) {
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 9bf72f5bfedb..12e83888e5b9 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -327,7 +327,7 @@ virt_to_phys_or_null_size(void *va, unsigned long size)
int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
{
- unsigned long pfn, text;
+ unsigned long pfn, text, pf;
struct page *page;
unsigned npages;
pgd_t *pgd;
@@ -335,7 +335,12 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
if (efi_enabled(EFI_OLD_MEMMAP))
return 0;
- efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd);
+ /*
+ * Since the PGD is encrypted, set the encryption mask so that when
+ * this value is loaded into cr3 the PGD will be decrypted during
+ * the pagetable walk.
+ */
+ efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd);
pgd = efi_pgd;
/*
@@ -345,7 +350,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
* phys_efi_set_virtual_address_map().
*/
pfn = pa_memmap >> PAGE_SHIFT;
- if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW)) {
+ pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC;
+ if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) {
pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
return 1;
}
@@ -388,7 +394,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
text = __pa(_text);
pfn = text >> PAGE_SHIFT;
- if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) {
+ pf = _PAGE_RW | _PAGE_ENC;
+ if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) {
pr_err("Failed to map kernel text 1:1\n");
return 1;
}
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
index 5a0483e7bf66..dc036e511f48 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
@@ -60,7 +60,7 @@ static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata)
return 0;
}
-static struct bt_sfi_data tng_bt_sfi_data __initdata = {
+static const struct bt_sfi_data tng_bt_sfi_data __initdata = {
.setup = tng_bt_sfi_setup,
};
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
index 9e304e2ea4f5..4f5fa65a1011 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
@@ -30,13 +30,13 @@ static int tangier_probe(struct platform_device *pdev)
{
struct irq_alloc_info info;
struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data;
- int gsi, irq;
+ int gsi = TANGIER_EXT_TIMER0_MSI;
+ int irq;
if (!pdata)
return -EINVAL;
/* IOAPIC builds identity mapping between GSI and IRQ on MID */
- gsi = pdata->irq;
ioapic_set_alloc_attr(&info, cpu_to_node(0), 1, 0);
irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info);
if (irq < 0) {
@@ -44,11 +44,11 @@ static int tangier_probe(struct platform_device *pdev)
return irq;
}
+ pdata->irq = irq;
return 0;
}
static struct intel_mid_wdt_pdata tangier_pdata = {
- .irq = TANGIER_EXT_TIMER0_MSI,
.probe = tangier_probe,
};
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 12a272582cdc..86676cec99a1 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -183,6 +183,7 @@ void __init x86_intel_mid_early_setup(void)
x86_init.timers.timer_init = intel_mid_time_init;
x86_init.timers.setup_percpu_clockev = x86_init_noop;
+ x86_init.timers.wallclock_init = intel_mid_rtc_init;
x86_init.irqs.pre_vector_init = x86_init_noop;
@@ -191,7 +192,6 @@ void __init x86_intel_mid_early_setup(void)
x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
x86_platform.calibrate_tsc = intel_mid_calibrate_tsc;
- x86_init.timers.wallclock_init = intel_mid_rtc_init;
x86_platform.get_nmi_reason = intel_mid_get_nmi_reason;
x86_init.pci.init = intel_mid_pci_init;
diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c
index ef03852ea6e8..49ec5b94c71f 100644
--- a/arch/x86/platform/intel-mid/pwr.c
+++ b/arch/x86/platform/intel-mid/pwr.c
@@ -444,7 +444,7 @@ static int mid_set_initial_state(struct mid_pwr *pwr, const u32 *states)
static int pnw_set_initial_state(struct mid_pwr *pwr)
{
/* On Penwell SRAM must stay powered on */
- const u32 states[] = {
+ static const u32 states[] = {
0xf00fffff, /* PM_SSC(0) */
0xffffffff, /* PM_SSC(1) */
0xffffffff, /* PM_SSC(2) */
@@ -455,7 +455,7 @@ static int pnw_set_initial_state(struct mid_pwr *pwr)
static int tng_set_initial_state(struct mid_pwr *pwr)
{
- const u32 states[] = {
+ static const u32 states[] = {
0xffffffff, /* PM_SSC(0) */
0xffffffff, /* PM_SSC(1) */
0xffffffff, /* PM_SSC(2) */
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 3e4bdb442fbc..f44c0bc95aa2 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -26,7 +26,7 @@
static struct bau_operations ops __ro_after_init;
/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */
-static int timeout_base_ns[] = {
+static const int timeout_base_ns[] = {
20,
160,
1280,
@@ -1216,7 +1216,7 @@ static struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
* set a bit in the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE register.
* Such a message must be ignored.
*/
-void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
+static void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
{
unsigned long mmr_image;
unsigned char swack_vec;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 78459a6d455a..4d68d59f457d 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -181,6 +181,7 @@ static void fix_processor_context(void)
#endif
load_TR_desc(); /* This does ltr */
load_mm_ldt(current->active_mm); /* This does lldt */
+ initialize_tlbstate_and_flush();
fpu__resume_cpu();
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index cd4be19c36dc..1f71980fc5e0 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -1,6 +1,7 @@
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/memblock.h>
+#include <linux/mem_encrypt.h>
#include <asm/set_memory.h>
#include <asm/pgtable.h>
@@ -59,6 +60,13 @@ static void __init setup_real_mode(void)
base = (unsigned char *)real_mode_header;
+ /*
+ * If SME is active, the trampoline area will need to be in
+ * decrypted memory in order to bring up other processors
+ * successfully.
+ */
+ set_memory_decrypted((unsigned long)base, size >> PAGE_SHIFT);
+
memcpy(base, real_mode_blob, size);
phys_base = __pa(base);
@@ -100,6 +108,10 @@ static void __init setup_real_mode(void)
trampoline_cr4_features = &trampoline_header->cr4;
*trampoline_cr4_features = mmu_cr4_features;
+ trampoline_header->flags = 0;
+ if (sme_active())
+ trampoline_header->flags |= TH_FLAGS_SME_ACTIVE;
+
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
trampoline_pgd[0] = trampoline_pgd_entry.pgd;
trampoline_pgd[511] = init_top_pgt[511].pgd;
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index dac7b20d2f9d..614fd7064d0a 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -30,6 +30,7 @@
#include <asm/msr.h>
#include <asm/segment.h>
#include <asm/processor-flags.h>
+#include <asm/realmode.h>
#include "realmode.h"
.text
@@ -92,6 +93,28 @@ ENTRY(startup_32)
movl %edx, %fs
movl %edx, %gs
+ /*
+ * Check for memory encryption support. This is a safety net in
+ * case BIOS hasn't done the necessary step of setting the bit in
+ * the MSR for this AP. If SME is active and we've gotten this far
+ * then it is safe for us to set the MSR bit and continue. If we
+ * don't we'll eventually crash trying to execute encrypted
+ * instructions.
+ */
+ bt $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
+ jnc .Ldone
+ movl $MSR_K8_SYSCFG, %ecx
+ rdmsr
+ bts $MSR_K8_SYSCFG_MEM_ENCRYPT_BIT, %eax
+ jc .Ldone
+
+ /*
+ * Memory encryption is enabled but the SME enable bit for this
+ * CPU has not been set. It is safe to set it, so do so.
+ */
+ wrmsr
+.Ldone:
+
movl pa_tr_cr4, %eax
movl %eax, %cr4 # Enable PAE mode
@@ -147,6 +170,7 @@ GLOBAL(trampoline_header)
tr_start: .space 8
GLOBAL(tr_efer) .space 8
GLOBAL(tr_cr4) .space 4
+ GLOBAL(tr_flags) .space 4
END(trampoline_header)
#include "trampoline_common.S"
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index ae4cd58c0c7a..02250b2633b8 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -50,7 +50,7 @@ void foo(void)
DEFINE(HOST_GS, GS);
DEFINE(HOST_ORIG_AX, ORIG_EAX);
#else
-#if defined(PTRACE_GETREGSET) && defined(PTRACE_SETREGSET)
+#ifdef FP_XSTATE_MAGIC1
DEFINE(HOST_FP_SIZE, sizeof(struct _xstate) / sizeof(unsigned long));
#else
DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long));
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 027987638e98..1ecd419811a2 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -17,6 +17,9 @@ config XEN_PV
bool "Xen PV guest support"
default y
depends on XEN
+ # XEN_PV is not ready to work with 5-level paging.
+ # Changes to hypervisor are also required.
+ depends on !X86_5LEVEL
select XEN_HAVE_PVMMU
select XEN_HAVE_VPMU
help
@@ -75,4 +78,6 @@ config XEN_DEBUG_FS
config XEN_PVH
bool "Support for running as a PVH guest"
depends on XEN && XEN_PVHVM && ACPI
+ # Pre-built page tables are not ready to handle 5-level paging.
+ depends on !X86_5LEVEL
def_bool n
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 87d791356ea9..de503c225ae1 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -12,6 +12,7 @@
#include <asm/setup.h>
#include <asm/hypervisor.h>
#include <asm/e820/api.h>
+#include <asm/early_ioremap.h>
#include <asm/xen/cpuid.h>
#include <asm/xen/hypervisor.h>
@@ -21,38 +22,50 @@
#include "mmu.h"
#include "smp.h"
-void __ref xen_hvm_init_shared_info(void)
+static unsigned long shared_info_pfn;
+
+void xen_hvm_init_shared_info(void)
{
struct xen_add_to_physmap xatp;
- u64 pa;
-
- if (HYPERVISOR_shared_info == &xen_dummy_shared_info) {
- /*
- * Search for a free page starting at 4kB physical address.
- * Low memory is preferred to avoid an EPT large page split up
- * by the mapping.
- * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
- * the BIOS used for HVM guests is well behaved and won't
- * clobber memory other than the first 4kB.
- */
- for (pa = PAGE_SIZE;
- !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
- memblock_is_reserved(pa);
- pa += PAGE_SIZE)
- ;
-
- memblock_reserve(pa, PAGE_SIZE);
- HYPERVISOR_shared_info = __va(pa);
- }
xatp.domid = DOMID_SELF;
xatp.idx = 0;
xatp.space = XENMAPSPACE_shared_info;
- xatp.gpfn = virt_to_pfn(HYPERVISOR_shared_info);
+ xatp.gpfn = shared_info_pfn;
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
BUG();
}
+static void __init reserve_shared_info(void)
+{
+ u64 pa;
+
+ /*
+ * Search for a free page starting at 4kB physical address.
+ * Low memory is preferred to avoid an EPT large page split up
+ * by the mapping.
+ * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
+ * the BIOS used for HVM guests is well behaved and won't
+ * clobber memory other than the first 4kB.
+ */
+ for (pa = PAGE_SIZE;
+ !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
+ memblock_is_reserved(pa);
+ pa += PAGE_SIZE)
+ ;
+
+ shared_info_pfn = PHYS_PFN(pa);
+
+ memblock_reserve(pa, PAGE_SIZE);
+ HYPERVISOR_shared_info = early_memremap(pa, PAGE_SIZE);
+}
+
+static void __init xen_hvm_init_mem_mapping(void)
+{
+ early_memunmap(HYPERVISOR_shared_info, PAGE_SIZE);
+ HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn));
+}
+
static void __init init_hvm_pv_info(void)
{
int major, minor;
@@ -153,6 +166,7 @@ static void __init xen_hvm_guest_init(void)
init_hvm_pv_info();
+ reserve_shared_info();
xen_hvm_init_shared_info();
/*
@@ -218,5 +232,6 @@ const struct hypervisor_x86 x86_hyper_xen_hvm = {
.init_platform = xen_hvm_guest_init,
.pin_vcpu = xen_pin_vcpu,
.x2apic_available = xen_x2apic_para_available,
+ .init_mem_mapping = xen_hvm_init_mem_mapping,
};
EXPORT_SYMBOL(x86_hyper_xen_hvm);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 811e4ddb3f37..ae2a2e2d6362 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -263,6 +263,13 @@ static void __init xen_init_capabilities(void)
setup_clear_cpu_cap(X86_FEATURE_MTRR);
setup_clear_cpu_cap(X86_FEATURE_ACC);
setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+ setup_clear_cpu_cap(X86_FEATURE_SME);
+
+ /*
+ * Xen PV would need some work to support PCID: CR3 handling as well
+ * as xen_flush_tlb_others() would need updating.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
if (!xen_initial_domain())
setup_clear_cpu_cap(X86_FEATURE_ACPI);
@@ -494,7 +501,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
static inline bool desc_equal(const struct desc_struct *d1,
const struct desc_struct *d2)
{
- return d1->a == d2->a && d1->b == d2->b;
+ return !memcmp(d1, d2, sizeof(*d1));
}
static void load_TLS_descriptor(struct thread_struct *t,
@@ -579,59 +586,91 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
preempt_enable();
}
+#ifdef CONFIG_X86_64
+struct trap_array_entry {
+ void (*orig)(void);
+ void (*xen)(void);
+ bool ist_okay;
+};
+
+static struct trap_array_entry trap_array[] = {
+ { debug, xen_xendebug, true },
+ { int3, xen_xenint3, true },
+ { double_fault, xen_double_fault, true },
+#ifdef CONFIG_X86_MCE
+ { machine_check, xen_machine_check, true },
+#endif
+ { nmi, xen_nmi, true },
+ { overflow, xen_overflow, false },
+#ifdef CONFIG_IA32_EMULATION
+ { entry_INT80_compat, xen_entry_INT80_compat, false },
+#endif
+ { page_fault, xen_page_fault, false },
+ { divide_error, xen_divide_error, false },
+ { bounds, xen_bounds, false },
+ { invalid_op, xen_invalid_op, false },
+ { device_not_available, xen_device_not_available, false },
+ { coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false },
+ { invalid_TSS, xen_invalid_TSS, false },
+ { segment_not_present, xen_segment_not_present, false },
+ { stack_segment, xen_stack_segment, false },
+ { general_protection, xen_general_protection, false },
+ { spurious_interrupt_bug, xen_spurious_interrupt_bug, false },
+ { coprocessor_error, xen_coprocessor_error, false },
+ { alignment_check, xen_alignment_check, false },
+ { simd_coprocessor_error, xen_simd_coprocessor_error, false },
+};
+
+static bool get_trap_addr(void **addr, unsigned int ist)
+{
+ unsigned int nr;
+ bool ist_okay = false;
+
+ /*
+ * Replace trap handler addresses by Xen specific ones.
+ * Check for known traps using IST and whitelist them.
+ * The debugger ones are the only ones we care about.
+ * Xen will handle faults like double_fault, so we should never see
+ * them. Warn if there's an unexpected IST-using fault handler.
+ */
+ for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) {
+ struct trap_array_entry *entry = trap_array + nr;
+
+ if (*addr == entry->orig) {
+ *addr = entry->xen;
+ ist_okay = entry->ist_okay;
+ break;
+ }
+ }
+
+ if (WARN_ON(ist != 0 && !ist_okay))
+ return false;
+
+ return true;
+}
+#endif
+
static int cvt_gate_to_trap(int vector, const gate_desc *val,
struct trap_info *info)
{
unsigned long addr;
- if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
+ if (val->bits.type != GATE_TRAP && val->bits.type != GATE_INTERRUPT)
return 0;
info->vector = vector;
- addr = gate_offset(*val);
+ addr = gate_offset(val);
#ifdef CONFIG_X86_64
- /*
- * Look for known traps using IST, and substitute them
- * appropriately. The debugger ones are the only ones we care
- * about. Xen will handle faults like double_fault,
- * so we should never see them. Warn if
- * there's an unexpected IST-using fault handler.
- */
- if (addr == (unsigned long)debug)
- addr = (unsigned long)xen_debug;
- else if (addr == (unsigned long)int3)
- addr = (unsigned long)xen_int3;
- else if (addr == (unsigned long)stack_segment)
- addr = (unsigned long)xen_stack_segment;
- else if (addr == (unsigned long)double_fault) {
- /* Don't need to handle these */
+ if (!get_trap_addr((void **)&addr, val->bits.ist))
return 0;
-#ifdef CONFIG_X86_MCE
- } else if (addr == (unsigned long)machine_check) {
- /*
- * when xen hypervisor inject vMCE to guest,
- * use native mce handler to handle it
- */
- ;
-#endif
- } else if (addr == (unsigned long)nmi)
- /*
- * Use the native version as well.
- */
- ;
- else {
- /* Some other trap using IST? */
- if (WARN_ON(val->ist != 0))
- return 0;
- }
#endif /* CONFIG_X86_64 */
info->address = addr;
- info->cs = gate_segment(*val);
- info->flags = val->dpl;
+ info->cs = gate_segment(val);
+ info->flags = val->bits.dpl;
/* interrupt gates clear IF */
- if (val->type == GATE_INTERRUPT)
+ if (val->bits.type == GATE_INTERRUPT)
info->flags |= 1 << 2;
return 1;
@@ -981,59 +1020,6 @@ void __ref xen_setup_vcpu_info_placement(void)
}
}
-static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
- unsigned long addr, unsigned len)
-{
- char *start, *end, *reloc;
- unsigned ret;
-
- start = end = reloc = NULL;
-
-#define SITE(op, x) \
- case PARAVIRT_PATCH(op.x): \
- if (xen_have_vcpu_info_placement) { \
- start = (char *)xen_##x##_direct; \
- end = xen_##x##_direct_end; \
- reloc = xen_##x##_direct_reloc; \
- } \
- goto patch_site
-
- switch (type) {
- SITE(pv_irq_ops, irq_enable);
- SITE(pv_irq_ops, irq_disable);
- SITE(pv_irq_ops, save_fl);
- SITE(pv_irq_ops, restore_fl);
-#undef SITE
-
- patch_site:
- if (start == NULL || (end-start) > len)
- goto default_patch;
-
- ret = paravirt_patch_insns(insnbuf, len, start, end);
-
- /* Note: because reloc is assigned from something that
- appears to be an array, gcc assumes it's non-null,
- but doesn't know its relationship with start and
- end. */
- if (reloc > start && reloc < end) {
- int reloc_off = reloc - start;
- long *relocp = (long *)(insnbuf + reloc_off);
- long delta = start - (char *)addr;
-
- *relocp += delta;
- }
- break;
-
- default_patch:
- default:
- ret = paravirt_patch_default(type, clobbers, insnbuf,
- addr, len);
- break;
- }
-
- return ret;
-}
-
static const struct pv_info xen_info __initconst = {
.shared_kernel_pmd = 0,
@@ -1043,10 +1029,6 @@ static const struct pv_info xen_info __initconst = {
.name = "Xen",
};
-static const struct pv_init_ops xen_init_ops __initconst = {
- .patch = xen_patch,
-};
-
static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.cpuid = xen_cpuid,
@@ -1244,7 +1226,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* Install Xen paravirt ops */
pv_info = xen_info;
- pv_init_ops = xen_init_ops;
+ pv_init_ops.patch = paravirt_patch_default;
pv_cpu_ops = xen_cpu_ops;
x86_platform.get_nmi_reason = xen_get_nmi_reason;
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 33e92955e09d..d4eff5676cfa 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -123,9 +123,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
.safe_halt = xen_safe_halt,
.halt = xen_halt,
-#ifdef CONFIG_X86_64
- .adjust_exception_frame = xen_adjust_exception_frame,
-#endif
};
void __init xen_init_irq_ops(void)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3be06f3caf3c..3e15345abfe7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -84,7 +84,7 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
else
rmd->mfn++;
- rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
+ rmd->mmu_update->ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
rmd->mmu_update->val = pte_val_ma(pte);
rmd->mmu_update++;
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index cab28cf2cffb..6b983b300666 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -162,26 +162,6 @@ static bool xen_page_pinned(void *ptr)
return PagePinned(page);
}
-void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
-{
- struct multicall_space mcs;
- struct mmu_update *u;
-
- trace_xen_mmu_set_domain_pte(ptep, pteval, domid);
-
- mcs = xen_mc_entry(sizeof(*u));
- u = mcs.args;
-
- /* ptep might be kmapped when using 32-bit HIGHPTE */
- u->ptr = virt_to_machine(ptep).maddr;
- u->val = pte_val_ma(pteval);
-
- MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid);
-
- xen_mc_issue(PARAVIRT_LAZY_MMU);
-}
-EXPORT_SYMBOL_GPL(xen_set_domain_pte);
-
static void xen_extend_mmu_update(const struct mmu_update *update)
{
struct multicall_space mcs;
@@ -1005,14 +985,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
/* Get the "official" set of cpus referring to our pagetable. */
if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
for_each_online_cpu(cpu) {
- if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
- && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
+ if (per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
continue;
smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1);
}
return;
}
- cpumask_copy(mask, mm_cpumask(mm));
/*
* It's possible that a vcpu may have a stale reference to our
@@ -1021,6 +999,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
* look at its actual current cr3 value, and force it to flush
* if needed.
*/
+ cpumask_clear(mask);
for_each_online_cpu(cpu) {
if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
cpumask_set_cpu(cpu, mask);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 276da636dd39..6083ba462f35 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -212,8 +212,7 @@ void __ref xen_build_mfn_list_list(void)
unsigned int level, topidx, mididx;
unsigned long *mid_mfn_p;
- if (xen_feature(XENFEAT_auto_translated_physmap) ||
- xen_start_info->flags & SIF_VIRT_P2M_4TOOLS)
+ if (xen_start_info->flags & SIF_VIRT_P2M_4TOOLS)
return;
/* Pre-initialize p2m_top_mfn to be completely missing */
@@ -269,9 +268,6 @@ void __ref xen_build_mfn_list_list(void)
void xen_setup_mfn_list_list(void)
{
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return;
-
BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
if (xen_start_info->flags & SIF_VIRT_P2M_4TOOLS)
@@ -291,9 +287,6 @@ void __init xen_build_dynamic_phys_to_machine(void)
{
unsigned long pfn;
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return;
-
xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list;
xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE);
@@ -540,9 +533,6 @@ int xen_alloc_p2m_entry(unsigned long pfn)
unsigned long addr = (unsigned long)(xen_p2m_addr + pfn);
unsigned long p2m_pfn;
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return 0;
-
ptep = lookup_address(addr, &level);
BUG_ON(!ptep || level != PG_LEVEL_4K);
pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
@@ -640,9 +630,6 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
if (unlikely(pfn_s >= xen_p2m_size))
return 0;
- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
- return pfn_e - pfn_s;
-
if (pfn_s > pfn_e)
return 0;
@@ -660,10 +647,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
pte_t *ptep;
unsigned int level;
- /* don't track P2M changes in autotranslate guests */
- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
- return true;
-
if (unlikely(pfn >= xen_p2m_size)) {
BUG_ON(mfn != INVALID_P2M_ENTRY);
return true;
@@ -711,9 +694,6 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
int i, ret = 0;
pte_t *pte;
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return 0;
-
if (kmap_ops) {
ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
kmap_ops, count);
@@ -756,9 +736,6 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
{
int i, ret = 0;
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return 0;
-
for (i = 0; i < count; i++) {
unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
unsigned long pfn = page_to_pfn(pages[i]);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index c81046323ebc..ac55c02f98e9 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -340,8 +340,6 @@ static void __init xen_do_set_identity_and_remap_chunk(
WARN_ON(size == 0);
- BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
-
mfn_save = virt_to_mfn(buf);
for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
@@ -1024,8 +1022,7 @@ void __init xen_pvmmu_arch_setup(void)
void __init xen_arch_setup(void)
{
xen_panic_handler_init();
- if (!xen_feature(XENFEAT_auto_translated_physmap))
- xen_pvmmu_arch_setup();
+ xen_pvmmu_arch_setup();
#ifdef CONFIG_ACPI
if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index eff224df813f..dcd31fa39b5d 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -1,14 +1,8 @@
/*
- * Asm versions of Xen pv-ops, suitable for either direct use or
- * inlining. The inline versions are the same as the direct-use
- * versions, with the pre- and post-amble chopped off.
- *
- * This code is encoded for size rather than absolute efficiency, with
- * a view to being able to inline as much as possible.
+ * Asm versions of Xen pv-ops, suitable for direct use.
*
* We only bother with direct forms (ie, vcpu in percpu data) of the
- * operations here; the indirect forms are better handled in C, since
- * they're generally too large to inline anyway.
+ * operations here; the indirect forms are better handled in C.
*/
#include <asm/asm-offsets.h>
@@ -16,7 +10,7 @@
#include <asm/processor-flags.h>
#include <asm/frame.h>
-#include "xen-asm.h"
+#include <linux/linkage.h>
/*
* Enable events. This clears the event mask and tests the pending
@@ -38,13 +32,11 @@ ENTRY(xen_irq_enable_direct)
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
jz 1f
-2: call check_events
+ call check_events
1:
-ENDPATCH(xen_irq_enable_direct)
FRAME_END
ret
ENDPROC(xen_irq_enable_direct)
- RELOC(xen_irq_enable_direct, 2b+1)
/*
@@ -53,10 +45,8 @@ ENDPATCH(xen_irq_enable_direct)
*/
ENTRY(xen_irq_disable_direct)
movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
-ENDPATCH(xen_irq_disable_direct)
ret
- ENDPROC(xen_irq_disable_direct)
- RELOC(xen_irq_disable_direct, 0)
+ENDPROC(xen_irq_disable_direct)
/*
* (xen_)save_fl is used to get the current interrupt enable status.
@@ -71,10 +61,8 @@ ENTRY(xen_save_fl_direct)
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
setz %ah
addb %ah, %ah
-ENDPATCH(xen_save_fl_direct)
ret
ENDPROC(xen_save_fl_direct)
- RELOC(xen_save_fl_direct, 0)
/*
@@ -101,13 +89,11 @@ ENTRY(xen_restore_fl_direct)
/* check for unmasked and pending */
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
jnz 1f
-2: call check_events
+ call check_events
1:
-ENDPATCH(xen_restore_fl_direct)
FRAME_END
ret
ENDPROC(xen_restore_fl_direct)
- RELOC(xen_restore_fl_direct, 2b+1)
/*
diff --git a/arch/x86/xen/xen-asm.h b/arch/x86/xen/xen-asm.h
deleted file mode 100644
index 465276467a47..000000000000
--- a/arch/x86/xen/xen-asm.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _XEN_XEN_ASM_H
-#define _XEN_XEN_ASM_H
-
-#include <linux/linkage.h>
-
-#define RELOC(x, v) .globl x##_reloc; x##_reloc=v
-#define ENDPATCH(x) .globl x##_end; x##_end=.
-
-/* Pseudo-flag used for virtual NMI, which we don't implement yet */
-#define XEN_EFLAGS_NMI 0x80000000
-
-#endif
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index feb6d40a0860..1200e262a116 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -1,14 +1,8 @@
/*
- * Asm versions of Xen pv-ops, suitable for either direct use or
- * inlining. The inline versions are the same as the direct-use
- * versions, with the pre- and post-amble chopped off.
- *
- * This code is encoded for size rather than absolute efficiency, with
- * a view to being able to inline as much as possible.
+ * Asm versions of Xen pv-ops, suitable for direct use.
*
* We only bother with direct forms (ie, vcpu in pda) of the
- * operations here; the indirect forms are better handled in C, since
- * they're generally too large to inline anyway.
+ * operations here; the indirect forms are better handled in C.
*/
#include <asm/thread_info.h>
@@ -18,21 +12,10 @@
#include <xen/interface/xen.h>
-#include "xen-asm.h"
+#include <linux/linkage.h>
-/*
- * Force an event check by making a hypercall, but preserve regs
- * before making the call.
- */
-check_events:
- push %eax
- push %ecx
- push %edx
- call xen_force_evtchn_callback
- pop %edx
- pop %ecx
- pop %eax
- ret
+/* Pseudo-flag used for virtual NMI, which we don't implement yet */
+#define XEN_EFLAGS_NMI 0x80000000
/*
* This is run where a normal iret would be run, with the same stack setup:
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c3df43141e70..dae2cc33afb5 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -1,14 +1,8 @@
/*
- * Asm versions of Xen pv-ops, suitable for either direct use or
- * inlining. The inline versions are the same as the direct-use
- * versions, with the pre- and post-amble chopped off.
- *
- * This code is encoded for size rather than absolute efficiency, with
- * a view to being able to inline as much as possible.
+ * Asm versions of Xen pv-ops, suitable for direct use.
*
* We only bother with direct forms (ie, vcpu in pda) of the
- * operations here; the indirect forms are better handled in C, since
- * they're generally too large to inline anyway.
+ * operations here; the indirect forms are better handled in C.
*/
#include <asm/errno.h>
@@ -20,13 +14,44 @@
#include <xen/interface/xen.h>
-#include "xen-asm.h"
+#include <linux/linkage.h>
+
+.macro xen_pv_trap name
+ENTRY(xen_\name)
+ pop %rcx
+ pop %r11
+ jmp \name
+END(xen_\name)
+.endm
-ENTRY(xen_adjust_exception_frame)
- mov 8+0(%rsp), %rcx
- mov 8+8(%rsp), %r11
- ret $16
-ENDPROC(xen_adjust_exception_frame)
+xen_pv_trap divide_error
+xen_pv_trap debug
+xen_pv_trap xendebug
+xen_pv_trap int3
+xen_pv_trap xenint3
+xen_pv_trap nmi
+xen_pv_trap overflow
+xen_pv_trap bounds
+xen_pv_trap invalid_op
+xen_pv_trap device_not_available
+xen_pv_trap double_fault
+xen_pv_trap coprocessor_segment_overrun
+xen_pv_trap invalid_TSS
+xen_pv_trap segment_not_present
+xen_pv_trap stack_segment
+xen_pv_trap general_protection
+xen_pv_trap page_fault
+xen_pv_trap spurious_interrupt_bug
+xen_pv_trap coprocessor_error
+xen_pv_trap alignment_check
+#ifdef CONFIG_X86_MCE
+xen_pv_trap machine_check
+#endif /* CONFIG_X86_MCE */
+xen_pv_trap simd_coprocessor_error
+#ifdef CONFIG_IA32_EMULATION
+xen_pv_trap entry_INT80_compat
+#endif
+xen_pv_trap hypervisor_callback
hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/*
@@ -46,9 +71,7 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
*/
ENTRY(xen_iret)
pushq $0
-1: jmp hypercall_iret
-ENDPATCH(xen_iret)
-RELOC(xen_iret, 1b+1)
+ jmp hypercall_iret
ENTRY(xen_sysret64)
/*
@@ -65,9 +88,7 @@ ENTRY(xen_sysret64)
pushq %rcx
pushq $VGCF_in_syscall
-1: jmp hypercall_iret
-ENDPATCH(xen_sysret64)
-RELOC(xen_sysret64, 1b+1)
+ jmp hypercall_iret
/*
* Xen handles syscall callbacks much like ordinary exceptions, which
@@ -82,34 +103,47 @@ RELOC(xen_sysret64, 1b+1)
* rip
* r11
* rsp->rcx
- *
- * In all the entrypoints, we undo all that to make it look like a
- * CPU-generated syscall/sysenter and jump to the normal entrypoint.
*/
-.macro undo_xen_syscall
- mov 0*8(%rsp), %rcx
- mov 1*8(%rsp), %r11
- mov 5*8(%rsp), %rsp
-.endm
-
/* Normal 64-bit system call target */
ENTRY(xen_syscall_target)
- undo_xen_syscall
- jmp entry_SYSCALL_64_after_swapgs
+ popq %rcx
+ popq %r11
+
+ /*
+ * Neither Xen nor the kernel really knows what the old SS and
+ * CS were. The kernel expects __USER_DS and __USER_CS, so
+ * report those values even though Xen will guess its own values.
+ */
+ movq $__USER_DS, 4*8(%rsp)
+ movq $__USER_CS, 1*8(%rsp)
+
+ jmp entry_SYSCALL_64_after_hwframe
ENDPROC(xen_syscall_target)
#ifdef CONFIG_IA32_EMULATION
/* 32-bit compat syscall target */
ENTRY(xen_syscall32_target)
- undo_xen_syscall
- jmp entry_SYSCALL_compat
+ popq %rcx
+ popq %r11
+
+ /*
+ * Neither Xen nor the kernel really knows what the old SS and
+ * CS were. The kernel expects __USER32_DS and __USER32_CS, so
+ * report those values even though Xen will guess its own values.
+ */
+ movq $__USER32_DS, 4*8(%rsp)
+ movq $__USER32_CS, 1*8(%rsp)
+
+ jmp entry_SYSCALL_compat_after_hwframe
ENDPROC(xen_syscall32_target)
/* 32-bit compat sysenter target */
ENTRY(xen_sysenter_target)
- undo_xen_syscall
+ mov 0*8(%rsp), %rcx
+ mov 1*8(%rsp), %r11
+ mov 5*8(%rsp), %rsp
jmp entry_SYSENTER_compat
ENDPROC(xen_sysenter_target)
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 72a8e6adebe6..a7525e95d53f 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -58,7 +58,7 @@ ENTRY(hypercall_page)
#else
ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map)
/* Map the p2m table to a 512GB-aligned user address. */
- ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M, .quad PGDIR_SIZE)
+ ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M, .quad (PUD_SIZE * PTRS_PER_PUD))
#endif
#ifdef CONFIG_XEN_PV
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 0d5004477db6..c8a6d224f7ed 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -129,23 +129,15 @@ static inline void __init xen_efi_init(void)
}
#endif
-/* Declare an asm function, along with symbols needed to make it
- inlineable */
-#define DECL_ASM(ret, name, ...) \
- __visible ret name(__VA_ARGS__); \
- extern char name##_end[] __visible; \
- extern char name##_reloc[] __visible
-
-DECL_ASM(void, xen_irq_enable_direct, void);
-DECL_ASM(void, xen_irq_disable_direct, void);
-DECL_ASM(unsigned long, xen_save_fl_direct, void);
-DECL_ASM(void, xen_restore_fl_direct, unsigned long);
+__visible void xen_irq_enable_direct(void);
+__visible void xen_irq_disable_direct(void);
+__visible unsigned long xen_save_fl_direct(void);
+__visible void xen_restore_fl_direct(unsigned long);
/* These are not functions, and cannot be called normally */
__visible void xen_iret(void);
__visible void xen_sysret32(void);
__visible void xen_sysret64(void);
-__visible void xen_adjust_exception_frame(void);
extern int xen_panic_handler_init(void);
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 2d716ebc5a5e..dff7cc39437c 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -1,5 +1,6 @@
generic-y += bug.h
generic-y += clkdev.h
+generic-y += device.h
generic-y += div64.h
generic-y += dma-contiguous.h
generic-y += emergency-restart.h
@@ -17,6 +18,7 @@ generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += param.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += rwsem.h
diff --git a/arch/xtensa/include/asm/device.h b/arch/xtensa/include/asm/device.h
deleted file mode 100644
index 1deeb8ebbb1b..000000000000
--- a/arch/xtensa/include/asm/device.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#ifndef _ASM_XTENSA_DEVICE_H
-#define _ASM_XTENSA_DEVICE_H
-
-struct dev_archdata {
-};
-
-struct pdev_archdata {
-};
-
-#endif /* _ASM_XTENSA_DEVICE_H */
diff --git a/arch/xtensa/include/asm/futex.h b/arch/xtensa/include/asm/futex.h
index b39531babec0..eaaf1ebcc7a4 100644
--- a/arch/xtensa/include/asm/futex.h
+++ b/arch/xtensa/include/asm/futex.h
@@ -44,18 +44,10 @@
: "r" (uaddr), "I" (-EFAULT), "r" (oparg) \
: "memory")
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+ u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
- return -EFAULT;
#if !XCHAL_HAVE_S32C1I
return -ENOSYS;
@@ -89,19 +81,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
pagefault_enable();
- if (ret)
- return ret;
+ if (!ret)
+ *oval = oldval;
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: return (oldval == cmparg);
- case FUTEX_OP_CMP_NE: return (oldval != cmparg);
- case FUTEX_OP_CMP_LT: return (oldval < cmparg);
- case FUTEX_OP_CMP_GE: return (oldval >= cmparg);
- case FUTEX_OP_CMP_LE: return (oldval <= cmparg);
- case FUTEX_OP_CMP_GT: return (oldval > cmparg);
- }
-
- return -ENOSYS;
+ return ret;
}
static inline int
diff --git a/arch/xtensa/include/asm/param.h b/arch/xtensa/include/asm/param.h
deleted file mode 100644
index 0a70e780ef2a..000000000000
--- a/arch/xtensa/include/asm/param.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * include/asm-xtensa/param.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-#ifndef _XTENSA_PARAM_H
-#define _XTENSA_PARAM_H
-
-#include <uapi/asm/param.h>
-
-# define HZ CONFIG_HZ /* internal timer frequency */
-# define USER_HZ 100 /* for user interfaces in "ticks" */
-# define CLOCKS_PER_SEC (USER_HZ) /* frequnzy at which times() counts */
-#endif /* _XTENSA_PARAM_H */
diff --git a/arch/xtensa/include/asm/spinlock.h b/arch/xtensa/include/asm/spinlock.h
index a36221cf6363..3bb49681ee24 100644
--- a/arch/xtensa/include/asm/spinlock.h
+++ b/arch/xtensa/include/asm/spinlock.h
@@ -33,11 +33,6 @@
#define arch_spin_is_locked(x) ((x)->slock != 0)
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- smp_cond_load_acquire(&lock->slock, !VAL);
-}
-
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
static inline void arch_spin_lock(arch_spinlock_t *lock)
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index 24365b30aae9..b15b278aa314 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -103,20 +103,12 @@
overrides the coredump filter bits */
#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */
+#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
+
/* compatibility flags */
#define MAP_FILE 0
-/*
- * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
- * This gives us 6 bits, which is enough until someone invents 128 bit address
- * spaces.
- *
- * Assume these are all power of twos.
- * When 0 use the default page size.
- */
-#define MAP_HUGE_SHIFT 26
-#define MAP_HUGE_MASK 0x3f
-
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 3eed2761c149..220059999e74 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -113,4 +113,6 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _XTENSA_SOCKET_H */
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 33bfa5270d95..08175df7a69e 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -273,8 +273,8 @@ void __init init_arch(bp_tag_t *bp_start)
* Initialize system. Setup memory and reserve regions.
*/
-extern char _end;
-extern char _stext;
+extern char _end[];
+extern char _stext[];
extern char _WindowVectors_text_start;
extern char _WindowVectors_text_end;
extern char _DebugInterruptVector_literal_start;
@@ -333,7 +333,7 @@ void __init setup_arch(char **cmdline_p)
}
#endif
- mem_reserve(__pa(&_stext), __pa(&_end));
+ mem_reserve(__pa(_stext), __pa(_end));
#ifdef CONFIG_VECTORS_OFFSET
mem_reserve(__pa(&_WindowVectors_text_start),
diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c
index d159e9b9c018..672391003e40 100644
--- a/arch/xtensa/kernel/xtensa_ksyms.c
+++ b/arch/xtensa/kernel/xtensa_ksyms.c
@@ -94,13 +94,11 @@ unsigned long __sync_fetch_and_or_4(unsigned long *p, unsigned long v)
}
EXPORT_SYMBOL(__sync_fetch_and_or_4);
-#ifdef CONFIG_NET
/*
* Networking support
*/
EXPORT_SYMBOL(csum_partial);
EXPORT_SYMBOL(csum_partial_copy_generic);
-#endif /* CONFIG_NET */
/*
* Architecture-specific symbols
diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c
index 1a804a2f9a5b..3c75c4e597da 100644
--- a/arch/xtensa/mm/cache.c
+++ b/arch/xtensa/mm/cache.c
@@ -103,6 +103,7 @@ void clear_user_highpage(struct page *page, unsigned long vaddr)
clear_page_alias(kvaddr, paddr);
preempt_enable();
}
+EXPORT_SYMBOL(clear_user_highpage);
void copy_user_highpage(struct page *dst, struct page *src,
unsigned long vaddr, struct vm_area_struct *vma)
@@ -119,10 +120,7 @@ void copy_user_highpage(struct page *dst, struct page *src,
copy_page_alias(dst_vaddr, src_vaddr, dst_paddr, src_paddr);
preempt_enable();
}
-
-#endif /* DCACHE_WAY_SIZE > PAGE_SIZE */
-
-#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
+EXPORT_SYMBOL(copy_user_highpage);
/*
* Any time the kernel writes to a user page cache page, or it is about to
@@ -176,7 +174,7 @@ void flush_dcache_page(struct page *page)
/* There shouldn't be an entry in the cache for this page anymore. */
}
-
+EXPORT_SYMBOL(flush_dcache_page);
/*
* For now, flush the whole cache. FIXME??
@@ -188,6 +186,7 @@ void local_flush_cache_range(struct vm_area_struct *vma,
__flush_invalidate_dcache_all();
__invalidate_icache_all();
}
+EXPORT_SYMBOL(local_flush_cache_range);
/*
* Remove any entry in the cache for this page.
@@ -207,8 +206,9 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address,
__flush_invalidate_dcache_page_alias(virt, phys);
__invalidate_icache_page_alias(virt, phys);
}
+EXPORT_SYMBOL(local_flush_cache_page);
-#endif
+#endif /* DCACHE_WAY_SIZE > PAGE_SIZE */
void
update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep)
@@ -225,7 +225,7 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep)
flush_tlb_page(vma, addr);
-#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
+#if (DCACHE_WAY_SIZE > PAGE_SIZE)
if (!PageReserved(page) && test_bit(PG_arch_1, &page->flags)) {
unsigned long phys = page_to_phys(page);
@@ -256,7 +256,7 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep)
* flush_dcache_page() on the page.
*/
-#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
+#if (DCACHE_WAY_SIZE > PAGE_SIZE)
void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long vaddr, void *dst, const void *src,