summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig9
-rw-r--r--arch/alpha/include/uapi/asm/ioctls.h1
-rw-r--r--arch/arc/include/asm/Kbuild1
-rw-r--r--arch/arc/include/asm/processor.h2
-rw-r--r--arch/arc/include/uapi/asm/Kbuild2
-rw-r--r--arch/arc/kernel/setup.c2
-rw-r--r--arch/arm/Kconfig31
-rw-r--r--arch/arm/boot/compressed/efi-header.S3
-rw-r--r--arch/arm/boot/dts/aspeed-g4.dtsi1
-rw-r--r--arch/arm/boot/dts/aspeed-g5.dtsi1
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm/include/asm/dmi.h19
-rw-r--r--arch/arm/include/uapi/asm/Kbuild2
-rw-r--r--arch/arm/kernel/hw_breakpoint.c13
-rw-r--r--arch/arm/kernel/patch.c2
-rw-r--r--arch/arm/kernel/setup.c2
-rw-r--r--arch/arm/kernel/smp.c3
-rw-r--r--arch/arm/kernel/smp_twd.c6
-rw-r--r--arch/arm/kernel/time.c2
-rw-r--r--arch/arm/kernel/topology.c221
-rw-r--r--arch/arm/mach-aspeed/Kconfig2
-rw-r--r--arch/arm/mach-bcm/Kconfig2
-rw-r--r--arch/arm/mach-clps711x/Kconfig2
-rw-r--r--arch/arm/mach-mediatek/mediatek.c2
-rw-r--r--arch/arm/mach-moxart/Kconfig2
-rw-r--r--arch/arm/mach-omap2/dma.c24
-rw-r--r--arch/arm/mach-omap2/timer.c10
-rw-r--r--arch/arm/mach-rockchip/rockchip.c2
-rw-r--r--arch/arm/mach-rpc/ecard.c40
-rw-r--r--arch/arm/mach-s3c24xx/Kconfig2
-rw-r--r--arch/arm/mach-s3c64xx/Kconfig2
-rw-r--r--arch/arm/mach-shmobile/setup-rcar-gen2.c2
-rw-r--r--arch/arm/mach-spear/spear13xx.c2
-rw-r--r--arch/arm/mach-sunxi/sunxi.c2
-rw-r--r--arch/arm/mach-u300/core.c2
-rw-r--r--arch/arm/mach-zynq/common.c2
-rw-r--r--arch/arm/mm/mmu.c8
-rw-r--r--arch/arm/probes/kprobes/core.c3
-rw-r--r--arch/arm64/Kconfig3
-rw-r--r--arch/arm64/Kconfig.platforms4
-rw-r--r--arch/arm64/boot/dts/hisilicon/hi6220.dtsi64
-rw-r--r--arch/arm64/boot/dts/qcom/msm8916.dtsi32
-rw-r--r--arch/arm64/include/asm/insn.h1
-rw-r--r--arch/arm64/kernel/efi.c15
-rw-r--r--arch/arm64/kernel/insn.c5
-rw-r--r--arch/arm64/kernel/smp.c3
-rw-r--r--arch/arm64/kernel/time.c2
-rw-r--r--arch/arm64/kernel/topology.c226
-rw-r--r--arch/arm64/kernel/vdso.c5
-rw-r--r--arch/arm64/kernel/vdso/gettimeofday.S1
-rw-r--r--arch/blackfin/include/asm/processor.h5
-rw-r--r--arch/c6x/include/asm/Kbuild1
-rw-r--r--arch/c6x/include/asm/processor.h5
-rw-r--r--arch/c6x/include/uapi/asm/Kbuild1
-rw-r--r--arch/cris/arch-v10/kernel/process.c8
-rw-r--r--arch/cris/arch-v32/kernel/process.c8
-rw-r--r--arch/cris/include/asm/Kbuild1
-rw-r--r--arch/cris/include/asm/processor.h2
-rw-r--r--arch/cris/include/uapi/asm/Kbuild2
-rw-r--r--arch/frv/include/asm/processor.h5
-rw-r--r--arch/frv/kernel/process.c9
-rw-r--r--arch/h8300/Kconfig2
-rw-r--r--arch/h8300/include/asm/Kbuild1
-rw-r--r--arch/h8300/include/asm/processor.h4
-rw-r--r--arch/h8300/include/uapi/asm/Kbuild2
-rw-r--r--arch/h8300/kernel/process.c5
-rw-r--r--arch/h8300/kernel/setup.c2
-rw-r--r--arch/hexagon/include/asm/Kbuild1
-rw-r--r--arch/hexagon/include/asm/processor.h3
-rw-r--r--arch/hexagon/include/uapi/asm/Kbuild2
-rw-r--r--arch/hexagon/kernel/process.c8
-rw-r--r--arch/ia64/hp/sim/simserial.c13
-rw-r--r--arch/ia64/include/asm/io.h2
-rw-r--r--arch/ia64/include/asm/processor.h17
-rw-r--r--arch/ia64/include/asm/siginfo.h23
-rw-r--r--arch/ia64/include/uapi/asm/siginfo.h1
-rw-r--r--arch/ia64/sn/kernel/iomv.c2
-rw-r--r--arch/m32r/include/asm/processor.h2
-rw-r--r--arch/m32r/include/uapi/asm/Kbuild2
-rw-r--r--arch/m32r/include/uapi/asm/siginfo.h6
-rw-r--r--arch/m32r/kernel/process.c8
-rw-r--r--arch/m68k/configs/amiga_defconfig8
-rw-r--r--arch/m68k/configs/apollo_defconfig8
-rw-r--r--arch/m68k/configs/atari_defconfig8
-rw-r--r--arch/m68k/configs/bvme6000_defconfig8
-rw-r--r--arch/m68k/configs/hp300_defconfig8
-rw-r--r--arch/m68k/configs/mac_defconfig8
-rw-r--r--arch/m68k/configs/multi_defconfig8
-rw-r--r--arch/m68k/configs/mvme147_defconfig8
-rw-r--r--arch/m68k/configs/mvme16x_defconfig8
-rw-r--r--arch/m68k/configs/q40_defconfig8
-rw-r--r--arch/m68k/configs/sun3_defconfig8
-rw-r--r--arch/m68k/configs/sun3x_defconfig8
-rw-r--r--arch/m68k/include/asm/Kbuild1
-rw-r--r--arch/m68k/include/asm/processor.h2
-rw-r--r--arch/m68k/include/asm/signal.h5
-rw-r--r--arch/m68k/include/uapi/asm/Kbuild1
-rw-r--r--arch/m68k/kernel/process.c14
-rw-r--r--arch/m68k/kernel/signal.c16
-rw-r--r--arch/metag/kernel/smp.c3
-rw-r--r--arch/microblaze/Kconfig2
-rw-r--r--arch/microblaze/configs/mmu_defconfig16
-rw-r--r--arch/microblaze/configs/nommu_defconfig17
-rw-r--r--arch/microblaze/include/asm/Kbuild45
-rw-r--r--arch/microblaze/include/asm/bitops.h1
-rw-r--r--arch/microblaze/include/asm/bug.h1
-rw-r--r--arch/microblaze/include/asm/bugs.h1
-rw-r--r--arch/microblaze/include/asm/div64.h1
-rw-r--r--arch/microblaze/include/asm/emergency-restart.h1
-rw-r--r--arch/microblaze/include/asm/fb.h1
-rw-r--r--arch/microblaze/include/asm/hardirq.h1
-rw-r--r--arch/microblaze/include/asm/irq_regs.h1
-rw-r--r--arch/microblaze/include/asm/kdebug.h1
-rw-r--r--arch/microblaze/include/asm/kmap_types.h6
-rw-r--r--arch/microblaze/include/asm/linkage.h1
-rw-r--r--arch/microblaze/include/asm/local.h1
-rw-r--r--arch/microblaze/include/asm/local64.h1
-rw-r--r--arch/microblaze/include/asm/parport.h1
-rw-r--r--arch/microblaze/include/asm/percpu.h1
-rw-r--r--arch/microblaze/include/asm/processor.h6
-rw-r--r--arch/microblaze/include/asm/serial.h1
-rw-r--r--arch/microblaze/include/asm/shmparam.h1
-rw-r--r--arch/microblaze/include/asm/topology.h1
-rw-r--r--arch/microblaze/include/asm/ucontext.h1
-rw-r--r--arch/microblaze/include/asm/unistd.h2
-rw-r--r--arch/microblaze/include/asm/vga.h1
-rw-r--r--arch/microblaze/include/asm/xor.h1
-rw-r--r--arch/microblaze/include/uapi/asm/Kbuild1
-rw-r--r--arch/microblaze/include/uapi/asm/bitsperlong.h1
-rw-r--r--arch/microblaze/include/uapi/asm/errno.h1
-rw-r--r--arch/microblaze/include/uapi/asm/fcntl.h1
-rw-r--r--arch/microblaze/include/uapi/asm/ioctl.h1
-rw-r--r--arch/microblaze/include/uapi/asm/ioctls.h1
-rw-r--r--arch/microblaze/include/uapi/asm/ipcbuf.h1
-rw-r--r--arch/microblaze/include/uapi/asm/kvm_para.h1
-rw-r--r--arch/microblaze/include/uapi/asm/mman.h1
-rw-r--r--arch/microblaze/include/uapi/asm/msgbuf.h1
-rw-r--r--arch/microblaze/include/uapi/asm/param.h1
-rw-r--r--arch/microblaze/include/uapi/asm/poll.h1
-rw-r--r--arch/microblaze/include/uapi/asm/resource.h1
-rw-r--r--arch/microblaze/include/uapi/asm/sembuf.h1
-rw-r--r--arch/microblaze/include/uapi/asm/shmbuf.h1
-rw-r--r--arch/microblaze/include/uapi/asm/siginfo.h1
-rw-r--r--arch/microblaze/include/uapi/asm/signal.h1
-rw-r--r--arch/microblaze/include/uapi/asm/socket.h1
-rw-r--r--arch/microblaze/include/uapi/asm/sockios.h1
-rw-r--r--arch/microblaze/include/uapi/asm/stat.h1
-rw-r--r--arch/microblaze/include/uapi/asm/statfs.h1
-rw-r--r--arch/microblaze/include/uapi/asm/swab.h1
-rw-r--r--arch/microblaze/include/uapi/asm/termbits.h1
-rw-r--r--arch/microblaze/include/uapi/asm/termios.h1
-rw-r--r--arch/microblaze/include/uapi/asm/unistd.h1
-rw-r--r--arch/microblaze/kernel/dma.c3
-rw-r--r--arch/microblaze/kernel/entry.S28
-rw-r--r--arch/microblaze/kernel/process.c17
-rw-r--r--arch/microblaze/kernel/setup.c2
-rw-r--r--arch/microblaze/kernel/syscall_table.S1
-rw-r--r--arch/microblaze/kernel/timer.c4
-rw-r--r--arch/microblaze/mm/highmem.c24
-rw-r--r--arch/mips/generic/init.c2
-rw-r--r--arch/mips/include/uapi/asm/ioctls.h1
-rw-r--r--arch/mips/kernel/entry.S3
-rw-r--r--arch/mips/kernel/head.S2
-rw-r--r--arch/mips/kernel/jump_label.c2
-rw-r--r--arch/mips/kernel/pm-cps.c9
-rw-r--r--arch/mips/kernel/traps.c2
-rw-r--r--arch/mips/kvm/tlb.c6
-rw-r--r--arch/mips/math-emu/dp_maddf.c5
-rw-r--r--arch/mips/math-emu/sp_maddf.c5
-rw-r--r--arch/mips/mm/dma-default.c23
-rw-r--r--arch/mips/mti-malta/malta-time.c2
-rw-r--r--arch/mips/pic32/pic32mzda/time.c2
-rw-r--r--arch/mips/pistachio/time.c2
-rw-r--r--arch/mips/ralink/Kconfig2
-rw-r--r--arch/mips/ralink/cevt-rt3352.c2
-rw-r--r--arch/mips/ralink/clk.c2
-rw-r--r--arch/mips/ralink/timer-gic.c2
-rw-r--r--arch/mips/sgi-ip22/ip22-gio.c16
-rw-r--r--arch/mips/xilfpga/time.c2
-rw-r--r--arch/mn10300/include/asm/processor.h5
-rw-r--r--arch/mn10300/include/uapi/asm/Kbuild2
-rw-r--r--arch/mn10300/include/uapi/asm/siginfo.h1
-rw-r--r--arch/mn10300/kernel/process.c8
-rw-r--r--arch/nios2/Kconfig2
-rw-r--r--arch/nios2/include/asm/Kbuild1
-rw-r--r--arch/nios2/include/asm/processor.h3
-rw-r--r--arch/nios2/include/uapi/asm/Kbuild1
-rw-r--r--arch/nios2/kernel/time.c4
-rw-r--r--arch/openrisc/include/asm/Kbuild1
-rw-r--r--arch/openrisc/include/asm/processor.h5
-rw-r--r--arch/openrisc/include/uapi/asm/Kbuild2
-rw-r--r--arch/openrisc/kernel/process.c5
-rw-r--r--arch/parisc/include/asm/dma-mapping.h11
-rw-r--r--arch/parisc/include/asm/io.h16
-rw-r--r--arch/parisc/include/asm/mmu_context.h15
-rw-r--r--arch/parisc/include/asm/pdc.h18
-rw-r--r--arch/parisc/include/asm/pdcpat.h35
-rw-r--r--arch/parisc/include/asm/pgtable.h3
-rw-r--r--arch/parisc/include/asm/processor.h7
-rw-r--r--arch/parisc/include/asm/uaccess.h11
-rw-r--r--arch/parisc/include/uapi/asm/ioctls.h1
-rw-r--r--arch/parisc/include/uapi/asm/pdc.h12
-rw-r--r--arch/parisc/kernel/Makefile2
-rw-r--r--arch/parisc/kernel/asm-offsets.c5
-rw-r--r--arch/parisc/kernel/drivers.c23
-rw-r--r--arch/parisc/kernel/firmware.c108
-rw-r--r--arch/parisc/kernel/hpmc.S5
-rw-r--r--arch/parisc/kernel/inventory.c9
-rw-r--r--arch/parisc/kernel/pdt.c143
-rw-r--r--arch/parisc/kernel/process.c5
-rw-r--r--arch/parisc/kernel/processor.c5
-rw-r--r--arch/parisc/kernel/syscall_table.S2
-rw-r--r--arch/parisc/kernel/time.c24
-rw-r--r--arch/parisc/lib/lusercopy.S25
-rw-r--r--arch/parisc/mm/fault.c12
-rw-r--r--arch/parisc/mm/init.c3
-rw-r--r--arch/powerpc/Kconfig2
-rw-r--r--arch/powerpc/include/asm/kprobes.h1
-rw-r--r--arch/powerpc/include/asm/processor.h6
-rw-r--r--arch/powerpc/include/asm/topology.h6
-rw-r--r--arch/powerpc/include/asm/uaccess.h8
-rw-r--r--arch/powerpc/include/uapi/asm/ioctls.h1
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S11
-rw-r--r--arch/powerpc/kernel/kprobes.c17
-rw-r--r--arch/powerpc/kernel/rtasd.c2
-rw-r--r--arch/powerpc/kernel/setup_64.c31
-rw-r--r--arch/powerpc/kernel/smp.c2
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_mprofile.S59
-rw-r--r--arch/powerpc/kvm/book3s_hv.c65
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S12
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S75
-rw-r--r--arch/powerpc/mm/numa.c22
-rw-r--r--arch/powerpc/perf/perf_regs.c3
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c94
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c7
-rw-r--r--arch/powerpc/platforms/ps3/system-bus.c10
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c2
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c16
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c7
-rw-r--r--arch/powerpc/platforms/pseries/vio.c72
-rw-r--r--arch/s390/Kconfig3
-rw-r--r--arch/s390/crypto/Makefile3
-rw-r--r--arch/s390/crypto/arch_random.c1
-rw-r--r--arch/s390/include/asm/Kbuild2
-rw-r--r--arch/s390/include/asm/device.h10
-rw-r--r--arch/s390/include/asm/eadm.h6
-rw-r--r--arch/s390/include/asm/elf.h32
-rw-r--r--arch/s390/include/asm/fb.h12
-rw-r--r--arch/s390/include/asm/io.h4
-rw-r--r--arch/s390/include/asm/kvm_host.h17
-rw-r--r--arch/s390/include/asm/mmu_context.h4
-rw-r--r--arch/s390/include/asm/nmi.h7
-rw-r--r--arch/s390/include/asm/page.h3
-rw-r--r--arch/s390/include/asm/pci.h15
-rw-r--r--arch/s390/include/asm/pci_insn.h2
-rw-r--r--arch/s390/include/asm/pgalloc.h25
-rw-r--r--arch/s390/include/asm/pgtable.h105
-rw-r--r--arch/s390/include/asm/processor.h11
-rw-r--r--arch/s390/include/asm/ptrace.h54
-rw-r--r--arch/s390/include/asm/sigp.h2
-rw-r--r--arch/s390/include/asm/sysinfo.h4
-rw-r--r--arch/s390/include/asm/thread_info.h1
-rw-r--r--arch/s390/include/asm/tlb.h15
-rw-r--r--arch/s390/kernel/asm-offsets.c3
-rw-r--r--arch/s390/kernel/dumpstack.c33
-rw-r--r--arch/s390/kernel/entry.S30
-rw-r--r--arch/s390/kernel/ipl.c7
-rw-r--r--arch/s390/kernel/jump_label.c2
-rw-r--r--arch/s390/kernel/kprobes.c4
-rw-r--r--arch/s390/kernel/nmi.c84
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c10
-rw-r--r--arch/s390/kernel/perf_event.c3
-rw-r--r--arch/s390/kernel/process.c25
-rw-r--r--arch/s390/kernel/ptrace.c15
-rw-r--r--arch/s390/kernel/smp.c3
-rw-r--r--arch/s390/kernel/sysinfo.c2
-rw-r--r--arch/s390/kernel/time.c6
-rw-r--r--arch/s390/kernel/traps.c1
-rw-r--r--arch/s390/kernel/uprobes.c12
-rw-r--r--arch/s390/kernel/vdso.c91
-rw-r--r--arch/s390/kernel/vtime.c14
-rw-r--r--arch/s390/kvm/gaccess.c37
-rw-r--r--arch/s390/kvm/gaccess.h4
-rw-r--r--arch/s390/kvm/guestdbg.c6
-rw-r--r--arch/s390/kvm/kvm-s390.c1
-rw-r--r--arch/s390/kvm/priv.c8
-rw-r--r--arch/s390/mm/dump_pagetables.c23
-rw-r--r--arch/s390/mm/fault.c2
-rw-r--r--arch/s390/mm/gmap.c11
-rw-r--r--arch/s390/mm/gup.c33
-rw-r--r--arch/s390/mm/hugetlbpage.c30
-rw-r--r--arch/s390/mm/init.c6
-rw-r--r--arch/s390/mm/mmap.c4
-rw-r--r--arch/s390/mm/pageattr.c30
-rw-r--r--arch/s390/mm/pgalloc.c57
-rw-r--r--arch/s390/mm/pgtable.c6
-rw-r--r--arch/s390/mm/vmem.c44
-rw-r--r--arch/s390/pci/pci.c173
-rw-r--r--arch/s390/pci/pci_clp.c77
-rw-r--r--arch/s390/pci/pci_dma.c4
-rw-r--r--arch/s390/pci/pci_event.c14
-rw-r--r--arch/s390/pci/pci_insn.c10
-rw-r--r--arch/s390/tools/gen_facilities.c2
-rw-r--r--arch/score/include/asm/processor.h1
-rw-r--r--arch/score/include/uapi/asm/Kbuild2
-rw-r--r--arch/score/include/uapi/asm/siginfo.h6
-rw-r--r--arch/score/kernel/process.c5
-rw-r--r--arch/sh/boards/Kconfig2
-rw-r--r--arch/sh/boards/of-generic.c2
-rw-r--r--arch/sh/include/asm/Kbuild1
-rw-r--r--arch/sh/include/uapi/asm/Kbuild2
-rw-r--r--arch/sh/include/uapi/asm/ioctls.h1
-rw-r--r--arch/sparc/include/asm/processor_32.h3
-rw-r--r--arch/sparc/include/asm/processor_64.h2
-rw-r--r--arch/sparc/include/asm/siginfo.h13
-rw-r--r--arch/sparc/include/uapi/asm/ioctls.h3
-rw-r--r--arch/sparc/kernel/jump_label.c2
-rw-r--r--arch/sparc/kernel/process_32.c8
-rw-r--r--arch/sparc/kernel/process_64.c19
-rw-r--r--arch/sparc/kernel/vio.c18
-rw-r--r--arch/tile/include/asm/processor.h7
-rw-r--r--arch/tile/kernel/jump_label.c2
-rw-r--r--arch/tile/lib/atomic_asm_32.S3
-rw-r--r--arch/um/drivers/ubd_kern.c2
-rw-r--r--arch/um/include/asm/processor-generic.h2
-rw-r--r--arch/um/kernel/um_arch.c6
-rw-r--r--arch/unicore32/include/asm/Kbuild1
-rw-r--r--arch/unicore32/include/uapi/asm/Kbuild1
-rw-r--r--arch/x86/Kconfig10
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/boot/compressed/cmdline.c2
-rw-r--r--arch/x86/boot/compressed/eboot.c73
-rw-r--r--arch/x86/boot/compressed/head_64.S86
-rw-r--r--arch/x86/boot/compressed/kaslr.c194
-rw-r--r--arch/x86/boot/compressed/misc.c6
-rw-r--r--arch/x86/boot/compressed/misc.h2
-rw-r--r--arch/x86/boot/compressed/pagetable.c18
-rw-r--r--arch/x86/boot/copy.S20
-rw-r--r--arch/x86/boot/string.c8
-rw-r--r--arch/x86/boot/string.h1
-rw-r--r--arch/x86/crypto/Makefile2
-rw-r--r--arch/x86/crypto/sha1-mb/Makefile2
-rw-r--r--arch/x86/crypto/sha256-mb/Makefile2
-rw-r--r--arch/x86/entry/entry_64.S3
-rw-r--r--arch/x86/events/core.c28
-rw-r--r--arch/x86/events/intel/core.c78
-rw-r--r--arch/x86/events/intel/cqm.c16
-rw-r--r--arch/x86/events/intel/lbr.c4
-rw-r--r--arch/x86/events/intel/uncore.c2
-rw-r--r--arch/x86/events/perf_event.h3
-rw-r--r--arch/x86/include/asm/amd_nb.h3
-rw-r--r--arch/x86/include/asm/apic.h36
-rw-r--r--arch/x86/include/asm/atomic.h13
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/hardirq.h2
-rw-r--r--arch/x86/include/asm/irq.h1
-rw-r--r--arch/x86/include/asm/irq_remapping.h3
-rw-r--r--arch/x86/include/asm/kvm_emulate.h1
-rw-r--r--arch/x86/include/asm/mce.h4
-rw-r--r--arch/x86/include/asm/mmu.h6
-rw-r--r--arch/x86/include/asm/mmu_context.h63
-rw-r--r--arch/x86/include/asm/mshyperv.h4
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/paravirt.h10
-rw-r--r--arch/x86/include/asm/paravirt_types.h5
-rw-r--r--arch/x86/include/asm/pci.h8
-rw-r--r--arch/x86/include/asm/pgtable-3level.h47
-rw-r--r--arch/x86/include/asm/pgtable.h55
-rw-r--r--arch/x86/include/asm/pgtable_64.h22
-rw-r--r--arch/x86/include/asm/processor-flags.h36
-rw-r--r--arch/x86/include/asm/processor.h15
-rw-r--r--arch/x86/include/asm/setup.h1
-rw-r--r--arch/x86/include/asm/special_insns.h10
-rw-r--r--arch/x86/include/asm/timer.h8
-rw-r--r--arch/x86/include/asm/tlbbatch.h14
-rw-r--r--arch/x86/include/asm/tlbflush.h114
-rw-r--r--arch/x86/include/asm/uv/uv.h11
-rw-r--r--arch/x86/include/uapi/asm/hyperv.h15
-rw-r--r--arch/x86/include/uapi/asm/processor-flags.h2
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/Makefile2
-rw-r--r--arch/x86/kernel/apic/apic.c115
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c4
-rw-r--r--arch/x86/kernel/apic/apic_noop.c2
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c4
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c2
-rw-r--r--arch/x86/kernel/apic/htirq.c21
-rw-r--r--arch/x86/kernel/apic/io_apic.c44
-rw-r--r--arch/x86/kernel/apic/msi.c55
-rw-r--r--arch/x86/kernel/apic/probe_32.c2
-rw-r--r--arch/x86/kernel/apic/vector.c53
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c36
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c26
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/dev-mcelog.c55
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c569
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h6
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c17
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c273
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c2
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c11
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c27
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c18
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/x86/kernel/espfix_64.c2
-rw-r--r--arch/x86/kernel/head64.c145
-rw-r--r--arch/x86/kernel/head_64.S131
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/irq.c78
-rw-r--r--arch/x86/kernel/jump_label.c2
-rw-r--r--arch/x86/kernel/kprobes/opt.c9
-rw-r--r--arch/x86/kernel/ldt.c56
-rw-r--r--arch/x86/kernel/machine_kexec_64.c2
-rw-r--r--arch/x86/kernel/nmi_selftest.c2
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/process.c11
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c4
-rw-r--r--arch/x86/kernel/reboot.c2
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/smpboot.c3
-rw-r--r--arch/x86/kernel/step.c2
-rw-r--r--arch/x86/kernel/tboot.c2
-rw-r--r--arch/x86/kernel/time.c2
-rw-r--r--arch/x86/kernel/tsc.c210
-rw-r--r--arch/x86/kernel/tsc_sync.c21
-rw-r--r--arch/x86/kvm/emulate.c1
-rw-r--r--arch/x86/kvm/svm.c2
-rw-r--r--arch/x86/kvm/vmx.c24
-rw-r--r--arch/x86/kvm/x86.c62
-rw-r--r--arch/x86/lib/copy_user_64.S7
-rw-r--r--arch/x86/lib/msr-reg.S8
-rw-r--r--arch/x86/lib/x86-opcode-map.txt2
-rw-r--r--arch/x86/math-emu/fpu_system.h2
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/dump_pagetables.c2
-rw-r--r--arch/x86/mm/fault.c10
-rw-r--r--arch/x86/mm/gup.c496
-rw-r--r--arch/x86/mm/init.c4
-rw-r--r--arch/x86/mm/init_64.c116
-rw-r--r--arch/x86/mm/ioremap.c2
-rw-r--r--arch/x86/mm/kasan_init_64.c12
-rw-r--r--arch/x86/mm/kaslr.c81
-rw-r--r--arch/x86/mm/mmap.c3
-rw-r--r--arch/x86/mm/tlb.c458
-rw-r--r--arch/x86/net/Makefile2
-rw-r--r--arch/x86/pci/ce4100.c87
-rw-r--r--arch/x86/pci/common.c4
-rw-r--r--arch/x86/pci/legacy.c18
-rw-r--r--arch/x86/platform/efi/Makefile1
-rw-r--r--arch/x86/platform/efi/efi.c3
-rw-r--r--arch/x86/platform/efi/efi_32.c9
-rw-r--r--arch/x86/platform/efi/efi_64.c9
-rw-r--r--arch/x86/platform/efi/quirks.c137
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-pm.c2
-rw-r--r--arch/x86/platform/uv/tlb_uv.c24
-rw-r--r--arch/x86/platform/uv/uv_irq.c18
-rw-r--r--arch/x86/power/Makefile2
-rw-r--r--arch/x86/power/cpu.c2
-rw-r--r--arch/x86/power/hibernate_64.c3
-rw-r--r--arch/x86/ras/Kconfig11
-rw-r--r--arch/x86/ras/Makefile2
-rw-r--r--arch/x86/ras/mce_amd_inj.c492
-rw-r--r--arch/x86/realmode/init.c2
-rw-r--r--arch/x86/xen/Makefile3
-rw-r--r--arch/x86/xen/apic.c2
-rw-r--r--arch/x86/xen/efi.c45
-rw-r--r--arch/x86/xen/mmu_pv.c83
-rw-r--r--arch/x86/xen/xen-pvh.S2
-rw-r--r--arch/xtensa/include/asm/Kbuild1
-rw-r--r--arch/xtensa/include/asm/processor.h2
-rw-r--r--arch/xtensa/include/uapi/asm/Kbuild1
-rw-r--r--arch/xtensa/include/uapi/asm/ioctls.h1
-rw-r--r--arch/xtensa/kernel/time.c2
475 files changed, 4940 insertions, 4292 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 6c00e5b00f8b..f76b214cf7ad 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -867,4 +867,13 @@ config STRICT_MODULE_RWX
config ARCH_WANT_RELAX_ORDER
bool
+config REFCOUNT_FULL
+ bool "Perform full reference count validation at the expense of speed"
+ help
+ Enabling this switches the refcounting infrastructure from a fast
+ unchecked atomic_t implementation to a fully state checked
+ implementation, which can be (slightly) slower but provides protections
+ against various use-after-free conditions that can be used in
+ security flaw exploits.
+
source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/include/uapi/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h
index f30c94ae1bdb..ff67b8373bf7 100644
--- a/arch/alpha/include/uapi/asm/ioctls.h
+++ b/arch/alpha/include/uapi/asm/ioctls.h
@@ -100,6 +100,7 @@
#define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */
#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */
#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */
+#define TIOCGPTPEER _IOR('T', 0x41, int) /* Safely open the slave */
#define TIOCSERCONFIG 0x5453
#define TIOCSERGWILD 0x5454
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 7bee4e4799fd..3e74ca5e6402 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -36,7 +36,6 @@ generic-y += preempt.h
generic-y += resource.h
generic-y += sembuf.h
generic-y += shmbuf.h
-generic-y += siginfo.h
generic-y += socket.h
generic-y += sockios.h
generic-y += stat.h
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 6e1242da0159..4104a0839214 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -86,8 +86,6 @@ struct task_struct;
#define TSK_K_BLINK(tsk) TSK_K_REG(tsk, 4)
#define TSK_K_FP(tsk) TSK_K_REG(tsk, 0)
-#define thread_saved_pc(tsk) TSK_K_BLINK(tsk)
-
extern void start_thread(struct pt_regs * regs, unsigned long pc,
unsigned long usp);
diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild
index b15bf6bc0e94..b55fc2ae1e8c 100644
--- a/arch/arc/include/uapi/asm/Kbuild
+++ b/arch/arc/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += siginfo.h
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index fc8211f338ad..666613fde91d 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -470,7 +470,7 @@ void __init setup_arch(char **cmdline_p)
void __init time_init(void)
{
of_clk_init(NULL);
- clocksource_probe();
+ timer_probe();
}
static int __init customize_machine(void)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4c1a35f15838..43f45d3b40e8 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -25,6 +25,7 @@ config ARM
select EDAC_SUPPORT
select EDAC_ATOMIC_SCRUB
select GENERIC_ALLOCATOR
+ select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY
select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI)
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
select GENERIC_CPU_AUTOPROBE
@@ -337,7 +338,7 @@ config ARCH_MULTIPLATFORM
select ARM_HAS_SG_CHAIN
select ARM_PATCH_PHYS_VIRT
select AUTO_ZRELADDR
- select CLKSRC_OF
+ select TIMER_OF
select COMMON_CLK
select GENERIC_CLOCKEVENTS
select MIGHT_HAVE_PCI
@@ -351,7 +352,7 @@ config ARM_SINGLE_ARMV7M
depends on !MMU
select ARM_NVIC
select AUTO_ZRELADDR
- select CLKSRC_OF
+ select TIMER_OF
select COMMON_CLK
select CPU_V7M
select GENERIC_CLOCKEVENTS
@@ -532,7 +533,7 @@ config ARCH_PXA
select CLKDEV_LOOKUP
select CLKSRC_PXA
select CLKSRC_MMIO
- select CLKSRC_OF
+ select TIMER_OF
select CPU_XSCALE if !CPU_XSC3
select GENERIC_CLOCKEVENTS
select GPIO_PXA
@@ -571,7 +572,7 @@ config ARCH_SA1100
select CLKDEV_LOOKUP
select CLKSRC_MMIO
select CLKSRC_PXA
- select CLKSRC_OF if OF
+ select TIMER_OF if OF
select CPU_FREQ
select CPU_SA1100
select GENERIC_CLOCKEVENTS
@@ -1357,7 +1358,7 @@ config HAVE_ARM_ARCH_TIMER
config HAVE_ARM_TWD
bool
- select CLKSRC_OF if OF
+ select TIMER_OF if OF
help
This options enables support for the ARM timer and watchdog unit
@@ -1416,6 +1417,7 @@ choice
config VMSPLIT_3G
bool "3G/1G user/kernel split"
config VMSPLIT_3G_OPT
+ depends on !ARM_LPAE
bool "3G/1G user/kernel split (for full 1G low memory)"
config VMSPLIT_2G
bool "2G/2G user/kernel split"
@@ -1637,7 +1639,7 @@ config ARCH_SELECT_MEMORY_MODEL
config HAVE_ARCH_PFN_VALID
def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
-config HAVE_GENERIC_RCU_GUP
+config HAVE_GENERIC_GUP
def_bool y
depends on ARM_LPAE
@@ -2061,6 +2063,23 @@ config EFI
is only useful for kernels that may run on systems that have
UEFI firmware.
+config DMI
+ bool "Enable support for SMBIOS (DMI) tables"
+ depends on EFI
+ default y
+ help
+ This enables SMBIOS/DMI feature for systems.
+
+ This option is only useful on systems that have UEFI firmware.
+ However, even with this option, the resultant kernel should
+ continue to boot on existing non-UEFI platforms.
+
+ NOTE: This does *NOT* enable or encourage the use of DMI quirks,
+ i.e., the the practice of identifying the platform via DMI to
+ decide whether certain workarounds for buggy hardware and/or
+ firmware need to be enabled. This would require the DMI subsystem
+ to be enabled much earlier than we do on ARM, which is non-trivial.
+
endmenu
menu "CPU Power Management"
diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S
index 3f7d1b74c5e0..a17ca8d78656 100644
--- a/arch/arm/boot/compressed/efi-header.S
+++ b/arch/arm/boot/compressed/efi-header.S
@@ -17,7 +17,8 @@
@ there.
.inst 'M' | ('Z' << 8) | (0x1310 << 16) @ tstne r0, #0x4d000
#else
- W(mov) r0, r0
+ AR_CLASS( mov r0, r0 )
+ M_CLASS( nop.w )
#endif
.endm
diff --git a/arch/arm/boot/dts/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed-g4.dtsi
index 8c6bc29eb7f6..3e74929d3289 100644
--- a/arch/arm/boot/dts/aspeed-g4.dtsi
+++ b/arch/arm/boot/dts/aspeed-g4.dtsi
@@ -893,6 +893,7 @@
//interrupts = <16 17 18 35 36 37 38 39>;
interrupts = <16>;
clocks = <&clk_apb>;
+ clock-names = "PCLK";
};
wdt1: wdt@1e785000 {
diff --git a/arch/arm/boot/dts/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed-g5.dtsi
index a0bea4a6ec77..1e6c701da853 100644
--- a/arch/arm/boot/dts/aspeed-g5.dtsi
+++ b/arch/arm/boot/dts/aspeed-g5.dtsi
@@ -1000,6 +1000,7 @@
//interrupts = <16 17 18 35 36 37 38 39>;
interrupts = <16>;
clocks = <&clk_apb>;
+ clock-names = "PCLK";
};
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 3a36d99ff836..d8360501c082 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -28,7 +28,6 @@ generic-y += segment.h
generic-y += sembuf.h
generic-y += serial.h
generic-y += shmbuf.h
-generic-y += siginfo.h
generic-y += simd.h
generic-y += sizes.h
generic-y += socket.h
diff --git a/arch/arm/include/asm/dmi.h b/arch/arm/include/asm/dmi.h
new file mode 100644
index 000000000000..df2d2ff06f5b
--- /dev/null
+++ b/arch/arm/include/asm/dmi.h
@@ -0,0 +1,19 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_DMI_H
+#define __ASM_DMI_H
+
+#include <linux/io.h>
+#include <linux/slab.h>
+
+#define dmi_early_remap(x, l) memremap(x, l, MEMREMAP_WB)
+#define dmi_early_unmap(x, l) memunmap(x)
+#define dmi_remap(x, l) memremap(x, l, MEMREMAP_WB)
+#define dmi_unmap(x) memunmap(x)
+#define dmi_alloc(l) kzalloc(l, GFP_KERNEL)
+
+#endif
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 607f702c2d62..e9b098d6b766 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -4,3 +4,5 @@ include include/uapi/asm-generic/Kbuild.asm
genhdr-y += unistd-common.h
genhdr-y += unistd-oabi.h
genhdr-y += unistd-eabi.h
+
+generic-y += siginfo.h
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index be3b3fbd382f..af2a7f1e3103 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -1090,7 +1090,7 @@ static int __init arch_hw_breakpoint_init(void)
* driven low on this core and there isn't an architected way to
* determine that.
*/
- get_online_cpus();
+ cpus_read_lock();
register_undef_hook(&debug_reg_hook);
/*
@@ -1098,15 +1098,16 @@ static int __init arch_hw_breakpoint_init(void)
* assume that a halting debugger will leave the world in a nice state
* for us.
*/
- ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm/hw_breakpoint:online",
- dbg_reset_online, NULL);
+ ret = cpuhp_setup_state_cpuslocked(CPUHP_AP_ONLINE_DYN,
+ "arm/hw_breakpoint:online",
+ dbg_reset_online, NULL);
unregister_undef_hook(&debug_reg_hook);
if (WARN_ON(ret < 0) || !cpumask_empty(&debug_err_mask)) {
core_num_brps = 0;
core_num_wrps = 0;
if (ret > 0)
- cpuhp_remove_state_nocalls(ret);
- put_online_cpus();
+ cpuhp_remove_state_nocalls_cpuslocked(ret);
+ cpus_read_unlock();
return 0;
}
@@ -1124,7 +1125,7 @@ static int __init arch_hw_breakpoint_init(void)
TRAP_HWBKPT, "watchpoint debug exception");
hook_ifault_code(FAULT_CODE_DEBUG, hw_breakpoint_pending, SIGTRAP,
TRAP_HWBKPT, "breakpoint debug exception");
- put_online_cpus();
+ cpus_read_unlock();
/* Register PM notifiers. */
pm_init();
diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c
index 020560b2dcb7..a1a34722c655 100644
--- a/arch/arm/kernel/patch.c
+++ b/arch/arm/kernel/patch.c
@@ -124,5 +124,5 @@ void __kprobes patch_text(void *addr, unsigned int insn)
.insn = insn,
};
- stop_machine(patch_text_stop_machine, &patch, NULL);
+ stop_machine_cpuslocked(patch_text_stop_machine, &patch, NULL);
}
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 32e1a9513dc7..4e80bf7420d4 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -315,7 +315,7 @@ static void __init cacheid_init(void)
if (arch >= CPU_ARCH_ARMv6) {
unsigned int cachetype = read_cpuid_cachetype();
- if ((arch == CPU_ARCH_ARMv7M) && !cachetype) {
+ if ((arch == CPU_ARCH_ARMv7M) && !(cachetype & 0xf000f)) {
cacheid = 0;
} else if ((cachetype & (7 << 29)) == 4 << 29) {
/* ARMv7 register format */
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 572a8df1b766..c9a0a5299827 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -555,8 +555,7 @@ static DEFINE_RAW_SPINLOCK(stop_lock);
*/
static void ipi_cpu_stop(unsigned int cpu)
{
- if (system_state == SYSTEM_BOOTING ||
- system_state == SYSTEM_RUNNING) {
+ if (system_state <= SYSTEM_RUNNING) {
raw_spin_lock(&stop_lock);
pr_crit("CPU%u: stopping\n", cpu);
dump_stack();
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index 895ae5197159..b30eafeef096 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -403,7 +403,7 @@ out:
WARN(err, "twd_local_timer_of_register failed (%d)\n", err);
return err;
}
-CLOCKSOURCE_OF_DECLARE(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register);
-CLOCKSOURCE_OF_DECLARE(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register);
-CLOCKSOURCE_OF_DECLARE(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register);
+TIMER_OF_DECLARE(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register);
+TIMER_OF_DECLARE(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register);
+TIMER_OF_DECLARE(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register);
#endif
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 97b22fa7cb3a..629f8e9981f1 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -120,6 +120,6 @@ void __init time_init(void)
#ifdef CONFIG_COMMON_CLK
of_clk_init(NULL);
#endif
- clocksource_probe();
+ timer_probe();
}
}
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index f8a3ab82e77f..bf949a763dbe 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -11,6 +11,7 @@
* for more details.
*/
+#include <linux/arch_topology.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
@@ -44,77 +45,6 @@
* to run the rebalance_domains for all idle cores and the cpu_capacity can be
* updated during this sequence.
*/
-static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
-static DEFINE_MUTEX(cpu_scale_mutex);
-
-unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
-{
- return per_cpu(cpu_scale, cpu);
-}
-
-static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
-{
- per_cpu(cpu_scale, cpu) = capacity;
-}
-
-#ifdef CONFIG_PROC_SYSCTL
-static ssize_t cpu_capacity_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
-
- return sprintf(buf, "%lu\n",
- arch_scale_cpu_capacity(NULL, cpu->dev.id));
-}
-
-static ssize_t cpu_capacity_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t count)
-{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
- int this_cpu = cpu->dev.id, i;
- unsigned long new_capacity;
- ssize_t ret;
-
- if (count) {
- ret = kstrtoul(buf, 0, &new_capacity);
- if (ret)
- return ret;
- if (new_capacity > SCHED_CAPACITY_SCALE)
- return -EINVAL;
-
- mutex_lock(&cpu_scale_mutex);
- for_each_cpu(i, &cpu_topology[this_cpu].core_sibling)
- set_capacity_scale(i, new_capacity);
- mutex_unlock(&cpu_scale_mutex);
- }
-
- return count;
-}
-
-static DEVICE_ATTR_RW(cpu_capacity);
-
-static int register_cpu_capacity_sysctl(void)
-{
- int i;
- struct device *cpu;
-
- for_each_possible_cpu(i) {
- cpu = get_cpu_device(i);
- if (!cpu) {
- pr_err("%s: too early to get CPU%d device!\n",
- __func__, i);
- continue;
- }
- device_create_file(cpu, &dev_attr_cpu_capacity);
- }
-
- return 0;
-}
-subsys_initcall(register_cpu_capacity_sysctl);
-#endif
#ifdef CONFIG_OF
struct cpu_efficiency {
@@ -143,145 +73,6 @@ static unsigned long *__cpu_capacity;
static unsigned long middle_capacity = 1;
static bool cap_from_dt = true;
-static u32 *raw_capacity;
-static bool cap_parsing_failed;
-static u32 capacity_scale;
-
-static int __init parse_cpu_capacity(struct device_node *cpu_node, int cpu)
-{
- int ret = 1;
- u32 cpu_capacity;
-
- if (cap_parsing_failed)
- return !ret;
-
- ret = of_property_read_u32(cpu_node,
- "capacity-dmips-mhz",
- &cpu_capacity);
- if (!ret) {
- if (!raw_capacity) {
- raw_capacity = kcalloc(num_possible_cpus(),
- sizeof(*raw_capacity),
- GFP_KERNEL);
- if (!raw_capacity) {
- pr_err("cpu_capacity: failed to allocate memory for raw capacities\n");
- cap_parsing_failed = true;
- return !ret;
- }
- }
- capacity_scale = max(cpu_capacity, capacity_scale);
- raw_capacity[cpu] = cpu_capacity;
- pr_debug("cpu_capacity: %s cpu_capacity=%u (raw)\n",
- cpu_node->full_name, raw_capacity[cpu]);
- } else {
- if (raw_capacity) {
- pr_err("cpu_capacity: missing %s raw capacity\n",
- cpu_node->full_name);
- pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
- }
- cap_parsing_failed = true;
- kfree(raw_capacity);
- }
-
- return !ret;
-}
-
-static void normalize_cpu_capacity(void)
-{
- u64 capacity;
- int cpu;
-
- if (!raw_capacity || cap_parsing_failed)
- return;
-
- pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
- mutex_lock(&cpu_scale_mutex);
- for_each_possible_cpu(cpu) {
- capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
- / capacity_scale;
- set_capacity_scale(cpu, capacity);
- pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
- cpu, arch_scale_cpu_capacity(NULL, cpu));
- }
- mutex_unlock(&cpu_scale_mutex);
-}
-
-#ifdef CONFIG_CPU_FREQ
-static cpumask_var_t cpus_to_visit;
-static bool cap_parsing_done;
-static void parsing_done_workfn(struct work_struct *work);
-static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
-
-static int
-init_cpu_capacity_callback(struct notifier_block *nb,
- unsigned long val,
- void *data)
-{
- struct cpufreq_policy *policy = data;
- int cpu;
-
- if (cap_parsing_failed || cap_parsing_done)
- return 0;
-
- switch (val) {
- case CPUFREQ_NOTIFY:
- pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
- cpumask_pr_args(policy->related_cpus),
- cpumask_pr_args(cpus_to_visit));
- cpumask_andnot(cpus_to_visit,
- cpus_to_visit,
- policy->related_cpus);
- for_each_cpu(cpu, policy->related_cpus) {
- raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) *
- policy->cpuinfo.max_freq / 1000UL;
- capacity_scale = max(raw_capacity[cpu], capacity_scale);
- }
- if (cpumask_empty(cpus_to_visit)) {
- normalize_cpu_capacity();
- kfree(raw_capacity);
- pr_debug("cpu_capacity: parsing done\n");
- cap_parsing_done = true;
- schedule_work(&parsing_done_work);
- }
- }
- return 0;
-}
-
-static struct notifier_block init_cpu_capacity_notifier = {
- .notifier_call = init_cpu_capacity_callback,
-};
-
-static int __init register_cpufreq_notifier(void)
-{
- if (cap_parsing_failed)
- return -EINVAL;
-
- if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) {
- pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n");
- return -ENOMEM;
- }
- cpumask_copy(cpus_to_visit, cpu_possible_mask);
-
- return cpufreq_register_notifier(&init_cpu_capacity_notifier,
- CPUFREQ_POLICY_NOTIFIER);
-}
-core_initcall(register_cpufreq_notifier);
-
-static void parsing_done_workfn(struct work_struct *work)
-{
- cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
- CPUFREQ_POLICY_NOTIFIER);
-}
-
-#else
-static int __init free_raw_capacity(void)
-{
- kfree(raw_capacity);
-
- return 0;
-}
-core_initcall(free_raw_capacity);
-#endif
/*
* Iterate all CPUs' descriptor in DT and compute the efficiency
@@ -320,7 +111,7 @@ static void __init parse_dt_topology(void)
continue;
}
- if (parse_cpu_capacity(cn, cpu)) {
+ if (topology_parse_cpu_capacity(cn, cpu)) {
of_node_put(cn);
continue;
}
@@ -368,8 +159,8 @@ static void __init parse_dt_topology(void)
middle_capacity = ((max_capacity / 3)
>> (SCHED_CAPACITY_SHIFT-1)) + 1;
- if (cap_from_dt && !cap_parsing_failed)
- normalize_cpu_capacity();
+ if (cap_from_dt)
+ topology_normalize_cpu_scale();
}
/*
@@ -382,10 +173,10 @@ static void update_cpu_capacity(unsigned int cpu)
if (!cpu_capacity(cpu) || cap_from_dt)
return;
- set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+ topology_set_cpu_scale(cpu, cpu_capacity(cpu) / middle_capacity);
pr_info("CPU%u: update cpu_capacity %lu\n",
- cpu, arch_scale_cpu_capacity(NULL, cpu));
+ cpu, topology_get_cpu_scale(NULL, cpu));
}
#else
diff --git a/arch/arm/mach-aspeed/Kconfig b/arch/arm/mach-aspeed/Kconfig
index f3f8c5c658db..2d5570e6e186 100644
--- a/arch/arm/mach-aspeed/Kconfig
+++ b/arch/arm/mach-aspeed/Kconfig
@@ -4,7 +4,7 @@ menuconfig ARCH_ASPEED
select SRAM
select WATCHDOG
select ASPEED_WATCHDOG
- select MOXART_TIMER
+ select FTTMR010_TIMER
select MFD_SYSCON
select PINCTRL
help
diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
index f9389c5910e7..f23a23934162 100644
--- a/arch/arm/mach-bcm/Kconfig
+++ b/arch/arm/mach-bcm/Kconfig
@@ -150,7 +150,7 @@ config ARCH_BCM2835
select ARM_ERRATA_411920 if ARCH_MULTI_V6
select ARM_TIMER_SP804
select HAVE_ARM_ARCH_TIMER if ARCH_MULTI_V7
- select CLKSRC_OF
+ select TIMER_OF
select BCM2835_TIMER
select PINCTRL
select PINCTRL_BCM2835
diff --git a/arch/arm/mach-clps711x/Kconfig b/arch/arm/mach-clps711x/Kconfig
index 61284b9389cf..f385b1fcafef 100644
--- a/arch/arm/mach-clps711x/Kconfig
+++ b/arch/arm/mach-clps711x/Kconfig
@@ -2,7 +2,7 @@ menuconfig ARCH_CLPS711X
bool "Cirrus Logic EP721x/EP731x-based"
depends on ARCH_MULTI_V4T
select AUTO_ZRELADDR
- select CLKSRC_OF
+ select TIMER_OF
select CLPS711X_TIMER
select COMMON_CLK
select CPU_ARM720T
diff --git a/arch/arm/mach-mediatek/mediatek.c b/arch/arm/mach-mediatek/mediatek.c
index a6e3c98b95ed..c3cf215773b2 100644
--- a/arch/arm/mach-mediatek/mediatek.c
+++ b/arch/arm/mach-mediatek/mediatek.c
@@ -41,7 +41,7 @@ static void __init mediatek_timer_init(void)
}
of_clk_init(NULL);
- clocksource_probe();
+ timer_probe();
};
static const char * const mediatek_board_dt_compat[] = {
diff --git a/arch/arm/mach-moxart/Kconfig b/arch/arm/mach-moxart/Kconfig
index 70db2abf6163..a4a91f9a3301 100644
--- a/arch/arm/mach-moxart/Kconfig
+++ b/arch/arm/mach-moxart/Kconfig
@@ -4,7 +4,7 @@ menuconfig ARCH_MOXART
select CPU_FA526
select ARM_DMA_MEM_BUFFERABLE
select FARADAY_FTINTC010
- select MOXART_TIMER
+ select FTTMR010_TIMER
select GPIOLIB
select PHYLIB if NETDEVICES
help
diff --git a/arch/arm/mach-omap2/dma.c b/arch/arm/mach-omap2/dma.c
index e58c13a9bea5..0b77a0176018 100644
--- a/arch/arm/mach-omap2/dma.c
+++ b/arch/arm/mach-omap2/dma.c
@@ -249,6 +249,24 @@ static const struct dma_slave_map omap24xx_sdma_map[] = {
{ "omap_uart.2", "rx", SDMA_FILTER_PARAM(54) },
{ "omap_hsmmc.0", "tx", SDMA_FILTER_PARAM(61) },
{ "omap_hsmmc.0", "rx", SDMA_FILTER_PARAM(62) },
+
+ /* external DMA requests when tusb6010 is used */
+ { "musb-tusb", "dmareq0", SDMA_FILTER_PARAM(2) },
+ { "musb-tusb", "dmareq1", SDMA_FILTER_PARAM(3) },
+ { "musb-tusb", "dmareq2", SDMA_FILTER_PARAM(14) }, /* OMAP2420 only */
+ { "musb-tusb", "dmareq3", SDMA_FILTER_PARAM(15) }, /* OMAP2420 only */
+ { "musb-tusb", "dmareq4", SDMA_FILTER_PARAM(16) }, /* OMAP2420 only */
+ { "musb-tusb", "dmareq5", SDMA_FILTER_PARAM(64) }, /* OMAP2420 only */
+};
+
+static const struct dma_slave_map omap24xx_sdma_dt_map[] = {
+ /* external DMA requests when tusb6010 is used */
+ { "musb-hdrc.1.auto", "dmareq0", SDMA_FILTER_PARAM(2) },
+ { "musb-hdrc.1.auto", "dmareq1", SDMA_FILTER_PARAM(3) },
+ { "musb-hdrc.1.auto", "dmareq2", SDMA_FILTER_PARAM(14) }, /* OMAP2420 only */
+ { "musb-hdrc.1.auto", "dmareq3", SDMA_FILTER_PARAM(15) }, /* OMAP2420 only */
+ { "musb-hdrc.1.auto", "dmareq4", SDMA_FILTER_PARAM(16) }, /* OMAP2420 only */
+ { "musb-hdrc.1.auto", "dmareq5", SDMA_FILTER_PARAM(64) }, /* OMAP2420 only */
};
static const struct dma_slave_map omap3xxx_sdma_map[] = {
@@ -346,6 +364,12 @@ static int __init omap2_system_dma_init_dev(struct omap_hwmod *oh, void *unused)
__func__);
return -ENODEV;
}
+ } else {
+ if (soc_is_omap24xx()) {
+ /* DMA slave map for drivers not yet converted to DT */
+ p.slave_map = omap24xx_sdma_dt_map;
+ p.slavecnt = ARRAY_SIZE(omap24xx_sdma_dt_map);
+ }
}
pdev = omap_device_build(name, 0, oh, &p, sizeof(p));
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index 07dd692c4737..ae4bb9fdc483 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -497,7 +497,7 @@ void __init omap_init_time(void)
__omap_sync32k_timer_init(1, "timer_32k_ck", "ti,timer-alwon",
2, "timer_sys_ck", NULL, false);
- clocksource_probe();
+ timer_probe();
}
#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM43XX)
@@ -506,7 +506,7 @@ void __init omap3_secure_sync32k_timer_init(void)
__omap_sync32k_timer_init(12, "secure_32k_fck", "ti,timer-secure",
2, "timer_sys_ck", NULL, false);
- clocksource_probe();
+ timer_probe();
}
#endif /* CONFIG_ARCH_OMAP3 */
@@ -517,7 +517,7 @@ void __init omap3_gptimer_timer_init(void)
__omap_sync32k_timer_init(2, "timer_sys_ck", NULL,
1, "timer_sys_ck", "ti,timer-alwon", true);
if (of_have_populated_dt())
- clocksource_probe();
+ timer_probe();
}
#endif
@@ -532,7 +532,7 @@ static void __init omap4_sync32k_timer_init(void)
void __init omap4_local_timer_init(void)
{
omap4_sync32k_timer_init();
- clocksource_probe();
+ timer_probe();
}
#endif
@@ -656,7 +656,7 @@ void __init omap5_realtime_timer_init(void)
omap4_sync32k_timer_init();
realtime_counter_init();
- clocksource_probe();
+ timer_probe();
}
#endif /* CONFIG_SOC_OMAP5 || CONFIG_SOC_DRA7XX */
diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c
index ef0500a4c8ad..5ab834ebcb49 100644
--- a/arch/arm/mach-rockchip/rockchip.c
+++ b/arch/arm/mach-rockchip/rockchip.c
@@ -55,7 +55,7 @@ static void __init rockchip_timer_init(void)
}
of_clk_init(NULL);
- clocksource_probe();
+ timer_probe();
}
static void __init rockchip_dt_init(void)
diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c
index 6b279d037774..bdb5ec1cf560 100644
--- a/arch/arm/mach-rpc/ecard.c
+++ b/arch/arm/mach-rpc/ecard.c
@@ -761,19 +761,21 @@ static struct expansion_card *__init ecard_alloc_card(int type, int slot)
return ec;
}
-static ssize_t ecard_show_irq(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t irq_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct expansion_card *ec = ECARD_DEV(dev);
return sprintf(buf, "%u\n", ec->irq);
}
+static DEVICE_ATTR_RO(irq);
-static ssize_t ecard_show_dma(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t dma_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct expansion_card *ec = ECARD_DEV(dev);
return sprintf(buf, "%u\n", ec->dma);
}
+static DEVICE_ATTR_RO(dma);
-static ssize_t ecard_show_resources(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t resource_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct expansion_card *ec = ECARD_DEV(dev);
char *str = buf;
@@ -787,35 +789,39 @@ static ssize_t ecard_show_resources(struct device *dev, struct device_attribute
return str - buf;
}
+static DEVICE_ATTR_RO(resource);
-static ssize_t ecard_show_vendor(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct expansion_card *ec = ECARD_DEV(dev);
return sprintf(buf, "%u\n", ec->cid.manufacturer);
}
+static DEVICE_ATTR_RO(vendor);
-static ssize_t ecard_show_device(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t device_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct expansion_card *ec = ECARD_DEV(dev);
return sprintf(buf, "%u\n", ec->cid.product);
}
+static DEVICE_ATTR_RO(device);
-static ssize_t ecard_show_type(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct expansion_card *ec = ECARD_DEV(dev);
return sprintf(buf, "%s\n", ec->easi ? "EASI" : "IOC");
}
-
-static struct device_attribute ecard_dev_attrs[] = {
- __ATTR(device, S_IRUGO, ecard_show_device, NULL),
- __ATTR(dma, S_IRUGO, ecard_show_dma, NULL),
- __ATTR(irq, S_IRUGO, ecard_show_irq, NULL),
- __ATTR(resource, S_IRUGO, ecard_show_resources, NULL),
- __ATTR(type, S_IRUGO, ecard_show_type, NULL),
- __ATTR(vendor, S_IRUGO, ecard_show_vendor, NULL),
- __ATTR_NULL,
+static DEVICE_ATTR_RO(type);
+
+static struct attribute *ecard_dev_attrs[] = {
+ &dev_attr_device.attr,
+ &dev_attr_dma.attr,
+ &dev_attr_irq.attr,
+ &dev_attr_resource.attr,
+ &dev_attr_type.attr,
+ &dev_attr_vendor.attr,
+ NULL,
};
-
+ATTRIBUTE_GROUPS(ecard_dev);
int ecard_request_resources(struct expansion_card *ec)
{
@@ -1120,7 +1126,7 @@ static int ecard_match(struct device *_dev, struct device_driver *_drv)
struct bus_type ecard_bus_type = {
.name = "ecard",
- .dev_attrs = ecard_dev_attrs,
+ .dev_groups = ecard_dev_groups,
.match = ecard_match,
.probe = ecard_drv_probe,
.remove = ecard_drv_remove,
diff --git a/arch/arm/mach-s3c24xx/Kconfig b/arch/arm/mach-s3c24xx/Kconfig
index 4b1690acb6a5..f07da82ebfea 100644
--- a/arch/arm/mach-s3c24xx/Kconfig
+++ b/arch/arm/mach-s3c24xx/Kconfig
@@ -394,7 +394,7 @@ config MACH_SMDK2416
config MACH_S3C2416_DT
bool "Samsung S3C2416 machine using devicetree"
- select CLKSRC_OF
+ select TIMER_OF
select USE_OF
select PINCTRL
select PINCTRL_S3C24XX
diff --git a/arch/arm/mach-s3c64xx/Kconfig b/arch/arm/mach-s3c64xx/Kconfig
index 459214fa20b4..71a49343d711 100644
--- a/arch/arm/mach-s3c64xx/Kconfig
+++ b/arch/arm/mach-s3c64xx/Kconfig
@@ -336,7 +336,7 @@ config MACH_WLF_CRAGG_6410
config MACH_S3C64XX_DT
bool "Samsung S3C6400/S3C6410 machine using Device Tree"
- select CLKSRC_OF
+ select TIMER_OF
select CPU_S3C6400
select CPU_S3C6410
select PINCTRL
diff --git a/arch/arm/mach-shmobile/setup-rcar-gen2.c b/arch/arm/mach-shmobile/setup-rcar-gen2.c
index 52d466b75973..a6e74f481dea 100644
--- a/arch/arm/mach-shmobile/setup-rcar-gen2.c
+++ b/arch/arm/mach-shmobile/setup-rcar-gen2.c
@@ -113,7 +113,7 @@ void __init rcar_gen2_timer_init(void)
#endif /* CONFIG_ARM_ARCH_TIMER */
of_clk_init(NULL);
- clocksource_probe();
+ timer_probe();
}
struct memory_reserve_config {
diff --git a/arch/arm/mach-spear/spear13xx.c b/arch/arm/mach-spear/spear13xx.c
index ca2f6a82a414..31c43cabf362 100644
--- a/arch/arm/mach-spear/spear13xx.c
+++ b/arch/arm/mach-spear/spear13xx.c
@@ -124,5 +124,5 @@ void __init spear13xx_timer_init(void)
clk_put(pclk);
spear_setup_of_timer();
- clocksource_probe();
+ timer_probe();
}
diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c
index f44e3acb5c90..7ab353fb25f2 100644
--- a/arch/arm/mach-sunxi/sunxi.c
+++ b/arch/arm/mach-sunxi/sunxi.c
@@ -42,7 +42,7 @@ static void __init sun6i_timer_init(void)
of_clk_init(NULL);
if (IS_ENABLED(CONFIG_RESET_CONTROLLER))
sun6i_reset_init();
- clocksource_probe();
+ timer_probe();
}
DT_MACHINE_START(SUN6I_DT, "Allwinner sun6i (A31) Family")
diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
index a4910ea6811a..048f15e8c669 100644
--- a/arch/arm/mach-u300/core.c
+++ b/arch/arm/mach-u300/core.c
@@ -407,7 +407,7 @@ static const char * u300_board_compat[] = {
DT_MACHINE_START(U300_DT, "U300 S335/B335 (Device Tree)")
.map_io = u300_map_io,
.init_irq = u300_init_irq_dt,
- .init_time = clocksource_probe,
+ .init_time = timer_probe,
.init_machine = u300_init_machine_dt,
.restart = u300_restart,
.dt_compat = u300_board_compat,
diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index ed118648313f..6aba9ebf8041 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -150,7 +150,7 @@ static void __init zynq_timer_init(void)
{
zynq_clock_init();
of_clk_init(NULL);
- clocksource_probe();
+ timer_probe();
}
static struct map_desc zynq_cortex_a9_scu_map __initdata = {
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 31af3cb59a60..e46a6a446cdd 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1218,15 +1218,15 @@ void __init adjust_lowmem_bounds(void)
high_memory = __va(arm_lowmem_limit - 1) + 1;
+ if (!memblock_limit)
+ memblock_limit = arm_lowmem_limit;
+
/*
* Round the memblock limit down to a pmd size. This
* helps to ensure that we will allocate memory from the
* last full pmd, which should be mapped.
*/
- if (memblock_limit)
- memblock_limit = round_down(memblock_limit, PMD_SIZE);
- if (!memblock_limit)
- memblock_limit = arm_lowmem_limit;
+ memblock_limit = round_down(memblock_limit, PMD_SIZE);
if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) {
if (memblock_end_of_DRAM() > arm_lowmem_limit) {
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index ad1f4e6a9e33..52d1cd14fda4 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -182,7 +182,8 @@ void __kprobes kprobes_remove_breakpoint(void *addr, unsigned int insn)
.addr = addr,
.insn = insn,
};
- stop_machine(__kprobes_remove_breakpoint, &p, cpu_online_mask);
+ stop_machine_cpuslocked(__kprobes_remove_breakpoint, &p,
+ cpu_online_mask);
}
void __kprobes arch_disarm_kprobe(struct kprobe *p)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b2024db225a9..300146dc8433 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -41,6 +41,7 @@ config ARM64
select EDAC_SUPPORT
select FRAME_POINTER
select GENERIC_ALLOCATOR
+ select GENERIC_ARCH_TOPOLOGY
select GENERIC_CLOCKEVENTS
select GENERIC_CLOCKEVENTS_BROADCAST
select GENERIC_CPU_AUTOPROBE
@@ -205,7 +206,7 @@ config GENERIC_CALIBRATE_DELAY
config ZONE_DMA
def_bool y
-config HAVE_GENERIC_RCU_GUP
+config HAVE_GENERIC_GUP
def_bool y
config ARCH_DMA_ADDR_T_64BIT
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 73272f43ca01..9ed0a659046b 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -18,7 +18,7 @@ config ARCH_ALPINE
config ARCH_BCM2835
bool "Broadcom BCM2835 family"
- select CLKSRC_OF
+ select TIMER_OF
select GPIOLIB
select PINCTRL
select PINCTRL_BCM2835
@@ -178,7 +178,7 @@ config ARCH_TEGRA
select ARCH_HAS_RESET_CONTROLLER
select CLKDEV_LOOKUP
select CLKSRC_MMIO
- select CLKSRC_OF
+ select TIMER_OF
select GENERIC_CLOCKEVENTS
select GPIOLIB
select PINCTRL
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
index 5013e4b2ea71..6b82695ce32c 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
@@ -887,5 +887,69 @@
};
};
};
+
+ debug@f6590000 {
+ compatible = "arm,coresight-cpu-debug","arm,primecell";
+ reg = <0 0xf6590000 0 0x1000>;
+ clocks = <&sys_ctrl HI6220_DAPB_CLK>;
+ clock-names = "apb_pclk";
+ cpu = <&cpu0>;
+ };
+
+ debug@f6592000 {
+ compatible = "arm,coresight-cpu-debug","arm,primecell";
+ reg = <0 0xf6592000 0 0x1000>;
+ clocks = <&sys_ctrl HI6220_DAPB_CLK>;
+ clock-names = "apb_pclk";
+ cpu = <&cpu1>;
+ };
+
+ debug@f6594000 {
+ compatible = "arm,coresight-cpu-debug","arm,primecell";
+ reg = <0 0xf6594000 0 0x1000>;
+ clocks = <&sys_ctrl HI6220_DAPB_CLK>;
+ clock-names = "apb_pclk";
+ cpu = <&cpu2>;
+ };
+
+ debug@f6596000 {
+ compatible = "arm,coresight-cpu-debug","arm,primecell";
+ reg = <0 0xf6596000 0 0x1000>;
+ clocks = <&sys_ctrl HI6220_DAPB_CLK>;
+ clock-names = "apb_pclk";
+ cpu = <&cpu3>;
+ };
+
+ debug@f65d0000 {
+ compatible = "arm,coresight-cpu-debug","arm,primecell";
+ reg = <0 0xf65d0000 0 0x1000>;
+ clocks = <&sys_ctrl HI6220_DAPB_CLK>;
+ clock-names = "apb_pclk";
+ cpu = <&cpu4>;
+ };
+
+ debug@f65d2000 {
+ compatible = "arm,coresight-cpu-debug","arm,primecell";
+ reg = <0 0xf65d2000 0 0x1000>;
+ clocks = <&sys_ctrl HI6220_DAPB_CLK>;
+ clock-names = "apb_pclk";
+ cpu = <&cpu5>;
+ };
+
+ debug@f65d4000 {
+ compatible = "arm,coresight-cpu-debug","arm,primecell";
+ reg = <0 0xf65d4000 0 0x1000>;
+ clocks = <&sys_ctrl HI6220_DAPB_CLK>;
+ clock-names = "apb_pclk";
+ cpu = <&cpu6>;
+ };
+
+ debug@f65d6000 {
+ compatible = "arm,coresight-cpu-debug","arm,primecell";
+ reg = <0 0xf65d6000 0 0x1000>;
+ clocks = <&sys_ctrl HI6220_DAPB_CLK>;
+ clock-names = "apb_pclk";
+ cpu = <&cpu7>;
+ };
};
};
diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi
index ab3093995ded..17691abea608 100644
--- a/arch/arm64/boot/dts/qcom/msm8916.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi
@@ -1116,6 +1116,38 @@
};
};
+ debug@850000 {
+ compatible = "arm,coresight-cpu-debug","arm,primecell";
+ reg = <0x850000 0x1000>;
+ clocks = <&rpmcc RPM_QDSS_CLK>;
+ clock-names = "apb_pclk";
+ cpu = <&CPU0>;
+ };
+
+ debug@852000 {
+ compatible = "arm,coresight-cpu-debug","arm,primecell";
+ reg = <0x852000 0x1000>;
+ clocks = <&rpmcc RPM_QDSS_CLK>;
+ clock-names = "apb_pclk";
+ cpu = <&CPU1>;
+ };
+
+ debug@854000 {
+ compatible = "arm,coresight-cpu-debug","arm,primecell";
+ reg = <0x854000 0x1000>;
+ clocks = <&rpmcc RPM_QDSS_CLK>;
+ clock-names = "apb_pclk";
+ cpu = <&CPU2>;
+ };
+
+ debug@856000 {
+ compatible = "arm,coresight-cpu-debug","arm,primecell";
+ reg = <0x856000 0x1000>;
+ clocks = <&rpmcc RPM_QDSS_CLK>;
+ clock-names = "apb_pclk";
+ cpu = <&CPU3>;
+ };
+
etm@85c000 {
compatible = "arm,coresight-etm4x", "arm,primecell";
reg = <0x85c000 0x1000>;
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 29cb2ca756f6..4214c38d016b 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -433,7 +433,6 @@ u32 aarch64_set_branch_offset(u32 insn, s32 offset);
bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
-int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
s32 aarch64_insn_adrp_get_offset(u32 insn);
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 5d17f377d905..82cd07592519 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -11,7 +11,6 @@
*
*/
-#include <linux/dmi.h>
#include <linux/efi.h>
#include <linux/init.h>
@@ -117,20 +116,6 @@ int __init efi_set_mapping_permissions(struct mm_struct *mm,
set_permissions, md);
}
-static int __init arm64_dmi_init(void)
-{
- /*
- * On arm64, DMI depends on UEFI, and dmi_scan_machine() needs to
- * be called early because dmi_id_init(), which is an arch_initcall
- * itself, depends on dmi_scan_machine() having been called already.
- */
- dmi_scan_machine();
- if (dmi_available)
- dmi_set_dump_stack_arch_desc();
- return 0;
-}
-core_initcall(arm64_dmi_init);
-
/*
* UpdateCapsule() depends on the system being shutdown via
* ResetSystem().
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index b884a926a632..cd872133e88e 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -255,6 +255,7 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg)
return ret;
}
+static
int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt)
{
struct aarch64_insn_patch patch = {
@@ -267,8 +268,8 @@ int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt)
if (cnt <= 0)
return -EINVAL;
- return stop_machine(aarch64_insn_patch_text_cb, &patch,
- cpu_online_mask);
+ return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch,
+ cpu_online_mask);
}
int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6e0e16a3a7d4..321119881abf 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -961,8 +961,7 @@ void smp_send_stop(void)
cpumask_copy(&mask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &mask);
- if (system_state == SYSTEM_BOOTING ||
- system_state == SYSTEM_RUNNING)
+ if (system_state <= SYSTEM_RUNNING)
pr_crit("SMP: stopping secondary CPUs\n");
smp_cross_call(&mask, IPI_CPU_STOP);
}
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 59779699a1a4..da33c90248e9 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -70,7 +70,7 @@ void __init time_init(void)
u32 arch_timer_rate;
of_clk_init(NULL);
- clocksource_probe();
+ timer_probe();
tick_setup_hrtimer_broadcast();
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 08243533e5ee..79244c75eaec 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -11,7 +11,7 @@
* for more details.
*/
-#include <linux/acpi.h>
+#include <linux/arch_topology.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/init.h>
@@ -23,227 +23,11 @@
#include <linux/sched/topology.h>
#include <linux/slab.h>
#include <linux/string.h>
-#include <linux/cpufreq.h>
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/topology.h>
-static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
-static DEFINE_MUTEX(cpu_scale_mutex);
-
-unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
-{
- return per_cpu(cpu_scale, cpu);
-}
-
-static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
-{
- per_cpu(cpu_scale, cpu) = capacity;
-}
-
-static ssize_t cpu_capacity_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
-
- return sprintf(buf, "%lu\n",
- arch_scale_cpu_capacity(NULL, cpu->dev.id));
-}
-
-static ssize_t cpu_capacity_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t count)
-{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
- int this_cpu = cpu->dev.id, i;
- unsigned long new_capacity;
- ssize_t ret;
-
- if (count) {
- ret = kstrtoul(buf, 0, &new_capacity);
- if (ret)
- return ret;
- if (new_capacity > SCHED_CAPACITY_SCALE)
- return -EINVAL;
-
- mutex_lock(&cpu_scale_mutex);
- for_each_cpu(i, &cpu_topology[this_cpu].core_sibling)
- set_capacity_scale(i, new_capacity);
- mutex_unlock(&cpu_scale_mutex);
- }
-
- return count;
-}
-
-static DEVICE_ATTR_RW(cpu_capacity);
-
-static int register_cpu_capacity_sysctl(void)
-{
- int i;
- struct device *cpu;
-
- for_each_possible_cpu(i) {
- cpu = get_cpu_device(i);
- if (!cpu) {
- pr_err("%s: too early to get CPU%d device!\n",
- __func__, i);
- continue;
- }
- device_create_file(cpu, &dev_attr_cpu_capacity);
- }
-
- return 0;
-}
-subsys_initcall(register_cpu_capacity_sysctl);
-
-static u32 capacity_scale;
-static u32 *raw_capacity;
-static bool cap_parsing_failed;
-
-static void __init parse_cpu_capacity(struct device_node *cpu_node, int cpu)
-{
- int ret;
- u32 cpu_capacity;
-
- if (cap_parsing_failed)
- return;
-
- ret = of_property_read_u32(cpu_node,
- "capacity-dmips-mhz",
- &cpu_capacity);
- if (!ret) {
- if (!raw_capacity) {
- raw_capacity = kcalloc(num_possible_cpus(),
- sizeof(*raw_capacity),
- GFP_KERNEL);
- if (!raw_capacity) {
- pr_err("cpu_capacity: failed to allocate memory for raw capacities\n");
- cap_parsing_failed = true;
- return;
- }
- }
- capacity_scale = max(cpu_capacity, capacity_scale);
- raw_capacity[cpu] = cpu_capacity;
- pr_debug("cpu_capacity: %s cpu_capacity=%u (raw)\n",
- cpu_node->full_name, raw_capacity[cpu]);
- } else {
- if (raw_capacity) {
- pr_err("cpu_capacity: missing %s raw capacity\n",
- cpu_node->full_name);
- pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
- }
- cap_parsing_failed = true;
- kfree(raw_capacity);
- }
-}
-
-static void normalize_cpu_capacity(void)
-{
- u64 capacity;
- int cpu;
-
- if (!raw_capacity || cap_parsing_failed)
- return;
-
- pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
- mutex_lock(&cpu_scale_mutex);
- for_each_possible_cpu(cpu) {
- pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n",
- cpu, raw_capacity[cpu]);
- capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
- / capacity_scale;
- set_capacity_scale(cpu, capacity);
- pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
- cpu, arch_scale_cpu_capacity(NULL, cpu));
- }
- mutex_unlock(&cpu_scale_mutex);
-}
-
-#ifdef CONFIG_CPU_FREQ
-static cpumask_var_t cpus_to_visit;
-static bool cap_parsing_done;
-static void parsing_done_workfn(struct work_struct *work);
-static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
-
-static int
-init_cpu_capacity_callback(struct notifier_block *nb,
- unsigned long val,
- void *data)
-{
- struct cpufreq_policy *policy = data;
- int cpu;
-
- if (cap_parsing_failed || cap_parsing_done)
- return 0;
-
- switch (val) {
- case CPUFREQ_NOTIFY:
- pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
- cpumask_pr_args(policy->related_cpus),
- cpumask_pr_args(cpus_to_visit));
- cpumask_andnot(cpus_to_visit,
- cpus_to_visit,
- policy->related_cpus);
- for_each_cpu(cpu, policy->related_cpus) {
- raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) *
- policy->cpuinfo.max_freq / 1000UL;
- capacity_scale = max(raw_capacity[cpu], capacity_scale);
- }
- if (cpumask_empty(cpus_to_visit)) {
- normalize_cpu_capacity();
- kfree(raw_capacity);
- pr_debug("cpu_capacity: parsing done\n");
- cap_parsing_done = true;
- schedule_work(&parsing_done_work);
- }
- }
- return 0;
-}
-
-static struct notifier_block init_cpu_capacity_notifier = {
- .notifier_call = init_cpu_capacity_callback,
-};
-
-static int __init register_cpufreq_notifier(void)
-{
- /*
- * on ACPI-based systems we need to use the default cpu capacity
- * until we have the necessary code to parse the cpu capacity, so
- * skip registering cpufreq notifier.
- */
- if (!acpi_disabled || cap_parsing_failed)
- return -EINVAL;
-
- if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) {
- pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n");
- return -ENOMEM;
- }
- cpumask_copy(cpus_to_visit, cpu_possible_mask);
-
- return cpufreq_register_notifier(&init_cpu_capacity_notifier,
- CPUFREQ_POLICY_NOTIFIER);
-}
-core_initcall(register_cpufreq_notifier);
-
-static void parsing_done_workfn(struct work_struct *work)
-{
- cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
- CPUFREQ_POLICY_NOTIFIER);
-}
-
-#else
-static int __init free_raw_capacity(void)
-{
- kfree(raw_capacity);
-
- return 0;
-}
-core_initcall(free_raw_capacity);
-#endif
-
static int __init get_cpu_for_node(struct device_node *node)
{
struct device_node *cpu_node;
@@ -255,7 +39,7 @@ static int __init get_cpu_for_node(struct device_node *node)
for_each_possible_cpu(cpu) {
if (of_get_cpu_node(cpu, NULL) == cpu_node) {
- parse_cpu_capacity(cpu_node, cpu);
+ topology_parse_cpu_capacity(cpu_node, cpu);
of_node_put(cpu_node);
return cpu;
}
@@ -400,16 +184,14 @@ static int __init parse_dt_topology(void)
* cluster with restricted subnodes.
*/
map = of_get_child_by_name(cn, "cpu-map");
- if (!map) {
- cap_parsing_failed = true;
+ if (!map)
goto out;
- }
ret = parse_cluster(map, 0);
if (ret != 0)
goto out_map;
- normalize_cpu_capacity();
+ topology_normalize_cpu_scale();
/*
* Check that all cores are in the topology; the SMP code will
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 41b6e31f8f55..7492d9009610 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -220,11 +220,10 @@ void update_vsyscall(struct timekeeper *tk)
if (!use_syscall) {
/* tkr_mono.cycle_last == tkr_raw.cycle_last */
vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
- vdso_data->raw_time_sec = tk->raw_time.tv_sec;
- vdso_data->raw_time_nsec = tk->raw_time.tv_nsec;
+ vdso_data->raw_time_sec = tk->raw_sec;
+ vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
- /* tkr_raw.xtime_nsec == 0 */
vdso_data->cs_mono_mult = tk->tkr_mono.mult;
vdso_data->cs_raw_mult = tk->tkr_raw.mult;
/* tkr_mono.shift == tkr_raw.shift */
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index e00b4671bd7c..76320e920965 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -256,7 +256,6 @@ monotonic_raw:
seqcnt_check fail=monotonic_raw
/* All computations are done with left-shifted nsecs. */
- lsl x14, x14, x12
get_nsec_per_sec res=x9
lsl x9, x9, x12
diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h
index 85d4af97c986..dbdbb8a558df 100644
--- a/arch/blackfin/include/asm/processor.h
+++ b/arch/blackfin/include/asm/processor.h
@@ -75,11 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-/*
- * Return saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk) (tsk->thread.pc)
-
unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) \
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index f0eaf0475e7e..a3c8d05c4cc7 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -45,7 +45,6 @@ generic-y += sembuf.h
generic-y += serial.h
generic-y += shmbuf.h
generic-y += shmparam.h
-generic-y += siginfo.h
generic-y += signal.h
generic-y += socket.h
generic-y += sockios.h
diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h
index b9eb3da7f278..7c87b5be53b5 100644
--- a/arch/c6x/include/asm/processor.h
+++ b/arch/c6x/include/asm/processor.h
@@ -96,11 +96,6 @@ static inline void release_thread(struct task_struct *dead_task)
#define release_segments(mm) do { } while (0)
/*
- * saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc)
-
-/*
* saved kernel SP and DP of a blocked thread.
*/
#ifdef _BIG_ENDIAN
diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
index 13a97aa2285f..1c44d3b3eba0 100644
--- a/arch/c6x/include/uapi/asm/Kbuild
+++ b/arch/c6x/include/uapi/asm/Kbuild
@@ -2,3 +2,4 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += kvm_para.h
+generic-y += siginfo.h
diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c
index e299d30105b5..a2cdb1521aca 100644
--- a/arch/cris/arch-v10/kernel/process.c
+++ b/arch/cris/arch-v10/kernel/process.c
@@ -69,14 +69,6 @@ void hard_reset_now (void)
while(1) /* waiting for RETRIBUTION! */ ;
}
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
- return task_pt_regs(t)->irp;
-}
-
/* setup the child's kernel stack with a pt_regs and switch_stack on it.
* it will be un-nested during _resume and _ret_from_sys_call when the
* new thread is scheduled.
diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c
index c530a8fa87ce..fe87b383fbf3 100644
--- a/arch/cris/arch-v32/kernel/process.c
+++ b/arch/cris/arch-v32/kernel/process.c
@@ -85,14 +85,6 @@ hard_reset_now(void)
}
/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
- return task_pt_regs(t)->erp;
-}
-
-/*
* Setup the child's kernel stack with a pt_regs and call switch_stack() on it.
* It will be unnested during _resume and _ret_from_sys_call when the new thread
* is scheduled.
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index 2890099992a9..acc5781100c2 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -36,7 +36,6 @@ generic-y += resource.h
generic-y += sections.h
generic-y += sembuf.h
generic-y += shmbuf.h
-generic-y += siginfo.h
generic-y += socket.h
generic-y += sockios.h
generic-y += statfs.h
diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h
index 15b815df29c1..bc2729e4b2c9 100644
--- a/arch/cris/include/asm/processor.h
+++ b/arch/cris/include/asm/processor.h
@@ -52,8 +52,6 @@ unsigned long get_wchan(struct task_struct *p);
#define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->thread.usp)
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
/* Free all resources held by a thread. */
static inline void release_thread(struct task_struct *dead_task)
{
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index b15bf6bc0e94..b55fc2ae1e8c 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += siginfo.h
diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h
index ddaeb9cc9143..e4d08d74ed9f 100644
--- a/arch/frv/include/asm/processor.h
+++ b/arch/frv/include/asm/processor.h
@@ -96,11 +96,6 @@ extern asmlinkage void *restore_user_regs(const struct user_context *target, ...
#define release_segments(mm) do { } while (0)
#define forget_segments() do { } while (0)
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) ((tsk)->thread.frame0->pc)
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 5a4c92abc99e..a957b374e3a6 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -198,15 +198,6 @@ unsigned long get_wchan(struct task_struct *p)
return 0;
}
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- /* Check whether the thread is blocked in resume() */
- if (in_sched_functions(tsk->thread.pc))
- return ((unsigned long *)tsk->thread.fp)[2];
- else
- return tsk->thread.pc;
-}
-
int elf_check_arch(const struct elf32_hdr *hdr)
{
unsigned long hsr0 = __get_HSR(0);
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 3ae852507e57..6e3d36f37a02 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -15,7 +15,7 @@ config H8300
select OF_IRQ
select OF_EARLY_FLATTREE
select HAVE_MEMBLOCK
- select CLKSRC_OF
+ select TIMER_OF
select H8300_TMR8
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZO
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 757cdeb24e6e..99c824608a31 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -54,7 +54,6 @@ generic-y += serial.h
generic-y += setup.h
generic-y += shmbuf.h
generic-y += shmparam.h
-generic-y += siginfo.h
generic-y += sizes.h
generic-y += socket.h
generic-y += sockios.h
diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h
index 65132d7ae9e5..afa53147e66a 100644
--- a/arch/h8300/include/asm/processor.h
+++ b/arch/h8300/include/asm/processor.h
@@ -110,10 +110,6 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk);
unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) \
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
index b15bf6bc0e94..b55fc2ae1e8c 100644
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += siginfo.h
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 0f5db5bb561b..d1ddcabbbe83 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -129,11 +129,6 @@ int copy_thread(unsigned long clone_flags,
return 0;
}
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return ((struct pt_regs *)tsk->thread.esp0)->pc;
-}
-
unsigned long get_wchan(struct task_struct *p)
{
unsigned long fp, pc;
diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index c8c25a4e9e48..6be15d634650 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -246,5 +246,5 @@ void __init calibrate_delay(void)
void __init time_init(void)
{
of_clk_init(NULL);
- clocksource_probe();
+ timer_probe();
}
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 6b45ef79eb8f..0fc9cb04e6ad 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -41,7 +41,6 @@ generic-y += sembuf.h
generic-y += serial.h
generic-y += shmbuf.h
generic-y += shmparam.h
-generic-y += siginfo.h
generic-y += sizes.h
generic-y += socket.h
generic-y += sockios.h
diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h
index 45a825402f63..ce67940860a5 100644
--- a/arch/hexagon/include/asm/processor.h
+++ b/arch/hexagon/include/asm/processor.h
@@ -33,9 +33,6 @@
/* task_struct, defined elsewhere, is the "process descriptor" */
struct task_struct;
-/* this is defined in arch/process.c */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
/*
diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild
index b15bf6bc0e94..b55fc2ae1e8c 100644
--- a/arch/hexagon/include/uapi/asm/Kbuild
+++ b/arch/hexagon/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += siginfo.h
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index de715bab7956..656050c2e6a0 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -61,14 +61,6 @@ void arch_cpu_idle(void)
}
/*
- * Return saved PC of a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return 0;
-}
-
-/*
* Copy architecture-specific thread state
*/
int copy_thread(unsigned long clone_flags, unsigned long usp,
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index de8cba121013..70d52e9bb575 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -387,19 +387,6 @@ static int activate(struct tty_port *port, struct tty_struct *tty)
}
state->xmit.head = state->xmit.tail = 0;
-
- /*
- * Set up the tty->alt_speed kludge
- */
- if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI)
- tty->alt_speed = 57600;
- if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI)
- tty->alt_speed = 115200;
- if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI)
- tty->alt_speed = 230400;
- if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP)
- tty->alt_speed = 460800;
-
errout:
local_irq_restore(flags);
return retval;
diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
index 5de673ac9cb1..a2540e21f919 100644
--- a/arch/ia64/include/asm/io.h
+++ b/arch/ia64/include/asm/io.h
@@ -117,7 +117,7 @@ extern int valid_mmap_phys_addr_range (unsigned long pfn, size_t count);
* following the barrier will arrive after all previous writes. For most
* ia64 platforms, this is a simple 'mf.a' instruction.
*
- * See Documentation/DocBook/deviceiobook.tmpl for more information.
+ * See Documentation/driver-api/device-io.rst for more information.
*/
static inline void ___ia64_mmiowb(void)
{
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index 26a63d69c599..ab982f07ea68 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -602,23 +602,6 @@ ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat)
}
/*
- * Return saved PC of a blocked thread.
- * Note that the only way T can block is through a call to schedule() -> switch_to().
- */
-static inline unsigned long
-thread_saved_pc (struct task_struct *t)
-{
- struct unw_frame_info info;
- unsigned long ip;
-
- unw_init_from_blocked_task(&info, t);
- if (unw_unwind(&info) < 0)
- return 0;
- unw_get_ip(&info, &ip);
- return ip;
-}
-
-/*
* Get the current instruction/program counter value.
*/
#define current_text_addr() \
diff --git a/arch/ia64/include/asm/siginfo.h b/arch/ia64/include/asm/siginfo.h
deleted file mode 100644
index 6f2e2dd0f28f..000000000000
--- a/arch/ia64/include/asm/siginfo.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Based on <asm-i386/siginfo.h>.
- *
- * Modified 1998-2002
- * David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
- */
-#ifndef _ASM_IA64_SIGINFO_H
-#define _ASM_IA64_SIGINFO_H
-
-#include <linux/string.h>
-#include <uapi/asm/siginfo.h>
-
-static inline void
-copy_siginfo (siginfo_t *to, siginfo_t *from)
-{
- if (from->si_code < 0)
- memcpy(to, from, sizeof(siginfo_t));
- else
- /* _sigchld is currently the largest know union member */
- memcpy(to, from, 4*sizeof(int) + sizeof(from->_sifields._sigchld));
-}
-
-#endif /* _ASM_IA64_SIGINFO_H */
diff --git a/arch/ia64/include/uapi/asm/siginfo.h b/arch/ia64/include/uapi/asm/siginfo.h
index f72bf0172bb2..4694c64252d6 100644
--- a/arch/ia64/include/uapi/asm/siginfo.h
+++ b/arch/ia64/include/uapi/asm/siginfo.h
@@ -11,7 +11,6 @@
#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
#define HAVE_ARCH_SIGINFO_T
-#define HAVE_ARCH_COPY_SIGINFO
#define HAVE_ARCH_COPY_SIGINFO_TO_USER
#include <asm-generic/siginfo.h>
diff --git a/arch/ia64/sn/kernel/iomv.c b/arch/ia64/sn/kernel/iomv.c
index c77ebdf98119..2b22a71663c1 100644
--- a/arch/ia64/sn/kernel/iomv.c
+++ b/arch/ia64/sn/kernel/iomv.c
@@ -63,7 +63,7 @@ EXPORT_SYMBOL(sn_io_addr);
/**
* __sn_mmiowb - I/O space memory barrier
*
- * See arch/ia64/include/asm/io.h and Documentation/DocBook/deviceiobook.tmpl
+ * See arch/ia64/include/asm/io.h and Documentation/driver-api/device-io.rst
* for details.
*
* On SN2, we wait for the PIO_WRITE_STATUS SHub register to clear.
diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h
index 5767367550c6..657874eeeccc 100644
--- a/arch/m32r/include/asm/processor.h
+++ b/arch/m32r/include/asm/processor.h
@@ -122,8 +122,6 @@ extern void release_thread(struct task_struct *);
extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
extern void release_segments(struct mm_struct * mm);
-extern unsigned long thread_saved_pc(struct task_struct *);
-
/* Copy and release all segment info associated with a VM */
#define copy_segments(p, mm) do { } while (0)
#define release_segments(mm) do { } while (0)
diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild
index b15bf6bc0e94..c94ee54210bc 100644
--- a/arch/m32r/include/uapi/asm/Kbuild
+++ b/arch/m32r/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += siginfo.h
diff --git a/arch/m32r/include/uapi/asm/siginfo.h b/arch/m32r/include/uapi/asm/siginfo.h
deleted file mode 100644
index 7d9cd9ebfd0e..000000000000
--- a/arch/m32r/include/uapi/asm/siginfo.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _M32R_SIGINFO_H
-#define _M32R_SIGINFO_H
-
-#include <asm-generic/siginfo.h>
-
-#endif /* _M32R_SIGINFO_H */
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index d8ffcfec599c..8cd7e03f4370 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -39,14 +39,6 @@
#include <linux/err.h>
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return tsk->thread.lr;
-}
-
void (*pm_power_off)(void) = NULL;
EXPORT_SYMBOL(pm_power_off);
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 531cb9eb3319..ddff1164aff0 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y
CONFIG_SYSV68_PARTITION=y
CONFIG_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68020=y
@@ -349,7 +351,6 @@ CONFIG_SCSI_A4000T=y
CONFIG_SCSI_ZORRO7XX=y
CONFIG_MD=y
CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
@@ -361,6 +362,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_UEVENT=y
CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
CONFIG_TARGET_CORE=m
CONFIG_TCM_IBLOCK=m
CONFIG_TCM_FILEIO=m
@@ -414,6 +416,7 @@ CONFIG_ZORRO8390=y
# CONFIG_NET_VENDOR_STMICRO is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
@@ -572,6 +575,8 @@ CONFIG_DLM=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
@@ -590,6 +595,7 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index ca91d39555da..17384dc959a5 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -27,6 +27,8 @@ CONFIG_SUN_PARTITION=y
CONFIG_SYSV68_PARTITION=y
CONFIG_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68020=y
@@ -331,7 +333,6 @@ CONFIG_ISCSI_TCP=m
CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_MD=y
CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
@@ -343,6 +344,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_UEVENT=y
CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
CONFIG_TARGET_CORE=m
CONFIG_TCM_IBLOCK=m
CONFIG_TCM_FILEIO=m
@@ -388,6 +390,7 @@ CONFIG_VETH=m
# CONFIG_NET_VENDOR_STMICRO is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
@@ -531,6 +534,8 @@ CONFIG_DLM=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
@@ -549,6 +554,7 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 23a3d8a691e2..53a641d62f85 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y
CONFIG_SYSV68_PARTITION=y
CONFIG_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68020=y
@@ -340,7 +342,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_ATARI_SCSI=y
CONFIG_MD=y
CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
@@ -352,6 +353,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_UEVENT=y
CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
CONFIG_TARGET_CORE=m
CONFIG_TCM_IBLOCK=m
CONFIG_TCM_FILEIO=m
@@ -399,6 +401,7 @@ CONFIG_SMC91X=y
# CONFIG_NET_VENDOR_STMICRO is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
@@ -552,6 +555,8 @@ CONFIG_DLM=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
@@ -570,6 +575,7 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 95deb95140fe..3925ae3a5eb3 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y
# CONFIG_EFI_PARTITION is not set
CONFIG_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68040=y
@@ -330,7 +332,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_BVME6000_SCSI=y
CONFIG_MD=y
CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
@@ -342,6 +343,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_UEVENT=y
CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
CONFIG_TARGET_CORE=m
CONFIG_TCM_IBLOCK=m
CONFIG_TCM_FILEIO=m
@@ -387,6 +389,7 @@ CONFIG_BVME6000_NET=y
# CONFIG_NET_VENDOR_STMICRO is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
@@ -523,6 +526,8 @@ CONFIG_DLM=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
@@ -541,6 +546,7 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index afae6958db2d..f4a134b390b4 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -27,6 +27,8 @@ CONFIG_SUN_PARTITION=y
CONFIG_SYSV68_PARTITION=y
CONFIG_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68020=y
@@ -331,7 +333,6 @@ CONFIG_ISCSI_TCP=m
CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_MD=y
CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
@@ -343,6 +344,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_UEVENT=y
CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
CONFIG_TARGET_CORE=m
CONFIG_TCM_IBLOCK=m
CONFIG_TCM_FILEIO=m
@@ -389,6 +391,7 @@ CONFIG_HPLANCE=y
# CONFIG_NET_VENDOR_STMICRO is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
@@ -533,6 +536,8 @@ CONFIG_DLM=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
@@ -551,6 +556,7 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index b010734729a7..9ed0cef632b7 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y
CONFIG_SYSV68_PARTITION=y
CONFIG_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68020=y
@@ -340,7 +342,6 @@ CONFIG_MAC_SCSI=y
CONFIG_SCSI_MAC_ESP=y
CONFIG_MD=y
CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
@@ -352,6 +353,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_UEVENT=y
CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
CONFIG_TARGET_CORE=m
CONFIG_TCM_IBLOCK=m
CONFIG_TCM_FILEIO=m
@@ -408,6 +410,7 @@ CONFIG_MAC8390=y
# CONFIG_NET_VENDOR_STMICRO is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
@@ -555,6 +558,8 @@ CONFIG_DLM=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
@@ -573,6 +578,7 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 0e414549b235..efed0d48fd53 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -22,6 +22,8 @@ CONFIG_UNIXWARE_DISKLABEL=y
# CONFIG_EFI_PARTITION is not set
CONFIG_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68020=y
@@ -373,7 +375,6 @@ CONFIG_BVME6000_SCSI=y
CONFIG_SUN3X_ESP=y
CONFIG_MD=y
CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
@@ -385,6 +386,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_UEVENT=y
CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
CONFIG_TARGET_CORE=m
CONFIG_TCM_IBLOCK=m
CONFIG_TCM_FILEIO=m
@@ -454,6 +456,7 @@ CONFIG_SMC91X=y
# CONFIG_NET_VENDOR_STMICRO is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
CONFIG_PLIP=m
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
@@ -635,6 +638,8 @@ CONFIG_DLM=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
@@ -653,6 +658,7 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index b2e687a0ec3d..9040457c7f9c 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y
# CONFIG_EFI_PARTITION is not set
CONFIG_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68030=y
@@ -329,7 +331,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_MVME147_SCSI=y
CONFIG_MD=y
CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
@@ -341,6 +342,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_UEVENT=y
CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
CONFIG_TARGET_CORE=m
CONFIG_TCM_IBLOCK=m
CONFIG_TCM_FILEIO=m
@@ -387,6 +389,7 @@ CONFIG_MVME147_NET=y
# CONFIG_NET_VENDOR_STMICRO is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
@@ -523,6 +526,8 @@ CONFIG_DLM=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
@@ -541,6 +546,7 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index cbd8ee24d1bc..8b17f00e0484 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -26,6 +26,8 @@ CONFIG_SUN_PARTITION=y
# CONFIG_EFI_PARTITION is not set
CONFIG_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68040=y
@@ -330,7 +332,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_MVME16x_SCSI=y
CONFIG_MD=y
CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
@@ -342,6 +343,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_UEVENT=y
CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
CONFIG_TARGET_CORE=m
CONFIG_TCM_IBLOCK=m
CONFIG_TCM_FILEIO=m
@@ -387,6 +389,7 @@ CONFIG_MVME16x_NET=y
# CONFIG_NET_VENDOR_STMICRO is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
@@ -523,6 +526,8 @@ CONFIG_DLM=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
@@ -541,6 +546,7 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 1e82cc944339..5f3718c62c85 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -27,6 +27,8 @@ CONFIG_SUN_PARTITION=y
CONFIG_SYSV68_PARTITION=y
CONFIG_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_M68040=y
@@ -336,7 +338,6 @@ CONFIG_ISCSI_TCP=m
CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_MD=y
CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
@@ -348,6 +349,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_UEVENT=y
CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
CONFIG_TARGET_CORE=m
CONFIG_TCM_IBLOCK=m
CONFIG_TCM_FILEIO=m
@@ -398,6 +400,7 @@ CONFIG_NE2000=y
# CONFIG_NET_VENDOR_STMICRO is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
CONFIG_PLIP=m
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
@@ -546,6 +549,8 @@ CONFIG_DLM=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
@@ -564,6 +569,7 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index f9e77f57a972..8c979a68fca5 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -26,6 +26,8 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_SYSV68_PARTITION=y
CONFIG_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_SUN3=y
@@ -327,7 +329,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_SUN3_SCSI=y
CONFIG_MD=y
CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
@@ -339,6 +340,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_UEVENT=y
CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
CONFIG_TARGET_CORE=m
CONFIG_TCM_IBLOCK=m
CONFIG_TCM_FILEIO=m
@@ -385,6 +387,7 @@ CONFIG_SUN3_82586=y
# CONFIG_NET_VENDOR_SUN is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
@@ -525,6 +528,8 @@ CONFIG_DLM=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
@@ -542,6 +547,7 @@ CONFIG_TEST_FIRMWARE=m
CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 3c394fcfb368..a1e79530e806 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -26,6 +26,8 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_SYSV68_PARTITION=y
CONFIG_IOSCHED_DEADLINE=m
CONFIG_MQ_IOSCHED_DEADLINE=m
+CONFIG_MQ_IOSCHED_KYBER=m
+CONFIG_IOSCHED_BFQ=m
CONFIG_KEXEC=y
CONFIG_BOOTINFO_PROC=y
CONFIG_SUN3X=y
@@ -327,7 +329,6 @@ CONFIG_ISCSI_BOOT_SYSFS=m
CONFIG_SUN3X_ESP=y
CONFIG_MD=y
CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
@@ -339,6 +340,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_UEVENT=y
CONFIG_DM_LOG_WRITES=m
+CONFIG_DM_INTEGRITY=m
CONFIG_TARGET_CORE=m
CONFIG_TCM_IBLOCK=m
CONFIG_TCM_FILEIO=m
@@ -385,6 +387,7 @@ CONFIG_SUN3LANCE=y
# CONFIG_NET_VENDOR_STMICRO is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
@@ -525,6 +528,8 @@ CONFIG_DLM=m
# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_WW_MUTEX_SELFTEST=m
+CONFIG_TEST_LIST_SORT=m
+CONFIG_TEST_SORT=m
CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
@@ -543,6 +548,7 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_EARLY_PRINTK=y
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_HARDENED_USERCOPY=y
CONFIG_CRYPTO_RSA=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 82005d2ff717..5ecf4e47b2e2 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -25,7 +25,6 @@ generic-y += preempt.h
generic-y += resource.h
generic-y += sections.h
generic-y += shmparam.h
-generic-y += siginfo.h
generic-y += spinlock.h
generic-y += statfs.h
generic-y += termios.h
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index 77239e81379b..94c36030440c 100644
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -130,8 +130,6 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) \
diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
index 8c8ce5e1ee0e..3bc64d02ba5f 100644
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -62,9 +62,4 @@ static inline int __gen_sigismember(sigset_t *set, int _sig)
#endif /* !CONFIG_CPU_HAS_NO_BITFIELDS */
-#ifndef __uClinux__
-extern void ptrace_signal_deliver(void);
-#define ptrace_signal_deliver ptrace_signal_deliver
-#endif /* __uClinux__ */
-
#endif /* _M68K_SIGNAL_H */
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
index 64368077235a..68b45cc87e2c 100644
--- a/arch/m68k/include/uapi/asm/Kbuild
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += auxvec.h
generic-y += msgbuf.h
generic-y += sembuf.h
generic-y += shmbuf.h
+generic-y += siginfo.h
generic-y += socket.h
generic-y += sockios.h
generic-y += termbits.h
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index e475c945c8b2..7df92f8b0781 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -40,20 +40,6 @@
asmlinkage void ret_from_fork(void);
asmlinkage void ret_from_kernel_thread(void);
-
-/*
- * Return saved PC from a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp;
- /* Check whether the thread is blocked in resume() */
- if (in_sched_functions(sw->retpc))
- return ((unsigned long *)sw->a6)[1];
- else
- return sw->retpc;
-}
-
void arch_cpu_idle(void)
{
#if defined(MACH_ATARI_ONLY)
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 6f945bb5ffbd..e79421f5b9cd 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -109,22 +109,6 @@ int fixup_exception(struct pt_regs *regs)
return 1;
}
-void ptrace_signal_deliver(void)
-{
- struct pt_regs *regs = signal_pt_regs();
- if (regs->orig_d0 < 0)
- return;
- switch (regs->d0) {
- case -ERESTARTNOHAND:
- case -ERESTARTSYS:
- case -ERESTARTNOINTR:
- regs->d0 = regs->orig_d0;
- regs->orig_d0 = -1;
- regs->pc -= 2;
- break;
- }
-}
-
static inline void push_cache (unsigned long vaddr)
{
/*
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index 232a12bf3f99..2dbbb7c66043 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c
@@ -567,8 +567,7 @@ static void stop_this_cpu(void *data)
{
unsigned int cpu = smp_processor_id();
- if (system_state == SYSTEM_BOOTING ||
- system_state == SYSTEM_RUNNING) {
+ if (system_state <= SYSTEM_RUNNING) {
spin_lock(&stop_lock);
pr_crit("CPU%u: stopping\n", cpu);
dump_stack();
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 85885a501dce..8e47121b8b8b 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -4,7 +4,7 @@ config MICROBLAZE
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
- select CLKSRC_OF
+ select TIMER_OF
select CLONE_BACKWARDS3
select COMMON_CLK
select GENERIC_ATOMIC64
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index dc5dd5b69fde..92fd4e95b488 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -1,8 +1,6 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
CONFIG_AUDIT=y
-CONFIG_AUDIT_LOGINUID_IMMUTABLE=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_SYSFS_DEPRECATED=y
@@ -33,10 +31,12 @@ CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
+CONFIG_BRIDGE=m
CONFIG_MTD=y
-CONFIG_PROC_DEVICETREE=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_AMDSTD=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=8192
CONFIG_NETDEVICES=y
@@ -47,9 +47,9 @@ CONFIG_XILINX_LL_TEMAC=y
# CONFIG_VT is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_UARTLITE=y
CONFIG_SERIAL_UARTLITE_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
CONFIG_XILINX_HWICAP=y
CONFIG_I2C=y
@@ -66,7 +66,6 @@ CONFIG_FB=y
CONFIG_FB_XILINX=y
# CONFIG_USB_SUPPORT is not set
CONFIG_UIO=y
-CONFIG_UIO_PDRV=y
CONFIG_UIO_PDRV_GENIRQ=y
CONFIG_UIO_DMEM_GENIRQ=y
CONFIG_EXT2_FS=y
@@ -77,14 +76,13 @@ CONFIG_NFS_FS=y
CONFIG_CIFS=y
CONFIG_CIFS_STATS=y
CONFIG_CIFS_STATS2=y
-CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_SLAB=y
+CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_SPINLOCK=y
-CONFIG_DEBUG_INFO=y
CONFIG_KGDB=y
CONFIG_KGDB_TESTS=y
CONFIG_KGDB_KDB=y
CONFIG_EARLY_PRINTK=y
CONFIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig
index 4cdaf565e638..06d69a6e192d 100644
--- a/arch/microblaze/configs/nommu_defconfig
+++ b/arch/microblaze/configs/nommu_defconfig
@@ -1,9 +1,6 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
CONFIG_AUDIT=y
-CONFIG_AUDIT_LOGINUID_IMMUTABLE=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_IKCONFIG=y
@@ -34,18 +31,15 @@ CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_INTELEXT=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_RAM=y
CONFIG_MTD_UCLINUX=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=8192
CONFIG_NETDEVICES=y
@@ -56,9 +50,9 @@ CONFIG_XILINX_LL_TEMAC=y
# CONFIG_VT is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_UARTLITE=y
CONFIG_SERIAL_UARTLITE_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
CONFIG_XILINX_HWICAP=y
CONFIG_I2C=y
@@ -74,10 +68,6 @@ CONFIG_XILINX_WATCHDOG=y
CONFIG_FB=y
CONFIG_FB_XILINX=y
# CONFIG_USB_SUPPORT is not set
-CONFIG_UIO=y
-CONFIG_UIO_PDRV=y
-CONFIG_UIO_PDRV_GENIRQ=y
-CONFIG_UIO_DMEM_GENIRQ=y
CONFIG_EXT2_FS=y
# CONFIG_DNOTIFY is not set
CONFIG_CRAMFS=y
@@ -85,10 +75,10 @@ CONFIG_ROMFS_FS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NLS=y
-CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_SLAB=y
+CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_SPINLOCK=y
-CONFIG_DEBUG_INFO=y
CONFIG_EARLY_PRINTK=y
CONFIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=y
@@ -97,4 +87,3 @@ CONFIG_CRYPTO_MD4=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_ARC4=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 56830ff65333..83a4ef3a2495 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -1,14 +1,57 @@
generic-y += barrier.h
+generic-y += bitops.h
+generic-y += bitsperlong.h
+generic-y += bug.h
+generic-y += bugs.h
generic-y += clkdev.h
generic-y += device.h
+generic-y += div64.h
+generic-y += emergency-restart.h
+generic-y += errno.h
generic-y += exec.h
generic-y += extable.h
+generic-y += fb.h
+generic-y += fcntl.h
+generic-y += hardirq.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += irq_regs.h
generic-y += irq_work.h
+generic-y += kdebug.h
+generic-y += kmap_types.h
+generic-y += kprobes.h
+generic-y += linkage.h
+generic-y += local.h
+generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += parport.h
+generic-y += percpu.h
+generic-y += poll.h
generic-y += preempt.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += serial.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += siginfo.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += swab.h
generic-y += syscalls.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += topology.h
generic-y += trace_clock.h
+generic-y += ucontext.h
+generic-y += vga.h
generic-y += word-at-a-time.h
-generic-y += kprobes.h
+generic-y += xor.h
diff --git a/arch/microblaze/include/asm/bitops.h b/arch/microblaze/include/asm/bitops.h
deleted file mode 100644
index a72468f15c8b..000000000000
--- a/arch/microblaze/include/asm/bitops.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bitops.h>
diff --git a/arch/microblaze/include/asm/bug.h b/arch/microblaze/include/asm/bug.h
deleted file mode 100644
index b12fd89e42e9..000000000000
--- a/arch/microblaze/include/asm/bug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bug.h>
diff --git a/arch/microblaze/include/asm/bugs.h b/arch/microblaze/include/asm/bugs.h
deleted file mode 100644
index 61791e1ad9f5..000000000000
--- a/arch/microblaze/include/asm/bugs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bugs.h>
diff --git a/arch/microblaze/include/asm/div64.h b/arch/microblaze/include/asm/div64.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/arch/microblaze/include/asm/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/microblaze/include/asm/emergency-restart.h b/arch/microblaze/include/asm/emergency-restart.h
deleted file mode 100644
index 3711bd9d50bd..000000000000
--- a/arch/microblaze/include/asm/emergency-restart.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/emergency-restart.h>
diff --git a/arch/microblaze/include/asm/fb.h b/arch/microblaze/include/asm/fb.h
deleted file mode 100644
index 3a4988e8df45..000000000000
--- a/arch/microblaze/include/asm/fb.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fb.h>
diff --git a/arch/microblaze/include/asm/hardirq.h b/arch/microblaze/include/asm/hardirq.h
deleted file mode 100644
index fb3c05a0cbbf..000000000000
--- a/arch/microblaze/include/asm/hardirq.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/hardirq.h>
diff --git a/arch/microblaze/include/asm/irq_regs.h b/arch/microblaze/include/asm/irq_regs.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/microblaze/include/asm/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/microblaze/include/asm/kdebug.h b/arch/microblaze/include/asm/kdebug.h
deleted file mode 100644
index 6ece1b037665..000000000000
--- a/arch/microblaze/include/asm/kdebug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kdebug.h>
diff --git a/arch/microblaze/include/asm/kmap_types.h b/arch/microblaze/include/asm/kmap_types.h
deleted file mode 100644
index 25975252d83d..000000000000
--- a/arch/microblaze/include/asm/kmap_types.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_MICROBLAZE_KMAP_TYPES_H
-#define _ASM_MICROBLAZE_KMAP_TYPES_H
-
-#include <asm-generic/kmap_types.h>
-
-#endif /* _ASM_MICROBLAZE_KMAP_TYPES_H */
diff --git a/arch/microblaze/include/asm/linkage.h b/arch/microblaze/include/asm/linkage.h
deleted file mode 100644
index 0540bbaad897..000000000000
--- a/arch/microblaze/include/asm/linkage.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/linkage.h>
diff --git a/arch/microblaze/include/asm/local.h b/arch/microblaze/include/asm/local.h
deleted file mode 100644
index c11c530f74d0..000000000000
--- a/arch/microblaze/include/asm/local.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local.h>
diff --git a/arch/microblaze/include/asm/local64.h b/arch/microblaze/include/asm/local64.h
deleted file mode 100644
index 36c93b5cc239..000000000000
--- a/arch/microblaze/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/microblaze/include/asm/parport.h b/arch/microblaze/include/asm/parport.h
deleted file mode 100644
index cf252af64590..000000000000
--- a/arch/microblaze/include/asm/parport.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/parport.h>
diff --git a/arch/microblaze/include/asm/percpu.h b/arch/microblaze/include/asm/percpu.h
deleted file mode 100644
index 06a959d67234..000000000000
--- a/arch/microblaze/include/asm/percpu.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/percpu.h>
diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h
index 37ef196e4519..330d556860ba 100644
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -69,8 +69,6 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
extern unsigned long get_wchan(struct task_struct *p);
# define KSTK_EIP(tsk) (0)
@@ -121,10 +119,6 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-/* Return saved (kernel) PC of a blocked thread. */
-# define thread_saved_pc(tsk) \
- ((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0)
-
unsigned long get_wchan(struct task_struct *p);
/* The size allocated for kernel stacks. This _must_ be a power of two! */
diff --git a/arch/microblaze/include/asm/serial.h b/arch/microblaze/include/asm/serial.h
deleted file mode 100644
index a0cb0caff152..000000000000
--- a/arch/microblaze/include/asm/serial.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/serial.h>
diff --git a/arch/microblaze/include/asm/shmparam.h b/arch/microblaze/include/asm/shmparam.h
deleted file mode 100644
index 93f30deb95d0..000000000000
--- a/arch/microblaze/include/asm/shmparam.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/shmparam.h>
diff --git a/arch/microblaze/include/asm/topology.h b/arch/microblaze/include/asm/topology.h
deleted file mode 100644
index 5428f333a02c..000000000000
--- a/arch/microblaze/include/asm/topology.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/topology.h>
diff --git a/arch/microblaze/include/asm/ucontext.h b/arch/microblaze/include/asm/ucontext.h
deleted file mode 100644
index 9bc07b9f30fb..000000000000
--- a/arch/microblaze/include/asm/ucontext.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ucontext.h>
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index 032fed71223f..9774e1d9507b 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -38,6 +38,6 @@
#endif /* __ASSEMBLY__ */
-#define __NR_syscalls 398
+#define __NR_syscalls 399
#endif /* _ASM_MICROBLAZE_UNISTD_H */
diff --git a/arch/microblaze/include/asm/vga.h b/arch/microblaze/include/asm/vga.h
deleted file mode 100644
index 89d82fd8fcf1..000000000000
--- a/arch/microblaze/include/asm/vga.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/vga.h>
diff --git a/arch/microblaze/include/asm/xor.h b/arch/microblaze/include/asm/xor.h
deleted file mode 100644
index c82eb12a5b18..000000000000
--- a/arch/microblaze/include/asm/xor.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/xor.h>
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index 2178c78c7c1a..cb6784f4a427 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -2,3 +2,4 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += types.h
+generic-y += siginfo.h
diff --git a/arch/microblaze/include/uapi/asm/bitsperlong.h b/arch/microblaze/include/uapi/asm/bitsperlong.h
deleted file mode 100644
index 6dc0bb0c13b2..000000000000
--- a/arch/microblaze/include/uapi/asm/bitsperlong.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bitsperlong.h>
diff --git a/arch/microblaze/include/uapi/asm/errno.h b/arch/microblaze/include/uapi/asm/errno.h
deleted file mode 100644
index 4c82b503d92f..000000000000
--- a/arch/microblaze/include/uapi/asm/errno.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/errno.h>
diff --git a/arch/microblaze/include/uapi/asm/fcntl.h b/arch/microblaze/include/uapi/asm/fcntl.h
deleted file mode 100644
index 46ab12db5739..000000000000
--- a/arch/microblaze/include/uapi/asm/fcntl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fcntl.h>
diff --git a/arch/microblaze/include/uapi/asm/ioctl.h b/arch/microblaze/include/uapi/asm/ioctl.h
deleted file mode 100644
index b279fe06dfe5..000000000000
--- a/arch/microblaze/include/uapi/asm/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctl.h>
diff --git a/arch/microblaze/include/uapi/asm/ioctls.h b/arch/microblaze/include/uapi/asm/ioctls.h
deleted file mode 100644
index ec34c760665e..000000000000
--- a/arch/microblaze/include/uapi/asm/ioctls.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctls.h>
diff --git a/arch/microblaze/include/uapi/asm/ipcbuf.h b/arch/microblaze/include/uapi/asm/ipcbuf.h
deleted file mode 100644
index 84c7e51cb6d0..000000000000
--- a/arch/microblaze/include/uapi/asm/ipcbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipcbuf.h>
diff --git a/arch/microblaze/include/uapi/asm/kvm_para.h b/arch/microblaze/include/uapi/asm/kvm_para.h
deleted file mode 100644
index 14fab8f0b957..000000000000
--- a/arch/microblaze/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kvm_para.h>
diff --git a/arch/microblaze/include/uapi/asm/mman.h b/arch/microblaze/include/uapi/asm/mman.h
deleted file mode 100644
index 8eebf89f5ab1..000000000000
--- a/arch/microblaze/include/uapi/asm/mman.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/mman.h>
diff --git a/arch/microblaze/include/uapi/asm/msgbuf.h b/arch/microblaze/include/uapi/asm/msgbuf.h
deleted file mode 100644
index 809134c644a6..000000000000
--- a/arch/microblaze/include/uapi/asm/msgbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/msgbuf.h>
diff --git a/arch/microblaze/include/uapi/asm/param.h b/arch/microblaze/include/uapi/asm/param.h
deleted file mode 100644
index 965d45427975..000000000000
--- a/arch/microblaze/include/uapi/asm/param.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/param.h>
diff --git a/arch/microblaze/include/uapi/asm/poll.h b/arch/microblaze/include/uapi/asm/poll.h
deleted file mode 100644
index c98509d3149e..000000000000
--- a/arch/microblaze/include/uapi/asm/poll.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
diff --git a/arch/microblaze/include/uapi/asm/resource.h b/arch/microblaze/include/uapi/asm/resource.h
deleted file mode 100644
index 04bc4db8921b..000000000000
--- a/arch/microblaze/include/uapi/asm/resource.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/resource.h>
diff --git a/arch/microblaze/include/uapi/asm/sembuf.h b/arch/microblaze/include/uapi/asm/sembuf.h
deleted file mode 100644
index 7673b83cfef7..000000000000
--- a/arch/microblaze/include/uapi/asm/sembuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/sembuf.h>
diff --git a/arch/microblaze/include/uapi/asm/shmbuf.h b/arch/microblaze/include/uapi/asm/shmbuf.h
deleted file mode 100644
index 83c05fc2de38..000000000000
--- a/arch/microblaze/include/uapi/asm/shmbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/shmbuf.h>
diff --git a/arch/microblaze/include/uapi/asm/siginfo.h b/arch/microblaze/include/uapi/asm/siginfo.h
deleted file mode 100644
index 0815d29d82e5..000000000000
--- a/arch/microblaze/include/uapi/asm/siginfo.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/siginfo.h>
diff --git a/arch/microblaze/include/uapi/asm/signal.h b/arch/microblaze/include/uapi/asm/signal.h
deleted file mode 100644
index 7b1573ce19de..000000000000
--- a/arch/microblaze/include/uapi/asm/signal.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/signal.h>
diff --git a/arch/microblaze/include/uapi/asm/socket.h b/arch/microblaze/include/uapi/asm/socket.h
deleted file mode 100644
index 6b71384b9d8b..000000000000
--- a/arch/microblaze/include/uapi/asm/socket.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/socket.h>
diff --git a/arch/microblaze/include/uapi/asm/sockios.h b/arch/microblaze/include/uapi/asm/sockios.h
deleted file mode 100644
index def6d4746ee7..000000000000
--- a/arch/microblaze/include/uapi/asm/sockios.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/sockios.h>
diff --git a/arch/microblaze/include/uapi/asm/stat.h b/arch/microblaze/include/uapi/asm/stat.h
deleted file mode 100644
index 3dc90fa92c70..000000000000
--- a/arch/microblaze/include/uapi/asm/stat.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/stat.h>
diff --git a/arch/microblaze/include/uapi/asm/statfs.h b/arch/microblaze/include/uapi/asm/statfs.h
deleted file mode 100644
index 0b91fe198c20..000000000000
--- a/arch/microblaze/include/uapi/asm/statfs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/statfs.h>
diff --git a/arch/microblaze/include/uapi/asm/swab.h b/arch/microblaze/include/uapi/asm/swab.h
deleted file mode 100644
index 7847e563ab66..000000000000
--- a/arch/microblaze/include/uapi/asm/swab.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/swab.h>
diff --git a/arch/microblaze/include/uapi/asm/termbits.h b/arch/microblaze/include/uapi/asm/termbits.h
deleted file mode 100644
index 3935b106de79..000000000000
--- a/arch/microblaze/include/uapi/asm/termbits.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termbits.h>
diff --git a/arch/microblaze/include/uapi/asm/termios.h b/arch/microblaze/include/uapi/asm/termios.h
deleted file mode 100644
index 280d78a9d966..000000000000
--- a/arch/microblaze/include/uapi/asm/termios.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termios.h>
diff --git a/arch/microblaze/include/uapi/asm/unistd.h b/arch/microblaze/include/uapi/asm/unistd.h
index d8086159d996..a88b3c11cc20 100644
--- a/arch/microblaze/include/uapi/asm/unistd.h
+++ b/arch/microblaze/include/uapi/asm/unistd.h
@@ -413,5 +413,6 @@
#define __NR_pkey_mprotect 395
#define __NR_pkey_alloc 396
#define __NR_pkey_free 397
+#define __NR_statx 398
#endif /* _UAPI_ASM_MICROBLAZE_UNISTD_H */
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index 12e093a03e60..e45ada8fb006 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -65,8 +65,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
continue;
- __dma_sync(page_to_phys(sg_page(sg)) + sg->offset,
- sg->length, direction);
+ __dma_sync(sg_phys(sg), sg->length, direction);
}
return nents;
diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S
index ef548510b951..4e1b567becd6 100644
--- a/arch/microblaze/kernel/entry.S
+++ b/arch/microblaze/kernel/entry.S
@@ -208,9 +208,7 @@ syscall_debug_table:
mfs r11, rmsr; /* save MSR */ \
swi r11, r1, PT_MSR;
-#define RESTORE_REGS \
- lwi r11, r1, PT_MSR; \
- mts rmsr , r11; \
+#define RESTORE_REGS_GP \
lwi r2, r1, PT_R2; /* restore SDA */ \
lwi r3, r1, PT_R3; \
lwi r4, r1, PT_R4; \
@@ -242,6 +240,18 @@ syscall_debug_table:
lwi r30, r1, PT_R30; \
lwi r31, r1, PT_R31; /* Restore cur task reg */
+#define RESTORE_REGS \
+ lwi r11, r1, PT_MSR; \
+ mts rmsr , r11; \
+ RESTORE_REGS_GP
+
+#define RESTORE_REGS_RTBD \
+ lwi r11, r1, PT_MSR; \
+ andni r11, r11, MSR_EIP; /* clear EIP */ \
+ ori r11, r11, MSR_EE | MSR_BIP; /* set EE and BIP */ \
+ mts rmsr , r11; \
+ RESTORE_REGS_GP
+
#define SAVE_STATE \
swi r1, r0, TOPHYS(PER_CPU(ENTRY_SP)); /* save stack */ \
/* See if already in kernel mode.*/ \
@@ -427,7 +437,7 @@ C_ENTRY(ret_from_trap):
swi CURRENT_TASK, r0, PER_CPU(CURRENT_SAVE); /* save current */
VM_OFF;
tophys(r1,r1);
- RESTORE_REGS;
+ RESTORE_REGS_RTBD;
addik r1, r1, PT_SIZE /* Clean up stack space. */
lwi r1, r1, PT_R1 - PT_SIZE;/* Restore user stack pointer. */
bri 6f;
@@ -436,7 +446,7 @@ C_ENTRY(ret_from_trap):
2: set_bip; /* Ints masked for state restore */
VM_OFF;
tophys(r1,r1);
- RESTORE_REGS;
+ RESTORE_REGS_RTBD;
addik r1, r1, PT_SIZE /* Clean up stack space. */
tovirt(r1,r1);
6:
@@ -612,7 +622,7 @@ C_ENTRY(ret_from_exc):
VM_OFF;
tophys(r1,r1);
- RESTORE_REGS;
+ RESTORE_REGS_RTBD;
addik r1, r1, PT_SIZE /* Clean up stack space. */
lwi r1, r1, PT_R1 - PT_SIZE; /* Restore user stack pointer. */
@@ -621,7 +631,7 @@ C_ENTRY(ret_from_exc):
2: set_bip; /* Ints masked for state restore */
VM_OFF;
tophys(r1,r1);
- RESTORE_REGS;
+ RESTORE_REGS_RTBD;
addik r1, r1, PT_SIZE /* Clean up stack space. */
tovirt(r1,r1);
@@ -847,7 +857,7 @@ dbtrap_call: /* Return point for kernel/user entry + 8 because of rtsd r15, 8 */
VM_OFF;
tophys(r1,r1);
/* MS: Restore all regs */
- RESTORE_REGS
+ RESTORE_REGS_RTBD
addik r1, r1, PT_SIZE /* Clean up stack space */
lwi r1, r1, PT_R1 - PT_SIZE; /* Restore user stack pointer */
DBTRAP_return_user: /* MS: Make global symbol for debugging */
@@ -858,7 +868,7 @@ DBTRAP_return_user: /* MS: Make global symbol for debugging */
2: VM_OFF;
tophys(r1,r1);
/* MS: Restore all regs */
- RESTORE_REGS
+ RESTORE_REGS_RTBD
lwi r14, r1, PT_R14;
lwi r16, r1, PT_PC;
addik r1, r1, PT_SIZE; /* MS: Clean up stack space */
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index e92a817e645f..6527ec22f158 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -119,23 +119,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
return 0;
}
-#ifndef CONFIG_MMU
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct cpu_context *ctx =
- &(((struct thread_info *)(tsk->stack))->cpu_context);
-
- /* Check whether the thread is blocked in resume() */
- if (in_sched_functions(ctx->r15))
- return (unsigned long)ctx->r15;
- else
- return ctx->r14;
-}
-#endif
-
unsigned long get_wchan(struct task_struct *p)
{
/* TBD (used by procfs) */
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index f31ebb5dc26c..be98ffe28ca8 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -192,7 +192,7 @@ void __init time_init(void)
{
of_clk_init(NULL);
setup_cpuinfo_clk();
- clocksource_probe();
+ timer_probe();
}
#ifdef CONFIG_DEBUG_FS
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index 6841c2df14d9..c48ff4ad2070 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -398,3 +398,4 @@ ENTRY(sys_call_table)
.long sys_pkey_mprotect /* 395 */
.long sys_pkey_alloc
.long sys_pkey_free
+ .long sys_statx
diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c
index 999066192715..ea2d83f1f4bb 100644
--- a/arch/microblaze/kernel/timer.c
+++ b/arch/microblaze/kernel/timer.c
@@ -178,8 +178,10 @@ static __init int xilinx_clockevent_init(void)
clockevent_xilinx_timer.shift);
clockevent_xilinx_timer.max_delta_ns =
clockevent_delta2ns((u32)~0, &clockevent_xilinx_timer);
+ clockevent_xilinx_timer.max_delta_ticks = (u32)~0;
clockevent_xilinx_timer.min_delta_ns =
clockevent_delta2ns(1, &clockevent_xilinx_timer);
+ clockevent_xilinx_timer.min_delta_ticks = 1;
clockevent_xilinx_timer.cpumask = cpumask_of(0);
clockevents_register_device(&clockevent_xilinx_timer);
@@ -333,5 +335,5 @@ static int __init xilinx_timer_init(struct device_node *timer)
return 0;
}
-CLOCKSOURCE_OF_DECLARE(xilinx_timer, "xlnx,xps-timer-1.00.a",
+TIMER_OF_DECLARE(xilinx_timer, "xlnx,xps-timer-1.00.a",
xilinx_timer_init);
diff --git a/arch/microblaze/mm/highmem.c b/arch/microblaze/mm/highmem.c
index 2fcc5a52d84d..ed4454c5ce35 100644
--- a/arch/microblaze/mm/highmem.c
+++ b/arch/microblaze/mm/highmem.c
@@ -60,6 +60,7 @@ void __kunmap_atomic(void *kvaddr)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
int type;
+ unsigned int idx;
if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
pagefault_enable();
@@ -68,21 +69,18 @@ void __kunmap_atomic(void *kvaddr)
}
type = kmap_atomic_idx();
-#ifdef CONFIG_DEBUG_HIGHMEM
- {
- unsigned int idx;
-
- idx = type + KM_TYPE_NR * smp_processor_id();
- BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
- /*
- * force other mappings to Oops if they'll try to access
- * this pte without first remap it
- */
- pte_clear(&init_mm, vaddr, kmap_pte-idx);
- local_flush_tlb_page(NULL, vaddr);
- }
+ idx = type + KM_TYPE_NR * smp_processor_id();
+#ifdef CONFIG_DEBUG_HIGHMEM
+ BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
#endif
+ /*
+ * force other mappings to Oops if they'll try to access
+ * this pte without first remap it
+ */
+ pte_clear(&init_mm, vaddr, kmap_pte-idx);
+ local_flush_tlb_page(NULL, vaddr);
+
kmap_atomic_idx_pop();
pagefault_enable();
preempt_enable();
diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c
index 4af619215410..1231b5a17b37 100644
--- a/arch/mips/generic/init.c
+++ b/arch/mips/generic/init.c
@@ -161,7 +161,7 @@ void __init plat_time_init(void)
}
}
- clocksource_probe();
+ timer_probe();
}
void __init arch_init_irq(void)
diff --git a/arch/mips/include/uapi/asm/ioctls.h b/arch/mips/include/uapi/asm/ioctls.h
index 740219c2c894..68e19b689a00 100644
--- a/arch/mips/include/uapi/asm/ioctls.h
+++ b/arch/mips/include/uapi/asm/ioctls.h
@@ -91,6 +91,7 @@
#define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */
#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */
#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */
+#define TIOCGPTPEER _IOR('T', 0x41, int) /* Safely open the slave */
/* I hope the range from 0x5480 on is free ... */
#define TIOCSCTTY 0x5480 /* become controlling tty */
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index 8d83fc2a96b7..38a302919e6b 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -11,6 +11,7 @@
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/compiler.h>
+#include <asm/irqflags.h>
#include <asm/regdef.h>
#include <asm/mipsregs.h>
#include <asm/stackframe.h>
@@ -119,6 +120,7 @@ work_pending:
andi t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS
beqz t0, work_notifysig
work_resched:
+ TRACE_IRQS_OFF
jal schedule
local_irq_disable # make sure need_resched and
@@ -155,6 +157,7 @@ syscall_exit_work:
beqz t0, work_pending # trace bit set?
local_irq_enable # could let syscall_trace_leave()
# call schedule() instead
+ TRACE_IRQS_ON
move a0, sp
jal syscall_trace_leave
b resume_userspace
diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index cf052204eb0a..d1bb506adc10 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -106,8 +106,8 @@ NESTED(kernel_entry, 16, sp) # kernel entry point
beq t0, t1, dtb_found
#endif
li t1, -2
- beq a0, t1, dtb_found
move t2, a1
+ beq a0, t1, dtb_found
li t2, 0
dtb_found:
diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c
index 3e586daa3a32..32e3168316cd 100644
--- a/arch/mips/kernel/jump_label.c
+++ b/arch/mips/kernel/jump_label.c
@@ -58,7 +58,6 @@ void arch_jump_label_transform(struct jump_entry *e,
insn.word = 0; /* nop */
}
- get_online_cpus();
mutex_lock(&text_mutex);
if (IS_ENABLED(CONFIG_CPU_MICROMIPS)) {
insn_p->halfword[0] = insn.word >> 16;
@@ -70,7 +69,6 @@ void arch_jump_label_transform(struct jump_entry *e,
(unsigned long)insn_p + sizeof(*insn_p));
mutex_unlock(&text_mutex);
- put_online_cpus();
}
#endif /* HAVE_JUMP_LABEL */
diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c
index 5f928c34c148..d99416094ba9 100644
--- a/arch/mips/kernel/pm-cps.c
+++ b/arch/mips/kernel/pm-cps.c
@@ -56,7 +56,6 @@ DECLARE_BITMAP(state_support, CPS_PM_STATE_COUNT);
* state. Actually per-core rather than per-CPU.
*/
static DEFINE_PER_CPU_ALIGNED(u32*, ready_count);
-static DEFINE_PER_CPU_ALIGNED(void*, ready_count_alloc);
/* Indicates online CPUs coupled with the current CPU */
static DEFINE_PER_CPU_ALIGNED(cpumask_t, online_coupled);
@@ -642,7 +641,6 @@ static int cps_pm_online_cpu(unsigned int cpu)
{
enum cps_pm_state state;
unsigned core = cpu_data[cpu].core;
- unsigned dlinesz = cpu_data[cpu].dcache.linesz;
void *entry_fn, *core_rc;
for (state = CPS_PM_NC_WAIT; state < CPS_PM_STATE_COUNT; state++) {
@@ -662,16 +660,11 @@ static int cps_pm_online_cpu(unsigned int cpu)
}
if (!per_cpu(ready_count, core)) {
- core_rc = kmalloc(dlinesz * 2, GFP_KERNEL);
+ core_rc = kmalloc(sizeof(u32), GFP_KERNEL);
if (!core_rc) {
pr_err("Failed allocate core %u ready_count\n", core);
return -ENOMEM;
}
- per_cpu(ready_count_alloc, core) = core_rc;
-
- /* Ensure ready_count is aligned to a cacheline boundary */
- core_rc += dlinesz - 1;
- core_rc = (void *)((unsigned long)core_rc & ~(dlinesz - 1));
per_cpu(ready_count, core) = core_rc;
}
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 9681b5877140..38dfa27730ff 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -201,6 +201,8 @@ void show_stack(struct task_struct *task, unsigned long *sp)
{
struct pt_regs regs;
mm_segment_t old_fs = get_fs();
+
+ regs.cp0_status = KSU_KERNEL;
if (sp) {
regs.regs[29] = (unsigned long)sp;
regs.regs[31] = 0;
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index 7c6336dd2638..7cd92166a0b9 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -166,7 +166,11 @@ static int _kvm_mips_host_tlb_inv(unsigned long entryhi)
int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
bool user, bool kernel)
{
- int idx_user, idx_kernel;
+ /*
+ * Initialize idx_user and idx_kernel to workaround bogus
+ * maybe-initialized warning when using GCC 6.
+ */
+ int idx_user = 0, idx_kernel = 0;
unsigned long flags, old_entryhi;
local_irq_save(flags);
diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c
index 4a2d03c72959..caa62f20a888 100644
--- a/arch/mips/math-emu/dp_maddf.c
+++ b/arch/mips/math-emu/dp_maddf.c
@@ -54,7 +54,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
return ieee754dp_nanxcpt(z);
case IEEE754_CLASS_DNORM:
DPDNORMZ;
- /* QNAN is handled separately below */
+ /* QNAN and ZERO cases are handled separately below */
}
switch (CLPAIR(xc, yc)) {
@@ -210,6 +210,9 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
}
assert(rm & (DP_HIDDEN_BIT << 3));
+ if (zc == IEEE754_CLASS_ZERO)
+ return ieee754dp_format(rs, re, rm);
+
/* And now the addition */
assert(zm & DP_HIDDEN_BIT);
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c
index a8cd8b4f235e..c91d5e5d9b5f 100644
--- a/arch/mips/math-emu/sp_maddf.c
+++ b/arch/mips/math-emu/sp_maddf.c
@@ -54,7 +54,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
return ieee754sp_nanxcpt(z);
case IEEE754_CLASS_DNORM:
SPDNORMZ;
- /* QNAN is handled separately below */
+ /* QNAN and ZERO cases are handled separately below */
}
switch (CLPAIR(xc, yc)) {
@@ -203,6 +203,9 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
}
assert(rm & (SP_HIDDEN_BIT << 3));
+ if (zc == IEEE754_CLASS_ZERO)
+ return ieee754sp_format(rs, re, rm);
+
/* And now the addition */
assert(zm & SP_HIDDEN_BIT);
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index fe8df14b6169..e08598c70b3e 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -68,12 +68,25 @@ static inline struct page *dma_addr_to_page(struct device *dev,
* systems and only the R10000 and R12000 are used in such systems, the
* SGI IP28 Indigo² rsp. SGI IP32 aka O2.
*/
-static inline int cpu_needs_post_dma_flush(struct device *dev)
+static inline bool cpu_needs_post_dma_flush(struct device *dev)
{
- return !plat_device_is_coherent(dev) &&
- (boot_cpu_type() == CPU_R10000 ||
- boot_cpu_type() == CPU_R12000 ||
- boot_cpu_type() == CPU_BMIPS5000);
+ if (plat_device_is_coherent(dev))
+ return false;
+
+ switch (boot_cpu_type()) {
+ case CPU_R10000:
+ case CPU_R12000:
+ case CPU_BMIPS5000:
+ return true;
+
+ default:
+ /*
+ * Presence of MAARs suggests that the CPU supports
+ * speculatively prefetching data, and therefore requires
+ * the post-DMA flush/invalidate.
+ */
+ return cpu_has_maar;
+ }
}
static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp)
diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
index 289edcfadd7c..cea4ec909806 100644
--- a/arch/mips/mti-malta/malta-time.c
+++ b/arch/mips/mti-malta/malta-time.c
@@ -265,7 +265,7 @@ void __init plat_time_init(void)
(freq%1000000)*100/1000000);
#ifdef CONFIG_CLKSRC_MIPS_GIC
update_gic_frequency_dt();
- clocksource_probe();
+ timer_probe();
#endif
}
#endif
diff --git a/arch/mips/pic32/pic32mzda/time.c b/arch/mips/pic32/pic32mzda/time.c
index 62a0a78b6c64..1894e50939b5 100644
--- a/arch/mips/pic32/pic32mzda/time.c
+++ b/arch/mips/pic32/pic32mzda/time.c
@@ -64,5 +64,5 @@ void __init plat_time_init(void)
pr_info("CPU Clock: %ldMHz\n", rate / 1000000);
mips_hpt_frequency = rate / 2;
- clocksource_probe();
+ timer_probe();
}
diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c
index 1022201b2beb..17a0f1dec05b 100644
--- a/arch/mips/pistachio/time.c
+++ b/arch/mips/pistachio/time.c
@@ -39,7 +39,7 @@ void __init plat_time_init(void)
struct clk *clk;
of_clk_init(NULL);
- clocksource_probe();
+ timer_probe();
np = of_get_cpu_node(0, NULL);
if (!np) {
diff --git a/arch/mips/ralink/Kconfig b/arch/mips/ralink/Kconfig
index 9825dee10bc1..710b04cf4851 100644
--- a/arch/mips/ralink/Kconfig
+++ b/arch/mips/ralink/Kconfig
@@ -4,7 +4,7 @@ config CLKEVT_RT3352
bool
depends on SOC_RT305X || SOC_MT7620
default y
- select CLKSRC_OF
+ select TIMER_OF
select CLKSRC_MMIO
config RALINK_ILL_ACC
diff --git a/arch/mips/ralink/cevt-rt3352.c b/arch/mips/ralink/cevt-rt3352.c
index b8a1376165b0..92f284d2b802 100644
--- a/arch/mips/ralink/cevt-rt3352.c
+++ b/arch/mips/ralink/cevt-rt3352.c
@@ -152,4 +152,4 @@ static int __init ralink_systick_init(struct device_node *np)
return 0;
}
-CLOCKSOURCE_OF_DECLARE(systick, "ralink,cevt-systick", ralink_systick_init);
+TIMER_OF_DECLARE(systick, "ralink,cevt-systick", ralink_systick_init);
diff --git a/arch/mips/ralink/clk.c b/arch/mips/ralink/clk.c
index df795885eace..eb1c61917eb7 100644
--- a/arch/mips/ralink/clk.c
+++ b/arch/mips/ralink/clk.c
@@ -82,5 +82,5 @@ void __init plat_time_init(void)
pr_info("CPU Clock: %ldMHz\n", clk_get_rate(clk) / 1000000);
mips_hpt_frequency = clk_get_rate(clk) / 2;
clk_put(clk);
- clocksource_probe();
+ timer_probe();
}
diff --git a/arch/mips/ralink/timer-gic.c b/arch/mips/ralink/timer-gic.c
index 069771dbec42..b5f07d21fcf2 100644
--- a/arch/mips/ralink/timer-gic.c
+++ b/arch/mips/ralink/timer-gic.c
@@ -20,5 +20,5 @@ void __init plat_time_init(void)
ralink_of_remap();
of_clk_init(NULL);
- clocksource_probe();
+ timer_probe();
}
diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c
index cdf187600010..b225033aade6 100644
--- a/arch/mips/sgi-ip22/ip22-gio.c
+++ b/arch/mips/sgi-ip22/ip22-gio.c
@@ -169,6 +169,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a,
return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
}
+static DEVICE_ATTR_RO(modalias);
static ssize_t name_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -178,6 +179,7 @@ static ssize_t name_show(struct device *dev,
giodev = to_gio_device(dev);
return sprintf(buf, "%s", giodev->name);
}
+static DEVICE_ATTR_RO(name);
static ssize_t id_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -187,13 +189,15 @@ static ssize_t id_show(struct device *dev,
giodev = to_gio_device(dev);
return sprintf(buf, "%x", giodev->id.id);
}
+static DEVICE_ATTR_RO(id);
-static struct device_attribute gio_dev_attrs[] = {
- __ATTR_RO(modalias),
- __ATTR_RO(name),
- __ATTR_RO(id),
- __ATTR_NULL,
+static struct attribute *gio_dev_attrs[] = {
+ &dev_attr_modalias.attr,
+ &dev_attr_name.attr,
+ &dev_attr_id.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(gio_dev);
static int gio_device_uevent(struct device *dev, struct kobj_uevent_env *env)
{
@@ -374,7 +378,7 @@ static void ip22_check_gio(int slotno, unsigned long addr, int irq)
static struct bus_type gio_bus_type = {
.name = "gio",
- .dev_attrs = gio_dev_attrs,
+ .dev_groups = gio_dev_groups,
.match = gio_bus_match,
.probe = gio_device_probe,
.remove = gio_device_remove,
diff --git a/arch/mips/xilfpga/time.c b/arch/mips/xilfpga/time.c
index cbb3fca7b6fa..36f3f1870ee2 100644
--- a/arch/mips/xilfpga/time.c
+++ b/arch/mips/xilfpga/time.c
@@ -22,7 +22,7 @@ void __init plat_time_init(void)
struct clk *clk;
of_clk_init(NULL);
- clocksource_probe();
+ timer_probe();
np = of_get_cpu_node(0, NULL);
if (!np) {
diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h
index 18e17abf7664..3ae479117b42 100644
--- a/arch/mn10300/include/asm/processor.h
+++ b/arch/mn10300/include/asm/processor.h
@@ -132,11 +132,6 @@ static inline void start_thread(struct pt_regs *regs,
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(task) ((task)->thread.uregs)
diff --git a/arch/mn10300/include/uapi/asm/Kbuild b/arch/mn10300/include/uapi/asm/Kbuild
index b15bf6bc0e94..c94ee54210bc 100644
--- a/arch/mn10300/include/uapi/asm/Kbuild
+++ b/arch/mn10300/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += siginfo.h
diff --git a/arch/mn10300/include/uapi/asm/siginfo.h b/arch/mn10300/include/uapi/asm/siginfo.h
deleted file mode 100644
index 0815d29d82e5..000000000000
--- a/arch/mn10300/include/uapi/asm/siginfo.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/siginfo.h>
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index c9fa42619c6a..89e8027e07fb 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -40,14 +40,6 @@
#include "internal.h"
/*
- * return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return ((unsigned long *) tsk->thread.sp)[3];
-}
-
-/*
* power off function, if any
*/
void (*pm_power_off)(void);
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index a72d5f0de692..c587764b9c5a 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -1,6 +1,6 @@
config NIOS2
def_bool y
- select CLKSRC_OF
+ select TIMER_OF
select GENERIC_ATOMIC64
select GENERIC_CLOCKEVENTS
select GENERIC_CPU_DEVICES
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 727dbb333f60..e1a843def56f 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -47,7 +47,6 @@ generic-y += segment.h
generic-y += sembuf.h
generic-y += serial.h
generic-y += shmbuf.h
-generic-y += siginfo.h
generic-y += signal.h
generic-y += socket.h
generic-y += sockios.h
diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h
index 3bbbc3d798e5..4944e2e1d8b0 100644
--- a/arch/nios2/include/asm/processor.h
+++ b/arch/nios2/include/asm/processor.h
@@ -75,9 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-/* Return saved PC of a blocked thread. */
-#define thread_saved_pc(tsk) ((tsk)->thread.kregs->ea)
-
extern unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(p) \
diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index 374bd123329f..51eff5bc2eb4 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild
@@ -2,4 +2,5 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += setup.h
+generic-y += siginfo.h
generic-y += ucontext.h
diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c
index 6e2bdc9b8530..645129aaa9a0 100644
--- a/arch/nios2/kernel/time.c
+++ b/arch/nios2/kernel/time.c
@@ -350,7 +350,7 @@ void __init time_init(void)
if (count < 2)
panic("%d timer is found, it needs 2 timers in system\n", count);
- clocksource_probe();
+ timer_probe();
}
-CLOCKSOURCE_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init);
+TIMER_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init);
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index fdbcf0bf44a4..091585addb91 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -46,7 +46,6 @@ generic-y += sembuf.h
generic-y += setup.h
generic-y += shmbuf.h
generic-y += shmparam.h
-generic-y += siginfo.h
generic-y += signal.h
generic-y += socket.h
generic-y += sockios.h
diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h
index a908e6c30a00..396d8f306c21 100644
--- a/arch/openrisc/include/asm/processor.h
+++ b/arch/openrisc/include/asm/processor.h
@@ -84,11 +84,6 @@ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
void release_thread(struct task_struct *);
unsigned long get_wchan(struct task_struct *p);
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
#define init_stack (init_thread_union.stack)
#define cpu_relax() barrier()
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
index b15bf6bc0e94..b55fc2ae1e8c 100644
--- a/arch/openrisc/include/uapi/asm/Kbuild
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += siginfo.h
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 106859ae27ff..f9b77003f113 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -110,11 +110,6 @@ void show_regs(struct pt_regs *regs)
show_registers(regs);
}
-unsigned long thread_saved_pc(struct task_struct *t)
-{
- return (unsigned long)user_regs(t->stack)->pc;
-}
-
void release_thread(struct task_struct *dead_task)
{
}
diff --git a/arch/parisc/include/asm/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h
index 5404c6a726b2..9a2a8956a695 100644
--- a/arch/parisc/include/asm/dma-mapping.h
+++ b/arch/parisc/include/asm/dma-mapping.h
@@ -20,6 +20,8 @@
** flush/purge and allocate "regular" cacheable pages for everything.
*/
+#define DMA_ERROR_CODE (~(dma_addr_t)0)
+
#ifdef CONFIG_PA11
extern const struct dma_map_ops pcxl_dma_ops;
extern const struct dma_map_ops pcx_dma_ops;
@@ -54,12 +56,13 @@ parisc_walk_tree(struct device *dev)
break;
}
}
- BUG_ON(!dev->platform_data);
return dev->platform_data;
}
-
-#define GET_IOC(dev) (HBA_DATA(parisc_walk_tree(dev))->iommu)
-
+
+#define GET_IOC(dev) ({ \
+ void *__pdata = parisc_walk_tree(dev); \
+ __pdata ? HBA_DATA(__pdata)->iommu : NULL; \
+})
#ifdef CONFIG_IOMMU_CCIO
struct parisc_device;
diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h
index 1a16f1d1075f..af98254f7257 100644
--- a/arch/parisc/include/asm/io.h
+++ b/arch/parisc/include/asm/io.h
@@ -34,10 +34,10 @@ static inline unsigned char gsc_readb(unsigned long addr)
unsigned char ret;
__asm__ __volatile__(
- " rsm 2,%0\n"
+ " rsm %3,%0\n"
" ldbx 0(%2),%1\n"
" mtsm %0\n"
- : "=&r" (flags), "=r" (ret) : "r" (addr) );
+ : "=&r" (flags), "=r" (ret) : "r" (addr), "i" (PSW_SM_D) );
return ret;
}
@@ -48,10 +48,10 @@ static inline unsigned short gsc_readw(unsigned long addr)
unsigned short ret;
__asm__ __volatile__(
- " rsm 2,%0\n"
+ " rsm %3,%0\n"
" ldhx 0(%2),%1\n"
" mtsm %0\n"
- : "=&r" (flags), "=r" (ret) : "r" (addr) );
+ : "=&r" (flags), "=r" (ret) : "r" (addr), "i" (PSW_SM_D) );
return ret;
}
@@ -87,20 +87,20 @@ static inline void gsc_writeb(unsigned char val, unsigned long addr)
{
long flags;
__asm__ __volatile__(
- " rsm 2,%0\n"
+ " rsm %3,%0\n"
" stbs %1,0(%2)\n"
" mtsm %0\n"
- : "=&r" (flags) : "r" (val), "r" (addr) );
+ : "=&r" (flags) : "r" (val), "r" (addr), "i" (PSW_SM_D) );
}
static inline void gsc_writew(unsigned short val, unsigned long addr)
{
long flags;
__asm__ __volatile__(
- " rsm 2,%0\n"
+ " rsm %3,%0\n"
" sths %1,0(%2)\n"
" mtsm %0\n"
- : "=&r" (flags) : "r" (val), "r" (addr) );
+ : "=&r" (flags) : "r" (val), "r" (addr), "i" (PSW_SM_D) );
}
static inline void gsc_writel(unsigned int val, unsigned long addr)
diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h
index 59be25764433..a81226257878 100644
--- a/arch/parisc/include/asm/mmu_context.h
+++ b/arch/parisc/include/asm/mmu_context.h
@@ -49,15 +49,26 @@ static inline void load_context(mm_context_t context)
mtctl(__space_to_prot(context), 8);
}
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
+static inline void switch_mm_irqs_off(struct mm_struct *prev,
+ struct mm_struct *next, struct task_struct *tsk)
{
-
if (prev != next) {
mtctl(__pa(next->pgd), 25);
load_context(next->context);
}
}
+static inline void switch_mm(struct mm_struct *prev,
+ struct mm_struct *next, struct task_struct *tsk)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ switch_mm_irqs_off(prev, next, tsk);
+ local_irq_restore(flags);
+}
+#define switch_mm_irqs_off switch_mm_irqs_off
+
#define deactivate_mm(tsk,mm) do { } while (0)
static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h
index 451906d78136..7569627a032b 100644
--- a/arch/parisc/include/asm/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@@ -6,6 +6,8 @@
#if !defined(__ASSEMBLY__)
extern int pdc_type;
+extern unsigned long parisc_cell_num; /* cell number the CPU runs on (PAT) */
+extern unsigned long parisc_cell_loc; /* cell location of CPU (PAT) */
/* Values for pdc_type */
#define PDC_TYPE_ILLEGAL -1
@@ -143,6 +145,18 @@ struct pdc_btlb_info { /* PDC_BLOCK_TLB, return of PDC_BTLB_INFO */
#endif /* !CONFIG_PA20 */
+struct pdc_mem_retinfo { /* PDC_MEM/PDC_MEM_MEMINFO (return info) */
+ unsigned long pdt_size;
+ unsigned long pdt_entries;
+ unsigned long pdt_status;
+ unsigned long first_dbe_loc;
+ unsigned long good_mem;
+};
+
+struct pdc_mem_read_pdt { /* PDC_MEM/PDC_MEM_READ_PDT (return info) */
+ unsigned long pdt_entries;
+};
+
#ifdef CONFIG_64BIT
struct pdc_memory_table_raddr { /* PDC_MEM/PDC_MEM_TABLE (return info) */
unsigned long entries_returned;
@@ -301,6 +315,10 @@ int pdc_get_initiator(struct hardware_path *, struct pdc_initiator *);
int pdc_tod_read(struct pdc_tod *tod);
int pdc_tod_set(unsigned long sec, unsigned long usec);
+void pdc_pdt_init(void); /* in pdt.c */
+int pdc_mem_pdt_info(struct pdc_mem_retinfo *rinfo);
+int pdc_mem_pdt_read_entries(struct pdc_mem_read_pdt *rpdt_read,
+ unsigned long *pdt_entries_ptr);
#ifdef CONFIG_64BIT
int pdc_mem_mem_table(struct pdc_memory_table_raddr *r_addr,
struct pdc_memory_table *tbl, unsigned long entries);
diff --git a/arch/parisc/include/asm/pdcpat.h b/arch/parisc/include/asm/pdcpat.h
index e1d289092705..32e105fb8adb 100644
--- a/arch/parisc/include/asm/pdcpat.h
+++ b/arch/parisc/include/asm/pdcpat.h
@@ -147,9 +147,9 @@
#define PDC_PAT_MEM_CELL_CLEAR 6L /* Clear PDT For Cell */
#define PDC_PAT_MEM_CELL_READ 7L /* Read PDT entries For Cell */
#define PDC_PAT_MEM_CELL_RESET 8L /* Reset clear bit For Cell */
-#define PDC_PAT_MEM_SETGM 9L /* Set Golden Memory value */
-#define PDC_PAT_MEM_ADD_PAGE 10L /* ADDs a page to the cell */
-#define PDC_PAT_MEM_ADDRESS 11L /* Get Physical Location From */
+#define PDC_PAT_MEM_SETGM 9L /* Set Good Memory value */
+#define PDC_PAT_MEM_ADD_PAGE 10L /* ADDs a page to the cell */
+#define PDC_PAT_MEM_ADDRESS 11L /* Get Physical Location From */
/* Memory Address */
#define PDC_PAT_MEM_GET_TXT_SIZE 12L /* Get Formatted Text Size */
#define PDC_PAT_MEM_GET_PD_TXT 13L /* Get PD Formatted Text */
@@ -212,6 +212,23 @@ struct pdc_pat_cpu_num {
unsigned long cpu_loc;
};
+struct pdc_pat_mem_retinfo { /* PDC_PAT_MEM/PDC_PAT_MEM_PD_INFO (return info) */
+ unsigned int ke; /* bit 0: memory inside good memory? */
+ unsigned int current_pdt_entries:16;
+ unsigned int max_pdt_entries:16;
+ unsigned long Cs_bitmap;
+ unsigned long Ic_bitmap;
+ unsigned long good_mem;
+ unsigned long first_dbe_loc; /* first location of double bit error */
+ unsigned long clear_time; /* last PDT clear time (since Jan 1970) */
+};
+
+struct pdc_pat_mem_read_pd_retinfo { /* PDC_PAT_MEM/PDC_PAT_MEM_PD_READ */
+ unsigned long actual_count_bytes;
+ unsigned long pdt_entries;
+};
+
+
struct pdc_pat_pd_addr_map_entry {
unsigned char entry_type; /* 1 = Memory Descriptor Entry Type */
unsigned char reserve1[5];
@@ -293,15 +310,15 @@ extern int pdc_pat_cpu_get_number(struct pdc_pat_cpu_num *cpu_info, unsigned lon
extern int pdc_pat_pd_get_addr_map(unsigned long *actual_len, void *mem_addr, unsigned long count, unsigned long offset);
-
extern int pdc_pat_io_pci_cfg_read(unsigned long pci_addr, int pci_size, u32 *val);
extern int pdc_pat_io_pci_cfg_write(unsigned long pci_addr, int pci_size, u32 val);
-
-/* Flag to indicate this is a PAT box...don't use this unless you
-** really have to...it might go away some day.
-*/
-extern int pdc_pat; /* arch/parisc/kernel/inventory.c */
+extern int pdc_pat_mem_pdt_info(struct pdc_pat_mem_retinfo *rinfo);
+extern int pdc_pat_mem_read_cell_pdt(struct pdc_pat_mem_read_pd_retinfo *pret,
+ unsigned long *pdt_entries_ptr, unsigned long max_entries);
+extern int pdc_pat_mem_read_pd_pdt(struct pdc_pat_mem_read_pd_retinfo *pret,
+ unsigned long *pdt_entries_ptr, unsigned long count,
+ unsigned long offset);
#endif /* __ASSEMBLY__ */
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 3a4ed9f91d57..71ca86cb0f16 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -511,6 +511,9 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
#define pte_same(A,B) (pte_val(A) == pte_val(B))
+struct seq_file;
+extern void arch_report_meminfo(struct seq_file *m);
+
#endif /* !__ASSEMBLY__ */
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index a3661ee6b060..b3b66c3d6f3c 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -103,6 +103,8 @@ struct cpuinfo_parisc {
unsigned long bh_count; /* number of times bh was invoked */
unsigned long fp_rev;
unsigned long fp_model;
+ unsigned long cpu_num; /* CPU number from PAT firmware */
+ unsigned long cpu_loc; /* CPU location from PAT firmware */
unsigned int state;
struct parisc_device *dev;
unsigned long loops_per_jiffy;
@@ -163,12 +165,7 @@ struct thread_struct {
.flags = 0 \
}
-/*
- * Return saved PC of a blocked thread. This is used by ps mostly.
- */
-
struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *t);
void show_trace(struct task_struct *task, unsigned long *stack);
/*
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 6b113f39f30c..c3e114f67485 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -69,17 +69,6 @@ struct exception_table_entry {
ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1)
/*
- * The page fault handler stores, in a per-cpu area, the following information
- * if a fixup routine is available.
- */
-struct exception_data {
- unsigned long fault_ip;
- unsigned long fault_gp;
- unsigned long fault_space;
- unsigned long fault_addr;
-};
-
-/*
* load_sr2() preloads the space register %%sr2 - based on the value of
* get_fs() - with either a value of 0 to access kernel space (KERNEL_DS which
* is 0), or with the current value of %%sr3 to access user space (USER_DS)
diff --git a/arch/parisc/include/uapi/asm/ioctls.h b/arch/parisc/include/uapi/asm/ioctls.h
index b6572f051b67..674c68a5bbd0 100644
--- a/arch/parisc/include/uapi/asm/ioctls.h
+++ b/arch/parisc/include/uapi/asm/ioctls.h
@@ -60,6 +60,7 @@
#define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */
#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */
#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */
+#define TIOCGPTPEER _IOR('T', 0x41, int) /* Safely open the slave */
#define FIONCLEX 0x5450 /* these numbers need to be adjusted. */
#define FIOCLEX 0x5451
diff --git a/arch/parisc/include/uapi/asm/pdc.h b/arch/parisc/include/uapi/asm/pdc.h
index 0609ff117f67..1f30b49772aa 100644
--- a/arch/parisc/include/uapi/asm/pdc.h
+++ b/arch/parisc/include/uapi/asm/pdc.h
@@ -131,12 +131,12 @@
#define PDC_TLB_SETUP 1 /* set up miss handling */
#define PDC_MEM 20 /* Manage memory */
-#define PDC_MEM_MEMINFO 0
-#define PDC_MEM_ADD_PAGE 1
-#define PDC_MEM_CLEAR_PDT 2
-#define PDC_MEM_READ_PDT 3
-#define PDC_MEM_RESET_CLEAR 4
-#define PDC_MEM_GOODMEM 5
+#define PDC_MEM_MEMINFO 0 /* Return PDT info */
+#define PDC_MEM_ADD_PAGE 1 /* Add page to PDT */
+#define PDC_MEM_CLEAR_PDT 2 /* Clear PDT */
+#define PDC_MEM_READ_PDT 3 /* Read PDT entry */
+#define PDC_MEM_RESET_CLEAR 4 /* Reset PDT clear flag */
+#define PDC_MEM_GOODMEM 5 /* Set good_mem value */
#define PDC_MEM_TABLE 128 /* Non contig mem map (sprockets) */
#define PDC_MEM_RETURN_ADDRESS_TABLE PDC_MEM_TABLE
#define PDC_MEM_GET_MEMORY_SYSTEM_TABLES_SIZE 131
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index 69a11183d48d..c4294df69fb6 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -4,7 +4,7 @@
extra-y := head.o vmlinux.lds
-obj-y := cache.o pacache.o setup.o traps.o time.o irq.o \
+obj-y := cache.o pacache.o setup.o pdt.o traps.o time.o irq.o \
pa7300lc.o syscall.o entry.o sys_parisc.o firmware.o \
ptrace.o hardware.o inventory.o drivers.o \
signal.o hpmc.o real2.o parisc_ksyms.o unaligned.o \
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 1c4fe61a592b..dfff8a0d6fd1 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -298,11 +298,6 @@ int main(void)
DEFINE(HUGEPAGE_SIZE, PAGE_SIZE);
#endif
BLANK();
- DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip));
- DEFINE(EXCDATA_GP, offsetof(struct exception_data, fault_gp));
- DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space));
- DEFINE(EXCDATA_ADDR, offsetof(struct exception_data, fault_addr));
- BLANK();
DEFINE(ASM_PDC_RESULT_SIZE, NUM_PDC_RESULT * sizeof(unsigned long));
BLANK();
return 0;
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index fa78419100c8..d8f77358e2ba 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -575,7 +575,8 @@ static ssize_t name##_show(struct device *dev, struct device_attribute *attr, ch
{ \
struct parisc_device *padev = to_parisc_device(dev); \
return sprintf(buf, format_string, padev->field); \
-}
+} \
+static DEVICE_ATTR_RO(name);
#define pa_dev_attr_id(field, format) pa_dev_attr(field, id.field, format)
@@ -589,22 +590,24 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
{
return make_modalias(dev, buf);
}
+static DEVICE_ATTR_RO(modalias);
-static struct device_attribute parisc_device_attrs[] = {
- __ATTR_RO(irq),
- __ATTR_RO(hw_type),
- __ATTR_RO(rev),
- __ATTR_RO(hversion),
- __ATTR_RO(sversion),
- __ATTR_RO(modalias),
- __ATTR_NULL,
+static struct attribute *parisc_device_attrs[] = {
+ &dev_attr_irq.attr,
+ &dev_attr_hw_type.attr,
+ &dev_attr_rev.attr,
+ &dev_attr_hversion.attr,
+ &dev_attr_sversion.attr,
+ &dev_attr_modalias.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(parisc_device);
struct bus_type parisc_bus_type = {
.name = "parisc",
.match = parisc_generic_match,
.uevent = parisc_uevent,
- .dev_attrs = parisc_device_attrs,
+ .dev_groups = parisc_device_groups,
.probe = parisc_driver_probe,
.remove = parisc_driver_remove,
};
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 9d797ae4fa22..98190252c12f 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -957,6 +957,41 @@ int pdc_tod_read(struct pdc_tod *tod)
}
EXPORT_SYMBOL(pdc_tod_read);
+int pdc_mem_pdt_info(struct pdc_mem_retinfo *rinfo)
+{
+ int retval;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pdc_lock, flags);
+ retval = mem_pdc_call(PDC_MEM, PDC_MEM_MEMINFO, __pa(pdc_result), 0);
+ convert_to_wide(pdc_result);
+ memcpy(rinfo, pdc_result, sizeof(*rinfo));
+ spin_unlock_irqrestore(&pdc_lock, flags);
+
+ return retval;
+}
+
+int pdc_mem_pdt_read_entries(struct pdc_mem_read_pdt *pret,
+ unsigned long *pdt_entries_ptr)
+{
+ int retval;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pdc_lock, flags);
+ retval = mem_pdc_call(PDC_MEM, PDC_MEM_READ_PDT, __pa(pdc_result),
+ __pa(pdc_result2));
+ if (retval == PDC_OK) {
+ convert_to_wide(pdc_result);
+ memcpy(pret, pdc_result, sizeof(*pret));
+ convert_to_wide(pdc_result2);
+ memcpy(pdt_entries_ptr, pdc_result2,
+ pret->pdt_entries * sizeof(*pdt_entries_ptr));
+ }
+ spin_unlock_irqrestore(&pdc_lock, flags);
+
+ return retval;
+}
+
/**
* pdc_tod_set - Set the Time-Of-Day clock.
* @sec: The number of seconds since epoch.
@@ -1383,6 +1418,79 @@ int pdc_pat_io_pci_cfg_write(unsigned long pci_addr, int pci_size, u32 val)
return retval;
}
+
+/**
+ * pdc_pat_mem_pdc_info - Retrieve information about page deallocation table
+ * @rinfo: memory pdt information
+ *
+ */
+int pdc_pat_mem_pdt_info(struct pdc_pat_mem_retinfo *rinfo)
+{
+ int retval;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pdc_lock, flags);
+ retval = mem_pdc_call(PDC_PAT_MEM, PDC_PAT_MEM_PD_INFO,
+ __pa(&pdc_result));
+ if (retval == PDC_OK)
+ memcpy(rinfo, &pdc_result, sizeof(*rinfo));
+ spin_unlock_irqrestore(&pdc_lock, flags);
+
+ return retval;
+}
+
+/**
+ * pdc_pat_mem_read_cell_pdt - Read PDT entries from (old) PAT firmware
+ * @pret: array of PDT entries
+ * @pdt_entries_ptr: ptr to hold number of PDT entries
+ * @max_entries: maximum number of entries to be read
+ *
+ */
+int pdc_pat_mem_read_cell_pdt(struct pdc_pat_mem_read_pd_retinfo *pret,
+ unsigned long *pdt_entries_ptr, unsigned long max_entries)
+{
+ int retval;
+ unsigned long flags, entries;
+
+ spin_lock_irqsave(&pdc_lock, flags);
+ /* PDC_PAT_MEM_CELL_READ is available on early PAT machines only */
+ retval = mem_pdc_call(PDC_PAT_MEM, PDC_PAT_MEM_CELL_READ,
+ __pa(&pdc_result), parisc_cell_num, __pa(&pdc_result2));
+
+ if (retval == PDC_OK) {
+ /* build up return value as for PDC_PAT_MEM_PD_READ */
+ entries = min(pdc_result[0], max_entries);
+ pret->pdt_entries = entries;
+ pret->actual_count_bytes = entries * sizeof(unsigned long);
+ memcpy(pdt_entries_ptr, &pdc_result2, pret->actual_count_bytes);
+ }
+
+ spin_unlock_irqrestore(&pdc_lock, flags);
+ WARN_ON(retval == PDC_OK && pdc_result[0] > max_entries);
+
+ return retval;
+}
+/**
+ * pdc_pat_mem_read_pd_pdt - Read PDT entries from (newer) PAT firmware
+ * @pret: array of PDT entries
+ * @pdt_entries_ptr: ptr to hold number of PDT entries
+ *
+ */
+int pdc_pat_mem_read_pd_pdt(struct pdc_pat_mem_read_pd_retinfo *pret,
+ unsigned long *pdt_entries_ptr, unsigned long count,
+ unsigned long offset)
+{
+ int retval;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pdc_lock, flags);
+ retval = mem_pdc_call(PDC_PAT_MEM, PDC_PAT_MEM_PD_READ,
+ __pa(&pret), __pa(pdt_entries_ptr),
+ count, offset);
+ spin_unlock_irqrestore(&pdc_lock, flags);
+
+ return retval;
+}
#endif /* CONFIG_64BIT */
diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S
index 0fbd0a0e1cda..e3a8e5e4d5de 100644
--- a/arch/parisc/kernel/hpmc.S
+++ b/arch/parisc/kernel/hpmc.S
@@ -44,6 +44,7 @@
#include <asm/assembly.h>
#include <asm/pdc.h>
+#include <asm/psw.h>
#include <linux/linkage.h>
#include <linux/init.h>
@@ -135,7 +136,7 @@ ENTRY_CFI(os_hpmc)
* So turn on the Q bit and turn off the M bit.
*/
- ldo 8(%r0),%r4 /* PSW Q on, PSW M off */
+ ldi PSW_SM_Q,%r4 /* PSW Q on, PSW M off */
mtctl %r4,ipsw
mtctl %r0,pcsq
mtctl %r0,pcsq
@@ -257,7 +258,7 @@ os_hpmc_5:
tovirt_r1 %r30 /* make sp virtual */
- rsm 8,%r0 /* Clear Q bit */
+ rsm PSW_SM_Q,%r0 /* Clear Q bit */
ldi 1,%r8 /* Set trap code to "1" for HPMC */
load32 PA(intr_save),%r1
be 0(%sr7,%r1)
diff --git a/arch/parisc/kernel/inventory.c b/arch/parisc/kernel/inventory.c
index c9789d9c73b4..b0fe19ac4d78 100644
--- a/arch/parisc/kernel/inventory.c
+++ b/arch/parisc/kernel/inventory.c
@@ -40,6 +40,11 @@
int pdc_type __read_mostly = PDC_TYPE_ILLEGAL;
+/* cell number and location (PAT firmware only) */
+unsigned long parisc_cell_num __read_mostly;
+unsigned long parisc_cell_loc __read_mostly;
+
+
void __init setup_pdc(void)
{
long status;
@@ -78,6 +83,10 @@ void __init setup_pdc(void)
if (status == PDC_OK) {
pdc_type = PDC_TYPE_PAT;
pr_cont("64 bit PAT.\n");
+ parisc_cell_num = cell_info.cell_num;
+ parisc_cell_loc = cell_info.cell_loc;
+ pr_info("PAT: Running on cell %lu and location %lu.\n",
+ parisc_cell_num, parisc_cell_loc);
return;
}
#endif
diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c
new file mode 100644
index 000000000000..f3a797e670b0
--- /dev/null
+++ b/arch/parisc/kernel/pdt.c
@@ -0,0 +1,143 @@
+/*
+ * Page Deallocation Table (PDT) support
+ *
+ * The Page Deallocation Table (PDT) holds a table with pointers to bad
+ * memory (broken RAM modules) which is maintained by firmware.
+ *
+ * Copyright 2017 by Helge Deller <deller@gmx.de>
+ *
+ * TODO:
+ * - check regularily for new bad memory
+ * - add userspace interface with procfs or sysfs
+ * - increase number of PDT entries dynamically
+ */
+
+#include <linux/memblock.h>
+#include <linux/seq_file.h>
+
+#include <asm/pdc.h>
+#include <asm/pdcpat.h>
+#include <asm/sections.h>
+#include <asm/pgtable.h>
+
+enum pdt_access_type {
+ PDT_NONE,
+ PDT_PDC,
+ PDT_PAT_NEW,
+ PDT_PAT_OLD
+};
+
+static enum pdt_access_type pdt_type;
+
+/* global PDT status information */
+static struct pdc_mem_retinfo pdt_status;
+
+#define MAX_PDT_TABLE_SIZE PAGE_SIZE
+#define MAX_PDT_ENTRIES (MAX_PDT_TABLE_SIZE / sizeof(unsigned long))
+static unsigned long pdt_entry[MAX_PDT_ENTRIES] __page_aligned_bss;
+
+
+/* report PDT entries via /proc/meminfo */
+void arch_report_meminfo(struct seq_file *m)
+{
+ if (pdt_type == PDT_NONE)
+ return;
+
+ seq_printf(m, "PDT_max_entries: %7lu\n",
+ pdt_status.pdt_size);
+ seq_printf(m, "PDT_cur_entries: %7lu\n",
+ pdt_status.pdt_entries);
+}
+
+/*
+ * pdc_pdt_init()
+ *
+ * Initialize kernel PDT structures, read initial PDT table from firmware,
+ * report all current PDT entries and mark bad memory with memblock_reserve()
+ * to avoid that the kernel will use broken memory areas.
+ *
+ */
+void __init pdc_pdt_init(void)
+{
+ int ret, i;
+ unsigned long entries;
+ struct pdc_mem_read_pdt pdt_read_ret;
+
+ if (is_pdc_pat()) {
+ struct pdc_pat_mem_retinfo pat_rinfo;
+
+ pdt_type = PDT_PAT_NEW;
+ ret = pdc_pat_mem_pdt_info(&pat_rinfo);
+ pdt_status.pdt_size = pat_rinfo.max_pdt_entries;
+ pdt_status.pdt_entries = pat_rinfo.current_pdt_entries;
+ pdt_status.pdt_status = 0;
+ pdt_status.first_dbe_loc = pat_rinfo.first_dbe_loc;
+ pdt_status.good_mem = pat_rinfo.good_mem;
+ } else {
+ pdt_type = PDT_PDC;
+ ret = pdc_mem_pdt_info(&pdt_status);
+ }
+
+ if (ret != PDC_OK) {
+ pdt_type = PDT_NONE;
+ pr_info("PDT: Firmware does not provide any page deallocation"
+ " information.\n");
+ return;
+ }
+
+ entries = pdt_status.pdt_entries;
+ WARN_ON(entries > MAX_PDT_ENTRIES);
+
+ pr_info("PDT: size %lu, entries %lu, status %lu, dbe_loc 0x%lx,"
+ " good_mem %lu\n",
+ pdt_status.pdt_size, pdt_status.pdt_entries,
+ pdt_status.pdt_status, pdt_status.first_dbe_loc,
+ pdt_status.good_mem);
+
+ if (entries == 0) {
+ pr_info("PDT: Firmware reports all memory OK.\n");
+ return;
+ }
+
+ if (pdt_status.first_dbe_loc &&
+ pdt_status.first_dbe_loc <= __pa((unsigned long)&_end))
+ pr_crit("CRITICAL: Bad memory inside kernel image memory area!\n");
+
+ pr_warn("PDT: Firmware reports %lu entries of faulty memory:\n",
+ entries);
+
+ if (pdt_type == PDT_PDC)
+ ret = pdc_mem_pdt_read_entries(&pdt_read_ret, pdt_entry);
+ else {
+#ifdef CONFIG_64BIT
+ struct pdc_pat_mem_read_pd_retinfo pat_pret;
+
+ ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
+ MAX_PDT_ENTRIES);
+ if (ret != PDC_OK) {
+ pdt_type = PDT_PAT_OLD;
+ ret = pdc_pat_mem_read_pd_pdt(&pat_pret, pdt_entry,
+ MAX_PDT_TABLE_SIZE, 0);
+ }
+#else
+ ret = PDC_BAD_PROC;
+#endif
+ }
+
+ if (ret != PDC_OK) {
+ pdt_type = PDT_NONE;
+ pr_debug("PDT type %d, retval = %d\n", pdt_type, ret);
+ return;
+ }
+
+ for (i = 0; i < pdt_status.pdt_entries; i++) {
+ if (i < 20)
+ pr_warn("PDT: BAD PAGE #%d at 0x%08lx (error_type = %lu)\n",
+ i,
+ pdt_entry[i] & PAGE_MASK,
+ pdt_entry[i] & 1);
+
+ /* mark memory page bad */
+ memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE);
+ }
+}
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 4516a5b53f38..b64d7d21646e 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -239,11 +239,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
return 0;
}
-unsigned long thread_saved_pc(struct task_struct *t)
-{
- return t->thread.regs.kpc;
-}
-
unsigned long
get_wchan(struct task_struct *p)
{
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index 85de47f4eb59..0ab32779dfa7 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -94,7 +94,7 @@ static int processor_probe(struct parisc_device *dev)
unsigned long txn_addr;
unsigned long cpuid;
struct cpuinfo_parisc *p;
- struct pdc_pat_cpu_num cpu_info __maybe_unused;
+ struct pdc_pat_cpu_num cpu_info = { };
#ifdef CONFIG_SMP
if (num_online_cpus() >= nr_cpu_ids) {
@@ -113,6 +113,7 @@ static int processor_probe(struct parisc_device *dev)
*/
cpuid = boot_cpu_data.cpu_count;
txn_addr = dev->hpa.start; /* for legacy PDC */
+ cpu_info.cpu_num = cpu_info.cpu_loc = cpuid;
#ifdef CONFIG_64BIT
if (is_pdc_pat()) {
@@ -180,6 +181,8 @@ static int processor_probe(struct parisc_device *dev)
p->hpa = dev->hpa.start; /* save CPU hpa */
p->cpuid = cpuid; /* save CPU id */
p->txn_addr = txn_addr; /* save CPU IRQ address */
+ p->cpu_num = cpu_info.cpu_num;
+ p->cpu_loc = cpu_info.cpu_loc;
#ifdef CONFIG_SMP
/*
** FIXME: review if any other initialization is clobbered
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 44aeaa9c039f..6308749359e4 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -361,7 +361,7 @@
ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */
ENTRY_SAME(add_key)
ENTRY_SAME(request_key) /* 265 */
- ENTRY_SAME(keyctl)
+ ENTRY_COMP(keyctl)
ENTRY_SAME(ioprio_set)
ENTRY_SAME(ioprio_get)
ENTRY_SAME(inotify_init)
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 89421df70160..2d956aa0a38a 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -243,14 +243,30 @@ void __init time_init(void)
static int __init init_cr16_clocksource(void)
{
/*
- * The cr16 interval timers are not syncronized across CPUs, so mark
- * them unstable and lower rating on SMP systems.
+ * The cr16 interval timers are not syncronized across CPUs on
+ * different sockets, so mark them unstable and lower rating on
+ * multi-socket SMP systems.
*/
if (num_online_cpus() > 1) {
- clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
- clocksource_cr16.rating = 0;
+ int cpu;
+ unsigned long cpu0_loc;
+ cpu0_loc = per_cpu(cpu_data, 0).cpu_loc;
+
+ for_each_online_cpu(cpu) {
+ if (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc)
+ continue;
+
+ clocksource_cr16.name = "cr16_unstable";
+ clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
+ clocksource_cr16.rating = 0;
+ break;
+ }
}
+ /* XXX: We may want to mark sched_clock stable here if cr16 clocks are
+ * in sync:
+ * (clocksource_cr16.flags == CLOCK_SOURCE_IS_CONTINUOUS) */
+
/* register at clocksource framework */
clocksource_register_hz(&clocksource_cr16,
100 * PAGE0->mem_10msec);
diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S
index 85c28bb80fb7..d4fe19806d57 100644
--- a/arch/parisc/lib/lusercopy.S
+++ b/arch/parisc/lib/lusercopy.S
@@ -56,12 +56,6 @@
mtsp %r1,%sr1
.endm
- .macro fixup_branch lbl
- ldil L%\lbl, %r1
- ldo R%\lbl(%r1), %r1
- bv %r0(%r1)
- .endm
-
/*
* unsigned long lclear_user(void *to, unsigned long n)
*
@@ -82,16 +76,16 @@ $lclu_loop:
$lclu_done:
bv %r0(%r2)
copy %r25,%r28
- .exit
-ENDPROC_CFI(lclear_user)
- .section .fixup,"ax"
-2: fixup_branch $lclu_done
- ldo 1(%r25),%r25
- .previous
+2: b $lclu_done
+ ldo 1(%r25),%r25
ASM_EXCEPTIONTABLE_ENTRY(1b,2b)
+ .exit
+ENDPROC_CFI(lclear_user)
+
+
.procend
/*
@@ -122,16 +116,15 @@ $lslen_done:
$lslen_nzero:
b $lslen_done
ldo 1(%r26),%r26 /* special case for N == 0 */
-ENDPROC_CFI(lstrnlen_user)
- .section .fixup,"ax"
-3: fixup_branch $lslen_done
+3: b $lslen_done
copy %r24,%r26 /* reset r26 so 0 is returned on fault */
- .previous
ASM_EXCEPTIONTABLE_ENTRY(1b,3b)
ASM_EXCEPTIONTABLE_ENTRY(2b,3b)
+ENDPROC_CFI(lstrnlen_user)
+
.procend
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 32ec22146141..5b101f6a5607 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -29,8 +29,6 @@
#define BITSSET 0x1c0 /* for identifying LDCW */
-DEFINE_PER_CPU(struct exception_data, exception_data);
-
int show_unhandled_signals = 1;
/*
@@ -143,13 +141,6 @@ int fixup_exception(struct pt_regs *regs)
fix = search_exception_tables(regs->iaoq[0]);
if (fix) {
- struct exception_data *d;
- d = this_cpu_ptr(&exception_data);
- d->fault_ip = regs->iaoq[0];
- d->fault_gp = regs->gr[27];
- d->fault_space = regs->isr;
- d->fault_addr = regs->ior;
-
/*
* Fix up get_user() and put_user().
* ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant
@@ -163,6 +154,7 @@ int fixup_exception(struct pt_regs *regs)
/* zero target register for get_user() */
if (parisc_acctyp(0, regs->iir) == VM_READ) {
int treg = regs->iir & 0x1f;
+ BUG_ON(treg == 0);
regs->gr[treg] = 0;
}
}
@@ -367,7 +359,7 @@ bad_area:
case 15: /* Data TLB miss fault/Data page fault */
/* send SIGSEGV when outside of vma */
if (!vma ||
- address < vma->vm_start || address > vma->vm_end) {
+ address < vma->vm_start || address >= vma->vm_end) {
si.si_signo = SIGSEGV;
si.si_code = SEGV_MAPERR;
break;
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 66f3a6345105..1ca9a2b4239f 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -381,6 +381,9 @@ static void __init setup_bootmem(void)
request_resource(res, &data_resource);
}
request_resource(&sysram_resources[0], &pdcdata_resource);
+
+ /* Initialize Page Deallocation Table (PDT) and check for bad memory. */
+ pdc_pdt_init();
}
static int __init parisc_text_address(unsigned long vaddr)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index bf4391d18923..6189238e69f8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -184,7 +184,7 @@ config PPC
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS
- select HAVE_GENERIC_RCU_GUP
+ select HAVE_GENERIC_GUP
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
select HAVE_IDE
select HAVE_IOREMAP_PROT
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index a83821f33ea3..8814a7249ceb 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
extern int kprobe_handler(struct pt_regs *regs);
extern int kprobe_post_handler(struct pt_regs *regs);
+extern int is_current_kprobe_addr(unsigned long addr);
#ifdef CONFIG_KPROBES_ON_FTRACE
extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb);
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index bb99b651085a..1189d04f3bd1 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -378,12 +378,6 @@ struct thread_struct {
}
#endif
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-#define thread_saved_pc(tsk) \
- ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
-
#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.regs)
unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 329771559cbb..dc4e15937ccf 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -43,6 +43,7 @@ extern void __init dump_numa_cpu_topology(void);
extern int sysfs_add_device_to_node(struct device *dev, int nid);
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
+extern int numa_update_cpu_topology(bool cpus_locked);
static inline int early_cpu_to_node(int cpu)
{
@@ -71,6 +72,11 @@ static inline void sysfs_remove_device_from_node(struct device *dev,
int nid)
{
}
+
+static inline int numa_update_cpu_topology(bool cpus_locked)
+{
+ return 0;
+}
#endif /* CONFIG_NUMA */
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 5c0d8a8cdae5..41e88d3ce36b 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -267,13 +267,7 @@ do { \
extern unsigned long __copy_tofrom_user(void __user *to,
const void __user *from, unsigned long size);
-#ifndef __powerpc64__
-
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
-#else /* __powerpc64__ */
-
+#ifdef __powerpc64__
static inline unsigned long
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
{
diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h
index 49a25796a61a..bfd609a3e928 100644
--- a/arch/powerpc/include/uapi/asm/ioctls.h
+++ b/arch/powerpc/include/uapi/asm/ioctls.h
@@ -100,6 +100,7 @@
#define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */
#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */
#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */
+#define TIOCGPTPEER _IOR('T', 0x41, int) /* Safely open the slave */
#define TIOCSERCONFIG 0x5453
#define TIOCSERGWILD 0x5454
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ae418b85c17c..b886795060fd 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1411,10 +1411,8 @@ USE_TEXT_SECTION()
.balign IFETCH_ALIGN_BYTES
do_hash_page:
#ifdef CONFIG_PPC_STD_MMU_64
- andis. r0,r4,0xa410 /* weird error? */
+ andis. r0,r4,0xa450 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
- andis. r0,r4,DSISR_DABRMATCH@h
- bne- handle_dabr_fault
CURRENT_THREAD_INFO(r11, r1)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
@@ -1438,11 +1436,16 @@ do_hash_page:
/* Error */
blt- 13f
+
+ /* Reload DSISR into r4 for the DABR check below */
+ ld r4,_DSISR(r1)
#endif /* CONFIG_PPC_STD_MMU_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
-11: ld r4,_DAR(r1)
+11: andis. r0,r4,DSISR_DABRMATCH@h
+ bne- handle_dabr_fault
+ ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_page_fault
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index fc4343514bed..01addfb0ed0a 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+int is_current_kprobe_addr(unsigned long addr)
+{
+ struct kprobe *p = kprobe_running();
+ return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+}
+
bool arch_within_kprobe_blacklist(unsigned long addr)
{
return (addr >= (unsigned long)__kprobes_text_start &&
@@ -617,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
#endif
+ /*
+ * jprobes use jprobe_return() which skips the normal return
+ * path of the function, and this messes up the accounting of the
+ * function graph tracer.
+ *
+ * Pause function graph tracing while performing the jprobe function.
+ */
+ pause_graph_tracing();
+
return 1;
}
NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -642,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
* saved regs...
*/
memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+ /* It's OK to start function graph tracing again */
+ unpause_graph_tracing();
preempt_enable_no_resched();
return 1;
}
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 3650732639ed..0f0b1b2f3b60 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -283,7 +283,7 @@ static void prrn_work_fn(struct work_struct *work)
* the RTAS event.
*/
pseries_devicetree_update(-prrn_update_scope);
- arch_update_cpu_topology();
+ numa_update_cpu_topology(false);
}
static DECLARE_WORK(prrn_work, prrn_work_fn);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index a8c1f99e9607..4640f6d64f8b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -616,6 +616,24 @@ void __init exc_lvl_early_init(void)
#endif
/*
+ * Emergency stacks are used for a range of things, from asynchronous
+ * NMIs (system reset, machine check) to synchronous, process context.
+ * We set preempt_count to zero, even though that isn't necessarily correct. To
+ * get the right value we'd need to copy it from the previous thread_info, but
+ * doing that might fault causing more problems.
+ * TODO: what to do with accounting?
+ */
+static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
+{
+ ti->task = NULL;
+ ti->cpu = cpu;
+ ti->preempt_count = 0;
+ ti->local_flags = 0;
+ ti->flags = 0;
+ klp_init_thread_info(ti);
+}
+
+/*
* Stack space used when we detect a bad kernel stack pointer, and
* early in SMP boots before relocation is enabled. Exclusive emergency
* stack for machine checks.
@@ -633,24 +651,31 @@ void __init emergency_stack_init(void)
* Since we use these as temporary stacks during secondary CPU
* bringup, we need to get at them in real mode. This means they
* must also be within the RMO region.
+ *
+ * The IRQ stacks allocated elsewhere in this file are zeroed and
+ * initialized in kernel/irq.c. These are initialized here in order
+ * to have emergency stacks available as early as possible.
*/
limit = min(safe_stack_limit(), ppc64_rma_size);
for_each_possible_cpu(i) {
struct thread_info *ti;
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
- klp_init_thread_info(ti);
+ memset(ti, 0, THREAD_SIZE);
+ emerg_stack_init_thread_info(ti, i);
paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
- klp_init_thread_info(ti);
+ memset(ti, 0, THREAD_SIZE);
+ emerg_stack_init_thread_info(ti, i);
paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
- klp_init_thread_info(ti);
+ memset(ti, 0, THREAD_SIZE);
+ emerg_stack_init_thread_info(ti, i);
paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
#endif
}
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index df2a41647d8e..1069f74fca47 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -97,7 +97,7 @@ int smp_generic_cpu_bootable(unsigned int nr)
/* Special case - we inhibit secondary thread startup
* during boot if the user requests it.
*/
- if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
+ if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
return 0;
if (smt_enabled_at_boot
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 7c933a99f5d5..c98e90b4ea7b 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller)
stdu r1,-SWITCH_FRAME_SIZE(r1)
/* Save all gprs to pt_regs */
- SAVE_8GPRS(0,r1)
- SAVE_8GPRS(8,r1)
- SAVE_8GPRS(16,r1)
- SAVE_8GPRS(24,r1)
+ SAVE_GPR(0, r1)
+ SAVE_10GPRS(2, r1)
+ SAVE_10GPRS(12, r1)
+ SAVE_10GPRS(22, r1)
+
+ /* Save previous stack pointer (r1) */
+ addi r8, r1, SWITCH_FRAME_SIZE
+ std r8, GPR1(r1)
/* Load special regs for save below */
mfmsr r8
@@ -95,18 +99,44 @@ ftrace_call:
bl ftrace_stub
nop
- /* Load ctr with the possibly modified NIP */
- ld r3, _NIP(r1)
- mtctr r3
+ /* Load the possibly modified NIP */
+ ld r15, _NIP(r1)
+
#ifdef CONFIG_LIVEPATCH
- cmpd r14,r3 /* has NIP been altered? */
+ cmpd r14, r15 /* has NIP been altered? */
+#endif
+
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
+ /* NIP has not been altered, skip over further checks */
+ beq 1f
+
+ /* Check if there is an active kprobe on us */
+ subi r3, r14, 4
+ bl is_current_kprobe_addr
+ nop
+
+ /*
+ * If r3 == 1, then this is a kprobe/jprobe.
+ * else, this is livepatched function.
+ *
+ * The conditional branch for livepatch_handler below will use the
+ * result of this comparison. For kprobe/jprobe, we just need to branch to
+ * the new NIP, not call livepatch_handler. The branch below is bne, so we
+ * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
+ * CR0[EQ] = (r3 == 1).
+ */
+ cmpdi r3, 1
+1:
#endif
+ /* Load CTR with the possibly modified NIP */
+ mtctr r15
+
/* Restore gprs */
- REST_8GPRS(0,r1)
- REST_8GPRS(8,r1)
- REST_8GPRS(16,r1)
- REST_8GPRS(24,r1)
+ REST_GPR(0,r1)
+ REST_10GPRS(2,r1)
+ REST_10GPRS(12,r1)
+ REST_10GPRS(22,r1)
/* Restore possibly modified LR */
ld r0, _LINK(r1)
@@ -119,7 +149,10 @@ ftrace_call:
addi r1, r1, SWITCH_FRAME_SIZE
#ifdef CONFIG_LIVEPATCH
- /* Based on the cmpd above, if the NIP was altered handle livepatch */
+ /*
+ * Based on the cmpd or cmpdi above, if the NIP was altered and we're
+ * not on a kprobe/jprobe, then handle livepatch.
+ */
bne- livepatch_handler
#endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 42b7a4fd57d9..773b35d16a0b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1486,6 +1486,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
break;
case KVM_REG_PPC_TB_OFFSET:
+ /*
+ * POWER9 DD1 has an erratum where writing TBU40 causes
+ * the timebase to lose ticks. So we don't let the
+ * timebase offset be changed on P9 DD1. (It is
+ * initialized to zero.)
+ */
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ break;
/* round up to multiple of 2^24 */
vcpu->arch.vcore->tb_offset =
ALIGN(set_reg_val(id, *val), 1UL << 24);
@@ -2907,12 +2915,36 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
int r;
int srcu_idx;
+ unsigned long ebb_regs[3] = {}; /* shut up GCC */
+ unsigned long user_tar = 0;
+ unsigned int user_vrsave;
if (!vcpu->arch.sane) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
+ /*
+ * Don't allow entry with a suspended transaction, because
+ * the guest entry/exit code will lose it.
+ * If the guest has TM enabled, save away their TM-related SPRs
+ * (they will get restored by the TM unavailable interrupt).
+ */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+ (current->thread.regs->msr & MSR_TM)) {
+ if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.hardware_entry_failure_reason = 0;
+ return -EINVAL;
+ }
+ current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+ current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+ current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+ current->thread.regs->msr &= ~MSR_TM;
+ }
+#endif
+
kvmppc_core_prepare_to_enter(vcpu);
/* No need to go into the guest when all we'll do is come back out */
@@ -2934,6 +2966,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
flush_all_to_thread(current);
+ /* Save userspace EBB and other register values */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ ebb_regs[0] = mfspr(SPRN_EBBHR);
+ ebb_regs[1] = mfspr(SPRN_EBBRR);
+ ebb_regs[2] = mfspr(SPRN_BESCR);
+ user_tar = mfspr(SPRN_TAR);
+ }
+ user_vrsave = mfspr(SPRN_VRSAVE);
+
vcpu->arch.wqp = &vcpu->arch.vcore->wq;
vcpu->arch.pgdir = current->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
@@ -2960,6 +3001,16 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
} while (is_kvmppc_resume_guest(r));
+ /* Restore userspace EBB and other register values */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ mtspr(SPRN_EBBHR, ebb_regs[0]);
+ mtspr(SPRN_EBBRR, ebb_regs[1]);
+ mtspr(SPRN_BESCR, ebb_regs[2]);
+ mtspr(SPRN_TAR, user_tar);
+ mtspr(SPRN_FSCR, current->thread.fscr);
+ }
+ mtspr(SPRN_VRSAVE, user_vrsave);
+
out:
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
atomic_dec(&vcpu->kvm->arch.vcpus_running);
@@ -3317,7 +3368,7 @@ void kvmppc_alloc_host_rm_ops(void)
return;
}
- get_online_cpus();
+ cpus_read_lock();
for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
if (!cpu_online(cpu))
@@ -3339,17 +3390,17 @@ void kvmppc_alloc_host_rm_ops(void)
l_ops = (unsigned long) ops;
if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
- put_online_cpus();
+ cpus_read_unlock();
kfree(ops->rm_core);
kfree(ops);
return;
}
- cpuhp_setup_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE,
- "ppc/kvm_book3s:prepare",
- kvmppc_set_host_core,
- kvmppc_clear_host_core);
- put_online_cpus();
+ cpuhp_setup_state_nocalls_cpuslocked(CPUHP_KVM_PPC_BOOK3S_PREPARE,
+ "ppc/kvm_book3s:prepare",
+ kvmppc_set_host_core,
+ kvmppc_clear_host_core);
+ cpus_read_unlock();
}
void kvmppc_free_host_rm_ops(void)
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 0fdc4a28970b..404deb512844 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -121,10 +121,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* Put whatever is in the decrementer into the
* hypervisor decrementer.
*/
+BEGIN_FTR_SECTION
+ ld r5, HSTATE_KVM_VCORE(r13)
+ ld r6, VCORE_KVM(r5)
+ ld r9, KVM_HOST_LPCR(r6)
+ andis. r9, r9, LPCR_LD@h
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mfspr r8,SPRN_DEC
mftb r7
- mtspr SPRN_HDEC,r8
+BEGIN_FTR_SECTION
+ /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
+ bne 32f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r8,r8
+32: mtspr SPRN_HDEC,r8
add r8,r8,r7
std r8,HSTATE_DECEXP(r13)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bdb3f76ceb6b..4888dd494604 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -32,12 +32,29 @@
#include <asm/opal.h>
#include <asm/xive-regs.h>
+/* Sign-extend HDEC if not on POWER9 */
+#define EXTEND_HDEC(reg) \
+BEGIN_FTR_SECTION; \
+ extsw reg, reg; \
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
#define NAPPING_NOVCPU 2
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS 144
+#define STACK_SLOT_TRAP (SFS-4)
+#define STACK_SLOT_TID (SFS-16)
+#define STACK_SLOT_PSSCR (SFS-24)
+#define STACK_SLOT_PID (SFS-32)
+#define STACK_SLOT_IAMR (SFS-40)
+#define STACK_SLOT_CIABR (SFS-48)
+#define STACK_SLOT_DAWR (SFS-56)
+#define STACK_SLOT_DAWRX (SFS-64)
+
/*
* Call kvmppc_hv_entry in real mode.
* Must be called with interrupts hard-disabled.
@@ -214,6 +231,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+ /* HDEC may be larger than DEC for arch >= v3.00, but since the */
+ /* HDEC value came from DEC in the first place, it will fit */
mfspr r3, SPRN_HDEC
mtspr SPRN_DEC, r3
/*
@@ -295,8 +314,9 @@ kvm_novcpu_wakeup:
/* See if our timeslice has expired (HDEC is negative) */
mfspr r0, SPRN_HDEC
+ EXTEND_HDEC(r0)
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
- cmpwi r0, 0
+ cmpdi r0, 0
blt kvm_novcpu_exit
/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@@ -319,10 +339,10 @@ kvm_novcpu_exit:
bl kvmhv_accumulate_time
#endif
13: mr r3, r12
- stw r12, 112-4(r1)
+ stw r12, STACK_SLOT_TRAP(r1)
bl kvmhv_commence_exit
nop
- lwz r12, 112-4(r1)
+ lwz r12, STACK_SLOT_TRAP(r1)
b kvmhv_switch_to_host
/*
@@ -390,8 +410,8 @@ kvm_secondary_got_guest:
lbz r4, HSTATE_PTID(r13)
cmpwi r4, 0
bne 63f
- lis r6, 0x7fff
- ori r6, r6, 0xffff
+ LOAD_REG_ADDR(r6, decrementer_max)
+ ld r6, 0(r6)
mtspr SPRN_HDEC, r6
/* and set per-LPAR registers, if doing dynamic micro-threading */
ld r6, HSTATE_SPLIT_MODE(r13)
@@ -545,11 +565,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* *
*****************************************************************************/
-/* Stack frame offsets */
-#define STACK_SLOT_TID (112-16)
-#define STACK_SLOT_PSSCR (112-24)
-#define STACK_SLOT_PID (112-32)
-
.global kvmppc_hv_entry
kvmppc_hv_entry:
@@ -565,7 +580,7 @@ kvmppc_hv_entry:
*/
mflr r0
std r0, PPC_LR_STKOFF(r1)
- stdu r1, -112(r1)
+ stdu r1, -SFS(r1)
/* Save R1 in the PACA */
std r1, HSTATE_HOST_R1(r13)
@@ -749,10 +764,20 @@ BEGIN_FTR_SECTION
mfspr r5, SPRN_TIDR
mfspr r6, SPRN_PSSCR
mfspr r7, SPRN_PID
+ mfspr r8, SPRN_IAMR
std r5, STACK_SLOT_TID(r1)
std r6, STACK_SLOT_PSSCR(r1)
std r7, STACK_SLOT_PID(r1)
+ std r8, STACK_SLOT_IAMR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+ mfspr r5, SPRN_CIABR
+ mfspr r6, SPRN_DAWR
+ mfspr r7, SPRN_DAWRX
+ std r5, STACK_SLOT_CIABR(r1)
+ std r6, STACK_SLOT_DAWR(r1)
+ std r7, STACK_SLOT_DAWRX(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
/* Set partition DABR */
@@ -968,7 +993,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/* Check if HDEC expires soon */
mfspr r3, SPRN_HDEC
- cmpwi r3, 512 /* 1 microsecond */
+ EXTEND_HDEC(r3)
+ cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
#ifdef CONFIG_KVM_XICS
@@ -1505,11 +1531,10 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
* set by the guest could disrupt the host.
*/
li r0, 0
- mtspr SPRN_IAMR, r0
- mtspr SPRN_CIABR, r0
- mtspr SPRN_DAWRX, r0
+ mtspr SPRN_PSPB, r0
mtspr SPRN_WORT, r0
BEGIN_FTR_SECTION
+ mtspr SPRN_IAMR, r0
mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
@@ -1525,6 +1550,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
std r6,VCPU_UAMOR(r9)
li r6,0
mtspr SPRN_AMR,r6
+ mtspr SPRN_UAMOR, r6
/* Switch DSCR back to host value */
mfspr r8, SPRN_DSCR
@@ -1670,12 +1696,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Restore host values of some registers */
BEGIN_FTR_SECTION
+ ld r5, STACK_SLOT_CIABR(r1)
+ ld r6, STACK_SLOT_DAWR(r1)
+ ld r7, STACK_SLOT_DAWRX(r1)
+ mtspr SPRN_CIABR, r5
+ mtspr SPRN_DAWR, r6
+ mtspr SPRN_DAWRX, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
ld r5, STACK_SLOT_TID(r1)
ld r6, STACK_SLOT_PSSCR(r1)
ld r7, STACK_SLOT_PID(r1)
+ ld r8, STACK_SLOT_IAMR(r1)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
mtspr SPRN_PID, r7
+ mtspr SPRN_IAMR, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
@@ -1819,8 +1855,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
- ld r0, 112+PPC_LR_STKOFF(r1)
- addi r1, r1, 112
+ ld r0, SFS+PPC_LR_STKOFF(r1)
+ addi r1, r1, SFS
mtlr r0
blr
@@ -2366,12 +2402,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
mfspr r3, SPRN_DEC
mfspr r4, SPRN_HDEC
mftb r5
- cmpw r3, r4
+ extsw r3, r3
+ EXTEND_HDEC(r4)
+ cmpd r3, r4
ble 67f
mtspr SPRN_DEC, r4
67:
/* save expiry time of guest decrementer */
- extsw r3, r3
add r3, r3, r5
ld r4, HSTATE_KVM_VCPU(r13)
ld r5, HSTATE_KVM_VCORE(r13)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 371792e4418f..b95c584ce19d 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1311,8 +1311,10 @@ static int update_lookup_table(void *data)
/*
* Update the node maps and sysfs entries for each cpu whose home node
* has changed. Returns 1 when the topology has changed, and 0 otherwise.
+ *
+ * cpus_locked says whether we already hold cpu_hotplug_lock.
*/
-int arch_update_cpu_topology(void)
+int numa_update_cpu_topology(bool cpus_locked)
{
unsigned int cpu, sibling, changed = 0;
struct topology_update_data *updates, *ud;
@@ -1400,15 +1402,23 @@ int arch_update_cpu_topology(void)
if (!cpumask_weight(&updated_cpus))
goto out;
- stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
+ if (cpus_locked)
+ stop_machine_cpuslocked(update_cpu_topology, &updates[0],
+ &updated_cpus);
+ else
+ stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
/*
* Update the numa-cpu lookup table with the new mappings, even for
* offline CPUs. It is best to perform this update from the stop-
* machine context.
*/
- stop_machine(update_lookup_table, &updates[0],
+ if (cpus_locked)
+ stop_machine_cpuslocked(update_lookup_table, &updates[0],
cpumask_of(raw_smp_processor_id()));
+ else
+ stop_machine(update_lookup_table, &updates[0],
+ cpumask_of(raw_smp_processor_id()));
for (ud = &updates[0]; ud; ud = ud->next) {
unregister_cpu_under_node(ud->cpu, ud->old_nid);
@@ -1426,6 +1436,12 @@ out:
return changed;
}
+int arch_update_cpu_topology(void)
+{
+ lockdep_assert_cpus_held();
+ return numa_update_cpu_topology(true);
+}
+
static void topology_work_fn(struct work_struct *work)
{
rebuild_sched_domains();
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index cbd82fde5770..09ceea6175ba 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs_user_copy)
{
regs_user->regs = task_pt_regs(current);
- regs_user->abi = perf_reg_abi(current);
+ regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
+ PERF_SAMPLE_REGS_ABI_NONE;
}
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index e6f444b46207..b5d960d6db3d 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
return mmio_atsd_reg;
}
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
{
unsigned long launch;
@@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
/* PID */
launch |= pid << PPC_BITLSHIFT(38);
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
+
/* Invalidating the entire process doesn't use a va */
return mmio_launch_invalidate(npu, launch, 0);
}
static int mmio_invalidate_va(struct npu *npu, unsigned long va,
- unsigned long pid)
+ unsigned long pid, bool flush)
{
unsigned long launch;
@@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
/* PID */
launch |= pid << PPC_BITLSHIFT(38);
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
+
return mmio_launch_invalidate(npu, launch, va);
}
#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
+struct mmio_atsd_reg {
+ struct npu *npu;
+ int reg;
+};
+
+static void mmio_invalidate_wait(
+ struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+ struct npu *npu;
+ int i, reg;
+
+ /* Wait for all invalidations to complete */
+ for (i = 0; i <= max_npu2_index; i++) {
+ if (mmio_atsd_reg[i].reg < 0)
+ continue;
+
+ /* Wait for completion */
+ npu = mmio_atsd_reg[i].npu;
+ reg = mmio_atsd_reg[i].reg;
+ while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+ cpu_relax();
+
+ put_mmio_atsd_reg(npu, reg);
+
+ /*
+ * The GPU requires two flush ATSDs to ensure all entries have
+ * been flushed. We use PID 0 as it will never be used for a
+ * process on the GPU.
+ */
+ if (flush)
+ mmio_invalidate_pid(npu, 0, true);
+ }
+}
+
/*
* Invalidate either a single address or an entire PID depending on
* the value of va.
*/
static void mmio_invalidate(struct npu_context *npu_context, int va,
- unsigned long address)
+ unsigned long address, bool flush)
{
- int i, j, reg;
+ int i, j;
struct npu *npu;
struct pnv_phb *nphb;
struct pci_dev *npdev;
- struct {
- struct npu *npu;
- int reg;
- } mmio_atsd_reg[NV_MAX_NPUS];
+ struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
unsigned long pid = npu_context->mm->context.id;
/*
@@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
if (va)
mmio_atsd_reg[i].reg =
- mmio_invalidate_va(npu, address, pid);
+ mmio_invalidate_va(npu, address, pid,
+ flush);
else
mmio_atsd_reg[i].reg =
- mmio_invalidate_pid(npu, pid);
+ mmio_invalidate_pid(npu, pid, flush);
/*
* The NPU hardware forwards the shootdown to all GPUs
@@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
*/
flush_tlb_mm(npu_context->mm);
- /* Wait for all invalidations to complete */
- for (i = 0; i <= max_npu2_index; i++) {
- if (mmio_atsd_reg[i].reg < 0)
- continue;
-
- /* Wait for completion */
- npu = mmio_atsd_reg[i].npu;
- reg = mmio_atsd_reg[i].reg;
- while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
- cpu_relax();
- put_mmio_atsd_reg(npu, reg);
- }
+ mmio_invalidate_wait(mmio_atsd_reg, flush);
+ if (flush)
+ /* Wait for the flush to complete */
+ mmio_invalidate_wait(mmio_atsd_reg, false);
}
static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
* There should be no more translation requests for this PID, but we
* need to ensure any entries for it are removed from the TLB.
*/
- mmio_invalidate(npu_context, 0, 0);
+ mmio_invalidate(npu_context, 0, 0, true);
}
static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
{
struct npu_context *npu_context = mn_to_npu_context(mn);
- mmio_invalidate(npu_context, 1, address);
+ mmio_invalidate(npu_context, 1, address, true);
}
static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
{
struct npu_context *npu_context = mn_to_npu_context(mn);
- mmio_invalidate(npu_context, 1, address);
+ mmio_invalidate(npu_context, 1, address, true);
}
static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
struct npu_context *npu_context = mn_to_npu_context(mn);
unsigned long address;
- for (address = start; address <= end; address += PAGE_SIZE)
- mmio_invalidate(npu_context, 1, address);
+ for (address = start; address < end; address += PAGE_SIZE)
+ mmio_invalidate(npu_context, 1, address, false);
+
+ /* Do the flush only on the final addess == end */
+ mmio_invalidate(npu_context, 1, address, true);
}
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
/* No nvlink associated with this GPU device */
return ERR_PTR(-ENODEV);
- if (!mm) {
- /* kernel thread contexts are not supported */
+ if (!mm || mm->context.id == 0) {
+ /*
+ * Kernel thread contexts are not supported and context id 0 is
+ * reserved on the GPU.
+ */
return ERR_PTR(-EINVAL);
}
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 8c6119280c13..309876d699e9 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -348,7 +348,7 @@ static int set_subcores_per_core(int new_mode)
state->master = 0;
}
- get_online_cpus();
+ cpus_read_lock();
/* This cpu will update the globals before exiting stop machine */
this_cpu_ptr(&split_state)->master = 1;
@@ -356,9 +356,10 @@ static int set_subcores_per_core(int new_mode)
/* Ensure state is consistent before we call the other cpus */
mb();
- stop_machine(cpu_update_split_mode, &new_mode, cpu_online_mask);
+ stop_machine_cpuslocked(cpu_update_split_mode, &new_mode,
+ cpu_online_mask);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index 2d2e5f80a3d3..5cc35d6b94b6 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -471,11 +471,13 @@ static ssize_t modalias_show(struct device *_dev, struct device_attribute *a,
return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
}
+static DEVICE_ATTR_RO(modalias);
-static struct device_attribute ps3_system_bus_dev_attrs[] = {
- __ATTR_RO(modalias),
- __ATTR_NULL,
+static struct attribute *ps3_system_bus_dev_attrs[] = {
+ &dev_attr_modalias.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(ps3_system_bus_dev);
struct bus_type ps3_system_bus_type = {
.name = "ps3_system_bus",
@@ -484,7 +486,7 @@ struct bus_type ps3_system_bus_type = {
.probe = ps3_system_bus_probe,
.remove = ps3_system_bus_remove,
.shutdown = ps3_system_bus_shutdown,
- .dev_attrs = ps3_system_bus_dev_attrs,
+ .dev_groups = ps3_system_bus_dev_groups,
};
static int __init ps3_system_bus_init(void)
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index bda18d8e1674..39187696ee74 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -588,7 +588,7 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
return sprintf(buf, "%s\n", "memory,cpu");
}
-static CLASS_ATTR(dlpar, S_IWUSR | S_IRUSR, dlpar_show, dlpar_store);
+static CLASS_ATTR_RW(dlpar);
static int __init pseries_dlpar_init(void)
{
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index b363e439ddb9..52146b1356d2 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -397,6 +397,7 @@ static ssize_t devspec_show(struct device *dev,
ofdev = to_platform_device(dev);
return sprintf(buf, "%s\n", ofdev->dev.of_node->full_name);
}
+static DEVICE_ATTR_RO(devspec);
static ssize_t name_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -406,19 +407,22 @@ static ssize_t name_show(struct device *dev,
ofdev = to_platform_device(dev);
return sprintf(buf, "%s\n", ofdev->dev.of_node->name);
}
+static DEVICE_ATTR_RO(name);
static ssize_t modalias_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return of_device_modalias(dev, buf, PAGE_SIZE);
}
+static DEVICE_ATTR_RO(modalias);
-static struct device_attribute ibmebus_bus_device_attrs[] = {
- __ATTR_RO(devspec),
- __ATTR_RO(name),
- __ATTR_RO(modalias),
- __ATTR_NULL
+static struct attribute *ibmebus_bus_device_attrs[] = {
+ &dev_attr_devspec.attr,
+ &dev_attr_name.attr,
+ &dev_attr_modalias.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(ibmebus_bus_device);
struct bus_type ibmebus_bus_type = {
.name = "ibmebus",
@@ -428,7 +432,7 @@ struct bus_type ibmebus_bus_type = {
.probe = ibmebus_bus_device_probe,
.remove = ibmebus_bus_device_remove,
.shutdown = ibmebus_bus_device_shutdown,
- .dev_attrs = ibmebus_bus_device_attrs,
+ .dev_groups = ibmebus_bus_device_groups,
};
EXPORT_SYMBOL(ibmebus_bus_type);
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 5a0c7ba429ce..2da4851eff99 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -349,8 +349,9 @@ void post_mobility_fixup(void)
return;
}
-static ssize_t migrate_store(struct class *class, struct class_attribute *attr,
- const char *buf, size_t count)
+static ssize_t migration_store(struct class *class,
+ struct class_attribute *attr, const char *buf,
+ size_t count)
{
u64 streamid;
int rc;
@@ -380,7 +381,7 @@ static ssize_t migrate_store(struct class *class, struct class_attribute *attr,
*/
#define MIGRATION_API_VERSION 1
-static CLASS_ATTR(migration, S_IWUSR, NULL, migrate_store);
+static CLASS_ATTR_WO(migration);
static CLASS_ATTR_STRING(api_version, S_IRUGO, __stringify(MIGRATION_API_VERSION));
static int __init mobility_sysfs_init(void)
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 28b09fd797ec..117beb9e8786 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -948,21 +948,21 @@ static void vio_cmo_bus_init(void)
/* sysfs device functions and data structures for CMO */
#define viodev_cmo_rd_attr(name) \
-static ssize_t viodev_cmo_##name##_show(struct device *dev, \
+static ssize_t cmo_##name##_show(struct device *dev, \
struct device_attribute *attr, \
char *buf) \
{ \
return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \
}
-static ssize_t viodev_cmo_allocs_failed_show(struct device *dev,
+static ssize_t cmo_allocs_failed_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct vio_dev *viodev = to_vio_dev(dev);
return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
}
-static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
+static ssize_t cmo_allocs_failed_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct vio_dev *viodev = to_vio_dev(dev);
@@ -970,7 +970,7 @@ static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
return count;
}
-static ssize_t viodev_cmo_desired_set(struct device *dev,
+static ssize_t cmo_desired_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct vio_dev *viodev = to_vio_dev(dev);
@@ -993,27 +993,37 @@ static ssize_t name_show(struct device *, struct device_attribute *, char *);
static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
char *buf);
-static struct device_attribute vio_cmo_dev_attrs[] = {
- __ATTR_RO(name),
- __ATTR_RO(devspec),
- __ATTR_RO(modalias),
- __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
- viodev_cmo_desired_show, viodev_cmo_desired_set),
- __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL),
- __ATTR(cmo_allocated, S_IRUGO, viodev_cmo_allocated_show, NULL),
- __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
- viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset),
- __ATTR_NULL
+
+static struct device_attribute dev_attr_name;
+static struct device_attribute dev_attr_devspec;
+static struct device_attribute dev_attr_modalias;
+
+static DEVICE_ATTR_RO(cmo_entitled);
+static DEVICE_ATTR_RO(cmo_allocated);
+static DEVICE_ATTR_RW(cmo_desired);
+static DEVICE_ATTR_RW(cmo_allocs_failed);
+
+static struct attribute *vio_cmo_dev_attrs[] = {
+ &dev_attr_name.attr,
+ &dev_attr_devspec.attr,
+ &dev_attr_modalias.attr,
+ &dev_attr_cmo_entitled.attr,
+ &dev_attr_cmo_allocated.attr,
+ &dev_attr_cmo_desired.attr,
+ &dev_attr_cmo_allocs_failed.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(vio_cmo_dev);
/* sysfs bus functions and data structures for CMO */
#define viobus_cmo_rd_attr(name) \
-static ssize_t cmo_##name##_show(struct bus_type *bt, char *buf) \
+static ssize_t cmo_bus_##name##_show(struct bus_type *bt, char *buf) \
{ \
return sprintf(buf, "%lu\n", vio_cmo.name); \
} \
-static BUS_ATTR_RO(cmo_##name)
+static struct bus_attribute bus_attr_cmo_bus_##name = \
+ __ATTR(cmo_##name, S_IRUGO, cmo_bus_##name##_show, NULL)
#define viobus_cmo_pool_rd_attr(name, var) \
static ssize_t \
@@ -1051,11 +1061,11 @@ static ssize_t cmo_high_store(struct bus_type *bt, const char *buf,
static BUS_ATTR_RW(cmo_high);
static struct attribute *vio_bus_attrs[] = {
- &bus_attr_cmo_entitled.attr,
- &bus_attr_cmo_spare.attr,
- &bus_attr_cmo_min.attr,
- &bus_attr_cmo_desired.attr,
- &bus_attr_cmo_curr.attr,
+ &bus_attr_cmo_bus_entitled.attr,
+ &bus_attr_cmo_bus_spare.attr,
+ &bus_attr_cmo_bus_min.attr,
+ &bus_attr_cmo_bus_desired.attr,
+ &bus_attr_cmo_bus_curr.attr,
&bus_attr_cmo_high.attr,
&bus_attr_cmo_reserve_size.attr,
&bus_attr_cmo_excess_size.attr,
@@ -1066,7 +1076,7 @@ ATTRIBUTE_GROUPS(vio_bus);
static void vio_cmo_sysfs_init(void)
{
- vio_bus_type.dev_attrs = vio_cmo_dev_attrs;
+ vio_bus_type.dev_groups = vio_cmo_dev_groups;
vio_bus_type.bus_groups = vio_bus_groups;
}
#else /* CONFIG_PPC_SMLPAR */
@@ -1537,6 +1547,7 @@ static ssize_t name_show(struct device *dev,
{
return sprintf(buf, "%s\n", to_vio_dev(dev)->name);
}
+static DEVICE_ATTR_RO(name);
static ssize_t devspec_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -1545,6 +1556,7 @@ static ssize_t devspec_show(struct device *dev,
return sprintf(buf, "%s\n", of_node_full_name(of_node));
}
+static DEVICE_ATTR_RO(devspec);
static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -1566,13 +1578,15 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp);
}
+static DEVICE_ATTR_RO(modalias);
-static struct device_attribute vio_dev_attrs[] = {
- __ATTR_RO(name),
- __ATTR_RO(devspec),
- __ATTR_RO(modalias),
- __ATTR_NULL
+static struct attribute *vio_dev_attrs[] = {
+ &dev_attr_name.attr,
+ &dev_attr_devspec.attr,
+ &dev_attr_modalias.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(vio_dev);
void vio_unregister_device(struct vio_dev *viodev)
{
@@ -1608,7 +1622,7 @@ static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env)
struct bus_type vio_bus_type = {
.name = "vio",
- .dev_attrs = vio_dev_attrs,
+ .dev_groups = vio_dev_groups,
.uevent = vio_hotplug,
.match = vio_bus_match,
.probe = vio_bus_probe,
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 6967addc6a89..37abe86e5bc9 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -64,6 +64,7 @@ config ARCH_SUPPORTS_UPROBES
config S390
def_bool y
+ select ARCH_BINFMT_ELF_STATE
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_GCOV_PROFILE_ALL
@@ -184,7 +185,7 @@ config SCHED_OMIT_FRAME_POINTER
config PGTABLE_LEVELS
int
- default 4
+ default 5
source "init/Kconfig"
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 678d9863e3f0..ad4bd777768d 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -6,7 +6,8 @@ obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
-obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o paes_s390.o
+obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
+obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o
obj-$(CONFIG_S390_PRNG) += prng.o
obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
index 9317b3e645e2..36aefc07d10c 100644
--- a/arch/s390/crypto/arch_random.c
+++ b/arch/s390/crypto/arch_random.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/atomic.h>
+#include <linux/random.h>
#include <linux/static_key.h>
#include <asm/cpacf.h>
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 45092b12f54f..b3c88479feba 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -1,10 +1,12 @@
generic-y += asm-offsets.h
generic-y += cacheflush.h
generic-y += clkdev.h
+generic-y += device.h
generic-y += dma-contiguous.h
generic-y += div64.h
generic-y += emergency-restart.h
generic-y += export.h
+generic-y += fb.h
generic-y += irq_regs.h
generic-y += irq_work.h
generic-y += kmap_types.h
diff --git a/arch/s390/include/asm/device.h b/arch/s390/include/asm/device.h
deleted file mode 100644
index 5203fc87f080..000000000000
--- a/arch/s390/include/asm/device.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-struct dev_archdata {
-};
-
-struct pdev_archdata {
-};
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
index 67026300c88e..144809a3f4f6 100644
--- a/arch/s390/include/asm/eadm.h
+++ b/arch/s390/include/asm/eadm.h
@@ -3,6 +3,7 @@
#include <linux/types.h>
#include <linux/device.h>
+#include <linux/blkdev.h>
struct arqb {
u64 data;
@@ -105,13 +106,14 @@ struct scm_driver {
int (*probe) (struct scm_device *scmdev);
int (*remove) (struct scm_device *scmdev);
void (*notify) (struct scm_device *scmdev, enum scm_event event);
- void (*handler) (struct scm_device *scmdev, void *data, int error);
+ void (*handler) (struct scm_device *scmdev, void *data,
+ blk_status_t error);
};
int scm_driver_register(struct scm_driver *scmdrv);
void scm_driver_unregister(struct scm_driver *scmdrv);
int eadm_start_aob(struct aob *aob);
-void scm_irq_handler(struct aob *aob, int error);
+void scm_irq_handler(struct aob *aob, blk_status_t error);
#endif /* _ASM_S390_EADM_H */
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index e8f623041769..ec024c08dabe 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -117,6 +117,9 @@
#define ELF_DATA ELFDATA2MSB
#define ELF_ARCH EM_S390
+/* s390 specific phdr types */
+#define PT_S390_PGSTE 0x70000000
+
/*
* ELF register definitions..
*/
@@ -151,6 +154,35 @@ extern unsigned int vdso_enabled;
&& (x)->e_ident[EI_CLASS] == ELF_CLASS)
#define compat_start_thread start_thread31
+struct arch_elf_state {
+ int rc;
+};
+
+#define INIT_ARCH_ELF_STATE { .rc = 0 }
+
+#define arch_check_elf(ehdr, interp, interp_ehdr, state) (0)
+#ifdef CONFIG_PGSTE
+#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \
+({ \
+ struct arch_elf_state *_state = state; \
+ if ((phdr)->p_type == PT_S390_PGSTE && \
+ !page_table_allocate_pgste && \
+ !test_thread_flag(TIF_PGSTE) && \
+ !current->mm->context.alloc_pgste) { \
+ set_thread_flag(TIF_PGSTE); \
+ set_pt_regs_flag(task_pt_regs(current), \
+ PIF_SYSCALL_RESTART); \
+ _state->rc = -EAGAIN; \
+ } \
+ _state->rc; \
+})
+#else
+#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \
+({ \
+ (state)->rc; \
+})
+#endif
+
/* For SVR4/S390 the function pointer to be registered with `atexit` is
passed in R14. */
#define ELF_PLAT_INIT(_r, load_addr) \
diff --git a/arch/s390/include/asm/fb.h b/arch/s390/include/asm/fb.h
deleted file mode 100644
index c7df38030992..000000000000
--- a/arch/s390/include/asm/fb.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_FB_H_
-#define _ASM_FB_H_
-#include <linux/fb.h>
-
-#define fb_pgprotect(...) do {} while (0)
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
- return 0;
-}
-
-#endif /* _ASM_FB_H_ */
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index 437e9af96688..904e4b3af95d 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -25,8 +25,6 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
#define IO_SPACE_LIMIT 0
-#ifdef CONFIG_PCI
-
#define ioremap_nocache(addr, size) ioremap(addr, size)
#define ioremap_wc ioremap_nocache
#define ioremap_wt ioremap_nocache
@@ -49,6 +47,8 @@ static inline void ioport_unmap(void __iomem *p)
{
}
+#ifdef CONFIG_PCI
+
/*
* s390 needs a private implementation of pci_iomap since ioremap with its
* offset parameter isn't sufficient. That's because BAR spaces are not
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 65d07ac34647..6baae236f461 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -107,6 +107,20 @@ struct esca_block {
struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
} __packed;
+/*
+ * This struct is used to store some machine check info from lowcore
+ * for machine checks that happen while the guest is running.
+ * This info in host's lowcore might be overwritten by a second machine
+ * check from host when host is in the machine check's high-level handling.
+ * The size is 24 bytes.
+ */
+struct mcck_volatile_info {
+ __u64 mcic;
+ __u64 failing_storage_address;
+ __u32 ext_damage_code;
+ __u32 reserved;
+};
+
#define CPUSTAT_STOPPED 0x80000000
#define CPUSTAT_WAIT 0x10000000
#define CPUSTAT_ECALL_PEND 0x08000000
@@ -264,7 +278,8 @@ struct kvm_s390_itdb {
struct sie_page {
struct kvm_s390_sie_block sie_block;
- __u8 reserved200[1024]; /* 0x0200 */
+ struct mcck_volatile_info mcck_info; /* 0x0200 */
+ __u8 reserved218[1000]; /* 0x0218 */
struct kvm_s390_itdb itdb; /* 0x0600 */
__u8 reserved700[2304]; /* 0x0700 */
} __packed;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 8712e11bead4..4541ac44b35f 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -25,7 +25,9 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
#ifdef CONFIG_PGSTE
- mm->context.alloc_pgste = page_table_allocate_pgste;
+ mm->context.alloc_pgste = page_table_allocate_pgste ||
+ test_thread_flag(TIF_PGSTE) ||
+ current->mm->context.alloc_pgste;
mm->context.has_pgste = 0;
mm->context.use_skey = 0;
mm->context.use_cmma = 0;
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index e3e8895f5d3e..13623b9991d4 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -14,7 +14,14 @@
#include <linux/const.h>
#include <linux/types.h>
+#define MCIC_SUBCLASS_MASK (1ULL<<63 | 1ULL<<62 | 1ULL<<61 | \
+ 1ULL<<59 | 1ULL<<58 | 1ULL<<56 | \
+ 1ULL<<55 | 1ULL<<54 | 1ULL<<53 | \
+ 1ULL<<52 | 1ULL<<47 | 1ULL<<46 | \
+ 1ULL<<45 | 1ULL<<44)
#define MCCK_CODE_SYSTEM_DAMAGE _BITUL(63)
+#define MCCK_CODE_EXT_DAMAGE _BITUL(63 - 5)
+#define MCCK_CODE_CP _BITUL(63 - 9)
#define MCCK_CODE_CPU_TIMER_VALID _BITUL(63 - 46)
#define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20)
#define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23)
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 69b8a41fca84..624deaa44230 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -74,6 +74,7 @@ typedef struct { unsigned long pgste; } pgste_t;
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pud; } pud_t;
+typedef struct { unsigned long p4d; } p4d_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef pte_t *pgtable_t;
@@ -82,12 +83,14 @@ typedef pte_t *pgtable_t;
#define pte_val(x) ((x).pte)
#define pmd_val(x) ((x).pmd)
#define pud_val(x) ((x).pud)
+#define p4d_val(x) ((x).p4d)
#define pgd_val(x) ((x).pgd)
#define __pgste(x) ((pgste_t) { (x) } )
#define __pte(x) ((pte_t) { (x) } )
#define __pmd(x) ((pmd_t) { (x) } )
#define __pud(x) ((pud_t) { (x) } )
+#define __p4d(x) ((p4d_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 4e3186649578..f36b4b726057 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -70,11 +70,10 @@ struct zpci_fmb {
} __packed __aligned(128);
enum zpci_state {
- ZPCI_FN_STATE_RESERVED,
- ZPCI_FN_STATE_STANDBY,
- ZPCI_FN_STATE_CONFIGURED,
- ZPCI_FN_STATE_ONLINE,
- NR_ZPCI_FN_STATES,
+ ZPCI_FN_STATE_STANDBY = 0,
+ ZPCI_FN_STATE_CONFIGURED = 1,
+ ZPCI_FN_STATE_RESERVED = 2,
+ ZPCI_FN_STATE_ONLINE = 3,
};
struct zpci_bar_struct {
@@ -109,7 +108,7 @@ struct zpci_dev {
u64 msi_addr; /* MSI address */
unsigned int max_msi; /* maximum number of MSI's */
struct airq_iv *aibv; /* adapter interrupt bit vector */
- unsigned int aisb; /* number of the summary bit */
+ unsigned long aisb; /* number of the summary bit */
/* DMA stuff */
unsigned long *dma_table;
@@ -159,11 +158,12 @@ extern const struct attribute_group *zpci_attr_groups[];
----------------------------------------------------------------------------- */
/* Base stuff */
int zpci_create_device(struct zpci_dev *);
+void zpci_remove_device(struct zpci_dev *zdev);
int zpci_enable_device(struct zpci_dev *);
int zpci_disable_device(struct zpci_dev *);
-void zpci_stop_device(struct zpci_dev *);
int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
int zpci_unregister_ioat(struct zpci_dev *, u8);
+void zpci_remove_reserved_devices(void);
/* CLP */
int clp_scan_pci_devices(void);
@@ -172,6 +172,7 @@ int clp_rescan_pci_devices_simple(void);
int clp_add_pci_device(u32, u32, int);
int clp_enable_fh(struct zpci_dev *, u8);
int clp_disable_fh(struct zpci_dev *);
+int clp_get_state(u32 fid, enum zpci_state *state);
#ifdef CONFIG_PCI
/* Error handling and recovery */
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index 649eb62c52b3..34abcf275799 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -76,7 +76,7 @@ struct zpci_fib {
u32 gd;
} __packed __aligned(8);
-int zpci_mod_fc(u64 req, struct zpci_fib *fib);
+u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status);
int zpci_refresh_trans(u64 fn, u64 addr, u64 range);
int zpci_load(u64 *data, u64 req, u64 offset);
int zpci_store(u64 data, u64 req, u64 offset);
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 166f703dad7c..bb0ff1bb0c4a 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -51,12 +51,24 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm)
return _SEGMENT_ENTRY_EMPTY;
if (mm->context.asce_limit <= (1UL << 42))
return _REGION3_ENTRY_EMPTY;
- return _REGION2_ENTRY_EMPTY;
+ if (mm->context.asce_limit <= (1UL << 53))
+ return _REGION2_ENTRY_EMPTY;
+ return _REGION1_ENTRY_EMPTY;
}
-int crst_table_upgrade(struct mm_struct *);
+int crst_table_upgrade(struct mm_struct *mm, unsigned long limit);
void crst_table_downgrade(struct mm_struct *);
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+ unsigned long *table = crst_table_alloc(mm);
+
+ if (table)
+ crst_table_init(table, _REGION2_ENTRY_EMPTY);
+ return (p4d_t *) table;
+}
+#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d)
+
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
unsigned long *table = crst_table_alloc(mm);
@@ -86,9 +98,14 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
crst_table_free(mm, (unsigned long *) pmd);
}
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
+{
+ pgd_val(*pgd) = _REGION1_ENTRY | __pa(p4d);
+}
+
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
{
- pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud);
+ p4d_val(*p4d) = _REGION2_ENTRY | __pa(pud);
}
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index e6e3b887bee3..57057fb1cc07 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -24,7 +24,6 @@
* the S390 page table tree.
*/
#ifndef __ASSEMBLY__
-#include <asm-generic/5level-fixup.h>
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
@@ -87,12 +86,15 @@ extern unsigned long zero_page_mask;
*/
#define PMD_SHIFT 20
#define PUD_SHIFT 31
-#define PGDIR_SHIFT 42
+#define P4D_SHIFT 42
+#define PGDIR_SHIFT 53
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
#define PUD_SIZE (1UL << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE-1))
+#define P4D_SIZE (1UL << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE-1))
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
@@ -105,6 +107,7 @@ extern unsigned long zero_page_mask;
#define PTRS_PER_PTE 256
#define PTRS_PER_PMD 2048
#define PTRS_PER_PUD 2048
+#define PTRS_PER_P4D 2048
#define PTRS_PER_PGD 2048
#define FIRST_USER_ADDRESS 0UL
@@ -115,6 +118,8 @@ extern unsigned long zero_page_mask;
printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e))
#define pud_ERROR(e) \
printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e))
+#define p4d_ERROR(e) \
+ printk("%s:%d: bad p4d %p.\n", __FILE__, __LINE__, (void *) p4d_val(e))
#define pgd_ERROR(e) \
printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))
@@ -296,8 +301,6 @@ static inline int is_module_addr(void *addr)
#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
#define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */
-#define _REGION3_ENTRY_ORIGIN ~0x7ffUL/* region third table origin */
-
#define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */
#define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */
#define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */
@@ -310,8 +313,8 @@ static inline int is_module_addr(void *addr)
#define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */
#endif
-#define _REGION_ENTRY_BITS 0xfffffffffffff227UL
-#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe27UL
+#define _REGION_ENTRY_BITS 0xfffffffffffff22fUL
+#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
/* Bits in the segment table entry */
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
@@ -560,18 +563,23 @@ static inline void crdte(unsigned long old, unsigned long new,
}
/*
- * pgd/pmd/pte query functions
+ * pgd/p4d/pud/pmd/pte query functions
*/
+static inline int pgd_folded(pgd_t pgd)
+{
+ return (pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1;
+}
+
static inline int pgd_present(pgd_t pgd)
{
- if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
+ if (pgd_folded(pgd))
return 1;
return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL;
}
static inline int pgd_none(pgd_t pgd)
{
- if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
+ if (pgd_folded(pgd))
return 0;
return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
}
@@ -589,16 +597,48 @@ static inline int pgd_bad(pgd_t pgd)
return (pgd_val(pgd) & mask) != 0;
}
+static inline int p4d_folded(p4d_t p4d)
+{
+ return (p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+ if (p4d_folded(p4d))
+ return 1;
+ return (p4d_val(p4d) & _REGION_ENTRY_ORIGIN) != 0UL;
+}
+
+static inline int p4d_none(p4d_t p4d)
+{
+ if (p4d_folded(p4d))
+ return 0;
+ return p4d_val(p4d) == _REGION2_ENTRY_EMPTY;
+}
+
+static inline unsigned long p4d_pfn(p4d_t p4d)
+{
+ unsigned long origin_mask;
+
+ origin_mask = _REGION_ENTRY_ORIGIN;
+ return (p4d_val(p4d) & origin_mask) >> PAGE_SHIFT;
+}
+
+static inline int pud_folded(pud_t pud)
+{
+ return (pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3;
+}
+
static inline int pud_present(pud_t pud)
{
- if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
+ if (pud_folded(pud))
return 1;
return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL;
}
static inline int pud_none(pud_t pud)
{
- if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
+ if (pud_folded(pud))
return 0;
return pud_val(pud) == _REGION3_ENTRY_EMPTY;
}
@@ -614,7 +654,7 @@ static inline unsigned long pud_pfn(pud_t pud)
{
unsigned long origin_mask;
- origin_mask = _REGION3_ENTRY_ORIGIN;
+ origin_mask = _REGION_ENTRY_ORIGIN;
if (pud_large(pud))
origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
return (pud_val(pud) & origin_mask) >> PAGE_SHIFT;
@@ -641,6 +681,13 @@ static inline int pud_bad(pud_t pud)
return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0;
}
+static inline int p4d_bad(p4d_t p4d)
+{
+ if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
+ return pud_bad(__pud(p4d_val(p4d)));
+ return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0;
+}
+
static inline int pmd_present(pmd_t pmd)
{
return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY;
@@ -794,8 +841,14 @@ static inline int pte_unused(pte_t pte)
static inline void pgd_clear(pgd_t *pgd)
{
- if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
- pgd_val(*pgd) = _REGION2_ENTRY_EMPTY;
+ if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
+ pgd_val(*pgd) = _REGION1_ENTRY_EMPTY;
+}
+
+static inline void p4d_clear(p4d_t *p4d)
+{
+ if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+ p4d_val(*p4d) = _REGION2_ENTRY_EMPTY;
}
static inline void pud_clear(pud_t *pud)
@@ -1089,6 +1142,7 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
}
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
@@ -1098,19 +1152,31 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
+#define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN)
#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
-static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
- pud_t *pud = (pud_t *) pgd;
- if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
- pud = (pud_t *) pgd_deref(*pgd);
- return pud + pud_index(address);
+ p4d_t *p4d = (p4d_t *) pgd;
+
+ if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
+ p4d = (p4d_t *) pgd_deref(*pgd);
+ return p4d + p4d_index(address);
+}
+
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ pud_t *pud = (pud_t *) p4d;
+
+ if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+ pud = (pud_t *) p4d_deref(*p4d);
+ return pud + pud_index(address);
}
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
pmd_t *pmd = (pmd_t *) pud;
+
if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
pmd = (pmd_t *) pud_deref(*pud);
return pmd + pmd_index(address);
@@ -1122,6 +1188,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
#define pud_page(pud) pfn_to_page(pud_pfn(pud))
+#define p4d_page(pud) pfn_to_page(p4d_pfn(p4d))
/* Find an entry in the lowest level page table.. */
#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 60d395fdc864..c25d57e0aad3 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -20,6 +20,7 @@
#define CIF_FPU 4 /* restore FPU registers */
#define CIF_IGNORE_IRQ 5 /* ignore interrupt (for udelay) */
#define CIF_ENABLED_WAIT 6 /* in enabled wait state */
+#define CIF_MCCK_GUEST 7 /* machine check happening in guest */
#define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING)
#define _CIF_ASCE_PRIMARY _BITUL(CIF_ASCE_PRIMARY)
@@ -28,6 +29,7 @@
#define _CIF_FPU _BITUL(CIF_FPU)
#define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ)
#define _CIF_ENABLED_WAIT _BITUL(CIF_ENABLED_WAIT)
+#define _CIF_MCCK_GUEST _BITUL(CIF_MCCK_GUEST)
#ifndef __ASSEMBLY__
@@ -92,11 +94,11 @@ extern void execve_tail(void);
*/
#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? \
- (1UL << 31) : (1UL << 53))
+ (1UL << 31) : -PAGE_SIZE)
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
(1UL << 30) : (1UL << 41))
#define TASK_SIZE TASK_SIZE_OF(current)
-#define TASK_SIZE_MAX (1UL << 53)
+#define TASK_SIZE_MAX (-PAGE_SIZE)
#define STACK_TOP (test_thread_flag(TIF_31BIT) ? \
(1UL << 31) : (1UL << 42))
@@ -221,11 +223,6 @@ extern void release_thread(struct task_struct *);
/* Free guarded storage control block for current */
void exit_thread_gs(void);
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(tsk) ((struct pt_regs *) \
(task_stack_page(tsk) + THREAD_SIZE) - 1)
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 99bc456cc26a..853b01245c20 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -11,9 +11,11 @@
#define PIF_SYSCALL 0 /* inside a system call */
#define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */
+#define PIF_SYSCALL_RESTART 2 /* restart the current system call */
#define _PIF_SYSCALL _BITUL(PIF_SYSCALL)
#define _PIF_PER_TRAP _BITUL(PIF_PER_TRAP)
+#define _PIF_SYSCALL_RESTART _BITUL(PIF_SYSCALL_RESTART)
#ifndef __ASSEMBLY__
@@ -24,38 +26,38 @@
PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
struct psw_bits {
- unsigned long : 1;
- unsigned long r : 1; /* PER-Mask */
- unsigned long : 3;
- unsigned long t : 1; /* DAT Mode */
- unsigned long i : 1; /* Input/Output Mask */
- unsigned long e : 1; /* External Mask */
- unsigned long key : 4; /* PSW Key */
- unsigned long : 1;
- unsigned long m : 1; /* Machine-Check Mask */
- unsigned long w : 1; /* Wait State */
- unsigned long p : 1; /* Problem State */
- unsigned long as : 2; /* Address Space Control */
- unsigned long cc : 2; /* Condition Code */
- unsigned long pm : 4; /* Program Mask */
- unsigned long ri : 1; /* Runtime Instrumentation */
- unsigned long : 6;
- unsigned long eaba : 2; /* Addressing Mode */
- unsigned long : 31;
- unsigned long ia : 64; /* Instruction Address */
+ unsigned long : 1;
+ unsigned long per : 1; /* PER-Mask */
+ unsigned long : 3;
+ unsigned long dat : 1; /* DAT Mode */
+ unsigned long io : 1; /* Input/Output Mask */
+ unsigned long ext : 1; /* External Mask */
+ unsigned long key : 4; /* PSW Key */
+ unsigned long : 1;
+ unsigned long mcheck : 1; /* Machine-Check Mask */
+ unsigned long wait : 1; /* Wait State */
+ unsigned long pstate : 1; /* Problem State */
+ unsigned long as : 2; /* Address Space Control */
+ unsigned long cc : 2; /* Condition Code */
+ unsigned long pm : 4; /* Program Mask */
+ unsigned long ri : 1; /* Runtime Instrumentation */
+ unsigned long : 6;
+ unsigned long eaba : 2; /* Addressing Mode */
+ unsigned long : 31;
+ unsigned long ia : 64; /* Instruction Address */
};
enum {
- PSW_AMODE_24BIT = 0,
- PSW_AMODE_31BIT = 1,
- PSW_AMODE_64BIT = 3
+ PSW_BITS_AMODE_24BIT = 0,
+ PSW_BITS_AMODE_31BIT = 1,
+ PSW_BITS_AMODE_64BIT = 3
};
enum {
- PSW_AS_PRIMARY = 0,
- PSW_AS_ACCREG = 1,
- PSW_AS_SECONDARY = 2,
- PSW_AS_HOME = 3
+ PSW_BITS_AS_PRIMARY = 0,
+ PSW_BITS_AS_ACCREG = 1,
+ PSW_BITS_AS_SECONDARY = 2,
+ PSW_BITS_AS_HOME = 3
};
#define psw_bits(__psw) (*({ \
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 72df5f2de6b0..020a8814d511 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -59,7 +59,7 @@ static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
int cc;
cc = ____pcpu_sigp(addr, order, parm, &_status);
- if (status && cc == 1)
+ if (status && cc == SIGP_CC_STATUS_STORED)
*status = _status;
return cc;
}
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index e784bed6ed7f..2b498e58b914 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -109,7 +109,7 @@ struct sysinfo_2_2_2 {
unsigned short cpus_shared;
char reserved_4[3];
unsigned char vsne;
- uuid_be uuid;
+ uuid_t uuid;
char reserved_5[160];
char ext_name[256];
};
@@ -134,7 +134,7 @@ struct sysinfo_3_2_2 {
char reserved_1[3];
unsigned char evmne;
unsigned int reserved_2;
- uuid_be uuid;
+ uuid_t uuid;
} vm[8];
char reserved_3[1504];
char ext_names[8][256];
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 0b3ee083a665..1aecf432c48d 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -58,6 +58,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define TIF_UPROBE 3 /* breakpointed or single-stepping */
#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */
#define TIF_PATCH_PENDING 5 /* pending live patching update */
+#define TIF_PGSTE 6 /* New mm's will use 4K page tables */
#define TIF_31BIT 16 /* 32bit process */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 853b2a3d8dee..7317b3108a88 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -137,6 +137,21 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
}
/*
+ * p4d_free_tlb frees a pud table and clears the CRSTE for the
+ * region second table entry from the tlb.
+ * If the mm uses a four level page table the single p4d is freed
+ * as the pgd. p4d_free_tlb checks the asce_limit against 8PB
+ * to avoid the double free of the p4d in this case.
+ */
+static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
+ unsigned long address)
+{
+ if (tlb->mm->context.asce_limit <= (1UL << 53))
+ return;
+ tlb_remove_table(tlb, p4d);
+}
+
+/*
* pud_free_tlb frees a pud table and clears the CRSTE for the
* region third table entry from the tlb.
* If the mm uses a three level page table the single pud is freed
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 6bb29633e1f1..b65c414b6c0e 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -58,6 +58,9 @@ int main(void)
OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
OFFSET(__SF_GPRS, stack_frame, gprs);
OFFSET(__SF_EMPTY, stack_frame, empty1);
+ OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]);
+ OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]);
+ OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]);
BLANK();
/* timeval/timezone offsets for use by vdso */
OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 829e1c53005c..dab78babfab6 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -98,8 +98,10 @@ static int show_address(void *data, unsigned long address, int reliable)
return 0;
}
-static void show_trace(struct task_struct *task, unsigned long sp)
+void show_stack(struct task_struct *task, unsigned long *stack)
{
+ unsigned long sp = (unsigned long) stack;
+
if (!sp)
sp = task ? task->thread.ksp : current_stack_pointer();
printk("Call Trace:\n");
@@ -109,29 +111,6 @@ static void show_trace(struct task_struct *task, unsigned long sp)
debug_show_held_locks(task);
}
-void show_stack(struct task_struct *task, unsigned long *sp)
-{
- unsigned long *stack;
- int i;
-
- stack = sp;
- if (!stack) {
- if (!task)
- stack = (unsigned long *)current_stack_pointer();
- else
- stack = (unsigned long *)task->thread.ksp;
- }
- printk(KERN_DEFAULT "Stack:\n");
- for (i = 0; i < 20; i++) {
- if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
- break;
- if (i % 4 == 0)
- printk(KERN_DEFAULT " ");
- pr_cont("%016lx%c", *stack++, i % 4 == 3 ? '\n' : ' ');
- }
- show_trace(task, (unsigned long)sp);
-}
-
static void show_last_breaking_event(struct pt_regs *regs)
{
printk("Last Breaking-Event-Address:\n");
@@ -149,8 +128,8 @@ void show_registers(struct pt_regs *regs)
pr_cont(" (%pSR)", (void *)regs->psw.addr);
pr_cont("\n");
printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
- "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e,
- psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm);
+ "P:%x AS:%x CC:%x PM:%x", psw->per, psw->dat, psw->io, psw->ext,
+ psw->key, psw->mcheck, psw->wait, psw->pstate, psw->as, psw->cc, psw->pm);
pr_cont(" RI:%x EA:%x\n", psw->ri, psw->eaba);
printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
@@ -169,7 +148,7 @@ void show_regs(struct pt_regs *regs)
show_registers(regs);
/* Show stack backtrace if pt_regs is from kernel mode */
if (!user_mode(regs))
- show_trace(NULL, regs->gprs[15]);
+ show_stack(NULL, (unsigned long *) regs->gprs[15]);
show_last_breaking_event(regs);
}
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 6315037335ba..21900e1cee9c 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -52,7 +52,7 @@ _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
_CIF_ASCE_SECONDARY | _CIF_FPU)
-_PIF_WORK = (_PIF_PER_TRAP)
+_PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
#define BASED(name) name-cleanup_critical(%r13)
@@ -225,6 +225,7 @@ ENTRY(sie64a)
jnz .Lsie_skip
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
jo .Lsie_skip # exit if fp/vx regs changed
+.Lsie_entry:
sie 0(%r14)
.Lsie_skip:
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
@@ -334,6 +335,8 @@ ENTRY(system_call)
jo .Lsysc_mcck_pending
TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
jo .Lsysc_reschedule
+ TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART
+ jo .Lsysc_syscall_restart
#ifdef CONFIG_UPROBES
TSTMSK __TI_flags(%r12),_TIF_UPROBE
jo .Lsysc_uprobe_notify
@@ -347,6 +350,8 @@ ENTRY(system_call)
jo .Lsysc_patch_pending # handle live patching just before
# signals and possible syscall restart
#endif
+ TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART
+ jo .Lsysc_syscall_restart
TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
jo .Lsysc_sigpending
TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
@@ -448,6 +453,15 @@ ENTRY(system_call)
jg do_per_trap
#
+# _PIF_SYSCALL_RESTART is set, repeat the current system call
+#
+.Lsysc_syscall_restart:
+ ni __PT_FLAGS+7(%r11),255-_PIF_SYSCALL_RESTART
+ lmg %r1,%r7,__PT_R1(%r11) # load svc arguments
+ lg %r2,__PT_ORIG_GPR2(%r11)
+ j .Lsysc_do_svc
+
+#
# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
# and after the system call
#
@@ -881,9 +895,7 @@ ENTRY(save_fpu_regs)
oi __LC_CPU_FLAGS+7,_CIF_FPU
br %r14
.Lsave_fpu_regs_end:
-#if IS_ENABLED(CONFIG_KVM)
EXPORT_SYMBOL(save_fpu_regs)
-#endif
/*
* Load floating-point controls and floating-point or vector registers.
@@ -1111,7 +1123,13 @@ cleanup_critical:
.quad .Lsie_done
.Lcleanup_sie:
- lg %r9,__SF_EMPTY(%r15) # get control block pointer
+ cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt?
+ je 1f
+ slg %r9,BASED(.Lsie_crit_mcck_start)
+ clg %r9,BASED(.Lsie_crit_mcck_length)
+ jh 1f
+ oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
+1: lg %r9,__SF_EMPTY(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
@@ -1296,6 +1314,10 @@ cleanup_critical:
.quad .Lsie_gmap
.Lsie_critical_length:
.quad .Lsie_done - .Lsie_gmap
+.Lsie_crit_mcck_start:
+ .quad .Lsie_entry
+.Lsie_crit_mcck_length:
+ .quad .Lsie_skip - .Lsie_entry
#endif
.section .rodata, "a"
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index e545ffe5155a..8e622bb52f7a 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -564,8 +564,6 @@ static struct kset *ipl_kset;
static void __ipl_run(void *unused)
{
- if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW)
- diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
diag308(DIAG308_LOAD_CLEAR, NULL);
if (MACHINE_IS_VM)
__cpcmd("IPL", NULL, 0, NULL);
@@ -1088,10 +1086,7 @@ static void __reipl_run(void *unused)
break;
case REIPL_METHOD_CCW_DIAG:
diag308(DIAG308_SET, reipl_block_ccw);
- if (MACHINE_IS_LPAR)
- diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
- else
- diag308(DIAG308_LOAD_CLEAR, NULL);
+ diag308(DIAG308_LOAD_CLEAR, NULL);
break;
case REIPL_METHOD_FCP_RW_DIAG:
diag308(DIAG308_SET, reipl_block_fcp);
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index 6aa630a8d24f..262506cee4c3 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -93,7 +93,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
args.entry = entry;
args.type = type;
- stop_machine(__sm_arch_jump_label_transform, &args, NULL);
+ stop_machine_cpuslocked(__sm_arch_jump_label_transform, &args, NULL);
}
void arch_jump_label_transform_static(struct jump_entry *entry,
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 3d6a99746454..6842e4501e2e 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -196,7 +196,7 @@ void arch_arm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
- stop_machine(swap_instruction, &args, NULL);
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
}
NOKPROBE_SYMBOL(arch_arm_kprobe);
@@ -204,7 +204,7 @@ void arch_disarm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
- stop_machine(swap_instruction, &args, NULL);
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
}
NOKPROBE_SYMBOL(arch_disarm_kprobe);
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 985589523970..31d03a84126c 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -25,6 +25,8 @@
#include <asm/crw.h>
#include <asm/switch_to.h>
#include <asm/ctl_reg.h>
+#include <asm/asm-offsets.h>
+#include <linux/kvm_host.h>
struct mcck_struct {
unsigned int kill_task : 1;
@@ -274,12 +276,39 @@ static int notrace s390_validate_registers(union mci mci, int umode)
return kill_task;
}
+/*
+ * Backup the guest's machine check info to its description block
+ */
+static void notrace s390_backup_mcck_info(struct pt_regs *regs)
+{
+ struct mcck_volatile_info *mcck_backup;
+ struct sie_page *sie_page;
+
+ /* r14 contains the sie block, which was set in sie64a */
+ struct kvm_s390_sie_block *sie_block =
+ (struct kvm_s390_sie_block *) regs->gprs[14];
+
+ if (sie_block == NULL)
+ /* Something's seriously wrong, stop system. */
+ s390_handle_damage();
+
+ sie_page = container_of(sie_block, struct sie_page, sie_block);
+ mcck_backup = &sie_page->mcck_info;
+ mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
+ ~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
+ mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
+ mcck_backup->failing_storage_address
+ = S390_lowcore.failing_storage_address;
+}
+
#define MAX_IPD_COUNT 29
#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */
#define ED_STP_ISLAND 6 /* External damage STP island check */
#define ED_STP_SYNC 7 /* External damage STP sync check */
+#define MCCK_CODE_NO_GUEST (MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE)
+
/*
* machine check handler.
*/
@@ -291,6 +320,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
struct mcck_struct *mcck;
unsigned long long tmp;
union mci mci;
+ unsigned long mcck_dam_code;
nmi_enter();
inc_irq_stat(NMI_NMI);
@@ -301,7 +331,13 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
/* System damage -> stopping machine */
s390_handle_damage();
}
- if (mci.pd) {
+
+ /*
+ * Reinject the instruction processing damages' machine checks
+ * including Delayed Access Exception into the guest
+ * instead of damaging the host if they happen in the guest.
+ */
+ if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) {
if (mci.b) {
/* Processing backup -> verify if we can survive this */
u64 z_mcic, o_mcic, t_mcic;
@@ -345,6 +381,14 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
mcck->mcck_code = mci.val;
set_cpu_flag(CIF_MCCK_PENDING);
}
+
+ /*
+ * Backup the machine check's info if it happens when the guest
+ * is running.
+ */
+ if (test_cpu_flag(CIF_MCCK_GUEST))
+ s390_backup_mcck_info(regs);
+
if (mci.cd) {
/* Timing facility damage */
s390_handle_damage();
@@ -358,15 +402,22 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
if (mcck->stp_queue)
set_cpu_flag(CIF_MCCK_PENDING);
}
- if (mci.se)
- /* Storage error uncorrected */
- s390_handle_damage();
- if (mci.ke)
- /* Storage key-error uncorrected */
- s390_handle_damage();
- if (mci.ds && mci.fa)
- /* Storage degradation */
- s390_handle_damage();
+
+ /*
+ * Reinject storage related machine checks into the guest if they
+ * happen when the guest is running.
+ */
+ if (!test_cpu_flag(CIF_MCCK_GUEST)) {
+ if (mci.se)
+ /* Storage error uncorrected */
+ s390_handle_damage();
+ if (mci.ke)
+ /* Storage key-error uncorrected */
+ s390_handle_damage();
+ if (mci.ds && mci.fa)
+ /* Storage degradation */
+ s390_handle_damage();
+ }
if (mci.cp) {
/* Channel report word pending */
mcck->channel_report = 1;
@@ -377,6 +428,19 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
mcck->warning = 1;
set_cpu_flag(CIF_MCCK_PENDING);
}
+
+ /*
+ * If there are only Channel Report Pending and External Damage
+ * machine checks, they will not be reinjected into the guest
+ * because they refer to host conditions only.
+ */
+ mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK);
+ if (test_cpu_flag(CIF_MCCK_GUEST) &&
+ (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) {
+ /* Set exit reason code for host's later handling */
+ *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
+ }
+ clear_cpu_flag(CIF_MCCK_GUEST);
nmi_exit();
}
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index ca960d0370d5..0c82f7903fc7 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -995,11 +995,11 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
regs.int_parm = CPU_MF_INT_SF_PRA;
sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
- psw_bits(regs.psw).ia = sfr->basic.ia;
- psw_bits(regs.psw).t = sfr->basic.T;
- psw_bits(regs.psw).w = sfr->basic.W;
- psw_bits(regs.psw).p = sfr->basic.P;
- psw_bits(regs.psw).as = sfr->basic.AS;
+ psw_bits(regs.psw).ia = sfr->basic.ia;
+ psw_bits(regs.psw).dat = sfr->basic.T;
+ psw_bits(regs.psw).wait = sfr->basic.W;
+ psw_bits(regs.psw).per = sfr->basic.P;
+ psw_bits(regs.psw).as = sfr->basic.AS;
/*
* Use the hardware provided configuration level to decide if the
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 955a7b6fa0a4..93a386f4a3b5 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -245,6 +245,5 @@ ssize_t cpumf_events_sysfs_show(struct device *dev,
struct perf_pmu_events_attr *pmu_attr;
pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
- return sprintf(page, "event=0x%04llx,name=%s\n",
- pmu_attr->id, attr->attr.name);
+ return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 999d7154bbdc..bb32b8618bf6 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -41,31 +41,6 @@
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
-/*
- * Return saved PC of a blocked thread. used in kernel/sched.
- * resume in entry.S does not create a new stack frame, it
- * just stores the registers %r6-%r15 to the frame given by
- * schedule. We want to return the address of the caller of
- * schedule, so we have to walk the backchain one time to
- * find the frame schedule() store its return address.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct stack_frame *sf, *low, *high;
-
- if (!tsk || !task_stack_page(tsk))
- return 0;
- low = task_stack_page(tsk);
- high = (struct stack_frame *) task_pt_regs(tsk);
- sf = (struct stack_frame *) tsk->thread.ksp;
- if (sf <= low || sf > high)
- return 0;
- sf = (struct stack_frame *) sf->back_chain;
- if (sf <= low || sf > high)
- return 0;
- return sf->gprs[8];
-}
-
extern void kernel_thread_starter(void);
/*
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 488c5bb8dc77..252ed61a128b 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -1160,6 +1160,8 @@ static int s390_gs_cb_get(struct task_struct *target,
return -ENODEV;
if (!data)
return -ENODATA;
+ if (target == current)
+ save_gs_cb(data);
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
data, 0, sizeof(struct gs_cb));
}
@@ -1170,6 +1172,7 @@ static int s390_gs_cb_set(struct task_struct *target,
const void *kbuf, const void __user *ubuf)
{
struct gs_cb *data = target->thread.gs_cb;
+ int rc;
if (!MACHINE_HAS_GS)
return -ENODEV;
@@ -1177,10 +1180,18 @@ static int s390_gs_cb_set(struct task_struct *target,
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
+ data->gsd = 25;
target->thread.gs_cb = data;
+ if (target == current)
+ __ctl_set_bit(2, 4);
+ } else if (target == current) {
+ save_gs_cb(data);
}
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- data, 0, sizeof(struct gs_cb));
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(struct gs_cb));
+ if (target == current)
+ restore_gs_cb(data);
+ return rc;
}
static int s390_gs_bc_get(struct task_struct *target,
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 363000a77ffc..1020a11a24e5 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -26,6 +26,7 @@
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/kernel_stat.h>
+#include <linux/kmemleak.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irqflags.h>
@@ -207,6 +208,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL);
if (!mcesa_origin)
goto out;
+ /* The pointer is stored with mcesa_bits ORed in */
+ kmemleak_not_leak((void *) mcesa_origin);
mcesa_bits = MACHINE_HAS_GS ? 11 : 0;
}
} else {
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index eefcb54872a5..fb869b103825 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -242,7 +242,7 @@ static void print_ext_name(struct seq_file *m, int lvl,
static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info)
{
- if (!memcmp(&info->vm[i].uuid, &NULL_UUID_BE, sizeof(uuid_be)))
+ if (uuid_is_null(&info->vm[i].uuid))
return;
seq_printf(m, "VM%02d UUID: %pUb\n", i, &info->vm[i].uuid);
}
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index c3a52f9a69a0..192efdfac918 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -636,10 +636,10 @@ static void stp_work_fn(struct work_struct *work)
goto out_unlock;
memset(&stp_sync, 0, sizeof(stp_sync));
- get_online_cpus();
+ cpus_read_lock();
atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
- stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask);
- put_online_cpus();
+ stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask);
+ cpus_read_unlock();
if (!check_sync_clock())
/*
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index f787b9d8f54c..442e5423ce3d 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -21,6 +21,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
+#include <linux/cpu.h>
#include <asm/fpu/api.h>
#include "entry.h"
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
index 314e0ee3016a..d94baa8db507 100644
--- a/arch/s390/kernel/uprobes.c
+++ b/arch/s390/kernel/uprobes.c
@@ -27,12 +27,12 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
- if (psw_bits(regs->psw).eaba == PSW_AMODE_24BIT)
+ if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT)
return -EINVAL;
- if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_AMODE_31BIT)
+ if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT)
return -EINVAL;
clear_pt_regs_flag(regs, PIF_PER_TRAP);
- auprobe->saved_per = psw_bits(regs->psw).r;
+ auprobe->saved_per = psw_bits(regs->psw).per;
auprobe->saved_int_code = regs->int_code;
regs->int_code = UPROBE_TRAP_NR;
regs->psw.addr = current->utask->xol_vaddr;
@@ -81,7 +81,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
clear_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP);
update_cr_regs(current);
- psw_bits(regs->psw).r = auprobe->saved_per;
+ psw_bits(regs->psw).per = auprobe->saved_per;
regs->int_code = auprobe->saved_int_code;
if (fixup & FIXUP_PSW_NORMAL)
@@ -372,8 +372,8 @@ static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs)
bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
- if ((psw_bits(regs->psw).eaba == PSW_AMODE_24BIT) ||
- ((psw_bits(regs->psw).eaba == PSW_AMODE_31BIT) &&
+ if ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) ||
+ ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) &&
!is_compat_task())) {
regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE);
do_report_trap(regs, SIGILL, ILL_ILLADR, NULL);
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 10516ae3b55e..b89d19f6f2ab 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -50,6 +50,56 @@ static struct page **vdso64_pagelist;
*/
unsigned int __read_mostly vdso_enabled = 1;
+static int vdso_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page **vdso_pagelist;
+ unsigned long vdso_pages;
+
+ vdso_pagelist = vdso64_pagelist;
+ vdso_pages = vdso64_pages;
+#ifdef CONFIG_COMPAT
+ if (is_compat_task()) {
+ vdso_pagelist = vdso32_pagelist;
+ vdso_pages = vdso32_pages;
+ }
+#endif
+
+ if (vmf->pgoff >= vdso_pages)
+ return VM_FAULT_SIGBUS;
+
+ vmf->page = vdso_pagelist[vmf->pgoff];
+ get_page(vmf->page);
+ return 0;
+}
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma)
+{
+ unsigned long vdso_pages;
+
+ vdso_pages = vdso64_pages;
+#ifdef CONFIG_COMPAT
+ if (is_compat_task())
+ vdso_pages = vdso32_pages;
+#endif
+
+ if ((vdso_pages << PAGE_SHIFT) != vma->vm_end - vma->vm_start)
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(current->mm != vma->vm_mm))
+ return -EFAULT;
+
+ current->mm->context.vdso_base = vma->vm_start;
+ return 0;
+}
+
+static const struct vm_special_mapping vdso_mapping = {
+ .name = "[vdso]",
+ .fault = vdso_fault,
+ .mremap = vdso_mremap,
+};
+
static int __init vdso_setup(char *s)
{
unsigned long val;
@@ -181,7 +231,7 @@ static void vdso_init_cr5(void)
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
- struct page **vdso_pagelist;
+ struct vm_area_struct *vma;
unsigned long vdso_pages;
unsigned long vdso_base;
int rc;
@@ -194,13 +244,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (!uses_interp)
return 0;
- vdso_pagelist = vdso64_pagelist;
vdso_pages = vdso64_pages;
#ifdef CONFIG_COMPAT
- if (is_compat_task()) {
- vdso_pagelist = vdso32_pagelist;
+ if (is_compat_task())
vdso_pages = vdso32_pages;
- }
#endif
/*
* vDSO has a problem and was disabled, just don't "enable" it for
@@ -209,8 +256,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (vdso_pages == 0)
return 0;
- current->mm->context.vdso_base = 0;
-
/*
* pick a base address for the vDSO in process space. We try to put
* it at vdso_base which is the "natural" base for it, but we might
@@ -225,13 +270,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
}
/*
- * Put vDSO base into mm struct. We need to do this before calling
- * install_special_mapping or the perf counter mmap tracking code
- * will fail to recognise it as a vDSO (since arch_vma_name fails).
- */
- current->mm->context.vdso_base = vdso_base;
-
- /*
* our vma flags don't have VM_WRITE so by default, the process
* isn't allowed to write those pages.
* gdb can break that with ptrace interface, and thus trigger COW
@@ -241,24 +279,23 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
* It's fine to use that for setting breakpoints in the vDSO code
* pages though.
*/
- rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- vdso_pagelist);
- if (rc)
- current->mm->context.vdso_base = 0;
+ vma = _install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ &vdso_mapping);
+ if (IS_ERR(vma)) {
+ rc = PTR_ERR(vma);
+ goto out_up;
+ }
+
+ current->mm->context.vdso_base = vdso_base;
+ rc = 0;
+
out_up:
up_write(&mm->mmap_sem);
return rc;
}
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
- if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base)
- return "[vdso]";
- return NULL;
-}
-
static int __init vdso_init(void)
{
int i;
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 072d84ba42a3..dd7178fbb4f3 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -110,11 +110,10 @@ static inline u64 scale_vtime(u64 vtime)
return vtime;
}
-static void account_system_index_scaled(struct task_struct *p,
- u64 cputime, u64 scaled,
+static void account_system_index_scaled(struct task_struct *p, u64 cputime,
enum cpu_usage_stat index)
{
- p->stimescaled += cputime_to_nsecs(scaled);
+ p->stimescaled += cputime_to_nsecs(scale_vtime(cputime));
account_system_index_time(p, cputime_to_nsecs(cputime), index);
}
@@ -176,14 +175,11 @@ static int do_account_vtime(struct task_struct *tsk)
}
if (system)
- account_system_index_scaled(tsk, system, scale_vtime(system),
- CPUTIME_SYSTEM);
+ account_system_index_scaled(tsk, system, CPUTIME_SYSTEM);
if (hardirq)
- account_system_index_scaled(tsk, hardirq, scale_vtime(hardirq),
- CPUTIME_IRQ);
+ account_system_index_scaled(tsk, hardirq, CPUTIME_IRQ);
if (softirq)
- account_system_index_scaled(tsk, softirq, scale_vtime(softirq),
- CPUTIME_SOFTIRQ);
+ account_system_index_scaled(tsk, softirq, CPUTIME_SOFTIRQ);
steal = S390_lowcore.steal_timer;
if ((s64) steal > 0) {
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 9da243d94cc3..875f8bea8c67 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -551,26 +551,26 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
int rc;
struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
- if (!psw.t) {
+ if (!psw.dat) {
asce->val = 0;
asce->r = 1;
return 0;
}
- if (mode == GACC_IFETCH)
- psw.as = psw.as == PSW_AS_HOME ? PSW_AS_HOME : PSW_AS_PRIMARY;
+ if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME))
+ psw.as = PSW_BITS_AS_PRIMARY;
switch (psw.as) {
- case PSW_AS_PRIMARY:
+ case PSW_BITS_AS_PRIMARY:
asce->val = vcpu->arch.sie_block->gcr[1];
return 0;
- case PSW_AS_SECONDARY:
+ case PSW_BITS_AS_SECONDARY:
asce->val = vcpu->arch.sie_block->gcr[7];
return 0;
- case PSW_AS_HOME:
+ case PSW_BITS_AS_HOME:
asce->val = vcpu->arch.sie_block->gcr[13];
return 0;
- case PSW_AS_ACCREG:
+ case PSW_BITS_AS_ACCREG:
rc = ar_translation(vcpu, asce, ar, mode);
if (rc > 0)
return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
@@ -771,7 +771,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
if (!ctlreg0.lap)
return 0;
- if (psw_bits(*psw).t && asce.p)
+ if (psw_bits(*psw).dat && asce.p)
return 0;
return 1;
}
@@ -790,7 +790,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
PROT_TYPE_LA);
ga &= PAGE_MASK;
- if (psw_bits(*psw).t) {
+ if (psw_bits(*psw).dat) {
rc = guest_translate(vcpu, ga, pages, asce, mode);
if (rc < 0)
return rc;
@@ -831,7 +831,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
pages = vmalloc(nr_pages * sizeof(unsigned long));
if (!pages)
return -ENOMEM;
- need_ipte_lock = psw_bits(*psw).t && !asce.r;
+ need_ipte_lock = psw_bits(*psw).dat && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
@@ -899,7 +899,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
mode, PROT_TYPE_LA);
}
- if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
+ if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */
rc = guest_translate(vcpu, gva, gpa, asce, mode);
if (rc > 0)
return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT);
@@ -977,11 +977,12 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
ptr = asce.origin * 4096;
if (asce.r) {
*fake = 1;
+ ptr = 0;
asce.dt = ASCE_TYPE_REGION1;
}
switch (asce.dt) {
case ASCE_TYPE_REGION1:
- if (vaddr.rfx01 > asce.tl && !asce.r)
+ if (vaddr.rfx01 > asce.tl && !*fake)
return PGM_REGION_FIRST_TRANS;
break;
case ASCE_TYPE_REGION2:
@@ -1009,8 +1010,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
union region1_table_entry rfte;
if (*fake) {
- /* offset in 16EB guest memory block */
- ptr = ptr + ((unsigned long) vaddr.rsx << 53UL);
+ ptr += (unsigned long) vaddr.rfx << 53;
rfte.val = ptr;
goto shadow_r2t;
}
@@ -1036,8 +1036,7 @@ shadow_r2t:
union region2_table_entry rste;
if (*fake) {
- /* offset in 8PB guest memory block */
- ptr = ptr + ((unsigned long) vaddr.rtx << 42UL);
+ ptr += (unsigned long) vaddr.rsx << 42;
rste.val = ptr;
goto shadow_r3t;
}
@@ -1064,8 +1063,7 @@ shadow_r3t:
union region3_table_entry rtte;
if (*fake) {
- /* offset in 4TB guest memory block */
- ptr = ptr + ((unsigned long) vaddr.sx << 31UL);
+ ptr += (unsigned long) vaddr.rtx << 31;
rtte.val = ptr;
goto shadow_sgt;
}
@@ -1101,8 +1099,7 @@ shadow_sgt:
union segment_table_entry ste;
if (*fake) {
- /* offset in 2G guest memory block */
- ptr = ptr + ((unsigned long) vaddr.sx << 20UL);
+ ptr += (unsigned long) vaddr.sx << 20;
ste.val = ptr;
goto shadow_pgt;
}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 7ce47fd36f28..bec42b852246 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -57,9 +57,9 @@ static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu,
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
- if (psw_bits(*psw).eaba == PSW_AMODE_64BIT)
+ if (psw_bits(*psw).eaba == PSW_BITS_AMODE_64BIT)
return ga;
- if (psw_bits(*psw).eaba == PSW_AMODE_31BIT)
+ if (psw_bits(*psw).eaba == PSW_BITS_AMODE_31BIT)
return ga & ((1UL << 31) - 1);
return ga & ((1UL << 24) - 1);
}
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index 23d9a4e12da1..c2e0ddc1356e 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -613,15 +613,15 @@ int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
* instruction. Check primary and home space-switch-event
* controls. (theoretically home -> home produced no event)
*/
- if (((new_as == PSW_AS_HOME) ^ old_as_is_home(vcpu)) &&
- (pssec(vcpu) || hssec(vcpu)))
+ if (((new_as == PSW_BITS_AS_HOME) ^ old_as_is_home(vcpu)) &&
+ (pssec(vcpu) || hssec(vcpu)))
vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
/*
* PT, PTI, PR, PC instruction operate on primary AS only. Check
* if the primary-space-switch-event control was or got set.
*/
- if (new_as == PSW_AS_PRIMARY && !old_as_is_home(vcpu) &&
+ if (new_as == PSW_BITS_AS_PRIMARY && !old_as_is_home(vcpu) &&
(pssec(vcpu) || old_ssec(vcpu)))
vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f28e2e776931..b0d7de5a533d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2067,6 +2067,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
if (!vcpu)
goto out;
+ BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
if (!sie_page)
goto out_free_cpu;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index c03106c428cf..e53292a89257 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -361,7 +361,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
}
}
if (m3 & SSKE_MB) {
- if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT)
+ if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT)
vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK;
else
vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL;
@@ -374,7 +374,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
{
vcpu->stat.instruction_ipte_interlock++;
- if (psw_bits(vcpu->arch.sie_block->gpsw).p)
+ if (psw_bits(vcpu->arch.sie_block->gpsw).pstate)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
kvm_s390_retry_instr(vcpu);
@@ -901,7 +901,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
/* only support 2G frame size if EDAT2 is available and we are
not in 24-bit addressing mode */
if (!test_kvm_facility(vcpu->kvm, 78) ||
- psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_24BIT)
+ psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
break;
@@ -938,7 +938,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
start += PAGE_SIZE;
}
if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
- if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) {
+ if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
vcpu->run->s.regs.gprs[reg2] = end;
} else {
vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL;
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 1b553d847140..049c3c455b32 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -149,7 +149,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
}
static void walk_pud_level(struct seq_file *m, struct pg_state *st,
- pgd_t *pgd, unsigned long addr)
+ p4d_t *p4d, unsigned long addr)
{
unsigned int prot;
pud_t *pud;
@@ -157,7 +157,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
st->current_address = addr;
- pud = pud_offset(pgd, addr);
+ pud = pud_offset(p4d, addr);
if (!pud_none(*pud))
if (pud_large(*pud)) {
prot = pud_val(*pud) &
@@ -172,6 +172,23 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
}
}
+static void walk_p4d_level(struct seq_file *m, struct pg_state *st,
+ pgd_t *pgd, unsigned long addr)
+{
+ p4d_t *p4d;
+ int i;
+
+ for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++) {
+ st->current_address = addr;
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_none(*p4d))
+ walk_pud_level(m, st, p4d, addr);
+ else
+ note_page(m, st, _PAGE_INVALID, 2);
+ addr += P4D_SIZE;
+ }
+}
+
static void walk_pgd_level(struct seq_file *m)
{
unsigned long addr = 0;
@@ -184,7 +201,7 @@ static void walk_pgd_level(struct seq_file *m)
st.current_address = addr;
pgd = pgd_offset_k(addr);
if (!pgd_none(*pgd))
- walk_pud_level(m, &st, pgd, addr);
+ walk_p4d_level(m, &st, pgd, addr);
else
note_page(m, &st, _PAGE_INVALID, 1);
addr += PGDIR_SIZE;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 5845d3028ffc..14f25798b001 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -130,7 +130,7 @@ static int bad_address(void *p)
static void dump_pagetable(unsigned long asce, unsigned long address)
{
- unsigned long *table = __va(asce & PAGE_MASK);
+ unsigned long *table = __va(asce & _ASCE_ORIGIN);
pr_alert("AS:%016lx ", asce);
switch (asce & _ASCE_TYPE_MASK) {
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 7f6db1e6c048..4fb3d3cdb370 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -125,7 +125,7 @@ static void gmap_radix_tree_free(struct radix_tree_root *root)
struct radix_tree_iter iter;
unsigned long indices[16];
unsigned long index;
- void **slot;
+ void __rcu **slot;
int i, nr;
/* A radix tree is freed by deleting all of its entries */
@@ -150,7 +150,7 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
struct radix_tree_iter iter;
unsigned long indices[16];
unsigned long index;
- void **slot;
+ void __rcu **slot;
int i, nr;
/* A radix tree is freed by deleting all of its entries */
@@ -537,6 +537,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
unsigned long *table;
spinlock_t *ptl;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
int rc;
@@ -573,7 +574,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
mm = gmap->mm;
pgd = pgd_offset(mm, vmaddr);
VM_BUG_ON(pgd_none(*pgd));
- pud = pud_offset(pgd, vmaddr);
+ p4d = p4d_offset(pgd, vmaddr);
+ VM_BUG_ON(p4d_none(*p4d));
+ pud = pud_offset(p4d, vmaddr);
VM_BUG_ON(pud_none(*pud));
/* large puds cannot yet be handled */
if (pud_large(*pud))
@@ -1008,7 +1011,7 @@ EXPORT_SYMBOL_GPL(gmap_read_table);
static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
struct gmap_rmap *rmap)
{
- void **slot;
+ void __rcu **slot;
BUG_ON(!gmap_is_shadow(sg));
slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index b7b779c40a5b..8ecc25e760fa 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -166,15 +166,15 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
return 1;
}
-static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
+static inline int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long next;
pud_t *pudp, pud;
- pudp = (pud_t *) pgdp;
- if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
- pudp = (pud_t *) pgd_deref(pgd);
+ pudp = (pud_t *) p4dp;
+ if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+ pudp = (pud_t *) p4d_deref(p4d);
pudp += pud_index(addr);
do {
pud = *pudp;
@@ -194,6 +194,29 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
return 1;
}
+static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ p4d_t *p4dp, p4d;
+
+ p4dp = (p4d_t *) pgdp;
+ if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
+ p4dp = (p4d_t *) pgd_deref(pgd);
+ p4dp += p4d_index(addr);
+ do {
+ p4d = *p4dp;
+ barrier();
+ next = p4d_addr_end(addr, end);
+ if (p4d_none(p4d))
+ return 0;
+ if (!gup_pud_range(p4dp, p4d, addr, next, write, pages, nr))
+ return 0;
+ } while (p4dp++, addr = next, addr != end);
+
+ return 1;
+}
+
/*
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
* back to the regular GUP.
@@ -228,7 +251,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
break;
- if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
+ if (!gup_p4d_range(pgdp, pgd, addr, next, write, pages, &nr))
break;
} while (pgdp++, addr = next, addr != end);
local_irq_restore(flags);
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 9b4050caa4e9..d3a5e39756f6 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -162,16 +162,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgdp;
+ p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp = NULL;
pgdp = pgd_offset(mm, addr);
- pudp = pud_alloc(mm, pgdp, addr);
- if (pudp) {
- if (sz == PUD_SIZE)
- return (pte_t *) pudp;
- else if (sz == PMD_SIZE)
- pmdp = pmd_alloc(mm, pudp, addr);
+ p4dp = p4d_alloc(mm, pgdp, addr);
+ if (p4dp) {
+ pudp = pud_alloc(mm, p4dp, addr);
+ if (pudp) {
+ if (sz == PUD_SIZE)
+ return (pte_t *) pudp;
+ else if (sz == PMD_SIZE)
+ pmdp = pmd_alloc(mm, pudp, addr);
+ }
}
return (pte_t *) pmdp;
}
@@ -179,16 +183,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgdp;
+ p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp = NULL;
pgdp = pgd_offset(mm, addr);
if (pgd_present(*pgdp)) {
- pudp = pud_offset(pgdp, addr);
- if (pud_present(*pudp)) {
- if (pud_large(*pudp))
- return (pte_t *) pudp;
- pmdp = pmd_offset(pudp, addr);
+ p4dp = p4d_offset(pgdp, addr);
+ if (p4d_present(*p4dp)) {
+ pudp = pud_offset(p4dp, addr);
+ if (pud_present(*pudp)) {
+ if (pud_large(*pudp))
+ return (pte_t *) pudp;
+ pmdp = pmd_offset(pudp, addr);
+ }
}
}
return (pte_t *) pmdp;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index ee6a1d3d4983..3348e60dd8ad 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -81,6 +81,7 @@ void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
unsigned long pgd_type, asce_bits;
+ psw_t psw;
init_mm.pgd = swapper_pg_dir;
if (VMALLOC_END > (1UL << 42)) {
@@ -100,7 +101,10 @@ void __init paging_init(void)
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
__ctl_load(S390_lowcore.kernel_asce, 7, 7);
__ctl_load(S390_lowcore.kernel_asce, 13, 13);
- __arch_local_irq_stosm(0x04);
+ psw.mask = __extract_psw();
+ psw_bits(psw).dat = 1;
+ psw_bits(psw).as = PSW_BITS_AS_HOME;
+ __load_psw_mask(psw.mask);
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index b854b1da281a..2e10d2b8ad35 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -120,7 +120,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
check_asce_limit:
if (addr + len > current->mm->context.asce_limit) {
- rc = crst_table_upgrade(mm);
+ rc = crst_table_upgrade(mm, addr + len);
if (rc)
return (unsigned long) rc;
}
@@ -184,7 +184,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
check_asce_limit:
if (addr + len > current->mm->context.asce_limit) {
- rc = crst_table_upgrade(mm);
+ rc = crst_table_upgrade(mm, addr + len);
if (rc)
return (unsigned long) rc;
}
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 49e721f3645e..180481589246 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -229,14 +229,14 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr,
pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
}
-static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
+static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long next;
pud_t *pudp;
int rc = 0;
- pudp = pud_offset(pgd, addr);
+ pudp = pud_offset(p4d, addr);
do {
if (pud_none(*pudp))
return -EINVAL;
@@ -259,6 +259,26 @@ static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
return rc;
}
+static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
+ unsigned long flags)
+{
+ unsigned long next;
+ p4d_t *p4dp;
+ int rc = 0;
+
+ p4dp = p4d_offset(pgd, addr);
+ do {
+ if (p4d_none(*p4dp))
+ return -EINVAL;
+ next = p4d_addr_end(addr, end);
+ rc = walk_pud_level(p4dp, addr, next, flags);
+ p4dp++;
+ addr = next;
+ cond_resched();
+ } while (addr < end && !rc);
+ return rc;
+}
+
static DEFINE_MUTEX(cpa_mutex);
static int change_page_attr(unsigned long addr, unsigned long end,
@@ -278,7 +298,7 @@ static int change_page_attr(unsigned long addr, unsigned long end,
if (pgd_none(*pgdp))
break;
next = pgd_addr_end(addr, end);
- rc = walk_pud_level(pgdp, addr, next, flags);
+ rc = walk_p4d_level(pgdp, addr, next, flags);
if (rc)
break;
cond_resched();
@@ -319,6 +339,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
unsigned long address;
int nr, i, j;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -326,7 +347,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
for (i = 0; i < numpages;) {
address = page_to_phys(page + i);
pgd = pgd_offset_k(address);
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ pud = pud_offset(p4d, address);
pmd = pmd_offset(pud, address);
pte = pte_offset_kernel(pmd, address);
nr = (unsigned long)pte >> ilog2(sizeof(long));
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index f502cbe657af..18918e394ce4 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -76,29 +76,46 @@ static void __crst_table_upgrade(void *arg)
__tlb_flush_local();
}
-int crst_table_upgrade(struct mm_struct *mm)
+int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
{
unsigned long *table, *pgd;
+ int rc, notify;
- /* upgrade should only happen from 3 to 4 levels */
- BUG_ON(mm->context.asce_limit != (1UL << 42));
-
- table = crst_table_alloc(mm);
- if (!table)
+ /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
+ BUG_ON(mm->context.asce_limit < (1UL << 42));
+ if (end >= TASK_SIZE_MAX)
return -ENOMEM;
-
- spin_lock_bh(&mm->page_table_lock);
- pgd = (unsigned long *) mm->pgd;
- crst_table_init(table, _REGION2_ENTRY_EMPTY);
- pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
- mm->pgd = (pgd_t *) table;
- mm->context.asce_limit = 1UL << 53;
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
- spin_unlock_bh(&mm->page_table_lock);
-
- on_each_cpu(__crst_table_upgrade, mm, 0);
- return 0;
+ rc = 0;
+ notify = 0;
+ while (mm->context.asce_limit < end) {
+ table = crst_table_alloc(mm);
+ if (!table) {
+ rc = -ENOMEM;
+ break;
+ }
+ spin_lock_bh(&mm->page_table_lock);
+ pgd = (unsigned long *) mm->pgd;
+ if (mm->context.asce_limit == (1UL << 42)) {
+ crst_table_init(table, _REGION2_ENTRY_EMPTY);
+ p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
+ mm->pgd = (pgd_t *) table;
+ mm->context.asce_limit = 1UL << 53;
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+ } else {
+ crst_table_init(table, _REGION1_ENTRY_EMPTY);
+ pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
+ mm->pgd = (pgd_t *) table;
+ mm->context.asce_limit = -PAGE_SIZE;
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+ }
+ notify = 1;
+ spin_unlock_bh(&mm->page_table_lock);
+ }
+ if (notify)
+ on_each_cpu(__crst_table_upgrade, mm, 0);
+ return rc;
}
void crst_table_downgrade(struct mm_struct *mm)
@@ -274,7 +291,7 @@ static void __tlb_remove_table(void *_table)
struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
switch (mask) {
- case 0: /* pmd or pud */
+ case 0: /* pmd, pud, or p4d */
free_pages((unsigned long) table, 2);
break;
case 1: /* lower 2K of a 4K page table */
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 947b66a5cdba..d4d409ba206b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -610,6 +610,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
spinlock_t *ptl;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgste_t pgste;
@@ -618,7 +619,10 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
bool dirty;
pgd = pgd_offset(mm, addr);
- pud = pud_alloc(mm, pgd, addr);
+ p4d = p4d_alloc(mm, pgd, addr);
+ if (!p4d)
+ return false;
+ pud = pud_alloc(mm, p4d, addr);
if (!pud)
return false;
pmd = pmd_alloc(mm, pud, addr);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index c33c94b4be60..d8398962a723 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -38,6 +38,17 @@ static void __ref *vmem_alloc_pages(unsigned int order)
return (void *) memblock_alloc(size, size);
}
+static inline p4d_t *vmem_p4d_alloc(void)
+{
+ p4d_t *p4d = NULL;
+
+ p4d = vmem_alloc_pages(2);
+ if (!p4d)
+ return NULL;
+ clear_table((unsigned long *) p4d, _REGION2_ENTRY_EMPTY, PAGE_SIZE * 4);
+ return p4d;
+}
+
static inline pud_t *vmem_pud_alloc(void)
{
pud_t *pud = NULL;
@@ -85,6 +96,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
unsigned long end = start + size;
unsigned long address = start;
pgd_t *pg_dir;
+ p4d_t *p4_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
@@ -102,12 +114,19 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
+ p4_dir = vmem_p4d_alloc();
+ if (!p4_dir)
+ goto out;
+ pgd_populate(&init_mm, pg_dir, p4_dir);
+ }
+ p4_dir = p4d_offset(pg_dir, address);
+ if (p4d_none(*p4_dir)) {
pu_dir = vmem_pud_alloc();
if (!pu_dir)
goto out;
- pgd_populate(&init_mm, pg_dir, pu_dir);
+ p4d_populate(&init_mm, p4_dir, pu_dir);
}
- pu_dir = pud_offset(pg_dir, address);
+ pu_dir = pud_offset(p4_dir, address);
if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
!(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
!debug_pagealloc_enabled()) {
@@ -161,6 +180,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
unsigned long end = start + size;
unsigned long address = start;
pgd_t *pg_dir;
+ p4d_t *p4_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
@@ -172,7 +192,12 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
address += PGDIR_SIZE;
continue;
}
- pu_dir = pud_offset(pg_dir, address);
+ p4_dir = p4d_offset(pg_dir, address);
+ if (p4d_none(*p4_dir)) {
+ address += P4D_SIZE;
+ continue;
+ }
+ pu_dir = pud_offset(p4_dir, address);
if (pud_none(*pu_dir)) {
address += PUD_SIZE;
continue;
@@ -213,6 +238,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
unsigned long pgt_prot, sgt_prot;
unsigned long address = start;
pgd_t *pg_dir;
+ p4d_t *p4_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
@@ -227,13 +253,21 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
for (address = start; address < end;) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
+ p4_dir = vmem_p4d_alloc();
+ if (!p4_dir)
+ goto out;
+ pgd_populate(&init_mm, pg_dir, p4_dir);
+ }
+
+ p4_dir = p4d_offset(pg_dir, address);
+ if (p4d_none(*p4_dir)) {
pu_dir = vmem_pud_alloc();
if (!pu_dir)
goto out;
- pgd_populate(&init_mm, pg_dir, pu_dir);
+ p4d_populate(&init_mm, p4_dir, pu_dir);
}
- pu_dir = pud_offset(pg_dir, address);
+ pu_dir = pud_offset(p4_dir, address);
if (pud_none(*pu_dir)) {
pm_dir = vmem_pmd_alloc();
if (!pm_dir)
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 8051df109db3..7b30af5da222 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -86,6 +86,25 @@ struct zpci_dev *get_zdev_by_fid(u32 fid)
return zdev;
}
+void zpci_remove_reserved_devices(void)
+{
+ struct zpci_dev *tmp, *zdev;
+ enum zpci_state state;
+ LIST_HEAD(remove);
+
+ spin_lock(&zpci_list_lock);
+ list_for_each_entry_safe(zdev, tmp, &zpci_list, entry) {
+ if (zdev->state == ZPCI_FN_STATE_STANDBY &&
+ !clp_get_state(zdev->fid, &state) &&
+ state == ZPCI_FN_STATE_RESERVED)
+ list_move_tail(&zdev->entry, &remove);
+ }
+ spin_unlock(&zpci_list_lock);
+
+ list_for_each_entry_safe(zdev, tmp, &remove, entry)
+ zpci_remove_device(zdev);
+}
+
static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus)
{
return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL;
@@ -108,6 +127,7 @@ static int zpci_set_airq(struct zpci_dev *zdev)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
struct zpci_fib fib = {0};
+ u8 status;
fib.isc = PCI_ISC;
fib.sum = 1; /* enable summary notifications */
@@ -117,60 +137,58 @@ static int zpci_set_airq(struct zpci_dev *zdev)
fib.aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8;
fib.aisbo = zdev->aisb & 63;
- return zpci_mod_fc(req, &fib);
+ return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}
-struct mod_pci_args {
- u64 base;
- u64 limit;
- u64 iota;
- u64 fmb_addr;
-};
-
-static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args *args)
+/* Modify PCI: Unregister adapter interruptions */
+static int zpci_clear_airq(struct zpci_dev *zdev)
{
- u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, fn);
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
struct zpci_fib fib = {0};
+ u8 cc, status;
- fib.pba = args->base;
- fib.pal = args->limit;
- fib.iota = args->iota;
- fib.fmb_addr = args->fmb_addr;
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc == 3 || (cc == 1 && status == 24))
+ /* Function already gone or IRQs already deregistered. */
+ cc = 0;
- return zpci_mod_fc(req, &fib);
+ return cc ? -EIO : 0;
}
/* Modify PCI: Register I/O address translation parameters */
int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
u64 base, u64 limit, u64 iota)
{
- struct mod_pci_args args = { base, limit, iota, 0 };
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT);
+ struct zpci_fib fib = {0};
+ u8 status;
WARN_ON_ONCE(iota & 0x3fff);
- args.iota |= ZPCI_IOTA_RTTO_FLAG;
- return mod_pci(zdev, ZPCI_MOD_FC_REG_IOAT, dmaas, &args);
+ fib.pba = base;
+ fib.pal = limit;
+ fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
+ return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}
/* Modify PCI: Unregister I/O address translation parameters */
int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
{
- struct mod_pci_args args = { 0, 0, 0, 0 };
-
- return mod_pci(zdev, ZPCI_MOD_FC_DEREG_IOAT, dmaas, &args);
-}
-
-/* Modify PCI: Unregister adapter interruptions */
-static int zpci_clear_airq(struct zpci_dev *zdev)
-{
- struct mod_pci_args args = { 0, 0, 0, 0 };
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_DEREG_IOAT);
+ struct zpci_fib fib = {0};
+ u8 cc, status;
- return mod_pci(zdev, ZPCI_MOD_FC_DEREG_INT, 0, &args);
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc == 3) /* Function already gone. */
+ cc = 0;
+ return cc ? -EIO : 0;
}
/* Modify PCI: Set PCI function measurement parameters */
int zpci_fmb_enable_device(struct zpci_dev *zdev)
{
- struct mod_pci_args args = { 0, 0, 0, 0 };
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
+ struct zpci_fib fib = {0};
+ u8 cc, status;
if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length)
return -EINVAL;
@@ -185,25 +203,35 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
atomic64_set(&zdev->mapped_pages, 0);
atomic64_set(&zdev->unmapped_pages, 0);
- args.fmb_addr = virt_to_phys(zdev->fmb);
- return mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args);
+ fib.fmb_addr = virt_to_phys(zdev->fmb);
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc) {
+ kmem_cache_free(zdev_fmb_cache, zdev->fmb);
+ zdev->fmb = NULL;
+ }
+ return cc ? -EIO : 0;
}
/* Modify PCI: Disable PCI function measurement */
int zpci_fmb_disable_device(struct zpci_dev *zdev)
{
- struct mod_pci_args args = { 0, 0, 0, 0 };
- int rc;
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
+ struct zpci_fib fib = {0};
+ u8 cc, status;
if (!zdev->fmb)
return -EINVAL;
/* Function measurement is disabled if fmb address is zero */
- rc = mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args);
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc == 3) /* Function already gone. */
+ cc = 0;
- kmem_cache_free(zdev_fmb_cache, zdev->fmb);
- zdev->fmb = NULL;
- return rc;
+ if (!cc) {
+ kmem_cache_free(zdev_fmb_cache, zdev->fmb);
+ zdev->fmb = NULL;
+ }
+ return cc ? -EIO : 0;
}
static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
@@ -372,22 +400,21 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
struct msi_msg msg;
int rc, irq;
+ zdev->aisb = -1UL;
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
/* Allocate adapter summary indicator bit */
- rc = -EIO;
aisb = airq_iv_alloc_bit(zpci_aisb_iv);
if (aisb == -1UL)
- goto out;
+ return -EIO;
zdev->aisb = aisb;
/* Create adapter interrupt vector */
- rc = -ENOMEM;
zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
if (!zdev->aibv)
- goto out_si;
+ return -ENOMEM;
/* Wire up shortcut pointer */
zpci_aibv[aisb] = zdev->aibv;
@@ -398,10 +425,10 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
rc = -EIO;
irq = irq_alloc_desc(0); /* Alloc irq on node 0 */
if (irq < 0)
- goto out_msi;
+ return -ENOMEM;
rc = irq_set_msi_desc(irq, msi);
if (rc)
- goto out_msi;
+ return rc;
irq_set_chip_and_handler(irq, &zpci_irq_chip,
handle_simple_irq);
msg.data = hwirq;
@@ -415,27 +442,9 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
/* Enable adapter interrupts */
rc = zpci_set_airq(zdev);
if (rc)
- goto out_msi;
+ return rc;
return (msi_vecs == nvec) ? 0 : msi_vecs;
-
-out_msi:
- for_each_pci_msi_entry(msi, pdev) {
- if (hwirq-- == 0)
- break;
- irq_set_msi_desc(msi->irq, NULL);
- irq_free_desc(msi->irq);
- msi->msg.address_lo = 0;
- msi->msg.address_hi = 0;
- msi->msg.data = 0;
- msi->irq = 0;
- }
- zpci_aibv[aisb] = NULL;
- airq_iv_release(zdev->aibv);
-out_si:
- airq_iv_free_bit(zpci_aisb_iv, aisb);
-out:
- return rc;
}
void arch_teardown_msi_irqs(struct pci_dev *pdev)
@@ -451,6 +460,8 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
/* Release MSI interrupts */
for_each_pci_msi_entry(msi, pdev) {
+ if (!msi->irq)
+ continue;
if (msi->msi_attrib.is_msix)
__pci_msix_desc_mask_irq(msi, 1);
else
@@ -463,9 +474,15 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
msi->irq = 0;
}
- zpci_aibv[zdev->aisb] = NULL;
- airq_iv_release(zdev->aibv);
- airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
+ if (zdev->aisb != -1UL) {
+ zpci_aibv[zdev->aisb] = NULL;
+ airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
+ zdev->aisb = -1UL;
+ }
+ if (zdev->aibv) {
+ airq_iv_release(zdev->aibv);
+ zdev->aibv = NULL;
+ }
}
static void zpci_map_resources(struct pci_dev *pdev)
@@ -719,6 +736,16 @@ static int zpci_alloc_domain(struct zpci_dev *zdev)
{
if (zpci_unique_uid) {
zdev->domain = (u16) zdev->uid;
+ if (zdev->domain >= ZPCI_NR_DEVICES)
+ return 0;
+
+ spin_lock(&zpci_domain_lock);
+ if (test_bit(zdev->domain, zpci_domain)) {
+ spin_unlock(&zpci_domain_lock);
+ return -EEXIST;
+ }
+ set_bit(zdev->domain, zpci_domain);
+ spin_unlock(&zpci_domain_lock);
return 0;
}
@@ -735,7 +762,7 @@ static int zpci_alloc_domain(struct zpci_dev *zdev)
static void zpci_free_domain(struct zpci_dev *zdev)
{
- if (zpci_unique_uid)
+ if (zdev->domain >= ZPCI_NR_DEVICES)
return;
spin_lock(&zpci_domain_lock);
@@ -755,6 +782,7 @@ void pcibios_remove_bus(struct pci_bus *bus)
list_del(&zdev->entry);
spin_unlock(&zpci_list_lock);
+ zpci_dbg(3, "rem fid:%x\n", zdev->fid);
kfree(zdev);
}
@@ -847,15 +875,14 @@ out:
return rc;
}
-void zpci_stop_device(struct zpci_dev *zdev)
+void zpci_remove_device(struct zpci_dev *zdev)
{
- zpci_dma_exit_device(zdev);
- /*
- * Note: SCLP disables fh via set-pci-fn so don't
- * do that here.
- */
+ if (!zdev->bus)
+ return;
+
+ pci_stop_root_bus(zdev->bus);
+ pci_remove_root_bus(zdev->bus);
}
-EXPORT_SYMBOL_GPL(zpci_stop_device);
int zpci_report_error(struct pci_dev *pdev,
struct zpci_report_error_header *report)
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 1c3332ac1957..bd534b4d40e3 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -193,12 +193,12 @@ out:
int clp_add_pci_device(u32 fid, u32 fh, int configured)
{
struct zpci_dev *zdev;
- int rc;
+ int rc = -ENOMEM;
zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured);
zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
if (!zdev)
- return -ENOMEM;
+ goto error;
zdev->fh = fh;
zdev->fid = fid;
@@ -219,6 +219,7 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured)
return 0;
error:
+ zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc);
kfree(zdev);
return rc;
}
@@ -295,8 +296,8 @@ int clp_disable_fh(struct zpci_dev *zdev)
return rc;
}
-static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
- void (*cb)(struct clp_fh_list_entry *entry))
+static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, void *data,
+ void (*cb)(struct clp_fh_list_entry *, void *))
{
u64 resume_token = 0;
int entries, i, rc;
@@ -327,21 +328,13 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
resume_token = rrb->response.resume_token;
for (i = 0; i < entries; i++)
- cb(&rrb->response.fh_list[i]);
+ cb(&rrb->response.fh_list[i], data);
} while (resume_token);
out:
return rc;
}
-static void __clp_add(struct clp_fh_list_entry *entry)
-{
- if (!entry->vendor_id)
- return;
-
- clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
-}
-
-static void __clp_rescan(struct clp_fh_list_entry *entry)
+static void __clp_add(struct clp_fh_list_entry *entry, void *data)
{
struct zpci_dev *zdev;
@@ -349,22 +342,11 @@ static void __clp_rescan(struct clp_fh_list_entry *entry)
return;
zdev = get_zdev_by_fid(entry->fid);
- if (!zdev) {
+ if (!zdev)
clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
- return;
- }
-
- if (!entry->config_state) {
- /*
- * The handle is already disabled, that means no iota/irq freeing via
- * the firmware interfaces anymore. Need to free resources manually
- * (DMA memory, debug, sysfs)...
- */
- zpci_stop_device(zdev);
- }
}
-static void __clp_update(struct clp_fh_list_entry *entry)
+static void __clp_update(struct clp_fh_list_entry *entry, void *data)
{
struct zpci_dev *zdev;
@@ -387,7 +369,7 @@ int clp_scan_pci_devices(void)
if (!rrb)
return -ENOMEM;
- rc = clp_list_pci(rrb, __clp_add);
+ rc = clp_list_pci(rrb, NULL, __clp_add);
clp_free_block(rrb);
return rc;
@@ -398,11 +380,13 @@ int clp_rescan_pci_devices(void)
struct clp_req_rsp_list_pci *rrb;
int rc;
+ zpci_remove_reserved_devices();
+
rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
- rc = clp_list_pci(rrb, __clp_rescan);
+ rc = clp_list_pci(rrb, NULL, __clp_add);
clp_free_block(rrb);
return rc;
@@ -417,7 +401,40 @@ int clp_rescan_pci_devices_simple(void)
if (!rrb)
return -ENOMEM;
- rc = clp_list_pci(rrb, __clp_update);
+ rc = clp_list_pci(rrb, NULL, __clp_update);
+
+ clp_free_block(rrb);
+ return rc;
+}
+
+struct clp_state_data {
+ u32 fid;
+ enum zpci_state state;
+};
+
+static void __clp_get_state(struct clp_fh_list_entry *entry, void *data)
+{
+ struct clp_state_data *sd = data;
+
+ if (entry->fid != sd->fid)
+ return;
+
+ sd->state = entry->config_state;
+}
+
+int clp_get_state(u32 fid, enum zpci_state *state)
+{
+ struct clp_req_rsp_list_pci *rrb;
+ struct clp_state_data sd = {fid, ZPCI_FN_STATE_RESERVED};
+ int rc;
+
+ rrb = clp_alloc_block(GFP_KERNEL);
+ if (!rrb)
+ return -ENOMEM;
+
+ rc = clp_list_pci(rrb, &sd, __clp_get_state);
+ if (!rc)
+ *state = sd.state;
clp_free_block(rrb);
return rc;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 9081a57fa340..8eb1cc341dab 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -601,7 +601,9 @@ void zpci_dma_exit_device(struct zpci_dev *zdev)
*/
WARN_ON(zdev->s390_domain);
- zpci_unregister_ioat(zdev, 0);
+ if (zpci_unregister_ioat(zdev, 0))
+ return;
+
dma_cleanup_tables(zdev->dma_table);
zdev->dma_table = NULL;
vfree(zdev->iommu_bitmap);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index c2b27ad8e94d..0bbc04af4418 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -74,6 +74,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = NULL;
+ enum zpci_state state;
int ret;
if (zdev)
@@ -108,6 +109,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
break;
case 0x0303: /* Deconfiguration requested */
+ if (!zdev)
+ break;
if (pdev)
pci_stop_and_remove_bus_device_locked(pdev);
@@ -121,7 +124,9 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
zdev->state = ZPCI_FN_STATE_STANDBY;
break;
- case 0x0304: /* Configured -> Standby */
+ case 0x0304: /* Configured -> Standby|Reserved */
+ if (!zdev)
+ break;
if (pdev) {
/* Give the driver a hint that the function is
* already unusable. */
@@ -132,6 +137,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
zdev->fh = ccdf->fh;
zpci_disable_device(zdev);
zdev->state = ZPCI_FN_STATE_STANDBY;
+ if (!clp_get_state(ccdf->fid, &state) &&
+ state == ZPCI_FN_STATE_RESERVED) {
+ zpci_remove_device(zdev);
+ }
break;
case 0x0306: /* 0x308 or 0x302 for multiple devices */
clp_rescan_pci_devices();
@@ -139,8 +148,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
case 0x0308: /* Standby -> Reserved */
if (!zdev)
break;
- pci_stop_root_bus(zdev->bus);
- pci_remove_root_bus(zdev->bus);
+ zpci_remove_device(zdev);
break;
default:
break;
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index fa8d7d4b9751..ea34086c8674 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -40,20 +40,20 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
return cc;
}
-int zpci_mod_fc(u64 req, struct zpci_fib *fib)
+u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
{
- u8 cc, status;
+ u8 cc;
do {
- cc = __mpcifc(req, fib, &status);
+ cc = __mpcifc(req, fib, status);
if (cc == 2)
msleep(ZPCI_INSN_BUSY_DELAY);
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, status, req, 0);
+ zpci_err_insn(cc, *status, req, 0);
- return (cc) ? -EIO : 0;
+ return cc;
}
/* Refresh PCI Translations */
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index be63fbd699fd..025ea20fc4b4 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -34,8 +34,6 @@ static struct facility_def facility_defs[] = {
18, /* long displacement facility */
#endif
#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
- 7, /* stfle */
- 17, /* message security assist */
21, /* extended-immediate facility */
25, /* store clock fast */
#endif
diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h
index d9a922d8711b..299274581968 100644
--- a/arch/score/include/asm/processor.h
+++ b/arch/score/include/asm/processor.h
@@ -13,7 +13,6 @@ struct task_struct;
*/
extern void (*cpu_wait)(void);
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
extern void start_thread(struct pt_regs *regs,
unsigned long pc, unsigned long sp);
extern unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/score/include/uapi/asm/Kbuild b/arch/score/include/uapi/asm/Kbuild
index b15bf6bc0e94..c94ee54210bc 100644
--- a/arch/score/include/uapi/asm/Kbuild
+++ b/arch/score/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += siginfo.h
diff --git a/arch/score/include/uapi/asm/siginfo.h b/arch/score/include/uapi/asm/siginfo.h
deleted file mode 100644
index 87ca35607a28..000000000000
--- a/arch/score/include/uapi/asm/siginfo.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SCORE_SIGINFO_H
-#define _ASM_SCORE_SIGINFO_H
-
-#include <asm-generic/siginfo.h>
-
-#endif /* _ASM_SCORE_SIGINFO_H */
diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c
index eb64d7a677cb..6e20241a1ed4 100644
--- a/arch/score/kernel/process.c
+++ b/arch/score/kernel/process.c
@@ -101,11 +101,6 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *r)
return 1;
}
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return task_pt_regs(tsk)->cp0_epc;
-}
-
unsigned long get_wchan(struct task_struct *task)
{
if (!task || task == current || task->state == TASK_RUNNING)
diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index 4e21949593cf..3554fcaa023b 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -10,7 +10,7 @@ config SH_DEVICE_TREE
bool "Board Described by Device Tree"
select OF
select OF_EARLY_FLATTREE
- select CLKSRC_OF
+ select TIMER_OF
select COMMON_CLK
select GENERIC_CALIBRATE_DELAY
help
diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c
index 1fb6d5714bae..4feb7c86f4ac 100644
--- a/arch/sh/boards/of-generic.c
+++ b/arch/sh/boards/of-generic.c
@@ -119,7 +119,7 @@ static void __init sh_of_mem_reserve(void)
static void __init sh_of_time_init(void)
{
pr_info("SH generic board support: scanning for clocksource devices\n");
- clocksource_probe();
+ timer_probe();
}
static void __init sh_of_setup(char **cmdline_p)
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index cf2a75063b53..590c91ae7541 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -29,7 +29,6 @@ generic-y += rwsem.h
generic-y += sembuf.h
generic-y += serial.h
generic-y += shmbuf.h
-generic-y += siginfo.h
generic-y += sizes.h
generic-y += socket.h
generic-y += statfs.h
diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild
index b15bf6bc0e94..b55fc2ae1e8c 100644
--- a/arch/sh/include/uapi/asm/Kbuild
+++ b/arch/sh/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += siginfo.h
diff --git a/arch/sh/include/uapi/asm/ioctls.h b/arch/sh/include/uapi/asm/ioctls.h
index c9903e56ccf4..eec7901e9e65 100644
--- a/arch/sh/include/uapi/asm/ioctls.h
+++ b/arch/sh/include/uapi/asm/ioctls.h
@@ -93,6 +93,7 @@
#define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */
#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */
#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */
+#define TIOCGPTPEER _IOR('T', 0x41, int) /* Safely open the slave */
#define TIOCSERCONFIG _IO('T', 83) /* 0x5453 */
#define TIOCSERGWILD _IOR('T', 84, int) /* 0x5454 */
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index dd27159819eb..b395e5620c0b 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -67,9 +67,6 @@ struct thread_struct {
.current_ds = KERNEL_DS, \
}
-/* Return saved PC of a blocked thread. */
-unsigned long thread_saved_pc(struct task_struct *t);
-
/* Do necessary setup to start up a newly executed thread. */
static inline void start_thread(struct pt_regs * regs, unsigned long pc,
unsigned long sp)
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index b58ee9018433..f04dc5a43062 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -89,9 +89,7 @@ struct thread_struct {
#include <linux/types.h>
#include <asm/fpumacro.h>
-/* Return saved PC of a blocked thread. */
struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *);
/* On Uniprocessor, even in RMO processes see TSO semantics */
#ifdef CONFIG_SMP
diff --git a/arch/sparc/include/asm/siginfo.h b/arch/sparc/include/asm/siginfo.h
deleted file mode 100644
index 48c34c19f810..000000000000
--- a/arch/sparc/include/asm/siginfo.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __SPARC_SIGINFO_H
-#define __SPARC_SIGINFO_H
-
-#include <uapi/asm/siginfo.h>
-
-
-#ifdef CONFIG_COMPAT
-
-struct compat_siginfo;
-
-#endif /* CONFIG_COMPAT */
-
-#endif /* !(__SPARC_SIGINFO_H) */
diff --git a/arch/sparc/include/uapi/asm/ioctls.h b/arch/sparc/include/uapi/asm/ioctls.h
index 06b3f6c3bb9a..6d27398632ea 100644
--- a/arch/sparc/include/uapi/asm/ioctls.h
+++ b/arch/sparc/include/uapi/asm/ioctls.h
@@ -27,7 +27,7 @@
#define TIOCGRS485 _IOR('T', 0x41, struct serial_rs485)
#define TIOCSRS485 _IOWR('T', 0x42, struct serial_rs485)
-/* Note that all the ioctls that are not available in Linux have a
+/* Note that all the ioctls that are not available in Linux have a
* double underscore on the front to: a) avoid some programs to
* think we support some ioctls under Linux (autoconfiguration stuff)
*/
@@ -88,6 +88,7 @@
#define TIOCGPTN _IOR('t', 134, unsigned int) /* Get Pty Number */
#define TIOCSPTLCK _IOW('t', 135, int) /* Lock/unlock PTY */
#define TIOCSIG _IOW('t', 136, int) /* Generate signal on Pty slave */
+#define TIOCGPTPEER _IOR('t', 137, int) /* Safely open the slave */
/* Little f */
#define FIOCLEX _IO('f', 1)
diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
index 07933b9e9ce0..93adde1ac166 100644
--- a/arch/sparc/kernel/jump_label.c
+++ b/arch/sparc/kernel/jump_label.c
@@ -41,12 +41,10 @@ void arch_jump_label_transform(struct jump_entry *entry,
val = 0x01000000;
}
- get_online_cpus();
mutex_lock(&text_mutex);
*insn = val;
flushi(insn);
mutex_unlock(&text_mutex);
- put_online_cpus();
}
#endif
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index b6dac8e980f0..9245f93398c7 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -177,14 +177,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
}
/*
- * Note: sparc64 has a pretty intricated thread_saved_pc, check it out.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- return task_thread_info(tsk)->kpc;
-}
-
-/*
* Free current thread data structures etc..
*/
void exit_thread(struct task_struct *tsk)
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 1badc493e62e..b96104da5bd6 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -400,25 +400,6 @@ core_initcall(sparc_sysrq_init);
#endif
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct thread_info *ti = task_thread_info(tsk);
- unsigned long ret = 0xdeadbeefUL;
-
- if (ti && ti->ksp) {
- unsigned long *sp;
- sp = (unsigned long *)(ti->ksp + STACK_BIAS);
- if (((unsigned long)sp & (sizeof(long) - 1)) == 0UL &&
- sp[14]) {
- unsigned long *fp;
- fp = (unsigned long *)(sp[14] + STACK_BIAS);
- if (((unsigned long)fp & (sizeof(long) - 1)) == 0UL)
- ret = fp[15];
- }
- }
- return ret;
-}
-
/* Free current thread data structures etc.. */
void exit_thread(struct task_struct *tsk)
{
diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c
index 075d38980dee..7dd9b57f3a61 100644
--- a/arch/sparc/kernel/vio.c
+++ b/arch/sparc/kernel/vio.c
@@ -105,6 +105,7 @@ static ssize_t devspec_show(struct device *dev,
return sprintf(buf, "%s\n", str);
}
+static DEVICE_ATTR_RO(devspec);
static ssize_t type_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -112,6 +113,7 @@ static ssize_t type_show(struct device *dev,
struct vio_dev *vdev = to_vio_dev(dev);
return sprintf(buf, "%s\n", vdev->type);
}
+static DEVICE_ATTR_RO(type);
static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -120,17 +122,19 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "vio:T%sS%s\n", vdev->type, vdev->compat);
}
+static DEVICE_ATTR_RO(modalias);
-static struct device_attribute vio_dev_attrs[] = {
- __ATTR_RO(devspec),
- __ATTR_RO(type),
- __ATTR_RO(modalias),
- __ATTR_NULL
-};
+static struct attribute *vio_dev_attrs[] = {
+ &dev_attr_devspec.attr,
+ &dev_attr_type.attr,
+ &dev_attr_modalias.attr,
+ NULL,
+ };
+ATTRIBUTE_GROUPS(vio_dev);
static struct bus_type vio_bus_type = {
.name = "vio",
- .dev_attrs = vio_dev_attrs,
+ .dev_groups = vio_dev_groups,
.uevent = vio_hotplug,
.match = vio_bus_match,
.probe = vio_device_probe,
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 0bc9968b97a1..f71e5206650b 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -214,13 +214,6 @@ static inline void release_thread(struct task_struct *dead_task)
extern void prepare_exit_to_usermode(struct pt_regs *regs, u32 flags);
-
-/*
- * Return saved (kernel) PC of a blocked thread.
- * Only used in a printk() in kernel/sched/core.c, so don't work too hard.
- */
-#define thread_saved_pc(t) ((t)->thread.pc)
-
unsigned long get_wchan(struct task_struct *p);
/* Return initial ksp value for given task. */
diff --git a/arch/tile/kernel/jump_label.c b/arch/tile/kernel/jump_label.c
index 07802d586988..93931a46625b 100644
--- a/arch/tile/kernel/jump_label.c
+++ b/arch/tile/kernel/jump_label.c
@@ -45,14 +45,12 @@ static void __jump_label_transform(struct jump_entry *e,
void arch_jump_label_transform(struct jump_entry *e,
enum jump_label_type type)
{
- get_online_cpus();
mutex_lock(&text_mutex);
__jump_label_transform(e, type);
flush_icache_range(e->code, e->code + sizeof(tilegx_bundle_bits));
mutex_unlock(&text_mutex);
- put_online_cpus();
}
__init_or_module void arch_jump_label_transform_static(struct jump_entry *e,
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
index 1a70e6c0f259..94709ab41ed8 100644
--- a/arch/tile/lib/atomic_asm_32.S
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -24,8 +24,7 @@
* has an opportunity to return -EFAULT to the user if needed.
* The 64-bit routines just return a "long long" with the value,
* since they are only used from kernel space and don't expect to fault.
- * Support for 16-bit ops is included in the framework but we don't provide
- * any (x86_64 has an atomic_inc_short(), so we might want to some day).
+ * Support for 16-bit ops is included in the framework but we don't provide any.
*
* Note that the caller is advised to issue a suitable L1 or L2
* prefetch on the address being manipulated to avoid extra stalls.
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 85410279beab..b55fe9bf5d3e 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -534,7 +534,7 @@ static void ubd_handler(void)
for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
blk_end_request(
(*irq_req_buffer)[count]->req,
- 0,
+ BLK_STS_OK,
(*irq_req_buffer)[count]->length
);
kfree((*irq_req_buffer)[count]);
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 2d1e0dd5bb0b..f6d1a3f747a9 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -58,8 +58,6 @@ static inline void release_thread(struct task_struct *task)
{
}
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
static inline void mm_copy_segments(struct mm_struct *from_mm,
struct mm_struct *new_mm)
{
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 64a1fd06f3fd..7b5640117325 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -56,12 +56,6 @@ union thread_union cpu0_irqstack
__attribute__((__section__(".data..init_irqstack"))) =
{ INIT_THREAD_INFO(init_task) };
-unsigned long thread_saved_pc(struct task_struct *task)
-{
- /* FIXME: Need to look up userspace_pid by cpu */
- return os_process_pc(userspace_pid[0]);
-}
-
/* Changed in setup_arch, which is called in early boot */
static char host_info[(__NEW_UTS_LEN + 1) * 5];
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index e9ad511c1043..7a53a55341de 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -44,7 +44,6 @@ generic-y += serial.h
generic-y += setup.h
generic-y += shmbuf.h
generic-y += shmparam.h
-generic-y += siginfo.h
generic-y += signal.h
generic-y += sizes.h
generic-y += socket.h
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
index 13a97aa2285f..1c44d3b3eba0 100644
--- a/arch/unicore32/include/uapi/asm/Kbuild
+++ b/arch/unicore32/include/uapi/asm/Kbuild
@@ -2,3 +2,4 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += kvm_para.h
+generic-y += siginfo.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0efb4c9497bc..e767ed24aeb4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -69,7 +69,7 @@ config X86
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
- select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP
+ select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select BUILDTIME_EXTABLE_SORT
@@ -87,6 +87,8 @@ config X86
select GENERIC_EARLY_IOREMAP
select GENERIC_FIND_FIRST_BIT
select GENERIC_IOMAP
+ select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
+ select GENERIC_IRQ_MIGRATION if SMP
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
select GENERIC_PENDING_IRQ if SMP
@@ -166,6 +168,7 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_USER_RETURN_NOTIFIER
select IRQ_FORCED_THREADING
+ select PCI_LOCKLESS_CONFIG
select PERF_EVENTS
select RTC_LIB
select RTC_MC146818_LIB
@@ -1082,7 +1085,7 @@ config X86_MCE_THRESHOLD
def_bool y
config X86_MCE_INJECT
- depends on X86_MCE && X86_LOCAL_APIC && X86_MCELOG_LEGACY
+ depends on X86_MCE && X86_LOCAL_APIC && DEBUG_FS
tristate "Machine check injector support"
---help---
Provide support for injecting machine checks for testing purposes.
@@ -2793,6 +2796,9 @@ config X86_DMA_REMAP
bool
depends on STA2X11
+config HAVE_GENERIC_GUP
+ def_bool y
+
source "net/Kconfig"
source "drivers/Kconfig"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index bf240b920473..ad2db82e9953 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -257,8 +257,6 @@ drivers-$(CONFIG_PM) += arch/x86/power/
drivers-$(CONFIG_FB) += arch/x86/video/
-drivers-$(CONFIG_RAS) += arch/x86/ras/
-
####
# boot loader support. Several targets are kept for legacy purposes
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index 73ccf63b0f48..9dc1ce6ba3c0 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -13,7 +13,7 @@ static inline char rdfs8(addr_t addr)
return *((char *)(fs + addr));
}
#include "../cmdline.c"
-static unsigned long get_cmd_line_ptr(void)
+unsigned long get_cmd_line_ptr(void)
{
unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr;
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index cbf4b87f55b9..c3e869eaef0c 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1046,9 +1046,31 @@ struct boot_params *efi_main(struct efi_config *c,
memset((char *)gdt->address, 0x0, gdt->size);
desc = (struct desc_struct *)gdt->address;
- /* The first GDT is a dummy and the second is unused. */
- desc += 2;
+ /* The first GDT is a dummy. */
+ desc++;
+
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ /* __KERNEL32_CS */
+ desc->limit0 = 0xffff;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
+ desc->s = DESC_TYPE_CODE_DATA;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit = 0xf;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = SEG_OP_SIZE_32BIT;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
+ desc++;
+ } else {
+ /* Second entry is unused on 32-bit */
+ desc++;
+ }
+ /* __KERNEL_CS */
desc->limit0 = 0xffff;
desc->base0 = 0x0000;
desc->base1 = 0x0000;
@@ -1058,12 +1080,18 @@ struct boot_params *efi_main(struct efi_config *c,
desc->p = 1;
desc->limit = 0xf;
desc->avl = 0;
- desc->l = 0;
- desc->d = SEG_OP_SIZE_32BIT;
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ desc->l = 1;
+ desc->d = 0;
+ } else {
+ desc->l = 0;
+ desc->d = SEG_OP_SIZE_32BIT;
+ }
desc->g = SEG_GRANULARITY_4KB;
desc->base2 = 0x00;
-
desc++;
+
+ /* __KERNEL_DS */
desc->limit0 = 0xffff;
desc->base0 = 0x0000;
desc->base1 = 0x0000;
@@ -1077,24 +1105,25 @@ struct boot_params *efi_main(struct efi_config *c,
desc->d = SEG_OP_SIZE_32BIT;
desc->g = SEG_GRANULARITY_4KB;
desc->base2 = 0x00;
-
-#ifdef CONFIG_X86_64
- /* Task segment value */
desc++;
- desc->limit0 = 0x0000;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_TSS;
- desc->s = 0;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit = 0x0;
- desc->avl = 0;
- desc->l = 0;
- desc->d = 0;
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
-#endif /* CONFIG_X86_64 */
+
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ /* Task segment value */
+ desc->limit0 = 0x0000;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_TSS;
+ desc->s = 0;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit = 0x0;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = 0;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
+ desc++;
+ }
asm volatile("cli");
asm volatile ("lgdt %0" : : "m" (*gdt));
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d2ae1f821e0c..fbf4c32d0b62 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -346,6 +346,48 @@ preferred_addr:
/* Set up the stack */
leaq boot_stack_end(%rbx), %rsp
+#ifdef CONFIG_X86_5LEVEL
+ /* Check if 5-level paging has already enabled */
+ movq %cr4, %rax
+ testl $X86_CR4_LA57, %eax
+ jnz lvl5
+
+ /*
+ * At this point we are in long mode with 4-level paging enabled,
+ * but we want to enable 5-level paging.
+ *
+ * The problem is that we cannot do it directly. Setting LA57 in
+ * long mode would trigger #GP. So we need to switch off long mode
+ * first.
+ *
+ * NOTE: This is not going to work if bootloader put us above 4G
+ * limit.
+ *
+ * The first step is go into compatibility mode.
+ */
+
+ /* Clear additional page table */
+ leaq lvl5_pgtable(%rbx), %rdi
+ xorq %rax, %rax
+ movq $(PAGE_SIZE/8), %rcx
+ rep stosq
+
+ /*
+ * Setup current CR3 as the first and only entry in a new top level
+ * page table.
+ */
+ movq %cr3, %rdi
+ leaq 0x7 (%rdi), %rax
+ movq %rax, lvl5_pgtable(%rbx)
+
+ /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
+ pushq $__KERNEL32_CS
+ leaq compatible_mode(%rip), %rax
+ pushq %rax
+ lretq
+lvl5:
+#endif
+
/* Zero EFLAGS */
pushq $0
popfq
@@ -429,6 +471,44 @@ relocated:
jmp *%rax
.code32
+#ifdef CONFIG_X86_5LEVEL
+compatible_mode:
+ /* Setup data and stack segments */
+ movl $__KERNEL_DS, %eax
+ movl %eax, %ds
+ movl %eax, %ss
+
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ /* Point CR3 to 5-level paging */
+ leal lvl5_pgtable(%ebx), %eax
+ movl %eax, %cr3
+
+ /* Enable PAE and LA57 mode */
+ movl %cr4, %eax
+ orl $(X86_CR4_PAE | X86_CR4_LA57), %eax
+ movl %eax, %cr4
+
+ /* Calculate address we are running at */
+ call 1f
+1: popl %edi
+ subl $1b, %edi
+
+ /* Prepare stack for far return to Long Mode */
+ pushl $__KERNEL_CS
+ leal lvl5(%edi), %eax
+ push %eax
+
+ /* Enable paging back */
+ movl $(X86_CR0_PG | X86_CR0_PE), %eax
+ movl %eax, %cr0
+
+ lret
+#endif
+
no_longmode:
/* This isn't an x86-64 CPU so hang */
1:
@@ -442,7 +522,7 @@ gdt:
.word gdt_end - gdt
.long gdt
.word 0
- .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
.quad 0x00af9a000000ffff /* __KERNEL_CS */
.quad 0x00cf92000000ffff /* __KERNEL_DS */
.quad 0x0080890000000000 /* TS descriptor */
@@ -486,3 +566,7 @@ boot_stack_end:
.balign 4096
pgtable:
.fill BOOT_PGT_SIZE, 1, 0
+#ifdef CONFIG_X86_5LEVEL
+lvl5_pgtable:
+ .fill PAGE_SIZE, 1, 0
+#endif
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 54c24f0a43d3..91f27ab970ef 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -9,16 +9,42 @@
* contain the entire properly aligned running kernel image.
*
*/
+
+/*
+ * isspace() in linux/ctype.h is expected by next_args() to filter
+ * out "space/lf/tab". While boot/ctype.h conflicts with linux/ctype.h,
+ * since isdigit() is implemented in both of them. Hence disable it
+ * here.
+ */
+#define BOOT_CTYPE_H
+
+/*
+ * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h.
+ * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL
+ * which is meaningless and will cause compiling error in some cases.
+ * So do not include linux/export.h and define EXPORT_SYMBOL(sym)
+ * as empty.
+ */
+#define _LINUX_EXPORT_H
+#define EXPORT_SYMBOL(sym)
+
#include "misc.h"
#include "error.h"
-#include "../boot.h"
+#include "../string.h"
#include <generated/compile.h>
#include <linux/module.h>
#include <linux/uts.h>
#include <linux/utsname.h>
+#include <linux/ctype.h>
#include <generated/utsrelease.h>
+/* Macros used by the included decompressor code below. */
+#define STATIC
+#include <linux/decompress/mm.h>
+
+extern unsigned long get_cmd_line_ptr(void);
+
/* Simplified build-specific string for starting entropy. */
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
@@ -62,6 +88,11 @@ struct mem_vector {
static bool memmap_too_large;
+
+/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
+unsigned long long mem_limit = ULLONG_MAX;
+
+
enum mem_avoid_index {
MEM_AVOID_ZO_RANGE = 0,
MEM_AVOID_INITRD,
@@ -85,49 +116,14 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
return true;
}
-/**
- * _memparse - Parse a string with mem suffixes into a number
- * @ptr: Where parse begins
- * @retptr: (output) Optional pointer to next char after parse completes
- *
- * Parses a string into a number. The number stored at @ptr is
- * potentially suffixed with K, M, G, T, P, E.
- */
-static unsigned long long _memparse(const char *ptr, char **retptr)
+char *skip_spaces(const char *str)
{
- char *endptr; /* Local pointer to end of parsed string */
-
- unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
-
- switch (*endptr) {
- case 'E':
- case 'e':
- ret <<= 10;
- case 'P':
- case 'p':
- ret <<= 10;
- case 'T':
- case 't':
- ret <<= 10;
- case 'G':
- case 'g':
- ret <<= 10;
- case 'M':
- case 'm':
- ret <<= 10;
- case 'K':
- case 'k':
- ret <<= 10;
- endptr++;
- default:
- break;
- }
-
- if (retptr)
- *retptr = endptr;
-
- return ret;
+ while (isspace(*str))
+ ++str;
+ return (char *)str;
}
+#include "../../../../lib/ctype.c"
+#include "../../../../lib/cmdline.c"
static int
parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
@@ -142,40 +138,41 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
return -EINVAL;
oldp = p;
- *size = _memparse(p, &p);
+ *size = memparse(p, &p);
if (p == oldp)
return -EINVAL;
switch (*p) {
- case '@':
- /* Skip this region, usable */
- *start = 0;
- *size = 0;
- return 0;
case '#':
case '$':
case '!':
- *start = _memparse(p + 1, &p);
+ *start = memparse(p + 1, &p);
+ return 0;
+ case '@':
+ /* memmap=nn@ss specifies usable region, should be skipped */
+ *size = 0;
+ /* Fall through */
+ default:
+ /*
+ * If w/o offset, only size specified, memmap=nn[KMG] has the
+ * same behaviour as mem=nn[KMG]. It limits the max address
+ * system can use. Region above the limit should be avoided.
+ */
+ *start = 0;
return 0;
}
return -EINVAL;
}
-static void mem_avoid_memmap(void)
+static void mem_avoid_memmap(char *str)
{
- char arg[128];
+ static int i;
int rc;
- int i;
- char *str;
- /* See if we have any memmap areas */
- rc = cmdline_find_option("memmap", arg, sizeof(arg));
- if (rc <= 0)
+ if (i >= MAX_MEMMAP_REGIONS)
return;
- i = 0;
- str = arg;
while (str && (i < MAX_MEMMAP_REGIONS)) {
int rc;
unsigned long long start, size;
@@ -188,9 +185,14 @@ static void mem_avoid_memmap(void)
if (rc < 0)
break;
str = k;
- /* A usable region that should not be skipped */
- if (size == 0)
+
+ if (start == 0) {
+ /* Store the specified memory limit if size > 0 */
+ if (size > 0)
+ mem_limit = size;
+
continue;
+ }
mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start;
mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size;
@@ -202,6 +204,57 @@ static void mem_avoid_memmap(void)
memmap_too_large = true;
}
+static int handle_mem_memmap(void)
+{
+ char *args = (char *)get_cmd_line_ptr();
+ size_t len = strlen((char *)args);
+ char *tmp_cmdline;
+ char *param, *val;
+ u64 mem_size;
+
+ if (!strstr(args, "memmap=") && !strstr(args, "mem="))
+ return 0;
+
+ tmp_cmdline = malloc(len + 1);
+ if (!tmp_cmdline )
+ error("Failed to allocate space for tmp_cmdline");
+
+ memcpy(tmp_cmdline, args, len);
+ tmp_cmdline[len] = 0;
+ args = tmp_cmdline;
+
+ /* Chew leading spaces */
+ args = skip_spaces(args);
+
+ while (*args) {
+ args = next_arg(args, &param, &val);
+ /* Stop at -- */
+ if (!val && strcmp(param, "--") == 0) {
+ warn("Only '--' specified in cmdline");
+ free(tmp_cmdline);
+ return -1;
+ }
+
+ if (!strcmp(param, "memmap")) {
+ mem_avoid_memmap(val);
+ } else if (!strcmp(param, "mem")) {
+ char *p = val;
+
+ if (!strcmp(p, "nopentium"))
+ continue;
+ mem_size = memparse(p, &p);
+ if (mem_size == 0) {
+ free(tmp_cmdline);
+ return -EINVAL;
+ }
+ mem_limit = mem_size;
+ }
+ }
+
+ free(tmp_cmdline);
+ return 0;
+}
+
/*
* In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
* The mem_avoid array is used to store the ranges that need to be avoided
@@ -323,7 +376,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
/* We don't need to set a mapping for setup_data. */
/* Mark the memmap regions we need to avoid */
- mem_avoid_memmap();
+ handle_mem_memmap();
#ifdef CONFIG_X86_VERBOSE_BOOTUP
/* Make sure video RAM can be used. */
@@ -432,7 +485,8 @@ static void process_e820_entry(struct boot_e820_entry *entry,
{
struct mem_vector region, overlap;
struct slot_area slot_area;
- unsigned long start_orig;
+ unsigned long start_orig, end;
+ struct boot_e820_entry cur_entry;
/* Skip non-RAM entries. */
if (entry->type != E820_TYPE_RAM)
@@ -446,8 +500,15 @@ static void process_e820_entry(struct boot_e820_entry *entry,
if (entry->addr + entry->size < minimum)
return;
- region.start = entry->addr;
- region.size = entry->size;
+ /* Ignore entries above memory limit */
+ end = min(entry->size + entry->addr, mem_limit);
+ if (entry->addr >= end)
+ return;
+ cur_entry.addr = entry->addr;
+ cur_entry.size = end - entry->addr;
+
+ region.start = cur_entry.addr;
+ region.size = cur_entry.size;
/* Give up if slot area array is full. */
while (slot_area_index < MAX_SLOT_AREA) {
@@ -461,7 +522,7 @@ static void process_e820_entry(struct boot_e820_entry *entry,
region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
/* Did we raise the address above this e820 region? */
- if (region.start > entry->addr + entry->size)
+ if (region.start > cur_entry.addr + cur_entry.size)
return;
/* Reduce size by any delta from the original address. */
@@ -564,9 +625,6 @@ void choose_random_location(unsigned long input,
{
unsigned long random_addr, min_addr;
- /* By default, keep output position unchanged. */
- *virt_addr = *output;
-
if (cmdline_find_option_bool("nokaslr")) {
warn("KASLR disabled: 'nokaslr' on cmdline.");
return;
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b3c5a5f030ce..00241c815524 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -338,7 +338,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
unsigned long output_len)
{
const unsigned long kernel_total_size = VO__end - VO__text;
- unsigned long virt_addr = (unsigned long)output;
+ unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
/* Retain x86 boot parameters pointer passed from startup_32/64. */
boot_params = rmode;
@@ -390,6 +390,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#ifdef CONFIG_X86_64
if (heap > 0x3fffffffffffUL)
error("Destination address too large");
+ if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
+ error("Destination virtual address is beyond the kernel mapping area");
#else
if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
error("Destination address too large");
@@ -397,7 +399,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#ifndef CONFIG_RELOCATABLE
if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
error("Destination address does not match LOAD_PHYSICAL_ADDR");
- if ((unsigned long)output != virt_addr)
+ if (virt_addr != LOAD_PHYSICAL_ADDR)
error("Destination virtual address changed when not relocatable");
#endif
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 1c8355eadbd1..766a5211f827 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -81,8 +81,6 @@ static inline void choose_random_location(unsigned long input,
unsigned long output_size,
unsigned long *virt_addr)
{
- /* No change from existing output location. */
- *virt_addr = *output;
}
#endif
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index 1d78f1739087..28029be47fbb 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -63,7 +63,7 @@ static void *alloc_pgt_page(void *context)
static struct alloc_pgt_data pgt_data;
/* The top level page table entry pointer. */
-static unsigned long level4p;
+static unsigned long top_level_pgt;
/*
* Mapping information structure passed to kernel_ident_mapping_init().
@@ -91,9 +91,15 @@ void initialize_identity_maps(void)
* If we came here via startup_32(), cr3 will be _pgtable already
* and we must append to the existing area instead of entirely
* overwriting it.
+ *
+ * With 5-level paging, we use '_pgtable' to allocate the p4d page table,
+ * the top-level page table is allocated separately.
+ *
+ * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
+ * cases. On 4-level paging it's equal to 'top_level_pgt'.
*/
- level4p = read_cr3();
- if (level4p == (unsigned long)_pgtable) {
+ top_level_pgt = read_cr3_pa();
+ if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
debug_putstr("booted via startup_32()\n");
pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
@@ -103,7 +109,7 @@ void initialize_identity_maps(void)
pgt_data.pgt_buf = _pgtable;
pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
- level4p = (unsigned long)alloc_pgt_page(&pgt_data);
+ top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
}
}
@@ -123,7 +129,7 @@ void add_identity_map(unsigned long start, unsigned long size)
return;
/* Build the mapping. */
- kernel_ident_mapping_init(&mapping_info, (pgd_t *)level4p,
+ kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt,
start, end);
}
@@ -134,5 +140,5 @@ void add_identity_map(unsigned long start, unsigned long size)
*/
void finalize_identity_maps(void)
{
- write_cr3(level4p);
+ write_cr3(top_level_pgt);
}
diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S
index 1eb7d298b47d..15d9f74b0008 100644
--- a/arch/x86/boot/copy.S
+++ b/arch/x86/boot/copy.S
@@ -65,23 +65,3 @@ GLOBAL(copy_to_fs)
popw %es
retl
ENDPROC(copy_to_fs)
-
-#if 0 /* Not currently used, but can be enabled as needed */
-GLOBAL(copy_from_gs)
- pushw %ds
- pushw %gs
- popw %ds
- calll memcpy
- popw %ds
- retl
-ENDPROC(copy_from_gs)
-
-GLOBAL(copy_to_gs)
- pushw %es
- pushw %gs
- popw %es
- calll memcpy
- popw %es
- retl
-ENDPROC(copy_to_gs)
-#endif
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 5457b02fc050..630e3664906b 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -122,6 +122,14 @@ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int bas
return result;
}
+long simple_strtol(const char *cp, char **endp, unsigned int base)
+{
+ if (*cp == '-')
+ return -simple_strtoull(cp + 1, endp, base);
+
+ return simple_strtoull(cp, endp, base);
+}
+
/**
* strlen - Find the length of a string
* @s: The string to be sized
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
index 113588ddb43f..f274a50db5fa 100644
--- a/arch/x86/boot/string.h
+++ b/arch/x86/boot/string.h
@@ -22,6 +22,7 @@ extern int strcmp(const char *str1, const char *str2);
extern int strncmp(const char *cs, const char *ct, size_t count);
extern size_t strlen(const char *s);
extern char *strstr(const char *s1, const char *s2);
+extern char *strchr(const char *s, int c);
extern size_t strnlen(const char *s, size_t maxlen);
extern unsigned int atou(const char *s);
extern unsigned long long simple_strtoull(const char *cp, char **endp,
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 34b3fa2889d1..9e32d40d71bd 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,6 +2,8 @@
# Arch-specific CryptoAPI modules.
#
+OBJECT_FILES_NON_STANDARD := y
+
avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
$(comma)4)$(comma)%ymm2,yes,no)
diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile
index 2f8756375df5..2e14acc3da25 100644
--- a/arch/x86/crypto/sha1-mb/Makefile
+++ b/arch/x86/crypto/sha1-mb/Makefile
@@ -2,6 +2,8 @@
# Arch-specific CryptoAPI modules.
#
+OBJECT_FILES_NON_STANDARD := y
+
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
$(comma)4)$(comma)%ymm2,yes,no)
ifeq ($(avx2_supported),yes)
diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile
index 41089e7c400c..45b4fca6c4a8 100644
--- a/arch/x86/crypto/sha256-mb/Makefile
+++ b/arch/x86/crypto/sha256-mb/Makefile
@@ -2,6 +2,8 @@
# Arch-specific CryptoAPI modules.
#
+OBJECT_FILES_NON_STANDARD := y
+
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
$(comma)4)$(comma)%ymm2,yes,no)
ifeq ($(avx2_supported),yes)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4a4c0834f965..a9a8027a6c0e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -265,7 +265,8 @@ return_from_SYSCALL_64:
* If width of "canonical tail" ever becomes variable, this will need
* to be updated to remain correct on both old and new CPUs.
*
- * Change top 16 bits to be the sign-extension of 47th bit
+ * Change top bits to match most significant bit (47th or 56th bit
+ * depending on paging mode) in the address.
*/
shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 580b60f5ac83..ff1ea2fb9705 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1750,6 +1750,8 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
return ret;
}
+static struct attribute_group x86_pmu_attr_group;
+
static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
@@ -1813,6 +1815,14 @@ static int __init init_hw_perf_events(void)
x86_pmu_events_group.attrs = tmp;
}
+ if (x86_pmu.attrs) {
+ struct attribute **tmp;
+
+ tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs);
+ if (!WARN_ON(!tmp))
+ x86_pmu_attr_group.attrs = tmp;
+ }
+
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
@@ -2101,8 +2111,7 @@ static int x86_pmu_event_init(struct perf_event *event)
static void refresh_pce(void *ignored)
{
- if (current->active_mm)
- load_mm_cr4(current->active_mm);
+ load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
}
static void x86_pmu_event_mapped(struct perf_event *event)
@@ -2224,7 +2233,6 @@ void perf_check_microcode(void)
if (x86_pmu.check_microcode)
x86_pmu.check_microcode();
}
-EXPORT_SYMBOL_GPL(perf_check_microcode);
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
@@ -2255,7 +2263,7 @@ static struct pmu pmu = {
void arch_perf_update_userpage(struct perf_event *event,
struct perf_event_mmap_page *userpg, u64 now)
{
- struct cyc2ns_data *data;
+ struct cyc2ns_data data;
u64 offset;
userpg->cap_user_time = 0;
@@ -2267,17 +2275,17 @@ void arch_perf_update_userpage(struct perf_event *event,
if (!using_native_sched_clock() || !sched_clock_stable())
return;
- data = cyc2ns_read_begin();
+ cyc2ns_read_begin(&data);
- offset = data->cyc2ns_offset + __sched_clock_offset;
+ offset = data.cyc2ns_offset + __sched_clock_offset;
/*
* Internal timekeeping for enabled/running/stopped times
* is always in the local_clock domain.
*/
userpg->cap_user_time = 1;
- userpg->time_mult = data->cyc2ns_mul;
- userpg->time_shift = data->cyc2ns_shift;
+ userpg->time_mult = data.cyc2ns_mul;
+ userpg->time_shift = data.cyc2ns_shift;
userpg->time_offset = offset - now;
/*
@@ -2289,7 +2297,7 @@ void arch_perf_update_userpage(struct perf_event *event,
userpg->time_zero = offset;
}
- cyc2ns_read_end(data);
+ cyc2ns_read_end();
}
void
@@ -2334,7 +2342,7 @@ static unsigned long get_segment_base(unsigned int segment)
/* IRQs are off, so this synchronizes with smp_store_release */
ldt = lockless_dereference(current->active_mm->context.ldt);
- if (!ldt || idx > ldt->size)
+ if (!ldt || idx > ldt->nr_entries)
return 0;
desc = &ldt->entries[idx];
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a6d91d4e37a1..aa62437d1aa1 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -431,11 +431,11 @@ static __initconst const u64 skl_hw_cache_event_ids
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
- [ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
+ [ C(RESULT_MISS) ] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
- [ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */
+ [ C(RESULT_MISS) ] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
@@ -3160,6 +3160,19 @@ err:
return -ENOMEM;
}
+static void flip_smm_bit(void *data)
+{
+ unsigned long set = *(unsigned long *)data;
+
+ if (set > 0) {
+ msr_set_bit(MSR_IA32_DEBUGCTLMSR,
+ DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
+ } else {
+ msr_clear_bit(MSR_IA32_DEBUGCTLMSR,
+ DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
+ }
+}
+
static void intel_pmu_cpu_starting(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
@@ -3174,6 +3187,8 @@ static void intel_pmu_cpu_starting(int cpu)
cpuc->lbr_sel = NULL;
+ flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
+
if (!cpuc->shared_regs)
return;
@@ -3410,12 +3425,10 @@ static void intel_snb_check_microcode(void)
int pebs_broken = 0;
int cpu;
- get_online_cpus();
for_each_online_cpu(cpu) {
if ((pebs_broken = intel_snb_pebs_broken(cpu)))
break;
}
- put_online_cpus();
if (pebs_broken == x86_pmu.pebs_broken)
return;
@@ -3488,7 +3501,9 @@ static bool check_msr(unsigned long msr, u64 mask)
static __init void intel_sandybridge_quirk(void)
{
x86_pmu.check_microcode = intel_snb_check_microcode;
+ cpus_read_lock();
intel_snb_check_microcode();
+ cpus_read_unlock();
}
static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
@@ -3595,6 +3610,52 @@ static struct attribute *hsw_events_attrs[] = {
NULL
};
+static ssize_t freeze_on_smi_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%lu\n", x86_pmu.attr_freeze_on_smi);
+}
+
+static DEFINE_MUTEX(freeze_on_smi_mutex);
+
+static ssize_t freeze_on_smi_store(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long val;
+ ssize_t ret;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if (val > 1)
+ return -EINVAL;
+
+ mutex_lock(&freeze_on_smi_mutex);
+
+ if (x86_pmu.attr_freeze_on_smi == val)
+ goto done;
+
+ x86_pmu.attr_freeze_on_smi = val;
+
+ get_online_cpus();
+ on_each_cpu(flip_smm_bit, &val, 1);
+ put_online_cpus();
+done:
+ mutex_unlock(&freeze_on_smi_mutex);
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(freeze_on_smi);
+
+static struct attribute *intel_pmu_attrs[] = {
+ &dev_attr_freeze_on_smi.attr,
+ NULL,
+};
+
__init int intel_pmu_init(void)
{
union cpuid10_edx edx;
@@ -3641,6 +3702,8 @@ __init int intel_pmu_init(void)
x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+
+ x86_pmu.attrs = intel_pmu_attrs;
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
* assume at least 3 events, when not running in a hypervisor:
@@ -4112,13 +4175,12 @@ static __init int fixup_ht_bug(void)
lockup_detector_resume();
- get_online_cpus();
+ cpus_read_lock();
- for_each_online_cpu(c) {
+ for_each_online_cpu(c)
free_excl_cntrs(c);
- }
- put_online_cpus();
+ cpus_read_unlock();
pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
return 0;
}
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 8c00dc09a5d2..2521f771f2f5 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -1682,7 +1682,7 @@ static int __init intel_cqm_init(void)
*
* Also, check that the scales match on all cpus.
*/
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -1746,14 +1746,14 @@ static int __init intel_cqm_init(void)
* Setup the hot cpu notifier once we are sure cqm
* is enabled to avoid notifier leak.
*/
- cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_STARTING,
- "perf/x86/cqm:starting",
- intel_cqm_cpu_starting, NULL);
- cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_ONLINE, "perf/x86/cqm:online",
- NULL, intel_cqm_cpu_exit);
-
+ cpuhp_setup_state_cpuslocked(CPUHP_AP_PERF_X86_CQM_STARTING,
+ "perf/x86/cqm:starting",
+ intel_cqm_cpu_starting, NULL);
+ cpuhp_setup_state_cpuslocked(CPUHP_AP_PERF_X86_CQM_ONLINE,
+ "perf/x86/cqm:online",
+ NULL, intel_cqm_cpu_exit);
out:
- put_online_cpus();
+ cpus_read_unlock();
if (ret) {
kfree(str);
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index f924629836a8..eb261656a320 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -18,7 +18,7 @@ enum {
LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME,
};
-static enum {
+static const enum {
LBR_EIP_FLAGS = 1,
LBR_TSX = 2,
} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
@@ -287,7 +287,7 @@ inline u64 lbr_from_signext_quirk_wr(u64 val)
/*
* If quirk is needed, ensure sign extension is 61 bits:
*/
-u64 lbr_from_signext_quirk_rd(u64 val)
+static u64 lbr_from_signext_quirk_rd(u64 val)
{
if (static_branch_unlikely(&lbr_from_quirk_key)) {
/*
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 758c1aa5009d..44ec523287f6 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1170,7 +1170,7 @@ static int uncore_event_cpu_online(unsigned int cpu)
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
box = pmu->boxes[pkg];
- if (!box && atomic_inc_return(&box->refcnt) == 1)
+ if (box && atomic_inc_return(&box->refcnt) == 1)
uncore_box_init(box);
}
}
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index be3d36254040..53728eea1bed 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -562,6 +562,9 @@ struct x86_pmu {
ssize_t (*events_sysfs_show)(char *page, u64 config);
struct attribute **cpu_events;
+ unsigned long attr_freeze_on_smi;
+ struct attribute **attrs;
+
/*
* CPU Hotplug hooks
*/
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 00c88a01301d..da181ad1d5f8 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -3,6 +3,7 @@
#include <linux/ioport.h>
#include <linux/pci.h>
+#include <linux/refcount.h>
struct amd_nb_bus_dev_range {
u8 bus;
@@ -55,7 +56,7 @@ struct threshold_bank {
struct threshold_block *blocks;
/* initialized to the number of CPUs on the node sharing this bank */
- atomic_t cpus;
+ refcount_t cpus;
};
struct amd_northbridge {
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index bdffcd9eab2b..5f01671c68f2 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -252,6 +252,8 @@ static inline int x2apic_enabled(void) { return 0; }
#define x2apic_supported() (0)
#endif /* !CONFIG_X86_X2APIC */
+struct irq_data;
+
/*
* Copyright 2004 James Cleverdon, IBM.
* Subject to the GNU Public License, v.2
@@ -296,9 +298,9 @@ struct apic {
/* Can't be NULL on 64-bit */
unsigned long (*set_apic_id)(unsigned int id);
- int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
- const struct cpumask *andmask,
- unsigned int *apicid);
+ int (*cpu_mask_to_apicid)(const struct cpumask *cpumask,
+ struct irq_data *irqdata,
+ unsigned int *apicid);
/* ipi */
void (*send_IPI)(int cpu, int vector);
@@ -540,28 +542,12 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
#endif
-static inline int
-flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask,
- unsigned int *apicid)
-{
- unsigned long cpu_mask = cpumask_bits(cpumask)[0] &
- cpumask_bits(andmask)[0] &
- cpumask_bits(cpu_online_mask)[0] &
- APIC_ALL_CPUS;
-
- if (likely(cpu_mask)) {
- *apicid = (unsigned int)cpu_mask;
- return 0;
- } else {
- return -EINVAL;
- }
-}
-
-extern int
-default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask,
- unsigned int *apicid);
+extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask,
+ struct irq_data *irqdata,
+ unsigned int *apicid);
+extern int default_cpu_mask_to_apicid(const struct cpumask *cpumask,
+ struct irq_data *irqdata,
+ unsigned int *apicid);
static inline void
flat_vector_allocation_domain(int cpu, struct cpumask *retmask,
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index caa5798c92f4..33380b871463 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -246,19 +246,6 @@ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
return c;
}
-/**
- * atomic_inc_short - increment of a short integer
- * @v: pointer to type int
- *
- * Atomically adds 1 to @v
- * Returns the new value of @u
- */
-static __always_inline short int atomic_inc_short(short int *v)
-{
- asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
- return *v;
-}
-
#ifdef CONFIG_X86_32
# include <asm/atomic64_32.h>
#else
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 2f77bcefe6b4..d2ff779f347e 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -74,7 +74,7 @@ struct efi_scratch {
__kernel_fpu_begin(); \
\
if (efi_scratch.use_pgd) { \
- efi_scratch.prev_cr3 = read_cr3(); \
+ efi_scratch.prev_cr3 = __read_cr3(); \
write_cr3((unsigned long)efi_scratch.efi_pgt); \
__flush_tlb_all(); \
} \
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 59405a248fc2..9b76cd331990 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -22,8 +22,8 @@ typedef struct {
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;
- unsigned int irq_tlb_count;
#endif
+ unsigned int irq_tlb_count;
#ifdef CONFIG_X86_THERMAL_VECTOR
unsigned int irq_thermal_count;
#endif
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 16d3fa211962..668cca540025 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -29,7 +29,6 @@ struct irq_desc;
#include <linux/cpumask.h>
extern int check_irq_vectors_for_cpu_disable(void);
extern void fixup_irqs(void);
-extern void irq_force_complete_move(struct irq_desc *desc);
#endif
#ifdef CONFIG_HAVE_KVM
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index a210eba2727c..023b4a9fc846 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -55,7 +55,8 @@ extern struct irq_domain *
irq_remapping_get_irq_domain(struct irq_alloc_info *info);
/* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */
-extern struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent);
+extern struct irq_domain *
+arch_create_remap_msi_irq_domain(struct irq_domain *par, const char *n, int id);
/* Get parent irqdomain for interrupt remapping irqdomain */
static inline struct irq_domain *arch_get_ir_parent_domain(void)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 055962615779..722d0e568863 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -296,6 +296,7 @@ struct x86_emulate_ctxt {
bool perm_ok; /* do not check permissions if true */
bool ud; /* inject an #UD if host doesn't support insn */
+ bool tf; /* TF value before instruction (after for syscall/sysret) */
bool have_exception;
struct x86_exception exception;
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 3f9a3d2a5209..181264989db5 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -285,10 +285,6 @@ int mce_notify_irq(void);
DECLARE_PER_CPU(struct mce, injectm);
-extern void register_mce_write_callback(ssize_t (*)(struct file *filp,
- const char __user *ubuf,
- size_t usize, loff_t *off));
-
/* Disable CMCI/polling for MCA bank claimed by firmware */
extern void mce_disable_bank(int bank);
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index f9813b6d8b80..79b647a7ebd0 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -37,12 +37,6 @@ typedef struct {
#endif
} mm_context_t;
-#ifdef CONFIG_SMP
void leave_mm(int cpu);
-#else
-static inline void leave_mm(int cpu)
-{
-}
-#endif
#endif /* _ASM_X86_MMU_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 68b329d77b3a..ecfcb6643c9b 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -47,7 +47,7 @@ struct ldt_struct {
* allocations, but it's not worth trying to optimize.
*/
struct desc_struct *entries;
- unsigned int size;
+ unsigned int nr_entries;
};
/*
@@ -87,22 +87,46 @@ static inline void load_mm_ldt(struct mm_struct *mm)
*/
if (unlikely(ldt))
- set_ldt(ldt->entries, ldt->size);
+ set_ldt(ldt->entries, ldt->nr_entries);
else
clear_LDT();
#else
clear_LDT();
#endif
+}
+
+static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
+{
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+ /*
+ * Load the LDT if either the old or new mm had an LDT.
+ *
+ * An mm will never go from having an LDT to not having an LDT. Two
+ * mms never share an LDT, so we don't gain anything by checking to
+ * see whether the LDT changed. There's also no guarantee that
+ * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
+ * then prev->context.ldt will also be non-NULL.
+ *
+ * If we really cared, we could optimize the case where prev == next
+ * and we're exiting lazy mode. Most of the time, if this happens,
+ * we don't actually need to reload LDTR, but modify_ldt() is mostly
+ * used by legacy code and emulators where we don't need this level of
+ * performance.
+ *
+ * This uses | instead of || because it generates better code.
+ */
+ if (unlikely((unsigned long)prev->context.ldt |
+ (unsigned long)next->context.ldt))
+ load_mm_ldt(next);
+#endif
DEBUG_LOCKS_WARN_ON(preemptible());
}
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
-#ifdef CONFIG_SMP
if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
-#endif
}
static inline int init_new_context(struct task_struct *tsk,
@@ -220,18 +244,6 @@ static inline int vma_pkey(struct vm_area_struct *vma)
}
#endif
-static inline bool __pkru_allows_pkey(u16 pkey, bool write)
-{
- u32 pkru = read_pkru();
-
- if (!__pkru_allows_read(pkru, pkey))
- return false;
- if (write && !__pkru_allows_write(pkru, pkey))
- return false;
-
- return true;
-}
-
/*
* We only want to enforce protection keys on the current process
* because we effectively have no access to PKRU for other
@@ -268,4 +280,23 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
return __pkru_allows_pkey(vma_pkey(vma), write);
}
+
+/*
+ * This can be used from process context to figure out what the value of
+ * CR3 is without needing to do a (slow) __read_cr3().
+ *
+ * It's intended to be used for code like KVM that sneakily changes CR3
+ * and needs to restore it. It needs to be used very carefully.
+ */
+static inline unsigned long __get_current_cr3_fast(void)
+{
+ unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
+
+ /* For now, be very restrictive about when this can be called. */
+ VM_WARN_ON(in_nmi() || !in_atomic());
+
+ VM_BUG_ON(cr3 != __read_cr3());
+ return cr3;
+}
+
#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index fba100713924..2b58c8c1eeaa 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -2,8 +2,7 @@
#define _ASM_X86_MSHYPER_H
#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
+#include <linux/atomic.h>
#include <asm/hyperv.h>
/*
@@ -137,7 +136,6 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
}
}
-#define hv_get_current_tick(tick) rdmsrl(HV_X64_MSR_TIME_REF_COUNT, tick)
#define hv_init_timer(timer, tick) wrmsrl(timer, tick)
#define hv_init_timer_config(config, val) wrmsrl(config, val)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 673f9ac50f6d..18b162322eff 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -137,6 +137,8 @@
#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
+#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
+#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
#define MSR_PEBS_FRONTEND 0x000003f7
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 55fa56fe4e45..9ccac1926587 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -61,7 +61,7 @@ static inline void write_cr2(unsigned long x)
PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
}
-static inline unsigned long read_cr3(void)
+static inline unsigned long __read_cr3(void)
{
return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
}
@@ -118,7 +118,7 @@ static inline u64 paravirt_read_msr(unsigned msr)
static inline void paravirt_write_msr(unsigned msr,
unsigned low, unsigned high)
{
- return PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high);
+ PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high);
}
static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
@@ -312,11 +312,9 @@ static inline void __flush_tlb_single(unsigned long addr)
}
static inline void flush_tlb_others(const struct cpumask *cpumask,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
+ const struct flush_tlb_info *info)
{
- PVOP_VCALL4(pv_mmu_ops.flush_tlb_others, cpumask, mm, start, end);
+ PVOP_VCALL2(pv_mmu_ops.flush_tlb_others, cpumask, info);
}
static inline int paravirt_pgd_alloc(struct mm_struct *mm)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 7465d6fe336f..cb976bab6299 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -51,6 +51,7 @@ struct mm_struct;
struct desc_struct;
struct task_struct;
struct cpumask;
+struct flush_tlb_info;
/*
* Wrapper type for pointers to code which uses the non-standard
@@ -223,9 +224,7 @@ struct pv_mmu_ops {
void (*flush_tlb_kernel)(void);
void (*flush_tlb_single)(unsigned long addr);
void (*flush_tlb_others)(const struct cpumask *cpus,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end);
+ const struct flush_tlb_info *info);
/* Hooks for allocating and freeing a pagetable top-level */
int (*pgd_alloc)(struct mm_struct *mm);
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index f513cc231151..473a7295ab10 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -77,14 +77,8 @@ static inline bool is_vmd(struct pci_bus *bus)
extern unsigned int pcibios_assign_all_busses(void);
extern int pci_legacy_init(void);
-# ifdef CONFIG_ACPI
-# define x86_default_pci_init pci_acpi_init
-# else
-# define x86_default_pci_init pci_legacy_init
-# endif
#else
-# define pcibios_assign_all_busses() 0
-# define x86_default_pci_init NULL
+static inline int pcibios_assign_all_busses(void) { return 0; }
#endif
extern unsigned long pci_mem_start;
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 50d35e3185f5..c8821bab938f 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -212,4 +212,51 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
#define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high })
#define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } })
+#define gup_get_pte gup_get_pte
+/*
+ * WARNING: only to be used in the get_user_pages_fast() implementation.
+ *
+ * With get_user_pages_fast(), we walk down the pagetables without taking
+ * any locks. For this we would like to load the pointers atomically,
+ * but that is not possible (without expensive cmpxchg8b) on PAE. What
+ * we do have is the guarantee that a PTE will only either go from not
+ * present to present, or present to not present or both -- it will not
+ * switch to a completely different present page without a TLB flush in
+ * between; something that we are blocking by holding interrupts off.
+ *
+ * Setting ptes from not present to present goes:
+ *
+ * ptep->pte_high = h;
+ * smp_wmb();
+ * ptep->pte_low = l;
+ *
+ * And present to not present goes:
+ *
+ * ptep->pte_low = 0;
+ * smp_wmb();
+ * ptep->pte_high = 0;
+ *
+ * We must ensure here that the load of pte_low sees 'l' iff pte_high
+ * sees 'h'. We load pte_high *after* loading pte_low, which ensures we
+ * don't see an older value of pte_high. *Then* we recheck pte_low,
+ * which ensures that we haven't picked up a changed pte high. We might
+ * have gotten rubbish values from pte_low and pte_high, but we are
+ * guaranteed that pte_low will not have the present bit set *unless*
+ * it is 'l'. Because get_user_pages_fast() only operates on present ptes
+ * we're safe.
+ */
+static inline pte_t gup_get_pte(pte_t *ptep)
+{
+ pte_t pte;
+
+ do {
+ pte.pte_low = ptep->pte_low;
+ smp_rmb();
+ pte.pte_high = ptep->pte_high;
+ smp_rmb();
+ } while (unlikely(pte.pte_low != ptep->pte_low));
+
+ return pte;
+}
+
#endif /* _ASM_X86_PGTABLE_3LEVEL_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index f5af95a0c6b8..77037b6f1caa 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -244,6 +244,11 @@ static inline int pud_devmap(pud_t pud)
return 0;
}
#endif
+
+static inline int pgd_devmap(pgd_t pgd)
+{
+ return 0;
+}
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -917,7 +922,7 @@ extern pgd_t trampoline_pgd_entry;
static inline void __meminit init_trampoline_default(void)
{
/* Default trampoline pgd value */
- trampoline_pgd_entry = init_level4_pgt[pgd_index(__PAGE_OFFSET)];
+ trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
}
# ifdef CONFIG_RANDOMIZE_MEMORY
void __meminit init_trampoline(void);
@@ -1185,6 +1190,54 @@ static inline u16 pte_flags_pkey(unsigned long pte_flags)
#endif
}
+static inline bool __pkru_allows_pkey(u16 pkey, bool write)
+{
+ u32 pkru = read_pkru();
+
+ if (!__pkru_allows_read(pkru, pkey))
+ return false;
+ if (write && !__pkru_allows_write(pkru, pkey))
+ return false;
+
+ return true;
+}
+
+/*
+ * 'pteval' can come from a PTE, PMD or PUD. We only check
+ * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the
+ * same value on all 3 types.
+ */
+static inline bool __pte_access_permitted(unsigned long pteval, bool write)
+{
+ unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER;
+
+ if (write)
+ need_pte_bits |= _PAGE_RW;
+
+ if ((pteval & need_pte_bits) != need_pte_bits)
+ return 0;
+
+ return __pkru_allows_pkey(pte_flags_pkey(pteval), write);
+}
+
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+ return __pte_access_permitted(pte_val(pte), write);
+}
+
+#define pmd_access_permitted pmd_access_permitted
+static inline bool pmd_access_permitted(pmd_t pmd, bool write)
+{
+ return __pte_access_permitted(pmd_val(pmd), write);
+}
+
+#define pud_access_permitted pud_access_permitted
+static inline bool pud_access_permitted(pud_t pud, bool write)
+{
+ return __pte_access_permitted(pud_val(pud), write);
+}
+
#include <asm-generic/pgtable.h>
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 9991224f6238..2160c1fee920 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -14,15 +14,17 @@
#include <linux/bitops.h>
#include <linux/threads.h>
+extern p4d_t level4_kernel_pgt[512];
+extern p4d_t level4_ident_pgt[512];
extern pud_t level3_kernel_pgt[512];
extern pud_t level3_ident_pgt[512];
extern pmd_t level2_kernel_pgt[512];
extern pmd_t level2_fixmap_pgt[512];
extern pmd_t level2_ident_pgt[512];
extern pte_t level1_fixmap_pgt[512];
-extern pgd_t init_level4_pgt[];
+extern pgd_t init_top_pgt[];
-#define swapper_pg_dir init_level4_pgt
+#define swapper_pg_dir init_top_pgt
extern void paging_init(void);
@@ -227,6 +229,20 @@ extern void cleanup_highmap(void);
extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
-#endif /* !__ASSEMBLY__ */
+#define gup_fast_permitted gup_fast_permitted
+static inline bool gup_fast_permitted(unsigned long start, int nr_pages,
+ int write)
+{
+ unsigned long len, end;
+
+ len = (unsigned long)nr_pages << PAGE_SHIFT;
+ end = start + len;
+ if (end < start)
+ return false;
+ if (end >> __VIRTUAL_MASK_SHIFT)
+ return false;
+ return true;
+}
+#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_64_H */
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 39fb618e2211..79aa2f98398d 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -8,4 +8,40 @@
#else
#define X86_VM_MASK 0 /* No VM86 support */
#endif
+
+/*
+ * CR3's layout varies depending on several things.
+ *
+ * If CR4.PCIDE is set (64-bit only), then CR3[11:0] is the address space ID.
+ * If PAE is enabled, then CR3[11:5] is part of the PDPT address
+ * (i.e. it's 32-byte aligned, not page-aligned) and CR3[4:0] is ignored.
+ * Otherwise (non-PAE, non-PCID), CR3[3] is PWT, CR3[4] is PCD, and
+ * CR3[2:0] and CR3[11:5] are ignored.
+ *
+ * In all cases, Linux puts zeros in the low ignored bits and in PWT and PCD.
+ *
+ * CR3[63] is always read as zero. If CR4.PCIDE is set, then CR3[63] may be
+ * written as 1 to prevent the write to CR3 from flushing the TLB.
+ *
+ * On systems with SME, one bit (in a variable position!) is stolen to indicate
+ * that the top-level paging structure is encrypted.
+ *
+ * All of the remaining bits indicate the physical address of the top-level
+ * paging structure.
+ *
+ * CR3_ADDR_MASK is the mask used by read_cr3_pa().
+ */
+#ifdef CONFIG_X86_64
+/* Mask off the address space ID bits. */
+#define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull
+#define CR3_PCID_MASK 0xFFFull
+#else
+/*
+ * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
+ * a tiny bit of code size by setting all the bits.
+ */
+#define CR3_ADDR_MASK 0xFFFFFFFFull
+#define CR3_PCID_MASK 0ull
+#endif
+
#endif /* _ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3cada998a402..6a79547e8ee0 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -231,6 +231,14 @@ native_cpuid_reg(ebx)
native_cpuid_reg(ecx)
native_cpuid_reg(edx)
+/*
+ * Friendlier CR3 helpers.
+ */
+static inline unsigned long read_cr3_pa(void)
+{
+ return __read_cr3() & CR3_ADDR_MASK;
+}
+
static inline void load_cr3(pgd_t *pgdir)
{
write_cr3(__pa(pgdir));
@@ -860,8 +868,6 @@ extern unsigned long KSTK_ESP(struct task_struct *task);
#endif /* CONFIG_X86_64 */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
unsigned long new_sp);
@@ -901,8 +907,13 @@ static inline int mpx_disable_management(void)
}
#endif /* CONFIG_X86_INTEL_MPX */
+#ifdef CONFIG_CPU_SUP_AMD
extern u16 amd_get_nb_id(int cpu);
extern u32 amd_get_nodes_per_socket(void);
+#else
+static inline u16 amd_get_nb_id(int cpu) { return 0; }
+static inline u32 amd_get_nodes_per_socket(void) { return 0; }
+#endif
static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
{
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ac1d5da14734..e4585a393965 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -44,7 +44,6 @@ extern unsigned long saved_video_mode;
extern void reserve_standard_io_resources(void);
extern void i386_reserve_resources(void);
-extern void setup_default_timer_irq(void);
#ifdef CONFIG_X86_INTEL_MID
extern void x86_intel_mid_early_setup(void);
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 12af3e35edfa..9efaabf5b54b 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -39,7 +39,7 @@ static inline void native_write_cr2(unsigned long val)
asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
}
-static inline unsigned long native_read_cr3(void)
+static inline unsigned long __native_read_cr3(void)
{
unsigned long val;
asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
@@ -159,9 +159,13 @@ static inline void write_cr2(unsigned long x)
native_write_cr2(x);
}
-static inline unsigned long read_cr3(void)
+/*
+ * Careful! CR3 contains more than just an address. You probably want
+ * read_cr3_pa() instead.
+ */
+static inline unsigned long __read_cr3(void)
{
- return native_read_cr3();
+ return __native_read_cr3();
}
static inline void write_cr3(unsigned long x)
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 27e9f9d769b8..2016962103df 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -29,11 +29,9 @@ struct cyc2ns_data {
u32 cyc2ns_mul;
u32 cyc2ns_shift;
u64 cyc2ns_offset;
- u32 __count;
- /* u32 hole */
-}; /* 24 bytes -- do not grow */
+}; /* 16 bytes */
-extern struct cyc2ns_data *cyc2ns_read_begin(void);
-extern void cyc2ns_read_end(struct cyc2ns_data *);
+extern void cyc2ns_read_begin(struct cyc2ns_data *);
+extern void cyc2ns_read_end(void);
#endif /* _ASM_X86_TIMER_H */
diff --git a/arch/x86/include/asm/tlbbatch.h b/arch/x86/include/asm/tlbbatch.h
new file mode 100644
index 000000000000..f4a6ff352a0e
--- /dev/null
+++ b/arch/x86/include/asm/tlbbatch.h
@@ -0,0 +1,14 @@
+#ifndef _ARCH_X86_TLBBATCH_H
+#define _ARCH_X86_TLBBATCH_H
+
+#include <linux/cpumask.h>
+
+struct arch_tlbflush_unmap_batch {
+ /*
+ * Each bit set is a CPU that potentially has a TLB entry for one of
+ * the PFNs being flushed..
+ */
+ struct cpumask cpumask;
+};
+
+#endif /* _ARCH_X86_TLBBATCH_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6ed9ea469b48..50ea3482e1d1 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -7,6 +7,7 @@
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
+#include <asm/smp.h>
static inline void __invpcid(unsigned long pcid, unsigned long addr,
unsigned long type)
@@ -65,10 +66,14 @@ static inline void invpcid_flush_all_nonglobals(void)
#endif
struct tlb_state {
-#ifdef CONFIG_SMP
- struct mm_struct *active_mm;
+ /*
+ * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
+ * are on. This means that it may not match current->active_mm,
+ * which will contain the previous user mm when we're in lazy TLB
+ * mode even if we've already switched back to swapper_pg_dir.
+ */
+ struct mm_struct *loaded_mm;
int state;
-#endif
/*
* Access to this CR4 shadow and to H/W CR4 is protected by
@@ -151,7 +156,7 @@ static inline void __native_flush_tlb(void)
* back:
*/
preempt_disable();
- native_write_cr3(native_read_cr3());
+ native_write_cr3(__native_read_cr3());
preempt_enable();
}
@@ -220,84 +225,16 @@ static inline void __flush_tlb_one(unsigned long addr)
* - flush_tlb_page(vma, vmaddr) flushes one page
* - flush_tlb_range(vma, start, end) flushes a range of pages
* - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- * - flush_tlb_others(cpumask, mm, start, end) flushes TLBs on other cpus
+ * - flush_tlb_others(cpumask, info) flushes TLBs on other cpus
*
* ..but the i386 has somewhat limited tlb flushing capabilities,
* and page-granular flushes are available only on i486 and up.
*/
-
-#ifndef CONFIG_SMP
-
-/* "_up" is for UniProcessor.
- *
- * This is a helper for other header functions. *Not* intended to be called
- * directly. All global TLB flushes need to either call this, or to bump the
- * vm statistics themselves.
- */
-static inline void __flush_tlb_up(void)
-{
- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
- __flush_tlb();
-}
-
-static inline void flush_tlb_all(void)
-{
- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
- __flush_tlb_all();
-}
-
-static inline void local_flush_tlb(void)
-{
- __flush_tlb_up();
-}
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
- if (mm == current->active_mm)
- __flush_tlb_up();
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
- unsigned long addr)
-{
- if (vma->vm_mm == current->active_mm)
- __flush_tlb_one(addr);
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
- if (vma->vm_mm == current->active_mm)
- __flush_tlb_up();
-}
-
-static inline void flush_tlb_mm_range(struct mm_struct *mm,
- unsigned long start, unsigned long end, unsigned long vmflag)
-{
- if (mm == current->active_mm)
- __flush_tlb_up();
-}
-
-static inline void native_flush_tlb_others(const struct cpumask *cpumask,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
-{
-}
-
-static inline void reset_lazy_tlbstate(void)
-{
-}
-
-static inline void flush_tlb_kernel_range(unsigned long start,
- unsigned long end)
-{
- flush_tlb_all();
-}
-
-#else /* SMP */
-
-#include <asm/smp.h>
+struct flush_tlb_info {
+ struct mm_struct *mm;
+ unsigned long start;
+ unsigned long end;
+};
#define local_flush_tlb() __flush_tlb()
@@ -307,29 +244,32 @@ static inline void flush_tlb_kernel_range(unsigned long start,
flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
extern void flush_tlb_all(void);
-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned long vmflag);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
+{
+ flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
+}
+
void native_flush_tlb_others(const struct cpumask *cpumask,
- struct mm_struct *mm,
- unsigned long start, unsigned long end);
+ const struct flush_tlb_info *info);
#define TLBSTATE_OK 1
#define TLBSTATE_LAZY 2
-static inline void reset_lazy_tlbstate(void)
+static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm)
{
- this_cpu_write(cpu_tlbstate.state, 0);
- this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
+ cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
}
-#endif /* SMP */
+extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
#ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, mm, start, end) \
- native_flush_tlb_others(mask, mm, start, end)
+#define flush_tlb_others(mask, info) \
+ native_flush_tlb_others(mask, info)
#endif
#endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 6686820feae9..b5a32231abd8 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_UV_UV_H
#define _ASM_X86_UV_UV_H
+#include <asm/tlbflush.h>
+
enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
struct cpumask;
@@ -15,10 +17,7 @@ extern void uv_cpu_init(void);
extern void uv_nmi_init(void);
extern void uv_system_init(void);
extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end,
- unsigned int cpu);
+ const struct flush_tlb_info *info);
#else /* X86_UV */
@@ -28,8 +27,8 @@ static inline int is_uv_hubless(void) { return 0; }
static inline void uv_cpu_init(void) { }
static inline void uv_system_init(void) { }
static inline const struct cpumask *
-uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
- unsigned long start, unsigned long end, unsigned int cpu)
+uv_flush_tlb_others(const struct cpumask *cpumask,
+ const struct flush_tlb_info *info)
{ return cpumask; }
#endif /* X86_UV */
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 432df4b1baec..f4fef5a24ebd 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -34,16 +34,10 @@
#define HV_X64_MSR_REFERENCE_TSC 0x40000021
/*
- * There is a single feature flag that signifies the presence of the MSR
- * that can be used to retrieve both the local APIC Timer frequency as
- * well as the TSC frequency.
+ * There is a single feature flag that signifies if the partition has access
+ * to MSRs with local APIC and TSC frequencies.
*/
-
-/* Local APIC timer frequency MSR (HV_X64_MSR_APIC_FREQUENCY) is available */
-#define HV_X64_MSR_APIC_FREQUENCY_AVAILABLE (1 << 11)
-
-/* TSC frequency MSR (HV_X64_MSR_TSC_FREQUENCY) is available */
-#define HV_X64_MSR_TSC_FREQUENCY_AVAILABLE (1 << 11)
+#define HV_X64_ACCESS_FREQUENCY_MSRS (1 << 11)
/*
* Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
@@ -73,6 +67,9 @@
*/
#define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8)
+/* Frequency MSRs available */
+#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE (1 << 8)
+
/* Crash MSR available */
#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10)
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 567de50a4c2a..185f3d10c194 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -104,6 +104,8 @@
#define X86_CR4_OSFXSR _BITUL(X86_CR4_OSFXSR_BIT)
#define X86_CR4_OSXMMEXCPT_BIT 10 /* enable unmasked SSE exceptions */
#define X86_CR4_OSXMMEXCPT _BITUL(X86_CR4_OSXMMEXCPT_BIT)
+#define X86_CR4_LA57_BIT 12 /* enable 5-level page tables */
+#define X86_CR4_LA57 _BITUL(X86_CR4_LA57_BIT)
#define X86_CR4_VMXE_BIT 13 /* enable VMX virtualization */
#define X86_CR4_VMXE _BITUL(X86_CR4_VMXE_BIT)
#define X86_CR4_SMXE_BIT 14 /* enable safer mode (TXT) */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4b994232cb57..a01892bdd61a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -18,6 +18,7 @@ CFLAGS_REMOVE_pvclock.o = -pg
CFLAGS_REMOVE_kvmclock.o = -pg
CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
+CFLAGS_REMOVE_head64.o = -pg
endif
KASAN_SANITIZE_head$(BITS).o := n
@@ -29,6 +30,7 @@ OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_test_nx.o := y
+OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y
# If instrumentation of this dir is enabled, boot hangs during first second.
# Probably could be more selective here, but note that files related to irqs,
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 26b78d86f25a..85a9e17e0dbc 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,3 +1,5 @@
+OBJECT_FILES_NON_STANDARD_wakeup_$(BITS).o := y
+
obj-$(CONFIG_ACPI) += boot.o
obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o
obj-$(CONFIG_ACPI_APEI) += apei.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2d75faf743f2..98b3dd8cf2bf 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -54,6 +54,8 @@
#include <asm/mce.h>
#include <asm/tsc.h>
#include <asm/hypervisor.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
unsigned int num_processors;
@@ -545,6 +547,81 @@ static struct clock_event_device lapic_clockevent = {
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
+#define DEADLINE_MODEL_MATCH_FUNC(model, func) \
+ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&func }
+
+#define DEADLINE_MODEL_MATCH_REV(model, rev) \
+ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)rev }
+
+static u32 hsx_deadline_rev(void)
+{
+ switch (boot_cpu_data.x86_mask) {
+ case 0x02: return 0x3a; /* EP */
+ case 0x04: return 0x0f; /* EX */
+ }
+
+ return ~0U;
+}
+
+static u32 bdx_deadline_rev(void)
+{
+ switch (boot_cpu_data.x86_mask) {
+ case 0x02: return 0x00000011;
+ case 0x03: return 0x0700000e;
+ case 0x04: return 0x0f00000c;
+ case 0x05: return 0x0e000003;
+ }
+
+ return ~0U;
+}
+
+static const struct x86_cpu_id deadline_match[] = {
+ DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020),
+ DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_X, 0x02000014),
+
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE, 0x22),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT, 0x20),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_GT3E, 0x17),
+
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_CORE, 0x25),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_GT3E, 0x17),
+
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_MOBILE, 0xb2),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_DESKTOP, 0xb2),
+
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_MOBILE, 0x52),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_DESKTOP, 0x52),
+
+ {},
+};
+
+static void apic_check_deadline_errata(void)
+{
+ const struct x86_cpu_id *m = x86_match_cpu(deadline_match);
+ u32 rev;
+
+ if (!m)
+ return;
+
+ /*
+ * Function pointers will have the MSB set due to address layout,
+ * immediate revisions will not.
+ */
+ if ((long)m->driver_data < 0)
+ rev = ((u32 (*)(void))(m->driver_data))();
+ else
+ rev = (u32)m->driver_data;
+
+ if (boot_cpu_data.microcode >= rev)
+ return;
+
+ setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
+ pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
+ "please update microcode to version: 0x%x (or later)\n", rev);
+}
+
/*
* Setup the local APIC timer for this CPU. Copy the initialized values
* of the boot CPU and register the clock event in the framework.
@@ -563,6 +640,7 @@ static void setup_APIC_timer(void)
levt->cpumask = cpumask_of(smp_processor_id());
if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
+ levt->name = "lapic-deadline";
levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
CLOCK_EVT_FEAT_DUMMY);
levt->set_next_event = lapic_next_deadline;
@@ -1779,6 +1857,8 @@ void __init init_apic_mappings(void)
{
unsigned int new_apicid;
+ apic_check_deadline_errata();
+
if (x2apic_mode) {
boot_cpu_physical_apicid = read_apic_id();
return;
@@ -2201,23 +2281,32 @@ void default_init_apic_ldr(void)
apic_write(APIC_LDR, val);
}
-int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask,
- unsigned int *apicid)
+int default_cpu_mask_to_apicid(const struct cpumask *mask,
+ struct irq_data *irqdata,
+ unsigned int *apicid)
{
- unsigned int cpu;
+ unsigned int cpu = cpumask_first(mask);
- for_each_cpu_and(cpu, cpumask, andmask) {
- if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
- }
+ if (cpu >= nr_cpu_ids)
+ return -EINVAL;
+ *apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ irq_data_update_effective_affinity(irqdata, cpumask_of(cpu));
+ return 0;
+}
- if (likely(cpu < nr_cpu_ids)) {
- *apicid = per_cpu(x86_cpu_to_apicid, cpu);
- return 0;
- }
+int flat_cpu_mask_to_apicid(const struct cpumask *mask,
+ struct irq_data *irqdata,
+ unsigned int *apicid)
- return -EINVAL;
+{
+ struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata);
+ unsigned long cpu_mask = cpumask_bits(mask)[0] & APIC_ALL_CPUS;
+
+ if (!cpu_mask)
+ return -EINVAL;
+ *apicid = (unsigned int)cpu_mask;
+ cpumask_bits(effmsk)[0] = cpu_mask;
+ return 0;
}
/*
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index a4d7ff20ed22..dedd5a41ba48 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -172,7 +172,7 @@ static struct apic apic_flat __ro_after_init = {
.get_apic_id = flat_get_apic_id,
.set_apic_id = set_apic_id,
- .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
.send_IPI = default_send_IPI_single,
.send_IPI_mask = flat_send_IPI_mask,
@@ -268,7 +268,7 @@ static struct apic apic_physflat __ro_after_init = {
.get_apic_id = flat_get_apic_id,
.set_apic_id = set_apic_id,
- .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
.send_IPI = default_send_IPI_single_phys,
.send_IPI_mask = default_send_IPI_mask_sequence_phys,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 2262eb6df796..6599f437b4ab 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -141,7 +141,7 @@ struct apic apic_noop __ro_after_init = {
.get_apic_id = noop_get_apic_id,
.set_apic_id = NULL,
- .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
.send_IPI = noop_send_IPI,
.send_IPI_mask = noop_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index e08fe2c8dd8c..2fda912219a6 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -267,7 +267,7 @@ static const struct apic apic_numachip1 __refconst = {
.get_apic_id = numachip1_get_apic_id,
.set_apic_id = numachip1_set_apic_id,
- .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
.send_IPI = numachip_send_IPI_one,
.send_IPI_mask = numachip_send_IPI_mask,
@@ -318,7 +318,7 @@ static const struct apic apic_numachip2 __refconst = {
.get_apic_id = numachip2_get_apic_id,
.set_apic_id = numachip2_set_apic_id,
- .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
.send_IPI = numachip_send_IPI_one,
.send_IPI_mask = numachip_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 56012010332c..456e45e8bf84 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -172,7 +172,7 @@ static struct apic apic_bigsmp __ro_after_init = {
.get_apic_id = bigsmp_get_apic_id,
.set_apic_id = NULL,
- .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
.send_IPI = default_send_IPI_single_phys,
.send_IPI_mask = default_send_IPI_mask_sequence_phys,
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
index ae50d3454d78..56ccf9346b08 100644
--- a/arch/x86/kernel/apic/htirq.c
+++ b/arch/x86/kernel/apic/htirq.c
@@ -150,16 +150,27 @@ static const struct irq_domain_ops htirq_domain_ops = {
.deactivate = htirq_domain_deactivate,
};
-void arch_init_htirq_domain(struct irq_domain *parent)
+void __init arch_init_htirq_domain(struct irq_domain *parent)
{
+ struct fwnode_handle *fn;
+
if (disable_apic)
return;
- htirq_domain = irq_domain_add_tree(NULL, &htirq_domain_ops, NULL);
+ fn = irq_domain_alloc_named_fwnode("PCI-HT");
+ if (!fn)
+ goto warn;
+
+ htirq_domain = irq_domain_create_tree(fn, &htirq_domain_ops, NULL);
+ irq_domain_free_fwnode(fn);
if (!htirq_domain)
- pr_warn("failed to initialize irqdomain for HTIRQ.\n");
- else
- htirq_domain->parent = parent;
+ goto warn;
+
+ htirq_domain->parent = parent;
+ return;
+
+warn:
+ pr_warn("Failed to initialize irqdomain for HTIRQ.\n");
}
int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 347bb9f65737..b4f5f73febdb 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1200,28 +1200,6 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
static struct irq_chip ioapic_chip, ioapic_ir_chip;
-#ifdef CONFIG_X86_32
-static inline int IO_APIC_irq_trigger(int irq)
-{
- int apic, idx, pin;
-
- for_each_ioapic_pin(apic, pin) {
- idx = find_irq_entry(apic, pin, mp_INT);
- if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin, 0)))
- return irq_trigger(idx);
- }
- /*
- * nonexistent IRQs are edge default
- */
- return 0;
-}
-#else
-static inline int IO_APIC_irq_trigger(int irq)
-{
- return 1;
-}
-#endif
-
static void __init setup_IO_APIC_irqs(void)
{
unsigned int ioapic, pin;
@@ -2223,6 +2201,8 @@ static int mp_irqdomain_create(int ioapic)
struct ioapic *ip = &ioapics[ioapic];
struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg;
struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+ struct fwnode_handle *fn;
+ char *name = "IO-APIC";
if (cfg->type == IOAPIC_DOMAIN_INVALID)
return 0;
@@ -2233,9 +2213,25 @@ static int mp_irqdomain_create(int ioapic)
parent = irq_remapping_get_ir_irq_domain(&info);
if (!parent)
parent = x86_vector_domain;
+ else
+ name = "IO-APIC-IR";
+
+ /* Handle device tree enumerated APICs proper */
+ if (cfg->dev) {
+ fn = of_node_to_fwnode(cfg->dev);
+ } else {
+ fn = irq_domain_alloc_named_id_fwnode(name, ioapic);
+ if (!fn)
+ return -ENOMEM;
+ }
+
+ ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops,
+ (void *)(long)ioapic);
+
+ /* Release fw handle if it was allocated above */
+ if (!cfg->dev)
+ irq_domain_free_fwnode(fn);
- ip->irqdomain = irq_domain_add_linear(cfg->dev, hwirqs, cfg->ops,
- (void *)(long)ioapic);
if (!ip->irqdomain)
return -ENOMEM;
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index c61aec7e65f4..9b18be764422 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -136,13 +136,20 @@ static struct msi_domain_info pci_msi_domain_info = {
.handler_name = "edge",
};
-void arch_init_msi_domain(struct irq_domain *parent)
+void __init arch_init_msi_domain(struct irq_domain *parent)
{
+ struct fwnode_handle *fn;
+
if (disable_apic)
return;
- msi_default_domain = pci_msi_create_irq_domain(NULL,
- &pci_msi_domain_info, parent);
+ fn = irq_domain_alloc_named_fwnode("PCI-MSI");
+ if (fn) {
+ msi_default_domain =
+ pci_msi_create_irq_domain(fn, &pci_msi_domain_info,
+ parent);
+ irq_domain_free_fwnode(fn);
+ }
if (!msi_default_domain)
pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n");
}
@@ -167,9 +174,18 @@ static struct msi_domain_info pci_msi_ir_domain_info = {
.handler_name = "edge",
};
-struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent)
+struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent,
+ const char *name, int id)
{
- return pci_msi_create_irq_domain(NULL, &pci_msi_ir_domain_info, parent);
+ struct fwnode_handle *fn;
+ struct irq_domain *d;
+
+ fn = irq_domain_alloc_named_id_fwnode(name, id);
+ if (!fn)
+ return NULL;
+ d = pci_msi_create_irq_domain(fn, &pci_msi_ir_domain_info, parent);
+ irq_domain_free_fwnode(fn);
+ return d;
}
#endif
@@ -221,13 +237,20 @@ static struct irq_domain *dmar_get_irq_domain(void)
{
static struct irq_domain *dmar_domain;
static DEFINE_MUTEX(dmar_lock);
+ struct fwnode_handle *fn;
mutex_lock(&dmar_lock);
- if (dmar_domain == NULL)
- dmar_domain = msi_create_irq_domain(NULL, &dmar_msi_domain_info,
+ if (dmar_domain)
+ goto out;
+
+ fn = irq_domain_alloc_named_fwnode("DMAR-MSI");
+ if (fn) {
+ dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info,
x86_vector_domain);
+ irq_domain_free_fwnode(fn);
+ }
+out:
mutex_unlock(&dmar_lock);
-
return dmar_domain;
}
@@ -317,9 +340,10 @@ static struct msi_domain_info hpet_msi_domain_info = {
struct irq_domain *hpet_create_irq_domain(int hpet_id)
{
- struct irq_domain *parent;
- struct irq_alloc_info info;
struct msi_domain_info *domain_info;
+ struct irq_domain *parent, *d;
+ struct irq_alloc_info info;
+ struct fwnode_handle *fn;
if (x86_vector_domain == NULL)
return NULL;
@@ -340,7 +364,16 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id)
else
hpet_msi_controller.name = "IR-HPET-MSI";
- return msi_create_irq_domain(NULL, domain_info, parent);
+ fn = irq_domain_alloc_named_id_fwnode(hpet_msi_controller.name,
+ hpet_id);
+ if (!fn) {
+ kfree(domain_info);
+ return NULL;
+ }
+
+ d = msi_create_irq_domain(fn, domain_info, parent);
+ irq_domain_free_fwnode(fn);
+ return d;
}
int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 2e8f7f048f4f..63287659adb6 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -102,7 +102,7 @@ static struct apic apic_default __ro_after_init = {
.get_apic_id = default_get_apic_id,
.set_apic_id = NULL,
- .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
.send_IPI = default_send_IPI_single,
.send_IPI_mask = default_send_IPI_mask_logical,
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index f3557a1eb562..b3af457ed667 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -103,7 +103,8 @@ static void free_apic_chip_data(struct apic_chip_data *data)
}
static int __assign_irq_vector(int irq, struct apic_chip_data *d,
- const struct cpumask *mask)
+ const struct cpumask *mask,
+ struct irq_data *irqdata)
{
/*
* NOTE! The local APIC isn't very good at handling
@@ -141,7 +142,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
/*
* Clear the offline cpus from @vector_cpumask for searching
* and verify whether the result overlaps with @mask. If true,
- * then the call to apic->cpu_mask_to_apicid_and() will
+ * then the call to apic->cpu_mask_to_apicid() will
* succeed as well. If not, no point in trying to find a
* vector in this mask.
*/
@@ -221,34 +222,40 @@ success:
* Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
* as we already established, that mask & d->domain & cpu_online_mask
* is not empty.
+ *
+ * vector_searchmask is a subset of d->domain and has the offline
+ * cpus masked out.
*/
- BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
- &d->cfg.dest_apicid));
+ cpumask_and(vector_searchmask, vector_searchmask, mask);
+ BUG_ON(apic->cpu_mask_to_apicid(vector_searchmask, irqdata,
+ &d->cfg.dest_apicid));
return 0;
}
static int assign_irq_vector(int irq, struct apic_chip_data *data,
- const struct cpumask *mask)
+ const struct cpumask *mask,
+ struct irq_data *irqdata)
{
int err;
unsigned long flags;
raw_spin_lock_irqsave(&vector_lock, flags);
- err = __assign_irq_vector(irq, data, mask);
+ err = __assign_irq_vector(irq, data, mask, irqdata);
raw_spin_unlock_irqrestore(&vector_lock, flags);
return err;
}
static int assign_irq_vector_policy(int irq, int node,
struct apic_chip_data *data,
- struct irq_alloc_info *info)
+ struct irq_alloc_info *info,
+ struct irq_data *irqdata)
{
if (info && info->mask)
- return assign_irq_vector(irq, data, info->mask);
+ return assign_irq_vector(irq, data, info->mask, irqdata);
if (node != NUMA_NO_NODE &&
- assign_irq_vector(irq, data, cpumask_of_node(node)) == 0)
+ assign_irq_vector(irq, data, cpumask_of_node(node), irqdata) == 0)
return 0;
- return assign_irq_vector(irq, data, apic->target_cpus());
+ return assign_irq_vector(irq, data, apic->target_cpus(), irqdata);
}
static void clear_irq_vector(int irq, struct apic_chip_data *data)
@@ -360,9 +367,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
irq_data->chip = &lapic_controller;
irq_data->chip_data = data;
irq_data->hwirq = virq + i;
- err = assign_irq_vector_policy(virq + i, node, data, info);
+ err = assign_irq_vector_policy(virq + i, node, data, info,
+ irq_data);
if (err)
goto error;
+ /*
+ * If the apic destination mode is physical, then the
+ * effective affinity is restricted to a single target
+ * CPU. Mark the interrupt accordingly.
+ */
+ if (!apic->irq_dest_mode)
+ irqd_set_single_target(irq_data);
}
return 0;
@@ -405,7 +420,7 @@ int __init arch_probe_nr_irqs(void)
}
#ifdef CONFIG_X86_IO_APIC
-static void init_legacy_irqs(void)
+static void __init init_legacy_irqs(void)
{
int i, node = cpu_to_node(0);
struct apic_chip_data *data;
@@ -424,16 +439,21 @@ static void init_legacy_irqs(void)
}
}
#else
-static void init_legacy_irqs(void) { }
+static inline void init_legacy_irqs(void) { }
#endif
int __init arch_early_irq_init(void)
{
+ struct fwnode_handle *fn;
+
init_legacy_irqs();
- x86_vector_domain = irq_domain_add_tree(NULL, &x86_vector_domain_ops,
- NULL);
+ fn = irq_domain_alloc_named_fwnode("VECTOR");
+ BUG_ON(!fn);
+ x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops,
+ NULL);
BUG_ON(x86_vector_domain == NULL);
+ irq_domain_free_fwnode(fn);
irq_set_default_host(x86_vector_domain);
arch_init_msi_domain(x86_vector_domain);
@@ -529,11 +549,12 @@ static int apic_set_affinity(struct irq_data *irq_data,
if (!cpumask_intersects(dest, cpu_online_mask))
return -EINVAL;
- err = assign_irq_vector(irq, data, dest);
+ err = assign_irq_vector(irq, data, dest, irq_data);
return err ? err : IRQ_SET_MASK_OK;
}
static struct irq_chip lapic_controller = {
+ .name = "APIC",
.irq_ack = apic_ack_edge,
.irq_set_affinity = apic_set_affinity,
.irq_retrigger = apic_retrigger_irq,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 5a35f208ed95..481237cb1544 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -4,6 +4,7 @@
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/dmar.h>
+#include <linux/irq.h>
#include <linux/cpu.h>
#include <asm/smp.h>
@@ -104,35 +105,30 @@ static void x2apic_send_IPI_all(int vector)
}
static int
-x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask,
- unsigned int *apicid)
+x2apic_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata,
+ unsigned int *apicid)
{
+ struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata);
+ unsigned int cpu;
u32 dest = 0;
u16 cluster;
- int i;
-
- for_each_cpu_and(i, cpumask, andmask) {
- if (!cpumask_test_cpu(i, cpu_online_mask))
- continue;
- dest = per_cpu(x86_cpu_to_logical_apicid, i);
- cluster = x2apic_cluster(i);
- break;
- }
- if (!dest)
+ cpu = cpumask_first(mask);
+ if (cpu >= nr_cpu_ids)
return -EINVAL;
- for_each_cpu_and(i, cpumask, andmask) {
- if (!cpumask_test_cpu(i, cpu_online_mask))
- continue;
- if (cluster != x2apic_cluster(i))
+ dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
+ cluster = x2apic_cluster(cpu);
+
+ cpumask_clear(effmsk);
+ for_each_cpu(cpu, mask) {
+ if (cluster != x2apic_cluster(cpu))
continue;
- dest |= per_cpu(x86_cpu_to_logical_apicid, i);
+ dest |= per_cpu(x86_cpu_to_logical_apicid, cpu);
+ cpumask_set_cpu(cpu, effmsk);
}
*apicid = dest;
-
return 0;
}
@@ -256,7 +252,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
.get_apic_id = x2apic_get_apic_id,
.set_apic_id = x2apic_set_apic_id,
- .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
.send_IPI = x2apic_send_IPI,
.send_IPI_mask = x2apic_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index ff111f05a314..3baf0c3dc875 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -127,7 +127,7 @@ static struct apic apic_x2apic_phys __ro_after_init = {
.get_apic_id = x2apic_get_apic_id,
.set_apic_id = x2apic_set_apic_id,
- .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
.send_IPI = x2apic_send_IPI,
.send_IPI_mask = x2apic_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index b487b3a01615..0d57bb9079c9 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -526,27 +526,15 @@ static void uv_init_apic_ldr(void)
}
static int
-uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask,
- unsigned int *apicid)
+uv_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata,
+ unsigned int *apicid)
{
- int unsigned cpu;
+ int ret = default_cpu_mask_to_apicid(mask, irqdata, apicid);
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- for_each_cpu_and(cpu, cpumask, andmask) {
- if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
- }
-
- if (likely(cpu < nr_cpu_ids)) {
- *apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
- return 0;
- }
+ if (!ret)
+ *apicid |= uv_apicid_hibits;
- return -EINVAL;
+ return ret;
}
static unsigned int x2apic_get_apic_id(unsigned long x)
@@ -614,7 +602,7 @@ static struct apic apic_x2apic_uv_x __ro_after_init = {
.get_apic_id = x2apic_get_apic_id,
.set_apic_id = set_apic_id,
- .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
.send_IPI = uv_send_IPI_one,
.send_IPI_mask = uv_send_IPI_mask,
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index f5af0cc7eb0d..9257bd9dc664 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -856,11 +856,13 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
dentry = kernfs_mount(fs_type, flags, rdt_root,
RDTGROUP_SUPER_MAGIC, NULL);
if (IS_ERR(dentry))
- goto out_cdp;
+ goto out_destroy;
static_branch_enable(&rdt_enable_key);
goto out;
+out_destroy:
+ kernfs_remove(kn_info);
out_cdp:
cdp_disable();
out:
diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 9c632cb88546..10cec43aac38 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -17,6 +17,8 @@
#include "mce-internal.h"
+static BLOCKING_NOTIFIER_HEAD(mce_injector_chain);
+
static DEFINE_MUTEX(mce_chrdev_read_mutex);
static char mce_helper[128];
@@ -345,24 +347,49 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
}
}
-static ssize_t (*mce_write)(struct file *filp, const char __user *ubuf,
- size_t usize, loff_t *off);
+void mce_register_injector_chain(struct notifier_block *nb)
+{
+ blocking_notifier_chain_register(&mce_injector_chain, nb);
+}
+EXPORT_SYMBOL_GPL(mce_register_injector_chain);
-void register_mce_write_callback(ssize_t (*fn)(struct file *filp,
- const char __user *ubuf,
- size_t usize, loff_t *off))
+void mce_unregister_injector_chain(struct notifier_block *nb)
{
- mce_write = fn;
+ blocking_notifier_chain_unregister(&mce_injector_chain, nb);
}
-EXPORT_SYMBOL_GPL(register_mce_write_callback);
+EXPORT_SYMBOL_GPL(mce_unregister_injector_chain);
static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
size_t usize, loff_t *off)
{
- if (mce_write)
- return mce_write(filp, ubuf, usize, off);
- else
+ struct mce m;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ /*
+ * There are some cases where real MSR reads could slip
+ * through.
+ */
+ if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
+ return -EIO;
+
+ if ((unsigned long)usize > sizeof(struct mce))
+ usize = sizeof(struct mce);
+ if (copy_from_user(&m, ubuf, usize))
+ return -EFAULT;
+
+ if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
return -EINVAL;
+
+ /*
+ * Need to give user space some time to set everything up,
+ * so do it a jiffie or two later everywhere.
+ */
+ schedule_timeout(2);
+
+ blocking_notifier_call_chain(&mce_injector_chain, 0, &m);
+
+ return usize;
}
static const struct file_operations mce_chrdev_ops = {
@@ -388,9 +415,15 @@ static __init int dev_mcelog_init_device(void)
/* register character device /dev/mcelog */
err = misc_register(&mce_chrdev_device);
if (err) {
- pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);
+ if (err == -EBUSY)
+ /* Xen dom0 might have registered the device already. */
+ pr_info("Unable to init device /dev/mcelog, already registered");
+ else
+ pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);
+
return err;
}
+
mce_register_decode_chain(&dev_mcelog_nb);
return 0;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 99165b206df3..231ad23b24a9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -10,23 +10,105 @@
* Authors:
* Andi Kleen
* Ying Huang
+ *
+ * The AMD part (from mce_amd_inj.c): a simple MCE injection facility
+ * for testing different aspects of the RAS code. This driver should be
+ * built as module so that it can be loaded on production kernels for
+ * testing purposes.
+ *
+ * This file may be distributed under the terms of the GNU General Public
+ * License version 2.
+ *
+ * Copyright (c) 2010-17: Borislav Petkov <bp@alien8.de>
+ * Advanced Micro Devices Inc.
*/
-#include <linux/uaccess.h>
-#include <linux/module.h>
-#include <linux/timer.h>
+
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/preempt.h>
-#include <linux/smp.h>
+#include <linux/module.h>
#include <linux/notifier.h>
-#include <linux/kdebug.h>
-#include <linux/cpu.h>
-#include <linux/sched.h>
-#include <linux/gfp.h>
-#include <asm/mce.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+
+#include <asm/amd_nb.h>
#include <asm/apic.h>
+#include <asm/irq_vectors.h>
+#include <asm/mce.h>
#include <asm/nmi.h>
+#include <asm/smp.h>
+
+#include "mce-internal.h"
+
+/*
+ * Collect all the MCi_XXX settings
+ */
+static struct mce i_mce;
+static struct dentry *dfs_inj;
+
+static u8 n_banks;
+
+#define MAX_FLAG_OPT_SIZE 3
+#define NBCFG 0x44
+
+enum injection_type {
+ SW_INJ = 0, /* SW injection, simply decode the error */
+ HW_INJ, /* Trigger a #MC */
+ DFR_INT_INJ, /* Trigger Deferred error interrupt */
+ THR_INT_INJ, /* Trigger threshold interrupt */
+ N_INJ_TYPES,
+};
+
+static const char * const flags_options[] = {
+ [SW_INJ] = "sw",
+ [HW_INJ] = "hw",
+ [DFR_INT_INJ] = "df",
+ [THR_INT_INJ] = "th",
+ NULL
+};
+
+/* Set default injection to SW_INJ */
+static enum injection_type inj_type = SW_INJ;
+
+#define MCE_INJECT_SET(reg) \
+static int inj_##reg##_set(void *data, u64 val) \
+{ \
+ struct mce *m = (struct mce *)data; \
+ \
+ m->reg = val; \
+ return 0; \
+}
+
+MCE_INJECT_SET(status);
+MCE_INJECT_SET(misc);
+MCE_INJECT_SET(addr);
+MCE_INJECT_SET(synd);
+
+#define MCE_INJECT_GET(reg) \
+static int inj_##reg##_get(void *data, u64 *val) \
+{ \
+ struct mce *m = (struct mce *)data; \
+ \
+ *val = m->reg; \
+ return 0; \
+}
+
+MCE_INJECT_GET(status);
+MCE_INJECT_GET(misc);
+MCE_INJECT_GET(addr);
+MCE_INJECT_GET(synd);
+
+DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
+
+static void setup_inj_struct(struct mce *m)
+{
+ memset(m, 0, sizeof(struct mce));
+
+ m->cpuvendor = boot_cpu_data.x86_vendor;
+}
/* Update fake mce registers on current CPU. */
static void inject_mce(struct mce *m)
@@ -143,7 +225,7 @@ static int raise_local(void)
return ret;
}
-static void raise_mce(struct mce *m)
+static void __maybe_unused raise_mce(struct mce *m)
{
int context = MCJ_CTX(m->inject_flags);
@@ -198,55 +280,454 @@ static void raise_mce(struct mce *m)
}
}
-/* Error injection interface */
-static ssize_t mce_write(struct file *filp, const char __user *ubuf,
- size_t usize, loff_t *off)
+static int mce_inject_raise(struct notifier_block *nb, unsigned long val,
+ void *data)
{
- struct mce m;
+ struct mce *m = (struct mce *)data;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- /*
- * There are some cases where real MSR reads could slip
- * through.
- */
- if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
- return -EIO;
+ if (!m)
+ return NOTIFY_DONE;
+
+ mutex_lock(&mce_inject_mutex);
+ raise_mce(m);
+ mutex_unlock(&mce_inject_mutex);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block inject_nb = {
+ .notifier_call = mce_inject_raise,
+};
+
+/*
+ * Caller needs to be make sure this cpu doesn't disappear
+ * from under us, i.e.: get_cpu/put_cpu.
+ */
+static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
+{
+ u32 l, h;
+ int err;
+
+ err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
+ if (err) {
+ pr_err("%s: error reading HWCR\n", __func__);
+ return err;
+ }
+
+ enable ? (l |= BIT(18)) : (l &= ~BIT(18));
+
+ err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
+ if (err)
+ pr_err("%s: error writing HWCR\n", __func__);
- if ((unsigned long)usize > sizeof(struct mce))
- usize = sizeof(struct mce);
- if (copy_from_user(&m, ubuf, usize))
+ return err;
+}
+
+static int __set_inj(const char *buf)
+{
+ int i;
+
+ for (i = 0; i < N_INJ_TYPES; i++) {
+ if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
+ inj_type = i;
+ return 0;
+ }
+ }
+ return -EINVAL;
+}
+
+static ssize_t flags_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[MAX_FLAG_OPT_SIZE];
+ int n;
+
+ n = sprintf(buf, "%s\n", flags_options[inj_type]);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
+}
+
+static ssize_t flags_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[MAX_FLAG_OPT_SIZE], *__buf;
+ int err;
+
+ if (cnt > MAX_FLAG_OPT_SIZE)
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
return -EFAULT;
- if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
+ buf[cnt - 1] = 0;
+
+ /* strip whitespace */
+ __buf = strstrip(buf);
+
+ err = __set_inj(__buf);
+ if (err) {
+ pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
+ return err;
+ }
+
+ *ppos += cnt;
+
+ return cnt;
+}
+
+static const struct file_operations flags_fops = {
+ .read = flags_read,
+ .write = flags_write,
+ .llseek = generic_file_llseek,
+};
+
+/*
+ * On which CPU to inject?
+ */
+MCE_INJECT_GET(extcpu);
+
+static int inj_extcpu_set(void *data, u64 val)
+{
+ struct mce *m = (struct mce *)data;
+
+ if (val >= nr_cpu_ids || !cpu_online(val)) {
+ pr_err("%s: Invalid CPU: %llu\n", __func__, val);
return -EINVAL;
+ }
+ m->extcpu = val;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
+
+static void trigger_mce(void *info)
+{
+ asm volatile("int $18");
+}
+
+static void trigger_dfr_int(void *info)
+{
+ asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
+}
+
+static void trigger_thr_int(void *info)
+{
+ asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
+}
+
+static u32 get_nbc_for_node(int node_id)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+ u32 cores_per_node;
+
+ cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();
+
+ return cores_per_node * node_id;
+}
+
+static void toggle_nb_mca_mst_cpu(u16 nid)
+{
+ struct amd_northbridge *nb;
+ struct pci_dev *F3;
+ u32 val;
+ int err;
+
+ nb = node_to_amd_nb(nid);
+ if (!nb)
+ return;
+
+ F3 = nb->misc;
+ if (!F3)
+ return;
+
+ err = pci_read_config_dword(F3, NBCFG, &val);
+ if (err) {
+ pr_err("%s: Error reading F%dx%03x.\n",
+ __func__, PCI_FUNC(F3->devfn), NBCFG);
+ return;
+ }
+
+ if (val & BIT(27))
+ return;
+
+ pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
+ __func__);
+
+ val |= BIT(27);
+ err = pci_write_config_dword(F3, NBCFG, val);
+ if (err)
+ pr_err("%s: Error writing F%dx%03x.\n",
+ __func__, PCI_FUNC(F3->devfn), NBCFG);
+}
+
+static void prepare_msrs(void *info)
+{
+ struct mce m = *(struct mce *)info;
+ u8 b = m.bank;
+
+ wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
+
+ if (boot_cpu_has(X86_FEATURE_SMCA)) {
+ if (m.inject_flags == DFR_INT_INJ) {
+ wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
+ wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
+ } else {
+ wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
+ wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
+ }
+
+ wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
+ wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
+ } else {
+ wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
+ wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
+ wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
+ }
+}
+
+static void do_inject(void)
+{
+ u64 mcg_status = 0;
+ unsigned int cpu = i_mce.extcpu;
+ u8 b = i_mce.bank;
+
+ rdtscll(i_mce.tsc);
+
+ if (i_mce.misc)
+ i_mce.status |= MCI_STATUS_MISCV;
+
+ if (i_mce.synd)
+ i_mce.status |= MCI_STATUS_SYNDV;
+
+ if (inj_type == SW_INJ) {
+ mce_inject_log(&i_mce);
+ return;
+ }
+
+ /* prep MCE global settings for the injection */
+ mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
+
+ if (!(i_mce.status & MCI_STATUS_PCC))
+ mcg_status |= MCG_STATUS_RIPV;
/*
- * Need to give user space some time to set everything up,
- * so do it a jiffie or two later everywhere.
+ * Ensure necessary status bits for deferred errors:
+ * - MCx_STATUS[Deferred]: make sure it is a deferred error
+ * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
*/
- schedule_timeout(2);
+ if (inj_type == DFR_INT_INJ) {
+ i_mce.status |= MCI_STATUS_DEFERRED;
+ i_mce.status |= (i_mce.status & ~MCI_STATUS_UC);
+ }
- mutex_lock(&mce_inject_mutex);
- raise_mce(&m);
- mutex_unlock(&mce_inject_mutex);
- return usize;
+ /*
+ * For multi node CPUs, logging and reporting of bank 4 errors happens
+ * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
+ * Fam10h and later BKDGs.
+ */
+ if (static_cpu_has(X86_FEATURE_AMD_DCM) &&
+ b == 4 &&
+ boot_cpu_data.x86 < 0x17) {
+ toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
+ cpu = get_nbc_for_node(amd_get_nb_id(cpu));
+ }
+
+ get_online_cpus();
+ if (!cpu_online(cpu))
+ goto err;
+
+ toggle_hw_mce_inject(cpu, true);
+
+ i_mce.mcgstatus = mcg_status;
+ i_mce.inject_flags = inj_type;
+ smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
+
+ toggle_hw_mce_inject(cpu, false);
+
+ switch (inj_type) {
+ case DFR_INT_INJ:
+ smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
+ break;
+ case THR_INT_INJ:
+ smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
+ break;
+ default:
+ smp_call_function_single(cpu, trigger_mce, NULL, 0);
+ }
+
+err:
+ put_online_cpus();
+
+}
+
+/*
+ * This denotes into which bank we're injecting and triggers
+ * the injection, at the same time.
+ */
+static int inj_bank_set(void *data, u64 val)
+{
+ struct mce *m = (struct mce *)data;
+
+ if (val >= n_banks) {
+ pr_err("Non-existent MCE bank: %llu\n", val);
+ return -EINVAL;
+ }
+
+ m->bank = val;
+ do_inject();
+
+ return 0;
+}
+
+MCE_INJECT_GET(bank);
+
+DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
+
+static const char readme_msg[] =
+"Description of the files and their usages:\n"
+"\n"
+"Note1: i refers to the bank number below.\n"
+"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
+"as they mirror the hardware registers.\n"
+"\n"
+"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
+"\t attributes of the error which caused the MCE.\n"
+"\n"
+"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
+"\t used for error thresholding purposes and its validity is indicated by\n"
+"\t MCi_STATUS[MiscV].\n"
+"\n"
+"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
+"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
+"\n"
+"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
+"\t associated with the error.\n"
+"\n"
+"cpu:\t The CPU to inject the error on.\n"
+"\n"
+"bank:\t Specify the bank you want to inject the error into: the number of\n"
+"\t banks in a processor varies and is family/model-specific, therefore, the\n"
+"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
+"\t injection.\n"
+"\n"
+"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
+"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
+"\t for AMD processors.\n"
+"\n"
+"\t Allowed error injection types:\n"
+"\t - \"sw\": Software error injection. Decode error to a human-readable \n"
+"\t format only. Safe to use.\n"
+"\t - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
+"\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
+"\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
+"\t before injecting.\n"
+"\t - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
+"\t error APIC interrupt handler to handle the error if the feature is \n"
+"\t is present in hardware. \n"
+"\t - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
+"\t APIC interrupt handler to handle the error. \n"
+"\n";
+
+static ssize_t
+inj_readme_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ return simple_read_from_buffer(ubuf, cnt, ppos,
+ readme_msg, strlen(readme_msg));
+}
+
+static const struct file_operations readme_fops = {
+ .read = inj_readme_read,
+};
+
+static struct dfs_node {
+ char *name;
+ struct dentry *d;
+ const struct file_operations *fops;
+ umode_t perm;
+} dfs_fls[] = {
+ { .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
+ { .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR },
+ { .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR },
+ { .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR },
+ { .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR },
+ { .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR },
+ { .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
+ { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
+};
+
+static int __init debugfs_init(void)
+{
+ unsigned int i;
+ u64 cap;
+
+ rdmsrl(MSR_IA32_MCG_CAP, cap);
+ n_banks = cap & MCG_BANKCNT_MASK;
+
+ dfs_inj = debugfs_create_dir("mce-inject", NULL);
+ if (!dfs_inj)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
+ dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
+ dfs_fls[i].perm,
+ dfs_inj,
+ &i_mce,
+ dfs_fls[i].fops);
+
+ if (!dfs_fls[i].d)
+ goto err_dfs_add;
+ }
+
+ return 0;
+
+err_dfs_add:
+ while (i-- > 0)
+ debugfs_remove(dfs_fls[i].d);
+
+ debugfs_remove(dfs_inj);
+ dfs_inj = NULL;
+
+ return -ENODEV;
}
-static int inject_init(void)
+static int __init inject_init(void)
{
+ int err;
+
if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
return -ENOMEM;
+
+ err = debugfs_init();
+ if (err) {
+ free_cpumask_var(mce_inject_cpumask);
+ return err;
+ }
+
+ register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
+ mce_register_injector_chain(&inject_nb);
+
+ setup_inj_struct(&i_mce);
+
pr_info("Machine check injector initialized\n");
- register_mce_write_callback(mce_write);
- register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
- "mce_notify");
+
return 0;
}
+static void __exit inject_exit(void)
+{
+
+ mce_unregister_injector_chain(&inject_nb);
+ unregister_nmi_handler(NMI_LOCAL, "mce_notify");
+
+ debugfs_remove_recursive(dfs_inj);
+ dfs_inj = NULL;
+
+ memset(&dfs_fls, 0, sizeof(dfs_fls));
+
+ free_cpumask_var(mce_inject_cpumask);
+}
+
module_init(inject_init);
-/*
- * Cannot tolerate unloading currently because we cannot
- * guarantee all openers of mce_chrdev will get a reference to us.
- */
+module_exit(inject_exit);
MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 654ad0668d72..098530a93bb7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -100,7 +100,11 @@ static inline bool mce_cmp(struct mce *m1, struct mce *m2)
extern struct device_attribute dev_attr_trigger;
#ifdef CONFIG_X86_MCELOG_LEGACY
-extern void mce_work_trigger(void);
+void mce_work_trigger(void);
+void mce_register_injector_chain(struct notifier_block *nb);
+void mce_unregister_injector_chain(struct notifier_block *nb);
#else
static inline void mce_work_trigger(void) { }
+static inline void mce_register_injector_chain(struct notifier_block *nb) { }
+static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5cfbaeb6529a..6dde0497efc7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -673,7 +673,6 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
bool error_seen = false;
struct mce m;
- int severity;
int i;
this_cpu_inc(mce_poll_count);
@@ -710,11 +709,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
mce_read_aux(&m, i);
- severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
-
- if (severity == MCE_DEFERRED_SEVERITY && mce_is_memory_error(&m))
- if (m.status & MCI_STATUS_ADDRV)
- m.severity = severity;
+ m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
/*
* Don't get the IP here because it's unlikely to
@@ -1550,7 +1545,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
*/
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
}
- if (c->x86 < 17 && cfg->bootlog < 0) {
+ if (c->x86 < 0x11 && cfg->bootlog < 0) {
/*
* Lots of broken BIOS around that don't clear them
* by default and leave crap in there. Don't log:
@@ -1832,7 +1827,8 @@ void mce_disable_bank(int bank)
* mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
* monarchtimeout is how long to wait for other CPUs on machine
* check, or 0 to not wait
- * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
+ * mce=bootlog Log MCEs from before booting. Disabled by default on AMD Fam10h
+ and older.
* mce=nobootlog Don't log MCEs from before booting.
* mce=bios_cmci_threshold Don't program the CMCI threshold
* mce=recovery force enable memcpy_mcsafe()
@@ -1912,12 +1908,13 @@ static void mce_disable_error_reporting(void)
static void vendor_disable_error_reporting(void)
{
/*
- * Don't clear on Intel CPUs. Some of these MSRs are socket-wide.
+ * Don't clear on Intel or AMD CPUs. Some of these MSRs are socket-wide.
* Disabling them for just a single offlined CPU is bad, since it will
* inhibit reporting for all shared resources on the socket like the
* last level cache (LLC), the integrated memory controller (iMC), etc.
*/
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
return;
mce_disable_error_reporting();
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 6e4a047e4b68..9e314bcf67cc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -164,17 +164,48 @@ static void default_deferred_error_interrupt(void)
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
-static void get_smca_bank_info(unsigned int bank)
+static void smca_configure(unsigned int bank, unsigned int cpu)
{
- unsigned int i, hwid_mcatype, cpu = smp_processor_id();
+ unsigned int i, hwid_mcatype;
struct smca_hwid *s_hwid;
- u32 high, instance_id;
+ u32 high, low;
+ u32 smca_config = MSR_AMD64_SMCA_MCx_CONFIG(bank);
+
+ /* Set appropriate bits in MCA_CONFIG */
+ if (!rdmsr_safe(smca_config, &low, &high)) {
+ /*
+ * OS is required to set the MCAX bit to acknowledge that it is
+ * now using the new MSR ranges and new registers under each
+ * bank. It also means that the OS will configure deferred
+ * errors in the new MCx_CONFIG register. If the bit is not set,
+ * uncorrectable errors will cause a system panic.
+ *
+ * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
+ */
+ high |= BIT(0);
+
+ /*
+ * SMCA sets the Deferred Error Interrupt type per bank.
+ *
+ * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us
+ * if the DeferredIntType bit field is available.
+ *
+ * MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the
+ * high portion of the MSR). OS should set this to 0x1 to enable
+ * APIC based interrupt. First, check that no interrupt has been
+ * set.
+ */
+ if ((low & BIT(5)) && !((high >> 5) & 0x3))
+ high |= BIT(5);
+
+ wrmsr(smca_config, low, high);
+ }
/* Collect bank_info using CPU 0 for now. */
if (cpu)
return;
- if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instance_id, &high)) {
+ if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
return;
}
@@ -191,7 +222,7 @@ static void get_smca_bank_info(unsigned int bank)
smca_get_name(s_hwid->bank_type));
smca_banks[bank].hwid = s_hwid;
- smca_banks[bank].id = instance_id;
+ smca_banks[bank].id = low;
smca_banks[bank].sysfs_id = s_hwid->count++;
break;
}
@@ -433,7 +464,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
int offset, u32 misc_high)
{
unsigned int cpu = smp_processor_id();
- u32 smca_low, smca_high, smca_addr;
+ u32 smca_low, smca_high;
struct threshold_block b;
int new;
@@ -457,51 +488,6 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
goto set_offset;
}
- smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
-
- if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
- /*
- * OS is required to set the MCAX bit to acknowledge that it is
- * now using the new MSR ranges and new registers under each
- * bank. It also means that the OS will configure deferred
- * errors in the new MCx_CONFIG register. If the bit is not set,
- * uncorrectable errors will cause a system panic.
- *
- * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
- */
- smca_high |= BIT(0);
-
- /*
- * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
- * registers with the option of additionally logging to
- * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
- *
- * This bit is usually set by BIOS to retain the old behavior
- * for OSes that don't use the new registers. Linux supports the
- * new registers so let's disable that additional logging here.
- *
- * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
- * portion of the MSR).
- */
- smca_high &= ~BIT(2);
-
- /*
- * SMCA sets the Deferred Error Interrupt type per bank.
- *
- * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us
- * if the DeferredIntType bit field is available.
- *
- * MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the
- * high portion of the MSR). OS should set this to 0x1 to enable
- * APIC based interrupt. First, check that no interrupt has been
- * set.
- */
- if ((smca_low & BIT(5)) && !((smca_high >> 5) & 0x3))
- smca_high |= BIT(5);
-
- wrmsr(smca_addr, smca_low, smca_high);
- }
-
/* Gather LVT offset for thresholding: */
if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
goto out;
@@ -530,7 +516,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (mce_flags.smca)
- get_smca_bank_info(bank);
+ smca_configure(bank, cpu);
for (block = 0; block < NR_BLOCKS; ++block) {
address = get_block_address(cpu, address, low, high, bank, block);
@@ -755,37 +741,19 @@ out_err:
}
EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
-static void
-__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
+static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
{
- u32 msr_status = msr_ops.status(bank);
- u32 msr_addr = msr_ops.addr(bank);
struct mce m;
- u64 status;
-
- WARN_ON_ONCE(deferred_err && threshold_err);
-
- if (deferred_err && mce_flags.smca) {
- msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
- msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
- }
-
- rdmsrl(msr_status, status);
-
- if (!(status & MCI_STATUS_VAL))
- return;
mce_setup(&m);
m.status = status;
+ m.misc = misc;
m.bank = bank;
m.tsc = rdtsc();
- if (threshold_err)
- m.misc = misc;
-
if (m.status & MCI_STATUS_ADDRV) {
- rdmsrl(msr_addr, m.addr);
+ m.addr = addr;
/*
* Extract [55:<lsb>] where lsb is the least significant
@@ -806,8 +774,6 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
}
mce_log(&m);
-
- wrmsrl(msr_status, 0);
}
static inline void __smp_deferred_error_interrupt(void)
@@ -832,87 +798,126 @@ asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
exiting_ack_irq();
}
-/* APIC interrupt handler for deferred errors */
-static void amd_deferred_error_interrupt(void)
+/*
+ * Returns true if the logged error is deferred. False, otherwise.
+ */
+static inline bool
+_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
{
- unsigned int bank;
- u32 msr_status;
- u64 status;
+ u64 status, addr = 0;
- for (bank = 0; bank < mca_cfg.banks; ++bank) {
- msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
- : msr_ops.status(bank);
+ rdmsrl(msr_stat, status);
+ if (!(status & MCI_STATUS_VAL))
+ return false;
- rdmsrl(msr_status, status);
+ if (status & MCI_STATUS_ADDRV)
+ rdmsrl(msr_addr, addr);
- if (!(status & MCI_STATUS_VAL) ||
- !(status & MCI_STATUS_DEFERRED))
- continue;
+ __log_error(bank, status, addr, misc);
- __log_error(bank, true, false, 0);
- break;
- }
+ wrmsrl(msr_stat, 0);
+
+ return status & MCI_STATUS_DEFERRED;
}
/*
- * APIC Interrupt Handler
+ * We have three scenarios for checking for Deferred errors:
+ *
+ * 1) Non-SMCA systems check MCA_STATUS and log error if found.
+ * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
+ * clear MCA_DESTAT.
+ * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
+ * log it.
*/
+static void log_error_deferred(unsigned int bank)
+{
+ bool defrd;
-/*
- * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
- * the interrupt goes off when error_count reaches threshold_limit.
- * the handler will simply log mcelog w/ software defined bank number.
- */
+ defrd = _log_error_bank(bank, msr_ops.status(bank),
+ msr_ops.addr(bank), 0);
-static void amd_threshold_interrupt(void)
+ if (!mce_flags.smca)
+ return;
+
+ /* Clear MCA_DESTAT if we logged the deferred error from MCA_STATUS. */
+ if (defrd) {
+ wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
+ return;
+ }
+
+ /*
+ * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
+ * for a valid error.
+ */
+ _log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank),
+ MSR_AMD64_SMCA_MCx_DEADDR(bank), 0);
+}
+
+/* APIC interrupt handler for deferred errors */
+static void amd_deferred_error_interrupt(void)
{
- u32 low = 0, high = 0, address = 0;
- unsigned int bank, block, cpu = smp_processor_id();
- struct thresh_restart tr;
+ unsigned int bank;
- /* assume first bank caused it */
- for (bank = 0; bank < mca_cfg.banks; ++bank) {
- if (!(per_cpu(bank_map, cpu) & (1 << bank)))
- continue;
- for (block = 0; block < NR_BLOCKS; ++block) {
- address = get_block_address(cpu, address, low, high, bank, block);
- if (!address)
- break;
+ for (bank = 0; bank < mca_cfg.banks; ++bank)
+ log_error_deferred(bank);
+}
- if (rdmsr_safe(address, &low, &high))
- break;
+static void log_error_thresholding(unsigned int bank, u64 misc)
+{
+ _log_error_bank(bank, msr_ops.status(bank), msr_ops.addr(bank), misc);
+}
- if (!(high & MASK_VALID_HI)) {
- if (block)
- continue;
- else
- break;
- }
+static void log_and_reset_block(struct threshold_block *block)
+{
+ struct thresh_restart tr;
+ u32 low = 0, high = 0;
- if (!(high & MASK_CNTP_HI) ||
- (high & MASK_LOCKED_HI))
- continue;
+ if (!block)
+ return;
- /*
- * Log the machine check that caused the threshold
- * event.
- */
- if (high & MASK_OVERFLOW_HI)
- goto log;
- }
- }
- return;
+ if (rdmsr_safe(block->address, &low, &high))
+ return;
+
+ if (!(high & MASK_OVERFLOW_HI))
+ return;
-log:
- __log_error(bank, false, true, ((u64)high << 32) | low);
+ /* Log the MCE which caused the threshold event. */
+ log_error_thresholding(block->bank, ((u64)high << 32) | low);
/* Reset threshold block after logging error. */
memset(&tr, 0, sizeof(tr));
- tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block];
+ tr.b = block;
threshold_restart_bank(&tr);
}
/*
+ * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt
+ * goes off when error_count reaches threshold_limit.
+ */
+static void amd_threshold_interrupt(void)
+{
+ struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
+ unsigned int bank, cpu = smp_processor_id();
+
+ for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ if (!(per_cpu(bank_map, cpu) & (1 << bank)))
+ continue;
+
+ first_block = per_cpu(threshold_banks, cpu)[bank]->blocks;
+ if (!first_block)
+ continue;
+
+ /*
+ * The first block is also the head of the list. Check it first
+ * before iterating over the rest.
+ */
+ log_and_reset_block(first_block);
+ list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj)
+ log_and_reset_block(block);
+ }
+}
+
+/*
* Sysfs Interface
*/
@@ -1202,7 +1207,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
goto out;
per_cpu(threshold_banks, cpu)[bank] = b;
- atomic_inc(&b->cpus);
+ refcount_inc(&b->cpus);
err = __threshold_add_blocks(b);
@@ -1225,7 +1230,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
per_cpu(threshold_banks, cpu)[bank] = b;
if (is_shared_bank(bank)) {
- atomic_set(&b->cpus, 1);
+ refcount_set(&b->cpus, 1);
/* nb is already initialized, see above */
if (nb) {
@@ -1289,7 +1294,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
goto free_out;
if (is_shared_bank(bank)) {
- if (!atomic_dec_and_test(&b->cpus)) {
+ if (!refcount_dec_and_test(&b->cpus)) {
__threshold_remove_blocks(b);
per_cpu(threshold_banks, cpu)[bank] = NULL;
return;
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index e9f4d762aa5b..21b185793c80 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -251,7 +251,7 @@ static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family)
#endif
}
-void __load_ucode_amd(unsigned int cpuid_1_eax, struct cpio_data *ret)
+static void __load_ucode_amd(unsigned int cpuid_1_eax, struct cpio_data *ret)
{
struct ucode_cpu_info *uci;
struct cpio_data cp;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index e53d3c909840..9cb98ee103db 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -290,6 +290,17 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
return (struct cpio_data){ NULL, 0, "" };
if (initrd_start)
start = initrd_start;
+ } else {
+ /*
+ * The picture with physical addresses is a bit different: we
+ * need to get the *physical* address to which the ramdisk was
+ * relocated, i.e., relocated_ramdisk (not initrd_start) and
+ * since we're running from physical addresses, we need to access
+ * relocated_ramdisk through its *physical* address too.
+ */
+ u64 *rr = (u64 *)__pa_nodebug(&relocated_ramdisk);
+ if (*rr)
+ start = *rr;
}
return find_cpio_data(path, (void *)start, size, NULL);
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index f522415bf9e5..59edbe9d4ccb 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -42,7 +42,7 @@
static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
/* Current microcode patch used in early patching on the APs. */
-struct microcode_intel *intel_ucode_patch;
+static struct microcode_intel *intel_ucode_patch;
static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
unsigned int s2, unsigned int p2)
@@ -166,7 +166,7 @@ static struct ucode_patch *__alloc_microcode_buf(void *data, unsigned int size)
static void save_microcode_patch(void *data, unsigned int size)
{
struct microcode_header_intel *mc_hdr, *mc_saved_hdr;
- struct ucode_patch *iter, *tmp, *p;
+ struct ucode_patch *iter, *tmp, *p = NULL;
bool prev_found = false;
unsigned int sig, pf;
@@ -202,6 +202,18 @@ static void save_microcode_patch(void *data, unsigned int size)
else
list_add_tail(&p->plist, &microcode_cache);
}
+
+ /*
+ * Save for early loading. On 32-bit, that needs to be a physical
+ * address as the APs are running from physical addresses, before
+ * paging has been enabled.
+ */
+ if (p) {
+ if (IS_ENABLED(CONFIG_X86_32))
+ intel_ucode_patch = (struct microcode_intel *)__pa_nodebug(p->data);
+ else
+ intel_ucode_patch = p->data;
+ }
}
static int microcode_sanity_check(void *mc, int print_err)
@@ -607,6 +619,14 @@ int __init save_microcode_in_initrd_intel(void)
struct ucode_cpu_info uci;
struct cpio_data cp;
+ /*
+ * initrd is going away, clear patch ptr. We will scan the microcode one
+ * last time before jettisoning and save a patch, if found. Then we will
+ * update that pointer too, with a stable patch address to use when
+ * resuming the cores.
+ */
+ intel_ucode_patch = NULL;
+
if (!load_builtin_intel_microcode(&cp))
cp = find_microcode_in_initrd(ucode_path, false);
@@ -619,9 +639,6 @@ int __init save_microcode_in_initrd_intel(void)
show_saved_mc();
- /* initrd is going away, clear patch ptr. */
- intel_ucode_patch = NULL;
-
return 0;
}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 04cb8d34ccb8..70e717fccdd6 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -161,6 +161,15 @@ static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
}
#endif
+static unsigned long hv_get_tsc_khz(void)
+{
+ unsigned long freq;
+
+ rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq);
+
+ return freq / 1000;
+}
+
static void __init ms_hyperv_init_platform(void)
{
int hv_host_info_eax;
@@ -193,8 +202,15 @@ static void __init ms_hyperv_init_platform(void)
hv_host_info_edx >> 24, hv_host_info_edx & 0xFFFFFF);
}
+ if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
+ ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
+ x86_platform.calibrate_tsc = hv_get_tsc_khz;
+ x86_platform.calibrate_cpu = hv_get_tsc_khz;
+ }
+
#ifdef CONFIG_X86_LOCAL_APIC
- if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
+ if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
+ ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
/*
* Get the APIC frequency.
*/
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 2bce84d91c2b..c5bb63be4ba1 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -807,10 +807,8 @@ void mtrr_save_state(void)
if (!mtrr_enabled())
return;
- get_online_cpus();
first_cpu = cpumask_first(cpu_online_mask);
smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
- put_online_cpus();
}
void set_mtrr_aps_delayed_init(void)
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 8e598a1ad986..6b91e2eb8d3f 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -125,7 +125,7 @@ void __init init_espfix_bsp(void)
p4d_t *p4d;
/* Install the espfix pud into the kernel page directory */
- pgd = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
+ pgd = &init_top_pgt[pgd_index(ESPFIX_BASE_ADDR)];
p4d = p4d_alloc(&init_mm, pgd, ESPFIX_BASE_ADDR);
p4d_populate(&init_mm, p4d, espfix_pud_page);
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 43b7002f44fb..46c3c73e7f43 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -33,17 +33,120 @@
/*
* Manage page tables very early on.
*/
-extern pgd_t early_level4_pgt[PTRS_PER_PGD];
+extern pgd_t early_top_pgt[PTRS_PER_PGD];
extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
-static unsigned int __initdata next_early_pgt = 2;
+static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
+#define __head __section(.head.text)
+
+static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
+{
+ return ptr - (void *)_text + (void *)physaddr;
+}
+
+void __head __startup_64(unsigned long physaddr)
+{
+ unsigned long load_delta, *p;
+ pgdval_t *pgd;
+ p4dval_t *p4d;
+ pudval_t *pud;
+ pmdval_t *pmd, pmd_entry;
+ int i;
+
+ /* Is the address too large? */
+ if (physaddr >> MAX_PHYSMEM_BITS)
+ for (;;);
+
+ /*
+ * Compute the delta between the address I am compiled to run at
+ * and the address I am actually running at.
+ */
+ load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);
+
+ /* Is the address not 2M aligned? */
+ if (load_delta & ~PMD_PAGE_MASK)
+ for (;;);
+
+ /* Fixup the physical addresses in the page table */
+
+ pgd = fixup_pointer(&early_top_pgt, physaddr);
+ pgd[pgd_index(__START_KERNEL_map)] += load_delta;
+
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ p4d = fixup_pointer(&level4_kernel_pgt, physaddr);
+ p4d[511] += load_delta;
+ }
+
+ pud = fixup_pointer(&level3_kernel_pgt, physaddr);
+ pud[510] += load_delta;
+ pud[511] += load_delta;
+
+ pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
+ pmd[506] += load_delta;
+
+ /*
+ * Set up the identity mapping for the switchover. These
+ * entries should *NOT* have the global bit set! This also
+ * creates a bunch of nonsense entries but that is fine --
+ * it avoids problems around wraparound.
+ */
+
+ pud = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+ pmd = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+
+ i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
+ pgd[i + 0] = (pgdval_t)p4d + _KERNPG_TABLE;
+ pgd[i + 1] = (pgdval_t)p4d + _KERNPG_TABLE;
+
+ i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
+ p4d[i + 0] = (pgdval_t)pud + _KERNPG_TABLE;
+ p4d[i + 1] = (pgdval_t)pud + _KERNPG_TABLE;
+ } else {
+ i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
+ pgd[i + 0] = (pgdval_t)pud + _KERNPG_TABLE;
+ pgd[i + 1] = (pgdval_t)pud + _KERNPG_TABLE;
+ }
+
+ i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
+ pud[i + 0] = (pudval_t)pmd + _KERNPG_TABLE;
+ pud[i + 1] = (pudval_t)pmd + _KERNPG_TABLE;
+
+ pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
+ pmd_entry += physaddr;
+
+ for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
+ int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD;
+ pmd[idx] = pmd_entry + i * PMD_SIZE;
+ }
+
+ /*
+ * Fixup the kernel text+data virtual addresses. Note that
+ * we might write invalid pmds, when the kernel is relocated
+ * cleanup_highmap() fixes this up along with the mappings
+ * beyond _end.
+ */
+
+ pmd = fixup_pointer(level2_kernel_pgt, physaddr);
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ if (pmd[i] & _PAGE_PRESENT)
+ pmd[i] += load_delta;
+ }
+
+ /* Fixup phys_base */
+ p = fixup_pointer(&phys_base, physaddr);
+ *p += load_delta;
+}
+
/* Wipe all early page tables except for the kernel symbol map */
static void __init reset_early_page_tables(void)
{
- memset(early_level4_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
+ memset(early_top_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
next_early_pgt = 0;
- write_cr3(__pa_nodebug(early_level4_pgt));
+ write_cr3(__pa_nodebug(early_top_pgt));
}
/* Create a new PMD entry */
@@ -51,15 +154,16 @@ int __init early_make_pgtable(unsigned long address)
{
unsigned long physaddr = address - __PAGE_OFFSET;
pgdval_t pgd, *pgd_p;
+ p4dval_t p4d, *p4d_p;
pudval_t pud, *pud_p;
pmdval_t pmd, *pmd_p;
/* Invalid address or early pgt is done ? */
- if (physaddr >= MAXMEM || read_cr3() != __pa_nodebug(early_level4_pgt))
+ if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt))
return -1;
again:
- pgd_p = &early_level4_pgt[pgd_index(address)].pgd;
+ pgd_p = &early_top_pgt[pgd_index(address)].pgd;
pgd = *pgd_p;
/*
@@ -67,8 +171,25 @@ again:
* critical -- __PAGE_OFFSET would point us back into the dynamic
* range and we might end up looping forever...
*/
- if (pgd)
- pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+ if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+ p4d_p = pgd_p;
+ else if (pgd)
+ p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+ else {
+ if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
+ reset_early_page_tables();
+ goto again;
+ }
+
+ p4d_p = (p4dval_t *)early_dynamic_pgts[next_early_pgt++];
+ memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
+ *pgd_p = (pgdval_t)p4d_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+ }
+ p4d_p += p4d_index(address);
+ p4d = *p4d_p;
+
+ if (p4d)
+ pud_p = (pudval_t *)((p4d & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
else {
if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
reset_early_page_tables();
@@ -77,7 +198,7 @@ again:
pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
- *pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+ *p4d_p = (p4dval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
}
pud_p += pud_index(address);
pud = *pud_p;
@@ -156,7 +277,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
clear_bss();
- clear_page(init_level4_pgt);
+ clear_page(init_top_pgt);
kasan_early_init();
@@ -171,8 +292,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
*/
load_ucode_bsp();
- /* set init_level4_pgt kernel high mapping*/
- init_level4_pgt[511] = early_level4_pgt[511];
+ /* set init_top_pgt kernel high mapping*/
+ init_top_pgt[511] = early_top_pgt[511];
x86_64_start_reservations(real_mode_data);
}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ac9d327d2e42..6225550883df 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -37,10 +37,11 @@
*
*/
+#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
-L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
-L4_START_KERNEL = pgd_index(__START_KERNEL_map)
+PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
+PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
.text
@@ -72,101 +73,12 @@ startup_64:
/* Sanitize CPU configuration */
call verify_cpu
- /*
- * Compute the delta between the address I am compiled to run at and the
- * address I am actually running at.
- */
- leaq _text(%rip), %rbp
- subq $_text - __START_KERNEL_map, %rbp
-
- /* Is the address not 2M aligned? */
- testl $~PMD_PAGE_MASK, %ebp
- jnz bad_address
-
- /*
- * Is the address too large?
- */
- leaq _text(%rip), %rax
- shrq $MAX_PHYSMEM_BITS, %rax
- jnz bad_address
-
- /*
- * Fixup the physical addresses in the page table
- */
- addq %rbp, early_level4_pgt + (L4_START_KERNEL*8)(%rip)
-
- addq %rbp, level3_kernel_pgt + (510*8)(%rip)
- addq %rbp, level3_kernel_pgt + (511*8)(%rip)
-
- addq %rbp, level2_fixmap_pgt + (506*8)(%rip)
-
- /*
- * Set up the identity mapping for the switchover. These
- * entries should *NOT* have the global bit set! This also
- * creates a bunch of nonsense entries but that is fine --
- * it avoids problems around wraparound.
- */
leaq _text(%rip), %rdi
- leaq early_level4_pgt(%rip), %rbx
-
- movq %rdi, %rax
- shrq $PGDIR_SHIFT, %rax
-
- leaq (PAGE_SIZE + _KERNPG_TABLE)(%rbx), %rdx
- movq %rdx, 0(%rbx,%rax,8)
- movq %rdx, 8(%rbx,%rax,8)
-
- addq $PAGE_SIZE, %rdx
- movq %rdi, %rax
- shrq $PUD_SHIFT, %rax
- andl $(PTRS_PER_PUD-1), %eax
- movq %rdx, PAGE_SIZE(%rbx,%rax,8)
- incl %eax
- andl $(PTRS_PER_PUD-1), %eax
- movq %rdx, PAGE_SIZE(%rbx,%rax,8)
-
- addq $PAGE_SIZE * 2, %rbx
- movq %rdi, %rax
- shrq $PMD_SHIFT, %rdi
- addq $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax
- leaq (_end - 1)(%rip), %rcx
- shrq $PMD_SHIFT, %rcx
- subq %rdi, %rcx
- incl %ecx
-
-1:
- andq $(PTRS_PER_PMD - 1), %rdi
- movq %rax, (%rbx,%rdi,8)
- incq %rdi
- addq $PMD_SIZE, %rax
- decl %ecx
- jnz 1b
-
- test %rbp, %rbp
- jz .Lskip_fixup
+ pushq %rsi
+ call __startup_64
+ popq %rsi
- /*
- * Fixup the kernel text+data virtual addresses. Note that
- * we might write invalid pmds, when the kernel is relocated
- * cleanup_highmap() fixes this up along with the mappings
- * beyond _end.
- */
- leaq level2_kernel_pgt(%rip), %rdi
- leaq PAGE_SIZE(%rdi), %r8
- /* See if it is a valid page table entry */
-1: testb $_PAGE_PRESENT, 0(%rdi)
- jz 2f
- addq %rbp, 0(%rdi)
- /* Go to the next page */
-2: addq $8, %rdi
- cmp %r8, %rdi
- jne 1b
-
- /* Fixup phys_base */
- addq %rbp, phys_base(%rip)
-
-.Lskip_fixup:
- movq $(early_level4_pgt - __START_KERNEL_map), %rax
+ movq $(early_top_pgt - __START_KERNEL_map), %rax
jmp 1f
ENTRY(secondary_startup_64)
/*
@@ -186,14 +98,17 @@ ENTRY(secondary_startup_64)
/* Sanitize CPU configuration */
call verify_cpu
- movq $(init_level4_pgt - __START_KERNEL_map), %rax
+ movq $(init_top_pgt - __START_KERNEL_map), %rax
1:
- /* Enable PAE mode and PGE */
+ /* Enable PAE mode, PGE and LA57 */
movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
+#ifdef CONFIG_X86_5LEVEL
+ orl $X86_CR4_LA57, %ecx
+#endif
movq %rcx, %cr4
- /* Setup early boot stage 4 level pagetables. */
+ /* Setup early boot stage 4-/5-level pagetables. */
addq phys_base(%rip), %rax
movq %rax, %cr3
@@ -417,9 +332,13 @@ GLOBAL(name)
.endr
__INITDATA
-NEXT_PAGE(early_level4_pgt)
+NEXT_PAGE(early_top_pgt)
.fill 511,8,0
+#ifdef CONFIG_X86_5LEVEL
+ .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+#else
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+#endif
NEXT_PAGE(early_dynamic_pgts)
.fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -427,14 +346,14 @@ NEXT_PAGE(early_dynamic_pgts)
.data
#ifndef CONFIG_XEN
-NEXT_PAGE(init_level4_pgt)
+NEXT_PAGE(init_top_pgt)
.fill 512,8,0
#else
-NEXT_PAGE(init_level4_pgt)
+NEXT_PAGE(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .org init_level4_pgt + L4_PAGE_OFFSET*8, 0
+ .org init_top_pgt + PGD_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .org init_level4_pgt + L4_START_KERNEL*8, 0
+ .org init_top_pgt + PGD_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
@@ -448,6 +367,12 @@ NEXT_PAGE(level2_ident_pgt)
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
#endif
+#ifdef CONFIG_X86_5LEVEL
+NEXT_PAGE(level4_kernel_pgt)
+ .fill 511,8,0
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+#endif
+
NEXT_PAGE(level3_kernel_pgt)
.fill L3_START_KERNEL,8,0
/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 89ff7af2de50..16f82a3aaec7 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -285,7 +285,7 @@ static void hpet_legacy_clockevent_register(void)
* Start hpet with the boot cpu mask and make it
* global after the IO_APIC has been initialized.
*/
- hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
+ hpet_clockevent.cpumask = cpumask_of(boot_cpu_data.cpu_index);
clockevents_config_and_register(&hpet_clockevent, hpet_freq,
HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
global_clock_event = &hpet_clockevent;
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index f34fe7444836..4aa03c5a14c9 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -432,84 +432,12 @@ int check_irq_vectors_for_cpu_disable(void)
/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
void fixup_irqs(void)
{
- unsigned int irq, vector;
- static int warned;
+ unsigned int irr, vector;
struct irq_desc *desc;
struct irq_data *data;
struct irq_chip *chip;
- int ret;
- for_each_irq_desc(irq, desc) {
- int break_affinity = 0;
- int set_affinity = 1;
- const struct cpumask *affinity;
-
- if (!desc)
- continue;
- if (irq == 2)
- continue;
-
- /* interrupt's are disabled at this point */
- raw_spin_lock(&desc->lock);
-
- data = irq_desc_get_irq_data(desc);
- affinity = irq_data_get_affinity_mask(data);
- if (!irq_has_action(irq) || irqd_is_per_cpu(data) ||
- cpumask_subset(affinity, cpu_online_mask)) {
- raw_spin_unlock(&desc->lock);
- continue;
- }
-
- /*
- * Complete the irq move. This cpu is going down and for
- * non intr-remapping case, we can't wait till this interrupt
- * arrives at this cpu before completing the irq move.
- */
- irq_force_complete_move(desc);
-
- if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
- break_affinity = 1;
- affinity = cpu_online_mask;
- }
-
- chip = irq_data_get_irq_chip(data);
- /*
- * The interrupt descriptor might have been cleaned up
- * already, but it is not yet removed from the radix tree
- */
- if (!chip) {
- raw_spin_unlock(&desc->lock);
- continue;
- }
-
- if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
- chip->irq_mask(data);
-
- if (chip->irq_set_affinity) {
- ret = chip->irq_set_affinity(data, affinity, true);
- if (ret == -ENOSPC)
- pr_crit("IRQ %d set affinity failed because there are no available vectors. The device assigned to this IRQ is unstable.\n", irq);
- } else {
- if (!(warned++))
- set_affinity = 0;
- }
-
- /*
- * We unmask if the irq was not marked masked by the
- * core code. That respects the lazy irq disable
- * behaviour.
- */
- if (!irqd_can_move_in_process_context(data) &&
- !irqd_irq_masked(data) && chip->irq_unmask)
- chip->irq_unmask(data);
-
- raw_spin_unlock(&desc->lock);
-
- if (break_affinity && set_affinity)
- pr_notice("Broke affinity for irq %i\n", irq);
- else if (!set_affinity)
- pr_notice("Cannot set affinity for irq %i\n", irq);
- }
+ irq_migrate_all_off_this_cpu();
/*
* We can remove mdelay() and then send spuriuous interrupts to
@@ -528,8 +456,6 @@ void fixup_irqs(void)
* nothing else will touch it.
*/
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
- unsigned int irr;
-
if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector])))
continue;
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index c37bd0f39c70..ab4f491da2a9 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -105,11 +105,9 @@ static void __jump_label_transform(struct jump_entry *entry,
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
- get_online_cpus();
mutex_lock(&text_mutex);
__jump_label_transform(entry, type, NULL, 0);
mutex_unlock(&text_mutex);
- put_online_cpus();
}
static enum {
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 901c640d152f..69ea0bc1cfa3 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -28,6 +28,7 @@
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
+#include <linux/frame.h>
#include <asm/text-patching.h>
#include <asm/cacheflush.h>
@@ -94,6 +95,7 @@ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
}
asm (
+ "optprobe_template_func:\n"
".global optprobe_template_entry\n"
"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
@@ -131,7 +133,12 @@ asm (
" popf\n"
#endif
".global optprobe_template_end\n"
- "optprobe_template_end:\n");
+ "optprobe_template_end:\n"
+ ".type optprobe_template_func, @function\n"
+ ".size optprobe_template_func, .-optprobe_template_func\n");
+
+void optprobe_template_func(void);
+STACK_FRAME_NON_STANDARD(optprobe_template_func);
#define TMPL_MOVE_IDX \
((long)&optprobe_template_val - (long)&optprobe_template_entry)
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index d4a15831ac58..a870910c8565 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -22,24 +22,25 @@
#include <asm/syscalls.h>
/* context.lock is held for us, so we don't need any locking. */
-static void flush_ldt(void *current_mm)
+static void flush_ldt(void *__mm)
{
+ struct mm_struct *mm = __mm;
mm_context_t *pc;
- if (current->active_mm != current_mm)
+ if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
return;
- pc = &current->active_mm->context;
- set_ldt(pc->ldt->entries, pc->ldt->size);
+ pc = &mm->context;
+ set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
}
/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
-static struct ldt_struct *alloc_ldt_struct(unsigned int size)
+static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
{
struct ldt_struct *new_ldt;
unsigned int alloc_size;
- if (size > LDT_ENTRIES)
+ if (num_entries > LDT_ENTRIES)
return NULL;
new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
@@ -47,7 +48,7 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int size)
return NULL;
BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
- alloc_size = size * LDT_ENTRY_SIZE;
+ alloc_size = num_entries * LDT_ENTRY_SIZE;
/*
* Xen is very picky: it requires a page-aligned LDT that has no
@@ -65,14 +66,14 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int size)
return NULL;
}
- new_ldt->size = size;
+ new_ldt->nr_entries = num_entries;
return new_ldt;
}
/* After calling this, the LDT is immutable. */
static void finalize_ldt_struct(struct ldt_struct *ldt)
{
- paravirt_alloc_ldt(ldt->entries, ldt->size);
+ paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
}
/* context.lock is held */
@@ -91,8 +92,8 @@ static void free_ldt_struct(struct ldt_struct *ldt)
if (likely(!ldt))
return;
- paravirt_free_ldt(ldt->entries, ldt->size);
- if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
+ paravirt_free_ldt(ldt->entries, ldt->nr_entries);
+ if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree_atomic(ldt->entries);
else
free_page((unsigned long)ldt->entries);
@@ -122,14 +123,14 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
goto out_unlock;
}
- new_ldt = alloc_ldt_struct(old_mm->context.ldt->size);
+ new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
if (!new_ldt) {
retval = -ENOMEM;
goto out_unlock;
}
memcpy(new_ldt->entries, old_mm->context.ldt->entries,
- new_ldt->size * LDT_ENTRY_SIZE);
+ new_ldt->nr_entries * LDT_ENTRY_SIZE);
finalize_ldt_struct(new_ldt);
mm->context.ldt = new_ldt;
@@ -152,9 +153,9 @@ void destroy_context_ldt(struct mm_struct *mm)
static int read_ldt(void __user *ptr, unsigned long bytecount)
{
- int retval;
- unsigned long size;
struct mm_struct *mm = current->mm;
+ unsigned long entries_size;
+ int retval;
mutex_lock(&mm->context.lock);
@@ -166,18 +167,18 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
- size = mm->context.ldt->size * LDT_ENTRY_SIZE;
- if (size > bytecount)
- size = bytecount;
+ entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE;
+ if (entries_size > bytecount)
+ entries_size = bytecount;
- if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
+ if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) {
retval = -EFAULT;
goto out_unlock;
}
- if (size != bytecount) {
+ if (entries_size != bytecount) {
/* Zero-fill the rest and pretend we read bytecount bytes. */
- if (clear_user(ptr + size, bytecount - size)) {
+ if (clear_user(ptr + entries_size, bytecount - entries_size)) {
retval = -EFAULT;
goto out_unlock;
}
@@ -208,7 +209,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
{
struct mm_struct *mm = current->mm;
struct ldt_struct *new_ldt, *old_ldt;
- unsigned int oldsize, newsize;
+ unsigned int old_nr_entries, new_nr_entries;
struct user_desc ldt_info;
struct desc_struct ldt;
int error;
@@ -247,17 +248,18 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
mutex_lock(&mm->context.lock);
- old_ldt = mm->context.ldt;
- oldsize = old_ldt ? old_ldt->size : 0;
- newsize = max(ldt_info.entry_number + 1, oldsize);
+ old_ldt = mm->context.ldt;
+ old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
+ new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);
error = -ENOMEM;
- new_ldt = alloc_ldt_struct(newsize);
+ new_ldt = alloc_ldt_struct(new_nr_entries);
if (!new_ldt)
goto out_unlock;
if (old_ldt)
- memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE);
+ memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);
+
new_ldt->entries[ldt_info.entry_number] = ldt;
finalize_ldt_struct(new_ldt);
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 6f5ca4ebe6e5..cb0a30473c23 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -347,7 +347,7 @@ void machine_kexec(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
VMCOREINFO_NUMBER(phys_base);
- VMCOREINFO_SYMBOL(init_level4_pgt);
+ VMCOREINFO_SYMBOL(init_top_pgt);
#ifdef CONFIG_NUMA
VMCOREINFO_SYMBOL(node_data);
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index 6d9582ec0324..d27f8d84c4ff 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -78,7 +78,7 @@ static void __init test_nmi_ipi(struct cpumask *mask)
/* Don't wait longer than a second */
timeout = USEC_PER_SEC;
- while (!cpumask_empty(mask) && timeout--)
+ while (!cpumask_empty(mask) && --timeout)
udelay(1);
/* What happens if we timeout, do we still unregister?? */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 3586996fc50d..bc0a849589bb 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -391,7 +391,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.read_cr2 = native_read_cr2,
.write_cr2 = native_write_cr2,
- .read_cr3 = native_read_cr3,
+ .read_cr3 = __native_read_cr3,
.write_cr3 = native_write_cr3,
.flush_tlb_user = native_flush_tlb,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0bb88428cbf2..3ca198080ea9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -545,17 +545,6 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
}
/*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
- struct inactive_task_frame *frame =
- (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
- return READ_ONCE_NOCHECK(frame->ret_addr);
-}
-
-/*
* Called from fs/proc with a reference on @p to find the function
* which called into schedule(). This needs to be done carefully
* because the task might wake up and we might look at a stack
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index ffeae818aa7a..c6d6dc5f8bb2 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -92,7 +92,7 @@ void __show_regs(struct pt_regs *regs, int all)
cr0 = read_cr0();
cr2 = read_cr2();
- cr3 = read_cr3();
+ cr3 = __read_cr3();
cr4 = __read_cr4();
printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
cr0, cr2, cr3, cr4);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b6840bf3940b..c3169be4c596 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -104,7 +104,7 @@ void __show_regs(struct pt_regs *regs, int all)
cr0 = read_cr0();
cr2 = read_cr2();
- cr3 = read_cr3();
+ cr3 = __read_cr3();
cr4 = __read_cr4();
printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
@@ -142,7 +142,7 @@ void release_thread(struct task_struct *dead_task)
pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
dead_task->comm,
dead_task->mm->context.ldt->entries,
- dead_task->mm->context.ldt->size);
+ dead_task->mm->context.ldt->nr_entries);
BUG();
}
#endif
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2544700a2a87..67393fc88353 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -9,6 +9,7 @@
#include <linux/sched.h>
#include <linux/tboot.h>
#include <linux/delay.h>
+#include <linux/frame.h>
#include <acpi/reboot.h>
#include <asm/io.h>
#include <asm/apic.h>
@@ -123,6 +124,7 @@ void __noreturn machine_real_restart(unsigned int type)
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(machine_real_restart);
#endif
+STACK_FRAME_NON_STANDARD(machine_real_restart);
/*
* Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f81823695014..65622f07e633 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -503,7 +503,7 @@ static int __init reserve_crashkernel_low(void)
return 0;
}
- low_base = memblock_find_in_range(low_size, 1ULL << 32, low_size, CRASH_ALIGN);
+ low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN);
if (!low_base) {
pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n",
(unsigned long)(low_size >> 20));
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f04479a8f74f..b474c8de7fba 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -863,7 +863,7 @@ static void announce_cpu(int cpu, int apicid)
if (cpu == 1)
printk(KERN_INFO "x86: Booting SMP configuration:\n");
- if (system_state == SYSTEM_BOOTING) {
+ if (system_state < SYSTEM_RUNNING) {
if (node != current_node) {
if (current_node > (-1))
pr_cont("\n");
@@ -1589,7 +1589,6 @@ void native_cpu_die(unsigned int cpu)
void play_dead_common(void)
{
idle_task_exit();
- reset_lazy_tlbstate();
/* Ack it */
(void)cpu_report_death();
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index f07f83b3611b..5f25cfbd952e 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -34,7 +34,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
mutex_lock(&child->mm->context.lock);
if (unlikely(!child->mm->context.ldt ||
- seg >= child->mm->context.ldt->size))
+ seg >= child->mm->context.ldt->nr_entries))
addr = -1L; /* bogus selector, access would fault */
else {
desc = &child->mm->context.ldt->entries[seg];
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 4b1724059909..a4eb27918ceb 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -514,7 +514,7 @@ int tboot_force_iommu(void)
if (!tboot_enabled())
return 0;
- if (!intel_iommu_tboot_noforce)
+ if (intel_iommu_tboot_noforce)
return 1;
if (no_iommu || swiotlb || dmar_disabled)
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index d39c09119db6..e0754cdbad37 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -66,7 +66,7 @@ static struct irqaction irq0 = {
.name = "timer"
};
-void __init setup_default_timer_irq(void)
+static void __init setup_default_timer_irq(void)
{
if (!nr_legacy_irqs())
return;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 714dfba6a1e7..796d96bb0821 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -51,115 +51,34 @@ static u32 art_to_tsc_denominator;
static u64 art_to_tsc_offset;
struct clocksource *art_related_clocksource;
-/*
- * Use a ring-buffer like data structure, where a writer advances the head by
- * writing a new data entry and a reader advances the tail when it observes a
- * new entry.
- *
- * Writers are made to wait on readers until there's space to write a new
- * entry.
- *
- * This means that we can always use an {offset, mul} pair to compute a ns
- * value that is 'roughly' in the right direction, even if we're writing a new
- * {offset, mul} pair during the clock read.
- *
- * The down-side is that we can no longer guarantee strict monotonicity anymore
- * (assuming the TSC was that to begin with), because while we compute the
- * intersection point of the two clock slopes and make sure the time is
- * continuous at the point of switching; we can no longer guarantee a reader is
- * strictly before or after the switch point.
- *
- * It does mean a reader no longer needs to disable IRQs in order to avoid
- * CPU-Freq updates messing with his times, and similarly an NMI reader will
- * no longer run the risk of hitting half-written state.
- */
-
struct cyc2ns {
- struct cyc2ns_data data[2]; /* 0 + 2*24 = 48 */
- struct cyc2ns_data *head; /* 48 + 8 = 56 */
- struct cyc2ns_data *tail; /* 56 + 8 = 64 */
-}; /* exactly fits one cacheline */
-
-static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
-
-struct cyc2ns_data *cyc2ns_read_begin(void)
-{
- struct cyc2ns_data *head;
-
- preempt_disable();
+ struct cyc2ns_data data[2]; /* 0 + 2*16 = 32 */
+ seqcount_t seq; /* 32 + 4 = 36 */
- head = this_cpu_read(cyc2ns.head);
- /*
- * Ensure we observe the entry when we observe the pointer to it.
- * matches the wmb from cyc2ns_write_end().
- */
- smp_read_barrier_depends();
- head->__count++;
- barrier();
-
- return head;
-}
+}; /* fits one cacheline */
-void cyc2ns_read_end(struct cyc2ns_data *head)
-{
- barrier();
- /*
- * If we're the outer most nested read; update the tail pointer
- * when we're done. This notifies possible pending writers
- * that we've observed the head pointer and that the other
- * entry is now free.
- */
- if (!--head->__count) {
- /*
- * x86-TSO does not reorder writes with older reads;
- * therefore once this write becomes visible to another
- * cpu, we must be finished reading the cyc2ns_data.
- *
- * matches with cyc2ns_write_begin().
- */
- this_cpu_write(cyc2ns.tail, head);
- }
- preempt_enable();
-}
+static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
-/*
- * Begin writing a new @data entry for @cpu.
- *
- * Assumes some sort of write side lock; currently 'provided' by the assumption
- * that cpufreq will call its notifiers sequentially.
- */
-static struct cyc2ns_data *cyc2ns_write_begin(int cpu)
+void cyc2ns_read_begin(struct cyc2ns_data *data)
{
- struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
- struct cyc2ns_data *data = c2n->data;
+ int seq, idx;
- if (data == c2n->head)
- data++;
+ preempt_disable_notrace();
- /* XXX send an IPI to @cpu in order to guarantee a read? */
+ do {
+ seq = this_cpu_read(cyc2ns.seq.sequence);
+ idx = seq & 1;
- /*
- * When we observe the tail write from cyc2ns_read_end(),
- * the cpu must be done with that entry and its safe
- * to start writing to it.
- */
- while (c2n->tail == data)
- cpu_relax();
+ data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
+ data->cyc2ns_mul = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
+ data->cyc2ns_shift = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);
- return data;
+ } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
}
-static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data)
+void cyc2ns_read_end(void)
{
- struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
-
- /*
- * Ensure the @data writes are visible before we publish the
- * entry. Matches the data-depencency in cyc2ns_read_begin().
- */
- smp_wmb();
-
- ACCESS_ONCE(c2n->head) = data;
+ preempt_enable_notrace();
}
/*
@@ -191,7 +110,6 @@ static void cyc2ns_data_init(struct cyc2ns_data *data)
data->cyc2ns_mul = 0;
data->cyc2ns_shift = 0;
data->cyc2ns_offset = 0;
- data->__count = 0;
}
static void cyc2ns_init(int cpu)
@@ -201,51 +119,29 @@ static void cyc2ns_init(int cpu)
cyc2ns_data_init(&c2n->data[0]);
cyc2ns_data_init(&c2n->data[1]);
- c2n->head = c2n->data;
- c2n->tail = c2n->data;
+ seqcount_init(&c2n->seq);
}
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
- struct cyc2ns_data *data, *tail;
+ struct cyc2ns_data data;
unsigned long long ns;
- /*
- * See cyc2ns_read_*() for details; replicated in order to avoid
- * an extra few instructions that came with the abstraction.
- * Notable, it allows us to only do the __count and tail update
- * dance when its actually needed.
- */
-
- preempt_disable_notrace();
- data = this_cpu_read(cyc2ns.head);
- tail = this_cpu_read(cyc2ns.tail);
-
- if (likely(data == tail)) {
- ns = data->cyc2ns_offset;
- ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
- } else {
- data->__count++;
-
- barrier();
+ cyc2ns_read_begin(&data);
- ns = data->cyc2ns_offset;
- ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
+ ns = data.cyc2ns_offset;
+ ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);
- barrier();
-
- if (!--data->__count)
- this_cpu_write(cyc2ns.tail, data);
- }
- preempt_enable_notrace();
+ cyc2ns_read_end();
return ns;
}
-static void set_cyc2ns_scale(unsigned long khz, int cpu)
+static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
{
- unsigned long long tsc_now, ns_now;
- struct cyc2ns_data *data;
+ unsigned long long ns_now;
+ struct cyc2ns_data data;
+ struct cyc2ns *c2n;
unsigned long flags;
local_irq_save(flags);
@@ -254,9 +150,6 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu)
if (!khz)
goto done;
- data = cyc2ns_write_begin(cpu);
-
- tsc_now = rdtsc();
ns_now = cycles_2_ns(tsc_now);
/*
@@ -264,7 +157,7 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu)
* time function is continuous; see the comment near struct
* cyc2ns_data.
*/
- clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz,
+ clocks_calc_mult_shift(&data.cyc2ns_mul, &data.cyc2ns_shift, khz,
NSEC_PER_MSEC, 0);
/*
@@ -273,20 +166,26 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu)
* conversion algorithm shifting a 32-bit value (now specifies a 64-bit
* value) - refer perf_event_mmap_page documentation in perf_event.h.
*/
- if (data->cyc2ns_shift == 32) {
- data->cyc2ns_shift = 31;
- data->cyc2ns_mul >>= 1;
+ if (data.cyc2ns_shift == 32) {
+ data.cyc2ns_shift = 31;
+ data.cyc2ns_mul >>= 1;
}
- data->cyc2ns_offset = ns_now -
- mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, data->cyc2ns_shift);
+ data.cyc2ns_offset = ns_now -
+ mul_u64_u32_shr(tsc_now, data.cyc2ns_mul, data.cyc2ns_shift);
- cyc2ns_write_end(cpu, data);
+ c2n = per_cpu_ptr(&cyc2ns, cpu);
+
+ raw_write_seqcount_latch(&c2n->seq);
+ c2n->data[0] = data;
+ raw_write_seqcount_latch(&c2n->seq);
+ c2n->data[1] = data;
done:
- sched_clock_idle_wakeup_event(0);
+ sched_clock_idle_wakeup_event();
local_irq_restore(flags);
}
+
/*
* Scheduler clock - returns current time in nanosec units.
*/
@@ -374,6 +273,8 @@ static int __init tsc_setup(char *str)
tsc_clocksource_reliable = 1;
if (!strncmp(str, "noirqtime", 9))
no_sched_irq_time = 1;
+ if (!strcmp(str, "unstable"))
+ mark_tsc_unstable("boot parameter");
return 1;
}
@@ -986,7 +887,6 @@ void tsc_restore_sched_clock_state(void)
}
#ifdef CONFIG_CPU_FREQ
-
/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
* changes.
*
@@ -1027,7 +927,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
mark_tsc_unstable("cpufreq changes");
- set_cyc2ns_scale(tsc_khz, freq->cpu);
+ set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc());
}
return 0;
@@ -1127,6 +1027,15 @@ static void tsc_cs_mark_unstable(struct clocksource *cs)
pr_info("Marking TSC unstable due to clocksource watchdog\n");
}
+static void tsc_cs_tick_stable(struct clocksource *cs)
+{
+ if (tsc_unstable)
+ return;
+
+ if (using_native_sched_clock())
+ sched_clock_tick_stable();
+}
+
/*
* .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
*/
@@ -1140,6 +1049,7 @@ static struct clocksource clocksource_tsc = {
.archdata = { .vclock_mode = VCLOCK_TSC },
.resume = tsc_resume,
.mark_unstable = tsc_cs_mark_unstable,
+ .tick_stable = tsc_cs_tick_stable,
};
void mark_tsc_unstable(char *reason)
@@ -1255,6 +1165,7 @@ static void tsc_refine_calibration_work(struct work_struct *work)
static int hpet;
u64 tsc_stop, ref_stop, delta;
unsigned long freq;
+ int cpu;
/* Don't bother refining TSC on unstable systems */
if (check_tsc_unstable())
@@ -1305,6 +1216,10 @@ static void tsc_refine_calibration_work(struct work_struct *work)
/* Inform the TSC deadline clockevent devices about the recalibration */
lapic_update_tsc_freq();
+ /* Update the sched_clock() rate to match the clocksource one */
+ for_each_possible_cpu(cpu)
+ set_cyc2ns_scale(tsc_khz, cpu, tsc_stop);
+
out:
if (boot_cpu_has(X86_FEATURE_ART))
art_related_clocksource = &clocksource_tsc;
@@ -1350,7 +1265,7 @@ device_initcall(init_tsc_clocksource);
void __init tsc_init(void)
{
- u64 lpj;
+ u64 lpj, cyc;
int cpu;
if (!boot_cpu_has(X86_FEATURE_TSC)) {
@@ -1390,9 +1305,10 @@ void __init tsc_init(void)
* speed as the bootup CPU. (cpufreq notifiers will fix this
* up if their speed diverges)
*/
+ cyc = rdtsc();
for_each_possible_cpu(cpu) {
cyc2ns_init(cpu);
- set_cyc2ns_scale(tsc_khz, cpu);
+ set_cyc2ns_scale(tsc_khz, cpu, cyc);
}
if (tsc_disabled > 0)
@@ -1412,11 +1328,11 @@ void __init tsc_init(void)
use_tsc_delay();
+ check_system_tsc_reliable();
+
if (unsynchronized_tsc())
mark_tsc_unstable("TSCs unsynchronized");
- check_system_tsc_reliable();
-
detect_art();
}
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 728f75378475..7842371bc9e4 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -71,13 +71,8 @@ static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval,
* non zero. We don't do that on non boot cpus because physical
* hotplug should have set the ADJUST register to a value > 0 so
* the TSC is in sync with the already running cpus.
- *
- * But we always force positive ADJUST values. Otherwise the TSC
- * deadline timer creates an interrupt storm. We also have to
- * prevent values > 0x7FFFFFFF as those wreckage the timer as well.
*/
- if ((bootcpu && bootval != 0) || (!bootcpu && bootval < 0) ||
- (bootval > 0x7FFFFFFF)) {
+ if (bootcpu && bootval != 0) {
pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n", cpu,
bootval);
wrmsrl(MSR_IA32_TSC_ADJUST, 0);
@@ -451,20 +446,6 @@ retry:
*/
cur->adjusted += cur_max_warp;
- /*
- * TSC deadline timer stops working or creates an interrupt storm
- * with adjust values < 0 and > x07ffffff.
- *
- * To allow adjust values > 0x7FFFFFFF we need to disable the
- * deadline timer and use the local APIC timer, but that requires
- * more intrusive changes and we do not have any useful information
- * from Intel about the underlying HW wreckage yet.
- */
- if (cur->adjusted < 0)
- cur->adjusted = 0;
- if (cur->adjusted > 0x7FFFFFFF)
- cur->adjusted = 0x7FFFFFFF;
-
pr_warn("TSC ADJUST compensate: CPU%u observed %lld warp. Adjust: %lld\n",
cpu, cur_max_warp, cur->adjusted);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0816ab2e8adc..80890dee66ce 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
}
+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ba9891ac5c56..33460fcdeef9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -36,6 +36,7 @@
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/hashtable.h>
+#include <linux/frame.h>
#include <asm/apic.h>
#include <asm/perf_event.h>
@@ -4906,6 +4907,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
mark_all_clean(svm->vmcb);
}
+STACK_FRAME_NON_STANDARD(svm_vcpu_run);
static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
{
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ca5d2b93385c..6dcc4873e435 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -33,6 +33,7 @@
#include <linux/slab.h>
#include <linux/tboot.h>
#include <linux/hrtimer.h>
+#include <linux/frame.h>
#include "kvm_cache_regs.h"
#include "x86.h"
@@ -48,6 +49,7 @@
#include <asm/kexec.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
+#include <asm/mmu_context.h>
#include "trace.h"
#include "pmu.h"
@@ -596,6 +598,7 @@ struct vcpu_vmx {
int gs_ldt_reload_needed;
int fs_reload_needed;
u64 msr_host_bndcfgs;
+ unsigned long vmcs_host_cr3; /* May not match real cr3 */
unsigned long vmcs_host_cr4; /* May not match real cr4 */
} host_state;
struct {
@@ -5012,12 +5015,19 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
u32 low32, high32;
unsigned long tmpl;
struct desc_ptr dt;
- unsigned long cr0, cr4;
+ unsigned long cr0, cr3, cr4;
cr0 = read_cr0();
WARN_ON(cr0 & X86_CR0_TS);
vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */
- vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
+
+ /*
+ * Save the most likely value for this task's CR3 in the VMCS.
+ * We can't use __get_current_cr3_fast() because we're not atomic.
+ */
+ cr3 = __read_cr3();
+ vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
+ vmx->host_state.vmcs_host_cr3 = cr3;
/* Save the most likely value for this task's CR4 in the VMCS. */
cr4 = cr4_read_shadow();
@@ -8652,6 +8662,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
);
}
}
+STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
static bool vmx_has_high_real_mode_segbase(void)
{
@@ -8820,7 +8831,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long debugctlmsr, cr4;
+ unsigned long debugctlmsr, cr3, cr4;
/* Don't enter VMX if guest state is invalid, let the exit handler
start emulation until we arrive back to a valid state */
@@ -8842,6 +8853,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
+ cr3 = __get_current_cr3_fast();
+ if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
+ vmcs_writel(HOST_CR3, cr3);
+ vmx->host_state.vmcs_host_cr3 = cr3;
+ }
+
cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
vmcs_writel(HOST_CR4, cr4);
@@ -9028,6 +9045,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
}
+STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
{
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 87d3cb901935..0e846f0cb83b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
ctxt->eflags = kvm_get_rflags(vcpu);
+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
ctxt->eip = kvm_rip_read(vcpu);
ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
(ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
@@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
return dr6;
}
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
{
struct kvm_run *kvm_run = vcpu->run;
- /*
- * rflags is the old, "raw" value of the flags. The new value has
- * not been saved yet.
- *
- * This is correct even for TF set by the guest, because "the
- * processor will not generate this exception after the instruction
- * that sets the TF flag".
- */
- if (unlikely(rflags & X86_EFLAGS_TF)) {
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
- DR6_RTM;
- kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- *r = EMULATE_USER_EXIT;
- } else {
- /*
- * "Certain debug exceptions may clear bit 0-3. The
- * remaining contents of the DR6 register are never
- * cleared by the processor".
- */
- vcpu->arch.dr6 &= ~15;
- vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
- kvm_queue_exception(vcpu, DB_VECTOR);
- }
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+ kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ *r = EMULATE_USER_EXIT;
+ } else {
+ /*
+ * "Certain debug exceptions may clear bit 0-3. The
+ * remaining contents of the DR6 register are never
+ * cleared by the processor".
+ */
+ vcpu->arch.dr6 &= ~15;
+ vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
+ kvm_queue_exception(vcpu, DB_VECTOR);
}
}
@@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
int r = EMULATE_DONE;
kvm_x86_ops->skip_emulated_instruction(vcpu);
- kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+
+ /*
+ * rflags is the old, "raw" value of the flags. The new value has
+ * not been saved yet.
+ *
+ * This is correct even for TF set by the guest, because "the
+ * processor will not generate this exception after the instruction
+ * that sets the TF flag".
+ */
+ if (unlikely(rflags & X86_EFLAGS_TF))
+ kvm_vcpu_do_singlestep(vcpu, &r);
return r == EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
@@ -5726,8 +5727,9 @@ restart:
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
kvm_rip_write(vcpu, ctxt->eip);
- if (r == EMULATE_DONE)
- kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+ if (r == EMULATE_DONE &&
+ (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+ kvm_vcpu_do_singlestep(vcpu, &r);
if (!ctxt->have_exception ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP)
__kvm_set_rflags(vcpu, ctxt->eflags);
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index c5959576c315..020f75cc8cf6 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -37,7 +37,7 @@ ENTRY(copy_user_generic_unrolled)
movl %edx,%ecx
andl $63,%edx
shrl $6,%ecx
- jz 17f
+ jz .L_copy_short_string
1: movq (%rsi),%r8
2: movq 1*8(%rsi),%r9
3: movq 2*8(%rsi),%r10
@@ -58,7 +58,8 @@ ENTRY(copy_user_generic_unrolled)
leaq 64(%rdi),%rdi
decl %ecx
jnz 1b
-17: movl %edx,%ecx
+.L_copy_short_string:
+ movl %edx,%ecx
andl $7,%edx
shrl $3,%ecx
jz 20f
@@ -174,6 +175,8 @@ EXPORT_SYMBOL(copy_user_generic_string)
*/
ENTRY(copy_user_enhanced_fast_string)
ASM_STAC
+ cmpl $64,%edx
+ jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */
movl %edx,%ecx
1: rep
movsb
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index c81556409bbb..10ffa7e8519f 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -13,14 +13,14 @@
.macro op_safe_regs op
ENTRY(\op\()_safe_regs)
pushq %rbx
- pushq %rbp
+ pushq %r12
movq %rdi, %r10 /* Save pointer */
xorl %r11d, %r11d /* Return value */
movl (%rdi), %eax
movl 4(%rdi), %ecx
movl 8(%rdi), %edx
movl 12(%rdi), %ebx
- movl 20(%rdi), %ebp
+ movl 20(%rdi), %r12d
movl 24(%rdi), %esi
movl 28(%rdi), %edi
1: \op
@@ -29,10 +29,10 @@ ENTRY(\op\()_safe_regs)
movl %ecx, 4(%r10)
movl %edx, 8(%r10)
movl %ebx, 12(%r10)
- movl %ebp, 20(%r10)
+ movl %r12d, 20(%r10)
movl %esi, 24(%r10)
movl %edi, 28(%r10)
- popq %rbp
+ popq %r12
popq %rbx
ret
3:
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 767be7c76034..12e377184ee4 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -1009,7 +1009,7 @@ GrpTable: Grp15
1: fxstor | RDGSBASE Ry (F3),(11B)
2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
-4: XSAVE
+4: XSAVE | ptwrite Ey (F3),(11B)
5: XRSTOR | lfence (11B)
6: XSAVEOPT | clwb (66) | mfence (11B)
7: clflush | clflushopt (66) | sfence (11B)
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index 5e044d506b7a..a179254a5122 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -27,7 +27,7 @@ static inline struct desc_struct FPU_get_ldt_descriptor(unsigned seg)
#ifdef CONFIG_MODIFY_LDT_SYSCALL
seg >>= 3;
mutex_lock(&current->mm->context.lock);
- if (current->mm->context.ldt && seg < current->mm->context.ldt->size)
+ if (current->mm->context.ldt && seg < current->mm->context.ldt->nr_entries)
ret = current->mm->context.ldt->entries[seg];
mutex_unlock(&current->mm->context.lock);
#endif
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 96d2b847e09e..0fbdcb64f9f8 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -2,7 +2,7 @@
KCOV_INSTRUMENT_tlb.o := n
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o gup.o setup_nx.o tlb.o
+ pat.o pgtable.o physaddr.o setup_nx.o tlb.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index bce6990b1d81..0470826d2bdc 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -431,7 +431,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
bool checkwx)
{
#ifdef CONFIG_X86_64
- pgd_t *start = (pgd_t *) &init_level4_pgt;
+ pgd_t *start = (pgd_t *) &init_top_pgt;
#else
pgd_t *start = swapper_pg_dir;
#endif
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8ad91a01cbc8..2a1fa10c6a98 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -346,7 +346,7 @@ static noinline int vmalloc_fault(unsigned long address)
* Do _not_ use "current" here. We might be inside
* an interrupt in the middle of a task switch..
*/
- pgd_paddr = read_cr3();
+ pgd_paddr = read_cr3_pa();
pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
if (!pmd_k)
return -1;
@@ -388,7 +388,7 @@ static bool low_pfn(unsigned long pfn)
static void dump_pagetable(unsigned long address)
{
- pgd_t *base = __va(read_cr3());
+ pgd_t *base = __va(read_cr3_pa());
pgd_t *pgd = &base[pgd_index(address)];
p4d_t *p4d;
pud_t *pud;
@@ -451,7 +451,7 @@ static noinline int vmalloc_fault(unsigned long address)
* happen within a race in page table update. In the later
* case just flush:
*/
- pgd = (pgd_t *)__va(read_cr3()) + pgd_index(address);
+ pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address);
pgd_ref = pgd_offset_k(address);
if (pgd_none(*pgd_ref))
return -1;
@@ -555,7 +555,7 @@ static int bad_address(void *p)
static void dump_pagetable(unsigned long address)
{
- pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+ pgd_t *base = __va(read_cr3_pa());
pgd_t *pgd = base + pgd_index(address);
p4d_t *p4d;
pud_t *pud;
@@ -700,7 +700,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
pgd_t *pgd;
pte_t *pte;
- pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+ pgd = __va(read_cr3_pa());
pgd += pgd_index(address);
pte = lookup_address_in_pgd(pgd, address, &level);
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
deleted file mode 100644
index 456dfdfd2249..000000000000
--- a/arch/x86/mm/gup.c
+++ /dev/null
@@ -1,496 +0,0 @@
-/*
- * Lockless get_user_pages_fast for x86
- *
- * Copyright (C) 2008 Nick Piggin
- * Copyright (C) 2008 Novell Inc.
- */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmstat.h>
-#include <linux/highmem.h>
-#include <linux/swap.h>
-#include <linux/memremap.h>
-
-#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
-
-static inline pte_t gup_get_pte(pte_t *ptep)
-{
-#ifndef CONFIG_X86_PAE
- return READ_ONCE(*ptep);
-#else
- /*
- * With get_user_pages_fast, we walk down the pagetables without taking
- * any locks. For this we would like to load the pointers atomically,
- * but that is not possible (without expensive cmpxchg8b) on PAE. What
- * we do have is the guarantee that a pte will only either go from not
- * present to present, or present to not present or both -- it will not
- * switch to a completely different present page without a TLB flush in
- * between; something that we are blocking by holding interrupts off.
- *
- * Setting ptes from not present to present goes:
- * ptep->pte_high = h;
- * smp_wmb();
- * ptep->pte_low = l;
- *
- * And present to not present goes:
- * ptep->pte_low = 0;
- * smp_wmb();
- * ptep->pte_high = 0;
- *
- * We must ensure here that the load of pte_low sees l iff pte_high
- * sees h. We load pte_high *after* loading pte_low, which ensures we
- * don't see an older value of pte_high. *Then* we recheck pte_low,
- * which ensures that we haven't picked up a changed pte high. We might
- * have got rubbish values from pte_low and pte_high, but we are
- * guaranteed that pte_low will not have the present bit set *unless*
- * it is 'l'. And get_user_pages_fast only operates on present ptes, so
- * we're safe.
- *
- * gup_get_pte should not be used or copied outside gup.c without being
- * very careful -- it does not atomically load the pte or anything that
- * is likely to be useful for you.
- */
- pte_t pte;
-
-retry:
- pte.pte_low = ptep->pte_low;
- smp_rmb();
- pte.pte_high = ptep->pte_high;
- smp_rmb();
- if (unlikely(pte.pte_low != ptep->pte_low))
- goto retry;
-
- return pte;
-#endif
-}
-
-static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
-{
- while ((*nr) - nr_start) {
- struct page *page = pages[--(*nr)];
-
- ClearPageReferenced(page);
- put_page(page);
- }
-}
-
-/*
- * 'pteval' can come from a pte, pmd, pud or p4d. We only check
- * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the
- * same value on all 4 types.
- */
-static inline int pte_allows_gup(unsigned long pteval, int write)
-{
- unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER;
-
- if (write)
- need_pte_bits |= _PAGE_RW;
-
- if ((pteval & need_pte_bits) != need_pte_bits)
- return 0;
-
- /* Check memory protection keys permissions. */
- if (!__pkru_allows_pkey(pte_flags_pkey(pteval), write))
- return 0;
-
- return 1;
-}
-
-/*
- * The performance critical leaf functions are made noinline otherwise gcc
- * inlines everything into a single function which results in too much
- * register pressure.
- */
-static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- struct dev_pagemap *pgmap = NULL;
- int nr_start = *nr, ret = 0;
- pte_t *ptep, *ptem;
-
- /*
- * Keep the original mapped PTE value (ptem) around since we
- * might increment ptep off the end of the page when finishing
- * our loop iteration.
- */
- ptem = ptep = pte_offset_map(&pmd, addr);
- do {
- pte_t pte = gup_get_pte(ptep);
- struct page *page;
-
- /* Similar to the PMD case, NUMA hinting must take slow path */
- if (pte_protnone(pte))
- break;
-
- if (!pte_allows_gup(pte_val(pte), write))
- break;
-
- if (pte_devmap(pte)) {
- pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
- if (unlikely(!pgmap)) {
- undo_dev_pagemap(nr, nr_start, pages);
- break;
- }
- } else if (pte_special(pte))
- break;
-
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
- page = pte_page(pte);
- get_page(page);
- put_dev_pagemap(pgmap);
- SetPageReferenced(page);
- pages[*nr] = page;
- (*nr)++;
-
- } while (ptep++, addr += PAGE_SIZE, addr != end);
- if (addr == end)
- ret = 1;
- pte_unmap(ptem);
-
- return ret;
-}
-
-static inline void get_head_page_multiple(struct page *page, int nr)
-{
- VM_BUG_ON_PAGE(page != compound_head(page), page);
- VM_BUG_ON_PAGE(page_count(page) == 0, page);
- page_ref_add(page, nr);
- SetPageReferenced(page);
-}
-
-static int __gup_device_huge(unsigned long pfn, unsigned long addr,
- unsigned long end, struct page **pages, int *nr)
-{
- int nr_start = *nr;
- struct dev_pagemap *pgmap = NULL;
-
- do {
- struct page *page = pfn_to_page(pfn);
-
- pgmap = get_dev_pagemap(pfn, pgmap);
- if (unlikely(!pgmap)) {
- undo_dev_pagemap(nr, nr_start, pages);
- return 0;
- }
- SetPageReferenced(page);
- pages[*nr] = page;
- get_page(page);
- put_dev_pagemap(pgmap);
- (*nr)++;
- pfn++;
- } while (addr += PAGE_SIZE, addr != end);
- return 1;
-}
-
-static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
- unsigned long end, struct page **pages, int *nr)
-{
- unsigned long fault_pfn;
-
- fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
- return __gup_device_huge(fault_pfn, addr, end, pages, nr);
-}
-
-static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
- unsigned long end, struct page **pages, int *nr)
-{
- unsigned long fault_pfn;
-
- fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
- return __gup_device_huge(fault_pfn, addr, end, pages, nr);
-}
-
-static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- struct page *head, *page;
- int refs;
-
- if (!pte_allows_gup(pmd_val(pmd), write))
- return 0;
-
- VM_BUG_ON(!pfn_valid(pmd_pfn(pmd)));
- if (pmd_devmap(pmd))
- return __gup_device_huge_pmd(pmd, addr, end, pages, nr);
-
- /* hugepages are never "special" */
- VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL);
-
- refs = 0;
- head = pmd_page(pmd);
- page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
- do {
- VM_BUG_ON_PAGE(compound_head(page) != head, page);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
- get_head_page_multiple(head, refs);
-
- return 1;
-}
-
-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pmd_t *pmdp;
-
- pmdp = pmd_offset(&pud, addr);
- do {
- pmd_t pmd = *pmdp;
-
- next = pmd_addr_end(addr, end);
- if (pmd_none(pmd))
- return 0;
- if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
- /*
- * NUMA hinting faults need to be handled in the GUP
- * slowpath for accounting purposes and so that they
- * can be serialised against THP migration.
- */
- if (pmd_protnone(pmd))
- return 0;
- if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
- return 0;
- } else {
- if (!gup_pte_range(pmd, addr, next, write, pages, nr))
- return 0;
- }
- } while (pmdp++, addr = next, addr != end);
-
- return 1;
-}
-
-static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- struct page *head, *page;
- int refs;
-
- if (!pte_allows_gup(pud_val(pud), write))
- return 0;
-
- VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
- if (pud_devmap(pud))
- return __gup_device_huge_pud(pud, addr, end, pages, nr);
-
- /* hugepages are never "special" */
- VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL);
-
- refs = 0;
- head = pud_page(pud);
- page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
- do {
- VM_BUG_ON_PAGE(compound_head(page) != head, page);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
- get_head_page_multiple(head, refs);
-
- return 1;
-}
-
-static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pud_t *pudp;
-
- pudp = pud_offset(&p4d, addr);
- do {
- pud_t pud = *pudp;
-
- next = pud_addr_end(addr, end);
- if (pud_none(pud))
- return 0;
- if (unlikely(pud_large(pud))) {
- if (!gup_huge_pud(pud, addr, next, write, pages, nr))
- return 0;
- } else {
- if (!gup_pmd_range(pud, addr, next, write, pages, nr))
- return 0;
- }
- } while (pudp++, addr = next, addr != end);
-
- return 1;
-}
-
-static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- p4d_t *p4dp;
-
- p4dp = p4d_offset(&pgd, addr);
- do {
- p4d_t p4d = *p4dp;
-
- next = p4d_addr_end(addr, end);
- if (p4d_none(p4d))
- return 0;
- BUILD_BUG_ON(p4d_large(p4d));
- if (!gup_pud_range(p4d, addr, next, write, pages, nr))
- return 0;
- } while (p4dp++, addr = next, addr != end);
-
- return 1;
-}
-
-/*
- * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
- * back to the regular GUP.
- */
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next;
- unsigned long flags;
- pgd_t *pgdp;
- int nr = 0;
-
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
- if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
- (void __user *)start, len)))
- return 0;
-
- /*
- * XXX: batch / limit 'nr', to avoid large irq off latency
- * needs some instrumenting to determine the common sizes used by
- * important workloads (eg. DB2), and whether limiting the batch size
- * will decrease performance.
- *
- * It seems like we're in the clear for the moment. Direct-IO is
- * the main guy that batches up lots of get_user_pages, and even
- * they are limited to 64-at-a-time which is not so many.
- */
- /*
- * This doesn't prevent pagetable teardown, but does prevent
- * the pagetables and pages from being freed on x86.
- *
- * So long as we atomically load page table pointers versus teardown
- * (which we do on x86, with the above PAE exception), we can follow the
- * address down to the the page and take a ref on it.
- */
- local_irq_save(flags);
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = *pgdp;
-
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- break;
- if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
- break;
- } while (pgdp++, addr = next, addr != end);
- local_irq_restore(flags);
-
- return nr;
-}
-
-/**
- * get_user_pages_fast() - pin user pages in memory
- * @start: starting user address
- * @nr_pages: number of pages from start to pin
- * @write: whether pages will be written to
- * @pages: array that receives pointers to the pages pinned.
- * Should be at least nr_pages long.
- *
- * Attempt to pin user pages in memory without taking mm->mmap_sem.
- * If not successful, it will fall back to taking the lock and
- * calling get_user_pages().
- *
- * Returns number of pages pinned. This may be fewer than the number
- * requested. If nr_pages is 0 or negative, returns 0. If no pages
- * were pinned, returns -errno.
- */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next;
- pgd_t *pgdp;
- int nr = 0;
-
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
-
- end = start + len;
- if (end < start)
- goto slow_irqon;
-
-#ifdef CONFIG_X86_64
- if (end >> __VIRTUAL_MASK_SHIFT)
- goto slow_irqon;
-#endif
-
- /*
- * XXX: batch / limit 'nr', to avoid large irq off latency
- * needs some instrumenting to determine the common sizes used by
- * important workloads (eg. DB2), and whether limiting the batch size
- * will decrease performance.
- *
- * It seems like we're in the clear for the moment. Direct-IO is
- * the main guy that batches up lots of get_user_pages, and even
- * they are limited to 64-at-a-time which is not so many.
- */
- /*
- * This doesn't prevent pagetable teardown, but does prevent
- * the pagetables and pages from being freed on x86.
- *
- * So long as we atomically load page table pointers versus teardown
- * (which we do on x86, with the above PAE exception), we can follow the
- * address down to the the page and take a ref on it.
- */
- local_irq_disable();
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = *pgdp;
-
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- goto slow;
- if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
- goto slow;
- } while (pgdp++, addr = next, addr != end);
- local_irq_enable();
-
- VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
- return nr;
-
- {
- int ret;
-
-slow:
- local_irq_enable();
-slow_irqon:
- /* Try to get the remaining pages with get_user_pages */
- start += nr << PAGE_SHIFT;
- pages += nr;
-
- ret = get_user_pages_unlocked(start,
- (end - start) >> PAGE_SHIFT,
- pages, write ? FOLL_WRITE : 0);
-
- /* Have to be a bit careful with return values */
- if (nr > 0) {
- if (ret < 0)
- ret = nr;
- else
- ret += nr;
- }
-
- return ret;
- }
-}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 9b3f9fa5b283..673541eb3b3f 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -811,10 +811,8 @@ void __init zone_sizes_init(void)
}
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
-#ifdef CONFIG_SMP
- .active_mm = &init_mm,
+ .loaded_mm = &init_mm,
.state = 0,
-#endif
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
};
EXPORT_SYMBOL_GPL(cpu_tlbstate);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 95651dc58e09..dae6a5e5ad4a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -92,6 +92,44 @@ __setup("noexec32=", nonx32_setup);
* When memory was added make sure all the processes MM have
* suitable PGD entries in the local PGD level page.
*/
+#ifdef CONFIG_X86_5LEVEL
+void sync_global_pgds(unsigned long start, unsigned long end)
+{
+ unsigned long addr;
+
+ for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+ const pgd_t *pgd_ref = pgd_offset_k(addr);
+ struct page *page;
+
+ /* Check for overflow */
+ if (addr < start)
+ break;
+
+ if (pgd_none(*pgd_ref))
+ continue;
+
+ spin_lock(&pgd_lock);
+ list_for_each_entry(page, &pgd_list, lru) {
+ pgd_t *pgd;
+ spinlock_t *pgt_lock;
+
+ pgd = (pgd_t *)page_address(page) + pgd_index(addr);
+ /* the pgt_lock only for Xen */
+ pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
+ spin_lock(pgt_lock);
+
+ if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+
+ spin_unlock(pgt_lock);
+ }
+ spin_unlock(&pgd_lock);
+ }
+}
+#else
void sync_global_pgds(unsigned long start, unsigned long end)
{
unsigned long addr;
@@ -135,6 +173,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
spin_unlock(&pgd_lock);
}
}
+#endif
/*
* NOTE: This function is marked __ref because it calls __init function
@@ -585,6 +624,57 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
return paddr_last;
}
+static unsigned long __meminit
+phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
+ unsigned long page_size_mask)
+{
+ unsigned long paddr_next, paddr_last = paddr_end;
+ unsigned long vaddr = (unsigned long)__va(paddr);
+ int i = p4d_index(vaddr);
+
+ if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+ return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
+
+ for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
+ p4d_t *p4d;
+ pud_t *pud;
+
+ vaddr = (unsigned long)__va(paddr);
+ p4d = p4d_page + p4d_index(vaddr);
+ paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
+
+ if (paddr >= paddr_end) {
+ if (!after_bootmem &&
+ !e820__mapped_any(paddr & P4D_MASK, paddr_next,
+ E820_TYPE_RAM) &&
+ !e820__mapped_any(paddr & P4D_MASK, paddr_next,
+ E820_TYPE_RESERVED_KERN))
+ set_p4d(p4d, __p4d(0));
+ continue;
+ }
+
+ if (!p4d_none(*p4d)) {
+ pud = pud_offset(p4d, 0);
+ paddr_last = phys_pud_init(pud, paddr,
+ paddr_end,
+ page_size_mask);
+ __flush_tlb_all();
+ continue;
+ }
+
+ pud = alloc_low_page();
+ paddr_last = phys_pud_init(pud, paddr, paddr_end,
+ page_size_mask);
+
+ spin_lock(&init_mm.page_table_lock);
+ p4d_populate(&init_mm, p4d, pud);
+ spin_unlock(&init_mm.page_table_lock);
+ }
+ __flush_tlb_all();
+
+ return paddr_last;
+}
+
/*
* Create page table mapping for the physical memory for specific physical
* addresses. The virtual and physical addresses have to be aligned on PMD level
@@ -606,26 +696,26 @@ kernel_physical_mapping_init(unsigned long paddr_start,
for (; vaddr < vaddr_end; vaddr = vaddr_next) {
pgd_t *pgd = pgd_offset_k(vaddr);
p4d_t *p4d;
- pud_t *pud;
vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;
- BUILD_BUG_ON(pgd_none(*pgd));
- p4d = p4d_offset(pgd, vaddr);
- if (p4d_val(*p4d)) {
- pud = (pud_t *)p4d_page_vaddr(*p4d);
- paddr_last = phys_pud_init(pud, __pa(vaddr),
+ if (pgd_val(*pgd)) {
+ p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+ paddr_last = phys_p4d_init(p4d, __pa(vaddr),
__pa(vaddr_end),
page_size_mask);
continue;
}
- pud = alloc_low_page();
- paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end),
+ p4d = alloc_low_page();
+ paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
page_size_mask);
spin_lock(&init_mm.page_table_lock);
- p4d_populate(&init_mm, p4d, pud);
+ if (IS_ENABLED(CONFIG_X86_5LEVEL))
+ pgd_populate(&init_mm, pgd, p4d);
+ else
+ p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
spin_unlock(&init_mm.page_table_lock);
pgd_changed = true;
}
@@ -990,7 +1080,13 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
pud_base = pud_offset(p4d, 0);
remove_pud_table(pud_base, addr, next, direct);
- free_pud_table(pud_base, p4d);
+ /*
+ * For 4-level page tables we do not want to free PUDs, but in the
+ * 5-level case we should free them. This code will have to change
+ * to adapt for boot-time switching between 4 and 5 level page tables.
+ */
+ if (CONFIG_PGTABLE_LEVELS == 5)
+ free_pud_table(pud_base, p4d);
}
if (direct)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index bbc558b88a88..4c1b5fd0c7ad 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -424,7 +424,7 @@ static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
/* Don't assume we're using swapper_pg_dir at this point */
- pgd_t *base = __va(read_cr3());
+ pgd_t *base = __va(read_cr3_pa());
pgd_t *pgd = &base[pgd_index(addr)];
p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud = pud_offset(p4d, addr);
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 0c7d8129bed6..88215ac16b24 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -12,7 +12,7 @@
#include <asm/tlbflush.h>
#include <asm/sections.h>
-extern pgd_t early_level4_pgt[PTRS_PER_PGD];
+extern pgd_t early_top_pgt[PTRS_PER_PGD];
extern struct range pfn_mapped[E820_MAX_ENTRIES];
static int __init map_range(struct range *range)
@@ -109,8 +109,8 @@ void __init kasan_early_init(void)
for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++)
kasan_zero_p4d[i] = __p4d(p4d_val);
- kasan_map_early_shadow(early_level4_pgt);
- kasan_map_early_shadow(init_level4_pgt);
+ kasan_map_early_shadow(early_top_pgt);
+ kasan_map_early_shadow(init_top_pgt);
}
void __init kasan_init(void)
@@ -121,8 +121,8 @@ void __init kasan_init(void)
register_die_notifier(&kasan_die_notifier);
#endif
- memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt));
- load_cr3(early_level4_pgt);
+ memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
+ load_cr3(early_top_pgt);
__flush_tlb_all();
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
@@ -148,7 +148,7 @@ void __init kasan_init(void)
kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
(void *)KASAN_SHADOW_END);
- load_cr3(init_level4_pgt);
+ load_cr3(init_top_pgt);
__flush_tlb_all();
/*
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index aed206475aa7..af599167fe3c 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -6,12 +6,12 @@
*
* Entropy is generated using the KASLR early boot functions now shared in
* the lib directory (originally written by Kees Cook). Randomization is
- * done on PGD & PUD page table levels to increase possible addresses. The
- * physical memory mapping code was adapted to support PUD level virtual
- * addresses. This implementation on the best configuration provides 30,000
- * possible virtual addresses in average for each memory region. An additional
- * low memory page is used to ensure each CPU can start with a PGD aligned
- * virtual address (for realmode).
+ * done on PGD & P4D/PUD page table levels to increase possible addresses.
+ * The physical memory mapping code was adapted to support P4D/PUD level
+ * virtual addresses. This implementation on the best configuration provides
+ * 30,000 possible virtual addresses in average for each memory region.
+ * An additional low memory page is used to ensure each CPU can start with
+ * a PGD aligned virtual address (for realmode).
*
* The order of each memory region is not changed. The feature looks at
* the available space for the regions based on different configuration
@@ -70,7 +70,7 @@ static __initdata struct kaslr_memory_region {
unsigned long *base;
unsigned long size_tb;
} kaslr_regions[] = {
- { &page_offset_base, 64/* Maximum */ },
+ { &page_offset_base, 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT) /* Maximum */ },
{ &vmalloc_base, VMALLOC_SIZE_TB },
{ &vmemmap_base, 1 },
};
@@ -142,7 +142,10 @@ void __init kernel_randomize_memory(void)
*/
entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
prandom_bytes_state(&rand_state, &rand, sizeof(rand));
- entropy = (rand % (entropy + 1)) & PUD_MASK;
+ if (IS_ENABLED(CONFIG_X86_5LEVEL))
+ entropy = (rand % (entropy + 1)) & P4D_MASK;
+ else
+ entropy = (rand % (entropy + 1)) & PUD_MASK;
vaddr += entropy;
*kaslr_regions[i].base = vaddr;
@@ -151,27 +154,21 @@ void __init kernel_randomize_memory(void)
* randomization alignment.
*/
vaddr += get_padding(&kaslr_regions[i]);
- vaddr = round_up(vaddr + 1, PUD_SIZE);
+ if (IS_ENABLED(CONFIG_X86_5LEVEL))
+ vaddr = round_up(vaddr + 1, P4D_SIZE);
+ else
+ vaddr = round_up(vaddr + 1, PUD_SIZE);
remain_entropy -= entropy;
}
}
-/*
- * Create PGD aligned trampoline table to allow real mode initialization
- * of additional CPUs. Consume only 1 low memory page.
- */
-void __meminit init_trampoline(void)
+static void __meminit init_trampoline_pud(void)
{
unsigned long paddr, paddr_next;
pgd_t *pgd;
pud_t *pud_page, *pud_page_tramp;
int i;
- if (!kaslr_memory_enabled()) {
- init_trampoline_default();
- return;
- }
-
pud_page_tramp = alloc_low_page();
paddr = 0;
@@ -192,3 +189,49 @@ void __meminit init_trampoline(void)
set_pgd(&trampoline_pgd_entry,
__pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
}
+
+static void __meminit init_trampoline_p4d(void)
+{
+ unsigned long paddr, paddr_next;
+ pgd_t *pgd;
+ p4d_t *p4d_page, *p4d_page_tramp;
+ int i;
+
+ p4d_page_tramp = alloc_low_page();
+
+ paddr = 0;
+ pgd = pgd_offset_k((unsigned long)__va(paddr));
+ p4d_page = (p4d_t *) pgd_page_vaddr(*pgd);
+
+ for (i = p4d_index(paddr); i < PTRS_PER_P4D; i++, paddr = paddr_next) {
+ p4d_t *p4d, *p4d_tramp;
+ unsigned long vaddr = (unsigned long)__va(paddr);
+
+ p4d_tramp = p4d_page_tramp + p4d_index(paddr);
+ p4d = p4d_page + p4d_index(vaddr);
+ paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
+
+ *p4d_tramp = *p4d;
+ }
+
+ set_pgd(&trampoline_pgd_entry,
+ __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)));
+}
+
+/*
+ * Create PGD aligned trampoline table to allow real mode initialization
+ * of additional CPUs. Consume only 1 low memory page.
+ */
+void __meminit init_trampoline(void)
+{
+
+ if (!kaslr_memory_enabled()) {
+ init_trampoline_default();
+ return;
+ }
+
+ if (IS_ENABLED(CONFIG_X86_5LEVEL))
+ init_trampoline_p4d();
+ else
+ init_trampoline_pud();
+}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 19ad095b41df..797295e792b2 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -74,9 +74,6 @@ static int mmap_is_legacy(void)
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
- if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
- return 1;
-
return sysctl_legacy_va_layout;
}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6e7bedf69af7..014d07a80053 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -15,7 +15,7 @@
#include <linux/debugfs.h>
/*
- * Smarter SMP flushing macros.
+ * TLB flushing, formerly SMP-only
* c/o Linus Torvalds.
*
* These mean you can really definitely utterly forget about
@@ -28,39 +28,28 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/
-#ifdef CONFIG_SMP
-
-struct flush_tlb_info {
- struct mm_struct *flush_mm;
- unsigned long flush_start;
- unsigned long flush_end;
-};
-
-/*
- * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
- */
void leave_mm(int cpu)
{
- struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm);
+ struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+
+ /*
+ * It's plausible that we're in lazy TLB mode while our mm is init_mm.
+ * If so, our callers still expect us to flush the TLB, but there
+ * aren't any user TLB entries in init_mm to worry about.
+ *
+ * This needs to happen before any other sanity checks due to
+ * intel_idle's shenanigans.
+ */
+ if (loaded_mm == &init_mm)
+ return;
+
if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
BUG();
- if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
- cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
- load_cr3(swapper_pg_dir);
- /*
- * This gets called in the idle path where RCU
- * functions differently. Tracing normally
- * uses RCU, so we have to call the tracepoint
- * specially here.
- */
- trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
- }
+
+ switch_mm(NULL, &init_mm, NULL);
}
EXPORT_SYMBOL_GPL(leave_mm);
-#endif /* CONFIG_SMP */
-
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
@@ -75,216 +64,167 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
unsigned cpu = smp_processor_id();
+ struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
- if (likely(prev != next)) {
- if (IS_ENABLED(CONFIG_VMAP_STACK)) {
- /*
- * If our current stack is in vmalloc space and isn't
- * mapped in the new pgd, we'll double-fault. Forcibly
- * map it.
- */
- unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
-
- pgd_t *pgd = next->pgd + stack_pgd_index;
-
- if (unlikely(pgd_none(*pgd)))
- set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
- }
+ /*
+ * NB: The scheduler will call us with prev == next when
+ * switching from lazy TLB mode to normal mode if active_mm
+ * isn't changing. When this happens, there is no guarantee
+ * that CR3 (and hence cpu_tlbstate.loaded_mm) matches next.
+ *
+ * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
+ */
-#ifdef CONFIG_SMP
- this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
- this_cpu_write(cpu_tlbstate.active_mm, next);
-#endif
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
- cpumask_set_cpu(cpu, mm_cpumask(next));
+ if (real_prev == next) {
+ /*
+ * There's nothing to do: we always keep the per-mm control
+ * regs in sync with cpu_tlbstate.loaded_mm. Just
+ * sanity-check mm_cpumask.
+ */
+ if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(next))))
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+ return;
+ }
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
/*
- * Re-load page tables.
- *
- * This logic has an ordering constraint:
- *
- * CPU 0: Write to a PTE for 'next'
- * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
- * CPU 1: set bit 1 in next's mm_cpumask
- * CPU 1: load from the PTE that CPU 0 writes (implicit)
- *
- * We need to prevent an outcome in which CPU 1 observes
- * the new PTE value and CPU 0 observes bit 1 clear in
- * mm_cpumask. (If that occurs, then the IPI will never
- * be sent, and CPU 0's TLB will contain a stale entry.)
- *
- * The bad outcome can occur if either CPU's load is
- * reordered before that CPU's store, so both CPUs must
- * execute full barriers to prevent this from happening.
- *
- * Thus, switch_mm needs a full barrier between the
- * store to mm_cpumask and any operation that could load
- * from next->pgd. TLB fills are special and can happen
- * due to instruction fetches or for no reason at all,
- * and neither LOCK nor MFENCE orders them.
- * Fortunately, load_cr3() is serializing and gives the
- * ordering guarantee we need.
- *
+ * If our current stack is in vmalloc space and isn't
+ * mapped in the new pgd, we'll double-fault. Forcibly
+ * map it.
*/
- load_cr3(next->pgd);
+ unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ pgd_t *pgd = next->pgd + stack_pgd_index;
- /* Stop flush ipis for the previous mm */
- cpumask_clear_cpu(cpu, mm_cpumask(prev));
+ if (unlikely(pgd_none(*pgd)))
+ set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
+ }
- /* Load per-mm CR4 state */
- load_mm_cr4(next);
+ this_cpu_write(cpu_tlbstate.loaded_mm, next);
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
- /*
- * Load the LDT, if the LDT is different.
- *
- * It's possible that prev->context.ldt doesn't match
- * the LDT register. This can happen if leave_mm(prev)
- * was called and then modify_ldt changed
- * prev->context.ldt but suppressed an IPI to this CPU.
- * In this case, prev->context.ldt != NULL, because we
- * never set context.ldt to NULL while the mm still
- * exists. That means that next->context.ldt !=
- * prev->context.ldt, because mms never share an LDT.
- */
- if (unlikely(prev->context.ldt != next->context.ldt))
- load_mm_ldt(next);
-#endif
+ WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+ /*
+ * Re-load page tables.
+ *
+ * This logic has an ordering constraint:
+ *
+ * CPU 0: Write to a PTE for 'next'
+ * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
+ * CPU 1: set bit 1 in next's mm_cpumask
+ * CPU 1: load from the PTE that CPU 0 writes (implicit)
+ *
+ * We need to prevent an outcome in which CPU 1 observes
+ * the new PTE value and CPU 0 observes bit 1 clear in
+ * mm_cpumask. (If that occurs, then the IPI will never
+ * be sent, and CPU 0's TLB will contain a stale entry.)
+ *
+ * The bad outcome can occur if either CPU's load is
+ * reordered before that CPU's store, so both CPUs must
+ * execute full barriers to prevent this from happening.
+ *
+ * Thus, switch_mm needs a full barrier between the
+ * store to mm_cpumask and any operation that could load
+ * from next->pgd. TLB fills are special and can happen
+ * due to instruction fetches or for no reason at all,
+ * and neither LOCK nor MFENCE orders them.
+ * Fortunately, load_cr3() is serializing and gives the
+ * ordering guarantee we need.
+ */
+ load_cr3(next->pgd);
+
+ /*
+ * This gets called via leave_mm() in the idle path where RCU
+ * functions differently. Tracing normally uses RCU, so we have to
+ * call the tracepoint specially here.
+ */
+ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+
+ /* Stop flush ipis for the previous mm */
+ WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
+ real_prev != &init_mm);
+ cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+
+ /* Load per-mm CR4 and LDTR state */
+ load_mm_cr4(next);
+ switch_ldt(real_prev, next);
+}
+
+static void flush_tlb_func_common(const struct flush_tlb_info *f,
+ bool local, enum tlb_flush_reason reason)
+{
+ /* This code cannot presently handle being reentered. */
+ VM_WARN_ON(!irqs_disabled());
+
+ if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
+ leave_mm(smp_processor_id());
+ return;
}
-#ifdef CONFIG_SMP
- else {
- this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
- BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
-
- if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
- /*
- * On established mms, the mm_cpumask is only changed
- * from irq context, from ptep_clear_flush() while in
- * lazy tlb mode, and here. Irqs are blocked during
- * schedule, protecting us from simultaneous changes.
- */
- cpumask_set_cpu(cpu, mm_cpumask(next));
- /*
- * We were in lazy tlb mode and leave_mm disabled
- * tlb flush IPI delivery. We must reload CR3
- * to make sure to use no freed page tables.
- *
- * As above, load_cr3() is serializing and orders TLB
- * fills with respect to the mm_cpumask write.
- */
- load_cr3(next->pgd);
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
- load_mm_cr4(next);
- load_mm_ldt(next);
+ if (f->end == TLB_FLUSH_ALL) {
+ local_flush_tlb();
+ if (local)
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ trace_tlb_flush(reason, TLB_FLUSH_ALL);
+ } else {
+ unsigned long addr;
+ unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
+ addr = f->start;
+ while (addr < f->end) {
+ __flush_tlb_single(addr);
+ addr += PAGE_SIZE;
}
+ if (local)
+ count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
+ trace_tlb_flush(reason, nr_pages);
}
-#endif
}
-#ifdef CONFIG_SMP
+static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
+{
+ const struct flush_tlb_info *f = info;
-/*
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) set cpu_tlbstate to TLBSTATE_OK
- * Now the tlb flush NMI handler flush_tlb_func won't call leave_mm
- * if cpu0 was in lazy tlb mode.
- * 1a2) update cpu active_mm
- * Now cpu0 accepts tlb flushes for the new mm.
- * 1a3) cpu_set(cpu, new_mm->cpu_vm_mask);
- * Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask);
- * Stop ipi delivery for the old mm. This is not synchronized with
- * the other cpus, but flush_tlb_func ignore flush ipis for the wrong
- * mm, and in the worst case we perform a superfluous tlb flush.
- * 1b) thread switch without mm change
- * cpu active_mm is correct, cpu0 already handles flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- * Atomically set the bit [other cpus will start sending flush ipis],
- * and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- * runs in kernel space, the cpu could load tlb entries for user space
- * pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- */
+ flush_tlb_func_common(f, true, reason);
+}
-/*
- * TLB flush funcation:
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- */
-static void flush_tlb_func(void *info)
+static void flush_tlb_func_remote(void *info)
{
- struct flush_tlb_info *f = info;
+ const struct flush_tlb_info *f = info;
inc_irq_stat(irq_tlb_count);
- if (f->flush_mm && f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
+ if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
return;
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
- if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
- if (f->flush_end == TLB_FLUSH_ALL) {
- local_flush_tlb();
- trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
- } else {
- unsigned long addr;
- unsigned long nr_pages =
- (f->flush_end - f->flush_start) / PAGE_SIZE;
- addr = f->flush_start;
- while (addr < f->flush_end) {
- __flush_tlb_single(addr);
- addr += PAGE_SIZE;
- }
- trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
- }
- } else
- leave_mm(smp_processor_id());
-
+ flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
}
void native_flush_tlb_others(const struct cpumask *cpumask,
- struct mm_struct *mm, unsigned long start,
- unsigned long end)
+ const struct flush_tlb_info *info)
{
- struct flush_tlb_info info;
-
- info.flush_mm = mm;
- info.flush_start = start;
- info.flush_end = end;
-
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
- if (end == TLB_FLUSH_ALL)
+ if (info->end == TLB_FLUSH_ALL)
trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
else
trace_tlb_flush(TLB_REMOTE_SEND_IPI,
- (end - start) >> PAGE_SHIFT);
+ (info->end - info->start) >> PAGE_SHIFT);
if (is_uv_system()) {
unsigned int cpu;
cpu = smp_processor_id();
- cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu);
+ cpumask = uv_flush_tlb_others(cpumask, info);
if (cpumask)
- smp_call_function_many(cpumask, flush_tlb_func,
- &info, 1);
+ smp_call_function_many(cpumask, flush_tlb_func_remote,
+ (void *)info, 1);
return;
}
- smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
+ smp_call_function_many(cpumask, flush_tlb_func_remote,
+ (void *)info, 1);
}
/*
@@ -302,85 +242,41 @@ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned long vmflag)
{
- unsigned long addr;
- /* do a global flush by default */
- unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
-
- preempt_disable();
+ int cpu;
- if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
- base_pages_to_flush = (end - start) >> PAGE_SHIFT;
- if (base_pages_to_flush > tlb_single_page_flush_ceiling)
- base_pages_to_flush = TLB_FLUSH_ALL;
+ struct flush_tlb_info info = {
+ .mm = mm,
+ };
- if (current->active_mm != mm) {
- /* Synchronize with switch_mm. */
- smp_mb();
+ cpu = get_cpu();
- goto out;
- }
-
- if (!current->mm) {
- leave_mm(smp_processor_id());
+ /* Synchronize with switch_mm. */
+ smp_mb();
- /* Synchronize with switch_mm. */
- smp_mb();
-
- goto out;
- }
-
- /*
- * Both branches below are implicit full barriers (MOV to CR or
- * INVLPG) that synchronize with switch_mm.
- */
- if (base_pages_to_flush == TLB_FLUSH_ALL) {
- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
- local_flush_tlb();
+ /* Should we flush just the requested range? */
+ if ((end != TLB_FLUSH_ALL) &&
+ !(vmflag & VM_HUGETLB) &&
+ ((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
+ info.start = start;
+ info.end = end;
} else {
- /* flush range by one by one 'invlpg' */
- for (addr = start; addr < end; addr += PAGE_SIZE) {
- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
- __flush_tlb_single(addr);
- }
- }
- trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
-out:
- if (base_pages_to_flush == TLB_FLUSH_ALL) {
- start = 0UL;
- end = TLB_FLUSH_ALL;
+ info.start = 0UL;
+ info.end = TLB_FLUSH_ALL;
}
- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(mm_cpumask(mm), mm, start, end);
- preempt_enable();
-}
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
-{
- struct mm_struct *mm = vma->vm_mm;
-
- preempt_disable();
-
- if (current->active_mm == mm) {
- if (current->mm) {
- /*
- * Implicit full barrier (INVLPG) that synchronizes
- * with switch_mm.
- */
- __flush_tlb_one(start);
- } else {
- leave_mm(smp_processor_id());
-
- /* Synchronize with switch_mm. */
- smp_mb();
- }
+ if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
+ VM_WARN_ON(irqs_disabled());
+ local_irq_disable();
+ flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
+ local_irq_enable();
}
- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(mm_cpumask(mm), mm, start, start + PAGE_SIZE);
-
- preempt_enable();
+ if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
+ flush_tlb_others(mm_cpumask(mm), &info);
+ put_cpu();
}
+
static void do_flush_tlb_all(void *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
@@ -401,7 +297,7 @@ static void do_kernel_range_flush(void *info)
unsigned long addr;
/* flush range by one by one 'invlpg' */
- for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE)
+ for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
__flush_tlb_single(addr);
}
@@ -410,16 +306,40 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
/* Balance as user space task's flush, a bit conservative */
if (end == TLB_FLUSH_ALL ||
- (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
+ (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
on_each_cpu(do_flush_tlb_all, NULL, 1);
} else {
struct flush_tlb_info info;
- info.flush_start = start;
- info.flush_end = end;
+ info.start = start;
+ info.end = end;
on_each_cpu(do_kernel_range_flush, &info, 1);
}
}
+void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+{
+ struct flush_tlb_info info = {
+ .mm = NULL,
+ .start = 0UL,
+ .end = TLB_FLUSH_ALL,
+ };
+
+ int cpu = get_cpu();
+
+ if (cpumask_test_cpu(cpu, &batch->cpumask)) {
+ VM_WARN_ON(irqs_disabled());
+ local_irq_disable();
+ flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
+ local_irq_enable();
+ }
+
+ if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
+ flush_tlb_others(&batch->cpumask, &info);
+ cpumask_clear(&batch->cpumask);
+
+ put_cpu();
+}
+
static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
@@ -465,5 +385,3 @@ static int __init create_tlb_single_page_flush_ceiling(void)
return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);
-
-#endif /* CONFIG_SMP */
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
index 90568c33ddb0..fefb4b619598 100644
--- a/arch/x86/net/Makefile
+++ b/arch/x86/net/Makefile
@@ -1,4 +1,6 @@
#
# Arch-specific network modules
#
+OBJECT_FILES_NON_STANDARD_bpf_jit.o += y
+
obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
index b914e20b5a00..3353b76dcff0 100644
--- a/arch/x86/pci/ce4100.c
+++ b/arch/x86/pci/ce4100.c
@@ -65,6 +65,9 @@ struct sim_reg_op {
{ PCI_DEVFN(device, func), offset, init_op, read_op, write_op,\
{0, SIZE_TO_MASK(size)} },
+/*
+ * All read/write functions are called with pci_config_lock held.
+ */
static void reg_init(struct sim_dev_reg *reg)
{
pci_direct_conf1.read(0, 1, reg->dev_func, reg->reg, 4,
@@ -73,21 +76,13 @@ static void reg_init(struct sim_dev_reg *reg)
static void reg_read(struct sim_dev_reg *reg, u32 *value)
{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&pci_config_lock, flags);
*value = reg->sim_reg.value;
- raw_spin_unlock_irqrestore(&pci_config_lock, flags);
}
static void reg_write(struct sim_dev_reg *reg, u32 value)
{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&pci_config_lock, flags);
reg->sim_reg.value = (value & reg->sim_reg.mask) |
(reg->sim_reg.value & ~reg->sim_reg.mask);
- raw_spin_unlock_irqrestore(&pci_config_lock, flags);
}
static void sata_reg_init(struct sim_dev_reg *reg)
@@ -117,12 +112,8 @@ static void sata_revid_read(struct sim_dev_reg *reg, u32 *value)
static void reg_noirq_read(struct sim_dev_reg *reg, u32 *value)
{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&pci_config_lock, flags);
/* force interrupt pin value to 0 */
*value = reg->sim_reg.value & 0xfff00ff;
- raw_spin_unlock_irqrestore(&pci_config_lock, flags);
}
static struct sim_dev_reg bus1_fixups[] = {
@@ -265,24 +256,33 @@ int bridge_read(unsigned int devfn, int reg, int len, u32 *value)
return retval;
}
-static int ce4100_conf_read(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 *value)
+static int ce4100_bus1_read(unsigned int devfn, int reg, int len, u32 *value)
{
+ unsigned long flags;
int i;
- WARN_ON(seg);
- if (bus == 1) {
- for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
- if (bus1_fixups[i].dev_func == devfn &&
- bus1_fixups[i].reg == (reg & ~3) &&
- bus1_fixups[i].read) {
- bus1_fixups[i].read(&(bus1_fixups[i]),
- value);
- extract_bytes(value, reg, len);
- return 0;
- }
+ for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
+ if (bus1_fixups[i].dev_func == devfn &&
+ bus1_fixups[i].reg == (reg & ~3) &&
+ bus1_fixups[i].read) {
+
+ raw_spin_lock_irqsave(&pci_config_lock, flags);
+ bus1_fixups[i].read(&(bus1_fixups[i]), value);
+ raw_spin_unlock_irqrestore(&pci_config_lock, flags);
+ extract_bytes(value, reg, len);
+ return 0;
}
}
+ return -1;
+}
+
+static int ce4100_conf_read(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 *value)
+{
+ WARN_ON(seg);
+
+ if (bus == 1 && !ce4100_bus1_read(devfn, reg, len, value))
+ return 0;
if (bus == 0 && (PCI_DEVFN(1, 0) == devfn) &&
!bridge_read(devfn, reg, len, value))
@@ -291,23 +291,32 @@ static int ce4100_conf_read(unsigned int seg, unsigned int bus,
return pci_direct_conf1.read(seg, bus, devfn, reg, len, value);
}
-static int ce4100_conf_write(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 value)
+static int ce4100_bus1_write(unsigned int devfn, int reg, int len, u32 value)
{
+ unsigned long flags;
int i;
- WARN_ON(seg);
- if (bus == 1) {
- for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
- if (bus1_fixups[i].dev_func == devfn &&
- bus1_fixups[i].reg == (reg & ~3) &&
- bus1_fixups[i].write) {
- bus1_fixups[i].write(&(bus1_fixups[i]),
- value);
- return 0;
- }
+ for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
+ if (bus1_fixups[i].dev_func == devfn &&
+ bus1_fixups[i].reg == (reg & ~3) &&
+ bus1_fixups[i].write) {
+
+ raw_spin_lock_irqsave(&pci_config_lock, flags);
+ bus1_fixups[i].write(&(bus1_fixups[i]), value);
+ raw_spin_unlock_irqrestore(&pci_config_lock, flags);
+ return 0;
}
}
+ return -1;
+}
+
+static int ce4100_conf_write(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 value)
+{
+ WARN_ON(seg);
+
+ if (bus == 1 && !ce4100_bus1_write(devfn, reg, len, value))
+ return 0;
/* Discard writes to A/V bridge BAR. */
if (bus == 0 && PCI_DEVFN(1, 0) == devfn &&
@@ -318,8 +327,8 @@ static int ce4100_conf_write(unsigned int seg, unsigned int bus,
}
static const struct pci_raw_ops ce4100_pci_conf = {
- .read = ce4100_conf_read,
- .write = ce4100_conf_write,
+ .read = ce4100_conf_read,
+ .write = ce4100_conf_write,
};
int __init ce4100_pci_init(void)
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 190e718694b1..cfd1a89fd04e 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -75,8 +75,8 @@ struct pci_ops pci_root_ops = {
};
/*
- * This interrupt-safe spinlock protects all accesses to PCI
- * configuration space.
+ * This interrupt-safe spinlock protects all accesses to PCI configuration
+ * space, except for the mmconfig (ECAM) based operations.
*/
DEFINE_RAW_SPINLOCK(pci_config_lock);
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index ea6f3802c17b..1cb01abcb1be 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -24,12 +24,10 @@ static void pcibios_fixup_peer_bridges(void)
int __init pci_legacy_init(void)
{
- if (!raw_pci_ops) {
- printk("PCI: System does not support PCI\n");
- return 0;
- }
+ if (!raw_pci_ops)
+ return 1;
- printk("PCI: Probing PCI hardware\n");
+ pr_info("PCI: Probing PCI hardware\n");
pcibios_scan_root(0);
return 0;
}
@@ -46,7 +44,7 @@ void pcibios_scan_specific_bus(int busn)
if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) &&
l != 0x0000 && l != 0xffff) {
DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l);
- printk(KERN_INFO "PCI: Discovered peer bus %02x\n", busn);
+ pr_info("PCI: Discovered peer bus %02x\n", busn);
pcibios_scan_root(busn);
return;
}
@@ -60,8 +58,12 @@ static int __init pci_subsys_init(void)
* The init function returns an non zero value when
* pci_legacy_init should be invoked.
*/
- if (x86_init.pci.init())
- pci_legacy_init();
+ if (x86_init.pci.init()) {
+ if (pci_legacy_init()) {
+ pr_info("PCI: System does not support PCI\n");
+ return -ENODEV;
+ }
+ }
pcibios_fixup_peer_bridges();
x86_init.pci.init_irq();
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index f1d83b34c329..2f56e1ed61c3 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -1,4 +1,5 @@
OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y
obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
obj-$(CONFIG_EARLY_PRINTK_EFI) += early_printk.o
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 43b96f5f78ba..f084d8718ac4 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -1014,7 +1014,6 @@ static void __init __efi_enter_virtual_mode(void)
* necessary relocation fixups for the new virtual addresses.
*/
efi_runtime_update_mappings();
- efi_dump_pagetable();
/* clean DUMMY object */
efi_delete_dummy_variable();
@@ -1029,6 +1028,8 @@ void __init efi_enter_virtual_mode(void)
kexec_enter_virtual_mode();
else
__efi_enter_virtual_mode();
+
+ efi_dump_pagetable();
}
/*
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 3481268da3d0..52f7faa1538f 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -44,7 +44,14 @@ int __init efi_alloc_page_tables(void)
}
void efi_sync_low_kernel_mappings(void) {}
-void __init efi_dump_pagetable(void) {}
+
+void __init efi_dump_pagetable(void)
+{
+#ifdef CONFIG_EFI_PGT_DUMP
+ ptdump_walk_pgd_level(NULL, swapper_pg_dir);
+#endif
+}
+
int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
{
return 0;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index eb8dff15a7f6..9bf72f5bfedb 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -80,7 +80,7 @@ pgd_t * __init efi_call_phys_prolog(void)
int n_pgds, i, j;
if (!efi_enabled(EFI_OLD_MEMMAP)) {
- save_pgd = (pgd_t *)read_cr3();
+ save_pgd = (pgd_t *)__read_cr3();
write_cr3((unsigned long)efi_scratch.efi_pgt);
goto out;
}
@@ -589,7 +589,10 @@ void __init efi_runtime_update_mappings(void)
void __init efi_dump_pagetable(void)
{
#ifdef CONFIG_EFI_PGT_DUMP
- ptdump_walk_pgd_level(NULL, efi_pgd);
+ if (efi_enabled(EFI_OLD_MEMMAP))
+ ptdump_walk_pgd_level(NULL, swapper_pg_dir);
+ else
+ ptdump_walk_pgd_level(NULL, efi_pgd);
#endif
}
@@ -646,7 +649,7 @@ efi_status_t efi_thunk_set_virtual_address_map(
efi_sync_low_kernel_mappings();
local_irq_save(flags);
- efi_scratch.prev_cr3 = read_cr3();
+ efi_scratch.prev_cr3 = __read_cr3();
write_cr3((unsigned long)efi_scratch.efi_pgt);
__flush_tlb_all();
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index e0cf95a83f3f..8a99a2e96537 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -15,12 +15,66 @@
#include <asm/e820/api.h>
#include <asm/efi.h>
#include <asm/uv/uv.h>
+#include <asm/cpu_device_id.h>
#define EFI_MIN_RESERVE 5120
#define EFI_DUMMY_GUID \
EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9)
+#define QUARK_CSH_SIGNATURE 0x5f435348 /* _CSH */
+#define QUARK_SECURITY_HEADER_SIZE 0x400
+
+/*
+ * Header prepended to the standard EFI capsule on Quark systems the are based
+ * on Intel firmware BSP.
+ * @csh_signature: Unique identifier to sanity check signed module
+ * presence ("_CSH").
+ * @version: Current version of CSH used. Should be one for Quark A0.
+ * @modulesize: Size of the entire module including the module header
+ * and payload.
+ * @security_version_number_index: Index of SVN to use for validation of signed
+ * module.
+ * @security_version_number: Used to prevent against roll back of modules.
+ * @rsvd_module_id: Currently unused for Clanton (Quark).
+ * @rsvd_module_vendor: Vendor Identifier. For Intel products value is
+ * 0x00008086.
+ * @rsvd_date: BCD representation of build date as yyyymmdd, where
+ * yyyy=4 digit year, mm=1-12, dd=1-31.
+ * @headersize: Total length of the header including including any
+ * padding optionally added by the signing tool.
+ * @hash_algo: What Hash is used in the module signing.
+ * @cryp_algo: What Crypto is used in the module signing.
+ * @keysize: Total length of the key data including including any
+ * padding optionally added by the signing tool.
+ * @signaturesize: Total length of the signature including including any
+ * padding optionally added by the signing tool.
+ * @rsvd_next_header: 32-bit pointer to the next Secure Boot Module in the
+ * chain, if there is a next header.
+ * @rsvd: Reserved, padding structure to required size.
+ *
+ * See also QuartSecurityHeader_t in
+ * Quark_EDKII_v1.2.1.1/QuarkPlatformPkg/Include/QuarkBootRom.h
+ * from https://downloadcenter.intel.com/download/23197/Intel-Quark-SoC-X1000-Board-Support-Package-BSP
+ */
+struct quark_security_header {
+ u32 csh_signature;
+ u32 version;
+ u32 modulesize;
+ u32 security_version_number_index;
+ u32 security_version_number;
+ u32 rsvd_module_id;
+ u32 rsvd_module_vendor;
+ u32 rsvd_date;
+ u32 headersize;
+ u32 hash_algo;
+ u32 cryp_algo;
+ u32 keysize;
+ u32 signaturesize;
+ u32 rsvd_next_header;
+ u32 rsvd[2];
+};
+
static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 };
static bool efi_no_storage_paranoia;
@@ -504,3 +558,86 @@ bool efi_poweroff_required(void)
{
return acpi_gbl_reduced_hardware || acpi_no_s5;
}
+
+#ifdef CONFIG_EFI_CAPSULE_QUIRK_QUARK_CSH
+
+static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff,
+ size_t hdr_bytes)
+{
+ struct quark_security_header *csh = *pkbuff;
+
+ /* Only process data block that is larger than the security header */
+ if (hdr_bytes < sizeof(struct quark_security_header))
+ return 0;
+
+ if (csh->csh_signature != QUARK_CSH_SIGNATURE ||
+ csh->headersize != QUARK_SECURITY_HEADER_SIZE)
+ return 1;
+
+ /* Only process data block if EFI header is included */
+ if (hdr_bytes < QUARK_SECURITY_HEADER_SIZE +
+ sizeof(efi_capsule_header_t))
+ return 0;
+
+ pr_debug("Quark security header detected\n");
+
+ if (csh->rsvd_next_header != 0) {
+ pr_err("multiple Quark security headers not supported\n");
+ return -EINVAL;
+ }
+
+ *pkbuff += csh->headersize;
+ cap_info->total_size = csh->headersize;
+
+ /*
+ * Update the first page pointer to skip over the CSH header.
+ */
+ cap_info->pages[0] += csh->headersize;
+
+ return 1;
+}
+
+#define ICPU(family, model, quirk_handler) \
+ { X86_VENDOR_INTEL, family, model, X86_FEATURE_ANY, \
+ (unsigned long)&quirk_handler }
+
+static const struct x86_cpu_id efi_capsule_quirk_ids[] = {
+ ICPU(5, 9, qrk_capsule_setup_info), /* Intel Quark X1000 */
+ { }
+};
+
+int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
+ size_t hdr_bytes)
+{
+ int (*quirk_handler)(struct capsule_info *, void **, size_t);
+ const struct x86_cpu_id *id;
+ int ret;
+
+ if (hdr_bytes < sizeof(efi_capsule_header_t))
+ return 0;
+
+ cap_info->total_size = 0;
+
+ id = x86_match_cpu(efi_capsule_quirk_ids);
+ if (id) {
+ /*
+ * The quirk handler is supposed to return
+ * - a value > 0 if the setup should continue, after advancing
+ * kbuff as needed
+ * - 0 if not enough hdr_bytes are available yet
+ * - a negative error code otherwise
+ */
+ quirk_handler = (typeof(quirk_handler))id->driver_data;
+ ret = quirk_handler(cap_info, &kbuff, hdr_bytes);
+ if (ret <= 0)
+ return ret;
+ }
+
+ memcpy(&cap_info->header, kbuff, sizeof(cap_info->header));
+
+ cap_info->total_size += cap_info->header.imagesize;
+
+ return __efi_capsule_setup_info(cap_info);
+}
+
+#endif
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c
index c5350fd27d70..0668aaff8bfe 100644
--- a/arch/x86/platform/olpc/olpc-xo1-pm.c
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -77,7 +77,7 @@ static int xo1_power_state_enter(suspend_state_t pm_state)
asmlinkage __visible int xo1_do_sleep(u8 sleep_state)
{
- void *pgd_addr = __va(read_cr3());
+ void *pgd_addr = __va(read_cr3_pa());
/* Program wakeup mask (using dword access to CS5536_PM1_EN) */
outl(wakeup_mask << 16, acpi_base + CS5536_PM1_STS);
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 42e65fee5673..2983faab5b18 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -456,12 +456,13 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
*/
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
- struct cyc2ns_data *data = cyc2ns_read_begin();
+ struct cyc2ns_data data;
unsigned long long ns;
- ns = mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
+ cyc2ns_read_begin(&data);
+ ns = mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);
+ cyc2ns_read_end();
- cyc2ns_read_end(data);
return ns;
}
@@ -470,12 +471,13 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
*/
static inline unsigned long long ns_2_cycles(unsigned long long ns)
{
- struct cyc2ns_data *data = cyc2ns_read_begin();
+ struct cyc2ns_data data;
unsigned long long cyc;
- cyc = (ns << data->cyc2ns_shift) / data->cyc2ns_mul;
+ cyc2ns_read_begin(&data);
+ cyc = (ns << data.cyc2ns_shift) / data.cyc2ns_mul;
+ cyc2ns_read_end();
- cyc2ns_read_end(data);
return cyc;
}
@@ -1121,11 +1123,9 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
* done. The returned pointer is valid till preemption is re-enabled.
*/
const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end,
- unsigned int cpu)
+ const struct flush_tlb_info *info)
{
+ unsigned int cpu = smp_processor_id();
int locals = 0, remotes = 0, hubs = 0;
struct bau_desc *bau_desc;
struct cpumask *flush_mask;
@@ -1179,8 +1179,8 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
record_send_statistics(stat, locals, hubs, remotes, bau_desc);
- if (!end || (end - start) <= PAGE_SIZE)
- address = start;
+ if (!info->end || (info->end - info->start) <= PAGE_SIZE)
+ address = info->start;
else
address = TLB_FLUSH_ALL;
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 776c6592136c..03fc397335b7 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -160,13 +160,21 @@ static struct irq_domain *uv_get_irq_domain(void)
{
static struct irq_domain *uv_domain;
static DEFINE_MUTEX(uv_lock);
+ struct fwnode_handle *fn;
mutex_lock(&uv_lock);
- if (uv_domain == NULL) {
- uv_domain = irq_domain_add_tree(NULL, &uv_domain_ops, NULL);
- if (uv_domain)
- uv_domain->parent = x86_vector_domain;
- }
+ if (uv_domain)
+ goto out;
+
+ fn = irq_domain_alloc_named_fwnode("UV-CORE");
+ if (!fn)
+ goto out;
+
+ uv_domain = irq_domain_create_tree(fn, &uv_domain_ops, NULL);
+ irq_domain_free_fwnode(fn);
+ if (uv_domain)
+ uv_domain->parent = x86_vector_domain;
+out:
mutex_unlock(&uv_lock);
return uv_domain;
diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile
index a6a198c33623..05041871ac90 100644
--- a/arch/x86/power/Makefile
+++ b/arch/x86/power/Makefile
@@ -1,3 +1,5 @@
+OBJECT_FILES_NON_STANDARD_hibernate_asm_$(BITS).o := y
+
# __restore_processor_state() restores %gs after S3 resume and so should not
# itself be stack-protected
nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 6b05a9219ea2..78459a6d455a 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -129,7 +129,7 @@ static void __save_processor_state(struct saved_context *ctxt)
*/
ctxt->cr0 = read_cr0();
ctxt->cr2 = read_cr2();
- ctxt->cr3 = read_cr3();
+ ctxt->cr3 = __read_cr3();
ctxt->cr4 = __read_cr4();
#ifdef CONFIG_X86_64
ctxt->cr8 = read_cr8();
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index a6e21fee22ea..e3e62c8a8e70 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -150,7 +150,8 @@ static int relocate_restore_code(void)
memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE);
/* Make the page containing the relocated code executable */
- pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code);
+ pgd = (pgd_t *)__va(read_cr3_pa()) +
+ pgd_index(relocated_restore_code);
p4d = p4d_offset(pgd, relocated_restore_code);
if (p4d_large(*p4d)) {
set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig
index 2a2d89d39af6..bb026699ad19 100644
--- a/arch/x86/ras/Kconfig
+++ b/arch/x86/ras/Kconfig
@@ -1,13 +1,3 @@
-config MCE_AMD_INJ
- tristate "Simple MCE injection interface for AMD processors"
- depends on RAS && X86_MCE && DEBUG_FS && AMD_NB
- default n
- help
- This is a simple debugfs interface to inject MCEs and test different
- aspects of the MCE handling code.
-
- WARNING: Do not even assume this interface is staying stable!
-
config RAS_CEC
bool "Correctable Errors Collector"
depends on X86_MCE && MEMORY_FAILURE && DEBUG_FS
@@ -20,4 +10,3 @@ config RAS_CEC
Bear in mind that this is absolutely useless if your platform doesn't
have ECC DIMMs and doesn't have DRAM ECC checking enabled in the BIOS.
-
diff --git a/arch/x86/ras/Makefile b/arch/x86/ras/Makefile
deleted file mode 100644
index 5f94546db280..000000000000
--- a/arch/x86/ras/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-obj-$(CONFIG_MCE_AMD_INJ) += mce_amd_inj.o
-
diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
deleted file mode 100644
index 8730c2882fff..000000000000
--- a/arch/x86/ras/mce_amd_inj.c
+++ /dev/null
@@ -1,492 +0,0 @@
-/*
- * A simple MCE injection facility for testing different aspects of the RAS
- * code. This driver should be built as module so that it can be loaded
- * on production kernels for testing purposes.
- *
- * This file may be distributed under the terms of the GNU General Public
- * License version 2.
- *
- * Copyright (c) 2010-15: Borislav Petkov <bp@alien8.de>
- * Advanced Micro Devices Inc.
- */
-
-#include <linux/kobject.h>
-#include <linux/debugfs.h>
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/cpu.h>
-#include <linux/string.h>
-#include <linux/uaccess.h>
-#include <linux/pci.h>
-
-#include <asm/mce.h>
-#include <asm/smp.h>
-#include <asm/amd_nb.h>
-#include <asm/irq_vectors.h>
-
-#include "../kernel/cpu/mcheck/mce-internal.h"
-
-/*
- * Collect all the MCi_XXX settings
- */
-static struct mce i_mce;
-static struct dentry *dfs_inj;
-
-static u8 n_banks;
-
-#define MAX_FLAG_OPT_SIZE 3
-#define NBCFG 0x44
-
-enum injection_type {
- SW_INJ = 0, /* SW injection, simply decode the error */
- HW_INJ, /* Trigger a #MC */
- DFR_INT_INJ, /* Trigger Deferred error interrupt */
- THR_INT_INJ, /* Trigger threshold interrupt */
- N_INJ_TYPES,
-};
-
-static const char * const flags_options[] = {
- [SW_INJ] = "sw",
- [HW_INJ] = "hw",
- [DFR_INT_INJ] = "df",
- [THR_INT_INJ] = "th",
- NULL
-};
-
-/* Set default injection to SW_INJ */
-static enum injection_type inj_type = SW_INJ;
-
-#define MCE_INJECT_SET(reg) \
-static int inj_##reg##_set(void *data, u64 val) \
-{ \
- struct mce *m = (struct mce *)data; \
- \
- m->reg = val; \
- return 0; \
-}
-
-MCE_INJECT_SET(status);
-MCE_INJECT_SET(misc);
-MCE_INJECT_SET(addr);
-MCE_INJECT_SET(synd);
-
-#define MCE_INJECT_GET(reg) \
-static int inj_##reg##_get(void *data, u64 *val) \
-{ \
- struct mce *m = (struct mce *)data; \
- \
- *val = m->reg; \
- return 0; \
-}
-
-MCE_INJECT_GET(status);
-MCE_INJECT_GET(misc);
-MCE_INJECT_GET(addr);
-MCE_INJECT_GET(synd);
-
-DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
-
-/*
- * Caller needs to be make sure this cpu doesn't disappear
- * from under us, i.e.: get_cpu/put_cpu.
- */
-static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
-{
- u32 l, h;
- int err;
-
- err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
- if (err) {
- pr_err("%s: error reading HWCR\n", __func__);
- return err;
- }
-
- enable ? (l |= BIT(18)) : (l &= ~BIT(18));
-
- err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
- if (err)
- pr_err("%s: error writing HWCR\n", __func__);
-
- return err;
-}
-
-static int __set_inj(const char *buf)
-{
- int i;
-
- for (i = 0; i < N_INJ_TYPES; i++) {
- if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
- inj_type = i;
- return 0;
- }
- }
- return -EINVAL;
-}
-
-static ssize_t flags_read(struct file *filp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- char buf[MAX_FLAG_OPT_SIZE];
- int n;
-
- n = sprintf(buf, "%s\n", flags_options[inj_type]);
-
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
-}
-
-static ssize_t flags_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- char buf[MAX_FLAG_OPT_SIZE], *__buf;
- int err;
-
- if (cnt > MAX_FLAG_OPT_SIZE)
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt - 1] = 0;
-
- /* strip whitespace */
- __buf = strstrip(buf);
-
- err = __set_inj(__buf);
- if (err) {
- pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
- return err;
- }
-
- *ppos += cnt;
-
- return cnt;
-}
-
-static const struct file_operations flags_fops = {
- .read = flags_read,
- .write = flags_write,
- .llseek = generic_file_llseek,
-};
-
-/*
- * On which CPU to inject?
- */
-MCE_INJECT_GET(extcpu);
-
-static int inj_extcpu_set(void *data, u64 val)
-{
- struct mce *m = (struct mce *)data;
-
- if (val >= nr_cpu_ids || !cpu_online(val)) {
- pr_err("%s: Invalid CPU: %llu\n", __func__, val);
- return -EINVAL;
- }
- m->extcpu = val;
- return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
-
-static void trigger_mce(void *info)
-{
- asm volatile("int $18");
-}
-
-static void trigger_dfr_int(void *info)
-{
- asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
-}
-
-static void trigger_thr_int(void *info)
-{
- asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
-}
-
-static u32 get_nbc_for_node(int node_id)
-{
- struct cpuinfo_x86 *c = &boot_cpu_data;
- u32 cores_per_node;
-
- cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();
-
- return cores_per_node * node_id;
-}
-
-static void toggle_nb_mca_mst_cpu(u16 nid)
-{
- struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
- u32 val;
- int err;
-
- if (!F3)
- return;
-
- err = pci_read_config_dword(F3, NBCFG, &val);
- if (err) {
- pr_err("%s: Error reading F%dx%03x.\n",
- __func__, PCI_FUNC(F3->devfn), NBCFG);
- return;
- }
-
- if (val & BIT(27))
- return;
-
- pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
- __func__);
-
- val |= BIT(27);
- err = pci_write_config_dword(F3, NBCFG, val);
- if (err)
- pr_err("%s: Error writing F%dx%03x.\n",
- __func__, PCI_FUNC(F3->devfn), NBCFG);
-}
-
-static void prepare_msrs(void *info)
-{
- struct mce m = *(struct mce *)info;
- u8 b = m.bank;
-
- wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
-
- if (boot_cpu_has(X86_FEATURE_SMCA)) {
- if (m.inject_flags == DFR_INT_INJ) {
- wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
- wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
- } else {
- wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
- wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
- }
-
- wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
- wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
- } else {
- wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
- wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
- wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
- }
-}
-
-static void do_inject(void)
-{
- u64 mcg_status = 0;
- unsigned int cpu = i_mce.extcpu;
- u8 b = i_mce.bank;
-
- rdtscll(i_mce.tsc);
-
- if (i_mce.misc)
- i_mce.status |= MCI_STATUS_MISCV;
-
- if (i_mce.synd)
- i_mce.status |= MCI_STATUS_SYNDV;
-
- if (inj_type == SW_INJ) {
- mce_inject_log(&i_mce);
- return;
- }
-
- /* prep MCE global settings for the injection */
- mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
-
- if (!(i_mce.status & MCI_STATUS_PCC))
- mcg_status |= MCG_STATUS_RIPV;
-
- /*
- * Ensure necessary status bits for deferred errors:
- * - MCx_STATUS[Deferred]: make sure it is a deferred error
- * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
- */
- if (inj_type == DFR_INT_INJ) {
- i_mce.status |= MCI_STATUS_DEFERRED;
- i_mce.status |= (i_mce.status & ~MCI_STATUS_UC);
- }
-
- /*
- * For multi node CPUs, logging and reporting of bank 4 errors happens
- * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
- * Fam10h and later BKDGs.
- */
- if (static_cpu_has(X86_FEATURE_AMD_DCM) &&
- b == 4 &&
- boot_cpu_data.x86 < 0x17) {
- toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
- cpu = get_nbc_for_node(amd_get_nb_id(cpu));
- }
-
- get_online_cpus();
- if (!cpu_online(cpu))
- goto err;
-
- toggle_hw_mce_inject(cpu, true);
-
- i_mce.mcgstatus = mcg_status;
- i_mce.inject_flags = inj_type;
- smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
-
- toggle_hw_mce_inject(cpu, false);
-
- switch (inj_type) {
- case DFR_INT_INJ:
- smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
- break;
- case THR_INT_INJ:
- smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
- break;
- default:
- smp_call_function_single(cpu, trigger_mce, NULL, 0);
- }
-
-err:
- put_online_cpus();
-
-}
-
-/*
- * This denotes into which bank we're injecting and triggers
- * the injection, at the same time.
- */
-static int inj_bank_set(void *data, u64 val)
-{
- struct mce *m = (struct mce *)data;
-
- if (val >= n_banks) {
- pr_err("Non-existent MCE bank: %llu\n", val);
- return -EINVAL;
- }
-
- m->bank = val;
- do_inject();
-
- return 0;
-}
-
-MCE_INJECT_GET(bank);
-
-DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
-
-static const char readme_msg[] =
-"Description of the files and their usages:\n"
-"\n"
-"Note1: i refers to the bank number below.\n"
-"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
-"as they mirror the hardware registers.\n"
-"\n"
-"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
-"\t attributes of the error which caused the MCE.\n"
-"\n"
-"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
-"\t used for error thresholding purposes and its validity is indicated by\n"
-"\t MCi_STATUS[MiscV].\n"
-"\n"
-"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
-"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
-"\n"
-"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
-"\t associated with the error.\n"
-"\n"
-"cpu:\t The CPU to inject the error on.\n"
-"\n"
-"bank:\t Specify the bank you want to inject the error into: the number of\n"
-"\t banks in a processor varies and is family/model-specific, therefore, the\n"
-"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
-"\t injection.\n"
-"\n"
-"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
-"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
-"\t for AMD processors.\n"
-"\n"
-"\t Allowed error injection types:\n"
-"\t - \"sw\": Software error injection. Decode error to a human-readable \n"
-"\t format only. Safe to use.\n"
-"\t - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
-"\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
-"\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
-"\t before injecting.\n"
-"\t - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
-"\t error APIC interrupt handler to handle the error if the feature is \n"
-"\t is present in hardware. \n"
-"\t - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
-"\t APIC interrupt handler to handle the error. \n"
-"\n";
-
-static ssize_t
-inj_readme_read(struct file *filp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- return simple_read_from_buffer(ubuf, cnt, ppos,
- readme_msg, strlen(readme_msg));
-}
-
-static const struct file_operations readme_fops = {
- .read = inj_readme_read,
-};
-
-static struct dfs_node {
- char *name;
- struct dentry *d;
- const struct file_operations *fops;
- umode_t perm;
-} dfs_fls[] = {
- { .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
- { .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR },
- { .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR },
- { .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR },
- { .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR },
- { .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR },
- { .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
- { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
-};
-
-static int __init init_mce_inject(void)
-{
- unsigned int i;
- u64 cap;
-
- rdmsrl(MSR_IA32_MCG_CAP, cap);
- n_banks = cap & MCG_BANKCNT_MASK;
-
- dfs_inj = debugfs_create_dir("mce-inject", NULL);
- if (!dfs_inj)
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
- dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
- dfs_fls[i].perm,
- dfs_inj,
- &i_mce,
- dfs_fls[i].fops);
-
- if (!dfs_fls[i].d)
- goto err_dfs_add;
- }
-
- return 0;
-
-err_dfs_add:
- while (i-- > 0)
- debugfs_remove(dfs_fls[i].d);
-
- debugfs_remove(dfs_inj);
- dfs_inj = NULL;
-
- return -ENODEV;
-}
-
-static void __exit exit_mce_inject(void)
-{
-
- debugfs_remove_recursive(dfs_inj);
- dfs_inj = NULL;
-
- memset(&dfs_fls, 0, sizeof(dfs_fls));
-}
-module_init(init_mce_inject);
-module_exit(exit_mce_inject);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Borislav Petkov <bp@alien8.de>");
-MODULE_AUTHOR("AMD Inc.");
-MODULE_DESCRIPTION("MCE injection facility for RAS testing");
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index a163a90af4aa..cd4be19c36dc 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -102,7 +102,7 @@ static void __init setup_real_mode(void)
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
trampoline_pgd[0] = trampoline_pgd_entry.pgd;
- trampoline_pgd[511] = init_level4_pgt[511].pgd;
+ trampoline_pgd[511] = init_top_pgt[511].pgd;
#endif
}
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index fffb0a16f9e3..bced7a369a11 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,3 +1,6 @@
+OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_xen-pvh.o := y
+
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_spinlock.o = -pg
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index bcea81f36fc5..b5e48da7fbff 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -178,7 +178,7 @@ static struct apic xen_pv_apic = {
.get_apic_id = xen_get_apic_id,
.set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. */
- .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
#ifdef CONFIG_SMP
.send_IPI_mask = xen_send_IPI_mask,
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index 30bb2e80cfe7..a18703be9ead 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -54,38 +54,6 @@ static efi_system_table_t efi_systab_xen __initdata = {
.tables = EFI_INVALID_TABLE_ADDR /* Initialized later. */
};
-static const struct efi efi_xen __initconst = {
- .systab = NULL, /* Initialized later. */
- .runtime_version = 0, /* Initialized later. */
- .mps = EFI_INVALID_TABLE_ADDR,
- .acpi = EFI_INVALID_TABLE_ADDR,
- .acpi20 = EFI_INVALID_TABLE_ADDR,
- .smbios = EFI_INVALID_TABLE_ADDR,
- .smbios3 = EFI_INVALID_TABLE_ADDR,
- .sal_systab = EFI_INVALID_TABLE_ADDR,
- .boot_info = EFI_INVALID_TABLE_ADDR,
- .hcdp = EFI_INVALID_TABLE_ADDR,
- .uga = EFI_INVALID_TABLE_ADDR,
- .uv_systab = EFI_INVALID_TABLE_ADDR,
- .fw_vendor = EFI_INVALID_TABLE_ADDR,
- .runtime = EFI_INVALID_TABLE_ADDR,
- .config_table = EFI_INVALID_TABLE_ADDR,
- .get_time = xen_efi_get_time,
- .set_time = xen_efi_set_time,
- .get_wakeup_time = xen_efi_get_wakeup_time,
- .set_wakeup_time = xen_efi_set_wakeup_time,
- .get_variable = xen_efi_get_variable,
- .get_next_variable = xen_efi_get_next_variable,
- .set_variable = xen_efi_set_variable,
- .query_variable_info = xen_efi_query_variable_info,
- .update_capsule = xen_efi_update_capsule,
- .query_capsule_caps = xen_efi_query_capsule_caps,
- .get_next_high_mono_count = xen_efi_get_next_high_mono_count,
- .reset_system = xen_efi_reset_system,
- .set_virtual_address_map = NULL, /* Not used under Xen. */
- .flags = 0 /* Initialized later. */
-};
-
static efi_system_table_t __init *xen_efi_probe(void)
{
struct xen_platform_op op = {
@@ -102,7 +70,18 @@ static efi_system_table_t __init *xen_efi_probe(void)
/* Here we know that Xen runs on EFI platform. */
- efi = efi_xen;
+ efi.get_time = xen_efi_get_time;
+ efi.set_time = xen_efi_set_time;
+ efi.get_wakeup_time = xen_efi_get_wakeup_time;
+ efi.set_wakeup_time = xen_efi_set_wakeup_time;
+ efi.get_variable = xen_efi_get_variable;
+ efi.get_next_variable = xen_efi_get_next_variable;
+ efi.set_variable = xen_efi_set_variable;
+ efi.query_variable_info = xen_efi_query_variable_info;
+ efi.update_capsule = xen_efi_update_capsule;
+ efi.query_capsule_caps = xen_efi_query_capsule_caps;
+ efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
+ efi.reset_system = xen_efi_reset_system;
efi_systab_xen.tables = info->cfg.addr;
efi_systab_xen.nr_tables = info->cfg.nent;
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 1f386d7fdf70..1d7a7213a310 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -975,37 +975,32 @@ static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
spin_unlock(&mm->page_table_lock);
}
-
-#ifdef CONFIG_SMP
-/* Another cpu may still have their %cr3 pointing at the pagetable, so
- we need to repoint it somewhere else before we can unpin it. */
-static void drop_other_mm_ref(void *info)
+static void drop_mm_ref_this_cpu(void *info)
{
struct mm_struct *mm = info;
- struct mm_struct *active_mm;
-
- active_mm = this_cpu_read(cpu_tlbstate.active_mm);
- if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
+ if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm)
leave_mm(smp_processor_id());
- /* If this cpu still has a stale cr3 reference, then make sure
- it has been flushed. */
+ /*
+ * If this cpu still has a stale cr3 reference, then make sure
+ * it has been flushed.
+ */
if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
- load_cr3(swapper_pg_dir);
+ xen_mc_flush();
}
+#ifdef CONFIG_SMP
+/*
+ * Another cpu may still have their %cr3 pointing at the pagetable, so
+ * we need to repoint it somewhere else before we can unpin it.
+ */
static void xen_drop_mm_ref(struct mm_struct *mm)
{
cpumask_var_t mask;
unsigned cpu;
- if (current->active_mm == mm) {
- if (current->mm == mm)
- load_cr3(swapper_pg_dir);
- else
- leave_mm(smp_processor_id());
- }
+ drop_mm_ref_this_cpu(mm);
/* Get the "official" set of cpus referring to our pagetable. */
if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
@@ -1013,31 +1008,31 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
&& per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
continue;
- smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
+ smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1);
}
return;
}
cpumask_copy(mask, mm_cpumask(mm));
- /* It's possible that a vcpu may have a stale reference to our
- cr3, because its in lazy mode, and it hasn't yet flushed
- its set of pending hypercalls yet. In this case, we can
- look at its actual current cr3 value, and force it to flush
- if needed. */
+ /*
+ * It's possible that a vcpu may have a stale reference to our
+ * cr3, because its in lazy mode, and it hasn't yet flushed
+ * its set of pending hypercalls yet. In this case, we can
+ * look at its actual current cr3 value, and force it to flush
+ * if needed.
+ */
for_each_online_cpu(cpu) {
if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
cpumask_set_cpu(cpu, mask);
}
- if (!cpumask_empty(mask))
- smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
+ smp_call_function_many(mask, drop_mm_ref_this_cpu, mm, 1);
free_cpumask_var(mask);
}
#else
static void xen_drop_mm_ref(struct mm_struct *mm)
{
- if (current->active_mm == mm)
- load_cr3(swapper_pg_dir);
+ drop_mm_ref_this_cpu(mm);
}
#endif
@@ -1366,8 +1361,7 @@ static void xen_flush_tlb_single(unsigned long addr)
}
static void xen_flush_tlb_others(const struct cpumask *cpus,
- struct mm_struct *mm, unsigned long start,
- unsigned long end)
+ const struct flush_tlb_info *info)
{
struct {
struct mmuext_op op;
@@ -1379,7 +1373,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
} *args;
struct multicall_space mcs;
- trace_xen_mmu_flush_tlb_others(cpus, mm, start, end);
+ trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
if (cpumask_empty(cpus))
return; /* nothing to do */
@@ -1393,9 +1387,10 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
- if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
+ if (info->end != TLB_FLUSH_ALL &&
+ (info->end - info->start) <= PAGE_SIZE) {
args->op.cmd = MMUEXT_INVLPG_MULTI;
- args->op.arg1.linear_addr = start;
+ args->op.arg1.linear_addr = info->start;
}
MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
@@ -1470,8 +1465,8 @@ static void xen_write_cr3(unsigned long cr3)
* At the start of the day - when Xen launches a guest, it has already
* built pagetables for the guest. We diligently look over them
* in xen_setup_kernel_pagetable and graft as appropriate them in the
- * init_level4_pgt and its friends. Then when we are happy we load
- * the new init_level4_pgt - and continue on.
+ * init_top_pgt and its friends. Then when we are happy we load
+ * the new init_top_pgt - and continue on.
*
* The generic code starts (start_kernel) and 'init_mem_mapping' sets
* up the rest of the pagetables. When it has completed it loads the cr3.
@@ -1914,12 +1909,12 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
pt_end = pt_base + xen_start_info->nr_pt_frames;
/* Zap identity mapping */
- init_level4_pgt[0] = __pgd(0);
+ init_top_pgt[0] = __pgd(0);
/* Pre-constructed entries are in pfn, so convert to mfn */
/* L4[272] -> level3_ident_pgt */
/* L4[511] -> level3_kernel_pgt */
- convert_pfn_mfn(init_level4_pgt);
+ convert_pfn_mfn(init_top_pgt);
/* L3_i[0] -> level2_ident_pgt */
convert_pfn_mfn(level3_ident_pgt);
@@ -1950,10 +1945,10 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* Copy the initial P->M table mappings if necessary. */
i = pgd_index(xen_start_info->mfn_list);
if (i && i < pgd_index(__START_KERNEL_map))
- init_level4_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
+ init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
/* Make pagetable pieces RO */
- set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
+ set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
@@ -1964,7 +1959,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* Pin down new L4 */
pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
- PFN_DOWN(__pa_symbol(init_level4_pgt)));
+ PFN_DOWN(__pa_symbol(init_top_pgt)));
/* Unpin Xen-provided one */
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
@@ -1974,7 +1969,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
* attach it to, so make sure we just set kernel pgd.
*/
xen_mc_batch();
- __xen_write_cr3(true, __pa(init_level4_pgt));
+ __xen_write_cr3(true, __pa(init_top_pgt));
xen_mc_issue(PARAVIRT_LAZY_CPU);
/* We can't that easily rip out L3 and L2, as the Xen pagetables are
@@ -2022,7 +2017,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
pmd_t pmd;
pte_t pte;
- pa = read_cr3();
+ pa = read_cr3_pa();
pgd = native_make_pgd(xen_read_phys_ulong(pa + pgd_index(vaddr) *
sizeof(pgd)));
if (!pgd_present(pgd))
@@ -2102,7 +2097,7 @@ void __init xen_relocate_p2m(void)
pt_phys = pmd_phys + PFN_PHYS(n_pmd);
p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
- pgd = __va(read_cr3());
+ pgd = __va(read_cr3_pa());
new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
idx_p4d = 0;
save_pud = n_pud;
@@ -2209,7 +2204,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
{
unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
- BUG_ON(read_cr3() != __pa(initial_page_table));
+ BUG_ON(read_cr3_pa() != __pa(initial_page_table));
BUG_ON(cr3 != __pa(swapper_pg_dir));
/*
diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S
index 5e246716d58f..e1a5fbeae08d 100644
--- a/arch/x86/xen/xen-pvh.S
+++ b/arch/x86/xen/xen-pvh.S
@@ -87,7 +87,7 @@ ENTRY(pvh_start_xen)
wrmsr
/* Enable pre-constructed page tables. */
- mov $_pa(init_level4_pgt), %eax
+ mov $_pa(init_top_pgt), %eax
mov %eax, %cr3
mov $(X86_CR0_PG | X86_CR0_PE), %eax
mov %eax, %cr0
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index cc23e9ecc6bb..30f6290109d4 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -25,7 +25,6 @@ generic-y += preempt.h
generic-y += resource.h
generic-y += rwsem.h
generic-y += sections.h
-generic-y += siginfo.h
generic-y += statfs.h
generic-y += termios.h
generic-y += topology.h
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 003eeee3fbc6..30ee8c608853 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -213,8 +213,6 @@ struct mm_struct;
#define release_segments(mm) do { } while(0)
#define forget_segments() do { } while (0)
-#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc)
-
extern unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc)
diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild
index b15bf6bc0e94..4cb0d2f8868c 100644
--- a/arch/xtensa/include/uapi/asm/Kbuild
+++ b/arch/xtensa/include/uapi/asm/Kbuild
@@ -1,2 +1,3 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += siginfo.h
diff --git a/arch/xtensa/include/uapi/asm/ioctls.h b/arch/xtensa/include/uapi/asm/ioctls.h
index 518954e74e6d..98b004e24e85 100644
--- a/arch/xtensa/include/uapi/asm/ioctls.h
+++ b/arch/xtensa/include/uapi/asm/ioctls.h
@@ -105,6 +105,7 @@
#define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */
#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */
#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */
+#define TIOCGPTPEER _IOR('T', 0x41, int) /* Safely open the slave */
#define TIOCSERCONFIG _IO('T', 83)
#define TIOCSERGWILD _IOR('T', 84, int)
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 668c1056f9e4..fd524a54d2ab 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -187,7 +187,7 @@ void __init time_init(void)
local_timer_setup(0);
setup_irq(this_cpu_ptr(&ccount_timer)->evt.irq, &timer_irqaction);
sched_clock_register(ccount_sched_clock_read, 32, ccount_freq);
- clocksource_probe();
+ timer_probe();
}
/*