summaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig87
-rw-r--r--arch/powerpc/Kconfig.debug4
-rw-r--r--arch/powerpc/Makefile11
-rw-r--r--arch/powerpc/boot/Makefile2
-rw-r--r--arch/powerpc/boot/dts/Makefile1
-rw-r--r--arch/powerpc/boot/dts/akebono.dts2
-rw-r--r--arch/powerpc/boot/dts/bluestone.dts2
-rw-r--r--arch/powerpc/boot/dts/currituck.dts2
-rw-r--r--arch/powerpc/boot/dts/iss4xx-mpic.dts2
-rw-r--r--arch/powerpc/boot/dts/wii.dts22
-rw-r--r--arch/powerpc/configs/mpc512x_defconfig1
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig1
-rw-r--r--arch/powerpc/include/asm/asm-prototypes.h14
-rw-r--r--arch/powerpc/include/asm/book3s/32/mmu-hash.h2
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h11
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h32
-rw-r--r--arch/powerpc/include/asm/book3s/64/hugetlb.h12
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h2
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgalloc.h8
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h34
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix.h4
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h30
-rw-r--r--arch/powerpc/include/asm/checksum.h4
-rw-r--r--arch/powerpc/include/asm/device.h10
-rw-r--r--arch/powerpc/include/asm/dma-direct.h18
-rw-r--r--arch/powerpc/include/asm/dma-mapping.h92
-rw-r--r--arch/powerpc/include/asm/eeh.h10
-rw-r--r--arch/powerpc/include/asm/eeh_event.h1
-rw-r--r--arch/powerpc/include/asm/exception-64s.h4
-rw-r--r--arch/powerpc/include/asm/hvsi.h2
-rw-r--r--arch/powerpc/include/asm/io.h2
-rw-r--r--arch/powerpc/include/asm/iommu.h17
-rw-r--r--arch/powerpc/include/asm/ipic.h3
-rw-r--r--arch/powerpc/include/asm/irq.h18
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h3
-rw-r--r--arch/powerpc/include/asm/livepatch.h7
-rw-r--r--arch/powerpc/include/asm/machdep.h4
-rw-r--r--arch/powerpc/include/asm/mce.h2
-rw-r--r--arch/powerpc/include/asm/mmu.h13
-rw-r--r--arch/powerpc/include/asm/nmi.h2
-rw-r--r--arch/powerpc/include/asm/nohash/32/mmu-8xx.h3
-rw-r--r--arch/powerpc/include/asm/nvram.h9
-rw-r--r--arch/powerpc/include/asm/page.h14
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h10
-rw-r--r--arch/powerpc/include/asm/pci.h2
-rw-r--r--arch/powerpc/include/asm/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/powernv.h3
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h17
-rw-r--r--arch/powerpc/include/asm/ppc-pci.h4
-rw-r--r--arch/powerpc/include/asm/processor.h108
-rw-r--r--arch/powerpc/include/asm/ptrace.h2
-rw-r--r--arch/powerpc/include/asm/reg.h9
-rw-r--r--arch/powerpc/include/asm/sections.h7
-rw-r--r--arch/powerpc/include/asm/smp.h17
-rw-r--r--arch/powerpc/include/asm/swiotlb.h5
-rw-r--r--arch/powerpc/include/asm/task_size_32.h21
-rw-r--r--arch/powerpc/include/asm/task_size_64.h79
-rw-r--r--arch/powerpc/include/asm/thread_info.h19
-rw-r--r--arch/powerpc/include/asm/topology.h2
-rw-r--r--arch/powerpc/include/asm/uaccess.h1
-rw-r--r--arch/powerpc/include/asm/unistd.h8
-rw-r--r--arch/powerpc/include/uapi/asm/socket.h4
-rw-r--r--arch/powerpc/kernel/Makefile15
-rw-r--r--arch/powerpc/kernel/asm-offsets.c15
-rw-r--r--arch/powerpc/kernel/cpu_setup_6xx.S4
-rw-r--r--arch/powerpc/kernel/dma-iommu.c75
-rw-r--r--arch/powerpc/kernel/dma-mask.c12
-rw-r--r--arch/powerpc/kernel/dma-swiotlb.c89
-rw-r--r--arch/powerpc/kernel/dma.c362
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c17
-rw-r--r--arch/powerpc/kernel/eeh.c190
-rw-r--r--arch/powerpc/kernel/eeh_cache.c36
-rw-r--r--arch/powerpc/kernel/eeh_driver.c86
-rw-r--r--arch/powerpc/kernel/eeh_event.c16
-rw-r--r--arch/powerpc/kernel/eeh_pe.c68
-rw-r--r--arch/powerpc/kernel/eeh_sysfs.c3
-rw-r--r--arch/powerpc/kernel/entry_32.S97
-rw-r--r--arch/powerpc/kernel/entry_64.S53
-rw-r--r--arch/powerpc/kernel/epapr_hcalls.S5
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S14
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S94
-rw-r--r--arch/powerpc/kernel/head_32.S160
-rw-r--r--arch/powerpc/kernel/head_40x.S9
-rw-r--r--arch/powerpc/kernel/head_44x.S8
-rw-r--r--arch/powerpc/kernel/head_64.S20
-rw-r--r--arch/powerpc/kernel/head_8xx.S124
-rw-r--r--arch/powerpc/kernel/head_booke.h12
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S16
-rw-r--r--arch/powerpc/kernel/idle_6xx.S8
-rw-r--r--arch/powerpc/kernel/idle_book3e.S2
-rw-r--r--arch/powerpc/kernel/idle_e500.S8
-rw-r--r--arch/powerpc/kernel/idle_power4.S2
-rw-r--r--arch/powerpc/kernel/irq.c119
-rw-r--r--arch/powerpc/kernel/kgdb.c28
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c6
-rw-r--r--arch/powerpc/kernel/mce.c11
-rw-r--r--arch/powerpc/kernel/misc_32.S17
-rw-r--r--arch/powerpc/kernel/nvram_64.c158
-rw-r--r--arch/powerpc/kernel/paca.c19
-rw-r--r--arch/powerpc/kernel/pci-common.c24
-rw-r--r--arch/powerpc/kernel/process.c68
-rw-r--r--arch/powerpc/kernel/ptrace.c18
-rw-r--r--arch/powerpc/kernel/setup-common.c9
-rw-r--r--arch/powerpc/kernel/setup_32.c62
-rw-r--r--arch/powerpc/kernel/setup_64.c56
-rw-r--r--arch/powerpc/kernel/smp.c109
-rw-r--r--arch/powerpc/kernel/stacktrace.c102
-rw-r--r--arch/powerpc/kernel/syscalls.c2
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl134
-rw-r--r--arch/powerpc/kernel/syscalls/syscalltbl.sh4
-rw-r--r--arch/powerpc/kernel/systbl.S6
-rw-r--r--arch/powerpc/kernel/time.c1
-rw-r--r--arch/powerpc/kernel/trace/Makefile3
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_mprofile.S6
-rw-r--r--arch/powerpc/kernel/traps.c133
-rw-r--r--arch/powerpc/kernel/udbg.c2
-rw-r--r--arch/powerpc/kernel/vdso.c2
-rw-r--r--arch/powerpc/kernel/vdso32/Makefile1
-rw-r--r--arch/powerpc/kernel/vdso64/Makefile1
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S14
-rw-r--r--arch/powerpc/kvm/Makefile5
-rw-r--r--arch/powerpc/kvm/book3s.c7
-rw-r--r--arch/powerpc/kvm/book3s_hv.c25
-rw-r--r--arch/powerpc/kvm/book3s_hv_hmi.c1
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c58
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S66
-rw-r--r--arch/powerpc/lib/Makefile3
-rw-r--r--arch/powerpc/lib/sstep.c114
-rw-r--r--arch/powerpc/lib/test_emulate_step.c535
-rw-r--r--arch/powerpc/lib/test_emulate_step_exec_instr.S150
-rw-r--r--arch/powerpc/math-emu/Makefile2
-rw-r--r--arch/powerpc/mm/40x_mmu.c2
-rw-r--r--arch/powerpc/mm/44x_mmu.c2
-rw-r--r--arch/powerpc/mm/8xx_mmu.c91
-rw-r--r--arch/powerpc/mm/Makefile15
-rw-r--r--arch/powerpc/mm/dma-noncoherent.c40
-rw-r--r--arch/powerpc/mm/fsl_booke_mmu.c2
-rw-r--r--arch/powerpc/mm/hash_low_32.S76
-rw-r--r--arch/powerpc/mm/hash_utils_64.c12
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c28
-rw-r--r--arch/powerpc/mm/hugetlbpage-radix.c22
-rw-r--r--arch/powerpc/mm/init_32.c6
-rw-r--r--arch/powerpc/mm/init_64.c2
-rw-r--r--arch/powerpc/mm/mem.c61
-rw-r--r--arch/powerpc/mm/mmu_context_iommu.c145
-rw-r--r--arch/powerpc/mm/mmu_decl.h10
-rw-r--r--arch/powerpc/mm/numa.c25
-rw-r--r--arch/powerpc/mm/pgtable-book3e.c8
-rw-r--r--arch/powerpc/mm/pgtable-book3s64.c30
-rw-r--r--arch/powerpc/mm/pgtable-radix.c43
-rw-r--r--arch/powerpc/mm/pgtable_32.c42
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c189
-rw-r--r--arch/powerpc/mm/ptdump/8xx.c (renamed from arch/powerpc/mm/dump_linuxpagetables-8xx.c)2
-rw-r--r--arch/powerpc/mm/ptdump/Makefile9
-rw-r--r--arch/powerpc/mm/ptdump/bats.c (renamed from arch/powerpc/mm/dump_bats.c)0
-rw-r--r--arch/powerpc/mm/ptdump/book3s64.c (renamed from arch/powerpc/mm/dump_linuxpagetables-book3s64.c)2
-rw-r--r--arch/powerpc/mm/ptdump/hashpagetable.c (renamed from arch/powerpc/mm/dump_hashpagetable.c)2
-rw-r--r--arch/powerpc/mm/ptdump/ptdump.c (renamed from arch/powerpc/mm/dump_linuxpagetables.c)20
-rw-r--r--arch/powerpc/mm/ptdump/ptdump.h (renamed from arch/powerpc/mm/dump_linuxpagetables.h)0
-rw-r--r--arch/powerpc/mm/ptdump/segment_regs.c (renamed from arch/powerpc/mm/dump_sr.c)0
-rw-r--r--arch/powerpc/mm/ptdump/shared.c (renamed from arch/powerpc/mm/dump_linuxpagetables-generic.c)2
-rw-r--r--arch/powerpc/mm/slb.c5
-rw-r--r--arch/powerpc/mm/slice.c10
-rw-r--r--arch/powerpc/mm/tlb_nohash.c2
-rw-r--r--arch/powerpc/net/bpf_jit.h4
-rw-r--r--arch/powerpc/net/bpf_jit32.h5
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c121
-rw-r--r--arch/powerpc/perf/hv-24x7.c10
-rw-r--r--arch/powerpc/perf/hv-gpci.c10
-rw-r--r--arch/powerpc/perf/imc-pmu.c19
-rw-r--r--arch/powerpc/perf/power9-events-list.h24
-rw-r--r--arch/powerpc/perf/power9-pmu.c4
-rw-r--r--arch/powerpc/platforms/44x/Kconfig1
-rw-r--r--arch/powerpc/platforms/44x/ppc476.c1
-rw-r--r--arch/powerpc/platforms/44x/warp.c2
-rw-r--r--arch/powerpc/platforms/83xx/suspend-asm.S34
-rw-r--r--arch/powerpc/platforms/85xx/corenet_generic.c5
-rw-r--r--arch/powerpc/platforms/85xx/ge_imp3a.c2
-rw-r--r--arch/powerpc/platforms/85xx/mpc8536_ds.c2
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_ds.c4
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_mds.c4
-rw-r--r--arch/powerpc/platforms/85xx/p1010rdb.c1
-rw-r--r--arch/powerpc/platforms/85xx/p1022_ds.c2
-rw-r--r--arch/powerpc/platforms/85xx/p1022_rdk.c2
-rw-r--r--arch/powerpc/platforms/85xx/qemu_e500.c1
-rw-r--r--arch/powerpc/platforms/86xx/mpc86xx_hpcn.c1
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype9
-rw-r--r--arch/powerpc/platforms/cell/iommu.c172
-rw-r--r--arch/powerpc/platforms/cell/spu_callbacks.c2
-rw-r--r--arch/powerpc/platforms/cell/spu_syscalls.c1
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c5
-rw-r--r--arch/powerpc/platforms/chrp/Makefile2
-rw-r--r--arch/powerpc/platforms/chrp/nvram.c14
-rw-r--r--arch/powerpc/platforms/chrp/setup.c2
-rw-r--r--arch/powerpc/platforms/embedded6xx/wii.c24
-rw-r--r--arch/powerpc/platforms/pasemi/iommu.c7
-rw-r--r--arch/powerpc/platforms/pasemi/setup.c51
-rw-r--r--arch/powerpc/platforms/powermac/Makefile2
-rw-r--r--arch/powerpc/platforms/powermac/nvram.c9
-rw-r--r--arch/powerpc/platforms/powermac/setup.c3
-rw-r--r--arch/powerpc/platforms/powermac/time.c2
-rw-r--r--arch/powerpc/platforms/powernv/Makefile5
-rw-r--r--arch/powerpc/platforms/powernv/idle.c27
-rw-r--r--arch/powerpc/platforms/powernv/memtrace.c5
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c16
-rw-r--r--arch/powerpc/platforms/powernv/opal-call.c283
-rw-r--r--arch/powerpc/platforms/powernv/opal-msglog.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S344
-rw-r--r--arch/powerpc/platforms/powernv/opal.c6
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda-tce.c1
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c148
-rw-r--r--arch/powerpc/platforms/powernv/pci.c2
-rw-r--r--arch/powerpc/platforms/powernv/smp.c25
-rw-r--r--arch/powerpc/platforms/ps3/device-init.c4
-rw-r--r--arch/powerpc/platforms/ps3/os-area.c4
-rw-r--r--arch/powerpc/platforms/ps3/system-bus.c4
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c19
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c10
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c99
-rw-r--r--arch/powerpc/platforms/pseries/lparcfg.c1
-rw-r--r--arch/powerpc/platforms/pseries/nvram.c2
-rw-r--r--arch/powerpc/platforms/pseries/setup.c18
-rw-r--r--arch/powerpc/platforms/pseries/vio.c95
-rw-r--r--arch/powerpc/sysdev/6xx-suspend.S5
-rw-r--r--arch/powerpc/sysdev/dart_iommu.c65
-rw-r--r--arch/powerpc/sysdev/fsl_pci.c25
-rw-r--r--arch/powerpc/sysdev/ipic.c35
-rw-r--r--arch/powerpc/sysdev/tsi108_dev.c2
-rw-r--r--arch/powerpc/sysdev/xive/common.c2
-rw-r--r--arch/powerpc/xmon/Makefile1
-rw-r--r--arch/powerpc/xmon/ppc-dis.c2
-rw-r--r--arch/powerpc/xmon/xmon.c2
232 files changed, 3839 insertions, 3433 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2890d36eb531..2d0be82c3061 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -119,21 +119,19 @@ config GENERIC_HWEIGHT
bool
default y
-config ARCH_HAS_DMA_SET_COHERENT_MASK
- bool
-
config PPC
bool
default y
#
# Please keep this list sorted alphabetically.
#
+ select ARCH_32BIT_OFF_T if PPC32
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
- select ARCH_HAS_DMA_SET_COHERENT_MASK
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
+ select ARCH_HAS_KCOV
select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_PMEM_API if PPC64
select ARCH_HAS_PTE_SPECIAL
@@ -178,6 +176,7 @@ config PPC
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+ select HAVE_ARCH_NVRAM_OPS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_CBPF_JIT if !PPC64
@@ -201,7 +200,7 @@ config PPC
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK
select HAVE_KERNEL_GZIP
- select HAVE_KERNEL_XZ if PPC_BOOK3S
+ select HAVE_KERNEL_XZ if PPC_BOOK3S || 44x
select HAVE_KPROBES
select HAVE_KPROBES_ON_FTRACE
select HAVE_KRETPROBES
@@ -220,7 +219,7 @@ config PPC
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE if SMP
select HAVE_REGS_AND_STACK_ACCESS_API
- select HAVE_RELIABLE_STACKTRACE if PPC64 && CPU_LITTLE_ENDIAN
+ select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
select HAVE_IRQ_TIME_ACCOUNTING
@@ -233,7 +232,6 @@ config PPC
select NEED_SG_DMA_LENGTH
select OF
select OF_EARLY_FLATTREE
- select OF_RESERVED_MEM
select OLD_SIGACTION if PPC32
select OLD_SIGSUSPEND
select PCI_DOMAINS if PCI
@@ -241,6 +239,7 @@ config PPC
select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+ select THREAD_INFO_IN_TASK
select VIRT_TO_BUS if !PPC64
#
# Please keep this list sorted alphabetically.
@@ -251,9 +250,6 @@ config PPC_BARRIER_NOSPEC
default y
depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
-config GENERIC_CSUM
- def_bool n
-
config EARLY_PRINTK
bool
default y
@@ -274,11 +270,6 @@ config SYSVIPC_COMPAT
depends on COMPAT && SYSVIPC
default y
-# All PPC32s use generic nvram driver through ppc_md
-config GENERIC_NVRAM
- bool
- default y if PPC32
-
config SCHED_OMIT_FRAME_POINTER
bool
default y
@@ -478,9 +469,6 @@ config ARCH_CPU_PROBE_RELEASE
config ARCH_ENABLE_MEMORY_HOTPLUG
def_bool y
-config ARCH_HAS_WALK_MEMORY
- def_bool y
-
config ARCH_ENABLE_MEMORY_HOTREMOVE
def_bool y
@@ -696,7 +684,7 @@ config PPC_16K_PAGES
config PPC_64K_PAGES
bool "64k page size"
- depends on !PPC_FSL_BOOK3E && (44x || PPC_BOOK3S_64 || PPC_BOOK3E_64)
+ depends on 44x || PPC_BOOK3S_64
select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
config PPC_256K_PAGES
@@ -714,6 +702,13 @@ config PPC_256K_PAGES
endchoice
+config PPC_PAGE_SHIFT
+ int
+ default 18 if PPC_256K_PAGES
+ default 16 if PPC_64K_PAGES
+ default 14 if PPC_16K_PAGES
+ default 12
+
config THREAD_SHIFT
int "Thread shift" if EXPERT
range 13 15
@@ -724,6 +719,59 @@ config THREAD_SHIFT
Used to define the stack size. The default is almost always what you
want. Only change this if you know what you are doing.
+config ETEXT_SHIFT_BOOL
+ bool "Set custom etext alignment" if STRICT_KERNEL_RWX && \
+ (PPC_BOOK3S_32 || PPC_8xx)
+ depends on ADVANCED_OPTIONS
+ help
+ This option allows you to set the kernel end of text alignment. When
+ RAM is mapped by blocks, the alignment needs to fit the size and
+ number of possible blocks. The default should be OK for most configs.
+
+ Say N here unless you know what you are doing.
+
+config ETEXT_SHIFT
+ int "_etext shift" if ETEXT_SHIFT_BOOL
+ range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
+ range 19 23 if STRICT_KERNEL_RWX && PPC_8xx
+ default 17 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
+ default 19 if STRICT_KERNEL_RWX && PPC_8xx
+ default PPC_PAGE_SHIFT
+ help
+ On Book3S 32 (603+), IBATs are used to map kernel text.
+ Smaller is the alignment, greater is the number of necessary IBATs.
+
+ On 8xx, large pages (512kb or 8M) are used to map kernel linear
+ memory. Aligning to 8M reduces TLB misses as only 8M pages are used
+ in that case.
+
+config DATA_SHIFT_BOOL
+ bool "Set custom data alignment" if STRICT_KERNEL_RWX && \
+ (PPC_BOOK3S_32 || PPC_8xx)
+ depends on ADVANCED_OPTIONS
+ help
+ This option allows you to set the kernel data alignment. When
+ RAM is mapped by blocks, the alignment needs to fit the size and
+ number of possible blocks. The default should be OK for most configs.
+
+ Say N here unless you know what you are doing.
+
+config DATA_SHIFT
+ int "Data shift" if DATA_SHIFT_BOOL
+ default 24 if STRICT_KERNEL_RWX && PPC64
+ range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
+ range 19 23 if STRICT_KERNEL_RWX && PPC_8xx
+ default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
+ default 23 if STRICT_KERNEL_RWX && PPC_8xx
+ default PPC_PAGE_SHIFT
+ help
+ On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO.
+ Smaller is the alignment, greater is the number of necessary DBATs.
+
+ On 8xx, large pages (512kb or 8M) are used to map kernel linear
+ memory. Aligning to 8M reduces TLB misses as only 8M pages are used
+ in that case.
+
config FORCE_MAX_ZONEORDER
int "Maximum zone order"
range 8 9 if PPC64 && PPC_64K_PAGES
@@ -890,6 +938,7 @@ config FSL_SOC
config FSL_PCI
bool
+ select ARCH_HAS_DMA_SET_MASK
select PPC_INDIRECT_PCI
select PCI_QUIRKS
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index f4961fbcb48d..4e00cb0a5464 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -361,10 +361,6 @@ config PPC_PTDUMP
If you are unsure, say N.
-config PPC_HTDUMP
- def_bool y
- depends on PPC_PTDUMP && PPC_BOOK3S_64
-
config PPC_FAST_ENDIAN_SWITCH
bool "Deprecated fast endian-switch syscall"
depends on DEBUG_KERNEL && PPC_BOOK3S_64
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 488c9edffa58..7de49889bd5d 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -213,9 +213,9 @@ endif
asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1)
KBUILD_CPPFLAGS += -Iarch/$(ARCH) $(asinstr)
-KBUILD_AFLAGS += -Iarch/$(ARCH) $(AFLAGS-y)
+KBUILD_AFLAGS += $(AFLAGS-y)
KBUILD_CFLAGS += $(call cc-option,-msoft-float)
-KBUILD_CFLAGS += -pipe -Iarch/$(ARCH) $(CFLAGS-y)
+KBUILD_CFLAGS += -pipe $(CFLAGS-y)
CPP = $(CC) -E $(KBUILD_CFLAGS)
CHECKFLAGS += -m$(BITS) -D__powerpc__ -D__powerpc$(BITS)__
@@ -427,6 +427,13 @@ else
endif
endif
+ifdef CONFIG_SMP
+prepare: task_cpu_prepare
+
+task_cpu_prepare: prepare0
+ $(eval KBUILD_CFLAGS += -D_TASK_CPU=$(shell awk '{if ($$2 == "TASK_CPU") print $$3;}' include/generated/asm-offsets.h))
+endif
+
# Check toolchain versions:
# - gcc-4.6 is the minimum kernel-wide version so nothing required.
checkbin:
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 0e8dadd011bc..73d1f3562978 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -218,7 +218,7 @@ quiet_cmd_bootas = BOOTAS $@
cmd_bootas = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $<
quiet_cmd_bootar = BOOTAR $@
- cmd_bootar = $(BOOTAR) $(BOOTARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
+ cmd_bootar = $(BOOTAR) $(BOOTARFLAGS) $@.$$$$ $(real-prereqs); mv $@.$$$$ $@
$(obj-libfdt): $(obj)/%.o: $(srctree)/scripts/dtc/libfdt/%.c FORCE
$(call if_changed_dep,bootcc)
diff --git a/arch/powerpc/boot/dts/Makefile b/arch/powerpc/boot/dts/Makefile
index fb335d05aae8..1cbc0e4ce857 100644
--- a/arch/powerpc/boot/dts/Makefile
+++ b/arch/powerpc/boot/dts/Makefile
@@ -4,3 +4,4 @@ subdir-y += fsl
dtstree := $(srctree)/$(src)
dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
+dtb-$(CONFIG_XILINX_VIRTEX440_GENERIC_BOARD) += virtex440-ml507.dtb virtex440-ml510.dtb
diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts
index 8a7a10139bc9..cd9d66041a3f 100644
--- a/arch/powerpc/boot/dts/akebono.dts
+++ b/arch/powerpc/boot/dts/akebono.dts
@@ -40,7 +40,7 @@
d-cache-size = <32768>;
dcr-controller;
dcr-access-method = "native";
- status = "ok";
+ status = "okay";
};
cpu@1 {
device_type = "cpu";
diff --git a/arch/powerpc/boot/dts/bluestone.dts b/arch/powerpc/boot/dts/bluestone.dts
index b0b26d8d68a2..64eaf7e09d22 100644
--- a/arch/powerpc/boot/dts/bluestone.dts
+++ b/arch/powerpc/boot/dts/bluestone.dts
@@ -109,7 +109,7 @@
OCM: ocm@400040000 {
compatible = "ibm,ocm";
- status = "ok";
+ status = "okay";
cell-index = <1>;
/* configured in U-Boot */
reg = <4 0x00040000 0x8000>; /* 32K */
diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts
index a04a4fcfde63..b6d87b9c2cef 100644
--- a/arch/powerpc/boot/dts/currituck.dts
+++ b/arch/powerpc/boot/dts/currituck.dts
@@ -39,7 +39,7 @@
d-cache-size = <32768>;
dcr-controller;
dcr-access-method = "native";
- status = "ok";
+ status = "okay";
};
cpu@1 {
device_type = "cpu";
diff --git a/arch/powerpc/boot/dts/iss4xx-mpic.dts b/arch/powerpc/boot/dts/iss4xx-mpic.dts
index f7063198b2dc..c9f90f1a9c8e 100644
--- a/arch/powerpc/boot/dts/iss4xx-mpic.dts
+++ b/arch/powerpc/boot/dts/iss4xx-mpic.dts
@@ -43,7 +43,7 @@
d-cache-size = <32768>;
dcr-controller;
dcr-access-method = "native";
- status = "ok";
+ status = "okay";
};
cpu@1 {
device_type = "cpu";
diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts
index 104b1d6d5695..c406bdb4f36f 100644
--- a/arch/powerpc/boot/dts/wii.dts
+++ b/arch/powerpc/boot/dts/wii.dts
@@ -14,6 +14,7 @@
/dts-v1/;
#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
/*
* This is commented-out for now.
@@ -187,6 +188,11 @@
"DEBUG0", "DEBUG1", "DEBUG2", "DEBUG3",
"DEBUG4", "DEBUG5", "DEBUG6", "DEBUG7";
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <10>;
+ interrupt-parent = <&PIC1>;
+
/*
* This is commented out while a standard binding
* for i2c over gpio is defined.
@@ -235,5 +241,21 @@
panic-indicator;
};
};
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ power {
+ label = "Power Button";
+ gpios = <&GPIO 0 GPIO_ACTIVE_HIGH>;
+ linux,code = <KEY_POWER>;
+ };
+
+ eject {
+ label = "Eject Button";
+ gpios = <&GPIO 6 GPIO_ACTIVE_HIGH>;
+ linux,code = <KEY_EJECTCD>;
+ };
+ };
};
diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig
index c2b1c4404683..e4bfb1101c0e 100644
--- a/arch/powerpc/configs/mpc512x_defconfig
+++ b/arch/powerpc/configs/mpc512x_defconfig
@@ -120,6 +120,5 @@ CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 53687c3a70c4..7c6baf6df139 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -1123,7 +1123,6 @@ CONFIG_NLS_ISO8859_15=m
CONFIG_NLS_KOI8_R=m
CONFIG_NLS_KOI8_U=m
CONFIG_DEBUG_INFO=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_UNUSED_SYMBOLS=y
CONFIG_HEADERS_CHECK=y
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 1d911f68a23b..296584e6dd55 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -23,8 +23,8 @@
#include <uapi/asm/ucontext.h>
/* SMP */
-extern struct thread_info *current_set[NR_CPUS];
-extern struct thread_info *secondary_ti;
+extern struct task_struct *current_set[NR_CPUS];
+extern struct task_struct *secondary_current;
void start_secondary(void *unused);
/* kexec */
@@ -37,13 +37,11 @@ void kexec_copy_flush(struct kimage *image);
extern struct static_key hcall_tracepoint_key;
void __trace_hcall_entry(unsigned long opcode, unsigned long *args);
void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf);
-/* OPAL tracing */
-#ifdef CONFIG_JUMP_LABEL
-extern struct static_key opal_tracepoint_key;
-#endif
-void __trace_opal_entry(unsigned long opcode, unsigned long *args);
-void __trace_opal_exit(long opcode, unsigned long retval);
+/* OPAL */
+int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
+ int64_t a4, int64_t a5, int64_t a6, int64_t a7,
+ int64_t opcode, uint64_t msr);
/* VMX copying */
int enter_vmx_usercopy(void);
diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index 0c261ba2c826..5cb588395fdc 100644
--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -92,6 +92,8 @@ typedef struct {
unsigned long vdso_base;
} mm_context_t;
+void update_bats(void);
+
/* patch sites */
extern s32 patch__hash_page_A0, patch__hash_page_A1, patch__hash_page_A2;
extern s32 patch__hash_page_B, patch__hash_page_C;
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 49d76adb9bc5..aa8406b8f7ba 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -174,7 +174,18 @@ static inline bool pte_user(pte_t pte)
* of RAM. -- Cort
*/
#define VMALLOC_OFFSET (0x1000000) /* 16M */
+
+/*
+ * With CONFIG_STRICT_KERNEL_RWX, kernel segments are set NX. But when modules
+ * are used, NX cannot be set on VMALLOC space. So vmalloc VM space and linear
+ * memory shall not share segments.
+ */
+#if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_MODULES)
+#define VMALLOC_START ((_ALIGN((long)high_memory, 256L << 20) + VMALLOC_OFFSET) & \
+ ~(VMALLOC_OFFSET - 1))
+#else
#define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
+#endif
#define VMALLOC_END ioremap_bot
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 247aff9cc6ba..54b7af6cd27f 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -40,22 +40,36 @@
#else
#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE)
#endif
+
/*
- * Define the address range of the kernel non-linear virtual area
+ * Define the address range of the kernel non-linear virtual area. In contrast
+ * to the linear mapping, this is managed using the kernel page tables and then
+ * inserted into the hash page table to actually take effect, similarly to user
+ * mappings.
*/
#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000)
-#define H_KERN_VIRT_SIZE ASM_CONST(0x0000400000000000) /* 64T */
/*
- * The vmalloc space starts at the beginning of that region, and
- * occupies half of it on hash CPUs and a quarter of it on Book3E
- * (we keep a quarter for the virtual memmap)
+ * Allow virtual mapping of one context size.
+ * 512TB for 64K page size
+ * 64TB for 4K page size
+ */
+#define H_KERN_VIRT_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT)
+
+/*
+ * 8TB IO mapping size
+ */
+#define H_KERN_IO_SIZE ASM_CONST(0x80000000000) /* 8T */
+
+/*
+ * The vmalloc space starts at the beginning of the kernel non-linear virtual
+ * region, and occupies 504T (64K) or 56T (4K)
*/
-#define H_VMALLOC_START H_KERN_VIRT_START
-#define H_VMALLOC_SIZE ASM_CONST(0x380000000000) /* 56T */
-#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE)
+#define H_VMALLOC_START H_KERN_VIRT_START
+#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE - H_KERN_IO_SIZE)
+#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE)
-#define H_KERN_IO_START H_VMALLOC_END
+#define H_KERN_IO_START H_VMALLOC_END
/*
* Region IDs
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index 5b0177733994..66c1e4f88d65 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -13,6 +13,10 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags);
+extern void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t pte);
+
static inline int hstate_get_psize(struct hstate *hstate)
{
unsigned long shift;
@@ -42,4 +46,12 @@ static inline bool gigantic_page_supported(void)
/* hugepd entry valid bit */
#define HUGEPD_VAL_BITS (0x8000000000000000UL)
+#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
+extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
+extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t new_pte);
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 12e522807f9f..a28a28079edb 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -23,7 +23,7 @@
*/
#include <asm/book3s/64/pgtable.h>
#include <asm/bug.h>
-#include <asm/processor.h>
+#include <asm/task_size_64.h>
#include <asm/cpu_has_feature.h>
/*
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 9c1173283b96..138bc2ecc0c4 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -111,7 +111,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
{
- pgd_set(pgd, __pgtable_ptr_val(pud) | PGD_VAL_BITS);
+ *pgd = __pgd(__pgtable_ptr_val(pud) | PGD_VAL_BITS);
}
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -138,7 +138,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
- pud_set(pud, __pgtable_ptr_val(pmd) | PUD_VAL_BITS);
+ *pud = __pud(__pgtable_ptr_val(pmd) | PUD_VAL_BITS);
}
static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
@@ -176,13 +176,13 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
{
- pmd_set(pmd, __pgtable_ptr_val(pte) | PMD_VAL_BITS);
+ *pmd = __pmd(__pgtable_ptr_val(pte) | PMD_VAL_BITS);
}
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t pte_page)
{
- pmd_set(pmd, __pgtable_ptr_val(pte_page) | PMD_VAL_BITS);
+ *pmd = __pmd(__pgtable_ptr_val(pte_page) | PMD_VAL_BITS);
}
static inline pgtable_t pmd_pgtable(pmd_t pmd)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index d8c8d7c9df15..581f91be9dd4 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -811,7 +811,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
return hash__set_pte_at(mm, addr, ptep, pte, percpu);
}
-#define _PAGE_CACHE_CTL (_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
+#define _PAGE_CACHE_CTL (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
#define pgprot_noncached pgprot_noncached
static inline pgprot_t pgprot_noncached(pgprot_t prot)
@@ -851,11 +851,6 @@ static inline bool pte_ci(pte_t pte)
return false;
}
-static inline void pmd_set(pmd_t *pmdp, unsigned long val)
-{
- *pmdp = __pmd(val);
-}
-
static inline void pmd_clear(pmd_t *pmdp)
{
*pmdp = __pmd(0);
@@ -887,11 +882,6 @@ static inline int pmd_bad(pmd_t pmd)
return hash__pmd_bad(pmd);
}
-static inline void pud_set(pud_t *pudp, unsigned long val)
-{
- *pudp = __pud(val);
-}
-
static inline void pud_clear(pud_t *pudp)
{
*pudp = __pud(0);
@@ -934,10 +924,6 @@ static inline bool pud_access_permitted(pud_t pud, bool write)
}
#define pgd_write(pgd) pte_write(pgd_pte(pgd))
-static inline void pgd_set(pgd_t *pgdp, unsigned long val)
-{
- *pgdp = __pgd(val);
-}
static inline void pgd_clear(pgd_t *pgdp)
{
@@ -1306,6 +1292,24 @@ static inline int pud_pfn(pud_t pud)
BUILD_BUG();
return 0;
}
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
+ pte_t *, pte_t, pte_t);
+
+/*
+ * Returns true for a R -> RW upgrade of pte
+ */
+static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_val)
+{
+ if (!(old_val & _PAGE_READ))
+ return false;
+
+ if ((!(old_val & _PAGE_WRITE)) && (new_val & _PAGE_WRITE))
+ return true;
+
+ return false;
+}
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 7d1a3d1543fc..5ab134eeed20 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -127,6 +127,10 @@ extern void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep
pte_t entry, unsigned long address,
int psize);
+extern void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t pte);
+
static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
unsigned long set)
{
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 671316f9e95d..05147cecb8df 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -13,8 +13,32 @@ static inline int mmu_get_ap(int psize)
#ifdef CONFIG_PPC_RADIX_MMU
extern void radix__tlbiel_all(unsigned int action);
+extern void radix__flush_tlb_lpid_page(unsigned int lpid,
+ unsigned long addr,
+ unsigned long page_size);
+extern void radix__flush_pwc_lpid(unsigned int lpid);
+extern void radix__flush_tlb_lpid(unsigned int lpid);
+extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
#else
static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); };
+static inline void radix__flush_tlb_lpid_page(unsigned int lpid,
+ unsigned long addr,
+ unsigned long page_size)
+{
+ WARN_ON(1);
+}
+static inline void radix__flush_pwc_lpid(unsigned int lpid)
+{
+ WARN_ON(1);
+}
+static inline void radix__flush_tlb_lpid(unsigned int lpid)
+{
+ WARN_ON(1);
+}
+static inline void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
+{
+ WARN_ON(1);
+}
#endif
extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma,
@@ -49,12 +73,6 @@ extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr);
extern void radix__flush_tlb_all(void);
-extern void radix__flush_tlb_lpid_page(unsigned int lpid,
- unsigned long addr,
- unsigned long page_size);
-extern void radix__flush_pwc_lpid(unsigned int lpid);
-extern void radix__flush_tlb_lpid(unsigned int lpid);
extern void radix__local_flush_tlb_lpid(unsigned int lpid);
-extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
#endif
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index a78a57e5058d..72a65d744a28 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -9,9 +9,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#ifdef CONFIG_GENERIC_CSUM
-#include <asm-generic/checksum.h>
-#else
#include <linux/bitops.h>
#include <linux/in6.h>
/*
@@ -217,6 +214,5 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
const struct in6_addr *daddr,
__u32 len, __u8 proto, __wsum sum);
-#endif
#endif /* __KERNEL__ */
#endif
diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
index 0245bfcaac32..a130be13ee83 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -20,6 +20,11 @@ struct iommu_table;
*/
struct dev_archdata {
/*
+ * Set to %true if the dma_iommu_ops are requested to use a direct
+ * window instead of dynamically mapping memory.
+ */
+ bool iommu_bypass : 1;
+ /*
* These two used to be a union. However, with the hybrid ops we need
* both so here we store both a DMA offset for direct mappings and
* an iommu_table for remapped DMA.
@@ -33,9 +38,6 @@ struct dev_archdata {
#ifdef CONFIG_IOMMU_API
void *iommu_domain;
#endif
-#ifdef CONFIG_SWIOTLB
- dma_addr_t max_direct_dma_addr;
-#endif
#ifdef CONFIG_PPC64
struct pci_dn *pci_data;
#endif
@@ -54,6 +56,4 @@ struct pdev_archdata {
u64 dma_mask;
};
-#define ARCH_HAS_DMA_GET_REQUIRED_MASK
-
#endif /* _ASM_POWERPC_DEVICE_H */
diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h
index 7702875aabb7..a2912b47102c 100644
--- a/arch/powerpc/include/asm/dma-direct.h
+++ b/arch/powerpc/include/asm/dma-direct.h
@@ -4,26 +4,24 @@
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
-#ifdef CONFIG_SWIOTLB
- struct dev_archdata *sd = &dev->archdata;
-
- if (sd->max_direct_dma_addr && addr + size > sd->max_direct_dma_addr)
- return false;
-#endif
-
if (!dev->dma_mask)
return false;
- return addr + size - 1 <= *dev->dma_mask;
+ return addr + size - 1 <=
+ min_not_zero(*dev->dma_mask, dev->bus_dma_mask);
}
static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
- return paddr + get_dma_offset(dev);
+ if (!dev)
+ return paddr + PCI_DRAM_OFFSET;
+ return paddr + dev->archdata.dma_offset;
}
static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
{
- return daddr - get_dma_offset(dev);
+ if (!dev)
+ return daddr - PCI_DRAM_OFFSET;
+ return daddr - dev->archdata.dma_offset;
}
#endif /* ASM_POWERPC_DMA_DIRECT_H */
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index ebf66809f2d3..565d6f74b189 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -1,74 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2004 IBM
- *
- * Implements the generic device dma API for powerpc.
- * the pci and vio busses
*/
#ifndef _ASM_DMA_MAPPING_H
#define _ASM_DMA_MAPPING_H
-#ifdef __KERNEL__
-
-#include <linux/types.h>
-#include <linux/cache.h>
-/* need struct page definitions */
-#include <linux/mm.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-debug.h>
-#include <asm/io.h>
-#include <asm/swiotlb.h>
-
-/* Some dma direct funcs must be visible for use in other dma_ops */
-extern void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs);
-extern void __dma_nommu_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs);
-extern int dma_nommu_mmap_coherent(struct device *dev,
- struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t handle,
- size_t size, unsigned long attrs);
-
-#ifdef CONFIG_NOT_COHERENT_CACHE
-/*
- * DMA-consistent mapping functions for PowerPCs that don't support
- * cache snooping. These allocate/free a region of uncached mapped
- * memory space for use with DMA devices. Alternatively, you could
- * allocate the space "normally" and use the cache management functions
- * to ensure it is consistent.
- */
-struct device;
-extern void __dma_sync(void *vaddr, size_t size, int direction);
-extern void __dma_sync_page(struct page *page, unsigned long offset,
- size_t size, int direction);
-extern unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr);
-
-#else /* ! CONFIG_NOT_COHERENT_CACHE */
-/*
- * Cache coherent cores.
- */
-
-#define __dma_sync(addr, size, rw) ((void)0)
-#define __dma_sync_page(pg, off, sz, rw) ((void)0)
-
-#endif /* ! CONFIG_NOT_COHERENT_CACHE */
-
-static inline unsigned long device_to_mask(struct device *dev)
-{
- if (dev->dma_mask && *dev->dma_mask)
- return *dev->dma_mask;
- /* Assume devices without mask can take 32 bit addresses */
- return 0xfffffffful;
-}
-
-/*
- * Available generic sets of operations
- */
-#ifdef CONFIG_PPC64
-extern struct dma_map_ops dma_iommu_ops;
-#endif
-extern const struct dma_map_ops dma_nommu_ops;
static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
@@ -80,31 +15,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
return NULL;
}
-/*
- * get_dma_offset()
- *
- * Get the dma offset on configurations where the dma address can be determined
- * from the physical address by looking at a simple offset. Direct dma and
- * swiotlb use this function, but it is typically not used by implementations
- * with an iommu.
- */
-static inline dma_addr_t get_dma_offset(struct device *dev)
-{
- if (dev)
- return dev->archdata.dma_offset;
-
- return PCI_DRAM_OFFSET;
-}
-
-static inline void set_dma_offset(struct device *dev, dma_addr_t off)
-{
- if (dev)
- dev->archdata.dma_offset = off;
-}
-
-#define HAVE_ARCH_DMA_SET_MASK 1
-
-extern u64 __dma_get_required_mask(struct device *dev);
-
-#endif /* __KERNEL__ */
#endif /* _ASM_DMA_MAPPING_H */
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 8b596d096ebe..94cfcf33030a 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -219,7 +219,8 @@ struct eeh_ops {
};
extern int eeh_subsystem_flags;
-extern int eeh_max_freezes;
+extern u32 eeh_max_freezes;
+extern bool eeh_debugfs_no_recover;
extern struct eeh_ops *eeh_ops;
extern raw_spinlock_t confirm_error_lock;
@@ -293,14 +294,14 @@ void eeh_add_device_late(struct pci_dev *);
void eeh_add_device_tree_late(struct pci_bus *);
void eeh_add_sysfs_files(struct pci_bus *);
void eeh_remove_device(struct pci_dev *);
-int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state);
+int eeh_unfreeze_pe(struct eeh_pe *pe);
int eeh_pe_reset_and_recover(struct eeh_pe *pe);
int eeh_dev_open(struct pci_dev *pdev);
void eeh_dev_release(struct pci_dev *pdev);
struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group);
int eeh_pe_set_option(struct eeh_pe *pe, int option);
int eeh_pe_get_state(struct eeh_pe *pe);
-int eeh_pe_reset(struct eeh_pe *pe, int option);
+int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed);
int eeh_pe_configure(struct eeh_pe *pe);
int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
unsigned long addr, unsigned long mask);
@@ -460,6 +461,9 @@ static inline void eeh_readsl(const volatile void __iomem *addr, void * buf,
eeh_check_failure(addr);
}
+
+void eeh_cache_debugfs_init(void);
+
#endif /* CONFIG_PPC64 */
#endif /* __KERNEL__ */
#endif /* _POWERPC_EEH_H */
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index 9884e872686f..6d0412b846ac 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -33,6 +33,7 @@ struct eeh_event {
int eeh_event_init(void);
int eeh_send_failure_event(struct eeh_pe *pe);
+int __eeh_send_failure_event(struct eeh_pe *pe);
void eeh_remove_event(struct eeh_pe *pe, bool force);
void eeh_handle_normal_event(struct eeh_pe *pe);
void eeh_handle_special_event(void);
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 3b4767ed3ec5..937bb630093f 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -671,7 +671,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define RUNLATCH_ON \
BEGIN_FTR_SECTION \
- CURRENT_THREAD_INFO(r3, r1); \
+ ld r3, PACA_THREAD_INFO(r13); \
ld r4,TI_LOCAL_FLAGS(r3); \
andi. r0,r4,_TLF_RUNLATCH; \
beql ppc64_runlatch_on_trampoline; \
@@ -721,7 +721,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
#ifdef CONFIG_PPC_970_NAP
#define FINISH_NAP \
BEGIN_FTR_SECTION \
- CURRENT_THREAD_INFO(r11, r1); \
+ ld r11, PACA_THREAD_INFO(r13); \
ld r9,TI_LOCAL_FLAGS(r11); \
andi. r10,r9,_TLF_NAPPING; \
bnel power4_fixup_nap; \
diff --git a/arch/powerpc/include/asm/hvsi.h b/arch/powerpc/include/asm/hvsi.h
index 3fdc54df63c9..464a7519ed64 100644
--- a/arch/powerpc/include/asm/hvsi.h
+++ b/arch/powerpc/include/asm/hvsi.h
@@ -64,7 +64,7 @@ struct hvsi_priv {
unsigned int inbuf_len; /* data in input buffer */
unsigned char inbuf[HVSI_INBUF_SIZE];
unsigned int inbuf_cur; /* Cursor in input buffer */
- unsigned int inbuf_pktlen; /* packet lenght from cursor */
+ unsigned int inbuf_pktlen; /* packet length from cursor */
atomic_t seqno; /* packet sequence number */
unsigned int opened:1; /* driver opened */
unsigned int established:1; /* protocol established */
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 7f19fbd3ba55..4b73847e9b95 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -783,8 +783,10 @@ extern void __iounmap_at(void *ea, unsigned long size);
#define mmio_read16be(addr) readw_be(addr)
#define mmio_read32be(addr) readl_be(addr)
+#define mmio_read64be(addr) readq_be(addr)
#define mmio_write16be(val, addr) writew_be(val, addr)
#define mmio_write32be(val, addr) writel_be(val, addr)
+#define mmio_write64be(val, addr) writeq_be(val, addr)
#define mmio_insb(addr, dst, count) readsb(addr, dst, count)
#define mmio_insw(addr, dst, count) readsw(addr, dst, count)
#define mmio_insl(addr, dst, count) readsl(addr, dst, count)
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 17524d222a7b..0ac52392ed99 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -237,6 +237,7 @@ static inline void iommu_del_device(struct device *dev)
}
#endif /* !CONFIG_IOMMU_API */
+u64 dma_iommu_get_required_mask(struct device *dev);
#else
static inline void *get_iommu_table_base(struct device *dev)
@@ -318,5 +319,21 @@ extern void iommu_release_ownership(struct iommu_table *tbl);
extern enum dma_data_direction iommu_tce_direction(unsigned long tce);
extern unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir);
+#ifdef CONFIG_PPC_CELL_NATIVE
+extern bool iommu_fixed_is_weak;
+#else
+#define iommu_fixed_is_weak false
+#endif
+
+extern const struct dma_map_ops dma_iommu_ops;
+
+static inline unsigned long device_to_mask(struct device *dev)
+{
+ if (dev->dma_mask && *dev->dma_mask)
+ return *dev->dma_mask;
+ /* Assume devices without mask can take 32 bit addresses */
+ return 0xfffffffful;
+}
+
#endif /* __KERNEL__ */
#endif /* _ASM_IOMMU_H */
diff --git a/arch/powerpc/include/asm/ipic.h b/arch/powerpc/include/asm/ipic.h
index 3dbd47f2bffe..abad50a745db 100644
--- a/arch/powerpc/include/asm/ipic.h
+++ b/arch/powerpc/include/asm/ipic.h
@@ -69,10 +69,7 @@ enum ipic_mcp_irq {
IPIC_MCP_MU = 7,
};
-extern void ipic_set_highest_priority(unsigned int irq);
extern void ipic_set_default_priority(void);
-extern void ipic_enable_mcp(enum ipic_mcp_irq mcp_irq);
-extern void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq);
extern u32 ipic_get_mcp_status(void);
extern void ipic_clear_mcp_status(u32 mask);
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index ee39ce56b2a2..c91a60cda4fa 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -48,23 +48,19 @@ struct pt_regs;
* Per-cpu stacks for handling critical, debug and machine check
* level interrupts.
*/
-extern struct thread_info *critirq_ctx[NR_CPUS];
-extern struct thread_info *dbgirq_ctx[NR_CPUS];
-extern struct thread_info *mcheckirq_ctx[NR_CPUS];
-extern void exc_lvl_ctx_init(void);
-#else
-#define exc_lvl_ctx_init()
+extern void *critirq_ctx[NR_CPUS];
+extern void *dbgirq_ctx[NR_CPUS];
+extern void *mcheckirq_ctx[NR_CPUS];
#endif
/*
* Per-cpu stacks for handling hard and soft interrupts.
*/
-extern struct thread_info *hardirq_ctx[NR_CPUS];
-extern struct thread_info *softirq_ctx[NR_CPUS];
+extern void *hardirq_ctx[NR_CPUS];
+extern void *softirq_ctx[NR_CPUS];
-extern void irq_ctx_init(void);
-extern void call_do_softirq(struct thread_info *tp);
-extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp);
+void call_do_softirq(void *sp);
+void call_do_irq(struct pt_regs *regs, void *sp);
extern void do_IRQ(struct pt_regs *regs);
extern void __init init_IRQ(void);
extern void __do_irq(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index eb0d79f0ca45..a6c8548ed9fa 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -141,6 +141,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags);
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
@@ -632,7 +633,7 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
unsigned int yield_count);
long kvmppc_h_random(struct kvm_vcpu *vcpu);
void kvmhv_commence_exit(int trap);
-long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
+void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
void kvmppc_subcore_enter_guest(void);
void kvmppc_subcore_exit_guest(void);
long kvmppc_realmode_hmi_handler(void);
diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h
index 47a03b9b528b..5070df19d463 100644
--- a/arch/powerpc/include/asm/livepatch.h
+++ b/arch/powerpc/include/asm/livepatch.h
@@ -21,6 +21,7 @@
#include <linux/module.h>
#include <linux/ftrace.h>
+#include <linux/sched/task_stack.h>
#ifdef CONFIG_LIVEPATCH
static inline int klp_check_compiler_support(void)
@@ -43,13 +44,13 @@ static inline unsigned long klp_get_ftrace_location(unsigned long faddr)
return ftrace_location_range(faddr, faddr + 16);
}
-static inline void klp_init_thread_info(struct thread_info *ti)
+static inline void klp_init_thread_info(struct task_struct *p)
{
/* + 1 to account for STACK_END_MAGIC */
- ti->livepatch_sp = (unsigned long *)(ti + 1) + 1;
+ task_thread_info(p)->livepatch_sp = end_of_stack(p) + 1;
}
#else
-static void klp_init_thread_info(struct thread_info *ti) { }
+static inline void klp_init_thread_info(struct task_struct *p) { }
#endif /* CONFIG_LIVEPATCH */
#endif /* _ASM_POWERPC_LIVEPATCH_H */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 8311869005fa..2f0ca6560e47 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -47,9 +47,7 @@ struct machdep_calls {
#endif
#endif /* CONFIG_PPC64 */
- /* Platform set_dma_mask and dma_get_required_mask overrides */
- int (*dma_set_mask)(struct device *dev, u64 dma_mask);
- u64 (*dma_get_required_mask)(struct device *dev);
+ void (*dma_set_mask)(struct device *dev, u64 dma_mask);
int (*probe)(void);
void (*setup_arch)(void); /* Optional, may be NULL */
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index a8b8903e1844..17996bc9382b 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -209,7 +209,7 @@ extern int get_mce_event(struct machine_check_event *mce, bool release);
extern void release_mce_event(void);
extern void machine_check_queue_event(void);
extern void machine_check_print_event_info(struct machine_check_event *evt,
- bool user_mode);
+ bool user_mode, bool in_guest);
#ifdef CONFIG_PPC_BOOK3S_64
void flush_and_reload_slb(void);
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 25607604a7a5..d34ad1657d7b 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -289,6 +289,17 @@ static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
}
#endif /* CONFIG_PPC_MEM_KEYS */
+#ifdef CONFIG_STRICT_KERNEL_RWX
+static inline bool strict_kernel_rwx_enabled(void)
+{
+ return rodata_enabled;
+}
+#else
+static inline bool strict_kernel_rwx_enabled(void)
+{
+ return false;
+}
+#endif
#endif /* !__ASSEMBLY__ */
/* The kernel use the constants below to index in the page sizes array.
@@ -356,6 +367,8 @@ extern void early_init_mmu_secondary(void);
extern void setup_initial_memory_limit(phys_addr_t first_memblock_base,
phys_addr_t first_memblock_size);
static inline void mmu_early_init_devtree(void) { }
+
+extern void *abatron_pteptrs[2];
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index bd9ba8defd72..84b4cfe73edd 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -14,4 +14,6 @@ extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
#endif
+extern void hv_nmi_check_nonrecoverable(struct pt_regs *regs);
+
#endif /* _ASM_NMI_H */
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index b0f764c827c0..0a1a3fc54e54 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -231,9 +231,10 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
}
/* patch sites */
-extern s32 patch__itlbmiss_linmem_top;
+extern s32 patch__itlbmiss_linmem_top, patch__itlbmiss_linmem_top8;
extern s32 patch__dtlbmiss_linmem_top, patch__dtlbmiss_immr_jmp;
extern s32 patch__fixupdar_linmem_top;
+extern s32 patch__dtlbmiss_romem_top, patch__dtlbmiss_romem_top8;
extern s32 patch__itlbmiss_exit_1, patch__itlbmiss_exit_2;
extern s32 patch__dtlbmiss_exit_1, patch__dtlbmiss_exit_2, patch__dtlbmiss_exit_3;
diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h
index 09a518bb7c03..629a5cdcc865 100644
--- a/arch/powerpc/include/asm/nvram.h
+++ b/arch/powerpc/include/asm/nvram.h
@@ -78,9 +78,6 @@ extern int pmac_get_partition(int partition);
extern u8 pmac_xpram_read(int xpaddr);
extern void pmac_xpram_write(int xpaddr, u8 data);
-/* Synchronize NVRAM */
-extern void nvram_sync(void);
-
/* Initialize NVRAM OS partition */
extern int __init nvram_init_os_partition(struct nvram_os_partition *part);
@@ -98,10 +95,4 @@ extern int nvram_write_os_partition(struct nvram_os_partition *part,
unsigned int err_type,
unsigned int error_log_cnt);
-/* Determine NVRAM size */
-extern ssize_t nvram_get_size(void);
-
-/* Normal access to NVRAM */
-extern unsigned char nvram_read_byte(int i);
-extern void nvram_write_byte(unsigned char c, int i);
#endif /* _ASM_POWERPC_NVRAM_H */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 5c5ea2413413..ed870468ef6f 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -20,20 +20,11 @@
/*
* On regular PPC32 page size is 4K (but we support 4K/16K/64K/256K pages
- * on PPC44x). For PPC64 we support either 4K or 64K software
+ * on PPC44x and 4K/16K on 8xx). For PPC64 we support either 4K or 64K software
* page size. When using 64K pages however, whether we are really supporting
* 64K pages in HW or not is irrelevant to those definitions.
*/
-#if defined(CONFIG_PPC_256K_PAGES)
-#define PAGE_SHIFT 18
-#elif defined(CONFIG_PPC_64K_PAGES)
-#define PAGE_SHIFT 16
-#elif defined(CONFIG_PPC_16K_PAGES)
-#define PAGE_SHIFT 14
-#else
-#define PAGE_SHIFT 12
-#endif
-
+#define PAGE_SHIFT CONFIG_PPC_PAGE_SHIFT
#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
#ifndef __ASSEMBLY__
@@ -326,7 +317,6 @@ struct page;
extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
extern void copy_user_page(void *to, void *from, unsigned long vaddr,
struct page *p);
-extern int page_is_ram(unsigned long pfn);
extern int devmem_is_allowed(unsigned long pfn);
#ifdef CONFIG_PPC_SMLPAR
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index aee4fcc24990..fc188e0e9179 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -10,6 +10,7 @@
#include <linux/pci.h>
#include <linux/list.h>
#include <linux/ioport.h>
+#include <linux/numa.h>
struct device_node;
@@ -19,6 +20,8 @@ struct device_node;
struct pci_controller_ops {
void (*dma_dev_setup)(struct pci_dev *pdev);
void (*dma_bus_setup)(struct pci_bus *bus);
+ bool (*iommu_bypass_supported)(struct pci_dev *pdev,
+ u64 mask);
int (*probe_mode)(struct pci_bus *bus);
@@ -43,9 +46,6 @@ struct pci_controller_ops {
void (*teardown_msi_irqs)(struct pci_dev *pdev);
#endif
- int (*dma_set_mask)(struct pci_dev *pdev, u64 dma_mask);
- u64 (*dma_get_required_mask)(struct pci_dev *pdev);
-
void (*shutdown)(struct pci_controller *hose);
};
@@ -265,7 +265,7 @@ extern int pcibios_map_io_space(struct pci_bus *bus);
#ifdef CONFIG_NUMA
#define PHB_SET_NODE(PHB, NODE) ((PHB)->node = (NODE))
#else
-#define PHB_SET_NODE(PHB, NODE) ((PHB)->node = -1)
+#define PHB_SET_NODE(PHB, NODE) ((PHB)->node = NUMA_NO_NODE)
#endif
#endif /* CONFIG_PPC64 */
@@ -274,6 +274,8 @@ extern int pcibios_map_io_space(struct pci_bus *bus);
extern struct pci_controller *pci_find_hose_for_OF_device(
struct device_node* node);
+extern struct pci_controller *pci_find_controller_for_domain(int domain_nr);
+
/* Fill up host controller resources from the OF node */
extern void pci_process_bridge_OF_ranges(struct pci_controller *hose,
struct device_node *dev, int primary);
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 0c72f1897063..6a1861a6301e 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -52,10 +52,8 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
#ifdef CONFIG_PCI
extern void set_pci_dma_ops(const struct dma_map_ops *dma_ops);
-extern const struct dma_map_ops *get_pci_dma_ops(void);
#else /* CONFIG_PCI */
#define set_pci_dma_ops(d)
-#define get_pci_dma_ops() NULL
#endif
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index dad1d27e196d..505550fb2935 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -66,7 +66,6 @@ extern unsigned long empty_zero_page[];
extern pgd_t swapper_pg_dir[];
-int dma_pfn_limit_to_zone(u64 pfn_limit);
extern void paging_init(void);
/*
diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
index 2f3ff7a27881..05b552418519 100644
--- a/arch/powerpc/include/asm/powernv.h
+++ b/arch/powerpc/include/asm/powernv.h
@@ -23,6 +23,8 @@ extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
unsigned long *flags, unsigned long *status,
int count);
+void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val);
+
void pnv_tm_init(void);
#else
static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { }
@@ -40,7 +42,6 @@ static inline int pnv_npu2_handle_fault(struct npu_context *context,
}
static inline void pnv_tm_init(void) { }
-static inline void pnv_power9_force_smt4(void) { }
#endif
#endif /* _ASM_POWERNV_H */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 19a8834e0398..c5698a523bb1 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -326,6 +326,7 @@
#define PPC_INST_ADDI 0x38000000
#define PPC_INST_ADDIS 0x3c000000
#define PPC_INST_ADD 0x7c000214
+#define PPC_INST_ADDC 0x7c000014
#define PPC_INST_SUB 0x7c000050
#define PPC_INST_BLR 0x4e800020
#define PPC_INST_BLRL 0x4e800021
@@ -334,9 +335,13 @@
#define PPC_INST_MULLW 0x7c0001d6
#define PPC_INST_MULHWU 0x7c000016
#define PPC_INST_MULLI 0x1c000000
+#define PPC_INST_MADDHD 0x10000030
+#define PPC_INST_MADDHDU 0x10000031
+#define PPC_INST_MADDLD 0x10000033
#define PPC_INST_DIVWU 0x7c000396
#define PPC_INST_DIVD 0x7c0003d2
#define PPC_INST_RLWINM 0x54000000
+#define PPC_INST_RLWINM_DOT 0x54000001
#define PPC_INST_RLWIMI 0x50000000
#define PPC_INST_RLDICL 0x78000000
#define PPC_INST_RLDICR 0x78000004
@@ -376,6 +381,7 @@
/* macros to insert fields into opcodes */
#define ___PPC_RA(a) (((a) & 0x1f) << 16)
#define ___PPC_RB(b) (((b) & 0x1f) << 11)
+#define ___PPC_RC(c) (((c) & 0x1f) << 6)
#define ___PPC_RS(s) (((s) & 0x1f) << 21)
#define ___PPC_RT(t) ___PPC_RS(t)
#define ___PPC_R(r) (((r) & 0x1) << 16)
@@ -395,7 +401,7 @@
#define __PPC_WS(w) (((w) & 0x1f) << 11)
#define __PPC_SH(s) __PPC_WS(s)
#define __PPC_SH64(s) (__PPC_SH(s) | (((s) & 0x20) >> 4))
-#define __PPC_MB(s) (((s) & 0x1f) << 6)
+#define __PPC_MB(s) ___PPC_RC(s)
#define __PPC_ME(s) (((s) & 0x1f) << 1)
#define __PPC_MB64(s) (__PPC_MB(s) | ((s) & 0x20))
#define __PPC_ME64(s) __PPC_MB64(s)
@@ -437,6 +443,15 @@
#define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_INST_STQCX | \
___PPC_RT(t) | ___PPC_RA(a) | \
___PPC_RB(b))
+#define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_INST_MADDHD | \
+ ___PPC_RT(t) | ___PPC_RA(a) | \
+ ___PPC_RB(b) | ___PPC_RC(c))
+#define PPC_MADDHDU(t, a, b, c) stringify_in_c(.long PPC_INST_MADDHDU | \
+ ___PPC_RT(t) | ___PPC_RA(a) | \
+ ___PPC_RB(b) | ___PPC_RC(c))
+#define PPC_MADDLD(t, a, b, c) stringify_in_c(.long PPC_INST_MADDLD | \
+ ___PPC_RT(t) | ___PPC_RA(a) | \
+ ___PPC_RB(b) | ___PPC_RC(c))
#define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \
___PPC_RB(b))
#define PPC_MSGSYNC stringify_in_c(.long PPC_INST_MSGSYNC)
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index f67da277d652..f191ef0d2a0a 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -53,13 +53,13 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev);
struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr);
void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
int eeh_pci_enable(struct eeh_pe *pe, int function);
-int eeh_pe_reset_full(struct eeh_pe *pe);
+int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed);
void eeh_save_bars(struct eeh_dev *edev);
int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
void eeh_pe_state_mark(struct eeh_pe *pe, int state);
void eeh_pe_mark_isolated(struct eeh_pe *pe);
-void eeh_pe_state_clear(struct eeh_pe *pe, int state);
+void eeh_pe_state_clear(struct eeh_pe *pe, int state, bool include_passed);
void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state);
void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index ee58526cb6c2..3351bcf42f2d 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -40,7 +40,7 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
-#include <asm/thread_info.h>
+#include <linux/thread_info.h>
#include <asm/ptrace.h>
#include <asm/hw_breakpoint.h>
@@ -77,105 +77,15 @@ extern int _chrp_type;
#ifdef __KERNEL__
-struct task_struct;
-void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp);
-void release_thread(struct task_struct *);
-
-#ifdef CONFIG_PPC32
-
-#if CONFIG_TASK_SIZE > CONFIG_KERNEL_START
-#error User TASK_SIZE overlaps with KERNEL_START address
-#endif
-#define TASK_SIZE (CONFIG_TASK_SIZE)
-
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3)
-#endif
-
#ifdef CONFIG_PPC64
-/*
- * 64-bit user address space can have multiple limits
- * For now supported values are:
- */
-#define TASK_SIZE_64TB (0x0000400000000000UL)
-#define TASK_SIZE_128TB (0x0000800000000000UL)
-#define TASK_SIZE_512TB (0x0002000000000000UL)
-#define TASK_SIZE_1PB (0x0004000000000000UL)
-#define TASK_SIZE_2PB (0x0008000000000000UL)
-/*
- * With 52 bits in the address we can support
- * upto 4PB of range.
- */
-#define TASK_SIZE_4PB (0x0010000000000000UL)
-
-/*
- * For now 512TB is only supported with book3s and 64K linux page size.
- */
-#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES)
-/*
- * Max value currently used:
- */
-#define TASK_SIZE_USER64 TASK_SIZE_4PB
-#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB
-#define TASK_CONTEXT_SIZE TASK_SIZE_512TB
+#include <asm/task_size_64.h>
#else
-#define TASK_SIZE_USER64 TASK_SIZE_64TB
-#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB
-/*
- * We don't need to allocate extended context ids for 4K page size, because
- * we limit the max effective address on this config to 64TB.
- */
-#define TASK_CONTEXT_SIZE TASK_SIZE_64TB
+#include <asm/task_size_32.h>
#endif
-/*
- * 32-bit user address space is 4GB - 1 page
- * (this 1 page is needed so referencing of 0xFFFFFFFF generates EFAULT
- */
-#define TASK_SIZE_USER32 (0x0000000100000000UL - (1*PAGE_SIZE))
-
-#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
- TASK_SIZE_USER32 : TASK_SIZE_USER64)
-#define TASK_SIZE TASK_SIZE_OF(current)
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
-#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4))
-
-#define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \
- TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
-#endif
-
-/*
- * Initial task size value for user applications. For book3s 64 we start
- * with 128TB and conditionally enable upto 512TB
- */
-#ifdef CONFIG_PPC_BOOK3S_64
-#define DEFAULT_MAP_WINDOW ((is_32bit_task()) ? \
- TASK_SIZE_USER32 : DEFAULT_MAP_WINDOW_USER64)
-#else
-#define DEFAULT_MAP_WINDOW TASK_SIZE
-#endif
-
-#ifdef __powerpc64__
-
-#define STACK_TOP_USER64 DEFAULT_MAP_WINDOW_USER64
-#define STACK_TOP_USER32 TASK_SIZE_USER32
-
-#define STACK_TOP (is_32bit_task() ? \
- STACK_TOP_USER32 : STACK_TOP_USER64)
-
-#define STACK_TOP_MAX TASK_SIZE_USER64
-
-#else /* __powerpc64__ */
-
-#define STACK_TOP TASK_SIZE
-#define STACK_TOP_MAX STACK_TOP
-
-#endif /* __powerpc64__ */
+struct task_struct;
+void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp);
+void release_thread(struct task_struct *);
typedef struct {
unsigned long seg;
@@ -250,6 +160,9 @@ struct thread_struct {
#ifdef CONFIG_PPC32
void *pgdir; /* root of page-table tree */
unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */
+#ifdef CONFIG_PPC_RTAS
+ unsigned long rtas_sp; /* stack pointer for when in RTAS */
+#endif
#endif
/* Debug Registers */
struct debug_reg debug;
@@ -357,8 +270,7 @@ struct thread_struct {
#define ARCH_MIN_TASKALIGN 16
#define INIT_SP (sizeof(init_stack) + (unsigned long) &init_stack)
-#define INIT_SP_LIMIT \
- (_ALIGN_UP(sizeof(init_thread_info), 16) + (unsigned long) &init_stack)
+#define INIT_SP_LIMIT ((unsigned long)&init_stack)
#ifdef CONFIG_SPE
#define SPEFSCR_INIT \
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 0b8a735b6d85..64271e562fed 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -157,7 +157,7 @@ extern int ptrace_put_reg(struct task_struct *task, int regno,
unsigned long data);
#define current_pt_regs() \
- ((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE) - 1)
+ ((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1)
/*
* We use the least-significant bit of the trap field to indicate
* whether we have saved the full set of registers, or only a
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1c98ef1f2d5b..c5b2aff0ce8e 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1062,7 +1062,7 @@
* - SPRG9 debug exception scratch
*
* All 32-bit:
- * - SPRG3 current thread_info pointer
+ * - SPRG3 current thread_struct physical addr pointer
* (virtual on BookE, physical on others)
*
* 32-bit classic:
@@ -1167,7 +1167,7 @@
#ifdef CONFIG_PPC_BOOK3S_32
#define SPRN_SPRG_SCRATCH0 SPRN_SPRG0
#define SPRN_SPRG_SCRATCH1 SPRN_SPRG1
-#define SPRN_SPRG_RTAS SPRN_SPRG2
+#define SPRN_SPRG_PGDIR SPRN_SPRG2
#define SPRN_SPRG_603_LRU SPRN_SPRG4
#endif
@@ -1425,6 +1425,11 @@ static inline void msr_check_and_clear(unsigned long bits)
#define mfsrin(v) ({unsigned int rval; \
asm volatile("mfsrin %0,%1" : "=r" (rval) : "r" (v)); \
rval;})
+
+static inline void mtsrin(u32 val, u32 idx)
+{
+ asm volatile("mtsrin %0, %1" : : "r" (val), "r" (idx));
+}
#endif
#define proc_trap() asm volatile("trap")
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index e335a8f846af..4a1664a8658d 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -17,6 +17,13 @@ extern char __end_interrupts[];
extern char __prom_init_toc_start[];
extern char __prom_init_toc_end[];
+#ifdef CONFIG_PPC_POWERNV
+extern char start_real_trampolines[];
+extern char end_real_trampolines[];
+extern char start_virt_trampolines[];
+extern char end_virt_trampolines[];
+#endif
+
static inline int in_kernel_text(unsigned long addr)
{
if (addr >= (unsigned long)_stext && addr < (unsigned long)__init_end)
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 41695745032c..0de717e16dd6 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -83,7 +83,22 @@ int is_cpu_dead(unsigned int cpu);
/* 32-bit */
extern int smp_hw_index[];
-#define raw_smp_processor_id() (current_thread_info()->cpu)
+/*
+ * This is particularly ugly: it appears we can't actually get the definition
+ * of task_struct here, but we need access to the CPU this task is running on.
+ * Instead of using task_struct we're using _TASK_CPU which is extracted from
+ * asm-offsets.h by kbuild to get the current processor ID.
+ *
+ * This also needs to be safeguarded when building asm-offsets.s because at
+ * that time _TASK_CPU is not defined yet. It could have been guarded by
+ * _TASK_CPU itself, but we want the build to fail if _TASK_CPU is missing
+ * when building something else than asm-offsets.s
+ */
+#ifdef GENERATING_ASM_OFFSETS
+#define raw_smp_processor_id() (0)
+#else
+#define raw_smp_processor_id() (*(unsigned int *)((void *)current + _TASK_CPU))
+#endif
#define hard_smp_processor_id() (smp_hw_index[smp_processor_id()])
static inline int get_hard_smp_processor_id(int cpu)
diff --git a/arch/powerpc/include/asm/swiotlb.h b/arch/powerpc/include/asm/swiotlb.h
index f65ecf57b66c..b7d082c0ec25 100644
--- a/arch/powerpc/include/asm/swiotlb.h
+++ b/arch/powerpc/include/asm/swiotlb.h
@@ -13,12 +13,7 @@
#include <linux/swiotlb.h>
-extern const struct dma_map_ops powerpc_swiotlb_dma_ops;
-
extern unsigned int ppc_swiotlb_enable;
-int __init swiotlb_setup_bus_notifier(void);
-
-extern void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev);
#ifdef CONFIG_SWIOTLB
void swiotlb_detect_4g(void);
diff --git a/arch/powerpc/include/asm/task_size_32.h b/arch/powerpc/include/asm/task_size_32.h
new file mode 100644
index 000000000000..de7290ee770f
--- /dev/null
+++ b/arch/powerpc/include/asm/task_size_32.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_TASK_SIZE_32_H
+#define _ASM_POWERPC_TASK_SIZE_32_H
+
+#if CONFIG_TASK_SIZE > CONFIG_KERNEL_START
+#error User TASK_SIZE overlaps with KERNEL_START address
+#endif
+
+#define TASK_SIZE (CONFIG_TASK_SIZE)
+
+/*
+ * This decides where the kernel will search for a free chunk of vm space during
+ * mmap's.
+ */
+#define TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3)
+
+#define DEFAULT_MAP_WINDOW TASK_SIZE
+#define STACK_TOP TASK_SIZE
+#define STACK_TOP_MAX STACK_TOP
+
+#endif /* _ASM_POWERPC_TASK_SIZE_32_H */
diff --git a/arch/powerpc/include/asm/task_size_64.h b/arch/powerpc/include/asm/task_size_64.h
new file mode 100644
index 000000000000..eab4779f6b84
--- /dev/null
+++ b/arch/powerpc/include/asm/task_size_64.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_TASK_SIZE_64_H
+#define _ASM_POWERPC_TASK_SIZE_64_H
+
+/*
+ * 64-bit user address space can have multiple limits
+ * For now supported values are:
+ */
+#define TASK_SIZE_64TB (0x0000400000000000UL)
+#define TASK_SIZE_128TB (0x0000800000000000UL)
+#define TASK_SIZE_512TB (0x0002000000000000UL)
+#define TASK_SIZE_1PB (0x0004000000000000UL)
+#define TASK_SIZE_2PB (0x0008000000000000UL)
+
+/*
+ * With 52 bits in the address we can support up to 4PB of range.
+ */
+#define TASK_SIZE_4PB (0x0010000000000000UL)
+
+/*
+ * For now 512TB is only supported with book3s and 64K linux page size.
+ */
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES)
+/*
+ * Max value currently used:
+ */
+#define TASK_SIZE_USER64 TASK_SIZE_4PB
+#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB
+#define TASK_CONTEXT_SIZE TASK_SIZE_512TB
+#else
+#define TASK_SIZE_USER64 TASK_SIZE_64TB
+#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB
+
+/*
+ * We don't need to allocate extended context ids for 4K page size, because we
+ * limit the max effective address on this config to 64TB.
+ */
+#define TASK_CONTEXT_SIZE TASK_SIZE_64TB
+#endif
+
+/*
+ * 32-bit user address space is 4GB - 1 page
+ * (this 1 page is needed so referencing of 0xFFFFFFFF generates EFAULT
+ */
+#define TASK_SIZE_USER32 (0x0000000100000000UL - (1 * PAGE_SIZE))
+
+#define TASK_SIZE_OF(tsk) \
+ (test_tsk_thread_flag(tsk, TIF_32BIT) ? TASK_SIZE_USER32 : \
+ TASK_SIZE_USER64)
+
+#define TASK_SIZE TASK_SIZE_OF(current)
+
+#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
+#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4))
+
+/*
+ * This decides where the kernel will search for a free chunk of vm space during
+ * mmap's.
+ */
+#define TASK_UNMAPPED_BASE \
+ ((is_32bit_task()) ? TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64)
+
+/*
+ * Initial task size value for user applications. For book3s 64 we start
+ * with 128TB and conditionally enable upto 512TB
+ */
+#ifdef CONFIG_PPC_BOOK3S_64
+#define DEFAULT_MAP_WINDOW \
+ ((is_32bit_task()) ? TASK_SIZE_USER32 : DEFAULT_MAP_WINDOW_USER64)
+#else
+#define DEFAULT_MAP_WINDOW TASK_SIZE
+#endif
+
+#define STACK_TOP_USER64 DEFAULT_MAP_WINDOW_USER64
+#define STACK_TOP_USER32 TASK_SIZE_USER32
+#define STACK_TOP_MAX TASK_SIZE_USER64
+#define STACK_TOP (is_32bit_task() ? STACK_TOP_USER32 : STACK_TOP_USER64)
+
+#endif /* _ASM_POWERPC_TASK_SIZE_64_H */
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 544cac0474cb..8e1d0195ac36 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -17,12 +17,6 @@
#define THREAD_SIZE (1 << THREAD_SHIFT)
-#ifdef CONFIG_PPC64
-#define CURRENT_THREAD_INFO(dest, sp) stringify_in_c(clrrdi dest, sp, THREAD_SHIFT)
-#else
-#define CURRENT_THREAD_INFO(dest, sp) stringify_in_c(rlwinm dest, sp, 0, 0, 31-THREAD_SHIFT)
-#endif
-
#ifndef __ASSEMBLY__
#include <linux/cache.h>
#include <asm/processor.h>
@@ -34,8 +28,6 @@
* low level task data.
*/
struct thread_info {
- struct task_struct *task; /* main task structure */
- int cpu; /* cpu we're on */
int preempt_count; /* 0 => preemptable,
<0 => BUG */
unsigned long local_flags; /* private flags for thread */
@@ -58,8 +50,6 @@ struct thread_info {
*/
#define INIT_THREAD_INFO(tsk) \
{ \
- .task = &tsk, \
- .cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.flags = 0, \
}
@@ -67,15 +57,6 @@ struct thread_info {
#define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT)
/* how to get the thread information struct from C */
-static inline struct thread_info *current_thread_info(void)
-{
- unsigned long val;
-
- asm (CURRENT_THREAD_INFO(%0,1) : "=r" (val));
-
- return (struct thread_info *)val;
-}
-
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index a4a718dbfec6..f85e2b01c3df 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -132,6 +132,8 @@ static inline void shared_proc_topology_init(void) {}
#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_core_id(cpu) (cpu_to_core_id(cpu))
+
+int dlpar_cpu_readd(int cpu);
#endif
#endif
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index e3a731793ea2..4d6d905e9138 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -28,7 +28,6 @@
#define USER_DS MAKE_MM_SEG(TASK_SIZE - 1)
#endif
-#define get_ds() (KERNEL_DS)
#define get_fs() (current->thread.addr_limit)
static inline void set_fs(mm_segment_t fs)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index a3c35e6d6ffb..f44dbc65e38e 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -29,8 +29,8 @@
#define __ARCH_WANT_SYS_IPC
#define __ARCH_WANT_SYS_PAUSE
#define __ARCH_WANT_SYS_SIGNAL
-#define __ARCH_WANT_SYS_TIME
-#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_TIME32
+#define __ARCH_WANT_SYS_UTIME32
#define __ARCH_WANT_SYS_WAITPID
#define __ARCH_WANT_SYS_SOCKETCALL
#define __ARCH_WANT_SYS_FADVISE64
@@ -45,8 +45,8 @@
#define __ARCH_WANT_OLD_STAT
#endif
#ifdef CONFIG_PPC64
-#define __ARCH_WANT_COMPAT_SYS_TIME
-#define __ARCH_WANT_SYS_UTIME32
+#define __ARCH_WANT_SYS_TIME
+#define __ARCH_WANT_SYS_UTIME
#define __ARCH_WANT_SYS_NEWFSTATAT
#define __ARCH_WANT_COMPAT_SYS_SENDFILE
#endif
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index 94de465e0920..12aa0c43e775 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -11,8 +11,8 @@
#define SO_RCVLOWAT 16
#define SO_SNDLOWAT 17
-#define SO_RCVTIMEO 18
-#define SO_SNDTIMEO 19
+#define SO_RCVTIMEO_OLD 18
+#define SO_SNDTIMEO_OLD 19
#define SO_PASSCRED 20
#define SO_PEERCRED 21
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index cb7f0bb9ee71..cddadccf551d 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -36,7 +36,7 @@ obj-y := cputable.o ptrace.o syscalls.o \
process.o systbl.o idle.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
- udbg.o misc.o io.o dma.o misc_$(BITS).o \
+ udbg.o misc.o io.o misc_$(BITS).o \
of_platform.o prom_parse.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
@@ -105,6 +105,7 @@ obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o
+obj-$(CONFIG_ARCH_HAS_DMA_SET_MASK) += dma-mask.o
pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o
obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \
@@ -142,19 +143,29 @@ endif
obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
-# Disable GCOV & sanitizers in odd or sensitive code
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_prom_init.o := n
+KCOV_INSTRUMENT_prom_init.o := n
UBSAN_SANITIZE_prom_init.o := n
GCOV_PROFILE_machine_kexec_64.o := n
+KCOV_INSTRUMENT_machine_kexec_64.o := n
UBSAN_SANITIZE_machine_kexec_64.o := n
GCOV_PROFILE_machine_kexec_32.o := n
+KCOV_INSTRUMENT_machine_kexec_32.o := n
UBSAN_SANITIZE_machine_kexec_32.o := n
GCOV_PROFILE_kprobes.o := n
+KCOV_INSTRUMENT_kprobes.o := n
UBSAN_SANITIZE_kprobes.o := n
GCOV_PROFILE_kprobes-ftrace.o := n
+KCOV_INSTRUMENT_kprobes-ftrace.o := n
UBSAN_SANITIZE_kprobes-ftrace.o := n
UBSAN_SANITIZE_vdso.o := n
+# Necessary for booting with kcov enabled on book3e machines
+KCOV_INSTRUMENT_cputable.o := n
+KCOV_INSTRUMENT_setup_64.o := n
+KCOV_INSTRUMENT_paca.o := n
+
extra-$(CONFIG_PPC_FPU) += fpu.o
extra-$(CONFIG_ALTIVEC) += vector.o
extra-$(CONFIG_PPC64) += entry_64.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 9ffc72ded73a..86a61e5f8285 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -13,6 +13,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define GENERATING_ASM_OFFSETS /* asm/smp.h */
+
#include <linux/compat.h>
#include <linux/signal.h>
#include <linux/sched.h>
@@ -90,10 +92,15 @@ int main(void)
DEFINE(SIGSEGV, SIGSEGV);
DEFINE(NMI_MASK, NMI_MASK);
#else
- OFFSET(THREAD_INFO, task_struct, stack);
- DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
OFFSET(KSP_LIMIT, thread_struct, ksp_limit);
+#ifdef CONFIG_PPC_RTAS
+ OFFSET(RTAS_SP, thread_struct, rtas_sp);
+#endif
#endif /* CONFIG_PPC64 */
+ OFFSET(TASK_STACK, task_struct, stack);
+#ifdef CONFIG_SMP
+ OFFSET(TASK_CPU, task_struct, cpu);
+#endif
#ifdef CONFIG_LIVEPATCH
OFFSET(TI_livepatch_sp, thread_info, livepatch_sp);
@@ -161,8 +168,6 @@ int main(void)
OFFSET(TI_FLAGS, thread_info, flags);
OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags);
OFFSET(TI_PREEMPT, thread_info, preempt_count);
- OFFSET(TI_TASK, thread_info, task);
- OFFSET(TI_CPU, thread_info, cpu);
#ifdef CONFIG_PPC64
OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size);
@@ -177,6 +182,8 @@ int main(void)
OFFSET(PACAPROCSTART, paca_struct, cpu_start);
OFFSET(PACAKSAVE, paca_struct, kstack);
OFFSET(PACACURRENT, paca_struct, __current);
+ DEFINE(PACA_THREAD_INFO, offsetof(struct paca_struct, __current) +
+ offsetof(struct task_struct, thread_info));
OFFSET(PACASAVEDMSR, paca_struct, saved_msr);
OFFSET(PACAR1, paca_struct, saved_r1);
OFFSET(PACATOC, paca_struct, kernel_toc);
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index 8c069e96c478..6f1c11e0691f 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -24,6 +24,10 @@ BEGIN_MMU_FTR_SECTION
li r10,0
mtspr SPRN_SPRG_603_LRU,r10 /* init SW LRU tracking */
END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+ lis r10, (swapper_pg_dir - PAGE_OFFSET)@h
+ ori r10, r10, (swapper_pg_dir - PAGE_OFFSET)@l
+ mtspr SPRN_SPRG_PGDIR, r10
+
BEGIN_FTR_SECTION
bl __init_fpu_registers
END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE)
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 9c9bcaae2f75..09231ef06d01 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -6,12 +6,31 @@
* busses using the iommu infrastructure
*/
+#include <linux/dma-direct.h>
+#include <linux/pci.h>
#include <asm/iommu.h>
/*
* Generic iommu implementation
*/
+/*
+ * The coherent mask may be smaller than the real mask, check if we can
+ * really use a direct window.
+ */
+static inline bool dma_iommu_alloc_bypass(struct device *dev)
+{
+ return dev->archdata.iommu_bypass && !iommu_fixed_is_weak &&
+ dma_direct_supported(dev, dev->coherent_dma_mask);
+}
+
+static inline bool dma_iommu_map_bypass(struct device *dev,
+ unsigned long attrs)
+{
+ return dev->archdata.iommu_bypass &&
+ (!iommu_fixed_is_weak || (attrs & DMA_ATTR_WEAK_ORDERING));
+}
+
/* Allocates a contiguous real buffer and creates mappings over it.
* Returns the virtual address of the buffer and sets dma_handle
* to the dma address (mapping) of the first page.
@@ -20,6 +39,8 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag,
unsigned long attrs)
{
+ if (dma_iommu_alloc_bypass(dev))
+ return dma_direct_alloc(dev, size, dma_handle, flag, attrs);
return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
dma_handle, dev->coherent_dma_mask, flag,
dev_to_node(dev));
@@ -29,7 +50,11 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle,
unsigned long attrs)
{
- iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
+ if (dma_iommu_alloc_bypass(dev))
+ dma_direct_free(dev, size, vaddr, dma_handle, attrs);
+ else
+ iommu_free_coherent(get_iommu_table_base(dev), size, vaddr,
+ dma_handle);
}
/* Creates TCEs for a user provided buffer. The user buffer must be
@@ -42,6 +67,9 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
enum dma_data_direction direction,
unsigned long attrs)
{
+ if (dma_iommu_map_bypass(dev, attrs))
+ return dma_direct_map_page(dev, page, offset, size, direction,
+ attrs);
return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
size, device_to_mask(dev), direction, attrs);
}
@@ -51,8 +79,9 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction direction,
unsigned long attrs)
{
- iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction,
- attrs);
+ if (!dma_iommu_map_bypass(dev, attrs))
+ iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size,
+ direction, attrs);
}
@@ -60,6 +89,8 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
unsigned long attrs)
{
+ if (dma_iommu_map_bypass(dev, attrs))
+ return dma_direct_map_sg(dev, sglist, nelems, direction, attrs);
return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
device_to_mask(dev), direction, attrs);
}
@@ -68,10 +99,20 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
unsigned long attrs)
{
- ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
+ if (!dma_iommu_map_bypass(dev, attrs))
+ ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
direction, attrs);
}
+static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct pci_controller *phb = pci_bus_to_host(pdev->bus);
+
+ return phb->controller_ops.iommu_bypass_supported &&
+ phb->controller_ops.iommu_bypass_supported(pdev, mask);
+}
+
/* We support DMA to/from any memory page via the iommu */
int dma_iommu_dma_supported(struct device *dev, u64 mask)
{
@@ -83,32 +124,48 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
return 0;
}
+ if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
+ dev->archdata.iommu_bypass = true;
+ dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+ return 1;
+ }
+
if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
mask, tbl->it_offset << tbl->it_page_shift);
return 0;
- } else
- return 1;
+ }
+
+ dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
+ dev->archdata.iommu_bypass = false;
+ return 1;
}
-static u64 dma_iommu_get_required_mask(struct device *dev)
+u64 dma_iommu_get_required_mask(struct device *dev)
{
struct iommu_table *tbl = get_iommu_table_base(dev);
u64 mask;
+
if (!tbl)
return 0;
+ if (dev_is_pci(dev)) {
+ u64 bypass_mask = dma_direct_get_required_mask(dev);
+
+ if (dma_iommu_bypass_supported(dev, bypass_mask))
+ return bypass_mask;
+ }
+
mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1);
mask += mask - 1;
return mask;
}
-struct dma_map_ops dma_iommu_ops = {
+const struct dma_map_ops dma_iommu_ops = {
.alloc = dma_iommu_alloc_coherent,
.free = dma_iommu_free_coherent,
- .mmap = dma_nommu_mmap_coherent,
.map_sg = dma_iommu_map_sg,
.unmap_sg = dma_iommu_unmap_sg,
.dma_supported = dma_iommu_dma_supported,
diff --git a/arch/powerpc/kernel/dma-mask.c b/arch/powerpc/kernel/dma-mask.c
new file mode 100644
index 000000000000..ffbbbc432612
--- /dev/null
+++ b/arch/powerpc/kernel/dma-mask.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/dma-mapping.h>
+#include <linux/export.h>
+#include <asm/machdep.h>
+
+void arch_dma_set_mask(struct device *dev, u64 dma_mask)
+{
+ if (ppc_md.dma_set_mask)
+ ppc_md.dma_set_mask(dev, dma_mask);
+}
+EXPORT_SYMBOL(arch_dma_set_mask);
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 7d5fc9751622..132d61c91629 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -10,101 +10,12 @@
* option) any later version.
*
*/
-
-#include <linux/dma-direct.h>
#include <linux/memblock.h>
-#include <linux/pfn.h>
-#include <linux/of_platform.h>
-#include <linux/platform_device.h>
-#include <linux/pci.h>
-
#include <asm/machdep.h>
#include <asm/swiotlb.h>
-#include <asm/dma.h>
unsigned int ppc_swiotlb_enable;
-static u64 swiotlb_powerpc_get_required(struct device *dev)
-{
- u64 end, mask, max_direct_dma_addr = dev->archdata.max_direct_dma_addr;
-
- end = memblock_end_of_DRAM();
- if (max_direct_dma_addr && end > max_direct_dma_addr)
- end = max_direct_dma_addr;
- end += get_dma_offset(dev);
-
- mask = 1ULL << (fls64(end) - 1);
- mask += mask - 1;
-
- return mask;
-}
-
-/*
- * At the moment, all platforms that use this code only require
- * swiotlb to be used if we're operating on HIGHMEM. Since
- * we don't ever call anything other than map_sg, unmap_sg,
- * map_page, and unmap_page on highmem, use normal dma_ops
- * for everything else.
- */
-const struct dma_map_ops powerpc_swiotlb_dma_ops = {
- .alloc = __dma_nommu_alloc_coherent,
- .free = __dma_nommu_free_coherent,
- .mmap = dma_nommu_mmap_coherent,
- .map_sg = dma_direct_map_sg,
- .unmap_sg = dma_direct_unmap_sg,
- .dma_supported = swiotlb_dma_supported,
- .map_page = dma_direct_map_page,
- .unmap_page = dma_direct_unmap_page,
- .sync_single_for_cpu = dma_direct_sync_single_for_cpu,
- .sync_single_for_device = dma_direct_sync_single_for_device,
- .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu,
- .sync_sg_for_device = dma_direct_sync_sg_for_device,
- .get_required_mask = swiotlb_powerpc_get_required,
-};
-
-void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
-{
- struct pci_controller *hose;
- struct dev_archdata *sd;
-
- hose = pci_bus_to_host(pdev->bus);
- sd = &pdev->dev.archdata;
- sd->max_direct_dma_addr =
- hose->dma_window_base_cur + hose->dma_window_size;
-}
-
-static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct device *dev = data;
- struct dev_archdata *sd;
-
- /* We are only intereted in device addition */
- if (action != BUS_NOTIFY_ADD_DEVICE)
- return 0;
-
- sd = &dev->archdata;
- sd->max_direct_dma_addr = 0;
-
- /* May need to bounce if the device can't address all of DRAM */
- if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM())
- set_dma_ops(dev, &powerpc_swiotlb_dma_ops);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block ppc_swiotlb_plat_bus_notifier = {
- .notifier_call = ppc_swiotlb_bus_notify,
- .priority = 0,
-};
-
-int __init swiotlb_setup_bus_notifier(void)
-{
- bus_register_notifier(&platform_bus_type,
- &ppc_swiotlb_plat_bus_notifier);
- return 0;
-}
-
void __init swiotlb_detect_4g(void)
{
if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
deleted file mode 100644
index b1903ebb2e9c..000000000000
--- a/arch/powerpc/kernel/dma.c
+++ /dev/null
@@ -1,362 +0,0 @@
-/*
- * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation
- *
- * Provide default implementations of the DMA mapping callbacks for
- * directly mapped busses.
- */
-
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-#include <linux/dma-debug.h>
-#include <linux/gfp.h>
-#include <linux/memblock.h>
-#include <linux/export.h>
-#include <linux/pci.h>
-#include <asm/vio.h>
-#include <asm/bug.h>
-#include <asm/machdep.h>
-#include <asm/swiotlb.h>
-#include <asm/iommu.h>
-
-/*
- * Generic direct DMA implementation
- *
- * This implementation supports a per-device offset that can be applied if
- * the address at which memory is visible to devices is not 0. Platform code
- * can set archdata.dma_data to an unsigned long holding the offset. By
- * default the offset is PCI_DRAM_OFFSET.
- */
-
-static u64 __maybe_unused get_pfn_limit(struct device *dev)
-{
- u64 pfn = (dev->coherent_dma_mask >> PAGE_SHIFT) + 1;
- struct dev_archdata __maybe_unused *sd = &dev->archdata;
-
-#ifdef CONFIG_SWIOTLB
- if (sd->max_direct_dma_addr && dev->dma_ops == &powerpc_swiotlb_dma_ops)
- pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT);
-#endif
-
- return pfn;
-}
-
-static int dma_nommu_dma_supported(struct device *dev, u64 mask)
-{
-#ifdef CONFIG_PPC64
- u64 limit = get_dma_offset(dev) + (memblock_end_of_DRAM() - 1);
-
- /* Limit fits in the mask, we are good */
- if (mask >= limit)
- return 1;
-
-#ifdef CONFIG_FSL_SOC
- /*
- * Freescale gets another chance via ZONE_DMA, however
- * that will have to be refined if/when they support iommus
- */
- return 1;
-#endif
- /* Sorry ... */
- return 0;
-#else
- return 1;
-#endif
-}
-
-#ifndef CONFIG_NOT_COHERENT_CACHE
-void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- void *ret;
- struct page *page;
- int node = dev_to_node(dev);
-#ifdef CONFIG_FSL_SOC
- u64 pfn = get_pfn_limit(dev);
- int zone;
-
- /*
- * This code should be OK on other platforms, but we have drivers that
- * don't set coherent_dma_mask. As a workaround we just ifdef it. This
- * whole routine needs some serious cleanup.
- */
-
- zone = dma_pfn_limit_to_zone(pfn);
- if (zone < 0) {
- dev_err(dev, "%s: No suitable zone for pfn %#llx\n",
- __func__, pfn);
- return NULL;
- }
-
- switch (zone) {
-#ifdef CONFIG_ZONE_DMA
- case ZONE_DMA:
- flag |= GFP_DMA;
- break;
-#endif
- };
-#endif /* CONFIG_FSL_SOC */
-
- page = alloc_pages_node(node, flag, get_order(size));
- if (page == NULL)
- return NULL;
- ret = page_address(page);
- memset(ret, 0, size);
- *dma_handle = __pa(ret) + get_dma_offset(dev);
-
- return ret;
-}
-
-void __dma_nommu_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- free_pages((unsigned long)vaddr, get_order(size));
-}
-#endif /* !CONFIG_NOT_COHERENT_CACHE */
-
-static void *dma_nommu_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- struct iommu_table *iommu;
-
- /* The coherent mask may be smaller than the real mask, check if
- * we can really use the direct ops
- */
- if (dma_nommu_dma_supported(dev, dev->coherent_dma_mask))
- return __dma_nommu_alloc_coherent(dev, size, dma_handle,
- flag, attrs);
-
- /* Ok we can't ... do we have an iommu ? If not, fail */
- iommu = get_iommu_table_base(dev);
- if (!iommu)
- return NULL;
-
- /* Try to use the iommu */
- return iommu_alloc_coherent(dev, iommu, size, dma_handle,
- dev->coherent_dma_mask, flag,
- dev_to_node(dev));
-}
-
-static void dma_nommu_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- struct iommu_table *iommu;
-
- /* See comments in dma_nommu_alloc_coherent() */
- if (dma_nommu_dma_supported(dev, dev->coherent_dma_mask))
- return __dma_nommu_free_coherent(dev, size, vaddr, dma_handle,
- attrs);
- /* Maybe we used an iommu ... */
- iommu = get_iommu_table_base(dev);
-
- /* If we hit that we should have never allocated in the first
- * place so how come we are freeing ?
- */
- if (WARN_ON(!iommu))
- return;
- iommu_free_coherent(iommu, size, vaddr, dma_handle);
-}
-
-int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t handle, size_t size,
- unsigned long attrs)
-{
- unsigned long pfn;
-
-#ifdef CONFIG_NOT_COHERENT_CACHE
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr);
-#else
- pfn = page_to_pfn(virt_to_page(cpu_addr));
-#endif
- return remap_pfn_range(vma, vma->vm_start,
- pfn + vma->vm_pgoff,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot);
-}
-
-static int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sgl, sg, nents, i) {
- sg->dma_address = sg_phys(sg) + get_dma_offset(dev);
- sg->dma_length = sg->length;
-
- if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
- continue;
-
- __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
- }
-
- return nents;
-}
-
-static void dma_nommu_unmap_sg(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sgl, sg, nents, i)
- __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
-}
-
-static u64 dma_nommu_get_required_mask(struct device *dev)
-{
- u64 end, mask;
-
- end = memblock_end_of_DRAM() + get_dma_offset(dev);
-
- mask = 1ULL << (fls64(end) - 1);
- mask += mask - 1;
-
- return mask;
-}
-
-static inline dma_addr_t dma_nommu_map_page(struct device *dev,
- struct page *page,
- unsigned long offset,
- size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- __dma_sync_page(page, offset, size, dir);
-
- return page_to_phys(page) + offset + get_dma_offset(dev);
-}
-
-static inline void dma_nommu_unmap_page(struct device *dev,
- dma_addr_t dma_address,
- size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- __dma_sync(bus_to_virt(dma_address), size, direction);
-}
-
-#ifdef CONFIG_NOT_COHERENT_CACHE
-static inline void dma_nommu_sync_sg(struct device *dev,
- struct scatterlist *sgl, int nents,
- enum dma_data_direction direction)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sgl, sg, nents, i)
- __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
-}
-
-static inline void dma_nommu_sync_single(struct device *dev,
- dma_addr_t dma_handle, size_t size,
- enum dma_data_direction direction)
-{
- __dma_sync(bus_to_virt(dma_handle), size, direction);
-}
-#endif
-
-const struct dma_map_ops dma_nommu_ops = {
- .alloc = dma_nommu_alloc_coherent,
- .free = dma_nommu_free_coherent,
- .mmap = dma_nommu_mmap_coherent,
- .map_sg = dma_nommu_map_sg,
- .unmap_sg = dma_nommu_unmap_sg,
- .dma_supported = dma_nommu_dma_supported,
- .map_page = dma_nommu_map_page,
- .unmap_page = dma_nommu_unmap_page,
- .get_required_mask = dma_nommu_get_required_mask,
-#ifdef CONFIG_NOT_COHERENT_CACHE
- .sync_single_for_cpu = dma_nommu_sync_single,
- .sync_single_for_device = dma_nommu_sync_single,
- .sync_sg_for_cpu = dma_nommu_sync_sg,
- .sync_sg_for_device = dma_nommu_sync_sg,
-#endif
-};
-EXPORT_SYMBOL(dma_nommu_ops);
-
-int dma_set_coherent_mask(struct device *dev, u64 mask)
-{
- if (!dma_supported(dev, mask)) {
- /*
- * We need to special case the direct DMA ops which can
- * support a fallback for coherent allocations. There
- * is no dma_op->set_coherent_mask() so we have to do
- * things the hard way:
- */
- if (get_dma_ops(dev) != &dma_nommu_ops ||
- get_iommu_table_base(dev) == NULL ||
- !dma_iommu_dma_supported(dev, mask))
- return -EIO;
- }
- dev->coherent_dma_mask = mask;
- return 0;
-}
-EXPORT_SYMBOL(dma_set_coherent_mask);
-
-int dma_set_mask(struct device *dev, u64 dma_mask)
-{
- if (ppc_md.dma_set_mask)
- return ppc_md.dma_set_mask(dev, dma_mask);
-
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
- struct pci_controller *phb = pci_bus_to_host(pdev->bus);
- if (phb->controller_ops.dma_set_mask)
- return phb->controller_ops.dma_set_mask(pdev, dma_mask);
- }
-
- if (!dev->dma_mask || !dma_supported(dev, dma_mask))
- return -EIO;
- *dev->dma_mask = dma_mask;
- return 0;
-}
-EXPORT_SYMBOL(dma_set_mask);
-
-u64 __dma_get_required_mask(struct device *dev)
-{
- const struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- if (unlikely(dma_ops == NULL))
- return 0;
-
- if (dma_ops->get_required_mask)
- return dma_ops->get_required_mask(dev);
-
- return DMA_BIT_MASK(8 * sizeof(dma_addr_t));
-}
-
-u64 dma_get_required_mask(struct device *dev)
-{
- if (ppc_md.dma_get_required_mask)
- return ppc_md.dma_get_required_mask(dev);
-
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
- struct pci_controller *phb = pci_bus_to_host(pdev->bus);
- if (phb->controller_ops.dma_get_required_mask)
- return phb->controller_ops.dma_get_required_mask(pdev);
- }
-
- return __dma_get_required_mask(dev);
-}
-EXPORT_SYMBOL_GPL(dma_get_required_mask);
-
-static int __init dma_init(void)
-{
-#ifdef CONFIG_IBMVIO
- dma_debug_add_bus(&vio_bus_type);
-#endif
-
- return 0;
-}
-fs_initcall(dma_init);
-
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 8be3721d9302..e49bd5efcfe6 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -666,8 +666,10 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
m = &dt_cpu_feature_match_table[i];
if (!strcmp(f->name, m->name)) {
known = true;
- if (m->enable(f))
+ if (m->enable(f)) {
+ cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask;
break;
+ }
pr_info("not enabling: %s (disabled or unsupported by kernel)\n",
f->name);
@@ -675,17 +677,12 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
}
}
- if (!known && enable_unknown) {
- if (!feat_try_enable_unknown(f)) {
- pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
- f->name);
- return false;
- }
+ if (!known && (!enable_unknown || !feat_try_enable_unknown(f))) {
+ pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
+ f->name);
+ return false;
}
- if (m->cpu_ftr_bit_mask)
- cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask;
-
if (known)
pr_debug("enabling: %s\n", f->name);
else
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index ae05203eb4de..289c0b37d845 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -109,7 +109,14 @@ EXPORT_SYMBOL(eeh_subsystem_flags);
* frozen count in last hour exceeds this limit, the PE will
* be forced to be offline permanently.
*/
-int eeh_max_freezes = 5;
+u32 eeh_max_freezes = 5;
+
+/*
+ * Controls whether a recovery event should be scheduled when an
+ * isolated device is discovered. This is only really useful for
+ * debugging problems with the EEH core.
+ */
+bool eeh_debugfs_no_recover;
/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;
@@ -823,15 +830,15 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
switch (state) {
case pcie_deassert_reset:
eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
- eeh_unfreeze_pe(pe, false);
+ eeh_unfreeze_pe(pe);
if (!(pe->type & EEH_PE_VF))
- eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
break;
case pcie_hot_reset:
eeh_pe_mark_isolated(pe);
- eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
if (!(pe->type & EEH_PE_VF))
@@ -840,7 +847,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
break;
case pcie_warm_reset:
eeh_pe_mark_isolated(pe);
- eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
if (!(pe->type & EEH_PE_VF))
@@ -848,7 +855,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
break;
default:
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED);
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true);
return -EINVAL;
};
@@ -877,6 +884,24 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
return NULL;
}
+static void eeh_pe_refreeze_passed(struct eeh_pe *root)
+{
+ struct eeh_pe *pe;
+ int state;
+
+ eeh_for_each_pe(root, pe) {
+ if (eeh_pe_passed(pe)) {
+ state = eeh_ops->get_state(pe, NULL);
+ if (state &
+ (EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED)) {
+ pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n",
+ pe->phb->global_number, pe->addr);
+ eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
+ }
+ }
+ }
+}
+
/**
* eeh_pe_reset_full - Complete a full reset process on the indicated PE
* @pe: EEH PE
@@ -889,12 +914,12 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
*
* This function will attempt to reset a PE three times before failing.
*/
-int eeh_pe_reset_full(struct eeh_pe *pe)
+int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed)
{
int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
int type = EEH_RESET_HOT;
unsigned int freset = 0;
- int i, state, ret;
+ int i, state = 0, ret;
/*
* Determine the type of reset to perform - hot or fundamental.
@@ -911,32 +936,42 @@ int eeh_pe_reset_full(struct eeh_pe *pe)
/* Make three attempts at resetting the bus */
for (i = 0; i < 3; i++) {
- ret = eeh_pe_reset(pe, type);
- if (ret)
- break;
-
- ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
- if (ret)
- break;
+ ret = eeh_pe_reset(pe, type, include_passed);
+ if (!ret)
+ ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE,
+ include_passed);
+ if (ret) {
+ ret = -EIO;
+ pr_warn("EEH: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n\n",
+ state, pe->phb->global_number, pe->addr, i + 1);
+ continue;
+ }
+ if (i)
+ pr_warn("EEH: PHB#%x-PE#%x: Successful reset (attempt %d)\n",
+ pe->phb->global_number, pe->addr, i + 1);
/* Wait until the PE is in a functioning state */
state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
if (state < 0) {
- pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x",
- __func__, pe->phb->global_number, pe->addr);
+ pr_warn("EEH: Unrecoverable slot failure on PHB#%x-PE#%x",
+ pe->phb->global_number, pe->addr);
ret = -ENOTRECOVERABLE;
break;
}
if (eeh_state_active(state))
break;
-
- /* Set error in case this is our last attempt */
- ret = -EIO;
- pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n",
- __func__, state, pe->phb->global_number, pe->addr, (i + 1));
+ else
+ pr_warn("EEH: PHB#%x-PE#%x: Slot inactive after reset: 0x%x (attempt %d)\n",
+ pe->phb->global_number, pe->addr, state, i + 1);
}
- eeh_pe_state_clear(pe, reset_state);
+ /* Resetting the PE may have unfrozen child PEs. If those PEs have been
+ * (potentially) passed through to a guest, re-freeze them:
+ */
+ if (!include_passed)
+ eeh_pe_refreeze_passed(pe);
+
+ eeh_pe_state_clear(pe, reset_state, true);
return ret;
}
@@ -1309,7 +1344,7 @@ void eeh_remove_device(struct pci_dev *dev)
edev->mode &= ~EEH_DEV_SYSFS;
}
-int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
+int eeh_unfreeze_pe(struct eeh_pe *pe)
{
int ret;
@@ -1327,10 +1362,6 @@ int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
return ret;
}
- /* Clear software isolated state */
- if (sw_state && (pe->state & EEH_PE_ISOLATED))
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
-
return ret;
}
@@ -1382,7 +1413,10 @@ static int eeh_pe_change_owner(struct eeh_pe *pe)
}
}
- return eeh_unfreeze_pe(pe, true);
+ ret = eeh_unfreeze_pe(pe);
+ if (!ret)
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
+ return ret;
}
/**
@@ -1612,13 +1646,12 @@ int eeh_pe_get_state(struct eeh_pe *pe)
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);
-static int eeh_pe_reenable_devices(struct eeh_pe *pe)
+static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed)
{
struct eeh_dev *edev, *tmp;
struct pci_dev *pdev;
int ret = 0;
- /* Restore config space */
eeh_pe_restore_bars(pe);
/*
@@ -1639,7 +1672,14 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe)
}
/* The PE is still in frozen state */
- return eeh_unfreeze_pe(pe, true);
+ if (include_passed || !eeh_pe_passed(pe)) {
+ ret = eeh_unfreeze_pe(pe);
+ } else
+ pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n",
+ pe->phb->global_number, pe->addr);
+ if (!ret)
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed);
+ return ret;
}
@@ -1652,7 +1692,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe)
* indicated type, either fundamental reset or hot reset.
* PE reset is the most important part for error recovery.
*/
-int eeh_pe_reset(struct eeh_pe *pe, int option)
+int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed)
{
int ret = 0;
@@ -1666,11 +1706,11 @@ int eeh_pe_reset(struct eeh_pe *pe, int option)
switch (option) {
case EEH_RESET_DEACTIVATE:
ret = eeh_ops->reset(pe, option);
- eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed);
if (ret)
break;
- ret = eeh_pe_reenable_devices(pe);
+ ret = eeh_pe_reenable_devices(pe, include_passed);
break;
case EEH_RESET_HOT:
case EEH_RESET_FUNDAMENTAL:
@@ -1796,22 +1836,64 @@ static int eeh_enable_dbgfs_get(void *data, u64 *val)
return 0;
}
-static int eeh_freeze_dbgfs_set(void *data, u64 val)
-{
- eeh_max_freezes = val;
- return 0;
-}
+DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
+ eeh_enable_dbgfs_set, "0x%llx\n");
-static int eeh_freeze_dbgfs_get(void *data, u64 *val)
+static ssize_t eeh_force_recover_write(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
{
- *val = eeh_max_freezes;
- return 0;
+ struct pci_controller *hose;
+ uint32_t phbid, pe_no;
+ struct eeh_pe *pe;
+ char buf[20];
+ int ret;
+
+ ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
+ if (!ret)
+ return -EFAULT;
+
+ /*
+ * When PE is NULL the event is a "special" event. Rather than
+ * recovering a specific PE it forces the EEH core to scan for failed
+ * PHBs and recovers each. This needs to be done before any device
+ * recoveries can occur.
+ */
+ if (!strncmp(buf, "hwcheck", 7)) {
+ __eeh_send_failure_event(NULL);
+ return count;
+ }
+
+ ret = sscanf(buf, "%x:%x", &phbid, &pe_no);
+ if (ret != 2)
+ return -EINVAL;
+
+ hose = pci_find_controller_for_domain(phbid);
+ if (!hose)
+ return -ENODEV;
+
+ /* Retrieve PE */
+ pe = eeh_pe_get(hose, pe_no, 0);
+ if (!pe)
+ return -ENODEV;
+
+ /*
+ * We don't do any state checking here since the detection
+ * process is async to the recovery process. The recovery
+ * thread *should* not break even if we schedule a recovery
+ * from an odd state (e.g. PE removed, or recovery of a
+ * non-isolated PE)
+ */
+ __eeh_send_failure_event(pe);
+
+ return ret < 0 ? ret : count;
}
-DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
- eeh_enable_dbgfs_set, "0x%llx\n");
-DEFINE_DEBUGFS_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
- eeh_freeze_dbgfs_set, "0x%llx\n");
+static const struct file_operations eeh_force_recover_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = eeh_force_recover_write,
+};
#endif
static int __init eeh_init_proc(void)
@@ -1822,9 +1904,15 @@ static int __init eeh_init_proc(void)
debugfs_create_file_unsafe("eeh_enable", 0600,
powerpc_debugfs_root, NULL,
&eeh_enable_dbgfs_ops);
- debugfs_create_file_unsafe("eeh_max_freezes", 0600,
- powerpc_debugfs_root, NULL,
- &eeh_freeze_dbgfs_ops);
+ debugfs_create_u32("eeh_max_freezes", 0600,
+ powerpc_debugfs_root, &eeh_max_freezes);
+ debugfs_create_bool("eeh_disable_recovery", 0600,
+ powerpc_debugfs_root,
+ &eeh_debugfs_no_recover);
+ debugfs_create_file_unsafe("eeh_force_recover", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_force_recover_fops);
+ eeh_cache_debugfs_init();
#endif
}
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 201943d54a6e..9c68f0837385 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -26,6 +26,7 @@
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <asm/pci-bridge.h>
+#include <asm/debugfs.h>
#include <asm/ppc-pci.h>
@@ -113,7 +114,7 @@ static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
while (n) {
struct pci_io_addr_range *piar;
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
- pr_debug("PCI: %s addr range %d [%pap-%pap]: %s\n",
+ pr_info("PCI: %s addr range %d [%pap-%pap]: %s\n",
(piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
&piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
cnt++;
@@ -157,10 +158,8 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
piar->pcidev = dev;
piar->flags = flags;
-#ifdef DEBUG
pr_debug("PIAR: insert range=[%pap:%pap] dev=%s\n",
&alo, &ahi, pci_name(dev));
-#endif
rb_link_node(&piar->rb_node, parent, p);
rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
@@ -240,6 +239,8 @@ restart:
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
if (piar->pcidev == dev) {
+ pr_debug("PIAR: remove range=[%pap:%pap] dev=%s\n",
+ &piar->addr_lo, &piar->addr_hi, pci_name(dev));
rb_erase(n, &pci_io_addr_cache_root.rb_root);
kfree(piar);
goto restart;
@@ -298,9 +299,30 @@ void eeh_addr_cache_build(void)
eeh_addr_cache_insert_dev(dev);
eeh_sysfs_add_device(dev);
}
+}
-#ifdef DEBUG
- /* Verify tree built up above, echo back the list of addrs. */
- eeh_addr_cache_print(&pci_io_addr_cache_root);
-#endif
+static int eeh_addr_cache_show(struct seq_file *s, void *v)
+{
+ struct pci_io_addr_range *piar;
+ struct rb_node *n;
+
+ spin_lock(&pci_io_addr_cache_root.piar_lock);
+ for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) {
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+ seq_printf(s, "%s addr range [%pap-%pap]: %s\n",
+ (piar->flags & IORESOURCE_IO) ? "i/o" : "mem",
+ &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
+ }
+ spin_unlock(&pci_io_addr_cache_root.piar_lock);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache);
+
+void eeh_cache_debugfs_init(void)
+{
+ debugfs_create_file_unsafe("eeh_address_cache", 0400,
+ powerpc_debugfs_root, NULL,
+ &eeh_addr_cache_fops);
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 99eab7bc7edc..89623962c727 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -510,22 +510,11 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
* support EEH. So we just care about PCI devices for
* simplicity here.
*/
- if (!dev || (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
- return NULL;
-
- /*
- * We rely on count-based pcibios_release_device() to
- * detach permanently offlined PEs. Unfortunately, that's
- * not reliable enough. We might have the permanently
- * offlined PEs attached, but we needn't take care of
- * them and their child devices.
- */
- if (eeh_dev_removed(edev))
+ if (!eeh_edev_actionable(edev) ||
+ (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
return NULL;
if (rmv_data) {
- if (eeh_pe_passed(edev->pe))
- return NULL;
driver = eeh_pcid_get(dev);
if (driver) {
if (driver->err_handler &&
@@ -539,8 +528,8 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
}
/* Remove it from PCI subsystem */
- pr_debug("EEH: Removing %s without EEH sensitive driver\n",
- pci_name(dev));
+ pr_info("EEH: Removing %s without EEH sensitive driver\n",
+ pci_name(dev));
edev->mode |= EEH_DEV_DISCONNECTED;
if (rmv_data)
rmv_data->removed_dev_count++;
@@ -591,34 +580,22 @@ static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
* PE reset (for 3 times), we try to clear the frozen state
* for 3 times as well.
*/
-static void *__eeh_clear_pe_frozen_state(struct eeh_pe *pe, void *flag)
+static int eeh_clear_pe_frozen_state(struct eeh_pe *root, bool include_passed)
{
- bool clear_sw_state = *(bool *)flag;
- int i, rc = 1;
-
- for (i = 0; rc && i < 3; i++)
- rc = eeh_unfreeze_pe(pe, clear_sw_state);
+ struct eeh_pe *pe;
+ int i;
- /* Stop immediately on any errors */
- if (rc) {
- pr_warn("%s: Failure %d unfreezing PHB#%x-PE#%x\n",
- __func__, rc, pe->phb->global_number, pe->addr);
- return (void *)pe;
+ eeh_for_each_pe(root, pe) {
+ if (include_passed || !eeh_pe_passed(pe)) {
+ for (i = 0; i < 3; i++)
+ if (!eeh_unfreeze_pe(pe))
+ break;
+ if (i >= 3)
+ return -EIO;
+ }
}
-
- return NULL;
-}
-
-static int eeh_clear_pe_frozen_state(struct eeh_pe *pe,
- bool clear_sw_state)
-{
- void *rc;
-
- rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state);
- if (!rc)
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
-
- return rc ? -EIO : 0;
+ eeh_pe_state_clear(root, EEH_PE_ISOLATED, include_passed);
+ return 0;
}
int eeh_pe_reset_and_recover(struct eeh_pe *pe)
@@ -636,16 +613,16 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
/* Issue reset */
- ret = eeh_pe_reset_full(pe);
+ ret = eeh_pe_reset_full(pe, true);
if (ret) {
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
return ret;
}
/* Unfreeze the PE */
ret = eeh_clear_pe_frozen_state(pe, true);
if (ret) {
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
return ret;
}
@@ -653,7 +630,7 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);
/* Clear recovery mode */
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
return 0;
}
@@ -676,6 +653,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
time64_t tstamp;
int cnt, rc;
struct eeh_dev *edev;
+ struct eeh_pe *tmp_pe;
+ bool any_passed = false;
+
+ eeh_for_each_pe(pe, tmp_pe)
+ any_passed |= eeh_pe_passed(tmp_pe);
/* pcibios will clear the counter; save the value */
cnt = pe->freeze_count;
@@ -688,7 +670,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
* into pci_hp_add_devices().
*/
eeh_pe_state_mark(pe, EEH_PE_KEEP);
- if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
+ if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) {
eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
} else {
pci_lock_rescan_remove();
@@ -705,7 +687,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
* config accesses. So we prefer to block them. However, controlled
* PCI config accesses initiated from EEH itself are allowed.
*/
- rc = eeh_pe_reset_full(pe);
+ rc = eeh_pe_reset_full(pe, false);
if (rc)
return rc;
@@ -744,11 +726,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
eeh_add_virt_device(edev);
} else {
if (!driver_eeh_aware)
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
pci_hp_add_devices(bus);
}
}
- eeh_pe_state_clear(pe, EEH_PE_KEEP);
+ eeh_pe_state_clear(pe, EEH_PE_KEEP, true);
pe->tstamp = tstamp;
pe->freeze_count = cnt;
@@ -900,7 +882,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
* is still in frozen state. Clear it before
* resuming the PE.
*/
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
result = PCI_ERS_RESULT_RECOVERED;
}
}
@@ -977,7 +959,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
} else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
pci_lock_rescan_remove();
@@ -987,7 +969,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
return;
}
}
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
}
/**
@@ -1069,7 +1051,7 @@ void eeh_handle_special_event(void)
continue;
/* Notify all devices to be down */
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
eeh_set_channel_state(pe, pci_channel_io_perm_failure);
eeh_pe_report(
"error_detected(permanent failure)", pe,
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index 227e57f980df..539aca055d70 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -121,7 +121,7 @@ int eeh_event_init(void)
* the actual event will be delivered in a normal context
* (from a workqueue).
*/
-int eeh_send_failure_event(struct eeh_pe *pe)
+int __eeh_send_failure_event(struct eeh_pe *pe)
{
unsigned long flags;
struct eeh_event *event;
@@ -144,6 +144,20 @@ int eeh_send_failure_event(struct eeh_pe *pe)
return 0;
}
+int eeh_send_failure_event(struct eeh_pe *pe)
+{
+ /*
+ * If we've manually supressed recovery events via debugfs
+ * then just drop it on the floor.
+ */
+ if (eeh_debugfs_no_recover) {
+ pr_err("EEH: Event dropped due to no_recover setting\n");
+ return 0;
+ }
+
+ return __eeh_send_failure_event(pe);
+}
+
/**
* eeh_remove_event - Remove EEH event from the queue
* @pe: Event binding to the PE
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 6fa2032e0594..8b578891f27c 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -657,62 +657,52 @@ void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode)
}
/**
- * __eeh_pe_state_clear - Clear state for the PE
+ * eeh_pe_state_clear - Clear state for the PE
* @data: EEH PE
- * @flag: state
+ * @state: state
+ * @include_passed: include passed-through devices?
*
* The function is used to clear the indicated state from the
* given PE. Besides, we also clear the check count of the PE
* as well.
*/
-static void *__eeh_pe_state_clear(struct eeh_pe *pe, void *flag)
+void eeh_pe_state_clear(struct eeh_pe *root, int state, bool include_passed)
{
- int state = *((int *)flag);
+ struct eeh_pe *pe;
struct eeh_dev *edev, *tmp;
struct pci_dev *pdev;
- /* Keep the state of permanently removed PE intact */
- if (pe->state & EEH_PE_REMOVED)
- return NULL;
+ eeh_for_each_pe(root, pe) {
+ /* Keep the state of permanently removed PE intact */
+ if (pe->state & EEH_PE_REMOVED)
+ continue;
- pe->state &= ~state;
+ if (!include_passed && eeh_pe_passed(pe))
+ continue;
- /*
- * Special treatment on clearing isolated state. Clear
- * check count since last isolation and put all affected
- * devices to normal state.
- */
- if (!(state & EEH_PE_ISOLATED))
- return NULL;
+ pe->state &= ~state;
- pe->check_count = 0;
- eeh_pe_for_each_dev(pe, edev, tmp) {
- pdev = eeh_dev_to_pci_dev(edev);
- if (!pdev)
+ /*
+ * Special treatment on clearing isolated state. Clear
+ * check count since last isolation and put all affected
+ * devices to normal state.
+ */
+ if (!(state & EEH_PE_ISOLATED))
continue;
- pdev->error_state = pci_channel_io_normal;
- }
-
- /* Unblock PCI config access if required */
- if (pe->state & EEH_PE_CFG_RESTRICTED)
- pe->state &= ~EEH_PE_CFG_BLOCKED;
+ pe->check_count = 0;
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (!pdev)
+ continue;
- return NULL;
-}
+ pdev->error_state = pci_channel_io_normal;
+ }
-/**
- * eeh_pe_state_clear - Clear state for the PE and its children
- * @pe: PE
- * @state: state to be cleared
- *
- * When the PE and its children has been recovered from error,
- * we need clear the error state for that. The function is used
- * for the purpose.
- */
-void eeh_pe_state_clear(struct eeh_pe *pe, int state)
-{
- eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
+ /* Unblock PCI config access if required */
+ if (pe->state & EEH_PE_CFG_RESTRICTED)
+ pe->state &= ~EEH_PE_CFG_BLOCKED;
+ }
}
/*
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index deed906dd8f1..3fa04dda1737 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -82,8 +82,9 @@ static ssize_t eeh_pe_state_store(struct device *dev,
if (!(edev->pe->state & EEH_PE_ISOLATED))
return count;
- if (eeh_unfreeze_pe(edev->pe, true))
+ if (eeh_unfreeze_pe(edev->pe))
return -EIO;
+ eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED, true);
return count;
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 0768dfd8a64e..b61cfd29c76f 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -97,14 +97,11 @@ crit_transfer_to_handler:
mfspr r0,SPRN_SRR1
stw r0,_SRR1(r11)
- /* set the stack limit to the current stack
- * and set the limit to protect the thread_info
- * struct
- */
+ /* set the stack limit to the current stack */
mfspr r8,SPRN_SPRG_THREAD
lwz r0,KSP_LIMIT(r8)
stw r0,SAVED_KSP_LIMIT(r11)
- rlwimi r0,r1,0,0,(31-THREAD_SHIFT)
+ rlwinm r0,r1,0,0,(31 - THREAD_SHIFT)
stw r0,KSP_LIMIT(r8)
/* fall through */
#endif
@@ -121,14 +118,11 @@ crit_transfer_to_handler:
mfspr r0,SPRN_SRR1
stw r0,crit_srr1@l(0)
- /* set the stack limit to the current stack
- * and set the limit to protect the thread_info
- * struct
- */
+ /* set the stack limit to the current stack */
mfspr r8,SPRN_SPRG_THREAD
lwz r0,KSP_LIMIT(r8)
stw r0,saved_ksp_limit@l(0)
- rlwimi r0,r1,0,0,(31-THREAD_SHIFT)
+ rlwinm r0,r1,0,0,(31 - THREAD_SHIFT)
stw r0,KSP_LIMIT(r8)
/* fall through */
#endif
@@ -157,7 +151,6 @@ transfer_to_handler:
stw r2,_XER(r11)
mfspr r12,SPRN_SPRG_THREAD
addi r2,r12,-THREAD
- tovirt(r2,r2) /* set r2 to current */
beq 2f /* if from user, fix up THREAD.regs */
addi r11,r1,STACK_FRAME_OVERHEAD
stw r11,PT_REGS(r12)
@@ -166,6 +159,9 @@ transfer_to_handler:
internal debug mode bit to do this. */
lwz r12,THREAD_DBCR0(r12)
andis. r12,r12,DBCR0_IDM@h
+#endif
+ ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
beq+ 3f
/* From user and task is ptraced - load up global dbcr0 */
li r12,-1 /* clear all pending debug events */
@@ -174,8 +170,7 @@ transfer_to_handler:
tophys(r11,r11)
addi r11,r11,global_dbcr0@l
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_CPU(r9)
+ lwz r9,TASK_CPU(r2)
slwi r9,r9,3
add r11,r11,r9
#endif
@@ -185,11 +180,6 @@ transfer_to_handler:
addi r12,r12,-1
stw r12,4(r11)
#endif
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- CURRENT_THREAD_INFO(r9, r1)
- tophys(r9, r9)
- ACCOUNT_CPU_USER_ENTRY(r9, r11, r12)
-#endif
b 3f
@@ -201,9 +191,7 @@ transfer_to_handler:
ble- stack_ovf /* then the kernel stack overflowed */
5:
#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
- CURRENT_THREAD_INFO(r9, r1)
- tophys(r9,r9) /* check local flags */
- lwz r12,TI_LOCAL_FLAGS(r9)
+ lwz r12,TI_LOCAL_FLAGS(r2)
mtcrf 0x01,r12
bt- 31-TLF_NAPPING,4f
bt- 31-TLF_SLEEPING,7f
@@ -212,6 +200,7 @@ transfer_to_handler:
transfer_to_handler_cont:
3:
mflr r9
+ tovirt(r2, r2) /* set r2 to current */
lwz r11,0(r9) /* virtual address of handler */
lwz r9,4(r9) /* where to go when done */
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
@@ -275,11 +264,11 @@ reenable_mmu: /* re-enable mmu so we can */
#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
4: rlwinm r12,r12,0,~_TLF_NAPPING
- stw r12,TI_LOCAL_FLAGS(r9)
+ stw r12,TI_LOCAL_FLAGS(r2)
b power_save_ppc32_restore
7: rlwinm r12,r12,0,~_TLF_SLEEPING
- stw r12,TI_LOCAL_FLAGS(r9)
+ stw r12,TI_LOCAL_FLAGS(r2)
lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */
rlwinm r9,r9,0,~MSR_EE
lwz r12,_LINK(r11) /* and return to address in LR */
@@ -351,8 +340,7 @@ _GLOBAL(DoSyscall)
mtmsr r11
1:
#endif /* CONFIG_TRACE_IRQFLAGS */
- CURRENT_THREAD_INFO(r10, r1)
- lwz r11,TI_FLAGS(r10)
+ lwz r11,TI_FLAGS(r2)
andi. r11,r11,_TIF_SYSCALL_DOTRACE
bne- syscall_dotrace
syscall_dotrace_cont:
@@ -385,13 +373,12 @@ ret_from_syscall:
lwz r3,GPR3(r1)
#endif
mr r6,r3
- CURRENT_THREAD_INFO(r12, r1)
/* disable interrupts so current_thread_info()->flags can't change */
LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
SYNC
MTMSRD(r10)
- lwz r9,TI_FLAGS(r12)
+ lwz r9,TI_FLAGS(r2)
li r8,-MAX_ERRNO
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
bne- syscall_exit_work
@@ -438,8 +425,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
andi. r4,r8,MSR_PR
beq 3f
- CURRENT_THREAD_INFO(r4, r1)
- ACCOUNT_CPU_USER_EXIT(r4, r5, r7)
+ ACCOUNT_CPU_USER_EXIT(r2, r5, r7)
3:
#endif
lwz r4,_LINK(r1)
@@ -532,7 +518,7 @@ syscall_exit_work:
/* Clear per-syscall TIF flags if any are set. */
li r11,_TIF_PERSYSCALL_MASK
- addi r12,r12,TI_FLAGS
+ addi r12,r2,TI_FLAGS
3: lwarx r8,0,r12
andc r8,r8,r11
#ifdef CONFIG_IBM405_ERR77
@@ -540,7 +526,6 @@ syscall_exit_work:
#endif
stwcx. r8,0,r12
bne- 3b
- subi r12,r12,TI_FLAGS
4: /* Anything which requires enabling interrupts? */
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
@@ -745,6 +730,9 @@ fast_exception_return:
mtcr r10
lwz r10,_LINK(r11)
mtlr r10
+ /* Clear the exception_marker on the stack to avoid confusing stacktrace */
+ li r10, 0
+ stw r10, 8(r11)
REST_GPR(10, r11)
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
@@ -819,8 +807,7 @@ ret_from_except:
user_exc_return: /* r10 contains MSR_KERNEL here */
/* Check current_thread_info()->flags */
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_FLAGS(r9)
+ lwz r9,TI_FLAGS(r2)
andi. r0,r9,_TIF_USER_WORK_MASK
bne do_work
@@ -832,18 +819,14 @@ restore_user:
andis. r10,r0,DBCR0_IDM@h
bnel- load_dbcr0
#endif
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- CURRENT_THREAD_INFO(r9, r1)
- ACCOUNT_CPU_USER_EXIT(r9, r10, r11)
-#endif
+ ACCOUNT_CPU_USER_EXIT(r2, r10, r11)
b restore
/* N.B. the only way to get here is from the beq following ret_from_except. */
resume_kernel:
/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
- CURRENT_THREAD_INFO(r9, r1)
- lwz r8,TI_FLAGS(r9)
+ lwz r8,TI_FLAGS(r2)
andis. r0,r8,_TIF_EMULATE_STACK_STORE@h
beq+ 1f
@@ -869,7 +852,7 @@ resume_kernel:
/* Clear _TIF_EMULATE_STACK_STORE flag */
lis r11,_TIF_EMULATE_STACK_STORE@h
- addi r5,r9,TI_FLAGS
+ addi r5,r2,TI_FLAGS
0: lwarx r8,0,r5
andc r8,r8,r11
#ifdef CONFIG_IBM405_ERR77
@@ -881,7 +864,7 @@ resume_kernel:
#ifdef CONFIG_PREEMPT
/* check current_thread_info->preempt_count */
- lwz r0,TI_PREEMPT(r9)
+ lwz r0,TI_PREEMPT(r2)
cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
bne restore
andi. r8,r8,_TIF_NEED_RESCHED
@@ -897,8 +880,7 @@ resume_kernel:
bl trace_hardirqs_off
#endif
1: bl preempt_schedule_irq
- CURRENT_THREAD_INFO(r9, r1)
- lwz r3,TI_FLAGS(r9)
+ lwz r3,TI_FLAGS(r2)
andi. r0,r3,_TIF_NEED_RESCHED
bne- 1b
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -982,6 +964,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
mtcrf 0xFF,r10
mtlr r11
+ /* Clear the exception_marker on the stack to avoid confusing stacktrace */
+ li r10, 0
+ stw r10, 8(r1)
/*
* Once we put values in SRR0 and SRR1, we are in a state
* where exceptions are not recoverable, since taking an
@@ -997,9 +982,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
.globl exc_exit_restart
exc_exit_restart:
lwz r12,_NIP(r1)
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
- mtspr SPRN_NRI, r0
-#endif
mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r9
REST_4GPRS(9, r1)
@@ -1021,6 +1003,9 @@ exc_exit_restart_end:
mtlr r11
lwz r10,_CCR(r1)
mtcrf 0xff,r10
+ /* Clear the exception_marker on the stack to avoid confusing stacktrace */
+ li r10, 0
+ stw r10, 8(r1)
REST_2GPRS(9, r1)
.globl exc_exit_restart
exc_exit_restart:
@@ -1166,10 +1151,6 @@ ret_from_debug_exc:
mfspr r9,SPRN_SPRG_THREAD
lwz r10,SAVED_KSP_LIMIT(r1)
stw r10,KSP_LIMIT(r9)
- lwz r9,THREAD_INFO-THREAD(r9)
- CURRENT_THREAD_INFO(r10, r1)
- lwz r10,TI_PREEMPT(r10)
- stw r10,TI_PREEMPT(r9)
RESTORE_xSRR(SRR0,SRR1);
RESTORE_xSRR(CSRR0,CSRR1);
RESTORE_MMU_REGS;
@@ -1201,8 +1182,7 @@ load_dbcr0:
lis r11,global_dbcr0@ha
addi r11,r11,global_dbcr0@l
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_CPU(r9)
+ lwz r9,TASK_CPU(r2)
slwi r9,r9,3
add r11,r11,r9
#endif
@@ -1242,8 +1222,7 @@ recheck:
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
SYNC
MTMSRD(r10) /* disable interrupts */
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_FLAGS(r9)
+ lwz r9,TI_FLAGS(r2)
andi. r0,r9,_TIF_NEED_RESCHED
bne- do_resched
andi. r0,r9,_TIF_USER_WORK_MASK
@@ -1292,10 +1271,13 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_601)
lwz r3,_TRAP(r1)
andi. r0,r3,1
- beq 4f
+ beq 5f
SAVE_NVGPRS(r1)
rlwinm r3,r3,0,0,30
stw r3,_TRAP(r1)
+5: mfspr r2,SPRN_SPRG_THREAD
+ addi r2,r2,-THREAD
+ tovirt(r2,r2) /* set back r2 to current */
4: addi r3,r1,STACK_FRAME_OVERHEAD
bl unrecoverable_exception
/* shouldn't return */
@@ -1335,7 +1317,7 @@ _GLOBAL(enter_rtas)
MTMSRD(r0) /* don't get trashed */
li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
mtlr r6
- mtspr SPRN_SPRG_RTAS,r7
+ stw r7, THREAD + RTAS_SP(r2)
mtspr SPRN_SRR0,r8
mtspr SPRN_SRR1,r9
RFI
@@ -1344,7 +1326,8 @@ _GLOBAL(enter_rtas)
lwz r9,8(r9) /* original msr value */
addi r1,r1,INT_FRAME_SIZE
li r0,0
- mtspr SPRN_SPRG_RTAS,r0
+ tophys(r7, r2)
+ stw r0, THREAD + RTAS_SP(r7)
mtspr SPRN_SRR0,r8
mtspr SPRN_SRR1,r9
RFI /* return to caller */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 435927f549c4..15c67d2c0534 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -166,7 +166,7 @@ system_call: /* label this so stack traces look sane */
li r10,IRQS_ENABLED
std r10,SOFTE(r1)
- CURRENT_THREAD_INFO(r11, r1)
+ ld r11, PACA_THREAD_INFO(r13)
ld r10,TI_FLAGS(r11)
andi. r11,r10,_TIF_SYSCALL_DOTRACE
bne .Lsyscall_dotrace /* does not return */
@@ -213,7 +213,7 @@ system_call: /* label this so stack traces look sane */
ld r3,RESULT(r1)
#endif
- CURRENT_THREAD_INFO(r12, r1)
+ ld r12, PACA_THREAD_INFO(r13)
ld r8,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S
@@ -236,18 +236,14 @@ system_call_exit:
/*
* Disable interrupts so current_thread_info()->flags can't change,
* and so that we don't get interrupted after loading SRR0/1.
+ *
+ * Leave MSR_RI enabled for now, because with THREAD_INFO_IN_TASK we
+ * could fault on the load of the TI_FLAGS below.
*/
#ifdef CONFIG_PPC_BOOK3E
wrteei 0
#else
- /*
- * For performance reasons we clear RI the same time that we
- * clear EE. We only need to clear RI just before we restore r13
- * below, but batching it with EE saves us one expensive mtmsrd call.
- * We have to be careful to restore RI if we branch anywhere from
- * here (eg syscall_exit_work).
- */
- li r11,0
+ li r11,MSR_RI
mtmsrd r11,1
#endif /* CONFIG_PPC_BOOK3E */
@@ -263,15 +259,7 @@ system_call_exit:
bne 3f
#endif
2: addi r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_BOOK3S
- li r10,MSR_RI
- mtmsrd r10,1 /* Restore RI */
-#endif
bl restore_math
-#ifdef CONFIG_PPC_BOOK3S
- li r11,0
- mtmsrd r11,1
-#endif
ld r8,_MSR(r1)
ld r3,RESULT(r1)
li r11,-MAX_ERRNO
@@ -287,6 +275,16 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
andi. r6,r8,MSR_PR
ld r4,_LINK(r1)
+#ifdef CONFIG_PPC_BOOK3S
+ /*
+ * Clear MSR_RI, MSR_EE is already and remains disabled. We could do
+ * this later, but testing shows that doing it here causes less slow
+ * down than doing it closer to the rfid.
+ */
+ li r11,0
+ mtmsrd r11,1
+#endif
+
beq- 1f
ACCOUNT_CPU_USER_EXIT(r13, r11, r12)
@@ -348,7 +346,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Repopulate r9 and r10 for the syscall path */
addi r9,r1,STACK_FRAME_OVERHEAD
- CURRENT_THREAD_INFO(r10, r1)
+ ld r10, PACA_THREAD_INFO(r13)
ld r10,TI_FLAGS(r10)
cmpldi r0,NR_syscalls
@@ -363,10 +361,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
b .Lsyscall_exit
.Lsyscall_exit_work:
-#ifdef CONFIG_PPC_BOOK3S
- li r10,MSR_RI
- mtmsrd r10,1 /* Restore RI */
-#endif
/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
If TIF_NOERROR is set, just save r3 as it is. */
@@ -695,7 +689,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
2:
#endif /* CONFIG_PPC_BOOK3S_64 */
- CURRENT_THREAD_INFO(r7, r8) /* base of new stack */
+ clrrdi r7, r8, THREAD_SHIFT /* base of new stack */
/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
because we don't need to leave the 288-byte ABI gap at the
top of the kernel stack. */
@@ -746,7 +740,7 @@ _GLOBAL(ret_from_except_lite)
mtmsrd r10,1 /* Update machine state */
#endif /* CONFIG_PPC_BOOK3E */
- CURRENT_THREAD_INFO(r9, r1)
+ ld r9, PACA_THREAD_INFO(r13)
ld r3,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3E
ld r10,PACACURRENT(r13)
@@ -860,7 +854,7 @@ resume_kernel:
1: bl preempt_schedule_irq
/* Re-test flags and eventually loop */
- CURRENT_THREAD_INFO(r9, r1)
+ ld r9, PACA_THREAD_INFO(r13)
ld r4,TI_FLAGS(r9)
andi. r0,r4,_TIF_NEED_RESCHED
bne 1b
@@ -1002,6 +996,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r2,_NIP(r1)
mtspr SPRN_SRR0,r2
+ /*
+ * Leaving a stale exception_marker on the stack can confuse
+ * the reliable stack unwinder later on. Clear it.
+ */
+ li r2,0
+ std r2,STACK_FRAME_OVERHEAD-16(r1)
+
ld r0,GPR0(r1)
ld r2,GPR2(r1)
ld r3,GPR3(r1)
diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S
index 52ca2471ee1a..d252f4663a23 100644
--- a/arch/powerpc/kernel/epapr_hcalls.S
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -21,10 +21,9 @@
#ifndef CONFIG_PPC64
/* epapr_ev_idle() was derived from e500_idle() */
_GLOBAL(epapr_ev_idle)
- CURRENT_THREAD_INFO(r3, r1)
- PPC_LL r4, TI_LOCAL_FLAGS(r3) /* set napping bit */
+ PPC_LL r4, TI_LOCAL_FLAGS(r2) /* set napping bit */
ori r4, r4,_TLF_NAPPING /* so when we take an exception */
- PPC_STL r4, TI_LOCAL_FLAGS(r3) /* it will return to our caller */
+ PPC_STL r4, TI_LOCAL_FLAGS(r2) /* it will return to our caller */
wrteei 1
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index afb638778f44..49381f32b374 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -77,17 +77,6 @@ special_reg_save:
andi. r3,r3,MSR_PR
bnelr
- /* Copy info into temporary exception thread info */
- ld r11,PACAKSAVE(r13)
- CURRENT_THREAD_INFO(r11, r11)
- CURRENT_THREAD_INFO(r12, r1)
- ld r10,TI_FLAGS(r11)
- std r10,TI_FLAGS(r12)
- ld r10,TI_PREEMPT(r11)
- std r10,TI_PREEMPT(r12)
- ld r10,TI_TASK(r11)
- std r10,TI_TASK(r12)
-
/*
* Advance to the next TLB exception frame for handler
* types that don't do it automatically.
@@ -349,6 +338,7 @@ ret_from_mc_except:
#define GEN_BTB_FLUSH
#define CRIT_BTB_FLUSH
#define DBG_BTB_FLUSH
+#define MC_BTB_FLUSH
#define GDBELL_BTB_FLUSH
#endif
@@ -504,7 +494,7 @@ exc_##n##_bad_stack: \
* interrupts happen before the wait instruction.
*/
#define CHECK_NAPPING() \
- CURRENT_THREAD_INFO(r11, r1); \
+ ld r11, PACA_THREAD_INFO(r13); \
ld r10,TI_LOCAL_FLAGS(r11); \
andi. r9,r10,_TLF_NAPPING; \
beq+ 1f; \
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 9e253ce27e08..a5b8fbae56a0 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -68,6 +68,14 @@ OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900)
OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x4000)
OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900)
OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000)
+
+#ifdef CONFIG_PPC_POWERNV
+ .globl start_real_trampolines
+ .globl end_real_trampolines
+ .globl start_virt_trampolines
+ .globl end_virt_trampolines
+#endif
+
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
/*
* Data area reserved for FWNMI option.
@@ -566,8 +574,36 @@ EXC_COMMON_BEGIN(mce_return)
RFI_TO_KERNEL
b .
-EXC_REAL(data_access, 0x300, 0x80)
-EXC_VIRT(data_access, 0x4300, 0x80, 0x300)
+EXC_REAL_BEGIN(data_access, 0x300, 0x80)
+SET_SCRATCH0(r13) /* save r13 */
+EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b tramp_real_data_access
+EXC_REAL_END(data_access, 0x300, 0x80)
+
+TRAMP_REAL_BEGIN(tramp_real_data_access)
+EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, 0x300)
+ /*
+ * DAR/DSISR must be read before setting MSR[RI], because
+ * a d-side MCE will clobber those registers so is not
+ * recoverable if they are live.
+ */
+ mfspr r10,SPRN_DAR
+ mfspr r11,SPRN_DSISR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ stw r11,PACA_EXGEN+EX_DSISR(r13)
+EXCEPTION_PROLOG_2(data_access_common, EXC_STD)
+
+EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
+SET_SCRATCH0(r13) /* save r13 */
+EXCEPTION_PROLOG_0(PACA_EXGEN)
+EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x300)
+ mfspr r10,SPRN_DAR
+ mfspr r11,SPRN_DSISR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ stw r11,PACA_EXGEN+EX_DSISR(r13)
+EXCEPTION_PROLOG_2_RELON(data_access_common, EXC_STD)
+EXC_VIRT_END(data_access, 0x4300, 0x80)
+
TRAMP_KVM_SKIP(PACA_EXGEN, 0x300)
EXC_COMMON_BEGIN(data_access_common)
@@ -575,11 +611,8 @@ EXC_COMMON_BEGIN(data_access_common)
* Here r13 points to the paca, r9 contains the saved CR,
* SRR0 and SRR1 are saved in r11 and r12,
* r9 - r13 are saved in paca->exgen.
+ * EX_DAR and EX_DSISR have saved DAR/DSISR
*/
- mfspr r10,SPRN_DAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
RECONCILE_IRQ_STATE(r10, r11)
ld r12,_MSR(r1)
@@ -596,18 +629,29 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
-EXCEPTION_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, KVMTEST_PR, 0x380);
+SET_SCRATCH0(r13) /* save r13 */
+EXCEPTION_PROLOG_0(PACA_EXSLB)
+ b tramp_real_data_access_slb
EXC_REAL_END(data_access_slb, 0x380, 0x80)
+TRAMP_REAL_BEGIN(tramp_real_data_access_slb)
+EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
+ mfspr r10,SPRN_DAR
+ std r10,PACA_EXSLB+EX_DAR(r13)
+EXCEPTION_PROLOG_2(data_access_slb_common, EXC_STD)
+
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
-EXCEPTION_RELON_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, NOTEST, 0x380);
+SET_SCRATCH0(r13) /* save r13 */
+EXCEPTION_PROLOG_0(PACA_EXSLB)
+EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
+ mfspr r10,SPRN_DAR
+ std r10,PACA_EXSLB+EX_DAR(r13)
+EXCEPTION_PROLOG_2_RELON(data_access_slb_common, EXC_STD)
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
EXC_COMMON_BEGIN(data_access_slb_common)
- mfspr r10,SPRN_DAR
- std r10,PACA_EXSLB+EX_DAR(r13)
EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
ld r4,PACA_EXSLB+EX_DAR(r13)
std r4,_DAR(r1)
@@ -703,14 +747,30 @@ TRAMP_KVM_HV(PACA_EXGEN, 0x500)
EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
-EXC_REAL(alignment, 0x600, 0x100)
-EXC_VIRT(alignment, 0x4600, 0x100, 0x600)
-TRAMP_KVM(PACA_EXGEN, 0x600)
-EXC_COMMON_BEGIN(alignment_common)
+EXC_REAL_BEGIN(alignment, 0x600, 0x100)
+SET_SCRATCH0(r13) /* save r13 */
+EXCEPTION_PROLOG_0(PACA_EXGEN)
+EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, 0x600)
mfspr r10,SPRN_DAR
+ mfspr r11,SPRN_DSISR
std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
+ stw r11,PACA_EXGEN+EX_DSISR(r13)
+EXCEPTION_PROLOG_2(alignment_common, EXC_STD)
+EXC_REAL_END(alignment, 0x600, 0x100)
+
+EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)
+SET_SCRATCH0(r13) /* save r13 */
+EXCEPTION_PROLOG_0(PACA_EXGEN)
+EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x600)
+ mfspr r10,SPRN_DAR
+ mfspr r11,SPRN_DSISR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ stw r11,PACA_EXGEN+EX_DSISR(r13)
+EXCEPTION_PROLOG_2_RELON(alignment_common, EXC_STD)
+EXC_VIRT_END(alignment, 0x4600, 0x100)
+
+TRAMP_KVM(PACA_EXGEN, 0x600)
+EXC_COMMON_BEGIN(alignment_common)
EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
ld r3,PACA_EXGEN+EX_DAR(r13)
lwz r4,PACA_EXGEN+EX_DSISR(r13)
@@ -1629,7 +1689,7 @@ do_hash_page:
ori r0,r0,DSISR_BAD_FAULT_64S@l
and. r0,r4,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
- CURRENT_THREAD_INFO(r11, r1)
+ ld r11, PACA_THREAD_INFO(r13)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
bne 77f /* then don't call hash_page now */
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 05b08db3901d..ce6a972f2584 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -261,7 +261,7 @@ __secondary_hold_acknowledge:
tophys(r11,r1); /* use tophys(r1) if kernel */ \
beq 1f; \
mfspr r11,SPRN_SPRG_THREAD; \
- lwz r11,THREAD_INFO-THREAD(r11); \
+ lwz r11,TASK_STACK-THREAD(r11); \
addi r11,r11,THREAD_SIZE; \
tophys(r11,r11); \
1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
@@ -352,9 +352,8 @@ i##n: \
* registers that might have bad values includes all the GPRs
* and all the BATs. We indicate that we are in RTAS by putting
* a non-zero value, the address of the exception frame to use,
- * in SPRG2. The machine check handler checks SPRG2 and uses its
- * value if it is non-zero. If we ever needed to free up SPRG2,
- * we could use a field in the thread_info or thread_struct instead.
+ * in thread.rtas_sp. The machine check handler checks thread.rtas_sp
+ * and uses its value if it is non-zero.
* (Other exception handlers assume that r1 is a valid kernel stack
* pointer when we take an exception from supervisor mode.)
* -- paulus.
@@ -365,16 +364,15 @@ i##n: \
mtspr SPRN_SPRG_SCRATCH1,r11
mfcr r10
#ifdef CONFIG_PPC_CHRP
- mfspr r11,SPRN_SPRG_RTAS
- cmpwi 0,r11,0
- bne 7f
+ mfspr r11, SPRN_SPRG_THREAD
+ lwz r11, RTAS_SP(r11)
+ cmpwi cr1, r11, 0
+ bne cr1, 7f
#endif /* CONFIG_PPC_CHRP */
EXCEPTION_PROLOG_1
7: EXCEPTION_PROLOG_2
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_CHRP
- mfspr r4,SPRN_SPRG_RTAS
- cmpwi cr1,r4,0
bne cr1,1f
#endif
EXC_XFER_STD(0x200, machine_check_exception)
@@ -500,18 +498,22 @@ InstructionTLBMiss:
*/
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_IMISS
+#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
lis r1,PAGE_OFFSET@h /* check if kernel address */
cmplw 0,r1,r3
- mfspr r2,SPRN_SPRG_THREAD
- li r1,_PAGE_USER|_PAGE_PRESENT|_PAGE_EXEC /* low addresses tested as user */
- lwz r2,PGDIR(r2)
+#endif
+ mfspr r2, SPRN_SPRG_PGDIR
+#ifdef CONFIG_SWAP
+ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+#else
+ li r1,_PAGE_PRESENT | _PAGE_EXEC
+#endif
+#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
bge- 112f
- mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
- rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
- lis r2,swapper_pg_dir@ha /* if kernel address, use */
- addi r2,r2,swapper_pg_dir@l /* kernel page table */
-112: tophys(r2,r2)
- rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
+ lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
+#endif
+112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- InstructionAddressInvalid /* return if no mapping */
@@ -519,20 +521,10 @@ InstructionTLBMiss:
lwz r0,0(r2) /* get linux-style pte */
andc. r1,r1,r0 /* check access & ~permission */
bne- InstructionAddressInvalid /* return if access not permitted */
- ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
- /*
- * NOTE! We are assuming this is not an SMP system, otherwise
- * we would need to update the pte atomically with lwarx/stwcx.
- */
- stw r0,0(r2) /* update PTE (accessed bit) */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
- and r1,r1,r2 /* writable if _RW and _DIRTY */
rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
- ori r1,r1,0xe04 /* clear out reserved bits */
- andc r1,r0,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+ ori r1, r1, 0xe05 /* clear out reserved bits */
+ andc r1, r0, r1 /* PP = user? 2 : 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -576,16 +568,16 @@ DataLoadTLBMiss:
mfspr r3,SPRN_DMISS
lis r1,PAGE_OFFSET@h /* check if kernel address */
cmplw 0,r1,r3
- mfspr r2,SPRN_SPRG_THREAD
- li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */
- lwz r2,PGDIR(r2)
+ mfspr r2, SPRN_SPRG_PGDIR
+#ifdef CONFIG_SWAP
+ li r1, _PAGE_PRESENT | _PAGE_ACCESSED
+#else
+ li r1, _PAGE_PRESENT
+#endif
bge- 112f
- mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
- rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
- lis r2,swapper_pg_dir@ha /* if kernel address, use */
- addi r2,r2,swapper_pg_dir@l /* kernel page table */
-112: tophys(r2,r2)
- rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
+ lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
+112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- DataAddressInvalid /* return if no mapping */
@@ -593,20 +585,16 @@ DataLoadTLBMiss:
lwz r0,0(r2) /* get linux-style pte */
andc. r1,r1,r0 /* check access & ~permission */
bne- DataAddressInvalid /* return if access not permitted */
- ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
/*
* NOTE! We are assuming this is not an SMP system, otherwise
* we would need to update the pte atomically with lwarx/stwcx.
*/
- stw r0,0(r2) /* update PTE (accessed bit) */
/* Convert linux-style PTE to low word of PPC-style PTE */
rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
- and r1,r1,r2 /* writable if _RW and _DIRTY */
rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
ori r1,r1,0xe04 /* clear out reserved bits */
- andc r1,r0,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+ andc r1,r0,r1 /* PP = user? rw? 2: 3: 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -660,16 +648,16 @@ DataStoreTLBMiss:
mfspr r3,SPRN_DMISS
lis r1,PAGE_OFFSET@h /* check if kernel address */
cmplw 0,r1,r3
- mfspr r2,SPRN_SPRG_THREAD
- li r1,_PAGE_RW|_PAGE_USER|_PAGE_PRESENT /* access flags */
- lwz r2,PGDIR(r2)
+ mfspr r2, SPRN_SPRG_PGDIR
+#ifdef CONFIG_SWAP
+ li r1, _PAGE_RW | _PAGE_PRESENT | _PAGE_ACCESSED
+#else
+ li r1, _PAGE_RW | _PAGE_PRESENT
+#endif
bge- 112f
- mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
- rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
- lis r2,swapper_pg_dir@ha /* if kernel address, use */
- addi r2,r2,swapper_pg_dir@l /* kernel page table */
-112: tophys(r2,r2)
- rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
+ lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
+112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- DataAddressInvalid /* return if no mapping */
@@ -677,12 +665,10 @@ DataStoreTLBMiss:
lwz r0,0(r2) /* get linux-style pte */
andc. r1,r1,r0 /* check access & ~permission */
bne- DataAddressInvalid /* return if access not permitted */
- ori r0,r0,_PAGE_ACCESSED|_PAGE_DIRTY
/*
* NOTE! We are assuming this is not an SMP system, otherwise
* we would need to update the pte atomically with lwarx/stwcx.
*/
- stw r0,0(r2) /* update PTE (accessed/dirty bits) */
/* Convert linux-style PTE to low word of PPC-style PTE */
rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
li r1,0xe05 /* clear out reserved bits & PP lsb */
@@ -845,12 +831,12 @@ __secondary_start:
bl init_idle_6xx
#endif /* CONFIG_PPC_BOOK3S_32 */
- /* get current_thread_info and current */
- lis r1,secondary_ti@ha
- tophys(r1,r1)
- lwz r1,secondary_ti@l(r1)
- tophys(r2,r1)
- lwz r2,TI_TASK(r2)
+ /* get current's stack and current */
+ lis r2,secondary_current@ha
+ tophys(r2,r2)
+ lwz r2,secondary_current@l(r2)
+ tophys(r1,r2)
+ lwz r1,TASK_STACK(r1)
/* stack */
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
@@ -865,8 +851,10 @@ __secondary_start:
tophys(r4,r2)
addi r4,r4,THREAD /* phys address of our thread_struct */
mtspr SPRN_SPRG_THREAD,r4
+#ifdef CONFIG_PPC_RTAS
li r3,0
- mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */
+ stw r3, RTAS_SP(r4) /* 0 => not in RTAS */
+#endif
/* enable MMU and jump to start_secondary */
li r4,MSR_KERNEL
@@ -950,8 +938,10 @@ start_here:
tophys(r4,r2)
addi r4,r4,THREAD /* init task's THREAD */
mtspr SPRN_SPRG_THREAD,r4
+#ifdef CONFIG_PPC_RTAS
li r3,0
- mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */
+ stw r3, RTAS_SP(r4) /* 0 => not in RTAS */
+#endif
/* stack */
lis r1,init_thread_union@ha
@@ -1022,15 +1012,16 @@ _ENTRY(switch_mmu_context)
li r0,NUM_USER_SEGMENTS
mtctr r0
+ lwz r4, MM_PGD(r4)
#ifdef CONFIG_BDI_SWITCH
/* Context switch the PTE pointer for the Abatron BDI2000.
* The PGDIR is passed as second argument.
*/
- lwz r4,MM_PGD(r4)
- lis r5, KERNELBASE@h
- lwz r5, 0xf0(r5)
- stw r4, 0x4(r5)
+ lis r5, abatron_pteptrs@ha
+ stw r4, abatron_pteptrs@l + 0x4(r5)
#endif
+ tophys(r4, r4)
+ mtspr SPRN_SPRG_PGDIR, r4
li r4,0
isync
3:
@@ -1105,6 +1096,41 @@ BEGIN_MMU_FTR_SECTION
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blr
+_ENTRY(update_bats)
+ lis r4, 1f@h
+ ori r4, r4, 1f@l
+ tophys(r4, r4)
+ mfmsr r6
+ mflr r7
+ li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)
+ rlwinm r0, r6, 0, ~MSR_RI
+ rlwinm r0, r0, 0, ~MSR_EE
+ mtmsr r0
+ mtspr SPRN_SRR0, r4
+ mtspr SPRN_SRR1, r3
+ SYNC
+ RFI
+1: bl clear_bats
+ lis r3, BATS@ha
+ addi r3, r3, BATS@l
+ tophys(r3, r3)
+ LOAD_BAT(0, r3, r4, r5)
+ LOAD_BAT(1, r3, r4, r5)
+ LOAD_BAT(2, r3, r4, r5)
+ LOAD_BAT(3, r3, r4, r5)
+BEGIN_MMU_FTR_SECTION
+ LOAD_BAT(4, r3, r4, r5)
+ LOAD_BAT(5, r3, r4, r5)
+ LOAD_BAT(6, r3, r4, r5)
+ LOAD_BAT(7, r3, r4, r5)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+ li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI)
+ mtmsr r3
+ mtspr SPRN_SRR0, r7
+ mtspr SPRN_SRR1, r6
+ SYNC
+ RFI
+
flush_tlbs:
lis r10, 0x40
1: addic. r10, r10, -0x1000
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index b19d78410511..a9c934f2319b 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -115,7 +115,7 @@ _ENTRY(saved_ksp_limit)
andi. r11,r11,MSR_PR; \
beq 1f; \
mfspr r1,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\
+ lwz r1,TASK_STACK-THREAD(r1); /* this thread's kernel stack */\
addi r1,r1,THREAD_SIZE; \
1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\
tophys(r11,r1); \
@@ -158,7 +158,7 @@ _ENTRY(saved_ksp_limit)
beq 1f; \
/* COMING FROM USER MODE */ \
mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
+ lwz r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\
1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\
tophys(r11,r11); \
stw r10,_CCR(r11); /* save various registers */\
@@ -953,9 +953,8 @@ _GLOBAL(set_context)
/* Context switch the PTE pointer for the Abatron BDI2000.
* The PGDIR is the second parameter.
*/
- lis r5, KERNELBASE@h
- lwz r5, 0xf0(r5)
- stw r4, 0x4(r5)
+ lis r5, abatron_pteptrs@ha
+ stw r4, abatron_pteptrs@l + 0x4(r5)
#endif
sync
mtspr SPRN_PID,r3
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index bf23c19c92d6..37117ab11584 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -1019,10 +1019,10 @@ _GLOBAL(start_secondary_47x)
/* Now we can get our task struct and real stack pointer */
- /* Get current_thread_info and current */
- lis r1,secondary_ti@ha
- lwz r1,secondary_ti@l(r1)
- lwz r2,TI_TASK(r1)
+ /* Get current's stack and current */
+ lis r2,secondary_current@ha
+ lwz r2,secondary_current@l(r2)
+ lwz r1,TASK_STACK(r2)
/* Current stack pointer */
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 4898e9491a1c..3fad8d499767 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -801,21 +801,19 @@ __secondary_start:
/* Set thread priority to MEDIUM */
HMT_MEDIUM
- /* Initialize the kernel stack */
- LOAD_REG_ADDR(r3, current_set)
- sldi r28,r24,3 /* get current_set[cpu#] */
- ldx r14,r3,r28
- addi r14,r14,THREAD_SIZE-STACK_FRAME_OVERHEAD
- std r14,PACAKSAVE(r13)
-
- /* Do early setup for that CPU (SLB and hash table pointer) */
+ /*
+ * Do early setup for this CPU, in particular initialising the MMU so we
+ * can turn it on below. This is a call to C, which is OK, we're still
+ * running on the emergency stack.
+ */
bl early_setup_secondary
/*
- * setup the new stack pointer, but *don't* use this until
- * translation is on.
+ * The primary has initialized our kernel stack for us in the paca, grab
+ * it and put it in r1. We must *not* use it until we turn on the MMU
+ * below, because it may not be inside the RMO.
*/
- mr r1, r14
+ ld r1, PACAKSAVE(r13)
/* Clear backchain so we get nice backtraces */
li r7,0
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 20cc816b3508..03c73b4c6435 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -142,7 +142,7 @@ instruction_counter:
tophys(r11,r1); /* use tophys(r1) if kernel */ \
beq 1f; \
mfspr r11,SPRN_SPRG_THREAD; \
- lwz r11,THREAD_INFO-THREAD(r11); \
+ lwz r11,TASK_STACK-THREAD(r11); \
addi r11,r11,THREAD_SIZE; \
tophys(r11,r11); \
1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
@@ -292,6 +292,17 @@ SystemCall:
*/
EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD)
+/* Called from DataStoreTLBMiss when perf TLB misses events are activated */
+#ifdef CONFIG_PERF_EVENTS
+ patch_site 0f, patch__dtlbmiss_perf
+0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ addi r10, r10, 1
+ stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ rfi
+#endif
+
. = 0x1100
/*
* For the MPC8xx, this is a software tablewalk to load the instruction
@@ -337,8 +348,8 @@ InstructionTLBMiss:
rlwinm r10, r10, 16, 0xfff8
cmpli cr0, r10, PAGE_OFFSET@h
#ifndef CONFIG_PIN_TLB_TEXT
- /* It is assumed that kernel code fits into the first 8M page */
-0: cmpli cr7, r10, (PAGE_OFFSET + 0x0800000)@h
+ /* It is assumed that kernel code fits into the first 32M */
+0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h
patch_site 0b, patch__itlbmiss_linmem_top
#endif
#endif
@@ -405,10 +416,20 @@ InstructionTLBMiss:
#ifndef CONFIG_PIN_TLB_TEXT
ITLBMissLinear:
mtcr r11
+#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23
+ patch_site 0f, patch__itlbmiss_linmem_top8
+
+ mfspr r10, SPRN_SRR0
+0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha
+ rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K
+ ori r11, r11, MI_PS512K | MI_SVALID
+ rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */
+#else
/* Set 8M byte page and mark it valid */
li r11, MI_PS8MEG | MI_SVALID
- mtspr SPRN_MI_TWC, r11
rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */
+#endif
+ mtspr SPRN_MI_TWC, r11
ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
_PAGE_PRESENT
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
@@ -434,7 +455,7 @@ DataStoreTLBMiss:
#ifndef CONFIG_PIN_TLB_IMMR
cmpli cr6, r10, VIRT_IMMR_BASE@h
#endif
-0: cmpli cr7, r10, (PAGE_OFFSET + 0x1800000)@h
+0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h
patch_site 0b, patch__dtlbmiss_linmem_top
mfspr r10, SPRN_M_TWB /* Get level 1 table */
@@ -494,16 +515,6 @@ DataStoreTLBMiss:
rfi
patch_site 0b, patch__dtlbmiss_exit_1
-#ifdef CONFIG_PERF_EVENTS
- patch_site 0f, patch__dtlbmiss_perf
-0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
- addi r10, r10, 1
- stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
- mfspr r10, SPRN_SPRG_SCRATCH0
- mfspr r11, SPRN_SPRG_SCRATCH1
- rfi
-#endif
-
DTLBMissIMMR:
mtcr r11
/* Set 512k byte guarded page and mark it valid */
@@ -525,10 +536,29 @@ DTLBMissIMMR:
DTLBMissLinear:
mtcr r11
+ rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */
+#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_DATA_SHIFT < 23
+ patch_site 0f, patch__dtlbmiss_romem_top8
+
+0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha
+ rlwinm r11, r11, 0, 0xff800000
+ neg r10, r11
+ or r11, r11, r10
+ rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K
+ ori r11, r11, MI_PS512K | MI_SVALID
+ mfspr r10, SPRN_MD_EPN
+ rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */
+#else
/* Set 8M byte page and mark it valid */
li r11, MD_PS8MEG | MD_SVALID
+#endif
mtspr SPRN_MD_TWC, r11
- rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */
+#ifdef CONFIG_STRICT_KERNEL_RWX
+ patch_site 0f, patch__dtlbmiss_romem_top
+
+0: subis r11, r10, 0
+ rlwimi r10, r11, 11, _PAGE_RO
+#endif
ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
_PAGE_PRESENT
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
@@ -551,11 +581,11 @@ InstructionTLBError:
mr r4,r12
andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
andis. r10,r9,SRR1_ISI_NOPT@h
- beq+ 1f
+ beq+ .Litlbie
tlbie r4
-itlbie:
/* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
-1: EXC_XFER_LITE(0x400, handle_page_fault)
+.Litlbie:
+ EXC_XFER_LITE(0x400, handle_page_fault)
/* This is the data TLB error on the MPC8xx. This could be due to
* many reasons, including a dirty update to a pte. We bail out to
@@ -577,10 +607,10 @@ DARFixed:/* Return from dcbx instruction bug workaround */
stw r5,_DSISR(r11)
mfspr r4,SPRN_DAR
andis. r10,r5,DSISR_NOHPTE@h
- beq+ 1f
+ beq+ .Ldtlbie
tlbie r4
-dtlbie:
-1: li r10,RPN_PATTERN
+.Ldtlbie:
+ li r10,RPN_PATTERN
mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */
/* 0x300 is DataAccess exception, needed by bad_page_fault() */
EXC_XFER_LITE(0x300, handle_page_fault)
@@ -603,8 +633,8 @@ DataBreakpoint:
mtspr SPRN_SPRG_SCRATCH1, r11
mfcr r10
mfspr r11, SPRN_SRR0
- cmplwi cr0, r11, (dtlbie - PAGE_OFFSET)@l
- cmplwi cr7, r11, (itlbie - PAGE_OFFSET)@l
+ cmplwi cr0, r11, (.Ldtlbie - PAGE_OFFSET)@l
+ cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l
beq- cr0, 11f
beq- cr7, 11f
EXCEPTION_PROLOG_1
@@ -886,28 +916,11 @@ initial_mmu:
mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */
tlbia /* Invalidate all TLB entries */
-#ifdef CONFIG_PIN_TLB_TEXT
- lis r8, MI_RSV4I@h
- ori r8, r8, 0x1c00
-
- mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
-#endif
-
#ifdef CONFIG_PIN_TLB_DATA
oris r10, r10, MD_RSV4I@h
mtspr SPRN_MD_CTR, r10 /* Set data TLB control */
#endif
- /* Now map the lower 8 Meg into the ITLB. */
- lis r8, KERNELBASE@h /* Create vaddr for TLB */
- ori r8, r8, MI_EVALID /* Mark it valid */
- mtspr SPRN_MI_EPN, r8
- li r8, MI_PS8MEG /* Set 8M byte page */
- ori r8, r8, MI_SVALID /* Make it valid */
- mtspr SPRN_MI_TWC, r8
- li r8, MI_BOOTINIT /* Create RPN for address 0 */
- mtspr SPRN_MI_RPN, r8 /* Store TLB entry */
-
lis r8, MI_APG_INIT@h /* Set protection modes */
ori r8, r8, MI_APG_INIT@l
mtspr SPRN_MI_AP, r8
@@ -937,6 +950,34 @@ initial_mmu:
mtspr SPRN_MD_RPN, r8
#endif
+ /* Now map the lower RAM (up to 32 Mbytes) into the ITLB. */
+#ifdef CONFIG_PIN_TLB_TEXT
+ lis r8, MI_RSV4I@h
+ ori r8, r8, 0x1c00
+#endif
+ li r9, 4 /* up to 4 pages of 8M */
+ mtctr r9
+ lis r9, KERNELBASE@h /* Create vaddr for TLB */
+ li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */
+ li r11, MI_BOOTINIT /* Create RPN for address 0 */
+ lis r12, _einittext@h
+ ori r12, r12, _einittext@l
+1:
+#ifdef CONFIG_PIN_TLB_TEXT
+ mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
+ addi r8, r8, 0x100
+#endif
+
+ ori r0, r9, MI_EVALID /* Mark it valid */
+ mtspr SPRN_MI_EPN, r0
+ mtspr SPRN_MI_TWC, r10
+ mtspr SPRN_MI_RPN, r11 /* Store TLB entry */
+ addis r9, r9, 0x80
+ addis r11, r11, 0x80
+
+ cmpl cr0, r9, r12
+ bdnzf gt, 1b
+
/* Since the cache is enabled according to the information we
* just loaded into the TLB, invalidate and enable the caches here.
* We should probably check/set other modes....later.
@@ -989,5 +1030,6 @@ swapper_pg_dir:
/* Room for two PTE table poiners, usually the kernel and current user
* pointer to their respective root page table (pgdir).
*/
+ .globl abatron_pteptrs
abatron_pteptrs:
.space 8
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 306e26c073a0..1b22a8dea399 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -55,7 +55,7 @@ END_BTB_FLUSH_SECTION
beq 1f; \
BOOKE_CLEAR_BTB(r11) \
/* if from user, start at top of this thread's kernel stack */ \
- lwz r11, THREAD_INFO-THREAD(r10); \
+ lwz r11, TASK_STACK - THREAD(r10); \
ALLOC_STACK_FRAME(r11, THREAD_SIZE); \
1 : subi r11, r11, INT_FRAME_SIZE; /* Allocate exception frame */ \
stw r13, _CCR(r11); /* save various registers */ \
@@ -142,7 +142,7 @@ END_BTB_FLUSH_SECTION
BOOKE_CLEAR_BTB(r10) \
andi. r11,r11,MSR_PR; \
mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
+ lwz r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\
addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\
beq 1f; \
/* COMING FROM USER MODE */ \
@@ -155,13 +155,7 @@ END_BTB_FLUSH_SECTION
stw r10,GPR11(r11); \
b 2f; \
/* COMING FROM PRIV MODE */ \
-1: lwz r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r11); \
- lwz r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r11); \
- stw r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r8); \
- stw r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r8); \
- lwz r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r11); \
- stw r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r8); \
- mr r11,r8; \
+1: mr r11, r8; \
2: mfspr r8,SPRN_SPRG_RSCRATCH_##exc_level; \
stw r12,GPR12(r11); /* save various registers */\
mflr r10; \
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 2386ce2a9c6e..1881127682e9 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -243,8 +243,9 @@ set_ivor:
li r0,0
stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
- CURRENT_THREAD_INFO(r22, r1)
- stw r24, TI_CPU(r22)
+#ifdef CONFIG_SMP
+ stw r24, TASK_CPU(r2)
+#endif
bl early_init
@@ -717,8 +718,7 @@ finish_tlb_load:
/* Get the next_tlbcam_idx percpu var */
#ifdef CONFIG_SMP
- lwz r12, THREAD_INFO-THREAD(r12)
- lwz r15, TI_CPU(r12)
+ lwz r15, TASK_CPU-THREAD(r12)
lis r14, __per_cpu_offset@h
ori r14, r14, __per_cpu_offset@l
rlwinm r15, r15, 2, 0, 29
@@ -1089,10 +1089,10 @@ __secondary_start:
mr r4,r24 /* Why? */
bl call_setup_cpu
- /* get current_thread_info and current */
- lis r1,secondary_ti@ha
- lwz r1,secondary_ti@l(r1)
- lwz r2,TI_TASK(r1)
+ /* get current's stack and current */
+ lis r2,secondary_current@ha
+ lwz r2,secondary_current@l(r2)
+ lwz r1,TASK_STACK(r2)
/* stack */
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index ff026c9d3cab..c5e7f5bb2e66 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -136,10 +136,9 @@ BEGIN_FTR_SECTION
DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- CURRENT_THREAD_INFO(r9, r1)
- lwz r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
+ lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */
ori r8,r8,_TLF_NAPPING /* so when we take an exception */
- stw r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
+ stw r8,TI_LOCAL_FLAGS(r2) /* it will return to our caller */
mfmsr r7
ori r7,r7,MSR_EE
oris r7,r7,MSR_POW@h
@@ -159,8 +158,7 @@ _GLOBAL(power_save_ppc32_restore)
stw r9,_NIP(r11) /* make it do a blr */
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r12, r11)
- lwz r11,TI_CPU(r12) /* get cpu number * 4 */
+ lwz r11,TASK_CPU(r2) /* get cpu number * 4 */
slwi r11,r11,2
#else
li r11,0
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
index 4e0d94d02030..31e732c378ad 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -63,7 +63,7 @@ _GLOBAL(\name)
1: /* Let's set the _TLF_NAPPING flag so interrupts make us return
* to the right spot
*/
- CURRENT_THREAD_INFO(r11, r1)
+ ld r11, PACACURRENT(r13)
ld r10,TI_LOCAL_FLAGS(r11)
ori r10,r10,_TLF_NAPPING
std r10,TI_LOCAL_FLAGS(r11)
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S
index 583e55ac7d26..69dfcd2ca011 100644
--- a/arch/powerpc/kernel/idle_e500.S
+++ b/arch/powerpc/kernel/idle_e500.S
@@ -22,10 +22,9 @@
.text
_GLOBAL(e500_idle)
- CURRENT_THREAD_INFO(r3, r1)
- lwz r4,TI_LOCAL_FLAGS(r3) /* set napping bit */
+ lwz r4,TI_LOCAL_FLAGS(r2) /* set napping bit */
ori r4,r4,_TLF_NAPPING /* so when we take an exception */
- stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */
+ stw r4,TI_LOCAL_FLAGS(r2) /* it will return to our caller */
#ifdef CONFIG_PPC_E500MC
wrteei 1
@@ -88,8 +87,7 @@ _GLOBAL(power_save_ppc32_restore)
stw r9,_NIP(r11) /* make it do a blr */
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r12, r1)
- lwz r11,TI_CPU(r12) /* get cpu number * 4 */
+ lwz r11,TASK_CPU(r2) /* get cpu number * 4 */
slwi r11,r11,2
#else
li r11,0
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
index a09b3c7ca176..a2fdb0a34b75 100644
--- a/arch/powerpc/kernel/idle_power4.S
+++ b/arch/powerpc/kernel/idle_power4.S
@@ -68,7 +68,7 @@ BEGIN_FTR_SECTION
DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- CURRENT_THREAD_INFO(r9, r1)
+ ld r9, PACA_THREAD_INFO(r13)
ld r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
ori r8,r8,_TLF_NAPPING /* so when we take an exception */
std r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 916ddc4aac44..8a936723c791 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -618,9 +618,8 @@ static inline void check_stack_overflow(void)
sp = current_stack_pointer() & (THREAD_SIZE-1);
/* check for stack overflow: is there less than 2KB free? */
- if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
- pr_err("do_IRQ: stack overflow: %ld\n",
- sp - sizeof(struct thread_info));
+ if (unlikely(sp < 2048)) {
+ pr_err("do_IRQ: stack overflow: %ld\n", sp);
dump_stack();
}
#endif
@@ -660,36 +659,21 @@ void __do_irq(struct pt_regs *regs)
void do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- struct thread_info *curtp, *irqtp, *sirqtp;
+ void *cursp, *irqsp, *sirqsp;
/* Switch to the irq stack to handle this */
- curtp = current_thread_info();
- irqtp = hardirq_ctx[raw_smp_processor_id()];
- sirqtp = softirq_ctx[raw_smp_processor_id()];
+ cursp = (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+ irqsp = hardirq_ctx[raw_smp_processor_id()];
+ sirqsp = softirq_ctx[raw_smp_processor_id()];
/* Already there ? */
- if (unlikely(curtp == irqtp || curtp == sirqtp)) {
+ if (unlikely(cursp == irqsp || cursp == sirqsp)) {
__do_irq(regs);
set_irq_regs(old_regs);
return;
}
-
- /* Prepare the thread_info in the irq stack */
- irqtp->task = curtp->task;
- irqtp->flags = 0;
-
- /* Copy the preempt_count so that the [soft]irq checks work. */
- irqtp->preempt_count = curtp->preempt_count;
-
/* Switch stack and call */
- call_do_irq(regs, irqtp);
-
- /* Restore stack limit */
- irqtp->task = NULL;
-
- /* Copy back updates to the thread_info */
- if (irqtp->flags)
- set_bits(irqtp->flags, &curtp->flags);
+ call_do_irq(regs, irqsp);
set_irq_regs(old_regs);
}
@@ -698,90 +682,20 @@ void __init init_IRQ(void)
{
if (ppc_md.init_IRQ)
ppc_md.init_IRQ();
-
- exc_lvl_ctx_init();
-
- irq_ctx_init();
}
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
-struct thread_info *critirq_ctx[NR_CPUS] __read_mostly;
-struct thread_info *dbgirq_ctx[NR_CPUS] __read_mostly;
-struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly;
-
-void exc_lvl_ctx_init(void)
-{
- struct thread_info *tp;
- int i, cpu_nr;
-
- for_each_possible_cpu(i) {
-#ifdef CONFIG_PPC64
- cpu_nr = i;
-#else
-#ifdef CONFIG_SMP
- cpu_nr = get_hard_smp_processor_id(i);
-#else
- cpu_nr = 0;
-#endif
+void *critirq_ctx[NR_CPUS] __read_mostly;
+void *dbgirq_ctx[NR_CPUS] __read_mostly;
+void *mcheckirq_ctx[NR_CPUS] __read_mostly;
#endif
- memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE);
- tp = critirq_ctx[cpu_nr];
- tp->cpu = cpu_nr;
- tp->preempt_count = 0;
-
-#ifdef CONFIG_BOOKE
- memset((void *)dbgirq_ctx[cpu_nr], 0, THREAD_SIZE);
- tp = dbgirq_ctx[cpu_nr];
- tp->cpu = cpu_nr;
- tp->preempt_count = 0;
-
- memset((void *)mcheckirq_ctx[cpu_nr], 0, THREAD_SIZE);
- tp = mcheckirq_ctx[cpu_nr];
- tp->cpu = cpu_nr;
- tp->preempt_count = HARDIRQ_OFFSET;
-#endif
- }
-}
-#endif
-
-struct thread_info *softirq_ctx[NR_CPUS] __read_mostly;
-struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly;
-
-void irq_ctx_init(void)
-{
- struct thread_info *tp;
- int i;
-
- for_each_possible_cpu(i) {
- memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
- tp = softirq_ctx[i];
- tp->cpu = i;
- klp_init_thread_info(tp);
-
- memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
- tp = hardirq_ctx[i];
- tp->cpu = i;
- klp_init_thread_info(tp);
- }
-}
+void *softirq_ctx[NR_CPUS] __read_mostly;
+void *hardirq_ctx[NR_CPUS] __read_mostly;
void do_softirq_own_stack(void)
{
- struct thread_info *curtp, *irqtp;
-
- curtp = current_thread_info();
- irqtp = softirq_ctx[smp_processor_id()];
- irqtp->task = curtp->task;
- irqtp->flags = 0;
- call_do_softirq(irqtp);
- irqtp->task = NULL;
-
- /* Set any flag that may have been set on the
- * alternate stack
- */
- if (irqtp->flags)
- set_bits(irqtp->flags, &curtp->flags);
+ call_do_softirq(softirq_ctx[smp_processor_id()]);
}
irq_hw_number_t virq_to_hw(unsigned int virq)
@@ -827,11 +741,6 @@ int irq_choose_cpu(const struct cpumask *mask)
}
#endif
-int arch_early_irq_init(void)
-{
- return 0;
-}
-
#ifdef CONFIG_PPC64
static int __init setup_noirqdistrib(char *str)
{
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index e1865565f0ae..7dd55eb1259d 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -151,41 +151,13 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
return 1;
}
-static DEFINE_PER_CPU(struct thread_info, kgdb_thread_info);
static int kgdb_singlestep(struct pt_regs *regs)
{
- struct thread_info *thread_info, *exception_thread_info;
- struct thread_info *backup_current_thread_info =
- this_cpu_ptr(&kgdb_thread_info);
-
if (user_mode(regs))
return 0;
- /*
- * On Book E and perhaps other processors, singlestep is handled on
- * the critical exception stack. This causes current_thread_info()
- * to fail, since it it locates the thread_info by masking off
- * the low bits of the current stack pointer. We work around
- * this issue by copying the thread_info from the kernel stack
- * before calling kgdb_handle_exception, and copying it back
- * afterwards. On most processors the copy is avoided since
- * exception_thread_info == thread_info.
- */
- thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1));
- exception_thread_info = current_thread_info();
-
- if (thread_info != exception_thread_info) {
- /* Save the original current_thread_info. */
- memcpy(backup_current_thread_info, exception_thread_info, sizeof *thread_info);
- memcpy(exception_thread_info, thread_info, sizeof *thread_info);
- }
-
kgdb_handle_exception(0, SIGTRAP, 0, regs);
- if (thread_info != exception_thread_info)
- /* Restore current_thread_info lastly. */
- memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info);
-
return 1;
}
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index a0f6f45005bd..75692c327ba0 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -317,10 +317,8 @@ void default_machine_kexec(struct kimage *image)
* We setup preempt_count to avoid using VMX in memcpy.
* XXX: the task struct will likely be invalid once we do the copy!
*/
- kexec_stack.thread_info.task = current_thread_info()->task;
- kexec_stack.thread_info.flags = 0;
- kexec_stack.thread_info.preempt_count = HARDIRQ_OFFSET;
- kexec_stack.thread_info.cpu = current_thread_info()->cpu;
+ current_thread_info()->flags = 0;
+ current_thread_info()->preempt_count = HARDIRQ_OFFSET;
/* We need a static PACA, too; copy this CPU's PACA over and switch to
* it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index bd933a75f0bc..b5fec1f9751a 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -31,6 +31,7 @@
#include <asm/machdep.h>
#include <asm/mce.h>
+#include <asm/nmi.h>
static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
@@ -301,13 +302,13 @@ static void machine_check_process_queued_event(struct irq_work *work)
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
evt = this_cpu_ptr(&mce_event_queue[index]);
- machine_check_print_event_info(evt, false);
+ machine_check_print_event_info(evt, false, false);
__this_cpu_dec(mce_queue_count);
}
}
void machine_check_print_event_info(struct machine_check_event *evt,
- bool user_mode)
+ bool user_mode, bool in_guest)
{
const char *level, *sevstr, *subtype;
static const char *mc_ue_types[] = {
@@ -387,7 +388,9 @@ void machine_check_print_event_info(struct machine_check_event *evt,
evt->disposition == MCE_DISPOSITION_RECOVERED ?
"Recovered" : "Not recovered");
- if (user_mode) {
+ if (in_guest) {
+ printk("%s Guest NIP: %016llx\n", level, evt->srr0);
+ } else if (user_mode) {
printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
evt->srr0, current->pid, current->comm);
} else {
@@ -488,6 +491,8 @@ long machine_check_early(struct pt_regs *regs)
{
long handled = 0;
+ hv_nmi_check_nonrecoverable(regs);
+
/*
* See if platform is capable of handling machine check.
*/
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 57d2ffb2d45c..0dda4f8e3d7a 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -46,11 +46,10 @@ _GLOBAL(call_do_softirq)
mflr r0
stw r0,4(r1)
lwz r10,THREAD+KSP_LIMIT(r2)
- addi r11,r3,THREAD_INFO_GAP
+ stw r3, THREAD+KSP_LIMIT(r2)
stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
mr r1,r3
stw r10,8(r1)
- stw r11,THREAD+KSP_LIMIT(r2)
bl __do_softirq
lwz r10,8(r1)
lwz r1,0(r1)
@@ -60,17 +59,16 @@ _GLOBAL(call_do_softirq)
blr
/*
- * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
+ * void call_do_irq(struct pt_regs *regs, void *sp);
*/
_GLOBAL(call_do_irq)
mflr r0
stw r0,4(r1)
lwz r10,THREAD+KSP_LIMIT(r2)
- addi r11,r4,THREAD_INFO_GAP
+ stw r4, THREAD+KSP_LIMIT(r2)
stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
mr r1,r4
stw r10,8(r1)
- stw r11,THREAD+KSP_LIMIT(r2)
bl __do_irq
lwz r10,8(r1)
lwz r1,0(r1)
@@ -183,10 +181,13 @@ _GLOBAL(low_choose_750fx_pll)
or r4,r4,r5
mtspr SPRN_HID1,r4
+#ifdef CONFIG_SMP
/* Store new HID1 image */
- CURRENT_THREAD_INFO(r6, r1)
- lwz r6,TI_CPU(r6)
+ lwz r6,TASK_CPU(r2)
slwi r6,r6,2
+#else
+ li r6, 0
+#endif
addis r6,r6,nap_save_hid1@ha
stw r4,nap_save_hid1@l(r6)
@@ -599,7 +600,7 @@ EXPORT_SYMBOL(__bswapdi2)
#ifdef CONFIG_SMP
_GLOBAL(start_secondary_resume)
/* Reset stack */
- CURRENT_THREAD_INFO(r1, r1)
+ rlwinm r1, r1, 0, 0, 31 - THREAD_SHIFT
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
li r3,0
stw r3,0(r1) /* Zero the stack frame pointer */
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 38b03a330cd2..244d2462e781 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -7,12 +7,6 @@
* 2 of the License, or (at your option) any later version.
*
* /dev/nvram driver for PPC64
- *
- * This perhaps should live in drivers/char
- *
- * TODO: Split the /dev/nvram part (that one can use
- * drivers/char/generic_nvram.c) from the arch & partition
- * parsing code.
*/
#include <linux/types.h>
@@ -714,137 +708,6 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
spin_unlock_irqrestore(&lock, flags);
}
-static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin)
-{
- if (ppc_md.nvram_size == NULL)
- return -ENODEV;
- return generic_file_llseek_size(file, offset, origin, MAX_LFS_FILESIZE,
- ppc_md.nvram_size());
-}
-
-
-static ssize_t dev_nvram_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- ssize_t ret;
- char *tmp = NULL;
- ssize_t size;
-
- if (!ppc_md.nvram_size) {
- ret = -ENODEV;
- goto out;
- }
-
- size = ppc_md.nvram_size();
- if (size < 0) {
- ret = size;
- goto out;
- }
-
- if (*ppos >= size) {
- ret = 0;
- goto out;
- }
-
- count = min_t(size_t, count, size - *ppos);
- count = min(count, PAGE_SIZE);
-
- tmp = kmalloc(count, GFP_KERNEL);
- if (!tmp) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = ppc_md.nvram_read(tmp, count, ppos);
- if (ret <= 0)
- goto out;
-
- if (copy_to_user(buf, tmp, ret))
- ret = -EFAULT;
-
-out:
- kfree(tmp);
- return ret;
-
-}
-
-static ssize_t dev_nvram_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- ssize_t ret;
- char *tmp = NULL;
- ssize_t size;
-
- ret = -ENODEV;
- if (!ppc_md.nvram_size)
- goto out;
-
- ret = 0;
- size = ppc_md.nvram_size();
- if (*ppos >= size || size < 0)
- goto out;
-
- count = min_t(size_t, count, size - *ppos);
- count = min(count, PAGE_SIZE);
-
- tmp = memdup_user(buf, count);
- if (IS_ERR(tmp)) {
- ret = PTR_ERR(tmp);
- goto out;
- }
-
- ret = ppc_md.nvram_write(tmp, count, ppos);
-
- kfree(tmp);
-out:
- return ret;
-}
-
-static long dev_nvram_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- switch(cmd) {
-#ifdef CONFIG_PPC_PMAC
- case OBSOLETE_PMAC_NVRAM_GET_OFFSET:
- printk(KERN_WARNING "nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n");
- /* fall through */
- case IOC_NVRAM_GET_OFFSET: {
- int part, offset;
-
- if (!machine_is(powermac))
- return -EINVAL;
- if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0)
- return -EFAULT;
- if (part < pmac_nvram_OF || part > pmac_nvram_NR)
- return -EINVAL;
- offset = pmac_get_partition(part);
- if (offset < 0)
- return offset;
- if (copy_to_user((void __user*)arg, &offset, sizeof(offset)) != 0)
- return -EFAULT;
- return 0;
- }
-#endif /* CONFIG_PPC_PMAC */
- default:
- return -EINVAL;
- }
-}
-
-static const struct file_operations nvram_fops = {
- .owner = THIS_MODULE,
- .llseek = dev_nvram_llseek,
- .read = dev_nvram_read,
- .write = dev_nvram_write,
- .unlocked_ioctl = dev_nvram_ioctl,
-};
-
-static struct miscdevice nvram_dev = {
- NVRAM_MINOR,
- "nvram",
- &nvram_fops
-};
-
-
#ifdef DEBUG_NVRAM
static void __init nvram_print_partitions(char * label)
{
@@ -992,6 +855,8 @@ loff_t __init nvram_create_partition(const char *name, int sig,
long size = 0;
int rc;
+ BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16);
+
/* Convert sizes from bytes to blocks */
req_size = _ALIGN_UP(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
min_size = _ALIGN_UP(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
@@ -1192,22 +1057,3 @@ int __init nvram_scan_partitions(void)
kfree(header);
return err;
}
-
-static int __init nvram_init(void)
-{
- int rc;
-
- BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16);
-
- if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0)
- return -ENODEV;
-
- rc = misc_register(&nvram_dev);
- if (rc != 0) {
- printk(KERN_ERR "nvram_init: failed to register device\n");
- return rc;
- }
-
- return rc;
-}
-device_initcall(nvram_init);
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 913bfca09c4f..8c890c6557ed 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -11,6 +11,7 @@
#include <linux/export.h>
#include <linux/memblock.h>
#include <linux/sched/task.h>
+#include <linux/numa.h>
#include <asm/lppaca.h>
#include <asm/paca.h>
@@ -27,7 +28,7 @@
static void *__init alloc_paca_data(unsigned long size, unsigned long align,
unsigned long limit, int cpu)
{
- unsigned long pa;
+ void *ptr;
int nid;
/*
@@ -36,23 +37,21 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align,
* which will put its paca in the right place.
*/
if (cpu == boot_cpuid) {
- nid = -1;
+ nid = NUMA_NO_NODE;
memblock_set_bottom_up(true);
} else {
nid = early_cpu_to_node(cpu);
}
- pa = memblock_alloc_base_nid(size, align, limit, nid, MEMBLOCK_NONE);
- if (!pa) {
- pa = memblock_alloc_base(size, align, limit);
- if (!pa)
- panic("cannot allocate paca data");
- }
+ ptr = memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT,
+ limit, nid);
+ if (!ptr)
+ panic("cannot allocate paca data");
if (cpu == boot_cpuid)
memblock_set_bottom_up(false);
- return __va(pa);
+ return ptr;
}
#ifdef CONFIG_PPC_PSERIES
@@ -118,7 +117,6 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
}
s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu);
- memset(s, 0, sizeof(*s));
s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
s->buffer_length = cpu_to_be32(sizeof(*s));
@@ -222,7 +220,6 @@ void __init allocate_paca(int cpu)
paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES,
limit, cpu);
paca_ptrs[cpu] = paca;
- memset(paca, 0, sizeof(struct paca_struct));
initialise_paca(paca, cpu);
#ifdef CONFIG_PPC_PSERIES
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 88e4f69a09e5..ff4b7539cbdf 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -32,6 +32,7 @@
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/vgaarb.h>
+#include <linux/numa.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -62,19 +63,13 @@ resource_size_t isa_mem_base;
EXPORT_SYMBOL(isa_mem_base);
-static const struct dma_map_ops *pci_dma_ops = &dma_nommu_ops;
+static const struct dma_map_ops *pci_dma_ops;
void set_pci_dma_ops(const struct dma_map_ops *dma_ops)
{
pci_dma_ops = dma_ops;
}
-const struct dma_map_ops *get_pci_dma_ops(void)
-{
- return pci_dma_ops;
-}
-EXPORT_SYMBOL(get_pci_dma_ops);
-
/*
* This function should run under locking protection, specifically
* hose_spinlock.
@@ -132,7 +127,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
int nid = of_node_to_nid(dev);
if (nid < 0 || !node_online(nid))
- nid = -1;
+ nid = NUMA_NO_NODE;
PHB_SET_NODE(phb, nid);
}
@@ -357,6 +352,17 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node)
return NULL;
}
+struct pci_controller *pci_find_controller_for_domain(int domain_nr)
+{
+ struct pci_controller *hose;
+
+ list_for_each_entry(hose, &hose_list, list_node)
+ if (hose->global_number == domain_nr)
+ return hose;
+
+ return NULL;
+}
+
/*
* Reads the interrupt pin to determine if interrupt is use by card.
* If the interrupt is used, then gets the interrupt line from the
@@ -972,7 +978,7 @@ static void pcibios_setup_device(struct pci_dev *dev)
/* Hook up default DMA ops */
set_dma_ops(&dev->dev, pci_dma_ops);
- set_dma_offset(&dev->dev, PCI_DRAM_OFFSET);
+ dev->dev.archdata.dma_offset = PCI_DRAM_OFFSET;
/* Additional platform DMA/iommu setup */
phb = pci_bus_to_host(dev->bus);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ce393df243aa..dd9e0d5386ee 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -176,7 +176,7 @@ static void __giveup_fpu(struct task_struct *tsk)
save_fpu(tsk);
msr = tsk->thread.regs->msr;
- msr &= ~MSR_FP;
+ msr &= ~(MSR_FP|MSR_FE0|MSR_FE1);
#ifdef CONFIG_VSX
if (cpu_has_feature(CPU_FTR_VSX))
msr &= ~MSR_VSX;
@@ -1231,8 +1231,8 @@ struct task_struct *__switch_to(struct task_struct *prev,
batch->active = 1;
}
- if (current_thread_info()->task->thread.regs) {
- restore_math(current_thread_info()->task->thread.regs);
+ if (current->thread.regs) {
+ restore_math(current->thread.regs);
/*
* The copy-paste buffer can only store into foreign real
@@ -1242,7 +1242,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
* mappings, we must issue a cp_abort to clear any state and
* prevent snooping, corruption or a covert channel.
*/
- if (current_thread_info()->task->thread.used_vas)
+ if (current->thread.used_vas)
asm volatile(PPC_CP_ABORT);
}
#endif /* CONFIG_PPC_BOOK3S_64 */
@@ -1634,7 +1634,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
struct thread_info *ti = task_thread_info(p);
- klp_init_thread_info(ti);
+ klp_init_thread_info(p);
/* Copy registers */
sp -= sizeof(struct pt_regs);
@@ -1691,8 +1691,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
sp -= STACK_FRAME_OVERHEAD;
p->thread.ksp = sp;
#ifdef CONFIG_PPC32
- p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
- _ALIGN_UP(sizeof(struct thread_info), 16);
+ p->thread.ksp_limit = (unsigned long)end_of_stack(p);
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
p->thread.ptrace_bps[0] = NULL;
@@ -1995,21 +1994,14 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
unsigned long stack_page;
unsigned long cpu = task_cpu(p);
- /*
- * Avoid crashing if the stack has overflowed and corrupted
- * task_cpu(p), which is in the thread_info struct.
- */
- if (cpu < NR_CPUS && cpu_possible(cpu)) {
- stack_page = (unsigned long) hardirq_ctx[cpu];
- if (sp >= stack_page + sizeof(struct thread_struct)
- && sp <= stack_page + THREAD_SIZE - nbytes)
- return 1;
-
- stack_page = (unsigned long) softirq_ctx[cpu];
- if (sp >= stack_page + sizeof(struct thread_struct)
- && sp <= stack_page + THREAD_SIZE - nbytes)
- return 1;
- }
+ stack_page = (unsigned long)hardirq_ctx[cpu];
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
+ stack_page = (unsigned long)softirq_ctx[cpu];
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
return 0;
}
@@ -2018,8 +2010,10 @@ int validate_sp(unsigned long sp, struct task_struct *p,
{
unsigned long stack_page = (unsigned long)task_stack_page(p);
- if (sp >= stack_page + sizeof(struct thread_struct)
- && sp <= stack_page + THREAD_SIZE - nbytes)
+ if (sp < THREAD_SIZE)
+ return 0;
+
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
return 1;
return valid_irq_stack(sp, p, nbytes);
@@ -2027,7 +2021,7 @@ int validate_sp(unsigned long sp, struct task_struct *p,
EXPORT_SYMBOL(validate_sp);
-unsigned long get_wchan(struct task_struct *p)
+static unsigned long __get_wchan(struct task_struct *p)
{
unsigned long ip, sp;
int count = 0;
@@ -2053,6 +2047,20 @@ unsigned long get_wchan(struct task_struct *p)
return 0;
}
+unsigned long get_wchan(struct task_struct *p)
+{
+ unsigned long ret;
+
+ if (!try_get_task_stack(p))
+ return 0;
+
+ ret = __get_wchan(p);
+
+ put_task_stack(p);
+
+ return ret;
+}
+
static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
void show_stack(struct task_struct *tsk, unsigned long *stack)
@@ -2067,9 +2075,13 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
int curr_frame = 0;
#endif
- sp = (unsigned long) stack;
if (tsk == NULL)
tsk = current;
+
+ if (!try_get_task_stack(tsk))
+ return;
+
+ sp = (unsigned long) stack;
if (sp == 0) {
if (tsk == current)
sp = current_stack_pointer();
@@ -2081,7 +2093,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
printk("Call Trace:\n");
do {
if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
- return;
+ break;
stack = (unsigned long *) sp;
newsp = stack[0];
@@ -2121,6 +2133,8 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
sp = newsp;
} while (count++ < kstack_depth_to_print);
+
+ put_task_stack(tsk);
}
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index cdd5d1d3ae41..d9ac7d94656e 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -33,6 +33,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <linux/context_tracking.h>
+#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <linux/pkeys.h>
@@ -274,6 +275,8 @@ static int set_user_trap(struct task_struct *task, unsigned long trap)
*/
int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
{
+ unsigned int regs_max;
+
if ((task->thread.regs == NULL) || !data)
return -EIO;
@@ -297,7 +300,9 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
}
#endif
- if (regno < (sizeof(struct user_pt_regs) / sizeof(unsigned long))) {
+ regs_max = sizeof(struct user_pt_regs) / sizeof(unsigned long);
+ if (regno < regs_max) {
+ regno = array_index_nospec(regno, regs_max);
*data = ((unsigned long *)task->thread.regs)[regno];
return 0;
}
@@ -321,6 +326,7 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data)
return set_user_dscr(task, data);
if (regno <= PT_MAX_PUT_REG) {
+ regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1);
((unsigned long *)task->thread.regs)[regno] = data;
return 0;
}
@@ -561,6 +567,7 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset,
/*
* Copy out only the low-order word of vrsave.
*/
+ int start, end;
union {
elf_vrreg_t reg;
u32 word;
@@ -569,8 +576,10 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset,
vrsave.word = target->thread.vrsave;
+ start = 33 * sizeof(vector128);
+ end = start + sizeof(vrsave);
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
- 33 * sizeof(vector128), -1);
+ start, end);
}
return ret;
@@ -608,6 +617,7 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset,
/*
* We use only the first word of vrsave.
*/
+ int start, end;
union {
elf_vrreg_t reg;
u32 word;
@@ -616,8 +626,10 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset,
vrsave.word = target->thread.vrsave;
+ start = 33 * sizeof(vector128);
+ end = start + sizeof(vrsave);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
- 33 * sizeof(vector128), -1);
+ start, end);
if (!ret)
target->thread.vrsave = vrsave.word;
}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index ca00fbb97cf8..f17868e19e2c 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -459,8 +459,8 @@ void __init smp_setup_cpu_maps(void)
DBG("smp_setup_cpu_maps()\n");
- cpu_to_phys_id = __va(memblock_phys_alloc(nr_cpu_ids * sizeof(u32), __alignof__(u32)));
- memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32));
+ cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32),
+ __alignof__(u32));
for_each_node_by_type(dn, "cpu") {
const __be32 *intserv;
@@ -634,7 +634,7 @@ void probe_machine(void)
}
/* What can we do if we didn't find ? */
if (machine_id >= &__machine_desc_end) {
- DBG("No suitable machine found !\n");
+ pr_err("No suitable machine description found !\n");
for (;;);
}
@@ -791,7 +791,6 @@ void arch_setup_pdev_archdata(struct platform_device *pdev)
{
pdev->archdata.dma_mask = DMA_BIT_MASK(32);
pdev->dev.dma_mask = &pdev->archdata.dma_mask;
- set_dma_ops(&pdev->dev, &dma_nommu_ops);
}
static __init void print_system_info(void)
@@ -938,7 +937,7 @@ void __init setup_arch(char **cmdline_p)
/* Reserve large chunks of memory for use by CMA for KVM. */
kvm_cma_reserve();
- klp_init_thread_info(&init_thread_info);
+ klp_init_thread_info(&init_task);
init_mm.start_code = (unsigned long)_stext;
init_mm.end_code = (unsigned long) _etext;
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 947f904688b0..4a65e08a6042 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -17,6 +17,7 @@
#include <linux/console.h>
#include <linux/memblock.h>
#include <linux/export.h>
+#include <linux/nvram.h>
#include <asm/io.h>
#include <asm/prom.h>
@@ -147,41 +148,6 @@ static int __init ppc_setup_l3cr(char *str)
}
__setup("l3cr=", ppc_setup_l3cr);
-#ifdef CONFIG_GENERIC_NVRAM
-
-/* Generic nvram hooks used by drivers/char/gen_nvram.c */
-unsigned char nvram_read_byte(int addr)
-{
- if (ppc_md.nvram_read_val)
- return ppc_md.nvram_read_val(addr);
- return 0xff;
-}
-EXPORT_SYMBOL(nvram_read_byte);
-
-void nvram_write_byte(unsigned char val, int addr)
-{
- if (ppc_md.nvram_write_val)
- ppc_md.nvram_write_val(addr, val);
-}
-EXPORT_SYMBOL(nvram_write_byte);
-
-ssize_t nvram_get_size(void)
-{
- if (ppc_md.nvram_size)
- return ppc_md.nvram_size();
- return -1;
-}
-EXPORT_SYMBOL(nvram_get_size);
-
-void nvram_sync(void)
-{
- if (ppc_md.nvram_sync)
- ppc_md.nvram_sync();
-}
-EXPORT_SYMBOL(nvram_sync);
-
-#endif /* CONFIG_NVRAM */
-
static int __init ppc_init(void)
{
/* clear the progress line */
@@ -196,6 +162,17 @@ static int __init ppc_init(void)
}
arch_initcall(ppc_init);
+static void *__init alloc_stack(void)
+{
+ void *ptr = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
+
+ if (!ptr)
+ panic("cannot allocate %d bytes for stack at %pS\n",
+ THREAD_SIZE, (void *)_RET_IP_);
+
+ return ptr;
+}
+
void __init irqstack_early_init(void)
{
unsigned int i;
@@ -203,10 +180,8 @@ void __init irqstack_early_init(void)
/* interrupt stacks must be in lowmem, we get that for free on ppc32
* as the memblock is limited to lowmem by default */
for_each_possible_cpu(i) {
- softirq_ctx[i] = (struct thread_info *)
- __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
- hardirq_ctx[i] = (struct thread_info *)
- __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
+ softirq_ctx[i] = alloc_stack();
+ hardirq_ctx[i] = alloc_stack();
}
}
@@ -224,13 +199,10 @@ void __init exc_lvl_early_init(void)
hw_cpu = 0;
#endif
- critirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
+ critirq_ctx[hw_cpu] = alloc_stack();
#ifdef CONFIG_BOOKE
- dbgirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
- mcheckirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
+ dbgirq_ctx[hw_cpu] = alloc_stack();
+ mcheckirq_ctx[hw_cpu] = alloc_stack();
#endif
}
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 236c1151a3a7..ff0aac42bb33 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -634,19 +634,17 @@ __init u64 ppc64_bolted_size(void)
static void *__init alloc_stack(unsigned long limit, int cpu)
{
- unsigned long pa;
+ void *ptr;
BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
- pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
- early_cpu_to_node(cpu), MEMBLOCK_NONE);
- if (!pa) {
- pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
- if (!pa)
- panic("cannot allocate stacks");
- }
+ ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_SIZE,
+ MEMBLOCK_LOW_LIMIT, limit,
+ early_cpu_to_node(cpu));
+ if (!ptr)
+ panic("cannot allocate stacks");
- return __va(pa);
+ return ptr;
}
void __init irqstack_early_init(void)
@@ -692,24 +690,6 @@ void __init exc_lvl_early_init(void)
#endif
/*
- * Emergency stacks are used for a range of things, from asynchronous
- * NMIs (system reset, machine check) to synchronous, process context.
- * We set preempt_count to zero, even though that isn't necessarily correct. To
- * get the right value we'd need to copy it from the previous thread_info, but
- * doing that might fault causing more problems.
- * TODO: what to do with accounting?
- */
-static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
-{
- ti->task = NULL;
- ti->cpu = cpu;
- ti->preempt_count = 0;
- ti->local_flags = 0;
- ti->flags = 0;
- klp_init_thread_info(ti);
-}
-
-/*
* Stack space used when we detect a bad kernel stack pointer, and
* early in SMP boots before relocation is enabled. Exclusive emergency
* stack for machine checks.
@@ -736,25 +716,14 @@ void __init emergency_stack_init(void)
limit = min(ppc64_bolted_size(), ppc64_rma_size);
for_each_possible_cpu(i) {
- struct thread_info *ti;
-
- ti = alloc_stack(limit, i);
- memset(ti, 0, THREAD_SIZE);
- emerg_stack_init_thread_info(ti, i);
- paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
- ti = alloc_stack(limit, i);
- memset(ti, 0, THREAD_SIZE);
- emerg_stack_init_thread_info(ti, i);
- paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->nmi_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
- ti = alloc_stack(limit, i);
- memset(ti, 0, THREAD_SIZE);
- emerg_stack_init_thread_info(ti, i);
- paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->mc_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
#endif
}
}
@@ -933,8 +902,9 @@ static void __ref init_fallback_flush(void)
* hardware prefetch runoff. We don't have a recipe for load patterns to
* reliably avoid the prefetcher.
*/
- l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
- memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+ l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2,
+ l1d_size, MEMBLOCK_LOW_LIMIT,
+ limit, NUMA_NO_NODE);
for_each_possible_cpu(cpu) {
struct paca_struct *paca = paca_ptrs[cpu];
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 3f15edf25a0d..e784342bdaa1 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -20,6 +20,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched/mm.h>
+#include <linux/sched/task_stack.h>
#include <linux/sched/topology.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
@@ -75,7 +76,7 @@
static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif
-struct thread_info *secondary_ti;
+struct task_struct *secondary_current;
bool has_big_cores;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
@@ -358,13 +359,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
* NMI IPIs may not be recoverable, so should not be used as ongoing part of
* a running system. They can be used for crash, debug, halt/reboot, etc.
*
- * NMI IPIs are globally single threaded. No more than one in progress at
- * any time.
- *
* The IPI call waits with interrupts disabled until all targets enter the
- * NMI handler, then the call returns.
+ * NMI handler, then returns. Subsequent IPIs can be issued before targets
+ * have returned from their handlers, so there is no guarantee about
+ * concurrency or re-entrancy.
*
- * No new NMI can be initiated until targets exit the handler.
+ * A new NMI can be issued before all targets exit the handler.
*
* The IPI call may time out without all targets entering the NMI handler.
* In that case, there is some logic to recover (and ignore subsequent
@@ -375,7 +375,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
static struct cpumask nmi_ipi_pending_mask;
-static int nmi_ipi_busy_count = 0;
+static bool nmi_ipi_busy = false;
static void (*nmi_ipi_function)(struct pt_regs *) = NULL;
static void nmi_ipi_lock_start(unsigned long *flags)
@@ -414,7 +414,7 @@ static void nmi_ipi_unlock_end(unsigned long *flags)
*/
int smp_handle_nmi_ipi(struct pt_regs *regs)
{
- void (*fn)(struct pt_regs *);
+ void (*fn)(struct pt_regs *) = NULL;
unsigned long flags;
int me = raw_smp_processor_id();
int ret = 0;
@@ -425,29 +425,17 @@ int smp_handle_nmi_ipi(struct pt_regs *regs)
* because the caller may have timed out.
*/
nmi_ipi_lock_start(&flags);
- if (!nmi_ipi_busy_count)
- goto out;
- if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask))
- goto out;
-
- fn = nmi_ipi_function;
- if (!fn)
- goto out;
-
- cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
- nmi_ipi_busy_count++;
- nmi_ipi_unlock();
-
- ret = 1;
-
- fn(regs);
-
- nmi_ipi_lock();
- if (nmi_ipi_busy_count > 1) /* Can race with caller time-out */
- nmi_ipi_busy_count--;
-out:
+ if (cpumask_test_cpu(me, &nmi_ipi_pending_mask)) {
+ cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
+ fn = READ_ONCE(nmi_ipi_function);
+ WARN_ON_ONCE(!fn);
+ ret = 1;
+ }
nmi_ipi_unlock_end(&flags);
+ if (fn)
+ fn(regs);
+
return ret;
}
@@ -473,9 +461,10 @@ static void do_smp_send_nmi_ipi(int cpu, bool safe)
* - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
* - fn is the target callback function.
* - delay_us > 0 is the delay before giving up waiting for targets to
- * complete executing the handler, == 0 specifies indefinite delay.
+ * begin executing the handler, == 0 specifies indefinite delay.
*/
-int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool safe)
+static int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *),
+ u64 delay_us, bool safe)
{
unsigned long flags;
int me = raw_smp_processor_id();
@@ -487,31 +476,33 @@ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool
if (unlikely(!smp_ops))
return 0;
- /* Take the nmi_ipi_busy count/lock with interrupts hard disabled */
nmi_ipi_lock_start(&flags);
- while (nmi_ipi_busy_count) {
+ while (nmi_ipi_busy) {
nmi_ipi_unlock_end(&flags);
- spin_until_cond(nmi_ipi_busy_count == 0);
+ spin_until_cond(!nmi_ipi_busy);
nmi_ipi_lock_start(&flags);
}
-
+ nmi_ipi_busy = true;
nmi_ipi_function = fn;
+ WARN_ON_ONCE(!cpumask_empty(&nmi_ipi_pending_mask));
+
if (cpu < 0) {
/* ALL_OTHERS */
cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
} else {
- /* cpumask starts clear */
cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
}
- nmi_ipi_busy_count++;
+
nmi_ipi_unlock();
+ /* Interrupts remain hard disabled */
+
do_smp_send_nmi_ipi(cpu, safe);
nmi_ipi_lock();
- /* nmi_ipi_busy_count is held here, so unlock/lock is okay */
+ /* nmi_ipi_busy is set here, so unlock/lock is okay */
while (!cpumask_empty(&nmi_ipi_pending_mask)) {
nmi_ipi_unlock();
udelay(1);
@@ -523,29 +514,15 @@ int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool
}
}
- while (nmi_ipi_busy_count > 1) {
- nmi_ipi_unlock();
- udelay(1);
- nmi_ipi_lock();
- if (delay_us) {
- delay_us--;
- if (!delay_us)
- break;
- }
- }
-
if (!cpumask_empty(&nmi_ipi_pending_mask)) {
/* Timeout waiting for CPUs to call smp_handle_nmi_ipi */
ret = 0;
cpumask_clear(&nmi_ipi_pending_mask);
}
- if (nmi_ipi_busy_count > 1) {
- /* Timeout waiting for CPUs to execute fn */
- ret = 0;
- nmi_ipi_busy_count = 1;
- }
- nmi_ipi_busy_count--;
+ nmi_ipi_function = NULL;
+ nmi_ipi_busy = false;
+
nmi_ipi_unlock_end(&flags);
return ret;
@@ -613,17 +590,8 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
static void nmi_stop_this_cpu(struct pt_regs *regs)
{
/*
- * This is a special case because it never returns, so the NMI IPI
- * handling would never mark it as done, which makes any later
- * smp_send_nmi_ipi() call spin forever. Mark it done now.
- *
* IRQs are already hard disabled by the smp_handle_nmi_ipi.
*/
- nmi_ipi_lock();
- if (nmi_ipi_busy_count > 1)
- nmi_ipi_busy_count--;
- nmi_ipi_unlock();
-
spin_begin();
while (1)
spin_cpu_relax();
@@ -663,7 +631,7 @@ void smp_send_stop(void)
}
#endif /* CONFIG_NMI_IPI */
-struct thread_info *current_set[NR_CPUS];
+struct task_struct *current_set[NR_CPUS];
static void smp_store_cpu_info(int id)
{
@@ -928,7 +896,7 @@ void smp_prepare_boot_cpu(void)
paca_ptrs[boot_cpuid]->__current = current;
#endif
set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
- current_set[boot_cpuid] = task_thread_info(current);
+ current_set[boot_cpuid] = current;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -1013,14 +981,13 @@ static bool secondaries_inhibited(void)
static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
{
- struct thread_info *ti = task_thread_info(idle);
-
#ifdef CONFIG_PPC64
paca_ptrs[cpu]->__current = idle;
- paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
+ paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
+ THREAD_SIZE - STACK_FRAME_OVERHEAD;
#endif
- ti->cpu = cpu;
- secondary_ti = current_set[cpu] = ti;
+ idle->cpu = cpu;
+ secondary_current = current_set[cpu] = idle;
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index e2c50b55138f..1e2276963f6d 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -67,12 +67,17 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
unsigned long sp;
+ if (!try_get_task_stack(tsk))
+ return;
+
if (tsk == current)
sp = current_stack_pointer();
else
sp = tsk->thread.ksp;
save_context_stack(trace, sp, tsk, 0);
+
+ put_task_stack(tsk);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
@@ -84,25 +89,21 @@ save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
EXPORT_SYMBOL_GPL(save_stack_trace_regs);
#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
-int
-save_stack_trace_tsk_reliable(struct task_struct *tsk,
- struct stack_trace *trace)
+/*
+ * This function returns an error if it detects any unreliable features of the
+ * stack. Otherwise it guarantees that the stack trace is reliable.
+ *
+ * If the task is not 'current', the caller *must* ensure the task is inactive.
+ */
+static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
+ struct stack_trace *trace)
{
unsigned long sp;
+ unsigned long newsp;
unsigned long stack_page = (unsigned long)task_stack_page(tsk);
unsigned long stack_end;
int graph_idx = 0;
-
- /*
- * The last frame (unwinding first) may not yet have saved
- * its LR onto the stack.
- */
- int firstframe = 1;
-
- if (tsk == current)
- sp = current_stack_pointer();
- else
- sp = tsk->thread.ksp;
+ bool firstframe;
stack_end = stack_page + THREAD_SIZE;
if (!is_idle_task(tsk)) {
@@ -129,40 +130,53 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk,
stack_end -= STACK_FRAME_OVERHEAD;
}
+ if (tsk == current)
+ sp = current_stack_pointer();
+ else
+ sp = tsk->thread.ksp;
+
if (sp < stack_page + sizeof(struct thread_struct) ||
sp > stack_end - STACK_FRAME_MIN_SIZE) {
- return 1;
+ return -EINVAL;
}
- for (;;) {
+ for (firstframe = true; sp != stack_end;
+ firstframe = false, sp = newsp) {
unsigned long *stack = (unsigned long *) sp;
- unsigned long newsp, ip;
+ unsigned long ip;
/* sanity check: ABI requires SP to be aligned 16 bytes. */
if (sp & 0xF)
- return 1;
-
- /* Mark stacktraces with exception frames as unreliable. */
- if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
- stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
- return 1;
- }
+ return -EINVAL;
newsp = stack[0];
/* Stack grows downwards; unwinder may only go up. */
if (newsp <= sp)
- return 1;
+ return -EINVAL;
if (newsp != stack_end &&
newsp > stack_end - STACK_FRAME_MIN_SIZE) {
- return 1; /* invalid backlink, too far up. */
+ return -EINVAL; /* invalid backlink, too far up. */
+ }
+
+ /*
+ * We can only trust the bottom frame's backlink, the
+ * rest of the frame may be uninitialized, continue to
+ * the next.
+ */
+ if (firstframe)
+ continue;
+
+ /* Mark stacktraces with exception frames as unreliable. */
+ if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
+ stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+ return -EINVAL;
}
/* Examine the saved LR: it must point into kernel code. */
ip = stack[STACK_FRAME_LR_SAVE];
- if (!firstframe && !__kernel_text_address(ip))
- return 1;
- firstframe = 0;
+ if (!__kernel_text_address(ip))
+ return -EINVAL;
/*
* FIXME: IMHO these tests do not belong in
@@ -175,25 +189,37 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk,
* as unreliable.
*/
if (ip == (unsigned long)kretprobe_trampoline)
- return 1;
+ return -EINVAL;
#endif
+ if (trace->nr_entries >= trace->max_entries)
+ return -E2BIG;
if (!trace->skip)
trace->entries[trace->nr_entries++] = ip;
else
trace->skip--;
+ }
+ return 0;
+}
- if (newsp == stack_end)
- break;
+int save_stack_trace_tsk_reliable(struct task_struct *tsk,
+ struct stack_trace *trace)
+{
+ int ret;
- if (trace->nr_entries >= trace->max_entries)
- return -E2BIG;
+ /*
+ * If the task doesn't have a stack (e.g., a zombie), the stack is
+ * "reliably" empty.
+ */
+ if (!try_get_task_stack(tsk))
+ return 0;
- sp = newsp;
- }
- return 0;
+ ret = __save_stack_trace_tsk_reliable(tsk, trace);
+
+ put_task_stack(tsk);
+
+ return ret;
}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable);
#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index e6982ab21816..e52a8878c2fb 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -123,7 +123,7 @@ long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
(u64)len_high << 32 | len_low, advice);
}
-long sys_switch_endian(void)
+SYSCALL_DEFINE0(switch_endian)
{
struct thread_info *ti;
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index db3bbb8744af..b18abb0c3dae 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -20,7 +20,9 @@
10 common unlink sys_unlink
11 nospu execve sys_execve compat_sys_execve
12 common chdir sys_chdir
-13 common time sys_time compat_sys_time
+13 32 time sys_time32
+13 64 time sys_time
+13 spu time sys_time
14 common mknod sys_mknod
15 common chmod sys_chmod
16 common lchown sys_lchown
@@ -36,14 +38,17 @@
22 spu umount sys_ni_syscall
23 common setuid sys_setuid
24 common getuid sys_getuid
-25 common stime sys_stime compat_sys_stime
+25 32 stime sys_stime32
+25 64 stime sys_stime
+25 spu stime sys_stime
26 nospu ptrace sys_ptrace compat_sys_ptrace
27 common alarm sys_alarm
28 32 oldfstat sys_fstat sys_ni_syscall
28 64 oldfstat sys_ni_syscall
28 spu oldfstat sys_ni_syscall
29 nospu pause sys_pause
-30 nospu utime sys_utime compat_sys_utime
+30 32 utime sys_utime32
+30 64 utime sys_utime
31 common stty sys_ni_syscall
32 common gtty sys_ni_syscall
33 common access sys_access
@@ -157,7 +162,9 @@
121 common setdomainname sys_setdomainname
122 common uname sys_newuname
123 common modify_ldt sys_ni_syscall
-124 common adjtimex sys_adjtimex compat_sys_adjtimex
+124 32 adjtimex sys_adjtimex_time32
+124 64 adjtimex sys_adjtimex
+124 spu adjtimex sys_adjtimex
125 common mprotect sys_mprotect
126 32 sigprocmask sys_sigprocmask compat_sys_sigprocmask
126 64 sigprocmask sys_ni_syscall
@@ -198,8 +205,12 @@
158 common sched_yield sys_sched_yield
159 common sched_get_priority_max sys_sched_get_priority_max
160 common sched_get_priority_min sys_sched_get_priority_min
-161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval
-162 common nanosleep sys_nanosleep compat_sys_nanosleep
+161 32 sched_rr_get_interval sys_sched_rr_get_interval_time32
+161 64 sched_rr_get_interval sys_sched_rr_get_interval
+161 spu sched_rr_get_interval sys_sched_rr_get_interval
+162 32 nanosleep sys_nanosleep_time32
+162 64 nanosleep sys_nanosleep
+162 spu nanosleep sys_nanosleep
163 common mremap sys_mremap
164 common setresuid sys_setresuid
165 common getresuid sys_getresuid
@@ -213,7 +224,8 @@
173 nospu rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
174 nospu rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask
175 nospu rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
-176 nospu rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
+176 32 rt_sigtimedwait sys_rt_sigtimedwait_time32 compat_sys_rt_sigtimedwait_time32
+176 64 rt_sigtimedwait sys_rt_sigtimedwait
177 nospu rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
179 common pread64 sys_pread64 compat_sys_pread64
@@ -260,7 +272,9 @@
218 common removexattr sys_removexattr
219 common lremovexattr sys_lremovexattr
220 common fremovexattr sys_fremovexattr
-221 common futex sys_futex compat_sys_futex
+221 32 futex sys_futex_time32
+221 64 futex sys_futex
+221 spu futex sys_futex
222 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
223 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
# 224 unused
@@ -268,7 +282,9 @@
226 32 sendfile64 sys_sendfile64 compat_sys_sendfile64
227 common io_setup sys_io_setup compat_sys_io_setup
228 common io_destroy sys_io_destroy
-229 common io_getevents sys_io_getevents compat_sys_io_getevents
+229 32 io_getevents sys_io_getevents_time32
+229 64 io_getevents sys_io_getevents
+229 spu io_getevents sys_io_getevents
230 common io_submit sys_io_submit compat_sys_io_submit
231 common io_cancel sys_io_cancel
232 nospu set_tid_address sys_set_tid_address
@@ -280,19 +296,33 @@
238 common epoll_wait sys_epoll_wait
239 common remap_file_pages sys_remap_file_pages
240 common timer_create sys_timer_create compat_sys_timer_create
-241 common timer_settime sys_timer_settime compat_sys_timer_settime
-242 common timer_gettime sys_timer_gettime compat_sys_timer_gettime
+241 32 timer_settime sys_timer_settime32
+241 64 timer_settime sys_timer_settime
+241 spu timer_settime sys_timer_settime
+242 32 timer_gettime sys_timer_gettime32
+242 64 timer_gettime sys_timer_gettime
+242 spu timer_gettime sys_timer_gettime
243 common timer_getoverrun sys_timer_getoverrun
244 common timer_delete sys_timer_delete
-245 common clock_settime sys_clock_settime compat_sys_clock_settime
-246 common clock_gettime sys_clock_gettime compat_sys_clock_gettime
-247 common clock_getres sys_clock_getres compat_sys_clock_getres
-248 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep
+245 32 clock_settime sys_clock_settime32
+245 64 clock_settime sys_clock_settime
+245 spu clock_settime sys_clock_settime
+246 32 clock_gettime sys_clock_gettime32
+246 64 clock_gettime sys_clock_gettime
+246 spu clock_gettime sys_clock_gettime
+247 32 clock_getres sys_clock_getres_time32
+247 64 clock_getres sys_clock_getres
+247 spu clock_getres sys_clock_getres
+248 32 clock_nanosleep sys_clock_nanosleep_time32
+248 64 clock_nanosleep sys_clock_nanosleep
+248 spu clock_nanosleep sys_clock_nanosleep
249 32 swapcontext ppc_swapcontext ppc32_swapcontext
249 64 swapcontext ppc64_swapcontext
249 spu swapcontext sys_ni_syscall
250 common tgkill sys_tgkill
-251 common utimes sys_utimes compat_sys_utimes
+251 32 utimes sys_utimes_time32
+251 64 utimes sys_utimes
+251 spu utimes sys_utimes
252 common statfs64 sys_statfs64 compat_sys_statfs64
253 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
254 32 fadvise64_64 ppc_fadvise64_64
@@ -308,8 +338,10 @@
261 nospu set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
262 nospu mq_open sys_mq_open compat_sys_mq_open
263 nospu mq_unlink sys_mq_unlink
-264 nospu mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
-265 nospu mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
+264 32 mq_timedsend sys_mq_timedsend_time32
+264 64 mq_timedsend sys_mq_timedsend
+265 32 mq_timedreceive sys_mq_timedreceive_time32
+265 64 mq_timedreceive sys_mq_timedreceive
266 nospu mq_notify sys_mq_notify compat_sys_mq_notify
267 nospu mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
268 nospu kexec_load sys_kexec_load compat_sys_kexec_load
@@ -324,8 +356,10 @@
277 nospu inotify_rm_watch sys_inotify_rm_watch
278 nospu spu_run sys_spu_run
279 nospu spu_create sys_spu_create
-280 nospu pselect6 sys_pselect6 compat_sys_pselect6
-281 nospu ppoll sys_ppoll compat_sys_ppoll
+280 32 pselect6 sys_pselect6_time32 compat_sys_pselect6_time32
+280 64 pselect6 sys_pselect6
+281 32 ppoll sys_ppoll_time32 compat_sys_ppoll_time32
+281 64 ppoll sys_ppoll
282 common unshare sys_unshare
283 common splice sys_splice
284 common tee sys_tee
@@ -334,7 +368,9 @@
287 common mkdirat sys_mkdirat
288 common mknodat sys_mknodat
289 common fchownat sys_fchownat
-290 common futimesat sys_futimesat compat_sys_futimesat
+290 32 futimesat sys_futimesat_time32
+290 64 futimesat sys_futimesat
+290 spu utimesat sys_futimesat
291 32 fstatat64 sys_fstatat64
291 64 newfstatat sys_newfstatat
291 spu newfstatat sys_newfstatat
@@ -350,15 +386,21 @@
301 common move_pages sys_move_pages compat_sys_move_pages
302 common getcpu sys_getcpu
303 nospu epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
-304 common utimensat sys_utimensat compat_sys_utimensat
+304 32 utimensat sys_utimensat_time32
+304 64 utimensat sys_utimensat
+304 spu utimensat sys_utimensat
305 common signalfd sys_signalfd compat_sys_signalfd
306 common timerfd_create sys_timerfd_create
307 common eventfd sys_eventfd
308 common sync_file_range2 sys_sync_file_range2 compat_sys_sync_file_range2
309 nospu fallocate sys_fallocate compat_sys_fallocate
310 nospu subpage_prot sys_subpage_prot
-311 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
-312 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
+311 32 timerfd_settime sys_timerfd_settime32
+311 64 timerfd_settime sys_timerfd_settime
+311 spu timerfd_settime sys_timerfd_settime
+312 32 timerfd_gettime sys_timerfd_gettime32
+312 64 timerfd_gettime sys_timerfd_gettime
+312 spu timerfd_gettime sys_timerfd_gettime
313 common signalfd4 sys_signalfd4 compat_sys_signalfd4
314 common eventfd2 sys_eventfd2
315 common epoll_create1 sys_epoll_create1
@@ -389,11 +431,15 @@
340 common getsockopt sys_getsockopt compat_sys_getsockopt
341 common sendmsg sys_sendmsg compat_sys_sendmsg
342 common recvmsg sys_recvmsg compat_sys_recvmsg
-343 common recvmmsg sys_recvmmsg compat_sys_recvmmsg
+343 32 recvmmsg sys_recvmmsg_time32 compat_sys_recvmmsg_time32
+343 64 recvmmsg sys_recvmmsg
+343 spu recvmmsg sys_recvmmsg
344 common accept4 sys_accept4
345 common name_to_handle_at sys_name_to_handle_at
346 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
-347 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
+347 32 clock_adjtime sys_clock_adjtime32
+347 64 clock_adjtime sys_clock_adjtime
+347 spu clock_adjtime sys_clock_adjtime
348 common syncfs sys_syncfs
349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
350 common setns sys_setns
@@ -414,6 +460,7 @@
363 spu switch_endian sys_ni_syscall
364 common userfaultfd sys_userfaultfd
365 common membarrier sys_membarrier
+# 366-377 originally left for IPC, now unused
378 nospu mlock2 sys_mlock2
379 nospu copy_file_range sys_copy_file_range
380 common preadv2 sys_preadv2 compat_sys_preadv2
@@ -424,4 +471,37 @@
385 nospu pkey_free sys_pkey_free
386 nospu pkey_mprotect sys_pkey_mprotect
387 nospu rseq sys_rseq
-388 nospu io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents
+388 32 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents
+388 64 io_pgetevents sys_io_pgetevents
+# room for arch specific syscalls
+392 64 semtimedop sys_semtimedop
+393 common semget sys_semget
+394 common semctl sys_semctl compat_sys_semctl
+395 common shmget sys_shmget
+396 common shmctl sys_shmctl compat_sys_shmctl
+397 common shmat sys_shmat compat_sys_shmat
+398 common shmdt sys_shmdt
+399 common msgget sys_msgget
+400 common msgsnd sys_msgsnd compat_sys_msgsnd
+401 common msgrcv sys_msgrcv compat_sys_msgrcv
+402 common msgctl sys_msgctl compat_sys_msgctl
+403 32 clock_gettime64 sys_clock_gettime sys_clock_gettime
+404 32 clock_settime64 sys_clock_settime sys_clock_settime
+405 32 clock_adjtime64 sys_clock_adjtime sys_clock_adjtime
+406 32 clock_getres_time64 sys_clock_getres sys_clock_getres
+407 32 clock_nanosleep_time64 sys_clock_nanosleep sys_clock_nanosleep
+408 32 timer_gettime64 sys_timer_gettime sys_timer_gettime
+409 32 timer_settime64 sys_timer_settime sys_timer_settime
+410 32 timerfd_gettime64 sys_timerfd_gettime sys_timerfd_gettime
+411 32 timerfd_settime64 sys_timerfd_settime sys_timerfd_settime
+412 32 utimensat_time64 sys_utimensat sys_utimensat
+413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
+414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
+416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents
+417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
+418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
+419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
+420 32 semtimedop_time64 sys_semtimedop sys_semtimedop
+421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
+422 32 futex_time64 sys_futex sys_futex
+423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
diff --git a/arch/powerpc/kernel/syscalls/syscalltbl.sh b/arch/powerpc/kernel/syscalls/syscalltbl.sh
index fd620490a542..f7393a7b18aa 100644
--- a/arch/powerpc/kernel/syscalls/syscalltbl.sh
+++ b/arch/powerpc/kernel/syscalls/syscalltbl.sh
@@ -13,10 +13,10 @@ emit() {
t_entry="$3"
while [ $t_nxt -lt $t_nr ]; do
- printf "__SYSCALL(%s,sys_ni_syscall, )\n" "${t_nxt}"
+ printf "__SYSCALL(%s,sys_ni_syscall)\n" "${t_nxt}"
t_nxt=$((t_nxt+1))
done
- printf "__SYSCALL(%s,%s, )\n" "${t_nxt}" "${t_entry}"
+ printf "__SYSCALL(%s,%s)\n" "${t_nxt}" "${t_entry}"
}
grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 23265a28740b..02f28faba125 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -25,11 +25,11 @@
.globl sys_call_table
sys_call_table:
#ifdef CONFIG_PPC64
-#define __SYSCALL(nr, entry, nargs) .8byte DOTSYM(entry)
+#define __SYSCALL(nr, entry) .8byte DOTSYM(entry)
#include <asm/syscall_table_64.h>
#undef __SYSCALL
#else
-#define __SYSCALL(nr, entry, nargs) .long entry
+#define __SYSCALL(nr, entry) .long entry
#include <asm/syscall_table_32.h>
#undef __SYSCALL
#endif
@@ -38,7 +38,7 @@ sys_call_table:
.globl compat_sys_call_table
compat_sys_call_table:
#define compat_sys_sigsuspend sys_sigsuspend
-#define __SYSCALL(nr, entry, nargs) .8byte DOTSYM(entry)
+#define __SYSCALL(nr, entry) .8byte DOTSYM(entry)
#include <asm/syscall_table_c32.h>
#undef __SYSCALL
#endif
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 3646affae963..bc0503ef9c9c 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -57,7 +57,6 @@
#include <linux/irq_work.h>
#include <linux/clk-provider.h>
#include <linux/suspend.h>
-#include <linux/rtc.h>
#include <linux/sched/cputime.h>
#include <linux/processor.h>
#include <asm/trace.h>
diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile
index b1725ad3e13d..858503775c58 100644
--- a/arch/powerpc/kernel/trace/Makefile
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_TRACING) += trace_clock.o
obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y)
-# Disable GCOV & sanitizers in odd or sensitive code
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_ftrace.o := n
+KCOV_INSTRUMENT_ftrace.o := n
UBSAN_SANITIZE_ftrace.o := n
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 32476a6e4e9c..01b1224add49 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -229,7 +229,7 @@ ftrace_call:
* - r0, r11 & r12 are free
*/
livepatch_handler:
- CURRENT_THREAD_INFO(r12, r1)
+ ld r12, PACA_THREAD_INFO(r13)
/* Allocate 3 x 8 bytes */
ld r11, TI_livepatch_sp(r12)
@@ -256,7 +256,7 @@ livepatch_handler:
* restore it.
*/
- CURRENT_THREAD_INFO(r12, r1)
+ ld r12, PACA_THREAD_INFO(r13)
ld r11, TI_livepatch_sp(r12)
@@ -273,7 +273,7 @@ livepatch_handler:
ld r2, -24(r11)
/* Pop livepatch stack frame */
- CURRENT_THREAD_INFO(r12, r1)
+ ld r12, PACA_THREAD_INFO(r13)
subi r11, r11, 24
std r11, TI_livepatch_sp(r12)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 64936b60d521..a21200c6aaea 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -257,24 +257,17 @@ static int __die(const char *str, struct pt_regs *regs, long err)
{
printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
- if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
- printk("LE ");
- else
- printk("BE ");
-
- if (IS_ENABLED(CONFIG_PREEMPT))
- pr_cont("PREEMPT ");
-
- if (IS_ENABLED(CONFIG_SMP))
- pr_cont("SMP NR_CPUS=%d ", NR_CPUS);
-
- if (debug_pagealloc_enabled())
- pr_cont("DEBUG_PAGEALLOC ");
-
- if (IS_ENABLED(CONFIG_NUMA))
- pr_cont("NUMA ");
-
- pr_cont("%s\n", ppc_md.name ? ppc_md.name : "");
+ printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s%s %s\n",
+ IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
+ PAGE_SIZE / 1024,
+ early_radix_enabled() ? " MMU=Radix" : "",
+ early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ? " MMU=Hash" : "",
+ IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+ IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
+ IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
+ debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
+ IS_ENABLED(CONFIG_NUMA) ? " NUMA" : "",
+ ppc_md.name ? ppc_md.name : "");
if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
return 1;
@@ -376,16 +369,101 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
force_sig_fault(signr, code, (void __user *)addr, current);
}
+/*
+ * The interrupt architecture has a quirk in that the HV interrupts excluding
+ * the NMIs (0x100 and 0x200) do not clear MSR[RI] at entry. The first thing
+ * that an interrupt handler must do is save off a GPR into a scratch register,
+ * and all interrupts on POWERNV (HV=1) use the HSPRG1 register as scratch.
+ * Therefore an NMI can clobber an HV interrupt's live HSPRG1 without noticing
+ * that it is non-reentrant, which leads to random data corruption.
+ *
+ * The solution is for NMI interrupts in HV mode to check if they originated
+ * from these critical HV interrupt regions. If so, then mark them not
+ * recoverable.
+ *
+ * An alternative would be for HV NMIs to use SPRG for scratch to avoid the
+ * HSPRG1 clobber, however this would cause guest SPRG to be clobbered. Linux
+ * guests should always have MSR[RI]=0 when its scratch SPRG is in use, so
+ * that would work. However any other guest OS that may have the SPRG live
+ * and MSR[RI]=1 could encounter silent corruption.
+ *
+ * Builds that do not support KVM could take this second option to increase
+ * the recoverability of NMIs.
+ */
+void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_POWERNV
+ unsigned long kbase = (unsigned long)_stext;
+ unsigned long nip = regs->nip;
+
+ if (!(regs->msr & MSR_RI))
+ return;
+ if (!(regs->msr & MSR_HV))
+ return;
+ if (regs->msr & MSR_PR)
+ return;
+
+ /*
+ * Now test if the interrupt has hit a range that may be using
+ * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The
+ * problem ranges all run un-relocated. Test real and virt modes
+ * at the same time by droping the high bit of the nip (virt mode
+ * entry points still have the +0x4000 offset).
+ */
+ nip &= ~0xc000000000000000ULL;
+ if ((nip >= 0x500 && nip < 0x600) || (nip >= 0x4500 && nip < 0x4600))
+ goto nonrecoverable;
+ if ((nip >= 0x980 && nip < 0xa00) || (nip >= 0x4980 && nip < 0x4a00))
+ goto nonrecoverable;
+ if ((nip >= 0xe00 && nip < 0xec0) || (nip >= 0x4e00 && nip < 0x4ec0))
+ goto nonrecoverable;
+ if ((nip >= 0xf80 && nip < 0xfa0) || (nip >= 0x4f80 && nip < 0x4fa0))
+ goto nonrecoverable;
+
+ /* Trampoline code runs un-relocated so subtract kbase. */
+ if (nip >= (unsigned long)(start_real_trampolines - kbase) &&
+ nip < (unsigned long)(end_real_trampolines - kbase))
+ goto nonrecoverable;
+ if (nip >= (unsigned long)(start_virt_trampolines - kbase) &&
+ nip < (unsigned long)(end_virt_trampolines - kbase))
+ goto nonrecoverable;
+ return;
+
+nonrecoverable:
+ regs->msr &= ~MSR_RI;
+#endif
+}
+
void system_reset_exception(struct pt_regs *regs)
{
+ unsigned long hsrr0, hsrr1;
+ bool nested = in_nmi();
+ bool saved_hsrrs = false;
+
/*
* Avoid crashes in case of nested NMI exceptions. Recoverability
* is determined by RI and in_nmi
*/
- bool nested = in_nmi();
if (!nested)
nmi_enter();
+ /*
+ * System reset can interrupt code where HSRRs are live and MSR[RI]=1.
+ * The system reset interrupt itself may clobber HSRRs (e.g., to call
+ * OPAL), so save them here and restore them before returning.
+ *
+ * Machine checks don't need to save HSRRs, as the real mode handler
+ * is careful to avoid them, and the regular handler is not delivered
+ * as an NMI.
+ */
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ hsrr0 = mfspr(SPRN_HSRR0);
+ hsrr1 = mfspr(SPRN_HSRR1);
+ saved_hsrrs = true;
+ }
+
+ hv_nmi_check_nonrecoverable(regs);
+
__this_cpu_inc(irq_stat.sreset_irqs);
/* See if any machine dependent calls */
@@ -433,6 +511,11 @@ out:
if (!(regs->msr & MSR_RI))
nmi_panic(regs, "Unrecoverable System Reset");
+ if (saved_hsrrs) {
+ mtspr(SPRN_HSRR0, hsrr0);
+ mtspr(SPRN_HSRR1, hsrr1);
+ }
+
if (!nested)
nmi_exit();
@@ -763,15 +846,15 @@ void machine_check_exception(struct pt_regs *regs)
if (check_io_access(regs))
goto bail;
- /* Must die if the interrupt is not recoverable */
- if (!(regs->msr & MSR_RI))
- nmi_panic(regs, "Unrecoverable Machine check");
-
if (!nested)
nmi_exit();
die("Machine check", regs, SIGBUS);
+ /* Must die if the interrupt is not recoverable */
+ if (!(regs->msr & MSR_RI))
+ nmi_panic(regs, "Unrecoverable Machine check");
+
return;
bail:
@@ -1542,8 +1625,8 @@ bail:
void StackOverflow(struct pt_regs *regs)
{
- printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n",
- current, regs->gpr[1]);
+ pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n",
+ current->comm, task_pid_nr(current), regs->gpr[1]);
debugger(regs);
show_regs(regs);
panic("kernel stack overflow");
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index 7cc38b5b58bc..8db4891acdaf 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -74,7 +74,7 @@ void __init udbg_early_init(void)
#endif
#ifdef CONFIG_PPC_EARLY_DEBUG
- console_loglevel = 10;
+ console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
register_early_udbg_console();
#endif
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 7725a9714736..a31b6234fcd7 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -798,7 +798,6 @@ static int __init vdso_init(void)
BUG_ON(vdso32_pagelist == NULL);
for (i = 0; i < vdso32_pages; i++) {
struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
- ClearPageReserved(pg);
get_page(pg);
vdso32_pagelist[i] = pg;
}
@@ -812,7 +811,6 @@ static int __init vdso_init(void)
BUG_ON(vdso64_pagelist == NULL);
for (i = 0; i < vdso64_pages; i++) {
struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
- ClearPageReserved(pg);
get_page(pg);
vdso64_pagelist[i] = pg;
}
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 50112d4473bb..ce199f6e4256 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -23,6 +23,7 @@ targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
GCOV_PROFILE := n
+KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
ccflags-y := -shared -fno-common -fno-builtin
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 69cecb346269..28e7d112aa2f 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -9,6 +9,7 @@ targets := $(obj-vdso64) vdso64.so vdso64.so.dbg
obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
GCOV_PROFILE := n
+KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
ccflags-y := -shared -fno-common -fno-builtin
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index ad1c77f71f54..060a1acd7c6d 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -12,11 +12,8 @@
#include <asm/cache.h>
#include <asm/thread_info.h>
-#if defined(CONFIG_STRICT_KERNEL_RWX) && !defined(CONFIG_PPC32)
-#define STRICT_ALIGN_SIZE (1 << 24)
-#else
-#define STRICT_ALIGN_SIZE PAGE_SIZE
-#endif
+#define STRICT_ALIGN_SIZE (1 << CONFIG_DATA_SHIFT)
+#define ETEXT_ALIGN_SIZE (1 << CONFIG_ETEXT_SHIFT)
ENTRY(_stext)
@@ -86,11 +83,11 @@ SECTIONS
#ifdef CONFIG_PPC64
/*
- * BLOCK(0) overrides the default output section alignment because
+ * ALIGN(0) overrides the default output section alignment because
* this needs to start right after .head.text in order for fixed
* section placement to work.
*/
- .text BLOCK(0) : AT(ADDR(.text) - LOAD_OFFSET) {
+ .text ALIGN(0) : AT(ADDR(.text) - LOAD_OFFSET) {
#ifdef CONFIG_LD_HEAD_STUB_CATCH
KEEP(*(.linker_stub_catch));
. = . ;
@@ -131,7 +128,7 @@ SECTIONS
} :kernel
- . = ALIGN(PAGE_SIZE);
+ . = ALIGN(ETEXT_ALIGN_SIZE);
_etext = .;
PROVIDE32 (etext = .);
@@ -319,6 +316,7 @@ SECTIONS
*(.sdata2)
*(.got.plt) *(.got)
*(.plt)
+ *(.branch_lt)
}
#else
.data : AT(ADDR(.data) - LOAD_OFFSET) {
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 64f1135e7732..3223aec88b2c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -10,11 +10,6 @@ common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o
common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
-CFLAGS_e500_mmu.o := -I.
-CFLAGS_e500_mmu_host.o := -I.
-CFLAGS_emulate.o := -I.
-CFLAGS_emulate_loadstore.o := -I.
-
common-objs-y += powerpc.o emulate_loadstore.o
obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index bd1a677dd9e4..9a7dadbe1f17 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -192,6 +192,13 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
}
EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio);
+void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags)
+{
+ /* might as well deliver this straight away */
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_MACHINE_CHECK, flags);
+}
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
+
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
{
/* might as well deliver this straight away */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 5a066fc299e1..a3d5318f5d1e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1215,6 +1215,22 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_MACHINE_CHECK:
+ /* Print the MCE event to host console. */
+ machine_check_print_event_info(&vcpu->arch.mce_evt, false, true);
+
+ /*
+ * If the guest can do FWNMI, exit to userspace so it can
+ * deliver a FWNMI to the guest.
+ * Otherwise we synthesize a machine check for the guest
+ * so that it knows that the machine check occurred.
+ */
+ if (!vcpu->kvm->arch.fwnmi_enabled) {
+ ulong flags = vcpu->arch.shregs.msr & 0x083c0000;
+ kvmppc_core_queue_machine_check(vcpu, flags);
+ r = RESUME_GUEST;
+ break;
+ }
+
/* Exit to guest with KVM_EXIT_NMI as exit reason */
run->exit_reason = KVM_EXIT_NMI;
run->hw.hardware_exit_reason = vcpu->arch.trap;
@@ -1227,8 +1243,6 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
r = RESUME_HOST;
- /* Print the MCE event to host console. */
- machine_check_print_event_info(&vcpu->arch.mce_evt, false);
break;
case BOOK3S_INTERRUPT_PROGRAM:
{
@@ -1392,7 +1406,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
/* Pass the machine check to the L1 guest */
r = RESUME_HOST;
/* Print the MCE event to host console. */
- machine_check_print_event_info(&vcpu->arch.mce_evt, false);
+ machine_check_print_event_info(&vcpu->arch.mce_evt, false, true);
break;
/*
* We get these next two if the guest accesses a page which it thinks
@@ -3455,6 +3469,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long host_dscr = mfspr(SPRN_DSCR);
unsigned long host_tidr = mfspr(SPRN_TIDR);
unsigned long host_iamr = mfspr(SPRN_IAMR);
+ unsigned long host_amr = mfspr(SPRN_AMR);
s64 dec;
u64 tb;
int trap, save_pmu;
@@ -3571,13 +3586,15 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
mtspr(SPRN_PSPB, 0);
mtspr(SPRN_WORT, 0);
- mtspr(SPRN_AMR, 0);
mtspr(SPRN_UAMOR, 0);
mtspr(SPRN_DSCR, host_dscr);
mtspr(SPRN_TIDR, host_tidr);
mtspr(SPRN_IAMR, host_iamr);
mtspr(SPRN_PSPB, 0);
+ if (host_amr != vcpu->arch.amr)
+ mtspr(SPRN_AMR, host_amr);
+
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
store_fp_state(&vcpu->arch.fp);
#ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c
index e3f738eb1cac..64b5011475c7 100644
--- a/arch/powerpc/kvm/book3s_hv_hmi.c
+++ b/arch/powerpc/kvm/book3s_hv_hmi.c
@@ -24,6 +24,7 @@
#include <linux/compiler.h>
#include <asm/paca.h>
#include <asm/hmi.h>
+#include <asm/processor.h>
void wait_for_subcore_guest_exit(void)
{
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 0787f12c1a1b..8c24c3bea0bf 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -66,10 +66,8 @@ static void reload_slb(struct kvm_vcpu *vcpu)
/*
* On POWER7, see if we can handle a machine check that occurred inside
* the guest in real mode, without switching to the host partition.
- *
- * Returns: 0 => exit guest, 1 => deliver machine check to guest
*/
-static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
+static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
{
unsigned long srr1 = vcpu->arch.shregs.msr;
struct machine_check_event mce_evt;
@@ -111,52 +109,24 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
}
/*
- * See if we have already handled the condition in the linux host.
- * We assume that if the condition is recovered then linux host
- * will have generated an error log event that we will pick
- * up and log later.
- * Don't release mce event now. We will queue up the event so that
- * we can log the MCE event info on host console.
+ * Now get the event and stash it in the vcpu struct so it can
+ * be handled by the primary thread in virtual mode. We can't
+ * call machine_check_queue_event() here if we are running on
+ * an offline secondary thread.
*/
- if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
- goto out;
-
- if (mce_evt.version == MCE_V1 &&
- (mce_evt.severity == MCE_SEV_NO_ERROR ||
- mce_evt.disposition == MCE_DISPOSITION_RECOVERED))
- handled = 1;
-
-out:
- /*
- * For guest that supports FWNMI capability, hook the MCE event into
- * vcpu structure. We are going to exit the guest with KVM_EXIT_NMI
- * exit reason. On our way to exit we will pull this event from vcpu
- * structure and print it from thread 0 of the core/subcore.
- *
- * For guest that does not support FWNMI capability (old QEMU):
- * We are now going enter guest either through machine check
- * interrupt (for unhandled errors) or will continue from
- * current HSRR0 (for handled errors) in guest. Hence
- * queue up the event so that we can log it from host console later.
- */
- if (vcpu->kvm->arch.fwnmi_enabled) {
- /*
- * Hook up the mce event on to vcpu structure.
- * First clear the old event.
- */
- memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt));
- if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
- vcpu->arch.mce_evt = mce_evt;
- }
- } else
- machine_check_queue_event();
+ if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
+ if (handled && mce_evt.version == MCE_V1)
+ mce_evt.disposition = MCE_DISPOSITION_RECOVERED;
+ } else {
+ memset(&mce_evt, 0, sizeof(mce_evt));
+ }
- return handled;
+ vcpu->arch.mce_evt = mce_evt;
}
-long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
+void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
{
- return kvmppc_realmode_mc_power7(vcpu);
+ kvmppc_realmode_mc_power7(vcpu);
}
/* Check if dynamic split is in force and return subcore size accordingly. */
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 9b8d50a7cbaf..25043b50cb30 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -58,6 +58,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_DAWR (SFS-56)
#define STACK_SLOT_DAWRX (SFS-64)
#define STACK_SLOT_HFSCR (SFS-72)
+#define STACK_SLOT_AMR (SFS-80)
+#define STACK_SLOT_UAMOR (SFS-88)
/* the following is used by the P9 short path */
#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */
@@ -726,11 +728,9 @@ BEGIN_FTR_SECTION
mfspr r5, SPRN_TIDR
mfspr r6, SPRN_PSSCR
mfspr r7, SPRN_PID
- mfspr r8, SPRN_IAMR
std r5, STACK_SLOT_TID(r1)
std r6, STACK_SLOT_PSSCR(r1)
std r7, STACK_SLOT_PID(r1)
- std r8, STACK_SLOT_IAMR(r1)
mfspr r5, SPRN_HFSCR
std r5, STACK_SLOT_HFSCR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
@@ -738,11 +738,18 @@ BEGIN_FTR_SECTION
mfspr r5, SPRN_CIABR
mfspr r6, SPRN_DAWR
mfspr r7, SPRN_DAWRX
+ mfspr r8, SPRN_IAMR
std r5, STACK_SLOT_CIABR(r1)
std r6, STACK_SLOT_DAWR(r1)
std r7, STACK_SLOT_DAWRX(r1)
+ std r8, STACK_SLOT_IAMR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ mfspr r5, SPRN_AMR
+ std r5, STACK_SLOT_AMR(r1)
+ mfspr r6, SPRN_UAMOR
+ std r6, STACK_SLOT_UAMOR(r1)
+
BEGIN_FTR_SECTION
/* Set partition DABR */
/* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
@@ -1631,22 +1638,25 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_PSPB, r0
mtspr SPRN_WORT, r0
BEGIN_FTR_SECTION
- mtspr SPRN_IAMR, r0
mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
sldi r0, r0, 31
mtspr SPRN_MMCRS, r0
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-8:
- /* Save and reset AMR and UAMOR before turning on the MMU */
+ /* Save and restore AMR, IAMR and UAMOR before turning on the MMU */
+ ld r8, STACK_SLOT_IAMR(r1)
+ mtspr SPRN_IAMR, r8
+
+8: /* Power7 jumps back in here */
mfspr r5,SPRN_AMR
mfspr r6,SPRN_UAMOR
std r5,VCPU_AMR(r9)
std r6,VCPU_UAMOR(r9)
- li r6,0
- mtspr SPRN_AMR,r6
+ ld r5,STACK_SLOT_AMR(r1)
+ ld r6,STACK_SLOT_UAMOR(r1)
+ mtspr SPRN_AMR, r5
mtspr SPRN_UAMOR, r6
/* Switch DSCR back to host value */
@@ -1746,11 +1756,9 @@ BEGIN_FTR_SECTION
ld r5, STACK_SLOT_TID(r1)
ld r6, STACK_SLOT_PSSCR(r1)
ld r7, STACK_SLOT_PID(r1)
- ld r8, STACK_SLOT_IAMR(r1)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
mtspr SPRN_PID, r7
- mtspr SPRN_IAMR, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
#ifdef CONFIG_PPC_RADIX_MMU
@@ -2826,49 +2834,15 @@ kvm_cede_exit:
#endif /* CONFIG_KVM_XICS */
3: b guest_exit_cont
- /* Try to handle a machine check in real mode */
+ /* Try to do machine check recovery in real mode */
machine_check_realmode:
mr r3, r9 /* get vcpu pointer */
bl kvmppc_realmode_machine_check
nop
+ /* all machine checks go to virtual mode for further handling */
ld r9, HSTATE_KVM_VCPU(r13)
li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
- /*
- * For the guest that is FWNMI capable, deliver all the MCE errors
- * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit
- * reason. This new approach injects machine check errors in guest
- * address space to guest with additional information in the form
- * of RTAS event, thus enabling guest kernel to suitably handle
- * such errors.
- *
- * For the guest that is not FWNMI capable (old QEMU) fallback
- * to old behaviour for backward compatibility:
- * Deliver unhandled/fatal (e.g. UE) MCE errors to guest either
- * through machine check interrupt (set HSRR0 to 0x200).
- * For handled errors (no-fatal), just go back to guest execution
- * with current HSRR0.
- * if we receive machine check with MSR(RI=0) then deliver it to
- * guest as machine check causing guest to crash.
- */
- ld r11, VCPU_MSR(r9)
- rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
- bne guest_exit_cont /* if so, exit to host */
- /* Check if guest is capable of handling NMI exit */
- ld r10, VCPU_KVM(r9)
- lbz r10, KVM_FWNMI(r10)
- cmpdi r10, 1 /* FWNMI capable? */
- beq guest_exit_cont /* if so, exit with KVM_EXIT_NMI. */
-
- /* if not, fall through for backward compatibility. */
- andi. r10, r11, MSR_RI /* check for unrecoverable exception */
- beq 1f /* Deliver a machine check to guest */
- ld r10, VCPU_PC(r9)
- cmpdi r3, 0 /* Did we handle MCE ? */
- bne 2f /* Continue guest execution. */
- /* If not, deliver a machine check. SRR0/1 are already set */
-1: li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
- bl kvmppc_msr_interrupt
-2: b fast_interrupt_c_return
+ b guest_exit_cont
/*
* Call C code to handle a HMI in real mode.
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 3bf9fc6fd36c..79396e184bca 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -30,7 +30,8 @@ obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
obj64-$(CONFIG_SMP) += locks.o
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
-obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
+obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \
+ test_emulate_step_exec_instr.o
obj-y += checksum_$(BITS).o checksum_wrappers.o \
string_$(BITS).o memcmp_$(BITS).o
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index d81568f783e5..3d33fb509ef4 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1169,7 +1169,7 @@ static nokprobe_inline int trap_compare(long v1, long v2)
int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
unsigned int instr)
{
- unsigned int opcode, ra, rb, rd, spr, u;
+ unsigned int opcode, ra, rb, rc, rd, spr, u;
unsigned long int imm;
unsigned long int val, val2;
unsigned int mb, me, sh;
@@ -1292,6 +1292,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
rd = (instr >> 21) & 0x1f;
ra = (instr >> 16) & 0x1f;
rb = (instr >> 11) & 0x1f;
+ rc = (instr >> 6) & 0x1f;
switch (opcode) {
#ifdef __powerpc64__
@@ -1305,6 +1306,38 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
goto trap;
return 1;
+#ifdef __powerpc64__
+ case 4:
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -1;
+
+ switch (instr & 0x3f) {
+ case 48: /* maddhd */
+ asm volatile(PPC_MADDHD(%0, %1, %2, %3) :
+ "=r" (op->val) : "r" (regs->gpr[ra]),
+ "r" (regs->gpr[rb]), "r" (regs->gpr[rc]));
+ goto compute_done;
+
+ case 49: /* maddhdu */
+ asm volatile(PPC_MADDHDU(%0, %1, %2, %3) :
+ "=r" (op->val) : "r" (regs->gpr[ra]),
+ "r" (regs->gpr[rb]), "r" (regs->gpr[rc]));
+ goto compute_done;
+
+ case 51: /* maddld */
+ asm volatile(PPC_MADDLD(%0, %1, %2, %3) :
+ "=r" (op->val) : "r" (regs->gpr[ra]),
+ "r" (regs->gpr[rb]), "r" (regs->gpr[rc]));
+ goto compute_done;
+ }
+
+ /*
+ * There are other instructions from ISA 3.0 with the same
+ * primary opcode which do not have emulation support yet.
+ */
+ return -1;
+#endif
+
case 7: /* mulli */
op->val = regs->gpr[ra] * (short) instr;
goto compute_done;
@@ -1671,10 +1704,23 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
(int) regs->gpr[rb];
goto arith_done;
-
+#ifdef __powerpc64__
+ case 265: /* modud */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -1;
+ op->val = regs->gpr[ra] % regs->gpr[rb];
+ goto compute_done;
+#endif
case 266: /* add */
op->val = regs->gpr[ra] + regs->gpr[rb];
goto arith_done;
+
+ case 267: /* moduw */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -1;
+ op->val = (unsigned int) regs->gpr[ra] %
+ (unsigned int) regs->gpr[rb];
+ goto compute_done;
#ifdef __powerpc64__
case 457: /* divdu */
op->val = regs->gpr[ra] / regs->gpr[rb];
@@ -1695,6 +1741,42 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
(int) regs->gpr[rb];
goto arith_done;
+ case 755: /* darn */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -1;
+ switch (ra & 0x3) {
+ case 0:
+ /* 32-bit conditioned */
+ asm volatile(PPC_DARN(%0, 0) : "=r" (op->val));
+ goto compute_done;
+
+ case 1:
+ /* 64-bit conditioned */
+ asm volatile(PPC_DARN(%0, 1) : "=r" (op->val));
+ goto compute_done;
+
+ case 2:
+ /* 64-bit raw */
+ asm volatile(PPC_DARN(%0, 2) : "=r" (op->val));
+ goto compute_done;
+ }
+
+ return -1;
+#ifdef __powerpc64__
+ case 777: /* modsd */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -1;
+ op->val = (long int) regs->gpr[ra] %
+ (long int) regs->gpr[rb];
+ goto compute_done;
+#endif
+ case 779: /* modsw */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -1;
+ op->val = (int) regs->gpr[ra] %
+ (int) regs->gpr[rb];
+ goto compute_done;
+
/*
* Logical instructions
@@ -1765,6 +1847,20 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
do_popcnt(regs, op, regs->gpr[rd], 64);
goto logical_done_nocc;
#endif
+ case 538: /* cnttzw */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -1;
+ val = (unsigned int) regs->gpr[rd];
+ op->val = (val ? __builtin_ctz(val) : 32);
+ goto logical_done;
+#ifdef __powerpc64__
+ case 570: /* cnttzd */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -1;
+ val = regs->gpr[rd];
+ op->val = (val ? __builtin_ctzl(val) : 64);
+ goto logical_done;
+#endif
case 922: /* extsh */
op->val = (signed short) regs->gpr[rd];
goto logical_done;
@@ -1866,6 +1962,20 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->xerval &= ~XER_CA;
set_ca32(op, op->xerval & XER_CA);
goto logical_done;
+
+ case 890: /* extswsli with sh_5 = 0 */
+ case 891: /* extswsli with sh_5 = 1 */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -1;
+ op->type = COMPUTE + SETREG;
+ sh = rb | ((instr & 2) << 4);
+ val = (signed int) regs->gpr[rd];
+ if (sh)
+ op->val = ROTATE(val, sh) & MASK64(0, 63 - sh);
+ else
+ op->val = val;
+ goto logical_done;
+
#endif /* __powerpc64__ */
/*
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
index 6c47daa61614..9992c1ea7a1d 100644
--- a/arch/powerpc/lib/test_emulate_step.c
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -1,5 +1,5 @@
/*
- * Simple sanity test for emulate_step load/store instructions.
+ * Simple sanity tests for instruction emulation infrastructure.
*
* Copyright IBM Corp. 2016
*
@@ -14,6 +14,7 @@
#include <linux/ptrace.h>
#include <asm/sstep.h>
#include <asm/ppc-opcode.h>
+#include <asm/code-patching.h>
#define IMM_L(i) ((uintptr_t)(i) & 0xffff)
@@ -48,7 +49,20 @@
___PPC_RA(a) | ___PPC_RB(b))
#define TEST_LXVD2X(s, a, b) (PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b))
#define TEST_STXVD2X(s, a, b) (PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b))
+#define TEST_ADD(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | \
+ ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | \
+ ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
+#define TEST_ADDC(t, a, b) (PPC_INST_ADDC | ___PPC_RT(t) | \
+ ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_ADDC_DOT(t, a, b) (PPC_INST_ADDC | ___PPC_RT(t) | \
+ ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
+
+#define MAX_SUBTESTS 16
+#define IGNORE_GPR(n) (0x1UL << (n))
+#define IGNORE_XER (0x1UL << 32)
+#define IGNORE_CCR (0x1UL << 33)
static void __init init_pt_regs(struct pt_regs *regs)
{
@@ -72,9 +86,15 @@ static void __init init_pt_regs(struct pt_regs *regs)
msr_cached = true;
}
-static void __init show_result(char *ins, char *result)
+static void __init show_result(char *mnemonic, char *result)
{
- pr_info("%-14s : %s\n", ins, result);
+ pr_info("%-14s : %s\n", mnemonic, result);
+}
+
+static void __init show_result_with_descr(char *mnemonic, char *descr,
+ char *result)
+{
+ pr_info("%-14s : %-50s %s\n", mnemonic, descr, result);
}
static void __init test_ld(void)
@@ -426,7 +446,7 @@ static void __init test_lxvd2x_stxvd2x(void)
}
#endif /* CONFIG_VSX */
-static int __init test_emulate_step(void)
+static void __init run_tests_load_store(void)
{
test_ld();
test_lwz();
@@ -437,6 +457,513 @@ static int __init test_emulate_step(void)
test_lfdx_stfdx();
test_lvx_stvx();
test_lxvd2x_stxvd2x();
+}
+
+struct compute_test {
+ char *mnemonic;
+ struct {
+ char *descr;
+ unsigned long flags;
+ unsigned int instr;
+ struct pt_regs regs;
+ } subtests[MAX_SUBTESTS + 1];
+};
+
+static struct compute_test compute_tests[] = {
+ {
+ .mnemonic = "nop",
+ .subtests = {
+ {
+ .descr = "R0 = LONG_MAX",
+ .instr = PPC_INST_NOP,
+ .regs = {
+ .gpr[0] = LONG_MAX,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "add",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = TEST_ADD(20, 21, 22),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = TEST_ADD(20, 21, 22),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX, RB = LONG_MAX",
+ .instr = TEST_ADD(20, 21, 22),
+ .regs = {
+ .gpr[21] = LONG_MAX,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+ .instr = TEST_ADD(20, 21, 22),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = 0x1",
+ .instr = TEST_ADD(20, 21, 22),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MIN",
+ .instr = TEST_ADD(20, 21, 22),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MAX",
+ .instr = TEST_ADD(20, 21, 22),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = INT_MAX, RB = INT_MAX",
+ .instr = TEST_ADD(20, 21, 22),
+ .regs = {
+ .gpr[21] = INT_MAX,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = UINT_MAX",
+ .instr = TEST_ADD(20, 21, 22),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = 0x1",
+ .instr = TEST_ADD(20, 21, 22),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = 0x1,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "add.",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .flags = IGNORE_CCR,
+ .instr = TEST_ADD_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = TEST_ADD_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX, RB = LONG_MAX",
+ .flags = IGNORE_CCR,
+ .instr = TEST_ADD_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = LONG_MAX,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+ .instr = TEST_ADD_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = 0x1",
+ .instr = TEST_ADD_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MIN",
+ .instr = TEST_ADD_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MAX",
+ .instr = TEST_ADD_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = INT_MAX, RB = INT_MAX",
+ .instr = TEST_ADD_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = INT_MAX,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = UINT_MAX",
+ .instr = TEST_ADD_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = 0x1",
+ .instr = TEST_ADD_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = 0x1,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "addc",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = TEST_ADDC(20, 21, 22),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = TEST_ADDC(20, 21, 22),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX, RB = LONG_MAX",
+ .instr = TEST_ADDC(20, 21, 22),
+ .regs = {
+ .gpr[21] = LONG_MAX,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+ .instr = TEST_ADDC(20, 21, 22),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = 0x1",
+ .instr = TEST_ADDC(20, 21, 22),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MIN",
+ .instr = TEST_ADDC(20, 21, 22),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MAX",
+ .instr = TEST_ADDC(20, 21, 22),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = INT_MAX, RB = INT_MAX",
+ .instr = TEST_ADDC(20, 21, 22),
+ .regs = {
+ .gpr[21] = INT_MAX,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = UINT_MAX",
+ .instr = TEST_ADDC(20, 21, 22),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = 0x1",
+ .instr = TEST_ADDC(20, 21, 22),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN",
+ .instr = TEST_ADDC(20, 21, 22),
+ .regs = {
+ .gpr[21] = LONG_MIN | (uint)INT_MIN,
+ .gpr[22] = LONG_MIN | (uint)INT_MIN,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "addc.",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .flags = IGNORE_CCR,
+ .instr = TEST_ADDC_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = TEST_ADDC_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX, RB = LONG_MAX",
+ .flags = IGNORE_CCR,
+ .instr = TEST_ADDC_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = LONG_MAX,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+ .instr = TEST_ADDC_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = 0x1",
+ .instr = TEST_ADDC_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MIN",
+ .instr = TEST_ADDC_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MAX",
+ .instr = TEST_ADDC_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = INT_MAX, RB = INT_MAX",
+ .instr = TEST_ADDC_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = INT_MAX,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = UINT_MAX",
+ .instr = TEST_ADDC_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = 0x1",
+ .instr = TEST_ADDC_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN",
+ .instr = TEST_ADDC_DOT(20, 21, 22),
+ .regs = {
+ .gpr[21] = LONG_MIN | (uint)INT_MIN,
+ .gpr[22] = LONG_MIN | (uint)INT_MIN,
+ }
+ }
+ }
+ }
+};
+
+static int __init emulate_compute_instr(struct pt_regs *regs,
+ unsigned int instr)
+{
+ struct instruction_op op;
+
+ if (!regs || !instr)
+ return -EINVAL;
+
+ if (analyse_instr(&op, regs, instr) != 1 ||
+ GETTYPE(op.type) != COMPUTE) {
+ pr_info("emulation failed, instruction = 0x%08x\n", instr);
+ return -EFAULT;
+ }
+
+ emulate_update_regs(regs, &op);
+ return 0;
+}
+
+static int __init execute_compute_instr(struct pt_regs *regs,
+ unsigned int instr)
+{
+ extern int exec_instr(struct pt_regs *regs);
+ extern s32 patch__exec_instr;
+
+ if (!regs || !instr)
+ return -EINVAL;
+
+ /* Patch the NOP with the actual instruction */
+ patch_instruction_site(&patch__exec_instr, instr);
+ if (exec_instr(regs)) {
+ pr_info("execution failed, instruction = 0x%08x\n", instr);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+#define gpr_mismatch(gprn, exp, got) \
+ pr_info("GPR%u mismatch, exp = 0x%016lx, got = 0x%016lx\n", \
+ gprn, exp, got)
+
+#define reg_mismatch(name, exp, got) \
+ pr_info("%s mismatch, exp = 0x%016lx, got = 0x%016lx\n", \
+ name, exp, got)
+
+static void __init run_tests_compute(void)
+{
+ unsigned long flags;
+ struct compute_test *test;
+ struct pt_regs *regs, exp, got;
+ unsigned int i, j, k, instr;
+ bool ignore_gpr, ignore_xer, ignore_ccr, passed;
+
+ for (i = 0; i < ARRAY_SIZE(compute_tests); i++) {
+ test = &compute_tests[i];
+
+ for (j = 0; j < MAX_SUBTESTS && test->subtests[j].descr; j++) {
+ instr = test->subtests[j].instr;
+ flags = test->subtests[j].flags;
+ regs = &test->subtests[j].regs;
+ ignore_xer = flags & IGNORE_XER;
+ ignore_ccr = flags & IGNORE_CCR;
+ passed = true;
+
+ memcpy(&exp, regs, sizeof(struct pt_regs));
+ memcpy(&got, regs, sizeof(struct pt_regs));
+
+ /*
+ * Set a compatible MSR value explicitly to ensure
+ * that XER and CR bits are updated appropriately
+ */
+ exp.msr = MSR_KERNEL;
+ got.msr = MSR_KERNEL;
+
+ if (emulate_compute_instr(&got, instr) ||
+ execute_compute_instr(&exp, instr)) {
+ passed = false;
+ goto print;
+ }
+
+ /* Verify GPR values */
+ for (k = 0; k < 32; k++) {
+ ignore_gpr = flags & IGNORE_GPR(k);
+ if (!ignore_gpr && exp.gpr[k] != got.gpr[k]) {
+ passed = false;
+ gpr_mismatch(k, exp.gpr[k], got.gpr[k]);
+ }
+ }
+
+ /* Verify LR value */
+ if (exp.link != got.link) {
+ passed = false;
+ reg_mismatch("LR", exp.link, got.link);
+ }
+
+ /* Verify XER value */
+ if (!ignore_xer && exp.xer != got.xer) {
+ passed = false;
+ reg_mismatch("XER", exp.xer, got.xer);
+ }
+
+ /* Verify CR value */
+ if (!ignore_ccr && exp.ccr != got.ccr) {
+ passed = false;
+ reg_mismatch("CR", exp.ccr, got.ccr);
+ }
+
+print:
+ show_result_with_descr(test->mnemonic,
+ test->subtests[j].descr,
+ passed ? "PASS" : "FAIL");
+ }
+ }
+}
+
+static int __init test_emulate_step(void)
+{
+ printk(KERN_INFO "Running instruction emulation self-tests ...\n");
+ run_tests_load_store();
+ run_tests_compute();
return 0;
}
diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S
new file mode 100644
index 000000000000..1580f34f4f4f
--- /dev/null
+++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Non-emulated single-stepping support (currently limited to basic integer
+ * computations) used to validate the instruction emulation infrastructure.
+ *
+ * Copyright (C) 2019 IBM Corporation
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
+#include <asm/code-patching-asm.h>
+#include <linux/errno.h>
+
+/* int exec_instr(struct pt_regs *regs) */
+_GLOBAL(exec_instr)
+
+ /*
+ * Stack frame layout (INT_FRAME_SIZE bytes)
+ * In-memory pt_regs (SP + STACK_FRAME_OVERHEAD)
+ * Scratch space (SP + 8)
+ * Back chain (SP + 0)
+ */
+
+ /*
+ * Allocate a new stack frame with enough space to hold the register
+ * states in an in-memory pt_regs and also create the back chain to
+ * the caller's stack frame.
+ */
+ stdu r1, -INT_FRAME_SIZE(r1)
+
+ /*
+ * Save non-volatile GPRs on stack. This includes TOC pointer (GPR2)
+ * and local variables (GPR14 to GPR31). The register for the pt_regs
+ * parameter (GPR3) is saved additionally to ensure that the resulting
+ * register state can still be saved even if GPR3 gets overwritten
+ * when loading the initial register state for the test instruction.
+ * The stack pointer (GPR1) and the thread pointer (GPR13) are not
+ * saved as these should not be modified anyway.
+ */
+ SAVE_2GPRS(2, r1)
+ SAVE_NVGPRS(r1)
+
+ /*
+ * Save LR on stack to ensure that the return address is available
+ * even if it gets overwritten by the test instruction.
+ */
+ mflr r0
+ std r0, _LINK(r1)
+
+ /*
+ * Save CR on stack. For simplicity, the entire register is saved
+ * even though only fields 2 to 4 are non-volatile.
+ */
+ mfcr r0
+ std r0, _CCR(r1)
+
+ /*
+ * Load register state for the test instruction without touching the
+ * critical non-volatile registers. The register state is passed as a
+ * pointer to a pt_regs instance.
+ */
+ subi r31, r3, GPR0
+
+ /* Load LR from pt_regs */
+ ld r0, _LINK(r31)
+ mtlr r0
+
+ /* Load CR from pt_regs */
+ ld r0, _CCR(r31)
+ mtcr r0
+
+ /* Load XER from pt_regs */
+ ld r0, _XER(r31)
+ mtxer r0
+
+ /* Load GPRs from pt_regs */
+ REST_GPR(0, r31)
+ REST_10GPRS(2, r31)
+ REST_GPR(12, r31)
+ REST_NVGPRS(r31)
+
+ /* Placeholder for the test instruction */
+1: nop
+ patch_site 1b patch__exec_instr
+
+ /*
+ * Since GPR3 is overwritten, temporarily restore it back to its
+ * original state, i.e. the pointer to pt_regs, to ensure that the
+ * resulting register state can be saved. Before doing this, a copy
+ * of it is created in the scratch space which is used later on to
+ * save it to pt_regs.
+ */
+ std r3, 8(r1)
+ REST_GPR(3, r1)
+
+ /* Save resulting GPR state to pt_regs */
+ subi r3, r3, GPR0
+ SAVE_GPR(0, r3)
+ SAVE_GPR(2, r3)
+ SAVE_8GPRS(4, r3)
+ SAVE_GPR(12, r3)
+ SAVE_NVGPRS(r3)
+
+ /* Save resulting LR to pt_regs */
+ mflr r0
+ std r0, _LINK(r3)
+
+ /* Save resulting CR to pt_regs */
+ mfcr r0
+ std r0, _CCR(r3)
+
+ /* Save resulting XER to pt_regs */
+ mfxer r0
+ std r0, _XER(r3)
+
+ /* Restore resulting GPR3 from scratch space and save it to pt_regs */
+ ld r0, 8(r1)
+ std r0, GPR3(r3)
+
+ /* Set return value to denote execution success */
+ li r3, 0
+
+ /* Continue */
+ b 3f
+
+ /* Set return value to denote execution failure */
+2: li r3, -EFAULT
+
+ /* Restore the non-volatile GPRs from stack */
+3: REST_GPR(2, r1)
+ REST_NVGPRS(r1)
+
+ /* Restore LR from stack to be able to return */
+ ld r0, _LINK(r1)
+ mtlr r0
+
+ /* Restore CR from stack */
+ ld r0, _CCR(r1)
+ mtcr r0
+
+ /* Tear down stack frame */
+ addi r1, r1, INT_FRAME_SIZE
+
+ /* Return */
+ blr
+
+ /* Setup exception table */
+ EX_TABLE(1b, 2b)
+
+_ASM_NOKPROBE_SYMBOL(exec_instr)
diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile
index 494df26c5988..a8794032f15f 100644
--- a/arch/powerpc/math-emu/Makefile
+++ b/arch/powerpc/math-emu/Makefile
@@ -17,4 +17,4 @@ obj-$(CONFIG_SPE) += math_efp.o
CFLAGS_fabs.o = -fno-builtin-fabs
CFLAGS_math.o = -fno-builtin-fabs
-ccflags-y = -I. -Iinclude/math-emu -w
+ccflags-y = -w
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c
index 61ac468c87c6..b9cf6f8764b0 100644
--- a/arch/powerpc/mm/40x_mmu.c
+++ b/arch/powerpc/mm/40x_mmu.c
@@ -93,7 +93,7 @@ void __init MMU_init_hw(void)
#define LARGE_PAGE_SIZE_16M (1<<24)
#define LARGE_PAGE_SIZE_4M (1<<22)
-unsigned long __init mmu_mapin_ram(unsigned long top)
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
unsigned long v, s, mapped;
phys_addr_t p;
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index ea2b9af08a48..aad127acdbaa 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -170,7 +170,7 @@ void __init MMU_init_hw(void)
flush_instruction_cache();
}
-unsigned long __init mmu_mapin_ram(unsigned long top)
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
unsigned long addr;
unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index bfa503cff351..fe1f6443d57f 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -66,26 +66,22 @@ unsigned long p_block_mapped(phys_addr_t pa)
void __init MMU_init_hw(void)
{
/* PIN up to the 3 first 8Mb after IMMR in DTLB table */
-#ifdef CONFIG_PIN_TLB_DATA
- unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
- unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY;
-#ifdef CONFIG_PIN_TLB_IMMR
- int i = 29;
-#else
- int i = 28;
-#endif
- unsigned long addr = 0;
- unsigned long mem = total_lowmem;
-
- for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
- mtspr(SPRN_MD_CTR, ctr | (i << 8));
- mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
- mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
- mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
- addr += LARGE_PAGE_SIZE_8M;
- mem -= LARGE_PAGE_SIZE_8M;
+ if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) {
+ unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
+ unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY;
+ int i = IS_ENABLED(CONFIG_PIN_TLB_IMMR) ? 29 : 28;
+ unsigned long addr = 0;
+ unsigned long mem = total_lowmem;
+
+ for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
+ mtspr(SPRN_MD_CTR, ctr | (i << 8));
+ mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
+ mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
+ mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
+ addr += LARGE_PAGE_SIZE_8M;
+ mem -= LARGE_PAGE_SIZE_8M;
+ }
}
-#endif
}
static void __init mmu_mapin_immr(void)
@@ -98,26 +94,36 @@ static void __init mmu_mapin_immr(void)
map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG);
}
-static void __init mmu_patch_cmp_limit(s32 *site, unsigned long mapped)
+static void mmu_patch_cmp_limit(s32 *site, unsigned long mapped)
{
modify_instruction_site(site, 0xffff, (unsigned long)__va(mapped) >> 16);
}
-unsigned long __init mmu_mapin_ram(unsigned long top)
+static void mmu_patch_addis(s32 *site, long simm)
+{
+ unsigned int instr = *(unsigned int *)patch_site_addr(site);
+
+ instr &= 0xffff0000;
+ instr |= ((unsigned long)simm) >> 16;
+ patch_instruction_site(site, instr);
+}
+
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
unsigned long mapped;
if (__map_without_ltlbs) {
mapped = 0;
mmu_mapin_immr();
-#ifndef CONFIG_PIN_TLB_IMMR
- patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP);
-#endif
-#ifndef CONFIG_PIN_TLB_TEXT
- mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0);
-#endif
+ if (!IS_ENABLED(CONFIG_PIN_TLB_IMMR))
+ patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP);
+ if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+ mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0);
} else {
mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
+ if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+ mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top,
+ _ALIGN(__pa(_einittext), 8 << 20));
}
mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped);
@@ -138,6 +144,26 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
return mapped;
}
+void mmu_mark_initmem_nx(void)
+{
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23)
+ mmu_patch_addis(&patch__itlbmiss_linmem_top8,
+ -((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1)));
+ if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+ mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext));
+}
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void mmu_mark_rodata_ro(void)
+{
+ if (CONFIG_DATA_SHIFT < 23)
+ mmu_patch_addis(&patch__dtlbmiss_romem_top8,
+ -__pa(((unsigned long)_sinittext) &
+ ~(LARGE_PAGE_SIZE_8M - 1)));
+ mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext));
+}
+#endif
+
void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
phys_addr_t first_memblock_size)
{
@@ -146,8 +172,8 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
*/
BUG_ON(first_memblock_base != 0);
- /* 8xx can only access 24MB at the moment */
- memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01800000));
+ /* 8xx can only access 32MB at the moment */
+ memblock_set_current_limit(min_t(u64, first_memblock_size, 0x02000000));
}
/*
@@ -162,14 +188,11 @@ void set_context(unsigned long id, pgd_t *pgd)
{
s16 offset = (s16)(__pa(swapper_pg_dir));
-#ifdef CONFIG_BDI_SWITCH
- pgd_t **ptr = *(pgd_t ***)(KERNELBASE + 0xf0);
-
/* Context switch the PTE pointer for the Abatron BDI2000.
* The PGDIR is passed as second argument.
*/
- *(ptr + 1) = pgd;
-#endif
+ if (IS_ENABLED(CONFIG_BDI_SWITCH))
+ abatron_pteptrs[1] = pgd;
/* Register M_TWB will contain base address of level 1 table minus the
* lower part of the kernel PGDIR base address, so that all accesses to
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index f965fc33a8b7..d52ec118e09d 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -45,13 +45,10 @@ obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
obj-$(CONFIG_HIGHMEM) += highmem.o
obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o
-obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o
-ifdef CONFIG_PPC_PTDUMP
-obj-$(CONFIG_4xx) += dump_linuxpagetables-generic.o
-obj-$(CONFIG_PPC_8xx) += dump_linuxpagetables-8xx.o
-obj-$(CONFIG_PPC_BOOK3E_MMU) += dump_linuxpagetables-generic.o
-obj-$(CONFIG_PPC_BOOK3S_32) += dump_linuxpagetables-generic.o dump_bats.o dump_sr.o
-obj-$(CONFIG_PPC_BOOK3S_64) += dump_linuxpagetables-book3s64.o
-endif
-obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o
+obj-$(CONFIG_PPC_PTDUMP) += ptdump/
obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o
+
+# Disable kcov instrumentation on sensitive code
+# This is necessary for booting with kcov enabled on book3e machines
+KCOV_INSTRUMENT_tlb_nohash.o := n
+KCOV_INSTRUMENT_fsl_booke_mmu.o := n
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index e955539686a4..b5d2658c26af 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -30,6 +30,7 @@
#include <linux/types.h>
#include <linux/highmem.h>
#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
#include <linux/export.h>
#include <asm/tlbflush.h>
@@ -151,8 +152,8 @@ static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsi
* Allocate DMA-coherent memory space and return both the kernel remapped
* virtual and bus address for that space.
*/
-void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t gfp, unsigned long attrs)
{
struct page *page;
struct ppc_vm_region *c;
@@ -253,7 +254,7 @@ void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
/*
* free a page as defined by the above mapping.
*/
-void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
struct ppc_vm_region *c;
@@ -313,7 +314,7 @@ void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
/*
* make an area consistent.
*/
-void __dma_sync(void *vaddr, size_t size, int direction)
+static void __dma_sync(void *vaddr, size_t size, int direction)
{
unsigned long start = (unsigned long)vaddr;
unsigned long end = start + size;
@@ -339,7 +340,6 @@ void __dma_sync(void *vaddr, size_t size, int direction)
break;
}
}
-EXPORT_SYMBOL(__dma_sync);
#ifdef CONFIG_HIGHMEM
/*
@@ -386,28 +386,42 @@ static inline void __dma_sync_page_highmem(struct page *page,
* __dma_sync_page makes memory consistent. identical to __dma_sync, but
* takes a struct page instead of a virtual address
*/
-void __dma_sync_page(struct page *page, unsigned long offset,
- size_t size, int direction)
+static void __dma_sync_page(phys_addr_t paddr, size_t size, int dir)
{
+ struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
+ unsigned offset = paddr & ~PAGE_MASK;
+
#ifdef CONFIG_HIGHMEM
- __dma_sync_page_highmem(page, offset, size, direction);
+ __dma_sync_page_highmem(page, offset, size, dir);
#else
unsigned long start = (unsigned long)page_address(page) + offset;
- __dma_sync((void *)start, size, direction);
+ __dma_sync((void *)start, size, dir);
#endif
}
-EXPORT_SYMBOL(__dma_sync_page);
+
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
+{
+ __dma_sync_page(paddr, size, dir);
+}
+
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
+{
+ __dma_sync_page(paddr, size, dir);
+}
/*
- * Return the PFN for a given cpu virtual address returned by
- * __dma_nommu_alloc_coherent. This is used by dma_mmap_coherent()
+ * Return the PFN for a given cpu virtual address returned by arch_dma_alloc.
*/
-unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr)
+long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr,
+ dma_addr_t dma_addr)
{
/* This should always be populated, so we don't test every
* level. If that fails, we'll have a nice crash which
* will be as good as a BUG_ON()
*/
+ unsigned long cpu_addr = (unsigned long)vaddr;
pgd_t *pgd = pgd_offset_k(cpu_addr);
pud_t *pud = pud_offset(pgd, cpu_addr);
pmd_t *pmd = pmd_offset(pud, cpu_addr);
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index 080d49b26c3a..210cbc1faf63 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -221,7 +221,7 @@ unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, bool dryrun)
#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS"
#endif
-unsigned long __init mmu_mapin_ram(unsigned long top)
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1;
}
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index 1e2df3e9f9ea..1f13494efb2b 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -47,14 +47,13 @@ mmu_hash_lock:
* Returns to the caller if the access is illegal or there is no
* mapping for the address. Otherwise it places an appropriate PTE
* in the hash table and returns from the exception.
- * Uses r0, r3 - r8, r10, ctr, lr.
+ * Uses r0, r3 - r6, r8, r10, ctr, lr.
*/
.text
_GLOBAL(hash_page)
- tophys(r7,0) /* gets -KERNELBASE into r7 */
#ifdef CONFIG_SMP
- addis r8,r7,mmu_hash_lock@h
- ori r8,r8,mmu_hash_lock@l
+ lis r8, (mmu_hash_lock - PAGE_OFFSET)@h
+ ori r8, r8, (mmu_hash_lock - PAGE_OFFSET)@l
lis r0,0x0fff
b 10f
11: lwz r6,0(r8)
@@ -70,14 +69,13 @@ _GLOBAL(hash_page)
/* Get PTE (linux-style) and check access */
lis r0,KERNELBASE@h /* check if kernel address */
cmplw 0,r4,r0
- mfspr r8,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */
- lwz r5,PGDIR(r8) /* virt page-table root */
+ mfspr r5, SPRN_SPRG_PGDIR /* virt page-table root */
blt+ 112f /* assume user more likely */
lis r5,swapper_pg_dir@ha /* if kernel address, use */
addi r5,r5,swapper_pg_dir@l /* kernel page table */
rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */
-112: add r5,r5,r7 /* convert to phys addr */
+112: tophys(r5, r5)
#ifndef CONFIG_PTE_64BIT
rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */
lwz r8,0(r5) /* get pmd entry */
@@ -144,25 +142,24 @@ retry:
#ifdef CONFIG_SMP
eieio
- addis r8,r7,mmu_hash_lock@ha
+ lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha
li r0,0
- stw r0,mmu_hash_lock@l(r8)
+ stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8)
#endif
/* Return from the exception */
lwz r5,_CTR(r11)
mtctr r5
lwz r0,GPR0(r11)
- lwz r7,GPR7(r11)
lwz r8,GPR8(r11)
b fast_exception_return
#ifdef CONFIG_SMP
hash_page_out:
eieio
- addis r8,r7,mmu_hash_lock@ha
+ lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha
li r0,0
- stw r0,mmu_hash_lock@l(r8)
+ stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8)
blr
#endif /* CONFIG_SMP */
@@ -186,8 +183,7 @@ _GLOBAL(add_hash_page)
add r3,r3,r0 /* note create_hpte trims to 24 bits */
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r8, r1) /* use cpu number to make tag */
- lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */
+ lwz r8,TASK_CPU(r2) /* to go in mmu_hash_lock */
oris r8,r8,12
#endif /* CONFIG_SMP */
@@ -208,11 +204,9 @@ _GLOBAL(add_hash_page)
SYNC_601
isync
- tophys(r7,0)
-
#ifdef CONFIG_SMP
- addis r6,r7,mmu_hash_lock@ha
- addi r6,r6,mmu_hash_lock@l
+ lis r6, (mmu_hash_lock - PAGE_OFFSET)@ha
+ addi r6, r6, (mmu_hash_lock - PAGE_OFFSET)@l
10: lwarx r0,0,r6 /* take the mmu_hash_lock */
cmpi 0,r0,0
bne- 11f
@@ -257,8 +251,8 @@ _GLOBAL(add_hash_page)
9:
#ifdef CONFIG_SMP
- addis r6,r7,mmu_hash_lock@ha
- addi r6,r6,mmu_hash_lock@l
+ lis r6, (mmu_hash_lock - PAGE_OFFSET)@ha
+ addi r6, r6, (mmu_hash_lock - PAGE_OFFSET)@l
eieio
li r0,0
stw r0,0(r6) /* clear mmu_hash_lock */
@@ -278,10 +272,8 @@ _GLOBAL(add_hash_page)
* It is designed to be called with the MMU either on or off.
* r3 contains the VSID, r4 contains the virtual address,
* r5 contains the linux PTE, r6 contains the old value of the
- * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the
- * offset to be added to addresses (0 if the MMU is on,
- * -KERNELBASE if it is off). r10 contains the upper half of
- * the PTE if CONFIG_PTE_64BIT.
+ * linux PTE (before setting _PAGE_HASHPTE). r10 contains the
+ * upper half of the PTE if CONFIG_PTE_64BIT.
* On SMP, the caller should have the mmu_hash_lock held.
* We assume that the caller has (or will) set the _PAGE_HASHPTE
* bit in the linux PTE in memory. The value passed in r6 should
@@ -342,7 +334,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
patch_site 1f, patch__hash_page_A1
patch_site 2f, patch__hash_page_A2
/* Get the address of the primary PTE group in the hash table (r3) */
-0: addis r0,r7,Hash_base@h /* base address of hash table */
+0: lis r0, (Hash_base - PAGE_OFFSET)@h /* base address of hash table */
1: rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
2: rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
xor r3,r3,r0 /* make primary hash */
@@ -356,10 +348,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
beq+ 10f /* no PTE: go look for an empty slot */
tlbie r4
- addis r4,r7,htab_hash_searches@ha
- lwz r6,htab_hash_searches@l(r4)
+ lis r4, (htab_hash_searches - PAGE_OFFSET)@ha
+ lwz r6, (htab_hash_searches - PAGE_OFFSET)@l(r4)
addi r6,r6,1 /* count how many searches we do */
- stw r6,htab_hash_searches@l(r4)
+ stw r6, (htab_hash_searches - PAGE_OFFSET)@l(r4)
/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
mtctr r0
@@ -391,10 +383,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
beq+ found_empty
/* update counter of times that the primary PTEG is full */
- addis r4,r7,primary_pteg_full@ha
- lwz r6,primary_pteg_full@l(r4)
+ lis r4, (primary_pteg_full - PAGE_OFFSET)@ha
+ lwz r6, (primary_pteg_full - PAGE_OFFSET)@l(r4)
addi r6,r6,1
- stw r6,primary_pteg_full@l(r4)
+ stw r6, (primary_pteg_full - PAGE_OFFSET)@l(r4)
patch_site 0f, patch__hash_page_C
/* Search the secondary PTEG for an empty slot */
@@ -428,8 +420,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
* lockup here but that shouldn't happen
*/
-1: addis r4,r7,next_slot@ha /* get next evict slot */
- lwz r6,next_slot@l(r4)
+1: lis r4, (next_slot - PAGE_OFFSET)@ha /* get next evict slot */
+ lwz r6, (next_slot - PAGE_OFFSET)@l(r4)
addi r6,r6,HPTE_SIZE /* search for candidate */
andi. r6,r6,7*HPTE_SIZE
stw r6,next_slot@l(r4)
@@ -501,8 +493,6 @@ htab_hash_searches:
* We assume that there is a hash table in use (Hash != 0).
*/
_GLOBAL(flush_hash_pages)
- tophys(r7,0)
-
/*
* We disable interrupts here, even on UP, because we want
* the _PAGE_HASHPTE bit to be a reliable indication of
@@ -547,11 +537,9 @@ _GLOBAL(flush_hash_pages)
SET_V(r11) /* set V (valid) bit */
#ifdef CONFIG_SMP
- addis r9,r7,mmu_hash_lock@ha
- addi r9,r9,mmu_hash_lock@l
- CURRENT_THREAD_INFO(r8, r1)
- add r8,r8,r7
- lwz r8,TI_CPU(r8)
+ lis r9, (mmu_hash_lock - PAGE_OFFSET)@ha
+ addi r9, r9, (mmu_hash_lock - PAGE_OFFSET)@l
+ lwz r8,TASK_CPU(r2)
oris r8,r8,9
10: lwarx r0,0,r9
cmpi 0,r0,0
@@ -584,7 +572,7 @@ _GLOBAL(flush_hash_pages)
patch_site 1f, patch__flush_hash_A1
patch_site 2f, patch__flush_hash_A2
/* Get the address of the primary PTE group in the hash table (r3) */
-0: addis r8,r7,Hash_base@h /* base address of hash table */
+0: lis r8, (Hash_base - PAGE_OFFSET)@h /* base address of hash table */
1: rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
2: rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
xor r8,r0,r8 /* make primary hash */
@@ -646,8 +634,7 @@ EXPORT_SYMBOL(flush_hash_pages)
*/
_GLOBAL(_tlbie)
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r8, r1)
- lwz r8,TI_CPU(r8)
+ lwz r8,TASK_CPU(r2)
oris r8,r8,11
mfmsr r10
SYNC
@@ -684,8 +671,7 @@ _GLOBAL(_tlbie)
*/
_GLOBAL(_tlbia)
#if defined(CONFIG_SMP)
- CURRENT_THREAD_INFO(r8, r1)
- lwz r8,TI_CPU(r8)
+ lwz r8,TASK_CPU(r2)
oris r8,r8,10
mfmsr r10
SYNC
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0cc7fbc3bd1c..3d4b2399192f 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -908,9 +908,9 @@ static void __init htab_initialize(void)
#ifdef CONFIG_DEBUG_PAGEALLOC
if (debug_pagealloc_enabled()) {
linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
- linear_map_hash_slots = __va(memblock_alloc_base(
- linear_map_hash_count, 1, ppc64_rma_size));
- memset(linear_map_hash_slots, 0, linear_map_hash_count);
+ linear_map_hash_slots = memblock_alloc_try_nid(
+ linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT,
+ ppc64_rma_size, NUMA_NO_NODE);
}
#endif /* CONFIG_DEBUG_PAGEALLOC */
@@ -1889,12 +1889,12 @@ static int hpt_order_set(void *data, u64 val)
return mmu_hash_ops.resize_hpt(val);
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
static int __init hash64_debugfs(void)
{
- if (!debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root,
- NULL, &fops_hpt_order)) {
+ if (!debugfs_create_file_unsafe("hpt_order", 0600, powerpc_debugfs_root,
+ NULL, &fops_hpt_order)) {
pr_err("lpar: unable to create hpt_order debugsfs file\n");
}
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 2e6a8f9345d3..b0d9209d9a86 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -26,7 +26,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
real_pte_t rpte;
unsigned long vpn;
unsigned long old_pte, new_pte;
- unsigned long rflags, pa, sz;
+ unsigned long rflags, pa;
long slot, offset;
BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
@@ -73,7 +73,6 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
offset = PTRS_PER_PMD;
rpte = __real_pte(__pte(old_pte), ptep, offset);
- sz = ((1UL) << shift);
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
/* No CPU has hugepages but lacks no execute, so we
* don't need to worry about that case */
@@ -121,3 +120,28 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
+
+pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ unsigned long pte_val;
+ /*
+ * Clear the _PAGE_PRESENT so that no hardware parallel update is
+ * possible. Also keep the pte_present true so that we don't take
+ * wrong fault.
+ */
+ pte_val = pte_update(vma->vm_mm, addr, ptep,
+ _PAGE_PRESENT, _PAGE_INVALID, 1);
+
+ return __pte(pte_val);
+}
+
+void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte)
+{
+
+ if (radix_enabled())
+ return radix__huge_ptep_modify_prot_commit(vma, addr, ptep,
+ old_pte, pte);
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+}
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
index 2486bee0f93e..cab06331c0c0 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/hugetlb.h>
+#include <linux/security.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
@@ -73,7 +74,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (addr) {
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
- if (high_limit - len >= addr &&
+ if (high_limit - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
@@ -83,10 +84,27 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
*/
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
- info.low_limit = PAGE_SIZE;
+ info.low_limit = max(PAGE_SIZE, mmap_min_addr);
info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
return vm_unmapped_area(&info);
}
+
+void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t pte)
+{
+ struct mm_struct *mm = vma->vm_mm;
+
+ /*
+ * To avoid NMMU hang while relaxing access we need to flush the tlb before
+ * we set the new value.
+ */
+ if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
+ (atomic_read(&mm->context.copros) > 0))
+ radix__flush_hugetlb_page(vma, addr);
+
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+}
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 3e59e5d64b01..41a3513cadc9 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -108,12 +108,8 @@ static void __init MMU_setup(void)
__map_without_bats = 1;
__map_without_ltlbs = 1;
}
-#ifdef CONFIG_STRICT_KERNEL_RWX
- if (rodata_enabled) {
- __map_without_bats = 1;
+ if (strict_kernel_rwx_enabled() && !IS_ENABLED(CONFIG_PPC_8xx))
__map_without_ltlbs = 1;
- }
-#endif
}
/*
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a5091c034747..a4c155af1597 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -274,7 +274,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
for (; start < end; start += page_size) {
unsigned long nr_pages, addr;
- struct page *section_base;
struct page *page;
/*
@@ -290,7 +289,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
continue;
page = pfn_to_page(addr >> PAGE_SHIFT);
- section_base = pfn_to_page(vmemmap_section_start(start));
nr_pages = 1 << page_order;
base_pfn = PHYS_PFN(addr);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 33cc6f676fa6..f6787f90e158 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -69,22 +69,14 @@ pte_t *kmap_pte;
EXPORT_SYMBOL(kmap_pte);
pgprot_t kmap_prot;
EXPORT_SYMBOL(kmap_prot);
-#define TOP_ZONE ZONE_HIGHMEM
static inline pte_t *virt_to_kpte(unsigned long vaddr)
{
return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
vaddr), vaddr), vaddr);
}
-#else
-#define TOP_ZONE ZONE_NORMAL
#endif
-int page_is_ram(unsigned long pfn)
-{
- return memblock_is_memory(__pfn_to_phys(pfn));
-}
-
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
@@ -176,34 +168,6 @@ int __meminit arch_remove_memory(int nid, u64 start, u64 size,
#endif
#endif /* CONFIG_MEMORY_HOTPLUG */
-/*
- * walk_memory_resource() needs to make sure there is no holes in a given
- * memory range. PPC64 does not maintain the memory layout in /proc/iomem.
- * Instead it maintains it in memblock.memory structures. Walk through the
- * memory regions, find holes and callback for contiguous regions.
- */
-int
-walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
- void *arg, int (*func)(unsigned long, unsigned long, void *))
-{
- struct memblock_region *reg;
- unsigned long end_pfn = start_pfn + nr_pages;
- unsigned long tstart, tend;
- int ret = -1;
-
- for_each_memblock(memory, reg) {
- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
- if (tstart >= tend)
- continue;
- ret = (*func)(tstart, tend - tstart, arg);
- if (ret)
- break;
- }
- return ret;
-}
-EXPORT_SYMBOL_GPL(walk_system_ram_range);
-
#ifndef CONFIG_NEED_MULTIPLE_NODES
void __init mem_topology_setup(void)
{
@@ -262,25 +226,6 @@ static int __init mark_nonram_nosave(void)
static unsigned long max_zone_pfns[MAX_NR_ZONES];
/*
- * Find the least restrictive zone that is entirely below the
- * specified pfn limit. Returns < 0 if no suitable zone is found.
- *
- * pfn_limit must be u64 because it can exceed 32 bits even on 32-bit
- * systems -- the DMA limit can be higher than any possible real pfn.
- */
-int dma_pfn_limit_to_zone(u64 pfn_limit)
-{
- int i;
-
- for (i = TOP_ZONE; i >= 0; i--) {
- if (max_zone_pfns[i] <= pfn_limit)
- return i;
- }
-
- return -EPERM;
-}
-
-/*
* paging_init() sets up the page tables - in fact we've already done this.
*/
void __init paging_init(void)
@@ -585,3 +530,9 @@ int devmem_is_allowed(unsigned long pfn)
return 0;
}
#endif /* CONFIG_STRICT_DEVMEM */
+
+/*
+ * This is defined in kernel/resource.c but only powerpc needs to export it, for
+ * the EHEA driver. Drop this when drivers/net/ethernet/ibm/ehea is removed.
+ */
+EXPORT_SYMBOL_GPL(walk_system_ram_range);
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index a712a650a8b6..e7a9c4f6bfca 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -21,6 +21,7 @@
#include <linux/sizes.h>
#include <asm/mmu_context.h>
#include <asm/pte-walk.h>
+#include <linux/mm_inline.h>
static DEFINE_MUTEX(mem_list_mutex);
@@ -34,8 +35,18 @@ struct mm_iommu_table_group_mem_t {
atomic64_t mapped;
unsigned int pageshift;
u64 ua; /* userspace address */
- u64 entries; /* number of entries in hpas[] */
- u64 *hpas; /* vmalloc'ed */
+ u64 entries; /* number of entries in hpas/hpages[] */
+ /*
+ * in mm_iommu_get we temporarily use this to store
+ * struct page address.
+ *
+ * We need to convert ua to hpa in real mode. Make it
+ * simpler by storing physical address.
+ */
+ union {
+ struct page **hpages; /* vmalloc'ed */
+ phys_addr_t *hpas;
+ };
#define MM_IOMMU_TABLE_INVALID_HPA ((uint64_t)-1)
u64 dev_hpa; /* Device memory base address */
};
@@ -80,64 +91,13 @@ bool mm_iommu_preregistered(struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
-/*
- * Taken from alloc_migrate_target with changes to remove CMA allocations
- */
-struct page *new_iommu_non_cma_page(struct page *page, unsigned long private)
-{
- gfp_t gfp_mask = GFP_USER;
- struct page *new_page;
-
- if (PageCompound(page))
- return NULL;
-
- if (PageHighMem(page))
- gfp_mask |= __GFP_HIGHMEM;
-
- /*
- * We don't want the allocation to force an OOM if possibe
- */
- new_page = alloc_page(gfp_mask | __GFP_NORETRY | __GFP_NOWARN);
- return new_page;
-}
-
-static int mm_iommu_move_page_from_cma(struct page *page)
-{
- int ret = 0;
- LIST_HEAD(cma_migrate_pages);
-
- /* Ignore huge pages for now */
- if (PageCompound(page))
- return -EBUSY;
-
- lru_add_drain();
- ret = isolate_lru_page(page);
- if (ret)
- return ret;
-
- list_add(&page->lru, &cma_migrate_pages);
- put_page(page); /* Drop the gup reference */
-
- ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page,
- NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE);
- if (ret) {
- if (!list_empty(&cma_migrate_pages))
- putback_movable_pages(&cma_migrate_pages);
- }
-
- return 0;
-}
-
static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
- unsigned long entries, unsigned long dev_hpa,
- struct mm_iommu_table_group_mem_t **pmem)
+ unsigned long entries, unsigned long dev_hpa,
+ struct mm_iommu_table_group_mem_t **pmem)
{
struct mm_iommu_table_group_mem_t *mem;
- long i, j, ret = 0, locked_entries = 0;
+ long i, ret, locked_entries = 0;
unsigned int pageshift;
- unsigned long flags;
- unsigned long cur_ua;
- struct page *page = NULL;
mutex_lock(&mem_list_mutex);
@@ -187,62 +147,43 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
goto unlock_exit;
}
+ down_read(&mm->mmap_sem);
+ ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL);
+ up_read(&mm->mmap_sem);
+ if (ret != entries) {
+ /* free the reference taken */
+ for (i = 0; i < ret; i++)
+ put_page(mem->hpages[i]);
+
+ vfree(mem->hpas);
+ kfree(mem);
+ ret = -EFAULT;
+ goto unlock_exit;
+ }
+
+ pageshift = PAGE_SHIFT;
for (i = 0; i < entries; ++i) {
- cur_ua = ua + (i << PAGE_SHIFT);
- if (1 != get_user_pages_fast(cur_ua,
- 1/* pages */, 1/* iswrite */, &page)) {
- ret = -EFAULT;
- for (j = 0; j < i; ++j)
- put_page(pfn_to_page(mem->hpas[j] >>
- PAGE_SHIFT));
- vfree(mem->hpas);
- kfree(mem);
- goto unlock_exit;
- }
+ struct page *page = mem->hpages[i];
+
/*
- * If we get a page from the CMA zone, since we are going to
- * be pinning these entries, we might as well move them out
- * of the CMA zone if possible. NOTE: faulting in + migration
- * can be expensive. Batching can be considered later
+ * Allow to use larger than 64k IOMMU pages. Only do that
+ * if we are backed by hugetlb.
*/
- if (is_migrate_cma_page(page)) {
- if (mm_iommu_move_page_from_cma(page))
- goto populate;
- if (1 != get_user_pages_fast(cur_ua,
- 1/* pages */, 1/* iswrite */,
- &page)) {
- ret = -EFAULT;
- for (j = 0; j < i; ++j)
- put_page(pfn_to_page(mem->hpas[j] >>
- PAGE_SHIFT));
- vfree(mem->hpas);
- kfree(mem);
- goto unlock_exit;
- }
- }
-populate:
- pageshift = PAGE_SHIFT;
- if (mem->pageshift > PAGE_SHIFT && PageCompound(page)) {
- pte_t *pte;
+ if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
struct page *head = compound_head(page);
- unsigned int compshift = compound_order(head);
- unsigned int pteshift;
-
- local_irq_save(flags); /* disables as well */
- pte = find_linux_pte(mm->pgd, cur_ua, NULL, &pteshift);
-
- /* Double check it is still the same pinned page */
- if (pte && pte_page(*pte) == head &&
- pteshift == compshift + PAGE_SHIFT)
- pageshift = max_t(unsigned int, pteshift,
- PAGE_SHIFT);
- local_irq_restore(flags);
+
+ pageshift = compound_order(head) + PAGE_SHIFT;
}
mem->pageshift = min(mem->pageshift, pageshift);
+ /*
+ * We don't need struct page reference any more, switch
+ * to physical address.
+ */
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
}
good_exit:
+ ret = 0;
atomic64_set(&mem->mapped, 1);
mem->used = 1;
mem->ua = ua;
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index c4a717da65eb..74ff61dabcb1 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -130,7 +130,7 @@ extern void wii_memory_fixups(void);
*/
#ifdef CONFIG_PPC32
extern void MMU_init_hw(void);
-extern unsigned long mmu_mapin_ram(unsigned long top);
+unsigned long mmu_mapin_ram(unsigned long base, unsigned long top);
#endif
#ifdef CONFIG_PPC_FSL_BOOK3E
@@ -165,3 +165,11 @@ unsigned long p_block_mapped(phys_addr_t pa);
static inline phys_addr_t v_block_mapped(unsigned long va) { return 0; }
static inline unsigned long p_block_mapped(phys_addr_t pa) { return 0; }
#endif
+
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx)
+void mmu_mark_initmem_nx(void);
+void mmu_mark_rodata_ro(void);
+#else
+static inline void mmu_mark_initmem_nx(void) { }
+static inline void mmu_mark_rodata_ro(void) { }
+#endif
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 87f0dd004295..ac49e4158e50 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -84,7 +84,7 @@ static void __init setup_node_to_cpumask_map(void)
alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
/* cpumask_of_node() will now work */
- dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
+ dbg("Node to cpumask map for %u nodes\n", nr_node_ids);
}
static int __init fake_numa_create_new_node(unsigned long end_pfn,
@@ -215,7 +215,7 @@ static void initialize_distance_lookup_table(int nid,
*/
static int associativity_to_nid(const __be32 *associativity)
{
- int nid = -1;
+ int nid = NUMA_NO_NODE;
if (min_common_depth == -1)
goto out;
@@ -225,7 +225,7 @@ static int associativity_to_nid(const __be32 *associativity)
/* POWER4 LPAR uses 0xffff as invalid node */
if (nid == 0xffff || nid >= MAX_NUMNODES)
- nid = -1;
+ nid = NUMA_NO_NODE;
if (nid > 0 &&
of_read_number(associativity, 1) >= distance_ref_points_depth) {
@@ -244,7 +244,7 @@ out:
*/
static int of_node_to_nid_single(struct device_node *device)
{
- int nid = -1;
+ int nid = NUMA_NO_NODE;
const __be32 *tmp;
tmp = of_get_associativity(device);
@@ -256,7 +256,7 @@ static int of_node_to_nid_single(struct device_node *device)
/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
- int nid = -1;
+ int nid = NUMA_NO_NODE;
of_node_get(device);
while (device) {
@@ -454,7 +454,7 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
*/
static int numa_setup_cpu(unsigned long lcpu)
{
- int nid = -1;
+ int nid = NUMA_NO_NODE;
struct device_node *cpu;
/*
@@ -930,7 +930,7 @@ static int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
{
struct drmem_lmb *lmb;
unsigned long lmb_size;
- int nid = -1;
+ int nid = NUMA_NO_NODE;
lmb_size = drmem_lmb_size();
@@ -960,7 +960,7 @@ static int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
static int hot_add_node_scn_to_nid(unsigned long scn_addr)
{
struct device_node *memory;
- int nid = -1;
+ int nid = NUMA_NO_NODE;
for_each_node_by_type(memory, "memory") {
unsigned long start, size;
@@ -1460,13 +1460,6 @@ static void reset_topology_timer(void)
#ifdef CONFIG_SMP
-static void stage_topology_update(int core_id)
-{
- cpumask_or(&cpu_associativity_changes_mask,
- &cpu_associativity_changes_mask, cpu_sibling_mask(core_id));
- reset_topology_timer();
-}
-
static int dt_update_callback(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -1479,7 +1472,7 @@ static int dt_update_callback(struct notifier_block *nb,
!of_prop_cmp(update->prop->name, "ibm,associativity")) {
u32 core_id;
of_property_read_u32(update->dn, "reg", &core_id);
- stage_topology_update(core_id);
+ rc = dlpar_cpu_readd(core_id);
rc = NOTIFY_OK;
}
break;
diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/pgtable-book3e.c
index e0ccf36714b2..53cbc7dc2df2 100644
--- a/arch/powerpc/mm/pgtable-book3e.c
+++ b/arch/powerpc/mm/pgtable-book3e.c
@@ -57,12 +57,8 @@ void vmemmap_remove_mapping(unsigned long start,
static __ref void *early_alloc_pgtable(unsigned long size)
{
- void *pt;
-
- pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS)));
- memset(pt, 0, size);
-
- return pt;
+ return memblock_alloc_try_nid(size, size, MEMBLOCK_LOW_LIMIT,
+ __pa(MAX_DMA_ADDRESS), NUMA_NO_NODE);
}
/*
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index ecd31569a120..92a3e4c39540 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -195,11 +195,8 @@ void __init mmu_partition_table_init(void)
unsigned long ptcr;
BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
- partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
- MEMBLOCK_ALLOC_ANYWHERE));
-
/* Initialize the Partition Table with no entries */
- memset((void *)partition_tb, 0, patb_size);
+ partition_tb = memblock_alloc(patb_size, patb_size);
/*
* update partition table control register,
@@ -401,6 +398,31 @@ void arch_report_meminfo(struct seq_file *m)
}
#endif /* CONFIG_PROC_FS */
+pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep)
+{
+ unsigned long pte_val;
+
+ /*
+ * Clear the _PAGE_PRESENT so that no hardware parallel update is
+ * possible. Also keep the pte_present true so that we don't take
+ * wrong fault.
+ */
+ pte_val = pte_update(vma->vm_mm, addr, ptep, _PAGE_PRESENT, _PAGE_INVALID, 0);
+
+ return __pte(pte_val);
+
+}
+
+void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte)
+{
+ if (radix_enabled())
+ return radix__ptep_modify_prot_commit(vma, addr,
+ ptep, old_pte, pte);
+ set_pte_at(vma->vm_mm, addr, ptep, pte);
+}
+
/*
* For hash translation mode, we use the deposited table to store hash slot
* information and they are stored at PTRS_PER_PMD offset from related pmd
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 931156069a81..e377684ac6ad 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -51,26 +51,15 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz
static __ref void *early_alloc_pgtable(unsigned long size, int nid,
unsigned long region_start, unsigned long region_end)
{
- unsigned long pa = 0;
- void *pt;
+ phys_addr_t min_addr = MEMBLOCK_LOW_LIMIT;
+ phys_addr_t max_addr = MEMBLOCK_ALLOC_ANYWHERE;
- if (region_start || region_end) /* has region hint */
- pa = memblock_alloc_range(size, size, region_start, region_end,
- MEMBLOCK_NONE);
- else if (nid != -1) /* has node hint */
- pa = memblock_alloc_base_nid(size, size,
- MEMBLOCK_ALLOC_ANYWHERE,
- nid, MEMBLOCK_NONE);
+ if (region_start)
+ min_addr = region_start;
+ if (region_end)
+ max_addr = region_end;
- if (!pa)
- pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE);
-
- BUG_ON(!pa);
-
- pt = __va(pa);
- memset(pt, 0, size);
-
- return pt;
+ return memblock_alloc_try_nid(size, size, min_addr, max_addr, nid);
}
static int early_map_kernel_page(unsigned long ea, unsigned long pa,
@@ -1063,3 +1052,21 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
}
/* See ptesync comment in radix__set_pte_at */
}
+
+void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t pte)
+{
+ struct mm_struct *mm = vma->vm_mm;
+
+ /*
+ * To avoid NMMU hang while relaxing access we need to flush the tlb before
+ * we set the new value. We need to do this only for radix, because hash
+ * translation does flush when updating the linux pte.
+ */
+ if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
+ (atomic_read(&mm->context.copros) > 0))
+ radix__flush_tlb_page(vma, addr);
+
+ set_pte_at(mm, addr, ptep, pte);
+}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index ded71126ce4c..6e56a6240bfa 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -254,26 +254,20 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
void __init mapin_ram(void)
{
- unsigned long s, top;
-
-#ifndef CONFIG_WII
- top = total_lowmem;
- s = mmu_mapin_ram(top);
- __mapin_ram_chunk(s, top);
-#else
- if (!wii_hole_size) {
- s = mmu_mapin_ram(total_lowmem);
- __mapin_ram_chunk(s, total_lowmem);
- } else {
- top = wii_hole_start;
- s = mmu_mapin_ram(top);
- __mapin_ram_chunk(s, top);
-
- top = memblock_end_of_DRAM();
- s = wii_mmu_mapin_mem2(top);
- __mapin_ram_chunk(s, top);
+ struct memblock_region *reg;
+
+ for_each_memblock(memory, reg) {
+ phys_addr_t base = reg->base;
+ phys_addr_t top = min(base + reg->size, total_lowmem);
+
+ if (base >= top)
+ continue;
+ base = mmu_mapin_ram(base, top);
+ if (IS_ENABLED(CONFIG_BDI_SWITCH))
+ __mapin_ram_chunk(reg->base, top);
+ else
+ __mapin_ram_chunk(base, top);
}
-#endif
}
/* Scan the real Linux page tables and return a PTE pointer for
@@ -359,7 +353,10 @@ void mark_initmem_nx(void)
unsigned long numpages = PFN_UP((unsigned long)_einittext) -
PFN_DOWN((unsigned long)_sinittext);
- change_page_attr(page, numpages, PAGE_KERNEL);
+ if (v_block_mapped((unsigned long)_stext) + 1)
+ mmu_mark_initmem_nx();
+ else
+ change_page_attr(page, numpages, PAGE_KERNEL);
}
#ifdef CONFIG_STRICT_KERNEL_RWX
@@ -368,6 +365,11 @@ void mark_rodata_ro(void)
struct page *page;
unsigned long numpages;
+ if (v_block_mapped((unsigned long)_sinittext)) {
+ mmu_mark_rodata_ro();
+ return;
+ }
+
page = virt_to_page(_stext);
numpages = PFN_UP((unsigned long)_etext) -
PFN_DOWN((unsigned long)_stext);
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 3f4193201ee7..6c8a60b1e31d 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -32,6 +32,7 @@
#include <asm/mmu.h>
#include <asm/machdep.h>
#include <asm/code-patching.h>
+#include <asm/sections.h>
#include "mmu_decl.h"
@@ -73,45 +74,171 @@ unsigned long p_block_mapped(phys_addr_t pa)
return 0;
}
-unsigned long __init mmu_mapin_ram(unsigned long top)
+static int find_free_bat(void)
{
- unsigned long tot, bl, done;
- unsigned long max_size = (256<<20);
+ int b;
+
+ if (cpu_has_feature(CPU_FTR_601)) {
+ for (b = 0; b < 4; b++) {
+ struct ppc_bat *bat = BATS[b];
+
+ if (!(bat[0].batl & 0x40))
+ return b;
+ }
+ } else {
+ int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+
+ for (b = 0; b < n; b++) {
+ struct ppc_bat *bat = BATS[b];
+
+ if (!(bat[1].batu & 3))
+ return b;
+ }
+ }
+ return -1;
+}
+
+static unsigned int block_size(unsigned long base, unsigned long top)
+{
+ unsigned int max_size = (cpu_has_feature(CPU_FTR_601) ? 8 : 256) << 20;
+ unsigned int base_shift = (fls(base) - 1) & 31;
+ unsigned int block_shift = (fls(top - base) - 1) & 31;
+
+ return min3(max_size, 1U << base_shift, 1U << block_shift);
+}
+
+/*
+ * Set up one of the IBAT (block address translation) register pairs.
+ * The parameters are not checked; in particular size must be a power
+ * of 2 between 128k and 256M.
+ * Only for 603+ ...
+ */
+static void setibat(int index, unsigned long virt, phys_addr_t phys,
+ unsigned int size, pgprot_t prot)
+{
+ unsigned int bl = (size >> 17) - 1;
+ int wimgxpp;
+ struct ppc_bat *bat = BATS[index];
+ unsigned long flags = pgprot_val(prot);
+
+ if (!cpu_has_feature(CPU_FTR_NEED_COHERENT))
+ flags &= ~_PAGE_COHERENT;
+
+ wimgxpp = (flags & _PAGE_COHERENT) | (_PAGE_EXEC ? BPP_RX : BPP_XX);
+ bat[0].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
+ bat[0].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
+ if (flags & _PAGE_USER)
+ bat[0].batu |= 1; /* Vp = 1 */
+}
+
+static void clearibat(int index)
+{
+ struct ppc_bat *bat = BATS[index];
+
+ bat[0].batu = 0;
+ bat[0].batl = 0;
+}
+
+static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+ int idx;
+
+ while ((idx = find_free_bat()) != -1 && base != top) {
+ unsigned int size = block_size(base, top);
+
+ if (size < 128 << 10)
+ break;
+ setbat(idx, PAGE_OFFSET + base, base, size, PAGE_KERNEL_X);
+ base += size;
+ }
+
+ return base;
+}
+
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+ int done;
+ unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
if (__map_without_bats) {
- printk(KERN_DEBUG "RAM mapped without BATs\n");
- return 0;
+ pr_debug("RAM mapped without BATs\n");
+ return base;
+ }
+
+ if (!strict_kernel_rwx_enabled() || base >= border || top <= border)
+ return __mmu_mapin_ram(base, top);
+
+ done = __mmu_mapin_ram(base, border);
+ if (done != border - base)
+ return done;
+
+ return done + __mmu_mapin_ram(border, top);
+}
+
+void mmu_mark_initmem_nx(void)
+{
+ int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+ int i;
+ unsigned long base = (unsigned long)_stext - PAGE_OFFSET;
+ unsigned long top = (unsigned long)_etext - PAGE_OFFSET;
+ unsigned long size;
+
+ if (cpu_has_feature(CPU_FTR_601))
+ return;
+
+ for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) {
+ size = block_size(base, top);
+ setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
+ base += size;
}
+ if (base < top) {
+ size = block_size(base, top);
+ size = max(size, 128UL << 10);
+ if ((top - base) > size) {
+ if (strict_kernel_rwx_enabled())
+ pr_warn("Kernel _etext not properly aligned\n");
+ size <<= 1;
+ }
+ setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
+ base += size;
+ }
+ for (; i < nb; i++)
+ clearibat(i);
- /* Set up BAT2 and if necessary BAT3 to cover RAM. */
+ update_bats();
- /* Make sure we don't map a block larger than the
- smallest alignment of the physical address. */
- tot = top;
- for (bl = 128<<10; bl < max_size; bl <<= 1) {
- if (bl * 2 > tot)
+ for (i = TASK_SIZE >> 28; i < 16; i++) {
+ /* Do not set NX on VM space for modules */
+ if (IS_ENABLED(CONFIG_MODULES) &&
+ (VMALLOC_START & 0xf0000000) == i << 28)
break;
+ mtsrin(mfsrin(i << 28) | 0x10000000, i << 28);
}
+}
+
+void mmu_mark_rodata_ro(void)
+{
+ int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+ int i;
+
+ if (cpu_has_feature(CPU_FTR_601))
+ return;
+
+ for (i = 0; i < nb; i++) {
+ struct ppc_bat *bat = BATS[i];
- setbat(2, PAGE_OFFSET, 0, bl, PAGE_KERNEL_X);
- done = (unsigned long)bat_addrs[2].limit - PAGE_OFFSET + 1;
- if ((done < tot) && !bat_addrs[3].limit) {
- /* use BAT3 to cover a bit more */
- tot -= done;
- for (bl = 128<<10; bl < max_size; bl <<= 1)
- if (bl * 2 > tot)
- break;
- setbat(3, PAGE_OFFSET+done, done, bl, PAGE_KERNEL_X);
- done = (unsigned long)bat_addrs[3].limit - PAGE_OFFSET + 1;
+ if (bat_addrs[i].start < (unsigned long)__init_begin)
+ bat[1].batl = (bat[1].batl & ~BPP_RW) | BPP_RX;
}
- return done;
+ update_bats();
}
/*
* Set up one of the I/D BAT (block address translation) register pairs.
* The parameters are not checked; in particular size must be a power
* of 2 between 128k and 256M.
+ * On 603+, only set IBAT when _PAGE_EXEC is set
*/
void __init setbat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, pgprot_t prot)
@@ -138,11 +265,12 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
bat[1].batu |= 1; /* Vp = 1 */
if (flags & _PAGE_GUARDED) {
/* G bit must be zero in IBATs */
- bat[0].batu = bat[0].batl = 0;
- } else {
- /* make IBAT same as DBAT */
- bat[0] = bat[1];
+ flags &= ~_PAGE_EXEC;
}
+ if (flags & _PAGE_EXEC)
+ bat[0] = bat[1];
+ else
+ bat[0].batu = bat[0].batl = 0;
} else {
/* 601 cpu */
if (bl > BL_8M)
@@ -211,8 +339,7 @@ void __init MMU_init_hw(void)
* Find some memory for the hash table.
*/
if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
- Hash = __va(memblock_phys_alloc(Hash_size, Hash_size));
- memset(Hash, 0, Hash_size);
+ Hash = memblock_alloc(Hash_size, Hash_size);
_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
@@ -231,7 +358,8 @@ void __init MMU_init_hw(void)
if (lg_n_hpteg > 16)
mb2 = 16 - LG_HPTEG_SIZE;
- modify_instruction_site(&patch__hash_page_A0, 0xffff, (unsigned int)Hash >> 16);
+ modify_instruction_site(&patch__hash_page_A0, 0xffff,
+ ((unsigned int)Hash - PAGE_OFFSET) >> 16);
modify_instruction_site(&patch__hash_page_A1, 0x7c0, mb << 6);
modify_instruction_site(&patch__hash_page_A2, 0x7c0, mb2 << 6);
modify_instruction_site(&patch__hash_page_B, 0xffff, hmask);
@@ -240,7 +368,8 @@ void __init MMU_init_hw(void)
/*
* Patch up the instructions in hashtable.S:flush_hash_page
*/
- modify_instruction_site(&patch__flush_hash_A0, 0xffff, (unsigned int)Hash >> 16);
+ modify_instruction_site(&patch__flush_hash_A0, 0xffff,
+ ((unsigned int)Hash - PAGE_OFFSET) >> 16);
modify_instruction_site(&patch__flush_hash_A1, 0x7c0, mb << 6);
modify_instruction_site(&patch__flush_hash_A2, 0x7c0, mb2 << 6);
modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask);
diff --git a/arch/powerpc/mm/dump_linuxpagetables-8xx.c b/arch/powerpc/mm/ptdump/8xx.c
index ab9e3f24db2f..9e2d8e847d6e 100644
--- a/arch/powerpc/mm/dump_linuxpagetables-8xx.c
+++ b/arch/powerpc/mm/ptdump/8xx.c
@@ -7,7 +7,7 @@
#include <linux/kernel.h>
#include <asm/pgtable.h>
-#include "dump_linuxpagetables.h"
+#include "ptdump.h"
static const struct flag_info flag_array[] = {
{
diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile
new file mode 100644
index 000000000000..712762be3cb1
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y += ptdump.o
+
+obj-$(CONFIG_4xx) += shared.o
+obj-$(CONFIG_PPC_8xx) += 8xx.o
+obj-$(CONFIG_PPC_BOOK3E_MMU) += shared.o
+obj-$(CONFIG_PPC_BOOK3S_32) += shared.o bats.o segment_regs.o
+obj-$(CONFIG_PPC_BOOK3S_64) += book3s64.o hashpagetable.o
diff --git a/arch/powerpc/mm/dump_bats.c b/arch/powerpc/mm/ptdump/bats.c
index a0d23e96e841..a0d23e96e841 100644
--- a/arch/powerpc/mm/dump_bats.c
+++ b/arch/powerpc/mm/ptdump/bats.c
diff --git a/arch/powerpc/mm/dump_linuxpagetables-book3s64.c b/arch/powerpc/mm/ptdump/book3s64.c
index ed6fcf78256e..0dfca72cb9bd 100644
--- a/arch/powerpc/mm/dump_linuxpagetables-book3s64.c
+++ b/arch/powerpc/mm/ptdump/book3s64.c
@@ -7,7 +7,7 @@
#include <linux/kernel.h>
#include <asm/pgtable.h>
-#include "dump_linuxpagetables.h"
+#include "ptdump.h"
static const struct flag_info flag_array[] = {
{
diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index 869294695048..b430e4e08af6 100644
--- a/arch/powerpc/mm/dump_hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -342,7 +342,7 @@ static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize)
/* Look in secondary table */
if (slot == -1)
- slot = base_hpte_find(ea, psize, true, &v, &r);
+ slot = base_hpte_find(ea, psize, false, &v, &r);
/* No entry found */
if (slot == -1)
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/ptdump/ptdump.c
index 6aa41669ac1a..37138428ab55 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -28,7 +28,7 @@
#include <asm/page.h>
#include <asm/pgalloc.h>
-#include "dump_linuxpagetables.h"
+#include "ptdump.h"
#ifdef CONFIG_PPC32
#define KERN_VIRT_START 0
@@ -143,14 +143,19 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
unsigned long delta;
#ifdef CONFIG_PPC64
- seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr-1);
- seq_printf(st->seq, "0x%016lx ", st->start_pa);
+#define REG "0x%016lx"
#else
- seq_printf(st->seq, "0x%08lx-0x%08lx ", st->start_address, addr - 1);
- seq_printf(st->seq, "0x%08lx ", st->start_pa);
+#define REG "0x%08lx"
#endif
- delta = (addr - st->start_address) >> 10;
+ seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
+ if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) {
+ seq_printf(st->seq, "[" REG "]", st->start_pa);
+ delta = PAGE_SIZE >> 10;
+ } else {
+ seq_printf(st->seq, " " REG " ", st->start_pa);
+ delta = (addr - st->start_address) >> 10;
+ }
/* Work out what appropriate unit to use */
while (!(delta & 1023) && unit[1]) {
delta >>= 10;
@@ -184,7 +189,8 @@ static void note_page(struct pg_state *st, unsigned long addr,
*/
} else if (flag != st->current_flags || level != st->level ||
addr >= st->marker[1].start_address ||
- pa != st->last_pa + PAGE_SIZE) {
+ (pa != st->last_pa + PAGE_SIZE &&
+ (pa != st->start_pa || st->start_pa != st->last_pa))) {
/* Check the PTE flags */
if (st->current_flags) {
diff --git a/arch/powerpc/mm/dump_linuxpagetables.h b/arch/powerpc/mm/ptdump/ptdump.h
index 5d513636de73..5d513636de73 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.h
+++ b/arch/powerpc/mm/ptdump/ptdump.h
diff --git a/arch/powerpc/mm/dump_sr.c b/arch/powerpc/mm/ptdump/segment_regs.c
index 501843664bb9..501843664bb9 100644
--- a/arch/powerpc/mm/dump_sr.c
+++ b/arch/powerpc/mm/ptdump/segment_regs.c
diff --git a/arch/powerpc/mm/dump_linuxpagetables-generic.c b/arch/powerpc/mm/ptdump/shared.c
index 3fe98a0974c6..f7ed2f187cb0 100644
--- a/arch/powerpc/mm/dump_linuxpagetables-generic.c
+++ b/arch/powerpc/mm/ptdump/shared.c
@@ -7,7 +7,7 @@
#include <linux/kernel.h>
#include <asm/pgtable.h>
-#include "dump_linuxpagetables.h"
+#include "ptdump.h"
static const struct flag_info flag_array[] = {
{
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index bc3914d54e26..5986df48359b 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -69,6 +69,11 @@ static void assert_slb_presence(bool present, unsigned long ea)
if (!cpu_has_feature(CPU_FTR_ARCH_206))
return;
+ /*
+ * slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware
+ * ignores all other bits from 0-27, so just clear them all.
+ */
+ ea &= ~((1UL << 28) - 1);
asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0");
WARN_ON(present == (tmp == 0));
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 06898c13901d..aec91dbcdc0b 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -32,6 +32,7 @@
#include <linux/export.h>
#include <linux/hugetlb.h>
#include <linux/sched/mm.h>
+#include <linux/security.h>
#include <asm/mman.h>
#include <asm/mmu.h>
#include <asm/copro.h>
@@ -377,6 +378,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
unsigned long addr, found, prev;
struct vm_unmapped_area_info info;
+ unsigned long min_addr = max(PAGE_SIZE, mmap_min_addr);
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
@@ -393,7 +395,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
if (high_limit > DEFAULT_MAP_WINDOW)
addr += mm->context.slb_addr_limit - DEFAULT_MAP_WINDOW;
- while (addr > PAGE_SIZE) {
+ while (addr > min_addr) {
info.high_limit = addr;
if (!slice_scan_available(addr - 1, available, 0, &addr))
continue;
@@ -405,8 +407,8 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
* Check if we need to reduce the range, or if we can
* extend it to cover the previous available slice.
*/
- if (addr < PAGE_SIZE)
- addr = PAGE_SIZE;
+ if (addr < min_addr)
+ addr = min_addr;
else if (slice_scan_available(addr - 1, available, 0, &prev)) {
addr = prev;
goto prev_slice;
@@ -528,7 +530,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
addr = _ALIGN_UP(addr, page_size);
slice_dbg(" aligned addr=%lx\n", addr);
/* Ignore hint if it's too large or overlaps a VMA */
- if (addr > high_limit - len ||
+ if (addr > high_limit - len || addr < mmap_min_addr ||
!slice_area_is_free(mm, addr, len))
addr = 0;
}
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index ae5d568e267f..ac23dc1c6535 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -302,7 +302,7 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
* This function as well as __local_flush_tlb_page() must only be called
* for user contexts.
*/
- if (unlikely(WARN_ON(!mm)))
+ if (WARN_ON(!mm))
return;
preempt_disable();
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index c2d5192ed64f..549e9490ff2a 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -165,6 +165,10 @@
#define PPC_RLWINM(d, a, i, mb, me) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \
___PPC_RS(a) | __PPC_SH(i) | \
__PPC_MB(mb) | __PPC_ME(me))
+#define PPC_RLWINM_DOT(d, a, i, mb, me) EMIT(PPC_INST_RLWINM_DOT | \
+ ___PPC_RA(d) | ___PPC_RS(a) | \
+ __PPC_SH(i) | __PPC_MB(mb) | \
+ __PPC_ME(me))
#define PPC_RLWIMI(d, a, i, mb, me) EMIT(PPC_INST_RLWIMI | ___PPC_RA(d) | \
___PPC_RS(a) | __PPC_SH(i) | \
__PPC_MB(mb) | __PPC_ME(me))
diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
index 6f4daacad296..dc50a8d4b3b9 100644
--- a/arch/powerpc/net/bpf_jit32.h
+++ b/arch/powerpc/net/bpf_jit32.h
@@ -106,9 +106,8 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
} while (0)
#else
#define PPC_BPF_LOAD_CPU(r) \
- do { BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4); \
- PPC_LHZ_OFFS(r, (1 & ~(THREAD_SIZE - 1)), \
- offsetof(struct thread_info, cpu)); \
+ do { BUILD_BUG_ON(FIELD_SIZEOF(struct task_struct, cpu) != 4); \
+ PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu)); \
} while(0)
#endif
#else
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 7ce57657d3b8..4194d3cfb60c 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -768,36 +768,58 @@ emit_clear:
case BPF_JMP | BPF_JGT | BPF_X:
case BPF_JMP | BPF_JSGT | BPF_K:
case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP32 | BPF_JGT | BPF_K:
+ case BPF_JMP32 | BPF_JGT | BPF_X:
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
+ case BPF_JMP32 | BPF_JSGT | BPF_X:
true_cond = COND_GT;
goto cond_branch;
case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP32 | BPF_JLT | BPF_K:
+ case BPF_JMP32 | BPF_JLT | BPF_X:
+ case BPF_JMP32 | BPF_JSLT | BPF_K:
+ case BPF_JMP32 | BPF_JSLT | BPF_X:
true_cond = COND_LT;
goto cond_branch;
case BPF_JMP | BPF_JGE | BPF_K:
case BPF_JMP | BPF_JGE | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP32 | BPF_JGE | BPF_K:
+ case BPF_JMP32 | BPF_JGE | BPF_X:
+ case BPF_JMP32 | BPF_JSGE | BPF_K:
+ case BPF_JMP32 | BPF_JSGE | BPF_X:
true_cond = COND_GE;
goto cond_branch;
case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_X:
+ case BPF_JMP32 | BPF_JLE | BPF_K:
+ case BPF_JMP32 | BPF_JLE | BPF_X:
+ case BPF_JMP32 | BPF_JSLE | BPF_K:
+ case BPF_JMP32 | BPF_JSLE | BPF_X:
true_cond = COND_LE;
goto cond_branch;
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP32 | BPF_JEQ | BPF_K:
+ case BPF_JMP32 | BPF_JEQ | BPF_X:
true_cond = COND_EQ;
goto cond_branch;
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP32 | BPF_JNE | BPF_K:
+ case BPF_JMP32 | BPF_JNE | BPF_X:
true_cond = COND_NE;
goto cond_branch;
case BPF_JMP | BPF_JSET | BPF_K:
case BPF_JMP | BPF_JSET | BPF_X:
+ case BPF_JMP32 | BPF_JSET | BPF_K:
+ case BPF_JMP32 | BPF_JSET | BPF_X:
true_cond = COND_NE;
/* Fall through */
@@ -809,18 +831,44 @@ cond_branch:
case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP32 | BPF_JGT | BPF_X:
+ case BPF_JMP32 | BPF_JLT | BPF_X:
+ case BPF_JMP32 | BPF_JGE | BPF_X:
+ case BPF_JMP32 | BPF_JLE | BPF_X:
+ case BPF_JMP32 | BPF_JEQ | BPF_X:
+ case BPF_JMP32 | BPF_JNE | BPF_X:
/* unsigned comparison */
- PPC_CMPLD(dst_reg, src_reg);
+ if (BPF_CLASS(code) == BPF_JMP32)
+ PPC_CMPLW(dst_reg, src_reg);
+ else
+ PPC_CMPLD(dst_reg, src_reg);
break;
case BPF_JMP | BPF_JSGT | BPF_X:
case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
case BPF_JMP | BPF_JSLE | BPF_X:
+ case BPF_JMP32 | BPF_JSGT | BPF_X:
+ case BPF_JMP32 | BPF_JSLT | BPF_X:
+ case BPF_JMP32 | BPF_JSGE | BPF_X:
+ case BPF_JMP32 | BPF_JSLE | BPF_X:
/* signed comparison */
- PPC_CMPD(dst_reg, src_reg);
+ if (BPF_CLASS(code) == BPF_JMP32)
+ PPC_CMPW(dst_reg, src_reg);
+ else
+ PPC_CMPD(dst_reg, src_reg);
break;
case BPF_JMP | BPF_JSET | BPF_X:
- PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, src_reg);
+ case BPF_JMP32 | BPF_JSET | BPF_X:
+ if (BPF_CLASS(code) == BPF_JMP) {
+ PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
+ src_reg);
+ } else {
+ int tmp_reg = b2p[TMP_REG_1];
+
+ PPC_AND(tmp_reg, dst_reg, src_reg);
+ PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
+ 31);
+ }
break;
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JEQ | BPF_K:
@@ -828,43 +876,87 @@ cond_branch:
case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JGE | BPF_K:
case BPF_JMP | BPF_JLE | BPF_K:
+ case BPF_JMP32 | BPF_JNE | BPF_K:
+ case BPF_JMP32 | BPF_JEQ | BPF_K:
+ case BPF_JMP32 | BPF_JGT | BPF_K:
+ case BPF_JMP32 | BPF_JLT | BPF_K:
+ case BPF_JMP32 | BPF_JGE | BPF_K:
+ case BPF_JMP32 | BPF_JLE | BPF_K:
+ {
+ bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
+
/*
* Need sign-extended load, so only positive
* values can be used as imm in cmpldi
*/
- if (imm >= 0 && imm < 32768)
- PPC_CMPLDI(dst_reg, imm);
- else {
+ if (imm >= 0 && imm < 32768) {
+ if (is_jmp32)
+ PPC_CMPLWI(dst_reg, imm);
+ else
+ PPC_CMPLDI(dst_reg, imm);
+ } else {
/* sign-extending load */
PPC_LI32(b2p[TMP_REG_1], imm);
/* ... but unsigned comparison */
- PPC_CMPLD(dst_reg, b2p[TMP_REG_1]);
+ if (is_jmp32)
+ PPC_CMPLW(dst_reg,
+ b2p[TMP_REG_1]);
+ else
+ PPC_CMPLD(dst_reg,
+ b2p[TMP_REG_1]);
}
break;
+ }
case BPF_JMP | BPF_JSGT | BPF_K:
case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_K:
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
+ case BPF_JMP32 | BPF_JSLT | BPF_K:
+ case BPF_JMP32 | BPF_JSGE | BPF_K:
+ case BPF_JMP32 | BPF_JSLE | BPF_K:
+ {
+ bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
+
/*
* signed comparison, so any 16-bit value
* can be used in cmpdi
*/
- if (imm >= -32768 && imm < 32768)
- PPC_CMPDI(dst_reg, imm);
- else {
+ if (imm >= -32768 && imm < 32768) {
+ if (is_jmp32)
+ PPC_CMPWI(dst_reg, imm);
+ else
+ PPC_CMPDI(dst_reg, imm);
+ } else {
PPC_LI32(b2p[TMP_REG_1], imm);
- PPC_CMPD(dst_reg, b2p[TMP_REG_1]);
+ if (is_jmp32)
+ PPC_CMPW(dst_reg,
+ b2p[TMP_REG_1]);
+ else
+ PPC_CMPD(dst_reg,
+ b2p[TMP_REG_1]);
}
break;
+ }
case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP32 | BPF_JSET | BPF_K:
/* andi does not sign-extend the immediate */
if (imm >= 0 && imm < 32768)
/* PPC_ANDI is _only/always_ dot-form */
PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm);
else {
- PPC_LI32(b2p[TMP_REG_1], imm);
- PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
- b2p[TMP_REG_1]);
+ int tmp_reg = b2p[TMP_REG_1];
+
+ PPC_LI32(tmp_reg, imm);
+ if (BPF_CLASS(code) == BPF_JMP) {
+ PPC_AND_DOT(tmp_reg, dst_reg,
+ tmp_reg);
+ } else {
+ PPC_AND(tmp_reg, dst_reg,
+ tmp_reg);
+ PPC_RLWINM_DOT(tmp_reg, tmp_reg,
+ 0, 0, 31);
+ }
}
break;
}
@@ -1093,6 +1185,7 @@ skip_codegen_passes:
bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
if (!fp->is_func || extra_pass) {
+ bpf_prog_fill_jited_linfo(fp, addrs);
out_addrs:
kfree(addrs);
kfree(jit_data);
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 72238eedc360..d2b8e6061933 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -1306,15 +1306,6 @@ static int h_24x7_event_init(struct perf_event *event)
return -EINVAL;
}
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest)
- return -EINVAL;
-
/* no branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
@@ -1577,6 +1568,7 @@ static struct pmu h_24x7_pmu = {
.start_txn = h_24x7_event_start_txn,
.commit_txn = h_24x7_event_commit_txn,
.cancel_txn = h_24x7_event_cancel_txn,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static int hv_24x7_init(void)
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 43fabb3cae0f..735e77b09cdb 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -232,15 +232,6 @@ static int h_gpci_event_init(struct perf_event *event)
return -EINVAL;
}
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest)
- return -EINVAL;
-
/* no branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
@@ -285,6 +276,7 @@ static struct pmu h_gpci_pmu = {
.start = h_gpci_event_start,
.stop = h_gpci_event_stop,
.read = h_gpci_event_update,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
static int hv_gpci_init(void)
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index f292a3f284f1..b1c37cc3fa98 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -473,15 +473,6 @@ static int nest_imc_event_init(struct perf_event *event)
if (event->hw.sample_period)
return -EINVAL;
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest)
- return -EINVAL;
-
if (event->cpu < 0)
return -EINVAL;
@@ -748,15 +739,6 @@ static int core_imc_event_init(struct perf_event *event)
if (event->hw.sample_period)
return -EINVAL;
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest)
- return -EINVAL;
-
if (event->cpu < 0)
return -EINVAL;
@@ -1069,6 +1051,7 @@ static int update_pmu_ops(struct imc_pmu *pmu)
pmu->pmu.stop = imc_event_stop;
pmu->pmu.read = imc_event_update;
pmu->pmu.attr_groups = pmu->attr_groups;
+ pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
switch (pmu->domain) {
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index 7de344b7d9cc..063c9d9f2516 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -97,3 +97,27 @@ EVENT(PM_MRK_DTLB_MISS_64K, 0x3d156)
EVENT(PM_DTLB_MISS_16M, 0x4c056)
EVENT(PM_DTLB_MISS_1G, 0x4c05a)
EVENT(PM_MRK_DTLB_MISS_16M, 0x4c15e)
+
+/*
+ * Memory Access Events
+ *
+ * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0)
+ * To enable capturing of memory profiling, these MMCRA bits
+ * needs to be programmed and corresponding raw event format
+ * encoding.
+ *
+ * MMCRA bits encoding needed are
+ * SM (Sampling Mode)
+ * EM (Eligibility for Random Sampling)
+ * TECE (Threshold Event Counter Event)
+ * TS (Threshold Start Event)
+ * TE (Threshold End Event)
+ *
+ * Corresponding Raw Encoding bits:
+ * sample [EM,SM]
+ * thresh_sel (TECE)
+ * thresh start (TS)
+ * thresh end (TE)
+ */
+EVENT(MEM_LOADS, 0x34340401e0)
+EVENT(MEM_STORES, 0x343c0401e0)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 0ff9c43733e9..030544e35959 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -160,6 +160,8 @@ GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL);
GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1);
GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1_FIN);
+GENERIC_EVENT_ATTR(mem-loads, MEM_LOADS);
+GENERIC_EVENT_ATTR(mem-stores, MEM_STORES);
CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1_FIN);
CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1);
@@ -185,6 +187,8 @@ static struct attribute *power9_events_attr[] = {
GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
GENERIC_EVENT_PTR(PM_LD_REF_L1),
GENERIC_EVENT_PTR(PM_LD_MISS_L1_FIN),
+ GENERIC_EVENT_PTR(MEM_LOADS),
+ GENERIC_EVENT_PTR(MEM_STORES),
CACHE_EVENT_PTR(PM_LD_MISS_L1_FIN),
CACHE_EVENT_PTR(PM_LD_REF_L1),
CACHE_EVENT_PTR(PM_L1_PREF),
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 4a9a72d01c3c..35be81fd2dc2 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -180,6 +180,7 @@ config CURRITUCK
depends on PPC_47x
select SWIOTLB
select 476FPE
+ select FORCE_PCI
select PPC4xx_PCI_EXPRESS
help
This option enables support for the IBM Currituck (476fpe) evaluation board
diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c
index e55933f9cd55..a5e61e5c16e2 100644
--- a/arch/powerpc/platforms/44x/ppc476.c
+++ b/arch/powerpc/platforms/44x/ppc476.c
@@ -34,6 +34,7 @@
#include <asm/ppc4xx.h>
#include <asm/mpic.h>
#include <asm/mmu.h>
+#include <asm/swiotlb.h>
#include <linux/pci.h>
#include <linux/i2c.h>
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index f467247fd1c4..18422dbd061a 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -47,7 +47,7 @@ static int __init warp_probe(void)
if (!of_machine_is_compatible("pika,warp"))
return 0;
- /* For __dma_nommu_alloc_coherent */
+ /* For arch_dma_alloc */
ISA_DMA_THRESHOLD = ~0L;
return 1;
diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S
index 3d1ecd211776..8137f77abad5 100644
--- a/arch/powerpc/platforms/83xx/suspend-asm.S
+++ b/arch/powerpc/platforms/83xx/suspend-asm.S
@@ -26,13 +26,13 @@
#define SS_MSR 0x74
#define SS_SDR1 0x78
#define SS_LR 0x7c
-#define SS_SPRG 0x80 /* 4 SPRGs */
-#define SS_DBAT 0x90 /* 8 DBATs */
-#define SS_IBAT 0xd0 /* 8 IBATs */
-#define SS_TB 0x110
-#define SS_CR 0x118
-#define SS_GPREG 0x11c /* r12-r31 */
-#define STATE_SAVE_SIZE 0x16c
+#define SS_SPRG 0x80 /* 8 SPRGs */
+#define SS_DBAT 0xa0 /* 8 DBATs */
+#define SS_IBAT 0xe0 /* 8 IBATs */
+#define SS_TB 0x120
+#define SS_CR 0x128
+#define SS_GPREG 0x12c /* r12-r31 */
+#define STATE_SAVE_SIZE 0x17c
.section .data
.align 5
@@ -103,6 +103,16 @@ _GLOBAL(mpc83xx_enter_deep_sleep)
stw r7, SS_SPRG+12(r3)
stw r8, SS_SDR1(r3)
+ mfspr r4, SPRN_SPRG4
+ mfspr r5, SPRN_SPRG5
+ mfspr r6, SPRN_SPRG6
+ mfspr r7, SPRN_SPRG7
+
+ stw r4, SS_SPRG+16(r3)
+ stw r5, SS_SPRG+20(r3)
+ stw r6, SS_SPRG+24(r3)
+ stw r7, SS_SPRG+28(r3)
+
mfspr r4, SPRN_DBAT0U
mfspr r5, SPRN_DBAT0L
mfspr r6, SPRN_DBAT1U
@@ -493,6 +503,16 @@ mpc83xx_deep_resume:
mtspr SPRN_IBAT7U, r6
mtspr SPRN_IBAT7L, r7
+ lwz r4, SS_SPRG+16(r3)
+ lwz r5, SS_SPRG+20(r3)
+ lwz r6, SS_SPRG+24(r3)
+ lwz r7, SS_SPRG+28(r3)
+
+ mtspr SPRN_SPRG4, r4
+ mtspr SPRN_SPRG5, r5
+ mtspr SPRN_SPRG6, r6
+ mtspr SPRN_SPRG7, r7
+
lwz r4, SS_SPRG+0(r3)
lwz r5, SS_SPRG+4(r3)
lwz r6, SS_SPRG+8(r3)
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index b0dac307bebf..785e9641220d 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -27,6 +27,7 @@
#include <asm/udbg.h>
#include <asm/mpic.h>
#include <asm/ehv_pic.h>
+#include <asm/swiotlb.h>
#include <soc/fsl/qe/qe_ic.h>
#include <linux/of_platform.h>
@@ -223,7 +224,3 @@ define_machine(corenet_generic) {
};
machine_arch_initcall(corenet_generic, corenet_gen_publish_devices);
-
-#ifdef CONFIG_SWIOTLB
-machine_arch_initcall(corenet_generic, swiotlb_setup_bus_notifier);
-#endif
diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c
index f29c6f0909f3..c64fa2483ea9 100644
--- a/arch/powerpc/platforms/85xx/ge_imp3a.c
+++ b/arch/powerpc/platforms/85xx/ge_imp3a.c
@@ -202,8 +202,6 @@ static int __init ge_imp3a_probe(void)
machine_arch_initcall(ge_imp3a, mpc85xx_common_publish_devices);
-machine_arch_initcall(ge_imp3a, swiotlb_setup_bus_notifier);
-
define_machine(ge_imp3a) {
.name = "GE_IMP3A",
.probe = ge_imp3a_probe,
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c
index 94a7f92c858f..94194bad4954 100644
--- a/arch/powerpc/platforms/85xx/mpc8536_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -57,8 +57,6 @@ static void __init mpc8536_ds_setup_arch(void)
machine_arch_initcall(mpc8536_ds, mpc85xx_common_publish_devices);
-machine_arch_initcall(mpc8536_ds, swiotlb_setup_bus_notifier);
-
/*
* Called very early, device-tree isn't unflattened
*/
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index dc9e035cc637..b7e29ce1f266 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -174,10 +174,6 @@ machine_arch_initcall(mpc8544_ds, mpc85xx_common_publish_devices);
machine_arch_initcall(mpc8572_ds, mpc85xx_common_publish_devices);
machine_arch_initcall(p2020_ds, mpc85xx_common_publish_devices);
-machine_arch_initcall(mpc8544_ds, swiotlb_setup_bus_notifier);
-machine_arch_initcall(mpc8572_ds, swiotlb_setup_bus_notifier);
-machine_arch_initcall(p2020_ds, swiotlb_setup_bus_notifier);
-
/*
* Called very early, device-tree isn't unflattened
*/
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index d7e440e6dba3..80939a425de5 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -367,10 +367,6 @@ machine_arch_initcall(mpc8568_mds, mpc85xx_publish_devices);
machine_arch_initcall(mpc8569_mds, mpc85xx_publish_devices);
machine_arch_initcall(p1021_mds, mpc85xx_common_publish_devices);
-machine_arch_initcall(mpc8568_mds, swiotlb_setup_bus_notifier);
-machine_arch_initcall(mpc8569_mds, swiotlb_setup_bus_notifier);
-machine_arch_initcall(p1021_mds, swiotlb_setup_bus_notifier);
-
static void __init mpc85xx_mds_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c
index 78d13b364cd6..33ca373322e1 100644
--- a/arch/powerpc/platforms/85xx/p1010rdb.c
+++ b/arch/powerpc/platforms/85xx/p1010rdb.c
@@ -55,7 +55,6 @@ static void __init p1010_rdb_setup_arch(void)
}
machine_arch_initcall(p1010_rdb, mpc85xx_common_publish_devices);
-machine_arch_initcall(p1010_rdb, swiotlb_setup_bus_notifier);
/*
* Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 9fb57f78cdbe..1f1af0557470 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -548,8 +548,6 @@ static void __init p1022_ds_setup_arch(void)
machine_arch_initcall(p1022_ds, mpc85xx_common_publish_devices);
-machine_arch_initcall(p1022_ds, swiotlb_setup_bus_notifier);
-
/*
* Called very early, device-tree isn't unflattened
*/
diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c
index 276e00ab3dde..fd9e3e7ef234 100644
--- a/arch/powerpc/platforms/85xx/p1022_rdk.c
+++ b/arch/powerpc/platforms/85xx/p1022_rdk.c
@@ -128,8 +128,6 @@ static void __init p1022_rdk_setup_arch(void)
machine_arch_initcall(p1022_rdk, mpc85xx_common_publish_devices);
-machine_arch_initcall(p1022_rdk, swiotlb_setup_bus_notifier);
-
/*
* Called very early, device-tree isn't unflattened
*/
diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c
index 27631c607f3d..c52c8f9e8385 100644
--- a/arch/powerpc/platforms/85xx/qemu_e500.c
+++ b/arch/powerpc/platforms/85xx/qemu_e500.c
@@ -22,6 +22,7 @@
#include <asm/time.h>
#include <asm/udbg.h>
#include <asm/mpic.h>
+#include <asm/swiotlb.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
#include "smp.h"
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index 17c6cd3d02e6..775a92353c83 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -121,7 +121,6 @@ static int __init declare_of_platform_devices(void)
return 0;
}
machine_arch_initcall(mpc86xx_hpcn, declare_of_platform_devices);
-machine_arch_initcall(mpc86xx_hpcn, swiotlb_setup_bus_notifier);
define_machine(mpc86xx_hpcn) {
.name = "MPC86xx HPCN",
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 8c7464c3f27f..842b2c7e156a 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -153,6 +153,11 @@ config E300C3_CPU
bool "e300c3 (831x)"
depends on PPC_BOOK3S_32
+config G4_CPU
+ bool "G4 (74xx)"
+ depends on PPC_BOOK3S_32
+ select ALTIVEC
+
endchoice
config TARGET_CPU_BOOL
@@ -171,6 +176,7 @@ config TARGET_CPU
default "860" if 860_CPU
default "e300c2" if E300C2_CPU
default "e300c3" if E300C3_CPU
+ default "G4" if G4_CPU
config PPC_BOOK3S
def_bool y
@@ -402,6 +408,9 @@ config NOT_COHERENT_CACHE
bool
depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \
GAMECUBE_COMMON || AMIGAONE
+ select ARCH_HAS_DMA_COHERENT_TO_PFN
+ select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+ select ARCH_HAS_SYNC_DMA_FOR_CPU
default n if PPC_47x
default y
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index af2a3c15e0ec..54e012e1f720 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -544,9 +544,10 @@ static struct cbe_iommu *cell_iommu_for_node(int nid)
static unsigned long cell_dma_nommu_offset;
static unsigned long dma_iommu_fixed_base;
+static bool cell_iommu_enabled;
/* iommu_fixed_is_weak is set if booted with iommu_fixed=weak */
-static int iommu_fixed_is_weak;
+bool iommu_fixed_is_weak;
static struct iommu_table *cell_get_iommu_table(struct device *dev)
{
@@ -568,102 +569,19 @@ static struct iommu_table *cell_get_iommu_table(struct device *dev)
return &window->table;
}
-/* A coherent allocation implies strong ordering */
-
-static void *dma_fixed_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- if (iommu_fixed_is_weak)
- return iommu_alloc_coherent(dev, cell_get_iommu_table(dev),
- size, dma_handle,
- device_to_mask(dev), flag,
- dev_to_node(dev));
- else
- return dma_nommu_ops.alloc(dev, size, dma_handle, flag,
- attrs);
-}
-
-static void dma_fixed_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- if (iommu_fixed_is_weak)
- iommu_free_coherent(cell_get_iommu_table(dev), size, vaddr,
- dma_handle);
- else
- dma_nommu_ops.free(dev, size, vaddr, dma_handle, attrs);
-}
-
-static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
- return dma_nommu_ops.map_page(dev, page, offset, size,
- direction, attrs);
- else
- return iommu_map_page(dev, cell_get_iommu_table(dev), page,
- offset, size, device_to_mask(dev),
- direction, attrs);
-}
-
-static void dma_fixed_unmap_page(struct device *dev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction direction,
- unsigned long attrs)
-{
- if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
- dma_nommu_ops.unmap_page(dev, dma_addr, size, direction,
- attrs);
- else
- iommu_unmap_page(cell_get_iommu_table(dev), dma_addr, size,
- direction, attrs);
-}
-
-static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg,
- int nents, enum dma_data_direction direction,
- unsigned long attrs)
-{
- if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
- return dma_nommu_ops.map_sg(dev, sg, nents, direction, attrs);
- else
- return ppc_iommu_map_sg(dev, cell_get_iommu_table(dev), sg,
- nents, device_to_mask(dev),
- direction, attrs);
-}
-
-static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nents, enum dma_data_direction direction,
- unsigned long attrs)
-{
- if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
- dma_nommu_ops.unmap_sg(dev, sg, nents, direction, attrs);
- else
- ppc_iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents,
- direction, attrs);
-}
-
-static int dma_suported_and_switch(struct device *dev, u64 dma_mask);
-
-static const struct dma_map_ops dma_iommu_fixed_ops = {
- .alloc = dma_fixed_alloc_coherent,
- .free = dma_fixed_free_coherent,
- .map_sg = dma_fixed_map_sg,
- .unmap_sg = dma_fixed_unmap_sg,
- .dma_supported = dma_suported_and_switch,
- .map_page = dma_fixed_map_page,
- .unmap_page = dma_fixed_unmap_page,
-};
+static u64 cell_iommu_get_fixed_address(struct device *dev);
static void cell_dma_dev_setup(struct device *dev)
{
- if (get_pci_dma_ops() == &dma_iommu_ops)
+ if (cell_iommu_enabled) {
+ u64 addr = cell_iommu_get_fixed_address(dev);
+
+ if (addr != OF_BAD_ADDR)
+ dev->archdata.dma_offset = addr + dma_iommu_fixed_base;
set_iommu_table_base(dev, cell_get_iommu_table(dev));
- else if (get_pci_dma_ops() == &dma_nommu_ops)
- set_dma_offset(dev, cell_dma_nommu_offset);
- else
- BUG();
+ } else {
+ dev->archdata.dma_offset = cell_dma_nommu_offset;
+ }
}
static void cell_pci_dma_dev_setup(struct pci_dev *dev)
@@ -680,11 +598,9 @@ static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action,
if (action != BUS_NOTIFY_ADD_DEVICE)
return 0;
- /* We use the PCI DMA ops */
- dev->dma_ops = get_pci_dma_ops();
-
+ if (cell_iommu_enabled)
+ dev->dma_ops = &dma_iommu_ops;
cell_dma_dev_setup(dev);
-
return 0;
}
@@ -809,7 +725,6 @@ static int __init cell_iommu_init_disabled(void)
unsigned long base = 0, size;
/* When no iommu is present, we use direct DMA ops */
- set_pci_dma_ops(&dma_nommu_ops);
/* First make sure all IOC translation is turned off */
cell_disable_iommus();
@@ -894,7 +809,11 @@ static u64 cell_iommu_get_fixed_address(struct device *dev)
const u32 *ranges = NULL;
int i, len, best, naddr, nsize, pna, range_size;
+ /* We can be called for platform devices that have no of_node */
np = of_node_get(dev->of_node);
+ if (!np)
+ goto out;
+
while (1) {
naddr = of_n_addr_cells(np);
nsize = of_n_size_cells(np);
@@ -945,27 +864,10 @@ out:
return dev_addr;
}
-static int dma_suported_and_switch(struct device *dev, u64 dma_mask)
+static bool cell_pci_iommu_bypass_supported(struct pci_dev *pdev, u64 mask)
{
- if (dma_mask == DMA_BIT_MASK(64) &&
- cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) {
- u64 addr = cell_iommu_get_fixed_address(dev) +
- dma_iommu_fixed_base;
- dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
- dev_dbg(dev, "iommu: fixed addr = %llx\n", addr);
- set_dma_ops(dev, &dma_iommu_fixed_ops);
- set_dma_offset(dev, addr);
- return 1;
- }
-
- if (dma_iommu_dma_supported(dev, dma_mask)) {
- dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
- set_dma_ops(dev, get_pci_dma_ops());
- cell_dma_dev_setup(dev);
- return 1;
- }
-
- return 0;
+ return mask == DMA_BIT_MASK(64) &&
+ cell_iommu_get_fixed_address(&pdev->dev) != OF_BAD_ADDR;
}
static void insert_16M_pte(unsigned long addr, unsigned long *ptab,
@@ -1119,9 +1021,8 @@ static int __init cell_iommu_fixed_mapping_init(void)
cell_iommu_setup_window(iommu, np, dbase, dsize, 0);
}
- dma_iommu_ops.dma_supported = dma_suported_and_switch;
- set_pci_dma_ops(&dma_iommu_ops);
-
+ cell_pci_controller_ops.iommu_bypass_supported =
+ cell_pci_iommu_bypass_supported;
return 0;
}
@@ -1142,7 +1043,7 @@ static int __init setup_iommu_fixed(char *str)
pciep = of_find_node_by_type(NULL, "pcie-endpoint");
if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
- iommu_fixed_is_weak = DMA_ATTR_WEAK_ORDERING;
+ iommu_fixed_is_weak = true;
of_node_put(pciep);
@@ -1150,26 +1051,6 @@ static int __init setup_iommu_fixed(char *str)
}
__setup("iommu_fixed=", setup_iommu_fixed);
-static u64 cell_dma_get_required_mask(struct device *dev)
-{
- const struct dma_map_ops *dma_ops;
-
- if (!dev->dma_mask)
- return 0;
-
- if (!iommu_fixed_disabled &&
- cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR)
- return DMA_BIT_MASK(64);
-
- dma_ops = get_dma_ops(dev);
- if (dma_ops->get_required_mask)
- return dma_ops->get_required_mask(dev);
-
- WARN_ONCE(1, "no get_required_mask in %p ops", dma_ops);
-
- return DMA_BIT_MASK(64);
-}
-
static int __init cell_iommu_init(void)
{
struct device_node *np;
@@ -1186,10 +1067,9 @@ static int __init cell_iommu_init(void)
/* Setup various callbacks */
cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup;
- ppc_md.dma_get_required_mask = cell_dma_get_required_mask;
if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0)
- goto bail;
+ goto done;
/* Create an iommu for each /axon node. */
for_each_node_by_name(np, "axon") {
@@ -1206,10 +1086,10 @@ static int __init cell_iommu_init(void)
continue;
cell_iommu_init_one(np, SPIDER_DMA_OFFSET);
}
-
+ done:
/* Setup default PCI iommu ops */
set_pci_dma_ops(&dma_iommu_ops);
-
+ cell_iommu_enabled = true;
bail:
/* Register callbacks on OF platform device addition/removal
* to handle linking them to the right DMA operations
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
index 125f2a5f02de..b5f35cbe9e21 100644
--- a/arch/powerpc/platforms/cell/spu_callbacks.c
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -34,7 +34,7 @@
*/
static void *spu_syscall_table[] = {
-#define __SYSCALL(nr, entry, nargs) entry,
+#define __SYSCALL(nr, entry) entry,
#include <asm/syscall_table_spu.h>
#undef __SYSCALL
};
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
index 263413a34823..b95d6afc39b5 100644
--- a/arch/powerpc/platforms/cell/spu_syscalls.c
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -26,7 +26,6 @@
#include <linux/syscalls.h>
#include <linux/rcupdate.h>
#include <linux/binfmts.h>
-#include <linux/syscalls.h>
#include <asm/spu.h>
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index ae8123edddc6..48c2477e7e2a 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -2338,9 +2338,8 @@ static int spufs_switch_log_open(struct inode *inode, struct file *file)
goto out;
}
- ctx->switch_log = kmalloc(sizeof(struct switch_log) +
- SWITCH_LOG_BUFSIZE * sizeof(struct switch_log_entry),
- GFP_KERNEL);
+ ctx->switch_log = kmalloc(struct_size(ctx->switch_log, log,
+ SWITCH_LOG_BUFSIZE), GFP_KERNEL);
if (!ctx->switch_log) {
rc = -ENOMEM;
diff --git a/arch/powerpc/platforms/chrp/Makefile b/arch/powerpc/platforms/chrp/Makefile
index 4b3bfadc70fa..dc3465cc8bc6 100644
--- a/arch/powerpc/platforms/chrp/Makefile
+++ b/arch/powerpc/platforms/chrp/Makefile
@@ -1,3 +1,3 @@
obj-y += setup.o time.o pegasos_eth.o pci.o
obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_NVRAM) += nvram.o
+obj-$(CONFIG_NVRAM:m=y) += nvram.o
diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c
index 791b86398e1d..37ac20ccbb19 100644
--- a/arch/powerpc/platforms/chrp/nvram.c
+++ b/arch/powerpc/platforms/chrp/nvram.c
@@ -24,7 +24,7 @@ static unsigned int nvram_size;
static unsigned char nvram_buf[4];
static DEFINE_SPINLOCK(nvram_lock);
-static unsigned char chrp_nvram_read(int addr)
+static unsigned char chrp_nvram_read_val(int addr)
{
unsigned int done;
unsigned long flags;
@@ -46,7 +46,7 @@ static unsigned char chrp_nvram_read(int addr)
return ret;
}
-static void chrp_nvram_write(int addr, unsigned char val)
+static void chrp_nvram_write_val(int addr, unsigned char val)
{
unsigned int done;
unsigned long flags;
@@ -64,6 +64,11 @@ static void chrp_nvram_write(int addr, unsigned char val)
spin_unlock_irqrestore(&nvram_lock, flags);
}
+static ssize_t chrp_nvram_size(void)
+{
+ return nvram_size;
+}
+
void __init chrp_nvram_init(void)
{
struct device_node *nvram;
@@ -85,8 +90,9 @@ void __init chrp_nvram_init(void)
printk(KERN_INFO "CHRP nvram contains %u bytes\n", nvram_size);
of_node_put(nvram);
- ppc_md.nvram_read_val = chrp_nvram_read;
- ppc_md.nvram_write_val = chrp_nvram_write;
+ ppc_md.nvram_read_val = chrp_nvram_read_val;
+ ppc_md.nvram_write_val = chrp_nvram_write_val;
+ ppc_md.nvram_size = chrp_nvram_size;
return;
}
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index 9438fa0fc355..fcf6f2342ef4 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -549,7 +549,7 @@ static void __init chrp_init_IRQ(void)
static void __init
chrp_init2(void)
{
-#ifdef CONFIG_NVRAM
+#if IS_ENABLED(CONFIG_NVRAM)
chrp_nvram_init();
#endif
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
index ecf703ee3a76..235fe81aa2b1 100644
--- a/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -54,10 +54,6 @@
static void __iomem *hw_ctrl;
static void __iomem *hw_gpio;
-unsigned long wii_hole_start;
-unsigned long wii_hole_size;
-
-
static int __init page_aligned(unsigned long x)
{
return !(x & (PAGE_SIZE-1));
@@ -69,26 +65,6 @@ void __init wii_memory_fixups(void)
BUG_ON(memblock.memory.cnt != 2);
BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base));
-
- /* determine hole */
- wii_hole_start = ALIGN(p[0].base + p[0].size, PAGE_SIZE);
- wii_hole_size = p[1].base - wii_hole_start;
-}
-
-unsigned long __init wii_mmu_mapin_mem2(unsigned long top)
-{
- unsigned long delta, size, bl;
- unsigned long max_size = (256<<20);
-
- /* MEM2 64MB@0x10000000 */
- delta = wii_hole_start + wii_hole_size;
- size = top - delta;
- for (bl = 128<<10; bl < max_size; bl <<= 1) {
- if (bl * 2 > size)
- break;
- }
- setbat(4, PAGE_OFFSET+delta, delta, bl, PAGE_KERNEL_X);
- return delta + bl;
}
static void __noreturn wii_spin(void)
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index f2971522fb4a..86368e238f6e 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -186,7 +186,7 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev)
*/
if (dev->vendor == 0x1959 && dev->device == 0xa007 &&
!firmware_has_feature(FW_FEATURE_LPAR)) {
- dev->dev.dma_ops = &dma_nommu_ops;
+ dev->dev.dma_ops = NULL;
/*
* Set the coherent DMA mask to prevent the iommu
* being used unnecessarily
@@ -208,7 +208,9 @@ static int __init iob_init(struct device_node *dn)
pr_debug(" -> %s\n", __func__);
/* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */
- iob_l2_base = (u32 *)__va(memblock_alloc_base(1UL<<21, 1UL<<21, 0x80000000));
+ iob_l2_base = memblock_alloc_try_nid_raw(1UL << 21, 1UL << 21,
+ MEMBLOCK_LOW_LIMIT, 0x80000000,
+ NUMA_NO_NODE);
pr_info("IOBMAP L2 allocated at: %p\n", iob_l2_base);
@@ -269,4 +271,3 @@ void __init iommu_init_early_pasemi(void)
pasemi_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pasemi;
set_pci_dma_ops(&dma_iommu_ops);
}
-
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
index c0532999f854..46dd463faaa7 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -411,55 +411,6 @@ out:
return !!(srr1 & 0x2);
}
-#ifdef CONFIG_PCMCIA
-static int pcmcia_notify(struct notifier_block *nb, unsigned long action,
- void *data)
-{
- struct device *dev = data;
- struct device *parent;
- struct pcmcia_device *pdev = to_pcmcia_dev(dev);
-
- /* We are only intereted in device addition */
- if (action != BUS_NOTIFY_ADD_DEVICE)
- return 0;
-
- parent = pdev->socket->dev.parent;
-
- /* We know electra_cf devices will always have of_node set, since
- * electra_cf is an of_platform driver.
- */
- if (!parent->of_node)
- return 0;
-
- if (!of_device_is_compatible(parent->of_node, "electra-cf"))
- return 0;
-
- /* We use the direct ops for localbus */
- dev->dma_ops = &dma_nommu_ops;
-
- return 0;
-}
-
-static struct notifier_block pcmcia_notifier = {
- .notifier_call = pcmcia_notify,
-};
-
-static inline void pasemi_pcmcia_init(void)
-{
- extern struct bus_type pcmcia_bus_type;
-
- bus_register_notifier(&pcmcia_bus_type, &pcmcia_notifier);
-}
-
-#else
-
-static inline void pasemi_pcmcia_init(void)
-{
-}
-
-#endif
-
-
static const struct of_device_id pasemi_bus_ids[] = {
/* Unfortunately needed for legacy firmwares */
{ .type = "localbus", },
@@ -472,8 +423,6 @@ static const struct of_device_id pasemi_bus_ids[] = {
static int __init pasemi_publish_devices(void)
{
- pasemi_pcmcia_init();
-
/* Publish OF platform devices for SDC and other non-PCI devices */
of_platform_bus_probe(NULL, pasemi_bus_ids, NULL);
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index 923bfb340433..20ebf35d7913 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -15,7 +15,5 @@ obj-$(CONFIG_PMAC_BACKLIGHT) += backlight.o
# need this to be a bool. Cheat here and pretend CONFIG_NVRAM=m is really
# CONFIG_NVRAM=y
obj-$(CONFIG_NVRAM:m=y) += nvram.o
-# ppc64 pmac doesn't define CONFIG_NVRAM but needs nvram stuff
-obj-$(CONFIG_PPC64) += nvram.o
obj-$(CONFIG_PPC32) += bootx_init.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c
index ae54d7fe68f3..9360cdc408c1 100644
--- a/arch/powerpc/platforms/powermac/nvram.c
+++ b/arch/powerpc/platforms/powermac/nvram.c
@@ -147,6 +147,11 @@ static ssize_t core99_nvram_size(void)
static volatile unsigned char __iomem *nvram_addr;
static int nvram_mult;
+static ssize_t ppc32_nvram_size(void)
+{
+ return NVRAM_SIZE;
+}
+
static unsigned char direct_nvram_read_byte(int addr)
{
return in_8(&nvram_data[(addr & (NVRAM_SIZE - 1)) * nvram_mult]);
@@ -590,21 +595,25 @@ int __init pmac_nvram_init(void)
nvram_mult = 1;
ppc_md.nvram_read_val = direct_nvram_read_byte;
ppc_md.nvram_write_val = direct_nvram_write_byte;
+ ppc_md.nvram_size = ppc32_nvram_size;
} else if (nvram_naddrs == 1) {
nvram_data = ioremap(r1.start, s1);
nvram_mult = (s1 + NVRAM_SIZE - 1) / NVRAM_SIZE;
ppc_md.nvram_read_val = direct_nvram_read_byte;
ppc_md.nvram_write_val = direct_nvram_write_byte;
+ ppc_md.nvram_size = ppc32_nvram_size;
} else if (nvram_naddrs == 2) {
nvram_addr = ioremap(r1.start, s1);
nvram_data = ioremap(r2.start, s2);
ppc_md.nvram_read_val = indirect_nvram_read_byte;
ppc_md.nvram_write_val = indirect_nvram_write_byte;
+ ppc_md.nvram_size = ppc32_nvram_size;
} else if (nvram_naddrs == 0 && sys_ctrler == SYS_CTRLER_PMU) {
#ifdef CONFIG_ADB_PMU
nvram_naddrs = -1;
ppc_md.nvram_read_val = pmu_nvram_read_byte;
ppc_md.nvram_write_val = pmu_nvram_write_byte;
+ ppc_md.nvram_size = ppc32_nvram_size;
#endif /* CONFIG_ADB_PMU */
} else {
printk(KERN_ERR "Incompatible type of NVRAM\n");
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 2e8221e20ee8..b7efcf336589 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -316,8 +316,7 @@ static void __init pmac_setup_arch(void)
find_via_pmu();
smu_init();
-#if defined(CONFIG_NVRAM) || defined(CONFIG_NVRAM_MODULE) || \
- defined(CONFIG_PPC64)
+#if IS_ENABLED(CONFIG_NVRAM)
pmac_nvram_init();
#endif
#ifdef CONFIG_PPC32
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index f157e3d071f2..b36ddee17c87 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -68,7 +68,7 @@
long __init pmac_time_init(void)
{
s32 delta = 0;
-#ifdef CONFIG_NVRAM
+#if defined(CONFIG_NVRAM) && defined(CONFIG_PPC32)
int dst;
delta = ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x9)) << 16;
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index b540ce8eec55..da2e99efbd04 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o
-obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
+obj-y += setup.o opal-call.o opal-wrappers.o opal.o opal-async.o
+obj-y += idle.o opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
@@ -11,7 +11,6 @@ obj-$(CONFIG_CXL_BASE) += pci-cxl.o
obj-$(CONFIG_EEH) += eeh-powernv.o
obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
-obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o
obj-$(CONFIG_OPAL_PRD) += opal-prd.o
obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 35f699ebb662..e52f9b06dd9c 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -458,7 +458,8 @@ EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_HOTPLUG_CPU
-static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
+
+void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
{
u64 pir = get_hard_smp_processor_id(cpu);
@@ -481,20 +482,6 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
{
unsigned long srr1;
u32 idle_states = pnv_get_supported_cpuidle_states();
- u64 lpcr_val;
-
- /*
- * We don't want to take decrementer interrupts while we are
- * offline, so clear LPCR:PECE1. We keep PECE2 (and
- * LPCR_PECE_HVEE on P9) enabled as to let IPIs in.
- *
- * If the CPU gets woken up by a special wakeup, ensure that
- * the SLW engine sets LPCR with decrementer bit cleared, else
- * the CPU will come back to the kernel due to a spurious
- * wakeup.
- */
- lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
- pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
__ppc64_runlatch_off();
@@ -526,16 +513,6 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
__ppc64_runlatch_on();
- /*
- * Re-enable decrementer interrupts in LPCR.
- *
- * Further, we want stop states to be woken up by decrementer
- * for non-hotplug cases. So program the LPCR via stop api as
- * well.
- */
- lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
- pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
-
return srr1;
}
#endif
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 84d038ed3882..248a38ad25c7 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -20,6 +20,7 @@
#include <linux/slab.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
+#include <linux/numa.h>
#include <asm/machdep.h>
#include <asm/debugfs.h>
@@ -223,7 +224,7 @@ static int memtrace_online(void)
ent = &memtrace_array[i];
/* We have onlined this chunk previously */
- if (ent->nid == -1)
+ if (ent->nid == NUMA_NO_NODE)
continue;
/* Remove from io mappings */
@@ -257,7 +258,7 @@ static int memtrace_online(void)
*/
debugfs_remove_recursive(ent->dir);
pr_info("Added trace memory back to node %d\n", ent->nid);
- ent->size = ent->start = ent->nid = -1;
+ ent->size = ent->start = ent->nid = NUMA_NO_NODE;
}
if (ret)
return ret;
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 3f58c7dbd581..dc23d9d2a7d9 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -28,10 +28,6 @@
*/
static DEFINE_SPINLOCK(npu_context_lock);
-/*
- * Other types of TCE cache invalidation are not functional in the
- * hardware.
- */
static struct pci_dev *get_pci_dev(struct device_node *dn)
{
struct pci_dn *pdn = PCI_DN(dn);
@@ -220,7 +216,7 @@ static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
* their parent device so drivers shouldn't be doing DMA
* operations directly on these devices.
*/
- set_dma_ops(&npe->pdev->dev, NULL);
+ set_dma_ops(&npe->pdev->dev, &dma_dummy_ops);
}
/*
@@ -917,15 +913,6 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
mmio_invalidate(npu_context, 0, ~0UL);
}
-static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long address,
- pte_t pte)
-{
- struct npu_context *npu_context = mn_to_npu_context(mn);
- mmio_invalidate(npu_context, address, PAGE_SIZE);
-}
-
static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
@@ -936,7 +923,6 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
.release = pnv_npu2_mn_release,
- .change_pte = pnv_npu2_mn_change_pte,
.invalidate_range = pnv_npu2_mn_invalidate_range,
};
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
new file mode 100644
index 000000000000..578757d403ab
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/percpu.h>
+#include <linux/jump_label.h>
+#include <asm/opal-api.h>
+#include <asm/trace.h>
+#include <asm/asm-prototypes.h>
+
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * Since the tracing code might execute OPAL calls we need to guard against
+ * recursion.
+ */
+static DEFINE_PER_CPU(unsigned int, opal_trace_depth);
+
+static void __trace_opal_entry(s64 a0, s64 a1, s64 a2, s64 a3,
+ s64 a4, s64 a5, s64 a6, s64 a7,
+ unsigned long opcode)
+{
+ unsigned int *depth;
+ unsigned long args[8];
+
+ depth = this_cpu_ptr(&opal_trace_depth);
+
+ if (*depth)
+ return;
+
+ args[0] = a0;
+ args[1] = a1;
+ args[2] = a2;
+ args[3] = a3;
+ args[4] = a4;
+ args[5] = a5;
+ args[6] = a6;
+ args[7] = a7;
+
+ (*depth)++;
+ trace_opal_entry(opcode, &args[0]);
+ (*depth)--;
+}
+
+static void __trace_opal_exit(unsigned long opcode, unsigned long retval)
+{
+ unsigned int *depth;
+
+ depth = this_cpu_ptr(&opal_trace_depth);
+
+ if (*depth)
+ return;
+
+ (*depth)++;
+ trace_opal_exit(opcode, retval);
+ (*depth)--;
+}
+
+static DEFINE_STATIC_KEY_FALSE(opal_tracepoint_key);
+
+int opal_tracepoint_regfunc(void)
+{
+ static_branch_inc(&opal_tracepoint_key);
+ return 0;
+}
+
+void opal_tracepoint_unregfunc(void)
+{
+ static_branch_dec(&opal_tracepoint_key);
+}
+
+static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3,
+ s64 a4, s64 a5, s64 a6, s64 a7,
+ unsigned long opcode, unsigned long msr)
+{
+ s64 ret;
+
+ __trace_opal_entry(a0, a1, a2, a3, a4, a5, a6, a7, opcode);
+ ret = __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
+ __trace_opal_exit(opcode, ret);
+
+ return ret;
+}
+
+#define DO_TRACE (static_branch_unlikely(&opal_tracepoint_key))
+
+#else /* CONFIG_TRACEPOINTS */
+
+static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3,
+ s64 a4, s64 a5, s64 a6, s64 a7,
+ unsigned long opcode, unsigned long msr)
+{
+}
+
+#define DO_TRACE false
+#endif /* CONFIG_TRACEPOINTS */
+
+static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
+ int64_t a4, int64_t a5, int64_t a6, int64_t a7, int64_t opcode)
+{
+ unsigned long flags;
+ unsigned long msr = mfmsr();
+ bool mmu = (msr & (MSR_IR|MSR_DR));
+ int64_t ret;
+
+ msr &= ~MSR_EE;
+
+ if (unlikely(!mmu))
+ return __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
+
+ local_save_flags(flags);
+ hard_irq_disable();
+
+ if (DO_TRACE) {
+ ret = __opal_call_trace(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
+ } else {
+ ret = __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
+ }
+
+ local_irq_restore(flags);
+
+ return ret;
+}
+
+#define OPAL_CALL(name, opcode) \
+int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \
+ int64_t a4, int64_t a5, int64_t a6, int64_t a7) \
+{ \
+ return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode); \
+}
+
+OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL);
+OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);
+OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ);
+OPAL_CALL(opal_console_write_buffer_space, OPAL_CONSOLE_WRITE_BUFFER_SPACE);
+OPAL_CALL(opal_rtc_read, OPAL_RTC_READ);
+OPAL_CALL(opal_rtc_write, OPAL_RTC_WRITE);
+OPAL_CALL(opal_cec_power_down, OPAL_CEC_POWER_DOWN);
+OPAL_CALL(opal_cec_reboot, OPAL_CEC_REBOOT);
+OPAL_CALL(opal_cec_reboot2, OPAL_CEC_REBOOT2);
+OPAL_CALL(opal_read_nvram, OPAL_READ_NVRAM);
+OPAL_CALL(opal_write_nvram, OPAL_WRITE_NVRAM);
+OPAL_CALL(opal_handle_interrupt, OPAL_HANDLE_INTERRUPT);
+OPAL_CALL(opal_poll_events, OPAL_POLL_EVENTS);
+OPAL_CALL(opal_pci_set_hub_tce_memory, OPAL_PCI_SET_HUB_TCE_MEMORY);
+OPAL_CALL(opal_pci_set_phb_tce_memory, OPAL_PCI_SET_PHB_TCE_MEMORY);
+OPAL_CALL(opal_pci_config_read_byte, OPAL_PCI_CONFIG_READ_BYTE);
+OPAL_CALL(opal_pci_config_read_half_word, OPAL_PCI_CONFIG_READ_HALF_WORD);
+OPAL_CALL(opal_pci_config_read_word, OPAL_PCI_CONFIG_READ_WORD);
+OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE);
+OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD);
+OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD);
+OPAL_CALL(opal_set_xive, OPAL_SET_XIVE);
+OPAL_CALL(opal_get_xive, OPAL_GET_XIVE);
+OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
+OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS);
+OPAL_CALL(opal_pci_eeh_freeze_clear, OPAL_PCI_EEH_FREEZE_CLEAR);
+OPAL_CALL(opal_pci_eeh_freeze_set, OPAL_PCI_EEH_FREEZE_SET);
+OPAL_CALL(opal_pci_err_inject, OPAL_PCI_ERR_INJECT);
+OPAL_CALL(opal_pci_shpc, OPAL_PCI_SHPC);
+OPAL_CALL(opal_pci_phb_mmio_enable, OPAL_PCI_PHB_MMIO_ENABLE);
+OPAL_CALL(opal_pci_set_phb_mem_window, OPAL_PCI_SET_PHB_MEM_WINDOW);
+OPAL_CALL(opal_pci_map_pe_mmio_window, OPAL_PCI_MAP_PE_MMIO_WINDOW);
+OPAL_CALL(opal_pci_set_phb_table_memory, OPAL_PCI_SET_PHB_TABLE_MEMORY);
+OPAL_CALL(opal_pci_set_pe, OPAL_PCI_SET_PE);
+OPAL_CALL(opal_pci_set_peltv, OPAL_PCI_SET_PELTV);
+OPAL_CALL(opal_pci_set_mve, OPAL_PCI_SET_MVE);
+OPAL_CALL(opal_pci_set_mve_enable, OPAL_PCI_SET_MVE_ENABLE);
+OPAL_CALL(opal_pci_get_xive_reissue, OPAL_PCI_GET_XIVE_REISSUE);
+OPAL_CALL(opal_pci_set_xive_reissue, OPAL_PCI_SET_XIVE_REISSUE);
+OPAL_CALL(opal_pci_set_xive_pe, OPAL_PCI_SET_XIVE_PE);
+OPAL_CALL(opal_get_xive_source, OPAL_GET_XIVE_SOURCE);
+OPAL_CALL(opal_get_msi_32, OPAL_GET_MSI_32);
+OPAL_CALL(opal_get_msi_64, OPAL_GET_MSI_64);
+OPAL_CALL(opal_start_cpu, OPAL_START_CPU);
+OPAL_CALL(opal_query_cpu_status, OPAL_QUERY_CPU_STATUS);
+OPAL_CALL(opal_write_oppanel, OPAL_WRITE_OPPANEL);
+OPAL_CALL(opal_pci_map_pe_dma_window, OPAL_PCI_MAP_PE_DMA_WINDOW);
+OPAL_CALL(opal_pci_map_pe_dma_window_real, OPAL_PCI_MAP_PE_DMA_WINDOW_REAL);
+OPAL_CALL(opal_pci_reset, OPAL_PCI_RESET);
+OPAL_CALL(opal_pci_get_hub_diag_data, OPAL_PCI_GET_HUB_DIAG_DATA);
+OPAL_CALL(opal_pci_get_phb_diag_data, OPAL_PCI_GET_PHB_DIAG_DATA);
+OPAL_CALL(opal_pci_fence_phb, OPAL_PCI_FENCE_PHB);
+OPAL_CALL(opal_pci_reinit, OPAL_PCI_REINIT);
+OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR);
+OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS);
+OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS);
+OPAL_CALL(opal_get_dpo_status, OPAL_GET_DPO_STATUS);
+OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED);
+OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR);
+OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL);
+OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI);
+OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2);
+OPAL_CALL(opal_xscom_read, OPAL_XSCOM_READ);
+OPAL_CALL(opal_xscom_write, OPAL_XSCOM_WRITE);
+OPAL_CALL(opal_lpc_read, OPAL_LPC_READ);
+OPAL_CALL(opal_lpc_write, OPAL_LPC_WRITE);
+OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU);
+OPAL_CALL(opal_reinit_cpus, OPAL_REINIT_CPUS);
+OPAL_CALL(opal_read_elog, OPAL_ELOG_READ);
+OPAL_CALL(opal_send_ack_elog, OPAL_ELOG_ACK);
+OPAL_CALL(opal_get_elog_size, OPAL_ELOG_SIZE);
+OPAL_CALL(opal_resend_pending_logs, OPAL_ELOG_RESEND);
+OPAL_CALL(opal_write_elog, OPAL_ELOG_WRITE);
+OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE);
+OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE);
+OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE);
+OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE);
+OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN);
+OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT);
+OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO);
+OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2);
+OPAL_CALL(opal_dump_read, OPAL_DUMP_READ);
+OPAL_CALL(opal_dump_ack, OPAL_DUMP_ACK);
+OPAL_CALL(opal_get_msg, OPAL_GET_MSG);
+OPAL_CALL(opal_write_oppanel_async, OPAL_WRITE_OPPANEL_ASYNC);
+OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION);
+OPAL_CALL(opal_dump_resend_notification, OPAL_DUMP_RESEND);
+OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT);
+OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
+OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
+OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
+OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
+OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
+OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
+OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
+OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
+OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CAPI_MODE);
+OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO);
+OPAL_CALL(opal_tpo_read, OPAL_READ_TPO);
+OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND);
+OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV);
+OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST);
+OPAL_CALL(opal_flash_read, OPAL_FLASH_READ);
+OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE);
+OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE);
+OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG);
+OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR);
+OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR);
+OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH);
+OPAL_CALL(opal_get_device_tree, OPAL_GET_DEVICE_TREE);
+OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE);
+OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE);
+OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE);
+OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR);
+OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR);
+OPAL_CALL(opal_int_eoi, OPAL_INT_EOI);
+OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR);
+OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL);
+OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR);
+OPAL_CALL(opal_xive_reset, OPAL_XIVE_RESET);
+OPAL_CALL(opal_xive_get_irq_info, OPAL_XIVE_GET_IRQ_INFO);
+OPAL_CALL(opal_xive_get_irq_config, OPAL_XIVE_GET_IRQ_CONFIG);
+OPAL_CALL(opal_xive_set_irq_config, OPAL_XIVE_SET_IRQ_CONFIG);
+OPAL_CALL(opal_xive_get_queue_info, OPAL_XIVE_GET_QUEUE_INFO);
+OPAL_CALL(opal_xive_set_queue_info, OPAL_XIVE_SET_QUEUE_INFO);
+OPAL_CALL(opal_xive_donate_page, OPAL_XIVE_DONATE_PAGE);
+OPAL_CALL(opal_xive_alloc_vp_block, OPAL_XIVE_ALLOCATE_VP_BLOCK);
+OPAL_CALL(opal_xive_free_vp_block, OPAL_XIVE_FREE_VP_BLOCK);
+OPAL_CALL(opal_xive_allocate_irq, OPAL_XIVE_ALLOCATE_IRQ);
+OPAL_CALL(opal_xive_free_irq, OPAL_XIVE_FREE_IRQ);
+OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
+OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
+OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
+OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
+OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET);
+OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
+OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
+OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
+OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT);
+OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START);
+OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP);
+OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P);
+OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP);
+OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP);
+OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO);
+OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO);
+OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_quiesce, OPAL_QUIESCE);
+OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP);
+OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE);
+OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_sensor_read_u64, OPAL_SENSOR_READ_U64);
+OPAL_CALL(opal_sensor_group_enable, OPAL_SENSOR_GROUP_ENABLE);
+OPAL_CALL(opal_nx_coproc_init, OPAL_NX_COPROC_INIT);
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
index acd3206dfae3..06628c71cef6 100644
--- a/arch/powerpc/platforms/powernv/opal-msglog.c
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -98,7 +98,7 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
}
static struct bin_attribute opal_msglog_attr = {
- .attr = {.name = "msglog", .mode = 0444},
+ .attr = {.name = "msglog", .mode = 0400},
.read = opal_msglog_read
};
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index f4875fe3f8ff..7d2052d8af9d 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -17,317 +17,51 @@
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
- .section ".text"
-
-#ifdef CONFIG_TRACEPOINTS
-#ifdef CONFIG_JUMP_LABEL
-#define OPAL_BRANCH(LABEL) \
- ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key)
-#else
-
- .section ".toc","aw"
-
- .globl opal_tracepoint_refcount
-opal_tracepoint_refcount:
- .8byte 0
-
- .section ".text"
-
-/*
- * We branch around this in early init by using an unconditional cpu
- * feature.
- */
-#define OPAL_BRANCH(LABEL) \
-BEGIN_FTR_SECTION; \
- b 1f; \
-END_FTR_SECTION(0, 1); \
- ld r11,opal_tracepoint_refcount@toc(r2); \
- cmpdi r11,0; \
- bne- LABEL; \
-1:
-
-#endif
-
-#else
-#define OPAL_BRANCH(LABEL)
-#endif
+ .section ".text"
/*
- * DO_OPAL_CALL assumes:
- * r0 = opal call token
- * r12 = msr
- * LR has been saved
+ * r3-r10 - OPAL call arguments
+ * STK_PARAM(R11) - OPAL opcode
+ * STK_PARAM(R12) - MSR to restore
*/
-#define DO_OPAL_CALL() \
- mfcr r11; \
- stw r11,8(r1); \
- li r11,0; \
- ori r11,r11,MSR_EE; \
- std r12,PACASAVEDMSR(r13); \
- andc r12,r12,r11; \
- mtmsrd r12,1; \
- LOAD_REG_ADDR(r11,opal_return); \
- mtlr r11; \
- li r11,MSR_DR|MSR_IR|MSR_LE;\
- andc r12,r12,r11; \
- mtspr SPRN_HSRR1,r12; \
- LOAD_REG_ADDR(r11,opal); \
- ld r12,8(r11); \
- ld r2,0(r11); \
- mtspr SPRN_HSRR0,r12; \
+_GLOBAL_TOC(__opal_call)
+ mflr r0
+ std r0,PPC_LR_STKOFF(r1)
+ ld r12,STK_PARAM(R12)(r1)
+ li r0,MSR_IR|MSR_DR|MSR_LE
+ andc r12,r12,r0
+ LOAD_REG_ADDR(r11, opal_return)
+ mtlr r11
+ LOAD_REG_ADDR(r11, opal)
+ ld r2,0(r11)
+ ld r11,8(r11)
+ mtspr SPRN_HSRR0,r11
+ mtspr SPRN_HSRR1,r12
+ /* set token to r0 */
+ ld r0,STK_PARAM(R11)(r1)
hrfid
-
-#define OPAL_CALL(name, token) \
- _GLOBAL_TOC(name); \
- mfmsr r12; \
- mflr r0; \
- andi. r11,r12,MSR_IR|MSR_DR; \
- std r0,PPC_LR_STKOFF(r1); \
- li r0,token; \
- beq opal_real_call; \
- OPAL_BRANCH(opal_tracepoint_entry) \
- DO_OPAL_CALL()
-
-
opal_return:
/*
- * Fixup endian on OPAL return... we should be able to simplify
- * this by instead converting the below trampoline to a set of
- * bytes (always BE) since MSR:LE will end up fixed up as a side
- * effect of the rfid.
+ * Restore MSR on OPAL return. The MSR is set to big-endian.
*/
- FIXUP_ENDIAN_HV
- ld r2,PACATOC(r13);
- lwz r4,8(r1);
- ld r5,PPC_LR_STKOFF(r1);
- ld r6,PACASAVEDMSR(r13);
- mtcr r4;
- mtspr SPRN_HSRR0,r5;
- mtspr SPRN_HSRR1,r6;
- hrfid
-
-opal_real_call:
- mfcr r11
- stw r11,8(r1)
- /* Set opal return address */
- LOAD_REG_ADDR(r11, opal_return_realmode)
- mtlr r11
- li r11,MSR_LE
- andc r12,r12,r11
- mtspr SPRN_HSRR1,r12
- LOAD_REG_ADDR(r11,opal)
- ld r12,8(r11)
- ld r2,0(r11)
- mtspr SPRN_HSRR0,r12
- hrfid
-
-opal_return_realmode:
- FIXUP_ENDIAN_HV
- ld r2,PACATOC(r13);
- lwz r11,8(r1);
- ld r12,PPC_LR_STKOFF(r1)
- mtcr r11;
- mtlr r12
- blr
-
-#ifdef CONFIG_TRACEPOINTS
-opal_tracepoint_entry:
- stdu r1,-STACKFRAMESIZE(r1)
- std r0,STK_REG(R23)(r1)
- std r3,STK_REG(R24)(r1)
- std r4,STK_REG(R25)(r1)
- std r5,STK_REG(R26)(r1)
- std r6,STK_REG(R27)(r1)
- std r7,STK_REG(R28)(r1)
- std r8,STK_REG(R29)(r1)
- std r9,STK_REG(R30)(r1)
- std r10,STK_REG(R31)(r1)
- mr r3,r0
- addi r4,r1,STK_REG(R24)
- bl __trace_opal_entry
- ld r0,STK_REG(R23)(r1)
- ld r3,STK_REG(R24)(r1)
- ld r4,STK_REG(R25)(r1)
- ld r5,STK_REG(R26)(r1)
- ld r6,STK_REG(R27)(r1)
- ld r7,STK_REG(R28)(r1)
- ld r8,STK_REG(R29)(r1)
- ld r9,STK_REG(R30)(r1)
- ld r10,STK_REG(R31)(r1)
-
- /* setup LR so we return via tracepoint_return */
- LOAD_REG_ADDR(r11,opal_tracepoint_return)
- std r11,16(r1)
-
- mfmsr r12
- DO_OPAL_CALL()
-
-opal_tracepoint_return:
- std r3,STK_REG(R31)(r1)
- mr r4,r3
- ld r3,STK_REG(R23)(r1)
- bl __trace_opal_exit
- ld r3,STK_REG(R31)(r1)
- addi r1,r1,STACKFRAMESIZE
- ld r0,16(r1)
+#ifdef __BIG_ENDIAN__
+ ld r11,STK_PARAM(R12)(r1)
+ mtmsrd r11
+#else
+ /* Endian can only be switched with rfi, must byte reverse MSR load */
+ .short 0x4039 /* li r10,STK_PARAM(R12) */
+ .byte (STK_PARAM(R12) >> 8) & 0xff
+ .byte STK_PARAM(R12) & 0xff
+
+ .long 0x280c6a7d /* ldbrx r11,r10,r1 */
+ .long 0x05009f42 /* bcl 20,31,$+4 */
+ .long 0xa602487d /* mflr r10 */
+ .long 0x14004a39 /* addi r10,r10,20 */
+ .long 0xa64b5a7d /* mthsrr0 r10 */
+ .long 0xa64b7b7d /* mthsrr1 r11 */
+ .long 0x2402004c /* hrfid */
+#endif
+ ld r2,PACATOC(r13)
+ ld r0,PPC_LR_STKOFF(r1)
mtlr r0
blr
-#endif
-
-
-OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL);
-OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);
-OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ);
-OPAL_CALL(opal_console_write_buffer_space, OPAL_CONSOLE_WRITE_BUFFER_SPACE);
-OPAL_CALL(opal_rtc_read, OPAL_RTC_READ);
-OPAL_CALL(opal_rtc_write, OPAL_RTC_WRITE);
-OPAL_CALL(opal_cec_power_down, OPAL_CEC_POWER_DOWN);
-OPAL_CALL(opal_cec_reboot, OPAL_CEC_REBOOT);
-OPAL_CALL(opal_cec_reboot2, OPAL_CEC_REBOOT2);
-OPAL_CALL(opal_read_nvram, OPAL_READ_NVRAM);
-OPAL_CALL(opal_write_nvram, OPAL_WRITE_NVRAM);
-OPAL_CALL(opal_handle_interrupt, OPAL_HANDLE_INTERRUPT);
-OPAL_CALL(opal_poll_events, OPAL_POLL_EVENTS);
-OPAL_CALL(opal_pci_set_hub_tce_memory, OPAL_PCI_SET_HUB_TCE_MEMORY);
-OPAL_CALL(opal_pci_set_phb_tce_memory, OPAL_PCI_SET_PHB_TCE_MEMORY);
-OPAL_CALL(opal_pci_config_read_byte, OPAL_PCI_CONFIG_READ_BYTE);
-OPAL_CALL(opal_pci_config_read_half_word, OPAL_PCI_CONFIG_READ_HALF_WORD);
-OPAL_CALL(opal_pci_config_read_word, OPAL_PCI_CONFIG_READ_WORD);
-OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE);
-OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD);
-OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD);
-OPAL_CALL(opal_set_xive, OPAL_SET_XIVE);
-OPAL_CALL(opal_get_xive, OPAL_GET_XIVE);
-OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
-OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS);
-OPAL_CALL(opal_pci_eeh_freeze_clear, OPAL_PCI_EEH_FREEZE_CLEAR);
-OPAL_CALL(opal_pci_eeh_freeze_set, OPAL_PCI_EEH_FREEZE_SET);
-OPAL_CALL(opal_pci_err_inject, OPAL_PCI_ERR_INJECT);
-OPAL_CALL(opal_pci_shpc, OPAL_PCI_SHPC);
-OPAL_CALL(opal_pci_phb_mmio_enable, OPAL_PCI_PHB_MMIO_ENABLE);
-OPAL_CALL(opal_pci_set_phb_mem_window, OPAL_PCI_SET_PHB_MEM_WINDOW);
-OPAL_CALL(opal_pci_map_pe_mmio_window, OPAL_PCI_MAP_PE_MMIO_WINDOW);
-OPAL_CALL(opal_pci_set_phb_table_memory, OPAL_PCI_SET_PHB_TABLE_MEMORY);
-OPAL_CALL(opal_pci_set_pe, OPAL_PCI_SET_PE);
-OPAL_CALL(opal_pci_set_peltv, OPAL_PCI_SET_PELTV);
-OPAL_CALL(opal_pci_set_mve, OPAL_PCI_SET_MVE);
-OPAL_CALL(opal_pci_set_mve_enable, OPAL_PCI_SET_MVE_ENABLE);
-OPAL_CALL(opal_pci_get_xive_reissue, OPAL_PCI_GET_XIVE_REISSUE);
-OPAL_CALL(opal_pci_set_xive_reissue, OPAL_PCI_SET_XIVE_REISSUE);
-OPAL_CALL(opal_pci_set_xive_pe, OPAL_PCI_SET_XIVE_PE);
-OPAL_CALL(opal_get_xive_source, OPAL_GET_XIVE_SOURCE);
-OPAL_CALL(opal_get_msi_32, OPAL_GET_MSI_32);
-OPAL_CALL(opal_get_msi_64, OPAL_GET_MSI_64);
-OPAL_CALL(opal_start_cpu, OPAL_START_CPU);
-OPAL_CALL(opal_query_cpu_status, OPAL_QUERY_CPU_STATUS);
-OPAL_CALL(opal_write_oppanel, OPAL_WRITE_OPPANEL);
-OPAL_CALL(opal_pci_map_pe_dma_window, OPAL_PCI_MAP_PE_DMA_WINDOW);
-OPAL_CALL(opal_pci_map_pe_dma_window_real, OPAL_PCI_MAP_PE_DMA_WINDOW_REAL);
-OPAL_CALL(opal_pci_reset, OPAL_PCI_RESET);
-OPAL_CALL(opal_pci_get_hub_diag_data, OPAL_PCI_GET_HUB_DIAG_DATA);
-OPAL_CALL(opal_pci_get_phb_diag_data, OPAL_PCI_GET_PHB_DIAG_DATA);
-OPAL_CALL(opal_pci_fence_phb, OPAL_PCI_FENCE_PHB);
-OPAL_CALL(opal_pci_reinit, OPAL_PCI_REINIT);
-OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR);
-OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS);
-OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS);
-OPAL_CALL(opal_get_dpo_status, OPAL_GET_DPO_STATUS);
-OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED);
-OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR);
-OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL);
-OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI);
-OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2);
-OPAL_CALL(opal_xscom_read, OPAL_XSCOM_READ);
-OPAL_CALL(opal_xscom_write, OPAL_XSCOM_WRITE);
-OPAL_CALL(opal_lpc_read, OPAL_LPC_READ);
-OPAL_CALL(opal_lpc_write, OPAL_LPC_WRITE);
-OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU);
-OPAL_CALL(opal_reinit_cpus, OPAL_REINIT_CPUS);
-OPAL_CALL(opal_read_elog, OPAL_ELOG_READ);
-OPAL_CALL(opal_send_ack_elog, OPAL_ELOG_ACK);
-OPAL_CALL(opal_get_elog_size, OPAL_ELOG_SIZE);
-OPAL_CALL(opal_resend_pending_logs, OPAL_ELOG_RESEND);
-OPAL_CALL(opal_write_elog, OPAL_ELOG_WRITE);
-OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE);
-OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE);
-OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE);
-OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE);
-OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN);
-OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT);
-OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO);
-OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2);
-OPAL_CALL(opal_dump_read, OPAL_DUMP_READ);
-OPAL_CALL(opal_dump_ack, OPAL_DUMP_ACK);
-OPAL_CALL(opal_get_msg, OPAL_GET_MSG);
-OPAL_CALL(opal_write_oppanel_async, OPAL_WRITE_OPPANEL_ASYNC);
-OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION);
-OPAL_CALL(opal_dump_resend_notification, OPAL_DUMP_RESEND);
-OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT);
-OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
-OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
-OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
-OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
-OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
-OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
-OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
-OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
-OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CAPI_MODE);
-OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO);
-OPAL_CALL(opal_tpo_read, OPAL_READ_TPO);
-OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND);
-OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV);
-OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST);
-OPAL_CALL(opal_flash_read, OPAL_FLASH_READ);
-OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE);
-OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE);
-OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG);
-OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR);
-OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR);
-OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH);
-OPAL_CALL(opal_get_device_tree, OPAL_GET_DEVICE_TREE);
-OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE);
-OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE);
-OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE);
-OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR);
-OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR);
-OPAL_CALL(opal_int_eoi, OPAL_INT_EOI);
-OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR);
-OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL);
-OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR);
-OPAL_CALL(opal_xive_reset, OPAL_XIVE_RESET);
-OPAL_CALL(opal_xive_get_irq_info, OPAL_XIVE_GET_IRQ_INFO);
-OPAL_CALL(opal_xive_get_irq_config, OPAL_XIVE_GET_IRQ_CONFIG);
-OPAL_CALL(opal_xive_set_irq_config, OPAL_XIVE_SET_IRQ_CONFIG);
-OPAL_CALL(opal_xive_get_queue_info, OPAL_XIVE_GET_QUEUE_INFO);
-OPAL_CALL(opal_xive_set_queue_info, OPAL_XIVE_SET_QUEUE_INFO);
-OPAL_CALL(opal_xive_donate_page, OPAL_XIVE_DONATE_PAGE);
-OPAL_CALL(opal_xive_alloc_vp_block, OPAL_XIVE_ALLOCATE_VP_BLOCK);
-OPAL_CALL(opal_xive_free_vp_block, OPAL_XIVE_FREE_VP_BLOCK);
-OPAL_CALL(opal_xive_allocate_irq, OPAL_XIVE_ALLOCATE_IRQ);
-OPAL_CALL(opal_xive_free_irq, OPAL_XIVE_FREE_IRQ);
-OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
-OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
-OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
-OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
-OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET);
-OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
-OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
-OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
-OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT);
-OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START);
-OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP);
-OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P);
-OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP);
-OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP);
-OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO);
-OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO);
-OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
-OPAL_CALL(opal_quiesce, OPAL_QUIESCE);
-OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP);
-OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE);
-OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET);
-OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
-OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
-OPAL_CALL(opal_sensor_read_u64, OPAL_SENSOR_READ_U64);
-OPAL_CALL(opal_sensor_group_enable, OPAL_SENSOR_GROUP_ENABLE);
-OPAL_CALL(opal_nx_coproc_init, OPAL_NX_COPROC_INIT);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 79586f127521..727a7de08635 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -26,7 +26,6 @@
#include <linux/memblock.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
-#include <linux/printk.h>
#include <linux/kmsg_dump.h>
#include <linux/console.h>
#include <linux/sched/debug.h>
@@ -171,8 +170,7 @@ int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
/*
* Allocate a buffer to hold the MC recoverable ranges.
*/
- mc_recoverable_range =__va(memblock_phys_alloc(size, __alignof__(u64)));
- memset(mc_recoverable_range, 0, size);
+ mc_recoverable_range = memblock_alloc(size, __alignof__(u64));
for (i = 0; i < mc_recoverable_range_len; i++) {
mc_recoverable_range[i].start_addr =
@@ -587,7 +585,7 @@ int opal_machine_check(struct pt_regs *regs)
evt.version);
return 0;
}
- machine_check_print_event_info(&evt, user_mode(regs));
+ machine_check_print_event_info(&evt, user_mode(regs), false);
if (opal_recover_mce(regs, &evt))
return 1;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index 697449afb3f7..e28f03e1eb5e 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -313,7 +313,6 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
page_shift);
tbl->it_level_size = 1ULL << (level_shift - 3);
tbl->it_indirect_levels = levels - 1;
- tbl->it_allocated_size = total_allocated;
tbl->it_userspace = uas;
tbl->it_nid = nid;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 7db3119f8a5b..fa6af52b5219 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1593,6 +1593,8 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
pnv_pci_ioda2_setup_dma_pe(phb, pe);
#ifdef CONFIG_IOMMU_API
+ iommu_register_group(&pe->table_group,
+ pe->phb->hose->global_number, pe->pe_number);
pnv_ioda_setup_bus_iommu_group(pe, &pe->table_group, NULL);
#endif
}
@@ -1746,7 +1748,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
pe = &phb->ioda.pe_array[pdn->pe_number];
WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
- set_dma_offset(&pdev->dev, pe->tce_bypass_base);
+ pdev->dev.archdata.dma_offset = pe->tce_bypass_base;
set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
/*
* Note: iommu_add_device() will fail here as
@@ -1756,31 +1758,6 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
*/
}
-static bool pnv_pci_ioda_pe_single_vendor(struct pnv_ioda_pe *pe)
-{
- unsigned short vendor = 0;
- struct pci_dev *pdev;
-
- if (pe->device_count == 1)
- return true;
-
- /* pe->pdev should be set if it's a single device, pe->pbus if not */
- if (!pe->pbus)
- return true;
-
- list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
- if (!vendor) {
- vendor = pdev->vendor;
- continue;
- }
-
- if (pdev->vendor != vendor)
- return false;
- }
-
- return true;
-}
-
/*
* Reconfigure TVE#0 to be usable as 64-bit DMA space.
*
@@ -1850,88 +1827,45 @@ err:
return -EIO;
}
-static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
+static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
+ u64 dma_mask)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(pdev);
struct pnv_ioda_pe *pe;
- uint64_t top;
- bool bypass = false;
- s64 rc;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
return -ENODEV;
pe = &phb->ioda.pe_array[pdn->pe_number];
if (pe->tce_bypass_enabled) {
- top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
- bypass = (dma_mask >= top);
+ u64 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
+ if (dma_mask >= top)
+ return true;
}
- if (bypass) {
- dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
- set_dma_ops(&pdev->dev, &dma_nommu_ops);
- } else {
- /*
- * If the device can't set the TCE bypass bit but still wants
- * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to
- * bypass the 32-bit region and be usable for 64-bit DMAs.
- * The device needs to be able to address all of this space.
- */
- if (dma_mask >> 32 &&
- dma_mask > (memory_hotplug_max() + (1ULL << 32)) &&
- pnv_pci_ioda_pe_single_vendor(pe) &&
- phb->model == PNV_PHB_MODEL_PHB3) {
- /* Configure the bypass mode */
- rc = pnv_pci_ioda_dma_64bit_bypass(pe);
- if (rc)
- return rc;
- /* 4GB offset bypasses 32-bit space */
- set_dma_offset(&pdev->dev, (1ULL << 32));
- set_dma_ops(&pdev->dev, &dma_nommu_ops);
- } else if (dma_mask >> 32 && dma_mask != DMA_BIT_MASK(64)) {
- /*
- * Fail the request if a DMA mask between 32 and 64 bits
- * was requested but couldn't be fulfilled. Ideally we
- * would do this for 64-bits but historically we have
- * always fallen back to 32-bits.
- */
- return -ENOMEM;
- } else {
- dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
- set_dma_ops(&pdev->dev, &dma_iommu_ops);
- }
+ /*
+ * If the device can't set the TCE bypass bit but still wants
+ * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to
+ * bypass the 32-bit region and be usable for 64-bit DMAs.
+ * The device needs to be able to address all of this space.
+ */
+ if (dma_mask >> 32 &&
+ dma_mask > (memory_hotplug_max() + (1ULL << 32)) &&
+ /* pe->pdev should be set if it's a single device, pe->pbus if not */
+ (pe->device_count == 1 || !pe->pbus) &&
+ phb->model == PNV_PHB_MODEL_PHB3) {
+ /* Configure the bypass mode */
+ s64 rc = pnv_pci_ioda_dma_64bit_bypass(pe);
+ if (rc)
+ return rc;
+ /* 4GB offset bypasses 32-bit space */
+ pdev->dev.archdata.dma_offset = (1ULL << 32);
+ return true;
}
- *pdev->dev.dma_mask = dma_mask;
- /* Update peer npu devices */
- pnv_npu_try_dma_set_bypass(pdev, bypass);
-
- return 0;
-}
-
-static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev)
-{
- struct pci_controller *hose = pci_bus_to_host(pdev->bus);
- struct pnv_phb *phb = hose->private_data;
- struct pci_dn *pdn = pci_get_pdn(pdev);
- struct pnv_ioda_pe *pe;
- u64 end, mask;
-
- if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return 0;
-
- pe = &phb->ioda.pe_array[pdn->pe_number];
- if (!pe->tce_bypass_enabled)
- return __dma_get_required_mask(&pdev->dev);
-
-
- end = pe->tce_bypass_base + memblock_end_of_DRAM();
- mask = 1ULL << (fls64(end) - 1);
- mask += mask - 1;
-
- return mask;
+ return false;
}
static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
@@ -1940,7 +1874,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
list_for_each_entry(dev, &bus->devices, bus_list) {
set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
- set_dma_offset(&dev->dev, pe->tce_bypass_base);
+ dev->dev.archdata.dma_offset = pe->tce_bypass_base;
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
pnv_ioda_setup_bus_dma(pe, dev->subordinate);
@@ -2592,8 +2526,13 @@ static long pnv_pci_ioda2_create_table_userspace(
int num, __u32 page_shift, __u64 window_size, __u32 levels,
struct iommu_table **ptbl)
{
- return pnv_pci_ioda2_create_table(table_group,
+ long ret = pnv_pci_ioda2_create_table(table_group,
num, page_shift, window_size, levels, true, ptbl);
+
+ if (!ret)
+ (*ptbl)->it_allocated_size = pnv_pci_ioda2_get_table_size(
+ page_shift, window_size, levels);
+ return ret;
}
static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
@@ -3659,6 +3598,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
.dma_bus_setup = pnv_pci_dma_bus_setup,
+ .iommu_bypass_supported = pnv_pci_ioda_iommu_bypass_supported,
.setup_msi_irqs = pnv_setup_msi_irqs,
.teardown_msi_irqs = pnv_teardown_msi_irqs,
.enable_device_hook = pnv_pci_enable_device_hook,
@@ -3666,19 +3606,9 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.window_alignment = pnv_pci_window_alignment,
.setup_bridge = pnv_pci_setup_bridge,
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .dma_set_mask = pnv_pci_ioda_dma_set_mask,
- .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask,
.shutdown = pnv_pci_ioda_shutdown,
};
-static int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
-{
- dev_err_once(&npdev->dev,
- "%s operation unsupported for NVLink devices\n",
- __func__);
- return -EPERM;
-}
-
static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
.setup_msi_irqs = pnv_setup_msi_irqs,
@@ -3686,7 +3616,6 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
.enable_device_hook = pnv_pci_enable_device_hook,
.window_alignment = pnv_pci_window_alignment,
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .dma_set_mask = pnv_npu_dma_set_mask,
.shutdown = pnv_pci_ioda_shutdown,
.disable_device = pnv_npu_disable_device,
};
@@ -3944,9 +3873,12 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
* shutdown PCI devices correctly. We already got IODA table
* cleaned out. So we have to issue PHB reset to stop all PCI
* transactions from previous kernel. The ppc_pci_reset_phbs
- * kernel parameter will force this reset too.
+ * kernel parameter will force this reset too. Additionally,
+ * if the IODA reset above failed then use a bigger hammer.
+ * This can happen if we get a PHB fatal error in very early
+ * boot.
*/
- if (is_kdump_kernel() || pci_reset_phbs) {
+ if (is_kdump_kernel() || pci_reset_phbs || rc) {
pr_info(" Issue PHB reset ...\n");
pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 45fb70b4bfa7..ef9448a907c6 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -1147,6 +1147,8 @@ static int pnv_tce_iommu_bus_notifier(struct notifier_block *nb,
return 0;
pe = &phb->ioda.pe_array[pdn->pe_number];
+ if (!pe->table_group.group)
+ return 0;
iommu_add_device(&pe->table_group, dev);
return 0;
case BUS_NOTIFY_DEL_DEVICE:
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 0d354e19ef92..db09c7022635 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -39,6 +39,7 @@
#include <asm/cpuidle.h>
#include <asm/kexec.h>
#include <asm/reg.h>
+#include <asm/powernv.h>
#include "powernv.h"
@@ -153,6 +154,7 @@ static void pnv_smp_cpu_kill_self(void)
{
unsigned int cpu;
unsigned long srr1, wmask;
+ u64 lpcr_val;
/* Standard hot unplug procedure */
/*
@@ -174,6 +176,19 @@ static void pnv_smp_cpu_kill_self(void)
if (cpu_has_feature(CPU_FTR_ARCH_207S))
wmask = SRR1_WAKEMASK_P8;
+ /*
+ * We don't want to take decrementer interrupts while we are
+ * offline, so clear LPCR:PECE1. We keep PECE2 (and
+ * LPCR_PECE_HVEE on P9) enabled so as to let IPIs in.
+ *
+ * If the CPU gets woken up by a special wakeup, ensure that
+ * the SLW engine sets LPCR with decrementer bit cleared, else
+ * the CPU will come back to the kernel due to a spurious
+ * wakeup.
+ */
+ lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
+ pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
+
while (!generic_check_cpu_restart(cpu)) {
/*
* Clear IPI flag, since we don't handle IPIs while
@@ -246,6 +261,16 @@ static void pnv_smp_cpu_kill_self(void)
}
+ /*
+ * Re-enable decrementer interrupts in LPCR.
+ *
+ * Further, we want stop states to be woken up by decrementer
+ * for non-hotplug cases. So program the LPCR via stop api as
+ * well.
+ */
+ lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
+ pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
+
DBG("CPU%d coming online...\n", cpu);
}
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
index e7075aaff1bb..59587b75493d 100644
--- a/arch/powerpc/platforms/ps3/device-init.c
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -354,9 +354,7 @@ static int ps3_setup_storage_dev(const struct ps3_repository_device *repo,
repo->dev_index, repo->dev_type, port, blk_size, num_blocks,
num_regions);
- p = kzalloc(sizeof(struct ps3_storage_device) +
- num_regions * sizeof(struct ps3_storage_region),
- GFP_KERNEL);
+ p = kzalloc(struct_size(p, regions, num_regions), GFP_KERNEL);
if (!p) {
result = -ENOMEM;
goto fail_malloc;
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
index f5387ad82279..4d65c5380020 100644
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -205,11 +205,11 @@ static const struct os_area_db_id os_area_db_id_rtc_diff = {
* 3) The number of seconds from 1970 to 2000.
*/
-struct saved_params {
+static struct saved_params {
unsigned int valid;
s64 rtc_diff;
unsigned int av_multi_out;
-} static saved_params;
+} saved_params;
static struct property property_rtc_diff = {
.name = "linux,rtc_diff",
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index 5cc35d6b94b6..7c227e784247 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -37,12 +37,12 @@ static struct device ps3_system_bus = {
};
/* FIXME: need device usage counters! */
-struct {
+static struct {
struct mutex mutex;
int sb_11; /* usb 0 */
int sb_12; /* usb 0 */
int gpu;
-} static usage_hack;
+} usage_hack;
static int ps3_is_device(struct ps3_system_bus_device *dev, u64 bus_id,
u64 dev_id)
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 2f8e62163602..97feb6e79f1a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -802,6 +802,25 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
return rc;
}
+int dlpar_cpu_readd(int cpu)
+{
+ struct device_node *dn;
+ struct device *dev;
+ u32 drc_index;
+ int rc;
+
+ dev = get_cpu_device(cpu);
+ dn = dev->of_node;
+
+ rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index);
+
+ rc = dlpar_cpu_remove_by_index(drc_index);
+ if (!rc)
+ rc = dlpar_cpu_add(drc_index);
+
+ return rc;
+}
+
int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
{
u32 count, drc_index;
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index 5b4a56131904..84e8ec4011ba 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -261,8 +261,7 @@ static char *ibmebus_chomp(const char *in, size_t count)
return out;
}
-static ssize_t ibmebus_store_probe(struct bus_type *bus,
- const char *buf, size_t count)
+static ssize_t probe_store(struct bus_type *bus, const char *buf, size_t count)
{
struct device_node *dn = NULL;
struct device *dev;
@@ -298,10 +297,9 @@ out:
return rc;
return count;
}
-static BUS_ATTR(probe, 0200, NULL, ibmebus_store_probe);
+static BUS_ATTR_WO(probe);
-static ssize_t ibmebus_store_remove(struct bus_type *bus,
- const char *buf, size_t count)
+static ssize_t remove_store(struct bus_type *bus, const char *buf, size_t count)
{
struct device *dev;
char *path;
@@ -325,7 +323,7 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus,
return -ENODEV;
}
}
-static BUS_ATTR(remove, 0200, NULL, ibmebus_store_remove);
+static BUS_ATTR_WO(remove);
static struct attribute *ibmbus_bus_attrs[] = {
&bus_attr_probe.attr,
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 8fc8fe0b9848..36eb1ddbac69 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -978,7 +978,7 @@ static phys_addr_t ddw_memory_hotplug_max(void)
* pdn: the parent pe node with the ibm,dma_window property
* Future: also check if we can remap the base window for our base page size
*
- * returns the dma offset for use by dma_set_mask
+ * returns the dma offset for use by the direct mapped DMA code.
*/
static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
{
@@ -1198,87 +1198,37 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
iommu_add_device(pci->table_group, &dev->dev);
}
-static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
+static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
{
- bool ddw_enabled = false;
- struct device_node *pdn, *dn;
- struct pci_dev *pdev;
+ struct device_node *dn = pci_device_to_OF_node(pdev), *pdn;
const __be32 *dma_window = NULL;
- u64 dma_offset;
-
- if (!dev->dma_mask)
- return -EIO;
-
- if (!dev_is_pci(dev))
- goto check_mask;
-
- pdev = to_pci_dev(dev);
/* only attempt to use a new window if 64-bit DMA is requested */
- if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
- dn = pci_device_to_OF_node(pdev);
- dev_dbg(dev, "node is %pOF\n", dn);
+ if (dma_mask < DMA_BIT_MASK(64))
+ return false;
- /*
- * the device tree might contain the dma-window properties
- * per-device and not necessarily for the bus. So we need to
- * search upwards in the tree until we either hit a dma-window
- * property, OR find a parent with a table already allocated.
- */
- for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
- pdn = pdn->parent) {
- dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
- if (dma_window)
- break;
- }
- if (pdn && PCI_DN(pdn)) {
- dma_offset = enable_ddw(pdev, pdn);
- if (dma_offset != 0) {
- dev_info(dev, "Using 64-bit direct DMA at offset %llx\n", dma_offset);
- set_dma_offset(dev, dma_offset);
- set_dma_ops(dev, &dma_nommu_ops);
- ddw_enabled = true;
- }
- }
- }
+ dev_dbg(&pdev->dev, "node is %pOF\n", dn);
- /* fall back on iommu ops */
- if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) {
- dev_info(dev, "Restoring 32-bit DMA via iommu\n");
- set_dma_ops(dev, &dma_iommu_ops);
+ /*
+ * the device tree might contain the dma-window properties
+ * per-device and not necessarily for the bus. So we need to
+ * search upwards in the tree until we either hit a dma-window
+ * property, OR find a parent with a table already allocated.
+ */
+ for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
+ pdn = pdn->parent) {
+ dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
+ if (dma_window)
+ break;
}
-check_mask:
- if (!dma_supported(dev, dma_mask))
- return -EIO;
-
- *dev->dma_mask = dma_mask;
- return 0;
-}
-
-static u64 dma_get_required_mask_pSeriesLP(struct device *dev)
-{
- if (!dev->dma_mask)
- return 0;
-
- if (!disable_ddw && dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
- struct device_node *dn;
-
- dn = pci_device_to_OF_node(pdev);
-
- /* search upwards for ibm,dma-window */
- for (; dn && PCI_DN(dn) && !PCI_DN(dn)->table_group;
- dn = dn->parent)
- if (of_get_property(dn, "ibm,dma-window", NULL))
- break;
- /* if there is a ibm,ddw-applicable property require 64 bits */
- if (dn && PCI_DN(dn) &&
- of_get_property(dn, "ibm,ddw-applicable", NULL))
- return DMA_BIT_MASK(64);
+ if (pdn && PCI_DN(pdn)) {
+ pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn);
+ if (pdev->dev.archdata.dma_offset)
+ return true;
}
- return dma_iommu_ops.get_required_mask(dev);
+ return false;
}
static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
@@ -1373,8 +1323,9 @@ void iommu_init_early_pSeries(void)
if (firmware_has_feature(FW_FEATURE_LPAR)) {
pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
- ppc_md.dma_set_mask = dma_set_mask_pSeriesLP;
- ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP;
+ if (!disable_ddw)
+ pseries_pci_controller_ops.iommu_bypass_supported =
+ iommu_bypass_supported_pSeriesLP;
} else {
pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 794487313cc8..e73c7e30efe6 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -475,6 +475,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
splpar_dispatch_data(m);
seq_printf(m, "purr=%ld\n", get_purr());
+ seq_printf(m, "tbr=%ld\n", mftb());
} else { /* non SPLPAR case */
seq_printf(m, "system_active_processors=%d\n",
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 69cedc1b3b8a..1136a38ff039 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -7,8 +7,6 @@
* 2 of the License, or (at your option) any later version.
*
* /dev/nvram driver for PPC64
- *
- * This perhaps should live in drivers/char
*/
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 41f62ca27c63..e4f0dfd4ae33 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -130,8 +130,13 @@ static void __init fwnmi_init(void)
* It will be used in real mode mce handler, hence it needs to be
* below RMA.
*/
- mce_data_buf = __va(memblock_alloc_base(RTAS_ERROR_LOG_MAX * nr_cpus,
- RTAS_ERROR_LOG_MAX, ppc64_rma_size));
+ mce_data_buf = memblock_alloc_try_nid_raw(RTAS_ERROR_LOG_MAX * nr_cpus,
+ RTAS_ERROR_LOG_MAX, MEMBLOCK_LOW_LIMIT,
+ ppc64_rma_size, NUMA_NO_NODE);
+ if (!mce_data_buf)
+ panic("Failed to allocate %d bytes below %pa for MCE buffer\n",
+ RTAS_ERROR_LOG_MAX * nr_cpus, &ppc64_rma_size);
+
for_each_possible_cpu(i) {
paca_ptrs[i]->mce_data_buf = mce_data_buf +
(RTAS_ERROR_LOG_MAX * i);
@@ -140,8 +145,13 @@ static void __init fwnmi_init(void)
#ifdef CONFIG_PPC_BOOK3S_64
/* Allocate per cpu slb area to save old slb contents during MCE */
size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
- slb_ptr = __va(memblock_alloc_base(size, sizeof(struct slb_entry),
- ppc64_rma_size));
+ slb_ptr = memblock_alloc_try_nid_raw(size, sizeof(struct slb_entry),
+ MEMBLOCK_LOW_LIMIT, ppc64_rma_size,
+ NUMA_NO_NODE);
+ if (!slb_ptr)
+ panic("Failed to allocate %zu bytes below %pa for slb area\n",
+ size, &ppc64_rma_size);
+
for_each_possible_cpu(i)
paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
#endif
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 1fad4649735b..141795275ccb 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -492,7 +492,9 @@ static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
return NULL;
}
- ret = dma_iommu_ops.alloc(dev, size, dma_handle, flag, attrs);
+ ret = iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
+ dma_handle, dev->coherent_dma_mask, flag,
+ dev_to_node(dev));
if (unlikely(ret == NULL)) {
vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
atomic_inc(&viodev->cmo.allocs_failed);
@@ -507,8 +509,7 @@ static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
{
struct vio_dev *viodev = to_vio_dev(dev);
- dma_iommu_ops.free(dev, size, vaddr, dma_handle, attrs);
-
+ iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
}
@@ -518,22 +519,22 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
unsigned long attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
- struct iommu_table *tbl;
+ struct iommu_table *tbl = get_iommu_table_base(dev);
dma_addr_t ret = DMA_MAPPING_ERROR;
- tbl = get_iommu_table_base(dev);
- if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) {
- atomic_inc(&viodev->cmo.allocs_failed);
- return ret;
- }
-
- ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs);
- if (unlikely(dma_mapping_error(dev, ret))) {
- vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
- atomic_inc(&viodev->cmo.allocs_failed);
- }
-
+ if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))))
+ goto out_fail;
+ ret = iommu_map_page(dev, tbl, page, offset, size, device_to_mask(dev),
+ direction, attrs);
+ if (unlikely(ret == DMA_MAPPING_ERROR))
+ goto out_deallocate;
return ret;
+
+out_deallocate:
+ vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
+out_fail:
+ atomic_inc(&viodev->cmo.allocs_failed);
+ return DMA_MAPPING_ERROR;
}
static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
@@ -542,11 +543,9 @@ static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
unsigned long attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
- struct iommu_table *tbl;
-
- tbl = get_iommu_table_base(dev);
- dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs);
+ struct iommu_table *tbl = get_iommu_table_base(dev);
+ iommu_unmap_page(tbl, dma_handle, size, direction, attrs);
vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
}
@@ -555,34 +554,32 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
unsigned long attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
- struct iommu_table *tbl;
+ struct iommu_table *tbl = get_iommu_table_base(dev);
struct scatterlist *sgl;
int ret, count;
size_t alloc_size = 0;
- tbl = get_iommu_table_base(dev);
for_each_sg(sglist, sgl, nelems, count)
alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl));
- if (vio_cmo_alloc(viodev, alloc_size)) {
- atomic_inc(&viodev->cmo.allocs_failed);
- return 0;
- }
-
- ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs);
-
- if (unlikely(!ret)) {
- vio_cmo_dealloc(viodev, alloc_size);
- atomic_inc(&viodev->cmo.allocs_failed);
- return ret;
- }
+ if (vio_cmo_alloc(viodev, alloc_size))
+ goto out_fail;
+ ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, device_to_mask(dev),
+ direction, attrs);
+ if (unlikely(!ret))
+ goto out_deallocate;
for_each_sg(sglist, sgl, ret, count)
alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
if (alloc_size)
vio_cmo_dealloc(viodev, alloc_size);
-
return ret;
+
+out_deallocate:
+ vio_cmo_dealloc(viodev, alloc_size);
+out_fail:
+ atomic_inc(&viodev->cmo.allocs_failed);
+ return 0;
}
static void vio_dma_iommu_unmap_sg(struct device *dev,
@@ -591,40 +588,27 @@ static void vio_dma_iommu_unmap_sg(struct device *dev,
unsigned long attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
- struct iommu_table *tbl;
+ struct iommu_table *tbl = get_iommu_table_base(dev);
struct scatterlist *sgl;
size_t alloc_size = 0;
int count;
- tbl = get_iommu_table_base(dev);
for_each_sg(sglist, sgl, nelems, count)
alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
- dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
-
+ ppc_iommu_unmap_sg(tbl, sglist, nelems, direction, attrs);
vio_cmo_dealloc(viodev, alloc_size);
}
-static int vio_dma_iommu_dma_supported(struct device *dev, u64 mask)
-{
- return dma_iommu_ops.dma_supported(dev, mask);
-}
-
-static u64 vio_dma_get_required_mask(struct device *dev)
-{
- return dma_iommu_ops.get_required_mask(dev);
-}
-
static const struct dma_map_ops vio_dma_mapping_ops = {
.alloc = vio_dma_iommu_alloc_coherent,
.free = vio_dma_iommu_free_coherent,
- .mmap = dma_nommu_mmap_coherent,
.map_sg = vio_dma_iommu_map_sg,
.unmap_sg = vio_dma_iommu_unmap_sg,
.map_page = vio_dma_iommu_map_page,
.unmap_page = vio_dma_iommu_unmap_page,
- .dma_supported = vio_dma_iommu_dma_supported,
- .get_required_mask = vio_dma_get_required_mask,
+ .dma_supported = dma_iommu_dma_supported,
+ .get_required_mask = dma_iommu_get_required_mask,
};
/**
@@ -1715,3 +1699,10 @@ int vio_disable_interrupts(struct vio_dev *dev)
}
EXPORT_SYMBOL(vio_disable_interrupts);
#endif /* CONFIG_PPC_PSERIES */
+
+static int __init vio_init(void)
+{
+ dma_debug_add_bus(&vio_bus_type);
+ return 0;
+}
+fs_initcall(vio_init);
diff --git a/arch/powerpc/sysdev/6xx-suspend.S b/arch/powerpc/sysdev/6xx-suspend.S
index cf48e9cb2575..6c4aec25c4ba 100644
--- a/arch/powerpc/sysdev/6xx-suspend.S
+++ b/arch/powerpc/sysdev/6xx-suspend.S
@@ -29,10 +29,9 @@ _GLOBAL(mpc6xx_enter_standby)
ori r5, r5, ret_from_standby@l
mtlr r5
- CURRENT_THREAD_INFO(r5, r1)
- lwz r6, TI_LOCAL_FLAGS(r5)
+ lwz r6, TI_LOCAL_FLAGS(r2)
ori r6, r6, _TLF_SLEEPING
- stw r6, TI_LOCAL_FLAGS(r5)
+ stw r6, TI_LOCAL_FLAGS(r2)
mfmsr r5
ori r5, r5, MSR_EE
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index a5b40d1460f1..fc5c5c23303e 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -251,8 +251,11 @@ static void allocate_dart(void)
* 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we
* will blow up an entire large page anyway in the kernel mapping.
*/
- dart_tablebase = __va(memblock_alloc_base(1UL<<24,
- 1UL<<24, 0x80000000L));
+ dart_tablebase = memblock_alloc_try_nid_raw(SZ_16M, SZ_16M,
+ MEMBLOCK_LOW_LIMIT, SZ_2G,
+ NUMA_NO_NODE);
+ if (!dart_tablebase)
+ panic("Failed to allocate 16MB below 2GB for DART table\n");
/* There is no point scanning the DART space for leaks*/
kmemleak_no_scan((void *)dart_tablebase);
@@ -360,13 +363,6 @@ static void iommu_table_dart_setup(void)
set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map);
}
-static void pci_dma_dev_setup_dart(struct pci_dev *dev)
-{
- if (dart_is_u4)
- set_dma_offset(&dev->dev, DART_U4_BYPASS_BASE);
- set_iommu_table_base(&dev->dev, &iommu_table_dart);
-}
-
static void pci_dma_bus_setup_dart(struct pci_bus *bus)
{
if (!iommu_table_dart_inited) {
@@ -390,27 +386,18 @@ static bool dart_device_on_pcie(struct device *dev)
return false;
}
-static int dart_dma_set_mask(struct device *dev, u64 dma_mask)
+static void pci_dma_dev_setup_dart(struct pci_dev *dev)
{
- if (!dev->dma_mask || !dma_supported(dev, dma_mask))
- return -EIO;
-
- /* U4 supports a DART bypass, we use it for 64-bit capable
- * devices to improve performances. However, that only works
- * for devices connected to U4 own PCIe interface, not bridged
- * through hypertransport. We need the device to support at
- * least 40 bits of addresses.
- */
- if (dart_device_on_pcie(dev) && dma_mask >= DMA_BIT_MASK(40)) {
- dev_info(dev, "Using 64-bit DMA iommu bypass\n");
- set_dma_ops(dev, &dma_nommu_ops);
- } else {
- dev_info(dev, "Using 32-bit DMA via iommu\n");
- set_dma_ops(dev, &dma_iommu_ops);
- }
+ if (dart_is_u4 && dart_device_on_pcie(&dev->dev))
+ dev->dev.archdata.dma_offset = DART_U4_BYPASS_BASE;
+ set_iommu_table_base(&dev->dev, &iommu_table_dart);
+}
- *dev->dma_mask = dma_mask;
- return 0;
+static bool iommu_bypass_supported_dart(struct pci_dev *dev, u64 mask)
+{
+ return dart_is_u4 &&
+ dart_device_on_pcie(&dev->dev) &&
+ mask >= DMA_BIT_MASK(40);
}
void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
@@ -428,26 +415,20 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
/* Initialize the DART HW */
if (dart_init(dn) != 0)
- goto bail;
-
- /* Setup bypass if supported */
- if (dart_is_u4)
- ppc_md.dma_set_mask = dart_dma_set_mask;
+ return;
+ /*
+ * U4 supports a DART bypass, we use it for 64-bit capable devices to
+ * improve performance. However, that only works for devices connected
+ * to the U4 own PCIe interface, not bridged through hypertransport.
+ * We need the device to support at least 40 bits of addresses.
+ */
controller_ops->dma_dev_setup = pci_dma_dev_setup_dart;
controller_ops->dma_bus_setup = pci_dma_bus_setup_dart;
+ controller_ops->iommu_bypass_supported = iommu_bypass_supported_dart;
/* Setup pci_dma ops */
set_pci_dma_ops(&dma_iommu_ops);
- return;
-
- bail:
- /* If init failed, use direct iommu and null setup functions */
- controller_ops->dma_dev_setup = NULL;
- controller_ops->dma_bus_setup = NULL;
-
- /* Setup pci_dma ops */
- set_pci_dma_ops(&dma_nommu_ops);
}
#ifdef CONFIG_PM
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 918be816b097..f49aec251a5a 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -40,6 +40,7 @@
#include <asm/mpc85xx.h>
#include <asm/disassemble.h>
#include <asm/ppc-opcode.h>
+#include <asm/swiotlb.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
@@ -114,33 +115,33 @@ static struct pci_ops fsl_indirect_pcie_ops =
static u64 pci64_dma_offset;
#ifdef CONFIG_SWIOTLB
+static void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
+{
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+
+ pdev->dev.bus_dma_mask =
+ hose->dma_window_base_cur + hose->dma_window_size;
+}
+
static void setup_swiotlb_ops(struct pci_controller *hose)
{
- if (ppc_swiotlb_enable) {
+ if (ppc_swiotlb_enable)
hose->controller_ops.dma_dev_setup = pci_dma_dev_setup_swiotlb;
- set_pci_dma_ops(&powerpc_swiotlb_dma_ops);
- }
}
#else
static inline void setup_swiotlb_ops(struct pci_controller *hose) {}
#endif
-static int fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask)
+static void fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask)
{
- if (!dev->dma_mask || !dma_supported(dev, dma_mask))
- return -EIO;
-
/*
* Fix up PCI devices that are able to DMA to the large inbound
* mapping that allows addressing any RAM address from across PCI.
*/
if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) {
- set_dma_ops(dev, &dma_nommu_ops);
- set_dma_offset(dev, pci64_dma_offset);
+ dev->bus_dma_mask = 0;
+ dev->archdata.dma_offset = pci64_dma_offset;
}
-
- *dev->dma_mask = dma_mask;
- return 0;
}
static int setup_one_atmu(struct ccsr_pci __iomem *pci,
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index 8030a0f55e96..fd129c8ecceb 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -771,21 +771,6 @@ struct ipic * __init ipic_init(struct device_node *node, unsigned int flags)
return ipic;
}
-void ipic_set_highest_priority(unsigned int virq)
-{
- struct ipic *ipic = ipic_from_irq(virq);
- unsigned int src = virq_to_hw(virq);
- u32 temp;
-
- temp = ipic_read(ipic->regs, IPIC_SICFR);
-
- /* clear and set HPI */
- temp &= 0x7f000000;
- temp |= (src & 0x7f) << 24;
-
- ipic_write(ipic->regs, IPIC_SICFR, temp);
-}
-
void ipic_set_default_priority(void)
{
ipic_write(primary_ipic->regs, IPIC_SIPRR_A, IPIC_PRIORITY_DEFAULT);
@@ -796,26 +781,6 @@ void ipic_set_default_priority(void)
ipic_write(primary_ipic->regs, IPIC_SMPRR_B, IPIC_PRIORITY_DEFAULT);
}
-void ipic_enable_mcp(enum ipic_mcp_irq mcp_irq)
-{
- struct ipic *ipic = primary_ipic;
- u32 temp;
-
- temp = ipic_read(ipic->regs, IPIC_SERMR);
- temp |= (1 << (31 - mcp_irq));
- ipic_write(ipic->regs, IPIC_SERMR, temp);
-}
-
-void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq)
-{
- struct ipic *ipic = primary_ipic;
- u32 temp;
-
- temp = ipic_read(ipic->regs, IPIC_SERMR);
- temp &= (1 << (31 - mcp_irq));
- ipic_write(ipic->regs, IPIC_SERMR, temp);
-}
-
u32 ipic_get_mcp_status(void)
{
return primary_ipic ? ipic_read(primary_ipic->regs, IPIC_SERSR) : 0;
diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c
index 1fd0717ade02..1f1af12f23e2 100644
--- a/arch/powerpc/sysdev/tsi108_dev.c
+++ b/arch/powerpc/sysdev/tsi108_dev.c
@@ -51,7 +51,7 @@ phys_addr_t get_csrbase(void)
const void *prop = of_get_property(tsi, "reg", &size);
tsi108_csr_base = of_translate_address(tsi, prop);
of_node_put(tsi);
- };
+ }
return tsi108_csr_base;
}
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 94a69a62f5db..70a8f9e31a2d 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -442,7 +442,7 @@ static void xive_dec_target_count(int cpu)
struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
struct xive_q *q = &xc->queue[xive_irq_priority];
- if (unlikely(WARN_ON(cpu < 0 || !xc))) {
+ if (WARN_ON(cpu < 0 || !xc)) {
pr_err("%s: cpu=%d xc=%p\n", __func__, cpu, xc);
return;
}
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index 878f9c1d3615..3050f9323254 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -5,6 +5,7 @@
subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header)
GCOV_PROFILE := n
+KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
# Disable ftrace for the entire directory
diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c
index 9deea5ee13f6..27f1e6415036 100644
--- a/arch/powerpc/xmon/ppc-dis.c
+++ b/arch/powerpc/xmon/ppc-dis.c
@@ -158,7 +158,7 @@ int print_insn_powerpc (unsigned long insn, unsigned long memaddr)
dialect |= (PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_POWER7
| PPC_OPCODE_POWER8 | PPC_OPCODE_POWER9 | PPC_OPCODE_HTM
| PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2
- | PPC_OPCODE_VSX | PPC_OPCODE_VSX3),
+ | PPC_OPCODE_VSX | PPC_OPCODE_VSX3);
/* Get the major opcode of the insn. */
opcode = NULL;
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 757b8499aba2..a0f44f992360 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2997,7 +2997,7 @@ static void show_task(struct task_struct *tsk)
printf("%px %016lx %6d %6d %c %2d %s\n", tsk,
tsk->thread.ksp,
tsk->pid, rcu_dereference(tsk->parent)->pid,
- state, task_thread_info(tsk)->cpu,
+ state, task_cpu(tsk),
tsk->comm);
}