Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/Kconfig | 12
-rw-r--r--  arch/powerpc/Makefile | 65
-rw-r--r--  arch/powerpc/boot/Makefile | 86
-rw-r--r--  arch/powerpc/boot/cuboot-c2k.c | 1
-rw-r--r--  arch/powerpc/boot/decompress.c | 148
-rw-r--r--  arch/powerpc/boot/fixup-headers.sed | 12
-rw-r--r--  arch/powerpc/boot/gunzip_util.c | 204
-rw-r--r--  arch/powerpc/boot/gunzip_util.h | 45
-rw-r--r--  arch/powerpc/boot/main.c | 35
-rw-r--r--  arch/powerpc/boot/ops.h | 3
-rw-r--r--  arch/powerpc/boot/stdbool.h | 14
-rw-r--r--  arch/powerpc/boot/stdint.h | 13
-rw-r--r--  arch/powerpc/boot/types.h | 14
-rwxr-xr-x  arch/powerpc/boot/wrapper | 61
-rw-r--r--  arch/powerpc/boot/xz_config.h | 39
-rw-r--r--  arch/powerpc/configs/powernv_defconfig | 19
-rw-r--r--  arch/powerpc/configs/ppc64_defconfig | 19
-rw-r--r--  arch/powerpc/configs/pseries_defconfig | 19
-rw-r--r--  arch/powerpc/crypto/crc32c-vpmsum_glue.c | 3
-rw-r--r--  arch/powerpc/include/asm/asm-prototypes.h | 43
-rw-r--r--  arch/powerpc/include/asm/atomic.h | 4
-rw-r--r--  arch/powerpc/include/asm/book3s/32/pgtable.h | 3
-rw-r--r--  arch/powerpc/include/asm/book3s/64/mmu-hash.h | 37
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h | 7
-rw-r--r--  arch/powerpc/include/asm/book3s/64/radix.h | 88
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush-radix.h | 1
-rw-r--r--  arch/powerpc/include/asm/cpu_has_feature.h | 2
-rw-r--r--  arch/powerpc/include/asm/cpuidle.h | 13
-rw-r--r--  arch/powerpc/include/asm/cputable.h | 4
-rw-r--r--  arch/powerpc/include/asm/cputhreads.h | 1
-rw-r--r--  arch/powerpc/include/asm/exception-64s.h | 148
-rw-r--r--  arch/powerpc/include/asm/fadump.h | 4
-rw-r--r--  arch/powerpc/include/asm/feature-fixups.h | 1
-rw-r--r--  arch/powerpc/include/asm/head-64.h | 393
-rw-r--r--  arch/powerpc/include/asm/hmi.h | 2
-rw-r--r--  arch/powerpc/include/asm/io.h | 29
-rw-r--r--  arch/powerpc/include/asm/kvm_asm.h | 10
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h | 39
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h | 90
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 124
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 28
-rw-r--r--  arch/powerpc/include/asm/machdep.h | 2
-rw-r--r--  arch/powerpc/include/asm/mmu-book3e.h | 3
-rw-r--r--  arch/powerpc/include/asm/mmu.h | 5
-rw-r--r--  arch/powerpc/include/asm/mmu_context.h | 4
-rw-r--r--  arch/powerpc/include/asm/mmzone.h | 3
-rw-r--r--  arch/powerpc/include/asm/mpic_msgr.h | 6
-rw-r--r--  arch/powerpc/include/asm/nohash/32/pgtable.h | 3
-rw-r--r--  arch/powerpc/include/asm/nohash/64/pgtable.h | 3
-rw-r--r--  arch/powerpc/include/asm/opal.h | 1
-rw-r--r--  arch/powerpc/include/asm/paca.h | 12
-rw-r--r--  arch/powerpc/include/asm/parport.h | 2
-rw-r--r--  arch/powerpc/include/asm/pci-bridge.h | 1
-rw-r--r--  arch/powerpc/include/asm/pnv-pci.h | 7
-rw-r--r--  arch/powerpc/include/asm/ppc-opcode.h | 2
-rw-r--r--  arch/powerpc/include/asm/ppc_asm.h | 49
-rw-r--r--  arch/powerpc/include/asm/processor.h | 16
-rw-r--r--  arch/powerpc/include/asm/reg.h | 6
-rw-r--r--  arch/powerpc/include/asm/signal.h | 2
-rw-r--r--  arch/powerpc/include/asm/switch_to.h | 8
-rw-r--r--  arch/powerpc/include/asm/tm.h | 5
-rw-r--r--  arch/powerpc/include/asm/uaccess.h | 29
-rw-r--r--  arch/powerpc/include/asm/xics.h | 2
-rw-r--r--  arch/powerpc/kernel/Makefile | 17
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 14
-rw-r--r--  arch/powerpc/kernel/cputable.c | 19
-rw-r--r--  arch/powerpc/kernel/eeh.c | 8
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c | 10
-rw-r--r--  arch/powerpc/kernel/eeh_pe.c | 1
-rw-r--r--  arch/powerpc/kernel/entry_32.S | 1
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 33
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 2082
-rw-r--r--  arch/powerpc/kernel/fadump.c | 13
-rw-r--r--  arch/powerpc/kernel/fpu.S | 26
-rw-r--r--  arch/powerpc/kernel/ftrace.c | 3
-rw-r--r--  arch/powerpc/kernel/head_32.S | 3
-rw-r--r--  arch/powerpc/kernel/head_64.S | 53
-rw-r--r--  arch/powerpc/kernel/head_8xx.S | 1
-rw-r--r--  arch/powerpc/kernel/hw_breakpoint.c | 9
-rw-r--r--  arch/powerpc/kernel/ibmebus.c | 2
-rw-r--r--  arch/powerpc/kernel/idle_book3s.S | 27
-rw-r--r--  arch/powerpc/kernel/irq.c | 17
-rw-r--r--  arch/powerpc/kernel/kprobes.c | 2
-rw-r--r--  arch/powerpc/kernel/legacy_serial.c | 14
-rw-r--r--  arch/powerpc/kernel/machine_kexec_64.c | 75
-rw-r--r--  arch/powerpc/kernel/mce.c | 3
-rw-r--r--  arch/powerpc/kernel/misc_32.S | 4
-rw-r--r--  arch/powerpc/kernel/misc_64.S | 30
-rw-r--r--  arch/powerpc/kernel/module.c | 2
-rw-r--r--  arch/powerpc/kernel/nvram_64.c | 10
-rw-r--r--  arch/powerpc/kernel/pci-common.c | 48
-rw-r--r--  arch/powerpc/kernel/pci_of_scan.c | 2
-rw-r--r--  arch/powerpc/kernel/process.c | 201
-rw-r--r--  arch/powerpc/kernel/prom_init.c | 93
-rw-r--r--  arch/powerpc/kernel/ptrace.c | 372
-rw-r--r--  arch/powerpc/kernel/setup_32.c | 9
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 1
-rw-r--r--  arch/powerpc/kernel/signal.c | 41
-rw-r--r--  arch/powerpc/kernel/signal.h | 18
-rw-r--r--  arch/powerpc/kernel/signal_32.c | 136
-rw-r--r--  arch/powerpc/kernel/signal_64.c | 221
-rw-r--r--  arch/powerpc/kernel/smp.c | 2
-rw-r--r--  arch/powerpc/kernel/syscalls.c | 1
-rw-r--r--  arch/powerpc/kernel/time.c | 1
-rw-r--r--  arch/powerpc/kernel/tm.S | 94
-rw-r--r--  arch/powerpc/kernel/traps.c | 86
-rw-r--r--  arch/powerpc/kernel/vdso.c | 1
-rw-r--r--  arch/powerpc/kernel/vdso32/Makefile | 6
-rw-r--r--  arch/powerpc/kernel/vdso64/Makefile | 10
-rw-r--r--  arch/powerpc/kernel/vdso64/datapage.S | 2
-rw-r--r--  arch/powerpc/kernel/vdso64/gettimeofday.S | 2
-rw-r--r--  arch/powerpc/kernel/vector.S | 25
-rw-r--r--  arch/powerpc/kernel/vmlinux.lds.S | 56
-rw-r--r--  arch/powerpc/kvm/Kconfig | 3
-rw-r--r--  arch/powerpc/kvm/Makefile | 20
-rw-r--r--  arch/powerpc/kvm/book3s.c | 13
-rw-r--r--  arch/powerpc/kvm/book3s_emulate.c | 4
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 533
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c | 156
-rw-r--r--  arch/powerpc/kvm/book3s_hv_hmi.c (renamed from arch/powerpc/kernel/hmi.c) | 0
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xics.c | 120
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 197
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c | 10
-rw-r--r--  arch/powerpc/kvm/book3s_xics.c | 69
-rw-r--r--  arch/powerpc/kvm/book3s_xics.h | 2
-rw-r--r--  arch/powerpc/kvm/booke.c | 2
-rw-r--r--  arch/powerpc/kvm/e500_mmu.c | 73
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 61
-rw-r--r--  arch/powerpc/kvm/trace_hv.h | 22
-rw-r--r--  arch/powerpc/lib/Makefile | 2
-rw-r--r--  arch/powerpc/lib/checksum_32.S | 8
-rw-r--r--  arch/powerpc/lib/feature-fixups.c | 3
-rw-r--r--  arch/powerpc/lib/mem_64.S | 2
-rw-r--r--  arch/powerpc/mm/Makefile | 7
-rw-r--r--  arch/powerpc/mm/fault.c | 6
-rw-r--r--  arch/powerpc/mm/hash_native_64.c | 42
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c | 136
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 7
-rw-r--r--  arch/powerpc/mm/init_32.c | 2
-rw-r--r--  arch/powerpc/mm/mmu_context_iommu.c | 81
-rw-r--r--  arch/powerpc/mm/mmu_context_nohash.c | 56
-rw-r--r--  arch/powerpc/mm/pgtable-book3s64.c | 11
-rw-r--r--  arch/powerpc/mm/pgtable-radix.c | 40
-rw-r--r--  arch/powerpc/mm/pgtable.c | 2
-rw-r--r--  arch/powerpc/mm/slb_low.S | 15
-rw-r--r--  arch/powerpc/mm/tlb-radix.c | 24
-rw-r--r--  arch/powerpc/net/bpf_jit.h | 2
-rw-r--r--  arch/powerpc/net/bpf_jit64.h | 26
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c | 264
-rw-r--r--  arch/powerpc/oprofile/cell/spu_profiler.c | 2
-rw-r--r--  arch/powerpc/oprofile/cell/spu_task_sync.c | 4
-rw-r--r--  arch/powerpc/perf/core-book3s.c | 2
-rw-r--r--  arch/powerpc/perf/hv-gpci.c | 2
-rw-r--r--  arch/powerpc/perf/power7-pmu.c | 2
-rw-r--r--  arch/powerpc/perf/power8-pmu.c | 2
-rw-r--r--  arch/powerpc/perf/power9-pmu.c | 2
-rw-r--r--  arch/powerpc/platforms/44x/warp.c | 2
-rw-r--r--  arch/powerpc/platforms/512x/mpc5121_ads_cpld.c | 8
-rw-r--r--  arch/powerpc/platforms/512x/mpc512x_lpbfifo.c | 3
-rw-r--r--  arch/powerpc/platforms/52xx/mpc52xx_pic.c | 2
-rw-r--r--  arch/powerpc/platforms/82xx/pq2ads-pci-pic.c | 2
-rw-r--r--  arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | 1
-rw-r--r--  arch/powerpc/platforms/83xx/mpc832x_rdb.c | 2
-rw-r--r--  arch/powerpc/platforms/83xx/suspend.c | 4
-rw-r--r--  arch/powerpc/platforms/85xx/common.c | 2
-rw-r--r--  arch/powerpc/platforms/85xx/mpc85xx_cds.c | 4
-rw-r--r--  arch/powerpc/platforms/85xx/mpc85xx_ds.c | 4
-rw-r--r--  arch/powerpc/platforms/85xx/socrates_fpga_pic.c | 6
-rw-r--r--  arch/powerpc/platforms/86xx/pic.c | 4
-rw-r--r--  arch/powerpc/platforms/8xx/m8xx_setup.c | 2
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 2
-rw-r--r--  arch/powerpc/platforms/cell/axon_msi.c | 6
-rw-r--r--  arch/powerpc/platforms/cell/cbe_regs.c | 2
-rw-r--r--  arch/powerpc/platforms/cell/interrupt.c | 18
-rw-r--r--  arch/powerpc/platforms/cell/iommu.c | 4
-rw-r--r--  arch/powerpc/platforms/cell/pmu.c | 4
-rw-r--r--  arch/powerpc/platforms/cell/ras.c | 2
-rw-r--r--  arch/powerpc/platforms/cell/spider-pic.c | 18
-rw-r--r--  arch/powerpc/platforms/cell/spu_base.c | 16
-rw-r--r--  arch/powerpc/platforms/cell/spu_manage.c | 9
-rw-r--r--  arch/powerpc/platforms/cell/spufs/inode.c | 4
-rw-r--r--  arch/powerpc/platforms/chrp/setup.c | 4
-rw-r--r--  arch/powerpc/platforms/embedded6xx/flipper-pic.c | 2
-rw-r--r--  arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 4
-rw-r--r--  arch/powerpc/platforms/embedded6xx/holly.c | 2
-rw-r--r--  arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 2
-rw-r--r--  arch/powerpc/platforms/embedded6xx/mvme5100.c | 4
-rw-r--r--  arch/powerpc/platforms/maple/pci.c | 6
-rw-r--r--  arch/powerpc/platforms/maple/setup.c | 4
-rw-r--r--  arch/powerpc/platforms/pasemi/Kconfig | 10
-rw-r--r--  arch/powerpc/platforms/pasemi/gpio_mdio.c | 4
-rw-r--r--  arch/powerpc/platforms/pasemi/iommu.c | 7
-rw-r--r--  arch/powerpc/platforms/pasemi/misc.c | 2
-rw-r--r--  arch/powerpc/platforms/pasemi/msi.c | 4
-rw-r--r--  arch/powerpc/platforms/pasemi/setup.c | 8
-rw-r--r--  arch/powerpc/platforms/powermac/low_i2c.c | 6
-rw-r--r--  arch/powerpc/platforms/powermac/pfunc_base.c | 4
-rw-r--r--  arch/powerpc/platforms/powermac/pfunc_core.c | 2
-rw-r--r--  arch/powerpc/platforms/powermac/pic.c | 6
-rw-r--r--  arch/powerpc/platforms/powermac/smp.c | 52
-rw-r--r--  arch/powerpc/platforms/powernv/eeh-powernv.c | 18
-rw-r--r--  arch/powerpc/platforms/powernv/npu-dma.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-dump.c | 7
-rw-r--r--  arch/powerpc/platforms/powernv/opal-elog.c | 7
-rw-r--r--  arch/powerpc/platforms/powernv/opal-irqchip.c | 7
-rw-r--r--  arch/powerpc/platforms/powernv/opal-wrappers.S | 1
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c | 1
-rw-r--r--  arch/powerpc/platforms/powernv/pci-cxl.c | 4
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c | 168
-rw-r--r--  arch/powerpc/platforms/powernv/pci.c | 8
-rw-r--r--  arch/powerpc/platforms/ps3/device-init.c | 2
-rw-r--r--  arch/powerpc/platforms/ps3/interrupt.c | 10
-rw-r--r--  arch/powerpc/platforms/ps3/smp.c | 4
-rw-r--r--  arch/powerpc/platforms/ps3/spu.c | 4
-rw-r--r--  arch/powerpc/platforms/pseries/dlpar.c | 5
-rw-r--r--  arch/powerpc/platforms/pseries/event_sources.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-memory.c | 26
-rw-r--r--  arch/powerpc/platforms/pseries/msi.c | 6
-rw-r--r--  arch/powerpc/platforms/pseries/pci.c | 4
-rw-r--r--  arch/powerpc/platforms/pseries/pci_dlpar.c | 7
-rw-r--r--  arch/powerpc/platforms/pseries/scanlog.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c | 6
-rw-r--r--  arch/powerpc/sysdev/axonram.c | 6
-rw-r--r--  arch/powerpc/sysdev/cpm1.c | 14
-rw-r--r--  arch/powerpc/sysdev/cpm_common.c | 3
-rw-r--r--  arch/powerpc/sysdev/ehv_pic.c | 4
-rw-r--r--  arch/powerpc/sysdev/fsl_gtm.c | 2
-rw-r--r--  arch/powerpc/sysdev/fsl_mpic_err.c | 6
-rw-r--r--  arch/powerpc/sysdev/fsl_msi.c | 12
-rw-r--r--  arch/powerpc/sysdev/fsl_rio.c | 2
-rw-r--r--  arch/powerpc/sysdev/ge/ge_pic.c | 8
-rw-r--r--  arch/powerpc/sysdev/i8259.c | 4
-rw-r--r--  arch/powerpc/sysdev/ipic.c | 4
-rw-r--r--  arch/powerpc/sysdev/mmio_nvram.c | 2
-rw-r--r--  arch/powerpc/sysdev/mpc8xx_pic.c | 2
-rw-r--r--  arch/powerpc/sysdev/mpic.c | 14
-rw-r--r--  arch/powerpc/sysdev/mpic_msgr.c | 4
-rw-r--r--  arch/powerpc/sysdev/mpic_u3msi.c | 4
-rw-r--r--  arch/powerpc/sysdev/mv64x60_pic.c | 2
-rw-r--r--  arch/powerpc/sysdev/pmi.c | 2
-rw-r--r--  arch/powerpc/sysdev/ppc4xx_hsta_msi.c | 6
-rw-r--r--  arch/powerpc/sysdev/ppc4xx_msi.c | 6
-rw-r--r--  arch/powerpc/sysdev/ppc4xx_soc.c | 2
-rw-r--r--  arch/powerpc/sysdev/tsi108_pci.c | 2
-rw-r--r--  arch/powerpc/sysdev/uic.c | 2
-rw-r--r--  arch/powerpc/sysdev/xics/Kconfig | 1
-rw-r--r--  arch/powerpc/sysdev/xics/icp-hv.c | 6
-rw-r--r--  arch/powerpc/sysdev/xics/icp-native.c | 6
-rw-r--r--  arch/powerpc/sysdev/xics/icp-opal.c | 20
-rw-r--r--  arch/powerpc/sysdev/xics/ics-opal.c | 4
-rw-r--r--  arch/powerpc/sysdev/xics/ics-rtas.c | 4
-rw-r--r--  arch/powerpc/sysdev/xics/xics-common.c | 61
-rw-r--r--  arch/powerpc/xmon/spr_access.S | 4
253 files changed, 5349 insertions(+), 3433 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ec4047e170a0..65fba4c34cd7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -12,11 +12,6 @@ config 64BIT
bool
default y if PPC64
-config WORD_SIZE
- int
- default 64 if PPC64
- default 32 if !PPC64
-
config ARCH_PHYS_ADDR_T_64BIT
def_bool PPC64 || PHYS_64BIT
@@ -101,7 +96,7 @@ config PPC
select VIRT_TO_BUS if !PPC64
select HAVE_IDE
select HAVE_IOREMAP_PROT
- select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN
+ select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
select HAVE_KPROBES
select HAVE_ARCH_KGDB
select HAVE_KRETPROBES
@@ -113,7 +108,6 @@ config PPC
select HAVE_DEBUG_KMEMLEAK
select ARCH_HAS_SG_CHAIN
select GENERIC_ATOMIC64 if PPC32
- select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
@@ -166,6 +160,8 @@ config PPC
select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
select GENERIC_CPU_AUTOPROBE
select HAVE_VIRT_CPU_ACCOUNTING
+ select HAVE_ARCH_HARDENED_USERCOPY
+ select HAVE_KERNEL_GZIP
config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN
@@ -636,7 +632,7 @@ config FORCE_MAX_ZONEORDER
int "Maximum zone order"
range 8 9 if PPC64 && PPC_64K_PAGES
default "9" if PPC64 && PPC_64K_PAGES
- range 9 13 if PPC64 && !PPC_64K_PAGES
+ range 13 13 if PPC64 && !PPC_64K_PAGES
default "13" if PPC64 && !PPC_64K_PAGES
range 9 64 if PPC32 && PPC_16K_PAGES
default "9" if PPC32 && PPC_16K_PAGES
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index ca254546cd05..50d020ac0f48 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -43,57 +43,49 @@ NM := $(NM) --synthetic
endif
endif
-ifeq ($(CONFIG_PPC64),y)
-ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-OLDARCH := ppc64le
-else
-OLDARCH := ppc64
-endif
-else
-ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-OLDARCH := ppcle
-else
-OLDARCH := ppc
-endif
-endif
+# BITS is used as extension for files which are available in a 32 bit
+# and a 64 bit version to simplify shared Makefiles.
+# e.g.: obj-y += foo_$(BITS).o
+export BITS
-# It seems there are times we use this Makefile without
-# including the config file, but this replicates the old behaviour
-ifeq ($(CONFIG_WORD_SIZE),)
-CONFIG_WORD_SIZE := 32
+ifdef CONFIG_PPC64
+ BITS := 64
+else
+ BITS := 32
endif
-UTS_MACHINE := $(OLDARCH)
+machine-y = ppc
+machine-$(CONFIG_PPC64) += 64
+machine-$(CONFIG_CPU_LITTLE_ENDIAN) += le
+UTS_MACHINE := $(subst $(space),,$(machine-y))
ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override CC += -mlittle-endian
-ifneq ($(cc-name),clang)
-override CC += -mno-strict-align
-endif
-override AS += -mlittle-endian
override LD += -EL
-override CROSS32CC += -mlittle-endian
-override CROSS32AS += -mlittle-endian
LDEMULATION := lppc
GNUTARGET := powerpcle
MULTIPLEWORD := -mno-multiple
KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect)
else
-ifeq ($(call cc-option-yn,-mbig-endian),y)
-override CC += -mbig-endian
-override AS += -mbig-endian
-endif
override LD += -EB
LDEMULATION := ppc
GNUTARGET := powerpc
MULTIPLEWORD := -mmultiple
endif
+cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian)
+cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian
+ifneq ($(cc-name),clang)
+ cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align
+endif
+
+aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian)
+aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian
+
ifeq ($(HAS_BIARCH),y)
-override AS += -a$(CONFIG_WORD_SIZE)
-override LD += -m elf$(CONFIG_WORD_SIZE)$(LDEMULATION)
-override CC += -m$(CONFIG_WORD_SIZE)
-override AR := GNUTARGET=elf$(CONFIG_WORD_SIZE)-$(GNUTARGET) $(AR)
+override AS += -a$(BITS)
+override LD += -m elf$(BITS)$(LDEMULATION)
+override CC += -m$(BITS)
+override AR := GNUTARGET=elf$(BITS)-$(GNUTARGET) $(AR)
endif
LDFLAGS_vmlinux-y := -Bstatic
@@ -180,7 +172,7 @@ KBUILD_CFLAGS += $(call cc-option,-msoft-float)
KBUILD_CFLAGS += -pipe -Iarch/$(ARCH) $(CFLAGS-y)
CPP = $(CC) -E $(KBUILD_CFLAGS)
-CHECKFLAGS += -m$(CONFIG_WORD_SIZE) -D__powerpc__ -D__powerpc$(CONFIG_WORD_SIZE)__
+CHECKFLAGS += -m$(BITS) -D__powerpc__ -D__powerpc$(BITS)__
ifdef CONFIG_CPU_BIG_ENDIAN
CHECKFLAGS += -D__BIG_ENDIAN__
else
@@ -232,7 +224,10 @@ cpu-as-$(CONFIG_E200) += -Wa,-me200
KBUILD_AFLAGS += $(cpu-as-y)
KBUILD_CFLAGS += $(cpu-as-y)
-head-y := arch/powerpc/kernel/head_$(CONFIG_WORD_SIZE).o
+KBUILD_AFLAGS += $(aflags-y)
+KBUILD_CFLAGS += $(cflags-y)
+
+head-y := arch/powerpc/kernel/head_$(BITS).o
head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o
head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o
head-$(CONFIG_44x) := arch/powerpc/kernel/head_44x.o
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 1a2a6e8dc40d..eae2dc8bc218 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -19,10 +19,15 @@
all: $(obj)/zImage
+compress-$(CONFIG_KERNEL_GZIP) := CONFIG_KERNEL_GZIP
+compress-$(CONFIG_KERNEL_XZ) := CONFIG_KERNEL_XZ
+
BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -Os -msoft-float -pipe \
-fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
- -isystem $(shell $(CROSS32CC) -print-file-name=include)
+ -isystem $(shell $(CROSS32CC) -print-file-name=include) \
+ -D$(compress-y)
+
ifdef CONFIG_PPC64_BOOT_WRAPPER
BOOTCFLAGS += -m64
endif
@@ -59,13 +64,30 @@ $(obj)/treeboot-currituck.o: BOOTCFLAGS += -mcpu=405
$(obj)/treeboot-akebono.o: BOOTCFLAGS += -mcpu=405
$(obj)/virtex405-head.o: BOOTAFLAGS += -mcpu=405
+# The pre-boot decompressors pull in a lot of kernel headers and other source
+# files. This creates a bit of a dependency headache since we need to copy
+# these files into the build dir, fix up any includes and ensure that dependent
+# files are copied in the right order.
+
+# These need to be separate variables because they are copied out of different
+# directories in the kernel tree. Sure, you could merge them, but it's a
+# cure-is-worse-than-disease situation.
+zlib-decomp-$(CONFIG_KERNEL_GZIP) := decompress_inflate.c
+zlib-$(CONFIG_KERNEL_GZIP) := inffast.c inflate.c inftrees.c
+zlibheader-$(CONFIG_KERNEL_GZIP) := inffast.h inffixed.h inflate.h inftrees.h infutil.h
+zliblinuxheader-$(CONFIG_KERNEL_GZIP) := zlib.h zconf.h zutil.h
-zlib := inffast.c inflate.c inftrees.c
-zlibheader := inffast.h inffixed.h inflate.h inftrees.h infutil.h
-zliblinuxheader := zlib.h zconf.h zutil.h
+$(addprefix $(obj)/, decompress.o): \
+ $(addprefix $(obj)/,$(zlib-decomp-y))
-$(addprefix $(obj)/,$(zlib) cuboot-c2k.o gunzip_util.o main.o): \
- $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader))
+$(addprefix $(obj)/, $(zlib-decomp-y)): \
+ $(addprefix $(obj)/,$(zliblinuxheader-y)) \
+ $(addprefix $(obj)/,$(zlibheader-y)) \
+ $(addprefix $(obj)/,$(zlib-y))
+
+$(addprefix $(obj)/,$(zlib-y)): \
+ $(addprefix $(obj)/,$(zliblinuxheader-y)) \
+ $(addprefix $(obj)/,$(zlibheader-y))
libfdt := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c
libfdtheader := fdt.h libfdt.h libfdt_internal.h
@@ -73,10 +95,10 @@ libfdtheader := fdt.h libfdt.h libfdt_internal.h
$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \
$(addprefix $(obj)/,$(libfdtheader))
-src-wlib-y := string.S crt0.S crtsavres.S stdio.c main.c \
+src-wlib-y := string.S crt0.S crtsavres.S stdio.c decompress.c main.c \
$(libfdt) libfdt-wrapper.c \
ns16550.c serial.c simple_alloc.c div64.S util.S \
- gunzip_util.c elf_util.c $(zlib) devtree.c stdlib.c \
+ elf_util.c $(zlib-y) devtree.c stdlib.c \
oflib.c ofconsole.c cuboot.c mpsc.c cpm-serial.c \
uartlite.c mpc52xx-psc.c opal.c opal-calls.S
src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c
@@ -125,23 +147,20 @@ obj-wlib := $(addsuffix .o, $(basename $(addprefix $(obj)/, $(src-wlib))))
obj-plat := $(addsuffix .o, $(basename $(addprefix $(obj)/, $(src-plat))))
obj-plat: $(libfdt)
-quiet_cmd_copy_zlib = COPY $@
- cmd_copy_zlib = sed "s@__used@@;s@<linux/\([^>]*\).*@\"\1\"@" $< > $@
+quiet_cmd_copy_kern_src = COPY $@
+ cmd_copy_kern_src = sed -f $(srctree)/arch/powerpc/boot/fixup-headers.sed $< > $@
-quiet_cmd_copy_zlibheader = COPY $@
- cmd_copy_zlibheader = sed "s@<linux/\([^>]*\).*@\"\1\"@" $< > $@
-# stddef.h for NULL
-quiet_cmd_copy_zliblinuxheader = COPY $@
- cmd_copy_zliblinuxheader = sed "s@<linux/string.h>@\"string.h\"@;s@<linux/kernel.h>@<stddef.h>@;s@<linux/\([^>]*\).*@\"\1\"@" $< > $@
+$(addprefix $(obj)/,$(zlib-y)): $(obj)/%: $(srctree)/lib/zlib_inflate/%
+ $(call cmd,copy_kern_src)
-$(addprefix $(obj)/,$(zlib)): $(obj)/%: $(srctree)/lib/zlib_inflate/%
- $(call cmd,copy_zlib)
+$(addprefix $(obj)/,$(zlibheader-y)): $(obj)/%: $(srctree)/lib/zlib_inflate/%
+ $(call cmd,copy_kern_src)
-$(addprefix $(obj)/,$(zlibheader)): $(obj)/%: $(srctree)/lib/zlib_inflate/%
- $(call cmd,copy_zlibheader)
+$(addprefix $(obj)/,$(zliblinuxheader-y)): $(obj)/%: $(srctree)/include/linux/%
+ $(call cmd,copy_kern_src)
-$(addprefix $(obj)/,$(zliblinuxheader)): $(obj)/%: $(srctree)/include/linux/%
- $(call cmd,copy_zliblinuxheader)
+$(addprefix $(obj)/,$(zlib-decomp-y)): $(obj)/%: $(srctree)/lib/%
+ $(call cmd,copy_kern_src)
quiet_cmd_copy_libfdt = COPY $@
cmd_copy_libfdt = cp $< $@
@@ -150,17 +169,17 @@ $(addprefix $(obj)/,$(libfdt) $(libfdtheader)): $(obj)/%: $(srctree)/scripts/dtc
$(call cmd,copy_libfdt)
$(obj)/empty.c:
- @touch $@
+ $(Q)touch $@
$(obj)/zImage.lds: $(obj)/%: $(srctree)/$(src)/%.S
$(CROSS32CC) $(cpp_flags) -E -Wp,-MD,$(depfile) -P -Upowerpc \
-D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $<
$(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S
- @cp $< $@
+ $(Q)cp $< $@
-clean-files := $(zlib) $(zlibheader) $(zliblinuxheader) \
- $(libfdt) $(libfdtheader) \
+clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \
+ $(zlib-decomp-) $(libfdt) $(libfdtheader) \
empty.c zImage.coff.lds zImage.ps3.lds zImage.lds
quiet_cmd_bootcc = BOOTCC $@
@@ -207,10 +226,14 @@ CROSSWRAP := -C "$(CROSS_COMPILE)"
endif
endif
+compressor-$(CONFIG_KERNEL_GZIP) := gz
+compressor-$(CONFIG_KERNEL_XZ) := xz
+
# args (to if_changed): 1 = (this rule), 2 = platform, 3 = dts 4=dtb 5=initrd
quiet_cmd_wrap = WRAP $@
- cmd_wrap =$(CONFIG_SHELL) $(wrapper) -c -o $@ -p $2 $(CROSSWRAP) \
- $(if $3, -s $3)$(if $4, -d $4)$(if $5, -i $5) vmlinux
+ cmd_wrap =$(CONFIG_SHELL) $(wrapper) -Z $(compressor-y) -c -o $@ -p $2 \
+ $(CROSSWRAP) $(if $3, -s $3)$(if $4, -d $4)$(if $5, -i $5) \
+ vmlinux
image-$(CONFIG_PPC_PSERIES) += zImage.pseries
image-$(CONFIG_PPC_POWERNV) += zImage.pseries
@@ -391,9 +414,9 @@ image-y := vmlinux.strip
endif
$(obj)/zImage: $(addprefix $(obj)/, $(image-y))
- @rm -f $@; ln $< $@
+ $(Q)rm -f $@; ln $< $@
$(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y))
- @rm -f $@; ln $< $@
+ $(Q)rm -f $@; ln $< $@
# Only install the vmlinux
install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y))
@@ -410,8 +433,9 @@ clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \
zImage.maple simpleImage.* otheros.bld *.dtb
# clean up files cached by wrapper
-clean-kernel := vmlinux.strip vmlinux.bin
-clean-kernel += $(addsuffix .gz,$(clean-kernel))
+clean-kernel-base := vmlinux.strip vmlinux.bin
+clean-kernel := $(addsuffix .gz,$(clean-kernel-base))
+clean-kernel += $(addsuffix .xz,$(clean-kernel-base))
# If not absolute clean-files are relative to $(obj).
clean-files += $(addprefix $(objtree)/, $(clean-kernel))
diff --git a/arch/powerpc/boot/cuboot-c2k.c b/arch/powerpc/boot/cuboot-c2k.c
index e43594950ba3..9309c51f1d65 100644
--- a/arch/powerpc/boot/cuboot-c2k.c
+++ b/arch/powerpc/boot/cuboot-c2k.c
@@ -18,7 +18,6 @@
#include "io.h"
#include "ops.h"
#include "elf.h"
-#include "gunzip_util.h"
#include "mv64x60.h"
#include "cuboot.h"
#include "ppcboot.h"
diff --git a/arch/powerpc/boot/decompress.c b/arch/powerpc/boot/decompress.c
new file mode 100644
index 000000000000..3aff4423ad01
--- /dev/null
+++ b/arch/powerpc/boot/decompress.c
@@ -0,0 +1,148 @@
+/*
+ * Wrapper around the kernel's pre-boot decompression library.
+ *
+ * Copyright (C) IBM Corporation 2016.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "elf.h"
+#include "page.h"
+#include "string.h"
+#include "stdio.h"
+#include "ops.h"
+#include "reg.h"
+#include "types.h"
+
+/*
+ * The decompress_*.c files play #ifdef games so they can be used in both
+ * pre-boot and regular kernel code. We need these definitions to make the
+ * includes work.
+ */
+
+#define STATIC static
+#define INIT
+#define __always_inline inline
+
+/*
+ * The build process will copy the required zlib source files and headers
+ * out of lib/ and "fix" the includes so they do not pull in other kernel
+ * headers.
+ */
+
+#ifdef CONFIG_KERNEL_GZIP
+# include "decompress_inflate.c"
+#endif
+
+#ifdef CONFIG_KERNEL_XZ
+# include "xz_config.h"
+# include "../../../lib/decompress_unxz.c"
+#endif
+
+/* globals for tracking the state of the decompression */
+static unsigned long decompressed_bytes;
+static unsigned long limit;
+static unsigned long skip;
+static char *output_buffer;
+
+/*
+ * flush() is called by __decompress() when the decompressor's scratch buffer is
+ * full.
+ */
+static long flush(void *v, unsigned long buffer_size)
+{
+ unsigned long end = decompressed_bytes + buffer_size;
+ unsigned long size = buffer_size;
+ unsigned long offset = 0;
+ char *in = v;
+ char *out;
+
+ /*
+ * if we hit our decompression limit, we need to fake an error to abort
+ * the in-progress decompression.
+ */
+ if (decompressed_bytes >= limit)
+ return -1;
+
+ /* skip this entire block */
+ if (end <= skip) {
+ decompressed_bytes += buffer_size;
+ return buffer_size;
+ }
+
+ /* skip some data at the start, but keep the rest of the block */
+ if (decompressed_bytes < skip && end > skip) {
+ offset = skip - decompressed_bytes;
+
+ in += offset;
+ size -= offset;
+ decompressed_bytes += offset;
+ }
+
+ out = &output_buffer[decompressed_bytes - skip];
+ size = min(decompressed_bytes + size, limit) - decompressed_bytes;
+
+ memcpy(out, in, size);
+ decompressed_bytes += size;
+
+ return buffer_size;
+}
+
+static void print_err(char *s)
+{
+ /* suppress the "error" when we terminate the decompressor */
+ if (decompressed_bytes >= limit)
+ return;
+
+ printf("Decompression error: '%s'\n\r", s);
+}
+
+/**
+ * partial_decompress - decompresses part or all of a compressed buffer
+ * @inbuf: input buffer
+ * @input_size: length of the input buffer
+ * @outbuf: output buffer
+ * @output_size: length of the output buffer
+ * @skip: number of output bytes to ignore
+ *
+ * This function takes compressed data from inbuf, decompresses it and writes
+ * it to outbuf. Once output_size bytes are written to the output buffer, or
+ * the stream is exhausted, the function will return the number of bytes that
+ * were decompressed. Otherwise it will return whatever error code the decompressor
+ * reported (NB: This is specific to each decompressor type).
+ *
+ * The skip functionality is mainly there so the program can discover
+ * the size of the compressed image so that it can ask firmware (if present)
+ * for an appropriately sized buffer.
+ */
+long partial_decompress(void *inbuf, unsigned long input_size,
+ void *outbuf, unsigned long output_size, unsigned long _skip)
+{
+ int ret;
+
+ /*
+ * The skipped bytes need to be included in the size of data we want
+ * to decompress.
+ */
+ output_size += _skip;
+
+ decompressed_bytes = 0;
+ output_buffer = outbuf;
+ limit = output_size;
+ skip = _skip;
+
+ ret = __decompress(inbuf, input_size, NULL, flush, outbuf,
+ output_size, NULL, print_err);
+
+ /*
+ * If decompression was aborted due to an actual error rather than
+ * a fake error that we used to abort, then we should report it.
+ */
+ if (decompressed_bytes < limit)
+ return ret;
+
+ return decompressed_bytes - skip;
+}
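
The skip/limit bookkeeping above is easiest to see in isolation. Below is a minimal, self-contained C sketch that mimics flush() outside the boot wrapper; the chunk size, window bounds and payload bytes are invented for illustration, and the patch's min() clamp is written out as an explicit if:

#include <stdio.h>
#include <string.h>

/* Toy versions of the wrapper's globals: a window of 'limit' output
 * bytes, of which the first 'skip' are discarded. */
static unsigned long decompressed_bytes, limit, skip;
static char output_buffer[64];

static long flush(void *v, unsigned long buffer_size)
{
	unsigned long end = decompressed_bytes + buffer_size;
	unsigned long size = buffer_size, offset = 0;
	char *in = v;

	if (decompressed_bytes >= limit)
		return -1;		/* fake an error to stop decompression */

	if (end <= skip) {		/* block lies entirely before the window */
		decompressed_bytes += buffer_size;
		return buffer_size;
	}

	if (decompressed_bytes < skip) {	/* block straddles the window start */
		offset = skip - decompressed_bytes;
		in += offset;
		size -= offset;
		decompressed_bytes += offset;
	}

	if (decompressed_bytes + size > limit)	/* clamp to the window end */
		size = limit - decompressed_bytes;

	memcpy(&output_buffer[decompressed_bytes - skip], in, size);
	decompressed_bytes += size;
	return buffer_size;
}

int main(void)
{
	char chunk[16];
	int i;

	skip = 24;		/* stands in for ei.elfoffset */
	limit = skip + 32;	/* stands in for ei.elfoffset + ei.loadsize */

	/* Pretend the decompressor emits 16-byte chunks of 'A', 'B', ... */
	for (i = 0; i < 5; i++) {
		memset(chunk, 'A' + i, sizeof(chunk));
		if (flush(chunk, sizeof(chunk)) < 0)
			break;
	}

	/* Stream bytes 24..55: the tail of 'B', all of 'C', half of 'D'. */
	printf("%s\n", output_buffer);
	return 0;
}

The main.c changes later in this diff use exactly this mechanism twice: once with skip = 0 to peek at the ELF header, then with skip = ei.elfoffset to place only the load segments.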
diff --git a/arch/powerpc/boot/fixup-headers.sed b/arch/powerpc/boot/fixup-headers.sed
new file mode 100644
index 000000000000..96362428eb37
--- /dev/null
+++ b/arch/powerpc/boot/fixup-headers.sed
@@ -0,0 +1,12 @@
+# Copyright 2016 IBM Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 or later as
+# published by the Free Software Foundation.
+
+s@#include <linux/decompress/mm\.h>@@;
+s@\"zlib_inflate/\([^\"]*\).*@"\1"@;
+s@<linux/kernel.h>@<stddef.h>@;
+
+s@__used@@;
+s@<linux/\([^>]*\).*@"\1"@;
diff --git a/arch/powerpc/boot/gunzip_util.c b/arch/powerpc/boot/gunzip_util.c
deleted file mode 100644
index 9dc52501de83..000000000000
--- a/arch/powerpc/boot/gunzip_util.c
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Copyright 2007 David Gibson, IBM Corporation.
- * Based on earlier work, Copyright (C) Paul Mackerras 1997.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <stddef.h>
-#include "string.h"
-#include "stdio.h"
-#include "ops.h"
-#include "gunzip_util.h"
-
-#define HEAD_CRC 2
-#define EXTRA_FIELD 4
-#define ORIG_NAME 8
-#define COMMENT 0x10
-#define RESERVED 0xe0
-
-/**
- * gunzip_start - prepare to decompress gzip data
- * @state: decompressor state structure to be initialized
- * @src: buffer containing gzip compressed or uncompressed data
- * @srclen: size in bytes of the buffer at src
- *
- * If the buffer at @src contains a gzip header, this function
- * initializes zlib to decompress the data, storing the decompression
- * state in @state. The other functions in this file can then be used
- * to decompress data from the gzipped stream.
- *
- * If the buffer at @src does not contain a gzip header, it is assumed
- * to contain uncompressed data. The buffer information is recorded
- * in @state and the other functions in this file will simply copy
- * data from the uncompressed data stream at @src.
- *
- * Any errors, such as bad compressed data, cause an error to be
- * printed an the platform's exit() function to be called.
- */
-void gunzip_start(struct gunzip_state *state, void *src, int srclen)
-{
- char *hdr = src;
- int hdrlen = 0;
-
- memset(state, 0, sizeof(*state));
-
- /* Check for gzip magic number */
- if ((hdr[0] == 0x1f) && (hdr[1] == 0x8b)) {
- /* gzip data, initialize zlib parameters */
- int r, flags;
-
- state->s.workspace = state->scratch;
- if (zlib_inflate_workspacesize() > sizeof(state->scratch))
- fatal("insufficient scratch space for gunzip\n\r");
-
- /* skip header */
- hdrlen = 10;
- flags = hdr[3];
- if (hdr[2] != Z_DEFLATED || (flags & RESERVED) != 0)
- fatal("bad gzipped data\n\r");
- if ((flags & EXTRA_FIELD) != 0)
- hdrlen = 12 + hdr[10] + (hdr[11] << 8);
- if ((flags & ORIG_NAME) != 0)
- while (hdr[hdrlen++] != 0)
- ;
- if ((flags & COMMENT) != 0)
- while (hdr[hdrlen++] != 0)
- ;
- if ((flags & HEAD_CRC) != 0)
- hdrlen += 2;
- if (hdrlen >= srclen)
- fatal("gunzip_start: ran out of data in header\n\r");
-
- r = zlib_inflateInit2(&state->s, -MAX_WBITS);
- if (r != Z_OK)
- fatal("inflateInit2 returned %d\n\r", r);
- }
-
- state->s.total_in = hdrlen;
- state->s.next_in = src + hdrlen;
- state->s.avail_in = srclen - hdrlen;
-}
-
-/**
- * gunzip_partial - extract bytes from a gzip data stream
- * @state: gzip state structure previously initialized by gunzip_start()
- * @dst: buffer to store extracted data
- * @dstlen: maximum number of bytes to extract
- *
- * This function extracts at most @dstlen bytes from the data stream
- * previously associated with @state by gunzip_start(), decompressing
- * if necessary. Exactly @dstlen bytes are extracted unless the data
- * stream doesn't contain enough bytes, in which case the entire
- * remainder of the stream is decompressed.
- *
- * Returns the actual number of bytes extracted. If any errors occur,
- * such as a corrupted compressed stream, an error is printed an the
- * platform's exit() function is called.
- */
-int gunzip_partial(struct gunzip_state *state, void *dst, int dstlen)
-{
- int len;
-
- if (state->s.workspace) {
- /* gunzipping */
- int r;
-
- state->s.next_out = dst;
- state->s.avail_out = dstlen;
- r = zlib_inflate(&state->s, Z_FULL_FLUSH);
- if (r != Z_OK && r != Z_STREAM_END)
- fatal("inflate returned %d msg: %s\n\r", r, state->s.msg);
- len = state->s.next_out - (Byte *)dst;
- } else {
- /* uncompressed image */
- len = min(state->s.avail_in, (uLong)dstlen);
- memcpy(dst, state->s.next_in, len);
- state->s.next_in += len;
- state->s.avail_in -= len;
- }
- return len;
-}
-
-/**
- * gunzip_exactly - extract a fixed number of bytes from a gzip data stream
- * @state: gzip state structure previously initialized by gunzip_start()
- * @dst: buffer to store extracted data
- * @dstlen: number of bytes to extract
- *
- * This function extracts exactly @dstlen bytes from the data stream
- * previously associated with @state by gunzip_start(), decompressing
- * if necessary.
- *
- * If there are less @dstlen bytes available in the data stream, or if
- * any other errors occur, such as a corrupted compressed stream, an
- * error is printed an the platform's exit() function is called.
- */
-void gunzip_exactly(struct gunzip_state *state, void *dst, int dstlen)
-{
- int len;
-
- len = gunzip_partial(state, dst, dstlen);
- if (len < dstlen)
- fatal("\n\rgunzip_exactly: ran out of data!"
- " Wanted %d, got %d.\n\r", dstlen, len);
-}
-
-/**
- * gunzip_discard - discard bytes from a gzip data stream
- * @state: gzip state structure previously initialized by gunzip_start()
- * @len: number of bytes to discard
- *
- * This function extracts, then discards exactly @len bytes from the
- * data stream previously associated with @state by gunzip_start().
- * Subsequent gunzip_partial(), gunzip_exactly() or gunzip_finish()
- * calls will extract the data following the discarded bytes in the
- * data stream.
- *
- * If there are less @len bytes available in the data stream, or if
- * any other errors occur, such as a corrupted compressed stream, an
- * error is printed an the platform's exit() function is called.
- */
-void gunzip_discard(struct gunzip_state *state, int len)
-{
- static char discard_buf[128];
-
- while (len > sizeof(discard_buf)) {
- gunzip_exactly(state, discard_buf, sizeof(discard_buf));
- len -= sizeof(discard_buf);
- }
-
- if (len > 0)
- gunzip_exactly(state, discard_buf, len);
-}
-
-/**
- * gunzip_finish - extract all remaining bytes from a gzip data stream
- * @state: gzip state structure previously initialized by gunzip_start()
- * @dst: buffer to store extracted data
- * @dstlen: maximum number of bytes to extract
- *
- * This function extracts all remaining data, or at most @dstlen
- * bytes, from the stream previously associated with @state by
- * gunzip_start(). zlib is then shut down, so it is an error to use
- * any of the functions in this file on @state until it is
- * re-initialized with another call to gunzip_start().
- *
- * If any errors occur, such as a corrupted compressed stream, an
- * error is printed an the platform's exit() function is called.
- */
-int gunzip_finish(struct gunzip_state *state, void *dst, int dstlen)
-{
- int len;
-
- len = gunzip_partial(state, dst, dstlen);
-
- if (state->s.workspace) {
- zlib_inflateEnd(&state->s);
- }
-
- return len;
-}
diff --git a/arch/powerpc/boot/gunzip_util.h b/arch/powerpc/boot/gunzip_util.h
deleted file mode 100644
index b3dfa6e87b3a..000000000000
--- a/arch/powerpc/boot/gunzip_util.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Decompression convenience functions
- *
- * Copyright 2007 David Gibson, IBM Corporation.
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-#ifndef _PPC_BOOT_GUNZIP_UTIL_H_
-#define _PPC_BOOT_GUNZIP_UTIL_H_
-
-#include "zlib.h"
-
-/*
- * These functions are designed to make life easy for decompressing
- * kernel images, initrd images or any other gzip compressed image,
- * particularly if its useful to decompress part of the image (e.g. to
- * examine headers) before decompressing the remainder.
- *
- * To use:
- * - declare a gunzip_state structure
- * - use gunzip_start() to initialize the state, associating it
- * with a stream of compressed data
- * - use gunzip_partial(), gunzip_exactly() and gunzip_discard()
- * in any combination to extract pieces of data from the stream
- * - Finally use gunzip_finish() to extract the tail of the
- * compressed stream and wind up zlib
- */
-
-/* scratch space for gunzip; 46912 is from zlib_inflate_workspacesize() */
-#define GUNZIP_SCRATCH_SIZE 46912
-
-struct gunzip_state {
- z_stream s;
- char scratch[46912];
-};
-
-void gunzip_start(struct gunzip_state *state, void *src, int srclen);
-int gunzip_partial(struct gunzip_state *state, void *dst, int dstlen);
-void gunzip_exactly(struct gunzip_state *state, void *dst, int len);
-void gunzip_discard(struct gunzip_state *state, int len);
-int gunzip_finish(struct gunzip_state *state, void *dst, int len);
-
-#endif /* _PPC_BOOT_GUNZIP_UTIL_H_ */
diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
index d80161b633f4..f7a184b6c35b 100644
--- a/arch/powerpc/boot/main.c
+++ b/arch/powerpc/boot/main.c
@@ -15,11 +15,8 @@
#include "string.h"
#include "stdio.h"
#include "ops.h"
-#include "gunzip_util.h"
#include "reg.h"
-static struct gunzip_state gzstate;
-
struct addr_range {
void *addr;
unsigned long size;
@@ -30,15 +27,14 @@ struct addr_range {
static struct addr_range prep_kernel(void)
{
char elfheader[256];
- void *vmlinuz_addr = _vmlinux_start;
+ unsigned char *vmlinuz_addr = (unsigned char *)_vmlinux_start;
unsigned long vmlinuz_size = _vmlinux_end - _vmlinux_start;
void *addr = 0;
struct elf_info ei;
- int len;
+ long len;
- /* gunzip the ELF header of the kernel */
- gunzip_start(&gzstate, vmlinuz_addr, vmlinuz_size);
- gunzip_exactly(&gzstate, elfheader, sizeof(elfheader));
+ partial_decompress(vmlinuz_addr, vmlinuz_size,
+ elfheader, sizeof(elfheader), 0);
if (!parse_elf64(elfheader, &ei) && !parse_elf32(elfheader, &ei))
fatal("Error: not a valid PPC32 or PPC64 ELF file!\n\r");
@@ -51,7 +47,7 @@ static struct addr_range prep_kernel(void)
* the kernel bss must be claimed (it will be zero'd by the
* kernel itself)
*/
- printf("Allocating 0x%lx bytes for kernel ...\n\r", ei.memsize);
+ printf("Allocating 0x%lx bytes for kernel...\n\r", ei.memsize);
if (platform_ops.vmlinux_alloc) {
addr = platform_ops.vmlinux_alloc(ei.memsize);
@@ -71,16 +67,21 @@ static struct addr_range prep_kernel(void)
"device tree\n\r");
}
- /* Finally, gunzip the kernel */
- printf("gunzipping (0x%p <- 0x%p:0x%p)...", addr,
+ /* Finally, decompress the kernel */
+ printf("Decompressing (0x%p <- 0x%p:0x%p)...\n\r", addr,
vmlinuz_addr, vmlinuz_addr+vmlinuz_size);
- /* discard up to the actual load data */
- gunzip_discard(&gzstate, ei.elfoffset - sizeof(elfheader));
- len = gunzip_finish(&gzstate, addr, ei.loadsize);
+
+ len = partial_decompress(vmlinuz_addr, vmlinuz_size,
+ addr, ei.loadsize, ei.elfoffset);
+
+ if (len < 0)
+ fatal("Decompression failed with error code %ld\n\r", len);
+
if (len != ei.loadsize)
- fatal("ran out of data! only got 0x%x of 0x%lx bytes.\n\r",
- len, ei.loadsize);
- printf("done 0x%x bytes\n\r", len);
+ fatal("Decompression error: got 0x%lx bytes, expected 0x%lx.\n\r",
+ len, ei.loadsize);
+
+ printf("Done! Decompressed 0x%lx bytes\n\r", len);
flush_cache(addr, ei.loadsize);
diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h
index e19b64ef977a..309d1b127e96 100644
--- a/arch/powerpc/boot/ops.h
+++ b/arch/powerpc/boot/ops.h
@@ -260,4 +260,7 @@ int __ilog2_u32(u32 n)
return 31 - bit;
}
+long partial_decompress(void *inbuf, unsigned long input_size, void *outbuf,
+ unsigned long output_size, unsigned long skip);
+
#endif /* _PPC_BOOT_OPS_H_ */
diff --git a/arch/powerpc/boot/stdbool.h b/arch/powerpc/boot/stdbool.h
new file mode 100644
index 000000000000..f818efb08891
--- /dev/null
+++ b/arch/powerpc/boot/stdbool.h
@@ -0,0 +1,14 @@
+/*
+ * Copyright (C) IBM Corporation 2016.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This file is only necessary because some of the pre-boot decompressors
+ * expect stdbool.h to be available.
+ *
+ */
+
+#include "types.h"
diff --git a/arch/powerpc/boot/stdint.h b/arch/powerpc/boot/stdint.h
new file mode 100644
index 000000000000..c1c853be7490
--- /dev/null
+++ b/arch/powerpc/boot/stdint.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright (C) IBM Corporation 2016.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This file is only necessary because some of the pre-boot decompressors
+ * expect stdint.h to be available.
+ */
+
+#include "types.h"
diff --git a/arch/powerpc/boot/types.h b/arch/powerpc/boot/types.h
index 85565a89bcc2..af6b66b842c4 100644
--- a/arch/powerpc/boot/types.h
+++ b/arch/powerpc/boot/types.h
@@ -1,6 +1,8 @@
#ifndef _TYPES_H_
#define _TYPES_H_
+#include <stdbool.h>
+
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
typedef unsigned char u8;
@@ -34,4 +36,16 @@ typedef s64 int64_t;
(void) (&_x == &_y); \
_x > _y ? _x : _y; })
+#define min_t(type, a, b) min(((type) a), ((type) b))
+#define max_t(type, a, b) max(((type) a), ((type) b))
+
+typedef int bool;
+
+#ifndef true
+#define true 1
+#endif
+
+#ifndef false
+#define false 0
+#endif
#endif /* _TYPES_H_ */
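
The min_t()/max_t() helpers added above exist because the underlying min()/max() macros reject mixed-type comparisons: the (void) (&_x == &_y) line provokes a compiler warning when the two pointer types differ. A small GNU C illustration with made-up values (the min() here mirrors the max() shown in the hunk):

#include <stdio.h>

#define min(x, y) ({ \
	typeof(x) _x = (x); \
	typeof(y) _y = (y); \
	(void) (&_x == &_y); \
	_x < _y ? _x : _y; })

#define min_t(type, a, b) min(((type) a), ((type) b))

int main(void)
{
	int avail = -1;			/* e.g. an "out of data" sentinel */
	unsigned long want = 256;

	/* min(avail, want) would compare an int with an unsigned long and
	 * trip the type check; min_t() casts both sides to one type first. */
	printf("%ld\n", min_t(long, avail, want));	/* prints -1 */
	return 0;
}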
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 6681ec3625c9..404b3aabdb4d 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -20,6 +20,8 @@
# -D dir specify directory containing data files used by script
# (default ./arch/powerpc/boot)
# -W dir specify working directory for temporary files (default .)
+# -z use gzip (legacy)
+# -Z zsuffix compression to use (gz, xz or none)
# Stop execution if any command fails
set -e
@@ -38,7 +40,7 @@ dtb=
dts=
cacheit=
binary=
-gzip=.gz
+compression=.gz
pie=
format=
@@ -59,7 +61,8 @@ tmpdir=.
usage() {
echo 'Usage: wrapper [-o output] [-p platform] [-i initrd]' >&2
echo ' [-d devtree] [-s tree.dts] [-c] [-C cross-prefix]' >&2
- echo ' [-D datadir] [-W workingdir] [--no-gzip] [vmlinux]' >&2
+ echo ' [-D datadir] [-W workingdir] [-Z (gz|xz|none)]' >&2
+ echo ' [--no-compression] [vmlinux]' >&2
exit 1
}
@@ -126,8 +129,24 @@ while [ "$#" -gt 0 ]; do
[ "$#" -gt 0 ] || usage
tmpdir="$1"
;;
+ -z)
+ compression=.gz
+ ;;
+ -Z)
+ shift
+ [ "$#" -gt 0 ] || usage
+ [ "$1" != "gz" -o "$1" != "xz" -o "$1" != "none" ] || usage
+
+ compression=".$1"
+
+ if [ $compression = ".none" ]; then
+ compression=
+ fi
+ ;;
--no-gzip)
- gzip=
+ # a "feature" of the the wrapper script is that it can be used outside
+ # the kernel tree. So keeping this around for backwards compatibility.
+ compression=
;;
-?)
usage
@@ -140,6 +159,7 @@ while [ "$#" -gt 0 ]; do
shift
done
+
if [ -n "$dts" ]; then
if [ ! -r "$dts" -a -r "$object/dts/$dts" ]; then
dts="$object/dts/$dts"
@@ -212,7 +232,7 @@ miboot|uboot*)
;;
cuboot*)
binary=y
- gzip=
+ compression=
case "$platform" in
*-mpc866ads|*-mpc885ads|*-adder875*|*-ep88xc)
platformo=$object/cuboot-8xx.o
@@ -243,7 +263,7 @@ cuboot*)
ps3)
platformo="$object/ps3-head.o $object/ps3-hvcall.o $object/ps3.o"
lds=$object/zImage.ps3.lds
- gzip=
+ compression=
ext=bin
objflags="-O binary --set-section-flags=.bss=contents,alloc,load,data"
ksection=.kernel:vmlinux.bin
@@ -310,27 +330,37 @@ mvme7100)
esac
vmz="$tmpdir/`basename \"$kernel\"`.$ext"
-if [ -z "$cacheit" -o ! -f "$vmz$gzip" -o "$vmz$gzip" -ot "$kernel" ]; then
- ${CROSS}objcopy $objflags "$kernel" "$vmz.$$"
- strip_size=$(stat -c %s $vmz.$$)
+# Calculate the vmlinux.strip size
+${CROSS}objcopy $objflags "$kernel" "$vmz.$$"
+strip_size=$(stat -c %s $vmz.$$)
- if [ -n "$gzip" ]; then
+if [ -z "$cacheit" -o ! -f "$vmz$compression" -o "$vmz$compression" -ot "$kernel" ]; then
+ # recompress the image if we need to
+ case $compression in
+ .xz)
+ xz --check=crc32 -f -6 "$vmz.$$"
+ ;;
+ .gz)
gzip -n -f -9 "$vmz.$$"
- fi
+ ;;
+ *)
+ # drop the compression suffix so the stripped vmlinux is used
+ compression=
+ ;;
+ esac
if [ -n "$cacheit" ]; then
- mv -f "$vmz.$$$gzip" "$vmz$gzip"
+ mv -f "$vmz.$$$compression" "$vmz$compression"
else
vmz="$vmz.$$"
fi
else
- # Calculate the vmlinux.strip size
- ${CROSS}objcopy $objflags "$kernel" "$vmz.$$"
- strip_size=$(stat -c %s $vmz.$$)
rm -f $vmz.$$
fi
+vmz="$vmz$compression"
+
if [ "$make_space" = "y" ]; then
# Round the size to next higher MB limit
round_size=$(((strip_size + 0xfffff) & 0xfff00000))
@@ -346,8 +376,6 @@ if [ "$make_space" = "y" ]; then
fi
fi
-vmz="$vmz$gzip"
-
# Extract kernel version information, some platforms want to include
# it in the image header
version=`${CROSS}strings "$kernel" | grep '^Linux version [-0-9.]' | \
@@ -417,6 +445,7 @@ if [ "$platform" != "miboot" ]; then
if [ -n "$link_address" ] ; then
text_start="-Ttext $link_address"
fi
+# link everything
${CROSS}ld -m $format -T $lds $text_start $pie -o "$ofile" \
$platformo $tmp $object/wrapper.a
rm $tmp
diff --git a/arch/powerpc/boot/xz_config.h b/arch/powerpc/boot/xz_config.h
new file mode 100644
index 000000000000..5c6afdbca642
--- /dev/null
+++ b/arch/powerpc/boot/xz_config.h
@@ -0,0 +1,39 @@
+#ifndef __XZ_CONFIG_H__
+#define __XZ_CONFIG_H__
+
+/*
+ * Most of this is copied from lib/xz/xz_private.h; we can't use its defines
+ * since the boot wrapper is not built in the same environment as the rest of
+ * the kernel.
+ */
+
+#include "types.h"
+#include "swab.h"
+
+static inline uint32_t swab32p(void *p)
+{
+ uint32_t *q = p;
+
+ return swab32(*q);
+}
+
+#ifdef __LITTLE_ENDIAN__
+#define get_le32(p) (*((uint32_t *) (p)))
+#else
+#define get_le32(p) swab32p(p)
+#endif
+
+#define memeq(a, b, size) (memcmp(a, b, size) == 0)
+#define memzero(buf, size) memset(buf, 0, size)
+
+/* prevent the inclusion of the xz-preboot MM headers */
+#define DECOMPR_MM_H
+#define memmove memmove
+#define XZ_EXTERN static
+
+/* xz.h needs to be included directly since we need enum xz_mode */
+#include "../../../include/linux/xz.h"
+
+#undef XZ_EXTERN
+
+#endif
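
The get_le32() split above is why swab.h is pulled in: xz streams store their check fields little-endian, so a big-endian boot wrapper must byte-swap them while a little-endian one can load them directly. A standalone sketch of the same logic (the swab32() body is an assumed equivalent of the wrapper's; __LITTLE_ENDIAN__ is the toolchain's endianness macro on powerpc):

#include <stdint.h>
#include <stdio.h>

/* Assumed equivalent of the boot wrapper's swab32(). */
static inline uint32_t swab32(uint32_t x)
{
	return ((x & 0x000000ffU) << 24) |
	       ((x & 0x0000ff00U) <<  8) |
	       ((x & 0x00ff0000U) >>  8) |
	       ((x & 0xff000000U) >> 24);
}

static inline uint32_t swab32p(void *p)
{
	uint32_t *q = p;

	return swab32(*q);
}

#ifdef __LITTLE_ENDIAN__
#define get_le32(p) (*((uint32_t *) (p)))
#else
#define get_le32(p) swab32p(p)
#endif

int main(void)
{
	/* 0x04030201 as it sits in an xz stream (little-endian on disk). */
	unsigned char buf[4] = { 0x01, 0x02, 0x03, 0x04 };

	/* A big-endian build byte-swaps the loaded word; a little-endian
	 * build uses it as-is. Either way the logical value is 0x4030201. */
	printf("0x%x\n", get_le32(buf));
	return 0;
}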
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index dce352e9153b..d98b6eb3254f 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -15,6 +15,8 @@ CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_LOG_CPU_MAX_BUF_SHIFT=13
CONFIG_NUMA_BALANCING=y
CONFIG_CGROUPS=y
CONFIG_MEMCG=y
@@ -95,7 +97,7 @@ CONFIG_BLK_DEV_IDECD=y
CONFIG_BLK_DEV_GENERIC=y
CONFIG_BLK_DEV_AMD74XX=y
CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
+CONFIG_CHR_DEV_ST=m
CONFIG_BLK_DEV_SR=y
CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
@@ -107,7 +109,7 @@ CONFIG_SCSI_CXGB4_ISCSI=m
CONFIG_SCSI_BNX2_ISCSI=m
CONFIG_BE2ISCSI=m
CONFIG_SCSI_MPT2SAS=m
-CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_2=m
CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
CONFIG_SCSI_IPR=y
CONFIG_SCSI_QLA_FC=m
@@ -149,10 +151,10 @@ CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
CONFIG_VHOST_NET=m
-CONFIG_VORTEX=y
+CONFIG_VORTEX=m
CONFIG_ACENIC=m
CONFIG_ACENIC_OMIT_TIGON_I=y
-CONFIG_PCNET32=y
+CONFIG_PCNET32=m
CONFIG_TIGON3=y
CONFIG_BNX2X=m
CONFIG_CHELSIO_T1=m
@@ -163,6 +165,7 @@ CONFIG_E1000=y
CONFIG_E1000E=y
CONFIG_IXGB=m
CONFIG_IXGBE=m
+CONFIG_I40E=m
CONFIG_MLX4_EN=m
CONFIG_MYRI10GE=m
CONFIG_QLGE=m
@@ -238,7 +241,7 @@ CONFIG_EXT2_FS_SECURITY=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
-CONFIG_REISERFS_FS=y
+CONFIG_REISERFS_FS=m
CONFIG_REISERFS_FS_XATTR=y
CONFIG_REISERFS_FS_POSIX_ACL=y
CONFIG_REISERFS_FS_SECURITY=y
@@ -253,10 +256,10 @@ CONFIG_NILFS2_FS=m
CONFIG_AUTOFS4_FS=m
CONFIG_FUSE_FS=m
CONFIG_OVERLAY_FS=m
-CONFIG_ISO9660_FS=y
+CONFIG_ISO9660_FS=m
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
+CONFIG_VFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
@@ -310,6 +313,8 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
CONFIG_CRYPTO_DEV_NX=y
+CONFIG_CRYPTO_DEV_VMX=y
+CONFIG_CRYPTO_DEV_VMX_ENCRYPT=m
CONFIG_VIRTUALIZATION=y
CONFIG_KVM_BOOK3S_64=m
CONFIG_KVM_BOOK3S_64_HV=m
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 0a8d250cb97e..58a98d40086f 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -10,6 +10,8 @@ CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_LOG_CPU_MAX_BUF_SHIFT=13
CONFIG_CGROUPS=y
CONFIG_CPUSETS=y
CONFIG_BLK_DEV_INITRD=y
@@ -90,7 +92,7 @@ CONFIG_BLK_DEV_AMD74XX=y
CONFIG_BLK_DEV_IDE_PMAC=y
CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y
CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
+CONFIG_CHR_DEV_ST=m
CONFIG_BLK_DEV_SR=y
CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
@@ -103,7 +105,7 @@ CONFIG_BE2ISCSI=m
CONFIG_SCSI_MPT2SAS=m
CONFIG_SCSI_IBMVSCSI=y
CONFIG_SCSI_IBMVFC=m
-CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_2=m
CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
CONFIG_SCSI_IPR=y
CONFIG_SCSI_QLA_FC=m
@@ -149,10 +151,10 @@ CONFIG_NETCONSOLE=y
CONFIG_TUN=m
CONFIG_VIRTIO_NET=m
CONFIG_VHOST_NET=m
-CONFIG_VORTEX=y
+CONFIG_VORTEX=m
CONFIG_ACENIC=m
CONFIG_ACENIC_OMIT_TIGON_I=y
-CONFIG_PCNET32=y
+CONFIG_PCNET32=m
CONFIG_TIGON3=y
CONFIG_BNX2X=m
CONFIG_CHELSIO_T1=m
@@ -165,6 +167,7 @@ CONFIG_E1000=y
CONFIG_E1000E=y
CONFIG_IXGB=m
CONFIG_IXGBE=m
+CONFIG_I40E=m
CONFIG_MLX4_EN=m
CONFIG_MYRI10GE=m
CONFIG_PASEMI_MAC=y
@@ -269,7 +272,7 @@ CONFIG_EXT2_FS_SECURITY=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
-CONFIG_REISERFS_FS=y
+CONFIG_REISERFS_FS=m
CONFIG_REISERFS_FS_XATTR=y
CONFIG_REISERFS_FS_POSIX_ACL=y
CONFIG_REISERFS_FS_SECURITY=y
@@ -284,10 +287,10 @@ CONFIG_NILFS2_FS=m
CONFIG_AUTOFS4_FS=m
CONFIG_FUSE_FS=m
CONFIG_OVERLAY_FS=m
-CONFIG_ISO9660_FS=y
+CONFIG_ISO9660_FS=m
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
+CONFIG_VFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
@@ -347,6 +350,8 @@ CONFIG_CRYPTO_LZO=m
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DEV_NX=y
CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
+CONFIG_CRYPTO_DEV_VMX=y
+CONFIG_CRYPTO_DEV_VMX_ENCRYPT=m
CONFIG_VIRTUALIZATION=y
CONFIG_KVM_BOOK3S_64=m
CONFIG_KVM_BOOK3S_64_HV=m
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 654aeffc57ef..8a3bc016b732 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -15,6 +15,8 @@ CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_LOG_CPU_MAX_BUF_SHIFT=13
CONFIG_NUMA_BALANCING=y
CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y
CONFIG_CGROUPS=y
@@ -95,7 +97,7 @@ CONFIG_BLK_DEV_IDECD=y
CONFIG_BLK_DEV_GENERIC=y
CONFIG_BLK_DEV_AMD74XX=y
CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
+CONFIG_CHR_DEV_ST=m
CONFIG_BLK_DEV_SR=y
CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
@@ -108,7 +110,7 @@ CONFIG_BE2ISCSI=m
CONFIG_SCSI_MPT2SAS=m
CONFIG_SCSI_IBMVSCSI=y
CONFIG_SCSI_IBMVFC=m
-CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_2=m
CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
CONFIG_SCSI_IPR=y
CONFIG_SCSI_QLA_FC=m
@@ -150,10 +152,10 @@ CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
CONFIG_VHOST_NET=m
-CONFIG_VORTEX=y
+CONFIG_VORTEX=m
CONFIG_ACENIC=m
CONFIG_ACENIC_OMIT_TIGON_I=y
-CONFIG_PCNET32=y
+CONFIG_PCNET32=m
CONFIG_TIGON3=y
CONFIG_BNX2X=m
CONFIG_CHELSIO_T1=m
@@ -166,6 +168,7 @@ CONFIG_E1000=y
CONFIG_E1000E=y
CONFIG_IXGB=m
CONFIG_IXGBE=m
+CONFIG_I40E=m
CONFIG_MLX4_EN=m
CONFIG_MYRI10GE=m
CONFIG_QLGE=m
@@ -241,7 +244,7 @@ CONFIG_EXT2_FS_SECURITY=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
-CONFIG_REISERFS_FS=y
+CONFIG_REISERFS_FS=m
CONFIG_REISERFS_FS_XATTR=y
CONFIG_REISERFS_FS_POSIX_ACL=y
CONFIG_REISERFS_FS_SECURITY=y
@@ -256,10 +259,10 @@ CONFIG_NILFS2_FS=m
CONFIG_AUTOFS4_FS=m
CONFIG_FUSE_FS=m
CONFIG_OVERLAY_FS=m
-CONFIG_ISO9660_FS=y
+CONFIG_ISO9660_FS=m
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
+CONFIG_VFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
@@ -314,6 +317,8 @@ CONFIG_CRYPTO_LZO=m
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DEV_NX=y
CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
+CONFIG_CRYPTO_DEV_VMX=y
+CONFIG_CRYPTO_DEV_VMX_ENCRYPT=m
CONFIG_VIRTUALIZATION=y
CONFIG_KVM_BOOK3S_64=m
CONFIG_KVM_BOOK3S_64_HV=m
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
index bfe3d37a24ef..9fa046d56eba 100644
--- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
+++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
@@ -4,6 +4,7 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
+#include <linux/cpufeature.h>
#include <asm/switch_to.h>
#define CHKSUM_BLOCK_SIZE 1
@@ -157,7 +158,7 @@ static void __exit crc32c_vpmsum_mod_fini(void)
crypto_unregister_shash(&alg);
}
-module_init(crc32c_vpmsum_mod_init);
+module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init);
module_exit(crc32c_vpmsum_mod_fini);
MODULE_AUTHOR("Anton Blanchard <anton@samba.org>");
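
The module_init() to module_cpu_feature_match() change does two things: the init routine only runs on CPUs that actually advertise the feature, and the module gains a cpu: modalias so udev can autoload it on matching hardware. A minimal sketch of the same pattern (the demo names are hypothetical; the feature constant is the one used by the driver above):

#include <linux/module.h>
#include <linux/cpufeature.h>

static int __init demo_init(void)
{
	/* Reached only on CPUs that advertise vector crypto. */
	pr_info("demo: VEC_CRYPTO present, registering\n");
	return 0;
}

static void __exit demo_exit(void)
{
}

/* Registers demo_init() as the module init and emits the modalias that
 * lets udev load the module automatically when the feature bit is set. */
module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, demo_init);
module_exit(demo_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("cpu feature autoload sketch");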
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index e71b9097594c..d1492736d852 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -15,6 +15,8 @@
#include <linux/threads.h>
#include <linux/kprobes.h>
+#include <uapi/asm/ucontext.h>
+
/* SMP */
extern struct thread_info *current_set[NR_CPUS];
extern struct thread_info *secondary_ti;
@@ -52,8 +54,8 @@ void SMIException(struct pt_regs *regs);
void handle_hmi_exception(struct pt_regs *regs);
void instruction_breakpoint_exception(struct pt_regs *regs);
void RunModeException(struct pt_regs *regs);
-void __kprobes single_step_exception(struct pt_regs *regs);
-void __kprobes program_check_exception(struct pt_regs *regs);
+void single_step_exception(struct pt_regs *regs);
+void program_check_exception(struct pt_regs *regs);
void alignment_exception(struct pt_regs *regs);
void StackOverflow(struct pt_regs *regs);
void nonrecoverable_exception(struct pt_regs *regs);
@@ -70,6 +72,41 @@ void unrecoverable_exception(struct pt_regs *regs);
void kernel_bad_stack(struct pt_regs *regs);
void system_reset_exception(struct pt_regs *regs);
void machine_check_exception(struct pt_regs *regs);
-void __kprobes emulation_assist_interrupt(struct pt_regs *regs);
+void emulation_assist_interrupt(struct pt_regs *regs);
+
+/* signals, syscalls and interrupts */
+#ifdef CONFIG_PPC64
+int sys_swapcontext(struct ucontext __user *old_ctx,
+ struct ucontext __user *new_ctx,
+ long ctx_size, long r6, long r7, long r8, struct pt_regs *regs);
+#else
+long sys_swapcontext(struct ucontext __user *old_ctx,
+ struct ucontext __user *new_ctx,
+ int ctx_size, int r6, int r7, int r8, struct pt_regs *regs);
+#endif
+long sys_switch_endian(void);
+notrace unsigned int __check_irq_replay(void);
+void notrace restore_interrupts(void);
+
+/* ptrace */
+long do_syscall_trace_enter(struct pt_regs *regs);
+void do_syscall_trace_leave(struct pt_regs *regs);
+
+/* process */
+void restore_math(struct pt_regs *regs);
+void restore_tm_state(struct pt_regs *regs);
+
+/* prom_init (OpenFirmware) */
+unsigned long __init prom_init(unsigned long r3, unsigned long r4,
+ unsigned long pp,
+ unsigned long r6, unsigned long r7,
+ unsigned long kbase);
+
+/* setup */
+void __init early_setup(unsigned long dt_ptr);
+void early_setup_secondary(void);
+
+/* time */
+void accumulate_stolen_time(void);
#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index f08d567e0ca4..2b90335194a7 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -233,7 +233,7 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
PPC_ATOMIC_ENTRY_BARRIER
"1: lwarx %0,0,%1 # __atomic_add_unless\n\
cmpw 0,%0,%3 \n\
- beq- 2f \n\
+ beq 2f \n\
add %0,%2,%0 \n"
PPC405_ERR77(0,%2)
" stwcx. %0,0,%1 \n\
@@ -539,7 +539,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
PPC_ATOMIC_ENTRY_BARRIER
"1: ldarx %0,0,%1 # __atomic_add_unless\n\
cmpd 0,%0,%3 \n\
- beq- 2f \n\
+ beq 2f \n\
add %0,%2,%0 \n"
" stdcx. %0,0,%1 \n\
bne- 1b \n"
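
The only functional change in these two hunks is dropping the "-" static-prediction suffix from beq, leaving the branch unhinted. For context, here is a rough C rendering of what the larx/stcx. loop computes; it is a sketch using GCC builtins in place of the real reservation instructions and it ignores the PPC_ATOMIC_ENTRY/EXIT barriers:

#include <stdbool.h>

/* Add 'a' to *v unless *v equals 'u'; return the old value. */
static int atomic_add_unless_sketch(int *v, int a, int u)
{
	int old;

	do {
		old = __atomic_load_n(v, __ATOMIC_RELAXED);	/* lwarx */
		if (old == u)					/* cmpw; beq 2f */
			break;
		/* A failed exchange corresponds to stwcx. losing the
		 * reservation, which the asm retries via bne- 1b. */
	} while (!__atomic_compare_exchange_n(v, &old, old + a, false,
					      __ATOMIC_SEQ_CST,
					      __ATOMIC_SEQ_CST));

	return old;
}

int main(void)
{
	int v = 5;

	/* u matches, so nothing is added and the old value comes back. */
	return atomic_add_unless_sketch(&v, 1, 5) == 5 && v == 5 ? 0 : 1;
}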
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 38b33dcfcc9d..6b8b2d57fdc8 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -223,7 +223,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
}
-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+static inline void __ptep_set_access_flags(struct mm_struct *mm,
+ pte_t *ptep, pte_t entry)
{
unsigned long set = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 287a656ceb57..e407af2b7333 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -245,6 +245,43 @@ static inline int segment_shift(int ssize)
}
/*
+ * This array is indexed by the LP field of the HPTE second dword.
+ * Since this field may contain some RPN bits, some entries are
+ * replicated so that we get the same value irrespective of RPN.
+ * The top 4 bits are the page size index (MMU_PAGE_*) for the
+ * actual page size, and the bottom 4 bits are the base page size.
+ */
+extern u8 hpte_page_sizes[1 << LP_BITS];
+
+static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
+ bool is_base_size)
+{
+ unsigned int i, lp;
+
+ if (!(h & HPTE_V_LARGE))
+ return 1ul << 12;
+
+ /* Look at the 8 bit LP value */
+ lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+ i = hpte_page_sizes[lp];
+ if (!i)
+ return 0;
+ if (!is_base_size)
+ i >>= 4;
+ return 1ul << mmu_psize_defs[i & 0xf].shift;
+}
+
+static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
+{
+ return __hpte_page_size(h, l, 0);
+}
+
+static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
+{
+ return __hpte_page_size(h, l, 1);
+}
+
+/*
* The current system page and segment sizes
*/
extern int mmu_kernel_ssize;
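As the new comment explains, each hpte_page_sizes[] entry packs two MMU_PAGE_* indices into one byte, which is what __hpte_page_size() above unpacks. A minimal decode sketch of that packing (decode_hpte_size_entry is a hypothetical helper, not part of this patch):

	static void decode_hpte_size_entry(unsigned char entry,
					   int *base_psize, int *actual_psize)
	{
		/* entry == 0 means the LP encoding was invalid */
		*base_psize = entry & 0xf;	/* bottom 4 bits: base page size index */
		*actual_psize = entry >> 4;	/* top 4 bits: actual page size index */
	}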
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 263bf39ced40..9fd77f8794a0 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -6,6 +6,8 @@
*/
#define _PAGE_BIT_SWAP_TYPE 0
+#define _PAGE_RO 0
+
#define _PAGE_EXEC 0x00001 /* execute permission */
#define _PAGE_WRITE 0x00002 /* write access allowed */
#define _PAGE_READ 0x00004 /* read access allowed */
@@ -565,10 +567,11 @@ static inline bool check_pte_access(unsigned long access, unsigned long ptev)
* Generic functions with hash/radix callbacks
*/
-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+static inline void __ptep_set_access_flags(struct mm_struct *mm,
+ pte_t *ptep, pte_t entry)
{
if (radix_enabled())
- return radix__ptep_set_access_flags(ptep, entry);
+ return radix__ptep_set_access_flags(mm, ptep, entry);
return hash__ptep_set_access_flags(ptep, entry);
}
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index df294224e280..2a46dea8e1b1 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -11,6 +11,11 @@
#include <asm/book3s/64/radix-4k.h>
#endif
+#ifndef __ASSEMBLY__
+#include <asm/book3s/64/tlbflush-radix.h>
+#include <asm/cpu_has_feature.h>
+#endif
+
/* An empty PTE can still have a R or C writeback */
#define RADIX_PTE_NONE_MASK (_PAGE_DIRTY | _PAGE_ACCESSED)
@@ -105,11 +110,8 @@
#define RADIX_PUD_TABLE_SIZE (sizeof(pud_t) << RADIX_PUD_INDEX_SIZE)
#define RADIX_PGD_TABLE_SIZE (sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE)
-static inline unsigned long radix__pte_update(struct mm_struct *mm,
- unsigned long addr,
- pte_t *ptep, unsigned long clr,
- unsigned long set,
- int huge)
+static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
+ unsigned long set)
{
pte_t pte;
unsigned long old_pte, new_pte;
@@ -121,9 +123,39 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
- /* We already do a sync in cmpxchg, is ptesync needed ?*/
+ return old_pte;
+}
+
+static inline unsigned long radix__pte_update(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep, unsigned long clr,
+ unsigned long set,
+ int huge)
+{
+ unsigned long old_pte;
+
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+
+ unsigned long new_pte;
+
+ old_pte = __radix_pte_update(ptep, ~0, 0);
+ asm volatile("ptesync" : : : "memory");
+ /*
+ * new value of pte
+ */
+ new_pte = (old_pte | set) & ~clr;
+
+ /*
+ * For now do a heavyweight flush of the whole PID instead of
+ * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize);
+ */
+ radix__flush_tlb_mm(mm);
+
+ __radix_pte_update(ptep, 0, new_pte);
+ } else
+ old_pte = __radix_pte_update(ptep, clr, set);
asm volatile("ptesync" : : : "memory");
- /* huge pages use the old page table lock */
if (!huge)
assert_pte_locked(mm, addr);
@@ -134,20 +166,33 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
* Set the dirty and/or accessed bits atomically in a linux PTE, this
* function doesn't need to invalidate tlb.
*/
-static inline void radix__ptep_set_access_flags(pte_t *ptep, pte_t entry)
+static inline void radix__ptep_set_access_flags(struct mm_struct *mm,
+ pte_t *ptep, pte_t entry)
{
- pte_t pte;
- unsigned long old_pte, new_pte;
+
unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
_PAGE_RW | _PAGE_EXEC);
- do {
- pte = READ_ONCE(*ptep);
- old_pte = pte_val(pte);
+
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+
+ unsigned long old_pte, new_pte;
+
+ old_pte = __radix_pte_update(ptep, ~0, 0);
+ asm volatile("ptesync" : : : "memory");
+ /*
+ * new value of pte
+ */
new_pte = old_pte | set;
- } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+ /*
+ * For now do a heavyweight flush of the whole PID instead of
+ * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize);
+ */
+ radix__flush_tlb_mm(mm);
- /* We already do a sync in cmpxchg, is ptesync needed ?*/
+ __radix_pte_update(ptep, 0, new_pte);
+ } else
+ __radix_pte_update(ptep, 0, set);
asm volatile("ptesync" : : : "memory");
}
@@ -233,14 +278,19 @@ static inline unsigned long radix__get_tree_size(void)
{
unsigned long rts_field;
/*
- * we support 52 bits, hence 52-31 = 21, 0b10101
+ * We support 52 bits, hence:
+ * DD1 52-28 = 24, 0b11000
+ * Others 52-31 = 21, 0b10101
* RTS encoding details
* bits 0 - 3 of rts -> bits 6 - 8 unsigned long
* bits 4 - 5 of rts -> bits 62 - 63 of unsigned long
*/
- rts_field = (0x5UL << 5); /* 6 - 8 bits */
- rts_field |= (0x2UL << 61);
-
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ rts_field = (0x3UL << 61);
+ else {
+ rts_field = (0x5UL << 5); /* 6 - 8 bits */
+ rts_field |= (0x2UL << 61);
+ }
return rts_field;
}
#endif /* __ASSEMBLY__ */
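The RTS arithmetic in radix__get_tree_size() is easier to check written out: for a 52-bit space, RTS = 52 - 31 = 21 = 0b10101, whose low three bits (0x5) are placed by the << 5 and whose high two bits (0x2) by the << 61; on DD1, RTS = 52 - 28 = 24 = 0b11000, whose low three bits are zero, which is why only the 0x3UL << 61 term remains. A sketch of the same split (rts_encode is a hypothetical helper):

	static unsigned long rts_encode(unsigned int rts)
	{
		unsigned long field;

		field  = (unsigned long)(rts & 0x7) << 5;	/* RTS low 3 bits */
		field |= (unsigned long)(rts >> 3) << 61;	/* RTS high 2 bits */
		return field;
	}
	/* rts_encode(21) == (0x5UL << 5) | (0x2UL << 61)
	 * rts_encode(24) == 0x3UL << 61 */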
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 65037762b120..a9e19cb2f7c5 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -41,4 +41,5 @@ extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmad
extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
unsigned long page_size);
extern void radix__flush_tlb_lpid(unsigned long lpid);
+extern void radix__flush_tlb_all(void);
#endif
diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h
index 2ef55f8968a2..b312b152461b 100644
--- a/arch/powerpc/include/asm/cpu_has_feature.h
+++ b/arch/powerpc/include/asm/cpu_has_feature.h
@@ -15,7 +15,7 @@ static inline bool early_cpu_has_feature(unsigned long feature)
#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
#include <linux/jump_label.h>
-#define NUM_CPU_FTR_KEYS 64
+#define NUM_CPU_FTR_KEYS BITS_PER_LONG
extern struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS];
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 3d7fc06532a1..01b8a13f0224 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -19,4 +19,17 @@ extern u64 pnv_first_deep_stop_state;
#endif
+/* Idle state entry routines */
+#ifdef CONFIG_PPC_P7_NAP
+#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
+ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
+ std r0,0(r1); \
+ ptesync; \
+ ld r0,0(r1); \
+1: cmp cr0,r0,r0; \
+ bne 1b; \
+ IDLE_INST; \
+ b .
+#endif /* CONFIG_PPC_P7_NAP */
+
#endif
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 82026b419341..f752e6f7cfbe 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -212,6 +212,7 @@ enum {
#define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000)
#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000)
#define CPU_FTR_SUBCORE LONG_ASM_CONST(0x2000000000000000)
+#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000)
#ifndef __ASSEMBLY__
@@ -472,6 +473,7 @@ enum {
CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300)
+#define CPU_FTRS_POWER9_DD1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1)
#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -490,7 +492,7 @@ enum {
(CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
- CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9)
+ CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1)
#endif
#else
enum {
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index 666bef4ebfae..9377bdf42eb8 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -3,6 +3,7 @@
#ifndef __ASSEMBLY__
#include <linux/cpumask.h>
+#include <asm/cpu_has_feature.h>
/*
* Mapping of threads to cores
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index bed66e5743b3..2e4e7d878c8e 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -34,6 +34,7 @@
* exception handlers (including pSeries LPAR) and iSeries LPAR
* implementations as possible.
*/
+#include <asm/head-64.h>
#define EX_R9 0
#define EX_R10 8
@@ -52,7 +53,6 @@
#ifdef CONFIG_RELOCATABLE
#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
- ld r12,PACAKBASE(r13); /* get high part of &label */ \
mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
LOAD_HANDLER(r12,label); \
mtctr r12; \
@@ -84,13 +84,14 @@
/*
* We're short on space and time in the exception prolog, so we can't
- * use the normal SET_REG_IMMEDIATE macro. Normally we just need the
- * low halfword of the address, but for Kdump we need the whole low
- * word.
+ * use the normal LOAD_REG_IMMEDIATE macro to load the address of label.
+ * Instead we get the base of the kernel from paca->kernelbase and OR in the low
+ * part of label. This requires that the label be within 64KB of kernelbase, and
+ * that kernelbase be 64K aligned.
*/
#define LOAD_HANDLER(reg, label) \
- /* Handlers must be within 64K of kbase, which must be 64k aligned */ \
- ori reg,reg,(label)-_stext; /* virt addr of handler ... */
+ ld reg,PACAKBASE(r13); /* get high part of &label */ \
+ ori reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l;
/* Exception register prefixes */
#define EXC_HV H
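With the reworked LOAD_HANDLER(), the handler address is composed as kernelbase OR'd with the low 16 bits of the label's absolute address, which only works under the constraints the comment states. A sketch of the arithmetic, with illustrative values only:

	/* 'ori' can OR in at most a 16-bit immediate, hence the
	 * "within 64KB of a 64K-aligned kernelbase" constraint above. */
	static unsigned long load_handler_addr(unsigned long kernelbase,
					       unsigned long handler_abs)
	{
		return kernelbase | (handler_abs & 0xffffUL);
	}
	/* e.g. load_handler_addr(0xc000000000000000UL, 0xc000000000004e00UL)
	 *	== 0xc000000000004e00UL */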
@@ -175,7 +176,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
__EXCEPTION_PROLOG_1(area, extra, vec)
#define __EXCEPTION_PROLOG_PSERIES_1(label, h) \
- ld r12,PACAKBASE(r13); /* get high part of &label */ \
ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \
mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
LOAD_HANDLER(r12,label) \
@@ -192,10 +192,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
EXCEPTION_PROLOG_1(area, extra, vec); \
EXCEPTION_PROLOG_PSERIES_1(label, h);
-#define __KVMTEST(n) \
- lbz r10,HSTATE_IN_GUEST(r13); \
+#define __KVMTEST(h, n) \
+ lbz r10,HSTATE_IN_GUEST(r13); \
cmpwi r10,0; \
- bne do_kvm_##n
+ bne do_kvm_##h##n
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
@@ -208,8 +208,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define kvmppc_interrupt kvmppc_interrupt_pr
#endif
-#define __KVM_HANDLER(area, h, n) \
-do_kvm_##n: \
+#define __KVM_HANDLER_PROLOG(area, n) \
BEGIN_FTR_SECTION_NESTED(947) \
ld r10,area+EX_CFAR(r13); \
std r10,HSTATE_CFAR(r13); \
@@ -222,21 +221,23 @@ do_kvm_##n: \
stw r9,HSTATE_SCRATCH1(r13); \
ld r9,area+EX_R9(r13); \
std r12,HSTATE_SCRATCH0(r13); \
+
+#define __KVM_HANDLER(area, h, n) \
+ __KVM_HANDLER_PROLOG(area, n) \
li r12,n; \
b kvmppc_interrupt
#define __KVM_HANDLER_SKIP(area, h, n) \
-do_kvm_##n: \
cmpwi r10,KVM_GUEST_MODE_SKIP; \
ld r10,area+EX_R10(r13); \
beq 89f; \
- stw r9,HSTATE_SCRATCH1(r13); \
+ stw r9,HSTATE_SCRATCH1(r13); \
BEGIN_FTR_SECTION_NESTED(948) \
ld r9,area+EX_PPR(r13); \
std r9,HSTATE_PPR(r13); \
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \
ld r9,area+EX_R9(r13); \
- std r12,HSTATE_SCRATCH0(r13); \
+ std r12,HSTATE_SCRATCH0(r13); \
li r12,n; \
b kvmppc_interrupt; \
89: mtocrf 0x80,r9; \
@@ -244,12 +245,12 @@ do_kvm_##n: \
b kvmppc_skip_##h##interrupt
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#define KVMTEST(n) __KVMTEST(n)
+#define KVMTEST(h, n) __KVMTEST(h, n)
#define KVM_HANDLER(area, h, n) __KVM_HANDLER(area, h, n)
#define KVM_HANDLER_SKIP(area, h, n) __KVM_HANDLER_SKIP(area, h, n)
#else
-#define KVMTEST(n)
+#define KVMTEST(h, n)
#define KVM_HANDLER(area, h, n)
#define KVM_HANDLER_SKIP(area, h, n)
#endif
@@ -333,94 +334,79 @@ do_kvm_##n: \
/*
* Exception vectors.
*/
-#define STD_EXCEPTION_PSERIES(vec, label) \
- . = vec; \
- .globl label##_pSeries; \
-label##_pSeries: \
+#define STD_EXCEPTION_PSERIES(vec, label) \
SET_SCRATCH0(r13); /* save r13 */ \
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
- EXC_STD, KVMTEST, vec)
+ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label, \
+ EXC_STD, KVMTEST_PR, vec); \
/* Version of above for when we have to branch out-of-line */
+#define __OOL_EXCEPTION(vec, label, hdlr) \
+ SET_SCRATCH0(r13) \
+ EXCEPTION_PROLOG_0(PACA_EXGEN) \
+ b hdlr;
+
#define STD_EXCEPTION_PSERIES_OOL(vec, label) \
- .globl label##_pSeries; \
-label##_pSeries: \
- EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, vec); \
- EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_STD)
-
-#define STD_EXCEPTION_HV(loc, vec, label) \
- . = loc; \
- .globl label##_hv; \
-label##_hv: \
+ EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec); \
+ EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD)
+
+#define STD_EXCEPTION_HV(loc, vec, label) \
SET_SCRATCH0(r13); /* save r13 */ \
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
- EXC_HV, KVMTEST, vec)
+ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label, \
+ EXC_HV, KVMTEST_HV, vec);
-/* Version of above for when we have to branch out-of-line */
-#define STD_EXCEPTION_HV_OOL(vec, label) \
- .globl label##_hv; \
-label##_hv: \
- EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, vec); \
- EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_HV)
+#define STD_EXCEPTION_HV_OOL(vec, label) \
+ EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec); \
+ EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV)
#define STD_RELON_EXCEPTION_PSERIES(loc, vec, label) \
- . = loc; \
- .globl label##_relon_pSeries; \
-label##_relon_pSeries: \
/* No guest interrupts come through here */ \
SET_SCRATCH0(r13); /* save r13 */ \
- EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
- EXC_STD, NOTEST, vec)
+ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, EXC_STD, NOTEST, vec);
#define STD_RELON_EXCEPTION_PSERIES_OOL(vec, label) \
- .globl label##_relon_pSeries; \
-label##_relon_pSeries: \
EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \
- EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_STD)
+ EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_STD)
#define STD_RELON_EXCEPTION_HV(loc, vec, label) \
- . = loc; \
- .globl label##_relon_hv; \
-label##_relon_hv: \
/* No guest interrupts come through here */ \
SET_SCRATCH0(r13); /* save r13 */ \
- EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
- EXC_HV, NOTEST, vec)
+ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, EXC_HV, NOTEST, vec);
#define STD_RELON_EXCEPTION_HV_OOL(vec, label) \
- .globl label##_relon_hv; \
-label##_relon_hv: \
EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \
- EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_HV)
+ EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
/* This associates vector numbers with bits in paca->irq_happened */
#define SOFTEN_VALUE_0x500 PACA_IRQ_EE
-#define SOFTEN_VALUE_0x502 PACA_IRQ_EE
#define SOFTEN_VALUE_0x900 PACA_IRQ_DEC
-#define SOFTEN_VALUE_0x982 PACA_IRQ_DEC
+#define SOFTEN_VALUE_0x980 PACA_IRQ_DEC
#define SOFTEN_VALUE_0xa00 PACA_IRQ_DBELL
#define SOFTEN_VALUE_0xe80 PACA_IRQ_DBELL
-#define SOFTEN_VALUE_0xe82 PACA_IRQ_DBELL
#define SOFTEN_VALUE_0xe60 PACA_IRQ_HMI
-#define SOFTEN_VALUE_0xe62 PACA_IRQ_HMI
#define SOFTEN_VALUE_0xea0 PACA_IRQ_EE
-#define SOFTEN_VALUE_0xea2 PACA_IRQ_EE
#define __SOFTEN_TEST(h, vec) \
lbz r10,PACASOFTIRQEN(r13); \
cmpwi r10,0; \
li r10,SOFTEN_VALUE_##vec; \
beq masked_##h##interrupt
+
#define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec)
#define SOFTEN_TEST_PR(vec) \
- KVMTEST(vec); \
+ KVMTEST(EXC_STD, vec); \
_SOFTEN_TEST(EXC_STD, vec)
#define SOFTEN_TEST_HV(vec) \
- KVMTEST(vec); \
+ KVMTEST(EXC_HV, vec); \
_SOFTEN_TEST(EXC_HV, vec)
+#define KVMTEST_PR(vec) \
+ KVMTEST(EXC_STD, vec)
+
+#define KVMTEST_HV(vec) \
+ KVMTEST(EXC_HV, vec)
+
#define SOFTEN_NOTEST_PR(vec) _SOFTEN_TEST(EXC_STD, vec)
#define SOFTEN_NOTEST_HV(vec) _SOFTEN_TEST(EXC_HV, vec)
@@ -428,58 +414,47 @@ label##_relon_hv: \
SET_SCRATCH0(r13); /* save r13 */ \
EXCEPTION_PROLOG_0(PACA_EXGEN); \
__EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \
- EXCEPTION_PROLOG_PSERIES_1(label##_common, h);
+ EXCEPTION_PROLOG_PSERIES_1(label, h);
#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \
__MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra)
#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label) \
- . = loc; \
- .globl label##_pSeries; \
-label##_pSeries: \
_MASKABLE_EXCEPTION_PSERIES(vec, label, \
EXC_STD, SOFTEN_TEST_PR)
+#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label) \
+ EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec); \
+ EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD)
+
#define MASKABLE_EXCEPTION_HV(loc, vec, label) \
- . = loc; \
- .globl label##_hv; \
-label##_hv: \
_MASKABLE_EXCEPTION_PSERIES(vec, label, \
EXC_HV, SOFTEN_TEST_HV)
#define MASKABLE_EXCEPTION_HV_OOL(vec, label) \
- .globl label##_hv; \
-label##_hv: \
EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \
- EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_HV);
+ EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV)
#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \
SET_SCRATCH0(r13); /* save r13 */ \
EXCEPTION_PROLOG_0(PACA_EXGEN); \
- __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \
- EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, h);
-#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \
+ __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \
+ EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)
+
+#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \
__MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra)
#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label) \
- . = loc; \
- .globl label##_relon_pSeries; \
-label##_relon_pSeries: \
_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
EXC_STD, SOFTEN_NOTEST_PR)
#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \
- . = loc; \
- .globl label##_relon_hv; \
-label##_relon_hv: \
_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
EXC_HV, SOFTEN_NOTEST_HV)
#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \
- .globl label##_relon_hv; \
-label##_relon_hv: \
EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec); \
- EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_HV);
+ EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV)
/*
* Our exception common code can be passed various "additions"
@@ -505,9 +480,6 @@ BEGIN_FTR_SECTION \
END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
#define EXCEPTION_COMMON(trap, label, hdlr, ret, additions) \
- .align 7; \
- .globl label##_common; \
-label##_common: \
EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \
/* Volatile regs are potentially clobbered here */ \
additions; \
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index b4407d0add27..0031806475f0 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -45,10 +45,6 @@
#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
-#ifndef ELF_CORE_EFLAGS
-#define ELF_CORE_EFLAGS 0
-#endif
-
/* Firmware provided dump sections */
#define FADUMP_CPU_STATE_DATA 0x0001
#define FADUMP_HPTE_REGION 0x0002
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 57fec8ac7b92..ddf54f5bbdd1 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -186,6 +186,7 @@ label##3: \
#ifndef __ASSEMBLY__
void apply_feature_fixups(void);
+void setup_feature_keys(void);
#endif
#endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */
diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
new file mode 100644
index 000000000000..ab90c2fa1ea6
--- /dev/null
+++ b/arch/powerpc/include/asm/head-64.h
@@ -0,0 +1,393 @@
+#ifndef _ASM_POWERPC_HEAD_64_H
+#define _ASM_POWERPC_HEAD_64_H
+
+#include <asm/cache.h>
+
+/*
+ * We can't do CPP stringification and concatenation directly into the section
+ * name, so these macros do it for us.
+ */
+.macro define_ftsec name
+ .section ".head.text.\name\()","ax",@progbits
+.endm
+.macro define_data_ftsec name
+ .section ".head.data.\name\()","a",@progbits
+.endm
+.macro use_ftsec name
+ .section ".head.text.\name\()"
+.endm
+
+/*
+ * Fixed (location) sections are used by opening a fixed section and emitting
+ * fixed section entries into it before closing it. Multiple fixed sections
+ * can be open at any time.
+ *
+ * Each fixed section created in a .S file must have corresponding linkage
+ * directives including location, added to arch/powerpc/kernel/vmlinux.lds.S
+ *
+ * For each fixed section, code is generated into it in the order in which it
+ * appears in the source. Fixed section entries can be placed at a fixed
+ * location within the section using the _LOCATION postfix variants. These must
+ * be ordered according to their relative placement within the section.
+ *
+ * OPEN_FIXED_SECTION(section_name, start_address, end_address)
+ * FIXED_SECTION_ENTRY_BEGIN(section_name, label1)
+ *
+ * USE_FIXED_SECTION(section_name)
+ * label3:
+ * li r10,128
+ * mr r11,r10
+ *
+ * FIXED_SECTION_ENTRY_BEGIN_LOCATION(section_name, label2, start_address)
+ * FIXED_SECTION_ENTRY_END_LOCATION(section_name, label2, end_address)
+ * CLOSE_FIXED_SECTION(section_name)
+ *
+ * ZERO_FIXED_SECTION can be used to emit zeroed data.
+ *
+ * Troubleshooting:
+ * - If the build dies with "Error: attempt to move .org backwards" at
+ * CLOSE_FIXED_SECTION() or elsewhere, there may be something
+ * unexpected being added there. Remove the '. = x_len' line, rebuild, and
+ * check what is pushing the section down.
+ * - If the build dies in linking, check arch/powerpc/kernel/vmlinux.lds.S
+ * for instructions.
+ * - If the kernel crashes or hangs in very early boot, it could be linker
+ * stubs at the start of the main text.
+ */
+
+#define OPEN_FIXED_SECTION(sname, start, end) \
+ sname##_start = (start); \
+ sname##_end = (end); \
+ sname##_len = (end) - (start); \
+ define_ftsec sname; \
+ . = 0x0; \
+start_##sname:
+
+#define OPEN_TEXT_SECTION(start) \
+ text_start = (start); \
+ .section ".text","ax",@progbits; \
+ . = 0x0; \
+start_text:
+
+#define ZERO_FIXED_SECTION(sname, start, end) \
+ sname##_start = (start); \
+ sname##_end = (end); \
+ sname##_len = (end) - (start); \
+ define_data_ftsec sname; \
+ . = 0x0; \
+ . = sname##_len;
+
+#define USE_FIXED_SECTION(sname) \
+ fs_label = start_##sname; \
+ fs_start = sname##_start; \
+ use_ftsec sname;
+
+#define USE_TEXT_SECTION() \
+ fs_label = start_text; \
+ fs_start = text_start; \
+ .text
+
+#define CLOSE_FIXED_SECTION(sname) \
+ USE_FIXED_SECTION(sname); \
+ . = sname##_len; \
+end_##sname:
+
+#define __FIXED_SECTION_ENTRY_BEGIN(sname, name, __align) \
+ USE_FIXED_SECTION(sname); \
+ .align __align; \
+ .global name; \
+name:
+
+#define FIXED_SECTION_ENTRY_BEGIN(sname, name) \
+ __FIXED_SECTION_ENTRY_BEGIN(sname, name, 0)
+
+#define FIXED_SECTION_ENTRY_BEGIN_LOCATION(sname, name, start) \
+ USE_FIXED_SECTION(sname); \
+ name##_start = (start); \
+ .if (start) < sname##_start; \
+ .error "Fixed section underflow"; \
+ .abort; \
+ .endif; \
+ . = (start) - sname##_start; \
+ .global name; \
+name:
+
+#define FIXED_SECTION_ENTRY_END_LOCATION(sname, name, end) \
+ .if (end) > sname##_end; \
+ .error "Fixed section overflow"; \
+ .abort; \
+ .endif; \
+ .if (. - name > end - name##_start); \
+ .error "Fixed entry overflow"; \
+ .abort; \
+ .endif; \
+ . = ((end) - sname##_start); \
+
+
+/*
+ * These macros are used to change symbols in other fixed sections to be
+ * absolute or related to our current fixed section.
+ *
+ * - DEFINE_FIXED_SYMBOL / FIXED_SYMBOL_ABS_ADDR is used to find the
+ * absolute address of a symbol within a fixed section, from any section.
+ *
+ * - ABS_ADDR is used to find the absolute address of any symbol, from within
+ * a fixed section.
+ */
+#define DEFINE_FIXED_SYMBOL(label) \
+ label##_absolute = (label - fs_label + fs_start)
+
+#define FIXED_SYMBOL_ABS_ADDR(label) \
+ (label##_absolute)
+
+#define ABS_ADDR(label) (label - fs_label + fs_start)
+
+/*
+ * Following are the BOOK3S exception handler helper macros.
+ * Handlers come in a number of types, and each type has a number of varieties.
+ *
+ * EXC_REAL_* - real, unrelocated exception vectors
+ * EXC_VIRT_* - virt (AIL), unrelocated exception vectors
+ * TRAMP_REAL_* - real, unrelocated helpers (virt can call these)
+ * TRAMP_VIRT_* - virt, unrelocated helpers (in practice, real can use)
+ * TRAMP_KVM - KVM handlers that get put into real, unrelocated
+ * EXC_COMMON_* - virt, relocated common handlers
+ *
+ * The EXC handlers are given a name, and branch to name_common, or the
+ * appropriate KVM or masking function. Vector handler varieties are as
+ * follows:
+ *
+ * EXC_{REAL|VIRT}_BEGIN/END - used to open-code the exception
+ *
+ * EXC_{REAL|VIRT} - standard exception
+ *
+ * EXC_{REAL|VIRT}_suffix
+ * where _suffix is:
+ * - _MASKABLE - maskable exception
+ * - _OOL - out of line with trampoline to common handler
+ * - _HV - HV exception
+ *
+ * There can be combinations, e.g., EXC_VIRT_OOL_MASKABLE_HV
+ *
+ * The one unusual case is __EXC_REAL_OOL_HV_DIRECT, which is
+ * an OOL vector that branches to a specified handler rather than the usual
+ * trampoline that goes to common. It, and other underscore macros, should
+ * be used with care.
+ *
+ * KVM handlers come in the following varieties:
+ * TRAMP_KVM
+ * TRAMP_KVM_SKIP
+ * TRAMP_KVM_HV
+ * TRAMP_KVM_HV_SKIP
+ *
+ * COMMON handlers come in the following varieties:
+ * EXC_COMMON_BEGIN/END - used to open-code the handler
+ * EXC_COMMON
+ * EXC_COMMON_ASYNC
+ * EXC_COMMON_HV
+ *
+ * TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM
+ * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers.
+ */
+
+#define EXC_REAL_BEGIN(name, start, end) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start)
+
+#define EXC_REAL_END(name, start, end) \
+ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, end)
+
+#define EXC_VIRT_BEGIN(name, start, end) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start)
+
+#define EXC_VIRT_END(name, start, end) \
+ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, end)
+
+#define EXC_COMMON_BEGIN(name) \
+ USE_TEXT_SECTION(); \
+ .align 7; \
+ .global name; \
+ DEFINE_FIXED_SYMBOL(name); \
+name:
+
+#define TRAMP_REAL_BEGIN(name) \
+ FIXED_SECTION_ENTRY_BEGIN(real_trampolines, name)
+
+#define TRAMP_VIRT_BEGIN(name) \
+ FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#define TRAMP_KVM_BEGIN(name) \
+ TRAMP_REAL_BEGIN(name)
+#else
+#define TRAMP_KVM_BEGIN(name)
+#endif
+
+#define EXC_REAL_NONE(start, end) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start); \
+ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, end)
+
+#define EXC_VIRT_NONE(start, end) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start); \
+ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, end);
+
+
+#define EXC_REAL(name, start, end) \
+ EXC_REAL_BEGIN(name, start, end); \
+ STD_EXCEPTION_PSERIES(start, name##_common); \
+ EXC_REAL_END(name, start, end);
+
+#define EXC_VIRT(name, start, end, realvec) \
+ EXC_VIRT_BEGIN(name, start, end); \
+ STD_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \
+ EXC_VIRT_END(name, start, end);
+
+#define EXC_REAL_MASKABLE(name, start, end) \
+ EXC_REAL_BEGIN(name, start, end); \
+ MASKABLE_EXCEPTION_PSERIES(start, start, name##_common); \
+ EXC_REAL_END(name, start, end);
+
+#define EXC_VIRT_MASKABLE(name, start, end, realvec) \
+ EXC_VIRT_BEGIN(name, start, end); \
+ MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \
+ EXC_VIRT_END(name, start, end);
+
+#define EXC_REAL_HV(name, start, end) \
+ EXC_REAL_BEGIN(name, start, end); \
+ STD_EXCEPTION_HV(start, start, name##_common); \
+ EXC_REAL_END(name, start, end);
+
+#define EXC_VIRT_HV(name, start, end, realvec) \
+ EXC_VIRT_BEGIN(name, start, end); \
+ STD_RELON_EXCEPTION_HV(start, realvec, name##_common); \
+ EXC_VIRT_END(name, start, end);
+
+#define __EXC_REAL_OOL(name, start, end) \
+ EXC_REAL_BEGIN(name, start, end); \
+ __OOL_EXCEPTION(start, label, tramp_real_##name); \
+ EXC_REAL_END(name, start, end);
+
+#define __TRAMP_REAL_REAL_OOL(name, vec) \
+ TRAMP_REAL_BEGIN(tramp_real_##name); \
+ STD_EXCEPTION_PSERIES_OOL(vec, name##_common); \
+
+#define EXC_REAL_OOL(name, start, end) \
+ __EXC_REAL_OOL(name, start, end); \
+ __TRAMP_REAL_REAL_OOL(name, start);
+
+#define __EXC_REAL_OOL_MASKABLE(name, start, end) \
+ __EXC_REAL_OOL(name, start, end);
+
+#define __TRAMP_REAL_REAL_OOL_MASKABLE(name, vec) \
+ TRAMP_REAL_BEGIN(tramp_real_##name); \
+ MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common); \
+
+#define EXC_REAL_OOL_MASKABLE(name, start, end) \
+ __EXC_REAL_OOL_MASKABLE(name, start, end); \
+ __TRAMP_REAL_REAL_OOL_MASKABLE(name, start);
+
+#define __EXC_REAL_OOL_HV_DIRECT(name, start, end, handler) \
+ EXC_REAL_BEGIN(name, start, end); \
+ __OOL_EXCEPTION(start, label, handler); \
+ EXC_REAL_END(name, start, end);
+
+#define __EXC_REAL_OOL_HV(name, start, end) \
+ __EXC_REAL_OOL(name, start, end);
+
+#define __TRAMP_REAL_REAL_OOL_HV(name, vec) \
+ TRAMP_REAL_BEGIN(tramp_real_##name); \
+ STD_EXCEPTION_HV_OOL(vec, name##_common); \
+
+#define EXC_REAL_OOL_HV(name, start, end) \
+ __EXC_REAL_OOL_HV(name, start, end); \
+ __TRAMP_REAL_REAL_OOL_HV(name, start);
+
+#define __EXC_REAL_OOL_MASKABLE_HV(name, start, end) \
+ __EXC_REAL_OOL(name, start, end);
+
+#define __TRAMP_REAL_REAL_OOL_MASKABLE_HV(name, vec) \
+ TRAMP_REAL_BEGIN(tramp_real_##name); \
+ MASKABLE_EXCEPTION_HV_OOL(vec, name##_common); \
+
+#define EXC_REAL_OOL_MASKABLE_HV(name, start, end) \
+ __EXC_REAL_OOL_MASKABLE_HV(name, start, end); \
+ __TRAMP_REAL_REAL_OOL_MASKABLE_HV(name, start);
+
+#define __EXC_VIRT_OOL(name, start, end) \
+ EXC_VIRT_BEGIN(name, start, end); \
+ __OOL_EXCEPTION(start, label, tramp_virt_##name); \
+ EXC_VIRT_END(name, start, end);
+
+#define __TRAMP_REAL_VIRT_OOL(name, realvec) \
+ TRAMP_VIRT_BEGIN(tramp_virt_##name); \
+ STD_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \
+
+#define EXC_VIRT_OOL(name, start, end, realvec) \
+ __EXC_VIRT_OOL(name, start, end); \
+ __TRAMP_REAL_VIRT_OOL(name, realvec);
+
+#define __EXC_VIRT_OOL_MASKABLE(name, start, end) \
+ __EXC_VIRT_OOL(name, start, end);
+
+#define __TRAMP_REAL_VIRT_OOL_MASKABLE(name, realvec) \
+ TRAMP_VIRT_BEGIN(tramp_virt_##name); \
+ MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \
+
+#define EXC_VIRT_OOL_MASKABLE(name, start, end, realvec) \
+ __EXC_VIRT_OOL_MASKABLE(name, start, end); \
+ __TRAMP_REAL_VIRT_OOL_MASKABLE(name, realvec);
+
+#define __EXC_VIRT_OOL_HV(name, start, end) \
+ __EXC_VIRT_OOL(name, start, end);
+
+#define __TRAMP_REAL_VIRT_OOL_HV(name, realvec) \
+ TRAMP_VIRT_BEGIN(tramp_virt_##name); \
+ STD_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \
+
+#define EXC_VIRT_OOL_HV(name, start, end, realvec) \
+ __EXC_VIRT_OOL_HV(name, start, end); \
+ __TRAMP_REAL_VIRT_OOL_HV(name, realvec);
+
+#define __EXC_VIRT_OOL_MASKABLE_HV(name, start, end) \
+ __EXC_VIRT_OOL(name, start, end);
+
+#define __TRAMP_REAL_VIRT_OOL_MASKABLE_HV(name, realvec) \
+ TRAMP_VIRT_BEGIN(tramp_virt_##name); \
+ MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \
+
+#define EXC_VIRT_OOL_MASKABLE_HV(name, start, end, realvec) \
+ __EXC_VIRT_OOL_MASKABLE_HV(name, start, end); \
+ __TRAMP_REAL_VIRT_OOL_MASKABLE_HV(name, realvec);
+
+#define TRAMP_KVM(area, n) \
+ TRAMP_KVM_BEGIN(do_kvm_##n); \
+ KVM_HANDLER(area, EXC_STD, n); \
+
+#define TRAMP_KVM_SKIP(area, n) \
+ TRAMP_KVM_BEGIN(do_kvm_##n); \
+ KVM_HANDLER_SKIP(area, EXC_STD, n); \
+
+/*
+ * HV variant exceptions get the 0x2 bit added to their trap number.
+ */
+#define TRAMP_KVM_HV(area, n) \
+ TRAMP_KVM_BEGIN(do_kvm_H##n); \
+ KVM_HANDLER(area, EXC_HV, n + 0x2); \
+
+#define TRAMP_KVM_HV_SKIP(area, n) \
+ TRAMP_KVM_BEGIN(do_kvm_H##n); \
+ KVM_HANDLER_SKIP(area, EXC_HV, n + 0x2); \
+
+#define EXC_COMMON(name, realvec, hdlr) \
+ EXC_COMMON_BEGIN(name); \
+ STD_EXCEPTION_COMMON(realvec, name, hdlr); \
+
+#define EXC_COMMON_ASYNC(name, realvec, hdlr) \
+ EXC_COMMON_BEGIN(name); \
+ STD_EXCEPTION_COMMON_ASYNC(realvec, name, hdlr); \
+
+#define EXC_COMMON_HV(name, realvec, hdlr) \
+ EXC_COMMON_BEGIN(name); \
+ STD_EXCEPTION_COMMON(realvec + 0x2, name, hdlr); \
+
+#endif /* _ASM_POWERPC_HEAD_64_H */
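The DEFINE_FIXED_SYMBOL()/ABS_ADDR() translation above is plain rebasing arithmetic: each fixed section is assembled at address 0, so a label's real (unrelocated) address is its in-section offset added to the section's fixed start. The same computation as a C sketch (fixed_abs_addr is a hypothetical name):

	static unsigned long fixed_abs_addr(unsigned long label,    /* as assembled */
					    unsigned long fs_label, /* first symbol of section */
					    unsigned long fs_start) /* section's fixed address */
	{
		return label - fs_label + fs_start;
	}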
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
index 88b4901ac4ee..85b7a1a21e22 100644
--- a/arch/powerpc/include/asm/hmi.h
+++ b/arch/powerpc/include/asm/hmi.h
@@ -21,7 +21,7 @@
#ifndef __ASM_PPC64_HMI_H__
#define __ASM_PPC64_HMI_H__
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#define CORE_TB_RESYNC_REQ_BIT 63
#define MAX_SUBCORE_PER_CORE 4
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 2fd1690b79d2..f6fda8482f60 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -241,6 +241,35 @@ static inline void out_be64(volatile u64 __iomem *addr, u64 val)
#endif
#endif /* __powerpc64__ */
+
+/*
+ * Simple cache-inhibited accessors.
+ * Unlike the DEF_MMIO_* macros, these don't include any h/w memory
+ * barriers; callers must manage memory barriers on their own.
+ * These can only be used in hypervisor real mode.
+ */
+
+static inline u32 _lwzcix(unsigned long addr)
+{
+ u32 ret;
+
+ __asm__ __volatile__("lwzcix %0,0, %1"
+ : "=r" (ret) : "r" (addr) : "memory");
+ return ret;
+}
+
+static inline void _stbcix(u64 addr, u8 val)
+{
+ __asm__ __volatile__("stbcix %0,0,%1"
+ : : "r" (val), "r" (addr) : "memory");
+}
+
+static inline void _stwcix(u64 addr, u32 val)
+{
+ __asm__ __volatile__("stwcix %0,0,%1"
+ : : "r" (val), "r" (addr) : "memory");
+}
+
/*
* Low level IO stream instructions are defined out of line for now
*/
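Since the accessors above deliberately omit barriers, ordering is the caller's job. A minimal real-mode usage sketch; the sync placement is illustrative of the contract, not taken from this patch:

	static void rm_ci_write(u64 addr, u32 val)
	{
		__asm__ __volatile__("sync" : : : "memory"); /* order prior accesses */
		_stwcix(addr, val);                          /* CI store, no barrier */
		__asm__ __volatile__("sync" : : : "memory"); /* order against later ones */
	}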
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 5bca220bbb60..05cabed3d1bd 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -105,6 +105,15 @@
#define BOOK3S_INTERRUPT_FAC_UNAVAIL 0xf60
#define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80
+/* book3s_hv */
+
+/*
+ * Special trap used to indicate to the host that this is a
+ * passthrough interrupt that could not be handled
+ * completely in the guest.
+ */
+#define BOOK3S_INTERRUPT_HV_RM_HARD 0x5555
+
#define BOOK3S_IRQPRIO_SYSTEM_RESET 0
#define BOOK3S_IRQPRIO_DATA_SEGMENT 1
#define BOOK3S_IRQPRIO_INST_SEGMENT 2
@@ -136,6 +145,7 @@
#define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */
#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
#define RESUME_FLAG_ARCH1 (1<<2)
+#define RESUME_FLAG_ARCH2 (1<<3)
#define RESUME_GUEST 0
#define RESUME_GUEST_NV RESUME_FLAG_NV
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 8f39796c9da8..5cf306ae0ac3 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -69,6 +69,43 @@ struct hpte_cache {
int pagesize;
};
+/*
+ * Struct for a virtual core.
+ * Note: entry_exit_map combines a bitmap of threads that have entered
+ * in the bottom 8 bits and a bitmap of threads that have exited in the
+ * next 8 bits. This is so that we can atomically set the entry bit
+ * iff the exit map is 0 without taking a lock.
+ */
+struct kvmppc_vcore {
+ int n_runnable;
+ int num_threads;
+ int entry_exit_map;
+ int napping_threads;
+ int first_vcpuid;
+ u16 pcpu;
+ u16 last_cpu;
+ u8 vcore_state;
+ u8 in_guest;
+ struct kvmppc_vcore *master_vcore;
+ struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
+ struct list_head preempt_list;
+ spinlock_t lock;
+ struct swait_queue_head wq;
+ spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
+ u64 stolen_tb;
+ u64 preempt_tb;
+ struct kvm_vcpu *runner;
+ struct kvm *kvm;
+ u64 tb_offset; /* guest timebase - host timebase */
+ ulong lpcr;
+ u32 arch_compat;
+ ulong pcr;
+ ulong dpdes; /* doorbell state (POWER8) */
+ ulong vtb; /* virtual timebase */
+ ulong conferring_threads;
+ unsigned int halt_poll_ns;
+};
+
struct kvmppc_vcpu_book3s {
struct kvmppc_sid_map sid_map[SID_MAP_NUM];
struct {
@@ -83,6 +120,7 @@ struct kvmppc_vcpu_book3s {
u64 sdr1;
u64 hior;
u64 msr_mask;
+ u64 vtb;
#ifdef CONFIG_PPC_BOOK3S_32
u32 vsid_pool[VSID_POOL_SIZE];
u32 vsid_next;
@@ -191,6 +229,7 @@ extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
struct kvm_vcpu *vcpu);
extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
struct kvmppc_book3s_shadow_vcpu *svcpu);
+extern int kvm_irq_bypass;
static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
{
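The entry_exit_map packing described in the kvmppc_vcore comment can be decoded with two shifts; the VCORE_ENTRY_MAP()/VCORE_EXIT_MAP() macros kept in kvm_host.h below do exactly this. A standalone sketch (decode_entry_exit is a hypothetical name):

	static void decode_entry_exit(int map, int *entered, int *exited)
	{
		*entered = map & 0xff;		/* threads that have entered */
		*exited  = (map >> 8) & 0xff;	/* threads that have exited */
	}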
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 88d17b4ea9c8..848292176908 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -20,6 +20,8 @@
#ifndef __ASM_KVM_BOOK3S_64_H__
#define __ASM_KVM_BOOK3S_64_H__
+#include <asm/book3s/64/mmu-hash.h>
+
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
{
@@ -97,56 +99,20 @@ static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
hpte[0] = cpu_to_be64(hpte_v);
}
-static inline int __hpte_actual_psize(unsigned int lp, int psize)
-{
- int i, shift;
- unsigned int mask;
-
- /* start from 1 ignoring MMU_PAGE_4K */
- for (i = 1; i < MMU_PAGE_COUNT; i++) {
-
- /* invalid penc */
- if (mmu_psize_defs[psize].penc[i] == -1)
- continue;
- /*
- * encoding bits per actual page size
- * PTE LP actual page size
- * rrrr rrrz >=8KB
- * rrrr rrzz >=16KB
- * rrrr rzzz >=32KB
- * rrrr zzzz >=64KB
- * .......
- */
- shift = mmu_psize_defs[i].shift - LP_SHIFT;
- if (shift > LP_BITS)
- shift = LP_BITS;
- mask = (1 << shift) - 1;
- if ((lp & mask) == mmu_psize_defs[psize].penc[i])
- return i;
- }
- return -1;
-}
-
static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
unsigned long pte_index)
{
- int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
+ int i, b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
unsigned int penc;
unsigned long rb = 0, va_low, sllp;
unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
if (v & HPTE_V_LARGE) {
- for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) {
-
- /* valid entries have a shift value */
- if (!mmu_psize_defs[b_psize].shift)
- continue;
-
- a_psize = __hpte_actual_psize(lp, b_psize);
- if (a_psize != -1)
- break;
- }
+ i = hpte_page_sizes[lp];
+ b_psize = i & 0xf;
+ a_psize = i >> 4;
}
+
/*
* Ignore the top 14 bits of va
* v has the top two bits covering segment size, hence move
@@ -159,7 +125,6 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
/* This covers 14..54 bits of va */
rb = (v & ~0x7fUL) << 16; /* AVA field */
- rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */
/*
* AVA in v had cleared lower 23 bits. We need to derive
* that from pteg index
@@ -211,49 +176,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
break;
}
}
- rb |= (v >> 54) & 0x300; /* B field */
+ rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */
return rb;
}
-static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
- bool is_base_size)
-{
-
- int size, a_psize;
- /* Look at the 8 bit LP value */
- unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
-
- /* only handle 4k, 64k and 16M pages for now */
- if (!(h & HPTE_V_LARGE))
- return 1ul << 12;
- else {
- for (size = 0; size < MMU_PAGE_COUNT; size++) {
- /* valid entries have a shift value */
- if (!mmu_psize_defs[size].shift)
- continue;
-
- a_psize = __hpte_actual_psize(lp, size);
- if (a_psize != -1) {
- if (is_base_size)
- return 1ul << mmu_psize_defs[size].shift;
- return 1ul << mmu_psize_defs[a_psize].shift;
- }
- }
-
- }
- return 0;
-}
-
-static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
-{
- return __hpte_page_size(h, l, 0);
-}
-
-static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
-{
- return __hpte_page_size(h, l, 1);
-}
-
static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
{
return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index ec35af34a3fb..28350a294b1e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -43,6 +43,8 @@
#include <asm/cputhreads.h>
#define KVM_MAX_VCPU_ID (threads_per_subcore * KVM_MAX_VCORES)
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
#ifdef CONFIG_KVM_MMIO
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#endif
@@ -95,42 +97,49 @@ struct kvmppc_vcpu_book3s;
struct kvmppc_book3s_shadow_vcpu;
struct kvm_vm_stat {
- u32 remote_tlb_flush;
+ ulong remote_tlb_flush;
};
struct kvm_vcpu_stat {
- u32 sum_exits;
- u32 mmio_exits;
- u32 signal_exits;
- u32 light_exits;
+ u64 sum_exits;
+ u64 mmio_exits;
+ u64 signal_exits;
+ u64 light_exits;
/* Account for special types of light exits: */
- u32 itlb_real_miss_exits;
- u32 itlb_virt_miss_exits;
- u32 dtlb_real_miss_exits;
- u32 dtlb_virt_miss_exits;
- u32 syscall_exits;
- u32 isi_exits;
- u32 dsi_exits;
- u32 emulated_inst_exits;
- u32 dec_exits;
- u32 ext_intr_exits;
- u32 halt_successful_poll;
- u32 halt_attempted_poll;
- u32 halt_poll_invalid;
- u32 halt_wakeup;
- u32 dbell_exits;
- u32 gdbell_exits;
- u32 ld;
- u32 st;
+ u64 itlb_real_miss_exits;
+ u64 itlb_virt_miss_exits;
+ u64 dtlb_real_miss_exits;
+ u64 dtlb_virt_miss_exits;
+ u64 syscall_exits;
+ u64 isi_exits;
+ u64 dsi_exits;
+ u64 emulated_inst_exits;
+ u64 dec_exits;
+ u64 ext_intr_exits;
+ u64 halt_poll_success_ns;
+ u64 halt_poll_fail_ns;
+ u64 halt_wait_ns;
+ u64 halt_successful_poll;
+ u64 halt_attempted_poll;
+ u64 halt_successful_wait;
+ u64 halt_poll_invalid;
+ u64 halt_wakeup;
+ u64 dbell_exits;
+ u64 gdbell_exits;
+ u64 ld;
+ u64 st;
#ifdef CONFIG_PPC_BOOK3S
- u32 pf_storage;
- u32 pf_instruc;
- u32 sp_storage;
- u32 sp_instruc;
- u32 queue_intr;
- u32 ld_slow;
- u32 st_slow;
+ u64 pf_storage;
+ u64 pf_instruc;
+ u64 sp_storage;
+ u64 sp_instruc;
+ u64 queue_intr;
+ u64 ld_slow;
+ u64 st_slow;
#endif
+ u64 pthru_all;
+ u64 pthru_host;
+ u64 pthru_bad_aff;
};
enum kvm_exit_types {
@@ -197,6 +206,8 @@ struct kvmppc_spapr_tce_table {
struct kvmppc_xics;
struct kvmppc_icp;
+struct kvmppc_passthru_irqmap;
+
/*
* The reverse mapping array has one entry for each HPTE,
* which stores the guest's view of the second word of the HPTE
@@ -267,6 +278,7 @@ struct kvm_arch {
#endif
#ifdef CONFIG_KVM_XICS
struct kvmppc_xics *xics;
+ struct kvmppc_passthru_irqmap *pimap;
#endif
struct kvmppc_ops *kvm_ops;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -275,41 +287,6 @@ struct kvm_arch {
#endif
};
-/*
- * Struct for a virtual core.
- * Note: entry_exit_map combines a bitmap of threads that have entered
- * in the bottom 8 bits and a bitmap of threads that have exited in the
- * next 8 bits. This is so that we can atomically set the entry bit
- * iff the exit map is 0 without taking a lock.
- */
-struct kvmppc_vcore {
- int n_runnable;
- int num_threads;
- int entry_exit_map;
- int napping_threads;
- int first_vcpuid;
- u16 pcpu;
- u16 last_cpu;
- u8 vcore_state;
- u8 in_guest;
- struct kvmppc_vcore *master_vcore;
- struct list_head runnable_threads;
- struct list_head preempt_list;
- spinlock_t lock;
- struct swait_queue_head wq;
- spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
- u64 stolen_tb;
- u64 preempt_tb;
- struct kvm_vcpu *runner;
- struct kvm *kvm;
- u64 tb_offset; /* guest timebase - host timebase */
- ulong lpcr;
- u32 arch_compat;
- ulong pcr;
- ulong dpdes; /* doorbell state (POWER8) */
- ulong conferring_threads;
-};
-
#define VCORE_ENTRY_MAP(vc) ((vc)->entry_exit_map & 0xff)
#define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8)
#define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0)
@@ -329,6 +306,7 @@ struct kvmppc_vcore {
#define VCORE_SLEEPING 3
#define VCORE_RUNNING 4
#define VCORE_EXITING 5
+#define VCORE_POLLING 6
/*
* Struct used to manage memory for a virtual processor area
@@ -397,6 +375,20 @@ struct kvmhv_tb_accumulator {
u64 tb_max; /* max time */
};
+#ifdef CONFIG_PPC_BOOK3S_64
+struct kvmppc_irq_map {
+ u32 r_hwirq;
+ u32 v_hwirq;
+ struct irq_desc *desc;
+};
+
+#define KVMPPC_PIRQ_MAPPED 1024
+struct kvmppc_passthru_irqmap {
+ int n_mapped;
+ struct kvmppc_irq_map mapped[KVMPPC_PIRQ_MAPPED];
+};
+#endif
+
# ifdef CONFIG_PPC_FSL_BOOK3E
#define KVMPPC_BOOKE_IAC_NUM 2
#define KVMPPC_BOOKE_DAC_NUM 2
@@ -483,7 +475,6 @@ struct kvm_vcpu_arch {
ulong purr;
ulong spurr;
ulong ic;
- ulong vtb;
ulong dscr;
ulong amr;
ulong uamor;
@@ -668,7 +659,6 @@ struct kvm_vcpu_arch {
long pgfault_index;
unsigned long pgfault_hpte[2];
- struct list_head run_list;
struct task_struct *run_task;
struct kvm_run *kvm_run;
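The passthrough map added above is a fixed-size array of host-to-guest hwirq pairs. A minimal lookup sketch over it, assuming only the structures from this hunk (find_irq_map is a hypothetical helper; the real-mode code in this series does a comparable scan):

	static struct kvmppc_irq_map *
	find_irq_map(struct kvmppc_passthru_irqmap *pimap, u32 r_hwirq)
	{
		int i;

		for (i = 0; i < pimap->n_mapped; i++)
			if (pimap->mapped[i].r_hwirq == r_hwirq)
				return &pimap->mapped[i];
		return NULL;	/* not a mapped passthrough interrupt */
	}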
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2544edabe7f3..f6e49640dbe1 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -287,6 +287,10 @@ struct kvmppc_ops {
long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
unsigned long arg);
int (*hcall_implemented)(unsigned long hcall);
+ int (*irq_bypass_add_producer)(struct irq_bypass_consumer *,
+ struct irq_bypass_producer *);
+ void (*irq_bypass_del_producer)(struct irq_bypass_consumer *,
+ struct irq_bypass_producer *);
};
extern struct kvmppc_ops *kvmppc_hv_ops;
@@ -453,8 +457,19 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{
return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
}
+
+static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
+ struct kvm *kvm)
+{
+ if (kvm && kvm_irq_bypass)
+ return kvm->arch.pimap;
+ return NULL;
+}
+
extern void kvmppc_alloc_host_rm_ops(void);
extern void kvmppc_free_host_rm_ops(void);
+extern void kvmppc_free_pimap(struct kvm *kvm);
+extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
@@ -464,10 +479,23 @@ extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
extern void kvmppc_xics_ipi_action(void);
+extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+ unsigned long host_irq);
+extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+ unsigned long host_irq);
+extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr,
+ struct kvmppc_irq_map *irq_map,
+ struct kvmppc_passthru_irqmap *pimap);
extern int h_ipi_redirect;
#else
+static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
+ struct kvm *kvm)
+ { return NULL; }
static inline void kvmppc_alloc_host_rm_ops(void) {};
static inline void kvmppc_free_host_rm_ops(void) {};
+static inline void kvmppc_free_pimap(struct kvm *kvm) {};
+static inline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
+ { return 0; }
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 0420b388dd83..e02cbc6a6c70 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -61,7 +61,7 @@ struct machdep_calls {
void (*init_IRQ)(void);
- /* Return an irq, or NO_IRQ to indicate there are none pending. */
+ /* Return an irq, or 0 to indicate there are none pending. */
unsigned int (*get_irq)(void);
/* PCI stuff */
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index cd4f04a74802..b62a8d43a06c 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -313,6 +313,9 @@ extern int book3e_htw_mode;
* return 1, indicating that the tlb requires preloading.
*/
#define HUGETLB_NEED_PRELOAD
+
+#define mmu_cleanup_all NULL
+
#endif
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index e2fb408f8398..e88368354e49 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -204,6 +204,10 @@ extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;
* make it match the size of our bolted TLB area
*/
extern u64 ppc64_rma_size;
+
+/* Cleanup function used by kexec */
+extern void mmu_cleanup_all(void);
+extern void radix__mmu_cleanup_all(void);
#endif /* CONFIG_PPC64 */
struct mm_struct;
@@ -271,6 +275,7 @@ static inline bool early_radix_enabled(void)
#define MMU_PAGE_16G 13
#define MMU_PAGE_64G 14
+/* N.B. we need to change the type of hpte_page_sizes if this gets to be > 16 */
#define MMU_PAGE_COUNT 15
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 9d2cd0c36ec2..5c451140660a 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -18,6 +18,7 @@ extern void destroy_context(struct mm_struct *mm);
#ifdef CONFIG_SPAPR_TCE_IOMMU
struct mm_iommu_table_group_mem_t;
+extern int isolate_lru_page(struct page *page); /* from internal.h */
extern bool mm_iommu_preregistered(void);
extern long mm_iommu_get(unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem);
@@ -71,7 +72,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
/* Mark this context as having been used on the new CPU */
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next)))
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
/* 32-bit keeps track of the current PGDIR in the thread struct */
#ifdef CONFIG_PPC32
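The switch_mm() change above is the classic test-before-set optimisation: cpumask_set_cpu() is an atomic RMW that dirties the mask's cache line, while a plain read can stay shared across CPUs once the bit is set. The pattern in isolation, assuming the standard cpumask API:

	static void mark_cpu_used(int cpu, struct cpumask *mask)
	{
		/* read-only fast path after the first call on this CPU */
		if (!cpumask_test_cpu(cpu, mask))
			cpumask_set_cpu(cpu, mask);
	}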
diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h
index 7b589178be46..4d52ccfc2366 100644
--- a/arch/powerpc/include/asm/mmzone.h
+++ b/arch/powerpc/include/asm/mmzone.h
@@ -41,6 +41,9 @@ u64 memory_hotplug_max(void);
#else
#define memory_hotplug_max() memblock_end_of_DRAM()
#endif /* CONFIG_NEED_MULTIPLE_NODES */
+#ifdef CONFIG_FA_DUMP
+#define __HAVE_ARCH_RESERVED_KERNEL_PAGES
+#endif
#endif /* __KERNEL__ */
#endif /* _ASM_MMZONE_H_ */
diff --git a/arch/powerpc/include/asm/mpic_msgr.h b/arch/powerpc/include/asm/mpic_msgr.h
index d4f471fb1031..088420d8aa59 100644
--- a/arch/powerpc/include/asm/mpic_msgr.h
+++ b/arch/powerpc/include/asm/mpic_msgr.h
@@ -122,9 +122,9 @@ static inline void mpic_msgr_set_destination(struct mpic_msgr *msgr,
* @msgr: the message register whose IRQ is to be returned
*
* Returns the IRQ number associated with the given message register.
- * NO_IRQ is returned if this message register is not capable of
- * receiving interrupts. What message register can and cannot receive
- * interrupts is specified in the device tree for the system.
+ * 0 is returned if this message register is not capable of receiving
+ * interrupts. Which message registers can and cannot receive interrupts is
+ * specified in the device tree for the system.
*/
static inline int mpic_msgr_get_irq(struct mpic_msgr *msgr)
{
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 780847597514..c219ef7be53b 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -267,7 +267,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
}
-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+static inline void __ptep_set_access_flags(struct mm_struct *mm,
+ pte_t *ptep, pte_t entry)
{
unsigned long set = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index d4d808cf905e..653a1838469d 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -300,7 +300,8 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
/* Set the dirty and/or accessed bits atomically in a linux PTE, this
* function doesn't need to flush the hash entry
*/
-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+static inline void __ptep_set_access_flags(struct mm_struct *mm,
+ pte_t *ptep, pte_t entry)
{
unsigned long bits = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index ee05bd203630..e958b7096f19 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -67,6 +67,7 @@ int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func,
int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func,
uint64_t offset, uint32_t data);
int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
+int64_t opal_rm_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority);
int64_t opal_register_exception_handler(uint64_t opal_exception,
uint64_t handler_address,
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 148303e7771f..6a6792bb39fb 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -183,11 +183,6 @@ struct paca_struct {
*/
u16 in_mce;
u8 hmi_event_available; /* HMI event is available */
- /*
- * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
- * more details
- */
- struct sibling_subcore_state *sibling_subcore_state;
#endif
/* Stuff for accurate time accounting */
@@ -202,6 +197,13 @@ struct paca_struct {
struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
#endif
struct kvmppc_host_state kvm_hstate;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /*
+ * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
+ * more details
+ */
+ struct sibling_subcore_state *sibling_subcore_state;
+#endif
#endif
};
diff --git a/arch/powerpc/include/asm/parport.h b/arch/powerpc/include/asm/parport.h
index a452968b29ea..6595ad1d18cc 100644
--- a/arch/powerpc/include/asm/parport.h
+++ b/arch/powerpc/include/asm/parport.h
@@ -28,7 +28,7 @@ static int parport_pc_find_nonpci_ports (int autoirq, int autodma)
io1 = prop[1]; io2 = prop[2];
virq = irq_of_parse_and_map(np, 0);
- if (virq == NO_IRQ)
+ if (!virq)
continue;
if (parport_pc_probe_port(io1, io2, virq, autodma, NULL, 0)
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index b5e88e4a171a..c0309c59bed8 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -301,6 +301,7 @@ extern void pci_process_bridge_OF_ranges(struct pci_controller *hose,
/* Allocate & free a PCI host bridge structure */
extern struct pci_controller *pcibios_alloc_controller(struct device_node *dev);
extern void pcibios_free_controller(struct pci_controller *phb);
+extern void pcibios_free_controller_deferred(struct pci_host_bridge *bridge);
#ifdef CONFIG_PCI
extern int pcibios_vaddr_is_ioport(void __iomem *address);
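
pcibios_free_controller_deferred() is intended to be installed as the host bridge's release callback, so the controller is freed only when the last reference to the bridge is dropped rather than at hot-unplug time. A hedged sketch of the wiring, using the generic pci_set_host_bridge_release() hook ('hose' is a hypothetical pci_controller):

        struct pci_host_bridge *bridge = pci_find_host_bridge(hose->bus);

        /* Defer freeing 'hose' until the bridge refcount hits zero. */
        pci_set_host_bridge_release(bridge, pcibios_free_controller_deferred,
                                    (void *)hose);
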
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index 0cbd8134ce81..696438f09aea 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -12,10 +12,11 @@
#include <linux/pci.h>
#include <linux/pci_hotplug.h>
+#include <linux/irq.h>
#include <misc/cxl-base.h>
#include <asm/opal-api.h>
-#define PCI_SLOT_ID_PREFIX 0x8000000000000000
+#define PCI_SLOT_ID_PREFIX (1UL << 63)
#define PCI_SLOT_ID(phb_id, bdfn) \
(PCI_SLOT_ID_PREFIX | ((uint64_t)(bdfn) << 16) | (phb_id))
@@ -33,6 +34,8 @@ int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num);
void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num);
int pnv_cxl_get_irq_count(struct pci_dev *dev);
struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev);
+int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq);
+bool is_pnv_opal_msi(struct irq_chip *chip);
#ifdef CONFIG_CXL_BASE
int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs,
@@ -60,6 +63,8 @@ struct pnv_php_slot {
#define PNV_PHP_STATE_POPULATED 2
#define PNV_PHP_STATE_OFFLINE 3
int state;
+ int irq;
+ struct workqueue_struct *wq;
struct device_node *dn;
struct pci_dev *pdev;
struct pci_bus *bus;
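
Rewriting PCI_SLOT_ID_PREFIX as (1UL << 63) makes the constant's type unambiguously unsigned long instead of relying on how an unsuffixed 64-bit literal is typed. The encoding itself is unchanged: bit 63 is the prefix, the bdfn sits in bits 16 and up, and the PHB id occupies the low bits. A worked example with hypothetical values:

        /* PHB 3, bus 0 device 1 function 0 => bdfn = 0x0008 */
        uint64_t id = PCI_SLOT_ID(3, 0x0008);
        /* id == 0x8000000000080003 (prefix | bdfn << 16 | phb_id) */
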
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 127ebf5862b4..54ff8ce7fa96 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -236,6 +236,7 @@
#define PPC_INST_STWU 0x94000000
#define PPC_INST_MFLR 0x7c0802a6
#define PPC_INST_MTLR 0x7c0803a6
+#define PPC_INST_MTCTR 0x7c0903a6
#define PPC_INST_CMPWI 0x2c000000
#define PPC_INST_CMPDI 0x2c200000
#define PPC_INST_CMPW 0x7c000000
@@ -250,6 +251,7 @@
#define PPC_INST_SUB 0x7c000050
#define PPC_INST_BLR 0x4e800020
#define PPC_INST_BLRL 0x4e800021
+#define PPC_INST_BCTR 0x4e800420
#define PPC_INST_MULLD 0x7c0001d2
#define PPC_INST_MULLW 0x7c0001d6
#define PPC_INST_MULHWU 0x7c000016
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index d5d5b5e348f2..c73750b0d9fa 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -201,14 +201,12 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
#ifdef PPC64_ELF_ABI_v2
#define _GLOBAL(name) \
- .section ".text"; \
.align 2 ; \
.type name,@function; \
.globl name; \
name:
#define _GLOBAL_TOC(name) \
- .section ".text"; \
.align 2 ; \
.type name,@function; \
.globl name; \
@@ -217,13 +215,6 @@ name: \
addi r2,r2,(.TOC.-0b)@l; \
.localentry name,.-name
-#define _KPROBE(name) \
- .section ".kprobes.text","a"; \
- .align 2 ; \
- .type name,@function; \
- .globl name; \
-name:
-
#define DOTSYM(a) a
#else
@@ -232,35 +223,20 @@ name:
#define GLUE(a,b) XGLUE(a,b)
#define _GLOBAL(name) \
- .section ".text"; \
.align 2 ; \
.globl name; \
.globl GLUE(.,name); \
- .section ".opd","aw"; \
+ .pushsection ".opd","aw"; \
name: \
.quad GLUE(.,name); \
.quad .TOC.@tocbase; \
.quad 0; \
- .previous; \
+ .popsection; \
.type GLUE(.,name),@function; \
GLUE(.,name):
#define _GLOBAL_TOC(name) _GLOBAL(name)
-#define _KPROBE(name) \
- .section ".kprobes.text","a"; \
- .align 2 ; \
- .globl name; \
- .globl GLUE(.,name); \
- .section ".opd","aw"; \
-name: \
- .quad GLUE(.,name); \
- .quad .TOC.@tocbase; \
- .quad 0; \
- .previous; \
- .type GLUE(.,name),@function; \
-GLUE(.,name):
-
#define DOTSYM(a) GLUE(.,a)
#endif
@@ -272,20 +248,28 @@ GLUE(.,name):
n:
#define _GLOBAL(n) \
- .text; \
.stabs __stringify(n:F-1),N_FUN,0,0,n;\
.globl n; \
n:
#define _GLOBAL_TOC(name) _GLOBAL(name)
-#define _KPROBE(n) \
- .section ".kprobes.text","a"; \
- .globl n; \
-n:
-
#endif
+/*
+ * __kprobes (the C annotation) puts the symbol into the .kprobes.text
+ * section, which gets emitted at the end of regular text.
+ *
+ * _ASM_NOKPROBE_SYMBOL and NOKPROBE_SYMBOL just add the symbol to
+ * a blacklist. The former is for core kprobe functions/data, the
+ * latter is for those that incidentally must be excluded from probing
+ * and allows them to be linked at a more optimal location within text.
+ */
+#define _ASM_NOKPROBE_SYMBOL(entry) \
+ .pushsection "_kprobe_blacklist","aw"; \
+ PPC_LONG (entry) ; \
+ .popsection
+
#define FUNC_START(name) _GLOBAL(name)
#define FUNC_END(name)
@@ -527,7 +511,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
#endif
#define MTMSRD(r) mtmsr r
#define MTMSR_EERI(reg) mtmsr reg
-#define CLR_TOP32(r)
#endif
#endif /* __KERNEL__ */
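
_ASM_NOKPROBE_SYMBOL is the assembly-side counterpart of the C macro NOKPROBE_SYMBOL(): both simply record the symbol's address in the _kprobe_blacklist section so kprobes refuses to probe it, without forcing the code into .kprobes.text. For comparison, the C side looks like this (the handler name is hypothetical):

        #include <linux/kprobes.h>

        /* A probe firing in here could recurse before state is saved,
         * so blacklist it rather than relocating it to .kprobes.text. */
        static void early_mce_fixup(struct pt_regs *regs)
        {
                /* ... */
        }
        NOKPROBE_SYMBOL(early_mce_fixup);
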
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 68e3bf57b027..c07c31b0e89e 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -147,7 +147,7 @@ typedef struct {
} mm_segment_t;
#define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET]
-#define TS_TRANS_FPR(i) transact_fp.fpr[i][TS_FPROFFSET]
+#define TS_CKFPR(i) ckfp_state.fpr[i][TS_FPROFFSET]
/* FP and VSX 0-31 register set */
struct thread_fp_state {
@@ -257,6 +257,7 @@ struct thread_struct {
int used_spe; /* set if process has used spe */
#endif /* CONFIG_SPE */
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ u8 load_tm;
u64 tm_tfhar; /* Transaction fail handler addr */
u64 tm_texasr; /* Transaction exception & summary */
u64 tm_tfiar; /* Transaction fail instr address reg */
@@ -267,20 +268,17 @@ struct thread_struct {
unsigned long tm_dscr;
/*
- * Transactional FP and VSX 0-31 register set.
- * NOTE: the sense of these is the opposite of the integer ckpt_regs!
+ * Checkpointed FP and VSX 0-31 register set.
*
* When a transaction is active/signalled/scheduled etc., *regs is the
* most recent set of/speculated GPRs with ckpt_regs being the older
* checkpointed regs to which we roll back if transaction aborts.
*
- * However, fpr[] is the checkpointed 'base state' of FP regs, and
- * transact_fpr[] is the new set of transactional values.
- * VRs work the same way.
+ * These are analogous to how ckpt_regs and pt_regs work
*/
- struct thread_fp_state transact_fp;
- struct thread_vr_state transact_vr;
- unsigned long transact_vrsave;
+ struct thread_fp_state ckfp_state; /* Checkpointed FP state */
+ struct thread_vr_state ckvr_state; /* Checkpointed VR state */
+ unsigned long ckvrsave; /* Checkpointed VRSAVE */
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
void* kvm_shadow_vcpu; /* KVM internal data */
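
With the rename, the checkpointed FP/VR state follows the same naming as ckpt_regs, which it is now documented as being analogous to. A hedged sketch of reading it through the new accessor ('tsk' is a hypothetical task pointer):

        /* After tm_reclaim(), checkpointed FP register 3 of 'tsk': */
        u64 ck_fpr3 = tsk->thread.TS_CKFPR(3);
        /* expands to tsk->thread.ckfp_state.fpr[3][TS_FPROFFSET] */
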
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index f69f40f1519a..2a620789954b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -475,6 +475,9 @@
#define HID0_POWER8_1TO4LPAR __MASK(51)
#define HID0_POWER8_DYNLPARDIS __MASK(48)
+/* POWER9 HID0 bits */
+#define HID0_POWER9_RADIX __MASK(63 - 8)
+
#define SPRN_HID1 0x3F1 /* Hardware Implementation Register 1 */
#ifdef CONFIG_6xx
#define HID1_EMCP (1<<31) /* 7450 Machine Check Pin Enable */
@@ -737,6 +740,7 @@
#define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */
#define SPRN_MMCR1 798
#define SPRN_MMCR2 785
+#define SPRN_UMMCR2 769
#define SPRN_MMCRA 0x312
#define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
#define MMCRA_SDAR_DCACHE_MISS 0x40000000UL
@@ -1247,7 +1251,7 @@ static inline void mtmsr_isync(unsigned long val)
: "memory")
#endif
-extern void msr_check_and_set(unsigned long bits);
+extern unsigned long msr_check_and_set(unsigned long bits);
extern bool strict_msr_control;
extern void __msr_check_and_clear(unsigned long bits);
static inline void msr_check_and_clear(unsigned long bits)
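
msr_check_and_set() now returns the resulting MSR value, letting callers learn which facility bits ended up set without issuing a second mfmsr. A hedged usage sketch:

        /* Enable FP and observe the resulting MSR in one call. */
        unsigned long msr = msr_check_and_set(MSR_FP);

        if (msr & MSR_VEC) {
                /* VMX happened to be enabled already; no need to
                 * turn it on separately. */
        }
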
diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h
index 9322c28aebd2..5ff77722a52d 100644
--- a/arch/powerpc/include/asm/signal.h
+++ b/arch/powerpc/include/asm/signal.h
@@ -5,6 +5,4 @@
#include <uapi/asm/signal.h>
#include <uapi/asm/ptrace.h>
-extern unsigned long get_tm_stackpointer(struct pt_regs *regs);
-
#endif /* _ASM_POWERPC_SIGNAL_H */
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 0a74ebe934e1..17c8380673a6 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -75,14 +75,6 @@ static inline void disable_kernel_spe(void)
static inline void __giveup_spe(struct task_struct *t) { }
#endif
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-extern void flush_tmregs_to_thread(struct task_struct *);
-#else
-static inline void flush_tmregs_to_thread(struct task_struct *t)
-{
-}
-#endif
-
static inline void clear_task_ebb(struct task_struct *t)
{
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h
index c22d704b6d41..82e06ca3a49b 100644
--- a/arch/powerpc/include/asm/tm.h
+++ b/arch/powerpc/include/asm/tm.h
@@ -9,11 +9,6 @@
#ifndef __ASSEMBLY__
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-extern void do_load_up_transact_fpu(struct thread_struct *thread);
-extern void do_load_up_transact_altivec(struct thread_struct *thread);
-#endif
-
extern void tm_enable(void);
extern void tm_reclaim(struct thread_struct *thread,
unsigned long orig_msr, uint8_t cause);
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index b7c20f0b8fbe..c266227fdd5b 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -308,29 +308,20 @@ extern unsigned long __copy_tofrom_user(void __user *to,
static inline unsigned long copy_from_user(void *to,
const void __user *from, unsigned long n)
{
- unsigned long over;
-
- if (access_ok(VERIFY_READ, from, n))
+ if (likely(access_ok(VERIFY_READ, from, n))) {
+ check_object_size(to, n, false);
return __copy_tofrom_user((__force void __user *)to, from, n);
- if ((unsigned long)from < TASK_SIZE) {
- over = (unsigned long)from + n - TASK_SIZE;
- return __copy_tofrom_user((__force void __user *)to, from,
- n - over) + over;
}
+ memset(to, 0, n);
return n;
}
static inline unsigned long copy_to_user(void __user *to,
const void *from, unsigned long n)
{
- unsigned long over;
-
- if (access_ok(VERIFY_WRITE, to, n))
+ if (access_ok(VERIFY_WRITE, to, n)) {
+ check_object_size(from, n, true);
return __copy_tofrom_user(to, (__force void __user *)from, n);
- if ((unsigned long)to < TASK_SIZE) {
- over = (unsigned long)to + n - TASK_SIZE;
- return __copy_tofrom_user(to, (__force void __user *)from,
- n - over) + over;
}
return n;
}
@@ -372,6 +363,9 @@ static inline unsigned long __copy_from_user_inatomic(void *to,
if (ret == 0)
return 0;
}
+
+ check_object_size(to, n, false);
+
return __copy_tofrom_user((__force void __user *)to, from, n);
}
@@ -398,6 +392,9 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to,
if (ret == 0)
return 0;
}
+
+ check_object_size(from, n, true);
+
return __copy_tofrom_user(to, (__force const void __user *)from, n);
}
@@ -422,10 +419,6 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size)
might_fault();
if (likely(access_ok(VERIFY_WRITE, addr, size)))
return __clear_user(addr, size);
- if ((unsigned long)addr < TASK_SIZE) {
- unsigned long over = (unsigned long)addr + size - TASK_SIZE;
- return __clear_user(addr, size - over) + over;
- }
return size;
}
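
The copy_from_user() rewrite drops the old partial-copy-past-TASK_SIZE fallback; on an access_ok() failure the whole destination is zeroed, so a caller that only checks for a non-zero return can never observe stale kernel memory. A hedged caller sketch (struct and names hypothetical):

        struct sample_req {                     /* hypothetical */
                u32 flags;
                u64 addr;
        };

        static long handle_req(const void __user *ubuf)
        {
                struct sample_req req;

                /* On failure 'req' is fully zeroed rather than left
                 * holding stale stack contents, so later error paths
                 * cannot leak uninitialized data. */
                if (copy_from_user(&req, ubuf, sizeof(req)))
                        return -EFAULT;
                /* ... */
                return 0;
        }
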
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index f5f729c11578..f0b238516e9b 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -159,6 +159,8 @@ extern void xics_teardown_cpu(void);
extern void xics_kexec_teardown_cpu(int secondary);
extern void xics_migrate_irqs_away(void);
extern void icp_native_eoi(struct irq_data *d);
+extern int xics_set_irq_type(struct irq_data *d, unsigned int flow_type);
+extern int xics_retrigger(struct irq_data *data);
#ifdef CONFIG_SMP
extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
unsigned int strict_check);
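
xics_set_irq_type() and xics_retrigger() have the signatures of the generic irq_chip callbacks, so the XICS backends can hand them straight to the core. A hedged sketch of the wiring (the chip instance here is illustrative, not the in-tree one):

        static struct irq_chip xics_example_chip = {
                .name           = "XICS-example",       /* illustrative */
                .irq_set_type   = xics_set_irq_type,
                .irq_retrigger  = xics_retrigger,
        };
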
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index b2027a5cf508..aded29ad2e8f 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -31,8 +31,7 @@ obj-y := cputable.o ptrace.o syscalls.o \
process.o systbl.o idle.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
- udbg.o misc.o io.o dma.o \
- misc_$(CONFIG_WORD_SIZE).o \
+ udbg.o misc.o io.o dma.o misc_$(BITS).o \
of_platform.o prom_parse.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
@@ -41,7 +40,7 @@ obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
-obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o hmi.o
+obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o
@@ -70,23 +69,23 @@ obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o
ifeq ($(CONFIG_FSL_BOOKE),y)
obj-$(CONFIG_HIBERNATION) += swsusp_booke.o
else
-obj-$(CONFIG_HIBERNATION) += swsusp_$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_HIBERNATION) += swsusp_$(BITS).o
endif
obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o
-obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_MODULES) += module.o module_$(BITS).o
obj-$(CONFIG_44x) += cpu_setup_44x.o
obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o
obj-$(CONFIG_PPC_DOORBELL) += dbell.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
-extra-y := head_$(CONFIG_WORD_SIZE).o
+extra-y := head_$(BITS).o
extra-$(CONFIG_40x) := head_40x.o
extra-$(CONFIG_44x) := head_44x.o
extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
extra-$(CONFIG_8xx) := head_8xx.o
extra-y += vmlinux.lds
-obj-$(CONFIG_RELOCATABLE) += reloc_$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o
obj-$(CONFIG_PPC32) += entry_32.o setup_32.o
obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
@@ -104,11 +103,11 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o
pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o
-obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \
+obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \
pci-common.o pci_of_scan.o
obj-$(CONFIG_PCI_MSI) += msi.o
obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \
- machine_kexec_$(CONFIG_WORD_SIZE).o
+ machine_kexec_$(BITS).o
obj-$(CONFIG_AUDIT) += audit.o
obj64-$(CONFIG_AUDIT) += compat_audit.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index b89d14c0352c..caec7bf3b99a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -142,12 +142,12 @@ int main(void)
DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr));
DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr));
DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs));
- DEFINE(THREAD_TRANSACT_VRSTATE, offsetof(struct thread_struct,
- transact_vr));
- DEFINE(THREAD_TRANSACT_VRSAVE, offsetof(struct thread_struct,
- transact_vrsave));
- DEFINE(THREAD_TRANSACT_FPSTATE, offsetof(struct thread_struct,
- transact_fp));
+ DEFINE(THREAD_CKVRSTATE, offsetof(struct thread_struct,
+ ckvr_state));
+ DEFINE(THREAD_CKVRSAVE, offsetof(struct thread_struct,
+ ckvrsave));
+ DEFINE(THREAD_CKFPSTATE, offsetof(struct thread_struct,
+ ckfp_state));
/* Local pt_regs on stack for Transactional Memory funcs. */
DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD +
sizeof(struct pt_regs) + 16);
@@ -506,7 +506,6 @@ int main(void)
DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic));
- DEFINE(VCPU_VTB, offsetof(struct kvm_vcpu, arch.vtb));
DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr));
DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr));
DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor));
@@ -557,6 +556,7 @@ int main(void)
DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr));
DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr));
DEFINE(VCORE_DPDES, offsetof(struct kvmppc_vcore, dpdes));
+ DEFINE(VCORE_VTB, offsetof(struct kvmppc_vcore, vtb));
DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));
DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
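
The THREAD_CKVRSTATE/THREAD_CKVRSAVE/THREAD_CKFPSTATE renames matter because asm-offsets.c only exists to be scraped: each DEFINE() emits the struct offset into a generated header that assembly then includes. The mechanism, in sketch form (DEFINE() is the standard kbuild helper, not part of this patch):

        #define DEFINE(sym, val) \
                asm volatile("\n->" #sym " %0 " #val : : "i" (val))

        /* Assembly then uses the renamed constant, e.g. (illustrative):
         *      ld      r5, THREAD_CKVRSAVE(r4)
         */
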
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 74248ab18e98..6c4646ac9234 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -506,6 +506,25 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
+ { /* Power9 DD1*/
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x004e0100,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD1,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power9",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .flush_tlb = __flush_tlb_power9,
+ .platform = "power9",
+ },
{ /* Power9 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004e0000,
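
The new DD1 entry must precede the generic POWER9 one because cpu_specs is scanned in order and the first mask/value match wins; the DD1 mask (0xffffff00) also checks the revision byte, while the generic mask (0xffff0000) does not. The selection logic is essentially (simplified sketch of identify_cpu()):

        unsigned int pvr = mfspr(SPRN_PVR);
        struct cpu_spec *s;

        for (s = cpu_specs; s < cpu_specs + ARRAY_SIZE(cpu_specs); s++)
                if ((pvr & s->pvr_mask) == s->pvr_value)
                        break;  /* first, most specific match wins */
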
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index c9bc78e9c610..f25731627d7f 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -116,6 +116,7 @@ struct eeh_ops *eeh_ops = NULL;
/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);
+EXPORT_SYMBOL_GPL(confirm_error_lock);
/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);
@@ -168,10 +169,10 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
int n = 0, l = 0;
char buffer[128];
- n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n",
+ n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
edev->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
- pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n",
+ pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
edev->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
@@ -1044,7 +1045,7 @@ int eeh_init(void)
if (eeh_enabled())
pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
else
- pr_warn("EEH: No capable adapters found\n");
+ pr_info("EEH: No capable adapters found\n");
return ret;
}
@@ -1502,6 +1503,7 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option)
break;
case EEH_OPT_THAW_MMIO:
case EEH_OPT_THAW_DMA:
+ case EEH_OPT_FREEZE_PE:
if (!eeh_ops || !eeh_ops->set_option) {
ret = -ENOENT;
break;
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 5f36e8a70daa..a62be72da274 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -993,9 +993,17 @@ static void eeh_handle_special_event(void)
/* Notify all devices to be down */
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
- bus = eeh_pe_bus_get(phb_pe);
eeh_pe_dev_traverse(pe,
eeh_report_failure, NULL);
+ bus = eeh_pe_bus_get(phb_pe);
+ if (!bus) {
+ pr_err("%s: Cannot find PCI bus for "
+ "PHB#%d-PE#%x\n",
+ __func__,
+ pe->phb->global_number,
+ pe->addr);
+ break;
+ }
pci_hp_remove_devices(bus);
}
pci_unlock_rescan_remove();
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index f0520da85759..de7d091c4c31 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -581,6 +581,7 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state)
{
eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
}
+EXPORT_SYMBOL_GPL(eeh_pe_state_mark);
static void *__eeh_pe_dev_mode_mark(void *data, void *flag)
{
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 9899032230b4..83428a283fa0 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -654,7 +654,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
#endif /* CONFIG_SMP */
tophys(r0,r4)
- CLR_TOP32(r0)
mtspr SPRN_SPRG_THREAD,r0 /* Update current THREAD phys addr */
lwz r1,KSP(r4) /* Load new stack pointer */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6b8bc0dd09d4..51df82b61084 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -139,7 +139,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
#ifdef CONFIG_PPC_BOOK3E
wrteei 1
#else
- ld r11,PACAKMSR(r13)
+ li r11,MSR_RI
ori r11,r11,MSR_EE
mtmsrd r11,1
#endif /* CONFIG_PPC_BOOK3E */
@@ -195,7 +195,6 @@ system_call: /* label this so stack traces look sane */
#ifdef CONFIG_PPC_BOOK3E
wrteei 0
#else
- ld r10,PACAKMSR(r13)
/*
* For performance reasons we clear RI the same time that we
* clear EE. We only need to clear RI just before we restore r13
@@ -203,8 +202,7 @@ system_call: /* label this so stack traces look sane */
* We have to be careful to restore RI if we branch anywhere from
* here (eg syscall_exit_work).
*/
- li r9,MSR_RI
- andc r11,r10,r9
+ li r11,0
mtmsrd r11,1
#endif /* CONFIG_PPC_BOOK3E */
@@ -221,13 +219,12 @@ system_call: /* label this so stack traces look sane */
#endif
2: addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_BOOK3S
+ li r10,MSR_RI
mtmsrd r10,1 /* Restore RI */
#endif
bl restore_math
#ifdef CONFIG_PPC_BOOK3S
- ld r10,PACAKMSR(r13)
- li r9,MSR_RI
- andc r11,r10,r9 /* Re-clear RI */
+ li r11,0
mtmsrd r11,1
#endif
ld r8,_MSR(r1)
@@ -308,6 +305,7 @@ syscall_enosys:
syscall_exit_work:
#ifdef CONFIG_PPC_BOOK3S
+ li r10,MSR_RI
mtmsrd r10,1 /* Restore RI */
#endif
/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
@@ -354,7 +352,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
#ifdef CONFIG_PPC_BOOK3E
wrteei 1
#else
- ld r10,PACAKMSR(r13)
+ li r10,MSR_RI
ori r10,r10,MSR_EE
mtmsrd r10,1
#endif /* CONFIG_PPC_BOOK3E */
@@ -368,13 +366,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
tabort_syscall:
/* Firstly we need to enable TM in the kernel */
mfmsr r10
- li r13, 1
- rldimi r10, r13, MSR_TM_LG, 63-MSR_TM_LG
+ li r9, 1
+ rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG
mtmsrd r10, 0
/* tabort, this dooms the transaction, nothing else */
- li r13, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
- TABORT(R13)
+ li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
+ TABORT(R9)
/*
* Return directly to userspace. We have corrupted user register state,
@@ -382,8 +380,8 @@ tabort_syscall:
* resume after the tbegin of the aborted transaction with the
* checkpointed register state.
*/
- li r13, MSR_RI
- andc r10, r10, r13
+ li r9, MSR_RI
+ andc r10, r10, r9
mtmsrd r10, 1
mtspr SPRN_SRR0, r11
mtspr SPRN_SRR1, r12
@@ -619,7 +617,7 @@ _GLOBAL(ret_from_except_lite)
#ifdef CONFIG_PPC_BOOK3E
wrteei 0
#else
- ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */
+ li r10,MSR_RI
mtmsrd r10,1 /* Update machine state */
#endif /* CONFIG_PPC_BOOK3E */
@@ -751,7 +749,7 @@ resume_kernel:
#ifdef CONFIG_PPC_BOOK3E
wrteei 0
#else
- ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */
+ li r10,MSR_RI
mtmsrd r10,1 /* Update machine state */
#endif /* CONFIG_PPC_BOOK3E */
#endif /* CONFIG_PREEMPT */
@@ -841,8 +839,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* userspace and we take an exception after restoring r13,
* we end up corrupting the userspace r13 value.
*/
- ld r4,PACAKMSR(r13) /* Get kernel MSR without EE */
- andc r4,r4,r0 /* r0 contains MSR_RI here */
+ li r4,0
mtmsrd r4,1
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 41091fdf9bd8..08992f8f5036 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -16,72 +16,71 @@
#include <asm/exception-64s.h>
#include <asm/ptrace.h>
#include <asm/cpuidle.h>
+#include <asm/head-64.h>
/*
+ * There are a few constraints to be concerned with.
+ * - Real mode exceptions code/data must be located at their physical location.
+ * - Virtual mode exceptions must be mapped at their 0xc000... location.
+ * - Fixed location code must not call directly beyond the __end_interrupts
+ * area when built with CONFIG_RELOCATABLE. The LOAD_HANDLER / bctr
+ * sequence must be used instead.
+ * - LOAD_HANDLER targets must be within first 64K of physical 0 /
+ * virtual 0xc00...
+ * - Conditional branch targets must be within +/-32K of caller.
+ *
+ * "Virtual exceptions" run with relocation on (MSR_IR=1, MSR_DR=1), and
+ * therefore don't have to run in physically located code or rfid to
+ * virtual mode kernel code. However on relocatable kernels they do have
+ * to branch to KERNELBASE offset because the rest of the kernel (outside
+ * the exception vectors) may be located elsewhere.
+ *
+ * Virtual exceptions correspond with physical, except their entry points
+ * are offset by 0xc000000000000000 and also tend to have an additional
+ * 0x4000 offset applied. Virtual exceptions are enabled with the Alternate
+ * Interrupt Location (AIL) bit set in the LPCR. However this does not
+ * guarantee they will be delivered virtually. Some conditions (see the ISA)
+ * cause exceptions to be delivered in real mode.
+ *
+ * It's impossible to receive interrupts below 0x300 via AIL.
+ *
+ * KVM: None of the virtual exceptions are from the guest. Anything that
+ * escalated to HV=1 from HV=0 is delivered via real mode handlers.
+ *
+ *
* We layout physical memory as follows:
* 0x0000 - 0x00ff : Secondary processor spin code
- * 0x0100 - 0x17ff : pSeries Interrupt prologs
- * 0x1800 - 0x4000 : interrupt support common interrupt prologs
- * 0x4000 - 0x5fff : pSeries interrupts with IR=1,DR=1
- * 0x6000 - 0x6fff : more interrupt support including for IR=1,DR=1
+ * 0x0100 - 0x18ff : Real mode pSeries interrupt vectors
+ * 0x1900 - 0x3fff : Real mode trampolines
+ * 0x4000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors
+ * 0x5900 - 0x6fff : Relon mode trampolines
* 0x7000 - 0x7fff : FWNMI data area
- * 0x8000 - 0x8fff : Initial (CPU0) segment table
- * 0x9000 - : Early init and support code
+ * 0x8000 - .... : Common interrupt handlers, remaining early
+ * setup code, rest of kernel.
+ *
+ * We could reclaim 0x4000-0x42ff for real mode trampolines if the space
+ * is necessary. Until then it's more consistent to explicitly put VIRT_NONE
+ * vectors there.
*/
- /* Syscall routine is used twice, in reloc-off and reloc-on paths */
-#define SYSCALL_PSERIES_1 \
-BEGIN_FTR_SECTION \
- cmpdi r0,0x1ebe ; \
- beq- 1f ; \
-END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
- mr r9,r13 ; \
- GET_PACA(r13) ; \
- mfspr r11,SPRN_SRR0 ; \
-0:
-
-#define SYSCALL_PSERIES_2_RFID \
- mfspr r12,SPRN_SRR1 ; \
- ld r10,PACAKBASE(r13) ; \
- LOAD_HANDLER(r10, system_call_entry) ; \
- mtspr SPRN_SRR0,r10 ; \
- ld r10,PACAKMSR(r13) ; \
- mtspr SPRN_SRR1,r10 ; \
- rfid ; \
- b . ; /* prevent speculative execution */
-
-#define SYSCALL_PSERIES_3 \
- /* Fast LE/BE switch system call */ \
-1: mfspr r12,SPRN_SRR1 ; \
- xori r12,r12,MSR_LE ; \
- mtspr SPRN_SRR1,r12 ; \
- rfid ; /* return to userspace */ \
- b . ; /* prevent speculative execution */
-
-#if defined(CONFIG_RELOCATABLE)
- /*
- * We can't branch directly so we do it via the CTR which
- * is volatile across system calls.
- */
-#define SYSCALL_PSERIES_2_DIRECT \
- mflr r10 ; \
- ld r12,PACAKBASE(r13) ; \
- LOAD_HANDLER(r12, system_call_entry) ; \
- mtctr r12 ; \
- mfspr r12,SPRN_SRR1 ; \
- /* Re-use of r13... No spare regs to do this */ \
- li r13,MSR_RI ; \
- mtmsrd r13,1 ; \
- GET_PACA(r13) ; /* get r13 back */ \
- bctr ;
+OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900)
+OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x4000)
+OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900)
+OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000)
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+/*
+ * Data area reserved for FWNMI option.
+ * This address (0x7000) is fixed by the RPA.
+ * pseries and powernv need to keep the whole page from
+ * 0x7000 to 0x8000 free for use by the firmware
+ */
+ZERO_FIXED_SECTION(fwnmi_page, 0x7000, 0x8000)
+OPEN_TEXT_SECTION(0x8000)
#else
- /* We can branch directly */
-#define SYSCALL_PSERIES_2_DIRECT \
- mfspr r12,SPRN_SRR1 ; \
- li r10,MSR_RI ; \
- mtmsrd r10,1 ; /* Set RI (EE=0) */ \
- b system_call_common ;
+OPEN_TEXT_SECTION(0x7000)
#endif
+USE_FIXED_SECTION(real_vectors)
+
/*
* This is the start of the interrupt handlers for pSeries
* This code runs with relocation off.
@@ -90,12 +89,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
* Therefore any relative branches in this section must only
* branch to labels in this section.
*/
- . = 0x100
.globl __start_interrupts
__start_interrupts:
- .globl system_reset_pSeries;
-system_reset_pSeries:
+/* No virt vectors corresponding with 0x0..0x100 */
+EXC_VIRT_NONE(0x4000, 0x4100)
+
+EXC_REAL_BEGIN(system_reset, 0x100, 0x200)
SET_SCRATCH0(r13)
#ifdef CONFIG_PPC_P7_NAP
BEGIN_FTR_SECTION
@@ -136,290 +136,44 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif /* CONFIG_PPC_P7_NAP */
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
NOTEST, 0x100)
+EXC_REAL_END(system_reset, 0x100, 0x200)
+EXC_VIRT_NONE(0x4100, 0x4200)
+EXC_COMMON(system_reset_common, 0x100, system_reset_exception)
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Vectors for the FWNMI option. Share common code.
+ */
+TRAMP_REAL_BEGIN(system_reset_fwnmi)
+ SET_SCRATCH0(r13) /* save r13 */
+ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
+ NOTEST, 0x100)
+#endif /* CONFIG_PPC_PSERIES */
+
- . = 0x200
-machine_check_pSeries_1:
+EXC_REAL_BEGIN(machine_check, 0x200, 0x300)
/* This is moved out of line as it can be patched by FW, but
* some code path might still want to branch into the original
* vector
*/
SET_SCRATCH0(r13) /* save r13 */
-#ifdef CONFIG_PPC_P7_NAP
-BEGIN_FTR_SECTION
- /* Running native on arch 2.06 or later, check if we are
- * waking up from nap. We only handle no state loss and
- * supervisor state loss. We do -not- handle hypervisor
- * state loss at this time.
+ /*
+ * Running native on arch 2.06 or later, we may wake up from winkle
+ * inside a machine check. If so, the last bit of HSPRG0 will be set
+ * to 1. Hence clear it unconditionally.
*/
- mfspr r13,SPRN_SRR1
- rlwinm. r13,r13,47-31,30,31
- OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
- beq 9f
-
- mfspr r13,SPRN_SRR1
- rlwinm. r13,r13,47-31,30,31
- /* waking up from powersave (nap) state */
- cmpwi cr1,r13,2
- /* Total loss of HV state is fatal. let's just stay stuck here */
- OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
- bgt cr1,.
-9:
- OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif /* CONFIG_PPC_P7_NAP */
+ GET_PACA(r13)
+ clrrdi r13,r13,1
+ SET_PACA(r13)
EXCEPTION_PROLOG_0(PACA_EXMC)
BEGIN_FTR_SECTION
b machine_check_powernv_early
FTR_SECTION_ELSE
b machine_check_pSeries_0
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-
- . = 0x300
- .globl data_access_pSeries
-data_access_pSeries:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD,
- KVMTEST, 0x300)
-
- . = 0x380
- .globl data_access_slb_pSeries
-data_access_slb_pSeries:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r3,SPRN_DAR
- mfspr r12,SPRN_SRR1
-#ifndef CONFIG_RELOCATABLE
- b slb_miss_realmode
-#else
- /*
- * We can't just use a direct branch to slb_miss_realmode
- * because the distance from here to there depends on where
- * the kernel ends up being put.
- */
- mfctr r11
- ld r10,PACAKBASE(r13)
- LOAD_HANDLER(r10, slb_miss_realmode)
- mtctr r10
- bctr
-#endif
-
- STD_EXCEPTION_PSERIES(0x400, instruction_access)
-
- . = 0x480
- .globl instruction_access_slb_pSeries
-instruction_access_slb_pSeries:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x480)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- mfspr r12,SPRN_SRR1
-#ifndef CONFIG_RELOCATABLE
- b slb_miss_realmode
-#else
- mfctr r11
- ld r10,PACAKBASE(r13)
- LOAD_HANDLER(r10, slb_miss_realmode)
- mtctr r10
- bctr
-#endif
-
- /* We open code these as we can't have a ". = x" (even with
- * x = "." within a feature section
- */
- . = 0x500;
- .globl hardware_interrupt_pSeries;
- .globl hardware_interrupt_hv;
-hardware_interrupt_pSeries:
-hardware_interrupt_hv:
- BEGIN_FTR_SECTION
- _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt,
- EXC_HV, SOFTEN_TEST_HV)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502)
- FTR_SECTION_ELSE
- _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt,
- EXC_STD, SOFTEN_TEST_PR)
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500)
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-
- STD_EXCEPTION_PSERIES(0x600, alignment)
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x600)
-
- STD_EXCEPTION_PSERIES(0x700, program_check)
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x700)
-
- STD_EXCEPTION_PSERIES(0x800, fp_unavailable)
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x800)
-
- . = 0x900
- .globl decrementer_pSeries
-decrementer_pSeries:
- _MASKABLE_EXCEPTION_PSERIES(0x900, decrementer, EXC_STD, SOFTEN_TEST_PR)
-
- STD_EXCEPTION_HV(0x980, 0x982, hdecrementer)
-
- MASKABLE_EXCEPTION_PSERIES(0xa00, 0xa00, doorbell_super)
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xa00)
-
- STD_EXCEPTION_PSERIES(0xb00, trap_0b)
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xb00)
-
- . = 0xc00
- .globl system_call_pSeries
-system_call_pSeries:
- /*
- * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems
- * that support it) before changing to HMT_MEDIUM. That allows the KVM
- * code to save that value into the guest state (it is the guest's PPR
- * value). Otherwise just change to HMT_MEDIUM as userspace has
- * already saved the PPR.
- */
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
- SET_SCRATCH0(r13)
- GET_PACA(r13)
- std r9,PACA_EXGEN+EX_R9(r13)
- OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);
- HMT_MEDIUM;
- std r10,PACA_EXGEN+EX_R10(r13)
- OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR);
- mfcr r9
- KVMTEST(0xc00)
- GET_SCRATCH0(r13)
-#else
- HMT_MEDIUM;
-#endif
- SYSCALL_PSERIES_1
- SYSCALL_PSERIES_2_RFID
- SYSCALL_PSERIES_3
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
-
- STD_EXCEPTION_PSERIES(0xd00, single_step)
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xd00)
-
- /* At 0xe??? we have a bunch of hypervisor exceptions, we branch
- * out of line to handle them
- */
- . = 0xe00
-hv_data_storage_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b h_data_storage_hv
-
- . = 0xe20
-hv_instr_storage_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b h_instr_storage_hv
-
- . = 0xe40
-emulation_assist_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b emulation_assist_hv
-
- . = 0xe60
-hv_exception_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b hmi_exception_early
-
- . = 0xe80
-hv_doorbell_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b h_doorbell_hv
-
- . = 0xea0
-hv_virt_irq_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b h_virt_irq_hv
-
- /* We need to deal with the Altivec unavailable exception
- * here which is at 0xf20, thus in the middle of the
- * prolog code of the PerformanceMonitor one. A little
- * trickery is thus necessary
- */
- . = 0xf00
-performance_monitor_pseries_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b performance_monitor_pSeries
-
- . = 0xf20
-altivec_unavailable_pseries_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b altivec_unavailable_pSeries
-
- . = 0xf40
-vsx_unavailable_pseries_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b vsx_unavailable_pSeries
-
- . = 0xf60
-facility_unavailable_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b facility_unavailable_pSeries
-
- . = 0xf80
-hv_facility_unavailable_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b facility_unavailable_hv
-
-#ifdef CONFIG_CBE_RAS
- STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error)
- KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202)
-#endif /* CONFIG_CBE_RAS */
-
- STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
- KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1300)
-
- . = 0x1500
- .global denorm_exception_hv
-denorm_exception_hv:
- mtspr SPRN_SPRG_HSCRATCH0,r13
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500)
-
-#ifdef CONFIG_PPC_DENORMALISATION
- mfspr r10,SPRN_HSRR1
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
- addi r11,r11,-4 /* HSRR0 is next instruction */
- bne+ denorm_assist
-#endif
-
- KVMTEST(0x1500)
- EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV)
- KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1500)
-
-#ifdef CONFIG_CBE_RAS
- STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance)
- KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602)
-#endif /* CONFIG_CBE_RAS */
-
- STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x1700)
-
-#ifdef CONFIG_CBE_RAS
- STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal)
- KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802)
-#else
- . = 0x1800
-#endif /* CONFIG_CBE_RAS */
-
-
-/*** Out of line interrupts support ***/
-
- .align 7
- /* moved from 0x200 */
-machine_check_powernv_early:
+EXC_REAL_END(machine_check, 0x200, 0x300)
+EXC_VIRT_NONE(0x4200, 0x4300)
+TRAMP_REAL_BEGIN(machine_check_powernv_early)
BEGIN_FTR_SECTION
EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
/*
@@ -472,7 +226,6 @@ BEGIN_FTR_SECTION
mfmsr r11 /* get MSR value */
ori r11,r11,MSR_ME /* turn on ME bit */
ori r11,r11,MSR_RI /* turn on RI bit */
- ld r12,PACAKBASE(r13) /* get high part of &label */
LOAD_HANDLER(r12, machine_check_handle_early)
1: mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r11
@@ -485,7 +238,6 @@ BEGIN_FTR_SECTION
*/
addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */
ld r11,PACAKMSR(r13)
- ld r12,PACAKBASE(r13)
LOAD_HANDLER(r12, unrecover_mce)
li r10,MSR_ME
andc r11,r11,r10 /* Turn off MSR_ME */
@@ -493,295 +245,266 @@ BEGIN_FTR_SECTION
b . /* prevent speculative execution */
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-machine_check_pSeries:
+TRAMP_REAL_BEGIN(machine_check_pSeries)
.globl machine_check_fwnmi
machine_check_fwnmi:
SET_SCRATCH0(r13) /* save r13 */
EXCEPTION_PROLOG_0(PACA_EXMC)
machine_check_pSeries_0:
- EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST, 0x200)
- EXCEPTION_PROLOG_PSERIES_1(machine_check_common, EXC_STD)
- KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200)
- KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
- KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x400)
- KVM_HANDLER(PACA_EXSLB, EXC_STD, 0x480)
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x900)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982)
-
-#ifdef CONFIG_PPC_DENORMALISATION
-denorm_assist:
-BEGIN_FTR_SECTION
-/*
- * To denormalise we need to move a copy of the register to itself.
- * For POWER6 do that here for all FP regs.
- */
- mfmsr r10
- ori r10,r10,(MSR_FP|MSR_FE0|MSR_FE1)
- xori r10,r10,(MSR_FE0|MSR_FE1)
- mtmsrd r10
- sync
+ EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
+ /*
+ * The following is essentially EXCEPTION_PROLOG_PSERIES_1 with the
+ * difference that MSR_RI is not enabled, because PACA_EXMC is being
+ * used, so a nested machine check would corrupt it. machine_check_common
+ * enables MSR_RI.
+ */
+ ld r10,PACAKMSR(r13)
+ xori r10,r10,MSR_RI
+ mfspr r11,SPRN_SRR0
+ LOAD_HANDLER(r12, machine_check_common)
+ mtspr SPRN_SRR0,r12
+ mfspr r12,SPRN_SRR1
+ mtspr SPRN_SRR1,r10
+ rfid
+ b . /* prevent speculative execution */
-#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1
-#define FMR4(n) FMR2(n) ; FMR2(n+2)
-#define FMR8(n) FMR4(n) ; FMR4(n+4)
-#define FMR16(n) FMR8(n) ; FMR8(n+8)
-#define FMR32(n) FMR16(n) ; FMR16(n+16)
- FMR32(0)
+TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
-FTR_SECTION_ELSE
-/*
- * To denormalise we need to move a copy of the register to itself.
- * For POWER7 do that here for the first 32 VSX registers only.
- */
- mfmsr r10
- oris r10,r10,MSR_VSX@h
- mtmsrd r10
- sync
+EXC_COMMON_BEGIN(machine_check_common)
+ /*
+ * Machine check is different because we use a different
+ * save area: PACA_EXMC instead of PACA_EXGEN.
+ */
+ mfspr r10,SPRN_DAR
+ std r10,PACA_EXMC+EX_DAR(r13)
+ mfspr r10,SPRN_DSISR
+ stw r10,PACA_EXMC+EX_DSISR(r13)
+ EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
+ FINISH_NAP
+ RECONCILE_IRQ_STATE(r10, r11)
+ ld r3,PACA_EXMC+EX_DAR(r13)
+ lwz r4,PACA_EXMC+EX_DSISR(r13)
+ /* Enable MSR_RI when finished with PACA_EXMC */
+ li r10,MSR_RI
+ mtmsrd r10,1
+ std r3,_DAR(r1)
+ std r4,_DSISR(r1)
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_exception
+ b ret_from_except
-#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1)
-#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2)
-#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4)
-#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8)
-#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16)
- XVCPSGNDP32(0)
+#define MACHINE_CHECK_HANDLER_WINDUP \
+ /* Clear MSR_RI before setting SRR0 and SRR1. */\
+ li r0,MSR_RI; \
+ mfmsr r9; /* get MSR value */ \
+ andc r9,r9,r0; \
+ mtmsrd r9,1; /* Clear MSR_RI */ \
+ /* Move original SRR0 and SRR1 into the respective regs */ \
+ ld r9,_MSR(r1); \
+ mtspr SPRN_SRR1,r9; \
+ ld r3,_NIP(r1); \
+ mtspr SPRN_SRR0,r3; \
+ ld r9,_CTR(r1); \
+ mtctr r9; \
+ ld r9,_XER(r1); \
+ mtxer r9; \
+ ld r9,_LINK(r1); \
+ mtlr r9; \
+ REST_GPR(0, r1); \
+ REST_8GPRS(2, r1); \
+ REST_GPR(10, r1); \
+ ld r11,_CCR(r1); \
+ mtcr r11; \
+ /* Decrement paca->in_mce. */ \
+ lhz r12,PACA_IN_MCE(r13); \
+ subi r12,r12,1; \
+ sth r12,PACA_IN_MCE(r13); \
+ REST_GPR(11, r1); \
+ REST_2GPRS(12, r1); \
+ /* restore original r1. */ \
+ ld r1,GPR1(r1)
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206)
+ /*
+ * Handle machine check early in real mode. We come here with
+ * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
+ */
+EXC_COMMON_BEGIN(machine_check_handle_early)
+ std r0,GPR0(r1) /* Save r0 */
+ EXCEPTION_PROLOG_COMMON_3(0x200)
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_early
+ std r3,RESULT(r1) /* Save result */
+ ld r12,_MSR(r1)
+#ifdef CONFIG_PPC_P7_NAP
+ /*
+ * Check if thread was in power saving mode. We come here when any
+ * of the following is true:
+ * a. thread wasn't in power saving mode
+ * b. thread was in power saving mode with no state loss,
+ * supervisor state loss or hypervisor state loss.
+ *
+ * Go back to nap/sleep/winkle mode again if (b) is true.
+ */
+ rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */
+ beq 4f /* No, it wasn't */
+ /* Thread was in power saving mode. Go back to nap again. */
+ cmpwi r11,2
+ blt 3f
+ /* Supervisor/Hypervisor state loss */
+ li r0,1
+ stb r0,PACA_NAPSTATELOST(r13)
+3: bl machine_check_queue_event
+ MACHINE_CHECK_HANDLER_WINDUP
+ GET_PACA(r13)
+ ld r1,PACAR1(r13)
+ /*
+ * Check what idle state this CPU was in and go back to same mode
+ * again.
+ */
+ lbz r3,PACA_THREAD_IDLE_STATE(r13)
+ cmpwi r3,PNV_THREAD_NAP
+ bgt 10f
+ IDLE_STATE_ENTER_SEQ(PPC_NAP)
+ /* No return */
+10:
+ cmpwi r3,PNV_THREAD_SLEEP
+ bgt 2f
+ IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+ /* No return */
-BEGIN_FTR_SECTION
- b denorm_done
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
-/*
- * To denormalise we need to move a copy of the register to itself.
- * For POWER8 we need to do that for all 64 VSX registers
- */
- XVCPSGNDP32(32)
-denorm_done:
- mtspr SPRN_HSRR0,r11
- mtcrf 0x80,r9
- ld r9,PACA_EXGEN+EX_R9(r13)
- RESTORE_PPR_PACA(PACA_EXGEN, r10)
-BEGIN_FTR_SECTION
- ld r10,PACA_EXGEN+EX_CFAR(r13)
- mtspr SPRN_CFAR,r10
-END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
- ld r10,PACA_EXGEN+EX_R10(r13)
- ld r11,PACA_EXGEN+EX_R11(r13)
- ld r12,PACA_EXGEN+EX_R12(r13)
- ld r13,PACA_EXGEN+EX_R13(r13)
- HRFID
- b .
+2:
+ /*
+ * Go back to winkle. Note that this thread was woken up in the
+ * machine check handler from winkle and has not restored the
+ * per-subcore state. Hence, before going back to winkle, set the
+ * last bit of HSPRG0 to 1. This makes sure that if this thread gets
+ * woken up again at reset vector 0x100, it will get a chance to
+ * restore the subcore state.
+ */
+ ori r13,r13,1
+ SET_PACA(r13)
+ IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
+ /* No return */
+4:
#endif
-
- .align 7
- /* moved from 0xe00 */
- STD_EXCEPTION_HV_OOL(0xe02, h_data_storage)
- KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0xe02)
- STD_EXCEPTION_HV_OOL(0xe22, h_instr_storage)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22)
- STD_EXCEPTION_HV_OOL(0xe42, emulation_assist)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42)
- MASKABLE_EXCEPTION_HV_OOL(0xe62, hmi_exception)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62)
-
- MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82)
-
- MASKABLE_EXCEPTION_HV_OOL(0xea2, h_virt_irq)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xea2)
-
- /* moved from 0xf00 */
- STD_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf00)
- STD_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf20)
- STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf40)
- STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf60)
- STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82)
-
-/*
- * An interrupt came in while soft-disabled. We set paca->irq_happened, then:
- * - If it was a decrementer interrupt, we bump the dec to max and and return.
- * - If it was a doorbell we return immediately since doorbells are edge
- * triggered and won't automatically refire.
- * - If it was a HMI we return immediately since we handled it in realmode
- * and it won't refire.
- * - else we hard disable and return.
- * This is called with r10 containing the value to OR to the paca field.
- */
-#define MASKED_INTERRUPT(_H) \
-masked_##_H##interrupt: \
- std r11,PACA_EXGEN+EX_R11(r13); \
- lbz r11,PACAIRQHAPPENED(r13); \
- or r11,r11,r10; \
- stb r11,PACAIRQHAPPENED(r13); \
- cmpwi r10,PACA_IRQ_DEC; \
- bne 1f; \
- lis r10,0x7fff; \
- ori r10,r10,0xffff; \
- mtspr SPRN_DEC,r10; \
- b 2f; \
-1: cmpwi r10,PACA_IRQ_DBELL; \
- beq 2f; \
- cmpwi r10,PACA_IRQ_HMI; \
- beq 2f; \
- mfspr r10,SPRN_##_H##SRR1; \
- rldicl r10,r10,48,1; /* clear MSR_EE */ \
- rotldi r10,r10,16; \
- mtspr SPRN_##_H##SRR1,r10; \
-2: mtcrf 0x80,r9; \
- ld r9,PACA_EXGEN+EX_R9(r13); \
- ld r10,PACA_EXGEN+EX_R10(r13); \
- ld r11,PACA_EXGEN+EX_R11(r13); \
- GET_SCRATCH0(r13); \
- ##_H##rfid; \
- b .
-
- MASKED_INTERRUPT()
- MASKED_INTERRUPT(H)
-
-/*
- * Called from arch_local_irq_enable when an interrupt needs
- * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate
- * which kind of interrupt. MSR:EE is already off. We generate a
- * stackframe like if a real interrupt had happened.
- *
- * Note: While MSR:EE is off, we need to make sure that _MSR
- * in the generated frame has EE set to 1 or the exception
- * handler will not properly re-enable them.
- */
-_GLOBAL(__replay_interrupt)
- /* We are going to jump to the exception common code which
- * will retrieve various register values from the PACA which
- * we don't give a damn about, so we don't bother storing them.
+ /*
+ * Check if we are coming from hypervisor userspace. If yes, then we
+ * continue in the host kernel in V mode to deliver the MC event.
*/
- mfmsr r12
- mflr r11
- mfcr r9
- ori r12,r12,MSR_EE
- cmpwi r3,0x900
- beq decrementer_common
- cmpwi r3,0x500
- beq hardware_interrupt_common
-BEGIN_FTR_SECTION
- cmpwi r3,0xe80
- beq h_doorbell_common
- cmpwi r3,0xea0
- beq h_virt_irq_common
- cmpwi r3,0xe60
- beq hmi_exception_common
-FTR_SECTION_ELSE
- cmpwi r3,0xa00
- beq doorbell_super_common
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
- blr
-
-#ifdef CONFIG_PPC_PSERIES
-/*
- * Vectors for the FWNMI option. Share common code.
- */
- .globl system_reset_fwnmi
- .align 7
-system_reset_fwnmi:
- SET_SCRATCH0(r13) /* save r13 */
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
- NOTEST, 0x100)
-
-#endif /* CONFIG_PPC_PSERIES */
+ rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
+ beq 5f
+ andi. r11,r12,MSR_PR /* See if coming from user. */
+ bne 9f /* continue in V mode if we are. */
+5:
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-kvmppc_skip_interrupt:
/*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
+ * We are coming from kernel context. Check if we are coming from a
+ * guest. If yes, then we can continue. We will fall through
+ * do_kvm_200->kvmppc_interrupt to deliver the MC event to the guest.
*/
- mfspr r13, SPRN_SRR0
- addi r13, r13, 4
- mtspr SPRN_SRR0, r13
- GET_SCRATCH0(r13)
+ lbz r11,HSTATE_IN_GUEST(r13)
+ cmpwi r11,0 /* Check if coming from guest */
+ bne 9f /* continue if we are. */
+#endif
+ /*
+ * At this point we are not sure what context we came from.
+ * Queue up the MCE event and return from the interrupt.
+ * But before that, check if this is an unrecoverable exception.
+ * If yes, then stay on the emergency stack and panic.
+ */
+ andi. r11,r12,MSR_RI
+ bne 2f
+1: mfspr r11,SPRN_SRR0
+ LOAD_HANDLER(r10,unrecover_mce)
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
+ /*
+ * We are going down. But there is a chance that we might get hit by
+ * another MCE during the panic path, and we may end up in an unstable
+ * state with no way out. Hence, turn the ME bit off while going down,
+ * so that when another MCE is hit during the panic path, the system
+ * will checkstop and the hypervisor will get restarted cleanly by the
+ * SP.
+ */
+ li r3,MSR_ME
+ andc r10,r10,r3 /* Turn off MSR_ME */
+ mtspr SPRN_SRR1,r10
rfid
b .
-
-kvmppc_skip_Hinterrupt:
+2:
/*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
+ * Check if we have successfully handled/recovered from the error;
+ * if not, stay on the emergency stack and panic.
*/
- mfspr r13, SPRN_HSRR0
- addi r13, r13, 4
- mtspr SPRN_HSRR0, r13
- GET_SCRATCH0(r13)
- hrfid
- b .
-#endif
+ ld r3,RESULT(r1) /* Load result */
+ cmpdi r3,0 /* see if we handled MCE successfully */
-/*
- * Ensure that any handlers that get invoked from the exception prologs
- * above are below the first 64KB (0x10000) of the kernel image because
- * the prologs assemble the addresses of these handlers using the
- * LOAD_HANDLER macro, which uses an ori instruction.
- */
+ beq 1b /* if !handled then panic */
+ /*
+ * Return from MC interrupt.
+ * Queue up the MCE event so that we can log it later, while
+ * returning from the kernel or an OPAL call.
+ */
+ bl machine_check_queue_event
+ MACHINE_CHECK_HANDLER_WINDUP
+ rfid
+9:
+ /* Deliver the machine check to host kernel in V mode. */
+ MACHINE_CHECK_HANDLER_WINDUP
+ b machine_check_pSeries
-/*** Common interrupt handlers ***/
+EXC_COMMON_BEGIN(unrecover_mce)
+ /* Invoke machine_check_exception to print MCE event and panic. */
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_exception
+ /*
+ * We will not reach here. Even if we did, there is no way out. Call
+ * unrecoverable_exception and die.
+ */
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unrecoverable_exception
+ b 1b
- STD_EXCEPTION_COMMON(0x100, system_reset, system_reset_exception)
- STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
- STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, timer_interrupt)
- STD_EXCEPTION_COMMON(0x980, hdecrementer, hdec_interrupt)
-#ifdef CONFIG_PPC_DOORBELL
- STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, doorbell_exception)
-#else
- STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, unknown_exception)
-#endif
- STD_EXCEPTION_COMMON(0xb00, trap_0b, unknown_exception)
- STD_EXCEPTION_COMMON(0xd00, single_step, single_step_exception)
- STD_EXCEPTION_COMMON(0xe00, trap_0e, unknown_exception)
- STD_EXCEPTION_COMMON(0xe40, emulation_assist, emulation_assist_interrupt)
- STD_EXCEPTION_COMMON_ASYNC(0xe60, hmi_exception, handle_hmi_exception)
-#ifdef CONFIG_PPC_DOORBELL
- STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, doorbell_exception)
-#else
- STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, unknown_exception)
-#endif
- STD_EXCEPTION_COMMON_ASYNC(0xea0, h_virt_irq, do_IRQ)
- STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, performance_monitor_exception)
- STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, instruction_breakpoint_exception)
- STD_EXCEPTION_COMMON(0x1502, denorm, unknown_exception)
-#ifdef CONFIG_ALTIVEC
- STD_EXCEPTION_COMMON(0x1700, altivec_assist, altivec_assist_exception)
-#else
- STD_EXCEPTION_COMMON(0x1700, altivec_assist, unknown_exception)
-#endif
+EXC_REAL(data_access, 0x300, 0x380)
+EXC_VIRT(data_access, 0x4300, 0x4380, 0x300)
+TRAMP_KVM_SKIP(PACA_EXGEN, 0x300)
+EXC_COMMON_BEGIN(data_access_common)
/*
- * Relocation-on interrupts: A subset of the interrupts can be delivered
- * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
- * it. Addresses are the same as the original interrupt addresses, but
- * offset by 0xc000000000004000.
- * It's impossible to receive interrupts below 0x300 via this mechanism.
- * KVM: None of these traps are from the guest ; anything that escalated
- * to HV=1 from HV=0 is delivered via real mode handlers.
+ * Here r13 points to the paca, r9 contains the saved CR,
+ * SRR0 and SRR1 are saved in r11 and r12,
+ * r9 - r13 are saved in paca->exgen.
*/
+ mfspr r10,SPRN_DAR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ mfspr r10,SPRN_DSISR
+ stw r10,PACA_EXGEN+EX_DSISR(r13)
+ EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
+ RECONCILE_IRQ_STATE(r10, r11)
+ ld r12,_MSR(r1)
+ ld r3,PACA_EXGEN+EX_DAR(r13)
+ lwz r4,PACA_EXGEN+EX_DSISR(r13)
+ li r5,0x300
+ std r3,_DAR(r1)
+ std r4,_DSISR(r1)
+BEGIN_MMU_FTR_SECTION
+ b do_hash_page /* Try to handle as hpte fault */
+MMU_FTR_SECTION_ELSE
+ b handle_page_fault
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
- /*
- * This uses the standard macro, since the original 0x300 vector
- * only has extra guff for STAB-based processors -- which never
- * come here.
- */
- STD_RELON_EXCEPTION_PSERIES(0x4300, 0x300, data_access)
- . = 0x4380
- .globl data_access_slb_relon_pSeries
-data_access_slb_relon_pSeries:
+
+EXC_REAL_BEGIN(data_access_slb, 0x380, 0x400)
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
+ EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
std r3,PACA_EXSLB+EX_R3(r13)
mfspr r3,SPRN_DAR
mfspr r12,SPRN_SRR1
+ crset 4*cr6+eq
#ifndef CONFIG_RELOCATABLE
b slb_miss_realmode
#else
@@ -791,217 +514,221 @@ data_access_slb_relon_pSeries:
* the kernel ends up being put.
*/
mfctr r11
- ld r10,PACAKBASE(r13)
LOAD_HANDLER(r10, slb_miss_realmode)
mtctr r10
bctr
#endif
+EXC_REAL_END(data_access_slb, 0x380, 0x400)
- STD_RELON_EXCEPTION_PSERIES(0x4400, 0x400, instruction_access)
- . = 0x4480
- .globl instruction_access_slb_relon_pSeries
-instruction_access_slb_relon_pSeries:
+EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x4400)
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
+ EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
+ mfspr r3,SPRN_DAR
mfspr r12,SPRN_SRR1
+ crset 4*cr6+eq
#ifndef CONFIG_RELOCATABLE
b slb_miss_realmode
#else
+ /*
+ * We can't just use a direct branch to slb_miss_realmode
+ * because the distance from here to there depends on where
+ * the kernel ends up being put.
+ */
mfctr r11
- ld r10,PACAKBASE(r13)
LOAD_HANDLER(r10, slb_miss_realmode)
mtctr r10
bctr
#endif
+EXC_VIRT_END(data_access_slb, 0x4380, 0x4400)
+TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
- . = 0x4500
- .globl hardware_interrupt_relon_pSeries;
- .globl hardware_interrupt_relon_hv;
-hardware_interrupt_relon_pSeries:
-hardware_interrupt_relon_hv:
- BEGIN_FTR_SECTION
- _MASKABLE_RELON_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV, SOFTEN_TEST_HV)
- FTR_SECTION_ELSE
- _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD, SOFTEN_TEST_PR)
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
- STD_RELON_EXCEPTION_PSERIES(0x4600, 0x600, alignment)
- STD_RELON_EXCEPTION_PSERIES(0x4700, 0x700, program_check)
- STD_RELON_EXCEPTION_PSERIES(0x4800, 0x800, fp_unavailable)
- MASKABLE_RELON_EXCEPTION_PSERIES(0x4900, 0x900, decrementer)
- STD_RELON_EXCEPTION_HV(0x4980, 0x982, hdecrementer)
- MASKABLE_RELON_EXCEPTION_PSERIES(0x4a00, 0xa00, doorbell_super)
- STD_RELON_EXCEPTION_PSERIES(0x4b00, 0xb00, trap_0b)
-
- . = 0x4c00
- .globl system_call_relon_pSeries
-system_call_relon_pSeries:
- HMT_MEDIUM
- SYSCALL_PSERIES_1
- SYSCALL_PSERIES_2_DIRECT
- SYSCALL_PSERIES_3
- STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step)
+EXC_REAL(instruction_access, 0x400, 0x480)
+EXC_VIRT(instruction_access, 0x4400, 0x4480, 0x400)
+TRAMP_KVM(PACA_EXGEN, 0x400)
- . = 0x4e00
- b . /* Can't happen, see v2.07 Book III-S section 6.5 */
+EXC_COMMON_BEGIN(instruction_access_common)
+ EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
+ RECONCILE_IRQ_STATE(r10, r11)
+ ld r12,_MSR(r1)
+ ld r3,_NIP(r1)
+ andis. r4,r12,0x5820
+ li r5,0x400
+ std r3,_DAR(r1)
+ std r4,_DSISR(r1)
+BEGIN_MMU_FTR_SECTION
+ b do_hash_page /* Try to handle as hpte fault */
+MMU_FTR_SECTION_ELSE
+ b handle_page_fault
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
- . = 0x4e20
- b . /* Can't happen, see v2.07 Book III-S section 6.5 */
- . = 0x4e40
-emulation_assist_relon_trampoline:
+EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x500)
SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b emulation_assist_relon_hv
-
- . = 0x4e60
- b . /* Can't happen, see v2.07 Book III-S section 6.5 */
+ EXCEPTION_PROLOG_0(PACA_EXSLB)
+ EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
+ mfspr r12,SPRN_SRR1
+ crclr 4*cr6+eq
+#ifndef CONFIG_RELOCATABLE
+ b slb_miss_realmode
+#else
+ mfctr r11
+ LOAD_HANDLER(r10, slb_miss_realmode)
+ mtctr r10
+ bctr
+#endif
+EXC_REAL_END(instruction_access_slb, 0x480, 0x500)
- . = 0x4e80
-h_doorbell_relon_trampoline:
+EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x4500)
SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b h_doorbell_relon_hv
+ EXCEPTION_PROLOG_0(PACA_EXSLB)
+ EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
+ mfspr r12,SPRN_SRR1
+ crclr 4*cr6+eq
+#ifndef CONFIG_RELOCATABLE
+ b slb_miss_realmode
+#else
+ mfctr r11
+ LOAD_HANDLER(r10, slb_miss_realmode)
+ mtctr r10
+ bctr
+#endif
+EXC_VIRT_END(instruction_access_slb, 0x4480, 0x4500)
+TRAMP_KVM(PACA_EXSLB, 0x480)
- . = 0x4ea0
-h_virt_irq_relon_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b h_virt_irq_relon_hv
- . = 0x4f00
-performance_monitor_relon_pseries_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b performance_monitor_relon_pSeries
+/* This handler is used by both 0x380 and 0x480 slb miss interrupts */
+EXC_COMMON_BEGIN(slb_miss_realmode)
+ /*
+ * r13 points to the PACA, r9 contains the saved CR,
+ * r12 contains the saved SRR1, SRR0 is still ready for return
+ * r3 has the faulting address
+ * r9 - r13 are saved in paca->exslb.
+ * r3 is saved in paca->slb_r3
+ * cr6.eq is set for a D-SLB miss, clear for an I-SLB miss
+ * We assume we aren't going to take any exceptions during this
+ * procedure.
+ */
+ mflr r10
+#ifdef CONFIG_RELOCATABLE
+ mtctr r11
+#endif
- . = 0x4f20
-altivec_unavailable_relon_pseries_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b altivec_unavailable_relon_pSeries
+ stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
+ std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
+ std r3,PACA_EXSLB+EX_DAR(r13)
- . = 0x4f40
-vsx_unavailable_relon_pseries_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b vsx_unavailable_relon_pSeries
+ crset 4*cr0+eq
+#ifdef CONFIG_PPC_STD_MMU_64
+BEGIN_MMU_FTR_SECTION
+ bl slb_allocate_realmode
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
+#endif
- . = 0x4f60
-facility_unavailable_relon_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b facility_unavailable_relon_pSeries
+ ld r10,PACA_EXSLB+EX_LR(r13)
+ ld r3,PACA_EXSLB+EX_R3(r13)
+ lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
+ mtlr r10
- . = 0x4f80
-hv_facility_unavailable_relon_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b hv_facility_unavailable_relon_hv
+ beq 8f /* if bad address, make full stack frame */
- STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint)
-#ifdef CONFIG_PPC_DENORMALISATION
- . = 0x5500
- b denorm_exception_hv
-#endif
- STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist)
+ andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+ beq- 2f
- .align 7
-system_call_entry:
- b system_call_common
+ /* All done -- return from exception. */
-ppc64_runlatch_on_trampoline:
- b __ppc64_runlatch_on
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+ mtcrf 0x02,r9 /* I/D indication is in cr6 */
+ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
+.machine pop
-/*
- * Here r13 points to the paca, r9 contains the saved CR,
- * SRR0 and SRR1 are saved in r11 and r12,
- * r9 - r13 are saved in paca->exgen.
- */
- .align 7
- .globl data_access_common
-data_access_common:
- mfspr r10,SPRN_DAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
- RECONCILE_IRQ_STATE(r10, r11)
- ld r12,_MSR(r1)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- li r5,0x300
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
-BEGIN_MMU_FTR_SECTION
- b do_hash_page /* Try to handle as hpte fault */
-MMU_FTR_SECTION_ELSE
- b handle_page_fault
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+ RESTORE_PPR_PACA(PACA_EXSLB, r9)
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ ld r11,PACA_EXSLB+EX_R11(r13)
+ ld r12,PACA_EXSLB+EX_R12(r13)
+ ld r13,PACA_EXSLB+EX_R13(r13)
+ rfid
+ b . /* prevent speculative execution */
- .align 7
- .globl h_data_storage_common
-h_data_storage_common:
- mfspr r10,SPRN_HDAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_HDSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
- bl save_nvgprs
+2: mfspr r11,SPRN_SRR0
+ LOAD_HANDLER(r10,unrecov_slb)
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
+ mtspr SPRN_SRR1,r10
+ rfid
+ b .
+
+8: mfspr r11,SPRN_SRR0
+ LOAD_HANDLER(r10,bad_addr_slb)
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
+ mtspr SPRN_SRR1,r10
+ rfid
+ b .
+
+EXC_COMMON_BEGIN(unrecov_slb)
+ EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unknown_exception
- b ret_from_except
+ bl save_nvgprs
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unrecoverable_exception
+ b 1b
- .align 7
- .globl instruction_access_common
-instruction_access_common:
- EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
+EXC_COMMON_BEGIN(bad_addr_slb)
+ EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
RECONCILE_IRQ_STATE(r10, r11)
- ld r12,_MSR(r1)
- ld r3,_NIP(r1)
- andis. r4,r12,0x5820
- li r5,0x400
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
-BEGIN_MMU_FTR_SECTION
- b do_hash_page /* Try to handle as hpte fault */
-MMU_FTR_SECTION_ELSE
- b handle_page_fault
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+ ld r3, PACA_EXSLB+EX_DAR(r13)
+ std r3, _DAR(r1)
+ beq cr6, 2f
+ li r10, 0x480 /* fix trap number for I-SLB miss */
+ std r10, _TRAP(r1)
+2: bl save_nvgprs
+ addi r3, r1, STACK_FRAME_OVERHEAD
+ bl slb_miss_bad_addr
+ b ret_from_except
- STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception)
+EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x600)
+ .globl hardware_interrupt_hv;
+hardware_interrupt_hv:
+ BEGIN_FTR_SECTION
+ _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
+ EXC_HV, SOFTEN_TEST_HV)
+do_kvm_H0x500:
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502)
+ FTR_SECTION_ELSE
+ _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
+ EXC_STD, SOFTEN_TEST_PR)
+do_kvm_0x500:
+ KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+EXC_REAL_END(hardware_interrupt, 0x500, 0x600)
- /*
- * Machine check is different because we use a different
- * save area: PACA_EXMC instead of PACA_EXGEN.
- */
- .align 7
- .globl machine_check_common
-machine_check_common:
+EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x4600)
+ .globl hardware_interrupt_relon_hv;
+hardware_interrupt_relon_hv:
+ BEGIN_FTR_SECTION
+ _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV)
+ FTR_SECTION_ELSE
+ _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+EXC_VIRT_END(hardware_interrupt, 0x4500, 0x4600)
- mfspr r10,SPRN_DAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
- FINISH_NAP
- RECONCILE_IRQ_STATE(r10, r11)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_exception
- b ret_from_except
+EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
- .align 7
- .globl alignment_common
-alignment_common:
+
+EXC_REAL(alignment, 0x600, 0x700)
+EXC_VIRT(alignment, 0x4600, 0x4700, 0x600)
+TRAMP_KVM(PACA_EXGEN, 0x600)
+EXC_COMMON_BEGIN(alignment_common)
mfspr r10,SPRN_DAR
std r10,PACA_EXGEN+EX_DAR(r13)
mfspr r10,SPRN_DSISR
@@ -1017,9 +744,11 @@ alignment_common:
bl alignment_exception
b ret_from_except
- .align 7
- .globl program_check_common
-program_check_common:
+
+EXC_REAL(program_check, 0x700, 0x800)
+EXC_VIRT(program_check, 0x4700, 0x4800, 0x700)
+TRAMP_KVM(PACA_EXGEN, 0x700)
+EXC_COMMON_BEGIN(program_check_common)
EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
@@ -1027,9 +756,11 @@ program_check_common:
bl program_check_exception
b ret_from_except
- .align 7
- .globl fp_unavailable_common
-fp_unavailable_common:
+
+EXC_REAL(fp_unavailable, 0x800, 0x900)
+EXC_VIRT(fp_unavailable, 0x4800, 0x4900, 0x800)
+TRAMP_KVM(PACA_EXGEN, 0x800)
+EXC_COMMON_BEGIN(fp_unavailable_common)
EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
bne 1f /* if from user, just load it up */
bl save_nvgprs
@@ -1057,9 +788,250 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
bl fp_unavailable_tm
b ret_from_except
#endif
- .align 7
- .globl altivec_unavailable_common
-altivec_unavailable_common:
+
+
+EXC_REAL_MASKABLE(decrementer, 0x900, 0x980)
+EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x4980, 0x900)
+TRAMP_KVM(PACA_EXGEN, 0x900)
+EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
+
+
+EXC_REAL_HV(hdecrementer, 0x980, 0xa00)
+EXC_VIRT_HV(hdecrementer, 0x4980, 0x4a00, 0x980)
+TRAMP_KVM_HV(PACA_EXGEN, 0x980)
+EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
+
+
+EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0xb00)
+EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x4b00, 0xa00)
+TRAMP_KVM(PACA_EXGEN, 0xa00)
+#ifdef CONFIG_PPC_DOORBELL
+EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
+#else
+EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception)
+#endif
+
+
+EXC_REAL(trap_0b, 0xb00, 0xc00)
+EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00)
+TRAMP_KVM(PACA_EXGEN, 0xb00)
+EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
+
+
+#define LOAD_SYSCALL_HANDLER(reg) \
+ ld reg,PACAKBASE(r13); \
+ ori reg,reg,(ABS_ADDR(system_call_common))@l;
+
+/* Syscall routine is used twice, in reloc-off and reloc-on paths */
+#define SYSCALL_PSERIES_1 \
+BEGIN_FTR_SECTION \
+ cmpdi r0,0x1ebe ; \
+ beq- 1f ; \
+END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
+ mr r9,r13 ; \
+ GET_PACA(r13) ; \
+ mfspr r11,SPRN_SRR0 ; \
+0:
+
+#define SYSCALL_PSERIES_2_RFID \
+ mfspr r12,SPRN_SRR1 ; \
+ LOAD_SYSCALL_HANDLER(r10) ; \
+ mtspr SPRN_SRR0,r10 ; \
+ ld r10,PACAKMSR(r13) ; \
+ mtspr SPRN_SRR1,r10 ; \
+ rfid ; \
+ b . ; /* prevent speculative execution */
+
+#define SYSCALL_PSERIES_3 \
+ /* Fast LE/BE switch system call */ \
+1: mfspr r12,SPRN_SRR1 ; \
+ xori r12,r12,MSR_LE ; \
+ mtspr SPRN_SRR1,r12 ; \
+ rfid ; /* return to userspace */ \
+ b . ; /* prevent speculative execution */
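+
+/*
+ * Illustrative sketch, not part of this patch: the fast endian switch
+ * handled above is reached from userspace with
+ *
+ *	li	r0,0x1ebe
+ *	sc
+ *
+ * The xori flips MSR_LE in SRR1 and the rfid returns straight to the
+ * caller in the other endianness, bypassing the normal syscall path.
+ */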
+
+#if defined(CONFIG_RELOCATABLE)
+ /*
+ * We can't branch directly so we do it via the CTR which
+ * is volatile across system calls.
+ */
+#define SYSCALL_PSERIES_2_DIRECT \
+ LOAD_SYSCALL_HANDLER(r12) ; \
+ mtctr r12 ; \
+ mfspr r12,SPRN_SRR1 ; \
+ li r10,MSR_RI ; \
+ mtmsrd r10,1 ; \
+ bctr ;
+#else
+ /* We can branch directly */
+#define SYSCALL_PSERIES_2_DIRECT \
+ mfspr r12,SPRN_SRR1 ; \
+ li r10,MSR_RI ; \
+ mtmsrd r10,1 ; /* Set RI (EE=0) */ \
+ b system_call_common ;
+#endif
+
+EXC_REAL_BEGIN(system_call, 0xc00, 0xd00)
+ /*
+ * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems
+ * that support it) before changing to HMT_MEDIUM. That allows the KVM
+ * code to save that value into the guest state (it is the guest's PPR
+ * value). Otherwise just change to HMT_MEDIUM as userspace has
+ * already saved the PPR.
+ */
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+ SET_SCRATCH0(r13)
+ GET_PACA(r13)
+ std r9,PACA_EXGEN+EX_R9(r13)
+ OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);
+ HMT_MEDIUM;
+ std r10,PACA_EXGEN+EX_R10(r13)
+ OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR);
+ mfcr r9
+ KVMTEST_PR(0xc00)
+ GET_SCRATCH0(r13)
+#else
+ HMT_MEDIUM;
+#endif
+ SYSCALL_PSERIES_1
+ SYSCALL_PSERIES_2_RFID
+ SYSCALL_PSERIES_3
+EXC_REAL_END(system_call, 0xc00, 0xd00)
+
+EXC_VIRT_BEGIN(system_call, 0x4c00, 0x4d00)
+ HMT_MEDIUM
+ SYSCALL_PSERIES_1
+ SYSCALL_PSERIES_2_DIRECT
+ SYSCALL_PSERIES_3
+EXC_VIRT_END(system_call, 0x4c00, 0x4d00)
+
+TRAMP_KVM(PACA_EXGEN, 0xc00)
+
+
+EXC_REAL(single_step, 0xd00, 0xe00)
+EXC_VIRT(single_step, 0x4d00, 0x4e00, 0xd00)
+TRAMP_KVM(PACA_EXGEN, 0xd00)
+EXC_COMMON(single_step_common, 0xd00, single_step_exception)
+
+EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0xe20)
+EXC_VIRT_NONE(0x4e00, 0x4e20)
+TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00)
+EXC_COMMON_BEGIN(h_data_storage_common)
+ mfspr r10,SPRN_HDAR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ mfspr r10,SPRN_HDSISR
+ stw r10,PACA_EXGEN+EX_DSISR(r13)
+ EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
+ bl save_nvgprs
+ RECONCILE_IRQ_STATE(r10, r11)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unknown_exception
+ b ret_from_except
+
+
+EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0xe40)
+EXC_VIRT_NONE(0x4e20, 0x4e40)
+TRAMP_KVM_HV(PACA_EXGEN, 0xe20)
+EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception)
+
+
+EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0xe60)
+EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x4e60, 0xe40)
+TRAMP_KVM_HV(PACA_EXGEN, 0xe40)
+EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
+
+
+/*
+ * hmi_exception trampoline is a special case. It jumps to hmi_exception_early
+ * first, and then eventually from there to the trampoline to get into virtual
+ * mode.
+ */
+__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0xe80, hmi_exception_early)
+__TRAMP_REAL_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60)
+EXC_VIRT_NONE(0x4e60, 0x4e80)
+TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
+TRAMP_REAL_BEGIN(hmi_exception_early)
+ EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60)
+ mr r10,r1 /* Save r1 */
+ ld r1,PACAEMERGSP(r13) /* Use emergency stack */
+ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ std r9,_CCR(r1) /* save CR in stackframe */
+ mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
+ std r11,_NIP(r1) /* save HSRR0 in stackframe */
+ mfspr r12,SPRN_HSRR1 /* Save SRR1 */
+ std r12,_MSR(r1) /* save SRR1 in stackframe */
+ std r10,0(r1) /* make stack chain pointer */
+ std r0,GPR0(r1) /* save r0 in stackframe */
+ std r10,GPR1(r1) /* save r1 in stackframe */
+ EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
+ EXCEPTION_PROLOG_COMMON_3(0xe60)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl hmi_exception_realmode
+ /* Windup the stack. */
+ /* Move original HSRR0 and HSRR1 into the respective regs */
+ ld r9,_MSR(r1)
+ mtspr SPRN_HSRR1,r9
+ ld r3,_NIP(r1)
+ mtspr SPRN_HSRR0,r3
+ ld r9,_CTR(r1)
+ mtctr r9
+ ld r9,_XER(r1)
+ mtxer r9
+ ld r9,_LINK(r1)
+ mtlr r9
+ REST_GPR(0, r1)
+ REST_8GPRS(2, r1)
+ REST_GPR(10, r1)
+ ld r11,_CCR(r1)
+ mtcr r11
+ REST_GPR(11, r1)
+ REST_2GPRS(12, r1)
+ /* restore original r1. */
+ ld r1,GPR1(r1)
+
+ /*
+ * Go to virtual mode and pull the HMI event information from
+ * firmware.
+ */
+ .globl hmi_exception_after_realmode
+hmi_exception_after_realmode:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b tramp_real_hmi_exception
+
+EXC_COMMON_ASYNC(hmi_exception_common, 0xe60, handle_hmi_exception)
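+
+/*
+ * For reference, the HMI delivery path assembled above is, in order:
+ *   0xe60 (real mode) -> hmi_exception_early (on the emergency stack)
+ *   -> hmi_exception_realmode() -> stack windup
+ *   -> hmi_exception_after_realmode -> tramp_real_hmi_exception
+ *   -> hmi_exception_common (virtual mode) -> handle_hmi_exception()
+ */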
+
+
+EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0xea0)
+EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x4ea0, 0xe80)
+TRAMP_KVM_HV(PACA_EXGEN, 0xe80)
+#ifdef CONFIG_PPC_DOORBELL
+EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
+#else
+EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception)
+#endif
+
+
+EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0xec0)
+EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x4ec0, 0xea0)
+TRAMP_KVM_HV(PACA_EXGEN, 0xea0)
+EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
+
+
+EXC_REAL_NONE(0xec0, 0xf00)
+EXC_VIRT_NONE(0x4ec0, 0x4f00)
+
+
+EXC_REAL_OOL(performance_monitor, 0xf00, 0xf20)
+EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x4f20, 0xf00)
+TRAMP_KVM(PACA_EXGEN, 0xf00)
+EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
+
+
+EXC_REAL_OOL(altivec_unavailable, 0xf20, 0xf40)
+EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x4f40, 0xf20)
+TRAMP_KVM(PACA_EXGEN, 0xf20)
+EXC_COMMON_BEGIN(altivec_unavailable_common)
EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
@@ -1092,9 +1064,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
bl altivec_unavailable_exception
b ret_from_except
- .align 7
- .globl vsx_unavailable_common
-vsx_unavailable_common:
+
+EXC_REAL_OOL(vsx_unavailable, 0xf40, 0xf60)
+EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x4f60, 0xf40)
+TRAMP_KVM(PACA_EXGEN, 0xf40)
+EXC_COMMON_BEGIN(vsx_unavailable_common)
EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
@@ -1126,323 +1100,284 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
bl vsx_unavailable_exception
b ret_from_except
- /* Equivalents to the above handlers for relocation-on interrupt vectors */
- STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
- MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell)
- MASKABLE_RELON_EXCEPTION_HV_OOL(0xea0, h_virt_irq)
- STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
- STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
- STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
- STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
- STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
+EXC_REAL_OOL(facility_unavailable, 0xf60, 0xf80)
+EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x4f80, 0xf60)
+TRAMP_KVM(PACA_EXGEN, 0xf60)
+EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception)
- /*
- * The __end_interrupts marker must be past the out-of-line (OOL)
- * handlers, so that they are copied to real address 0x100 when running
- * a relocatable kernel. This ensures they can be reached from the short
- * trampoline handlers (like 0x4f00, 0x4f20, etc.) which branch
- * directly, without using LOAD_HANDLER().
- */
- .align 7
- .globl __end_interrupts
-__end_interrupts:
-#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0xfa0)
+EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x4fa0, 0xf80)
+TRAMP_KVM_HV(PACA_EXGEN, 0xf80)
+EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception)
+
+
+EXC_REAL_NONE(0xfa0, 0x1200)
+EXC_VIRT_NONE(0x4fa0, 0x5200)
+
+#ifdef CONFIG_CBE_RAS
+EXC_REAL_HV(cbe_system_error, 0x1200, 0x1300)
+EXC_VIRT_NONE(0x5200, 0x5300)
+TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1200)
+EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception)
+#else /* CONFIG_CBE_RAS */
+EXC_REAL_NONE(0x1200, 0x1300)
+EXC_VIRT_NONE(0x5200, 0x5300)
+#endif
+
+
+EXC_REAL(instruction_breakpoint, 0x1300, 0x1400)
+EXC_VIRT(instruction_breakpoint, 0x5300, 0x5400, 0x1300)
+TRAMP_KVM_SKIP(PACA_EXGEN, 0x1300)
+EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception)
+
+EXC_REAL_NONE(0x1400, 0x1500)
+EXC_VIRT_NONE(0x5400, 0x5500)
+
+EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x1600)
+ mtspr SPRN_SPRG_HSCRATCH0,r13
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500)
+
+#ifdef CONFIG_PPC_DENORMALISATION
+ mfspr r10,SPRN_HSRR1
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ addi r11,r11,-4 /* HSRR0 is next instruction */
+ bne+ denorm_assist
+#endif
+
+ KVMTEST_PR(0x1500)
+ EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV)
+EXC_REAL_END(denorm_exception_hv, 0x1500, 0x1600)
+
+#ifdef CONFIG_PPC_DENORMALISATION
+EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x5600)
+ b exc_real_0x1500_denorm_exception_hv
+EXC_VIRT_END(denorm_exception, 0x5500, 0x5600)
+#else
+EXC_VIRT_NONE(0x5500, 0x5600)
+#endif
+
+TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500)
+
+#ifdef CONFIG_PPC_DENORMALISATION
+TRAMP_REAL_BEGIN(denorm_assist)
+BEGIN_FTR_SECTION
/*
- * Data area reserved for FWNMI option.
- * This address (0x7000) is fixed by the RPA.
+ * To denormalise we need to move a copy of the register to itself.
+ * For POWER6 do that here for all FP regs.
*/
- . = 0x7000
- .globl fwnmi_data_area
-fwnmi_data_area:
+ mfmsr r10
+ ori r10,r10,(MSR_FP|MSR_FE0|MSR_FE1)
+ xori r10,r10,(MSR_FE0|MSR_FE1)
+ mtmsrd r10
+ sync
- /* pseries and powernv need to keep the whole page from
- * 0x7000 to 0x8000 free for use by the firmware
- */
- . = 0x8000
-#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
+#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1
+#define FMR4(n) FMR2(n) ; FMR2(n+2)
+#define FMR8(n) FMR4(n) ; FMR4(n+4)
+#define FMR16(n) FMR8(n) ; FMR8(n+8)
+#define FMR32(n) FMR16(n) ; FMR16(n+16)
+ FMR32(0)
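+
+/*
+ * For illustration, FMR32(0) expands through the doubling macros above
+ * into a straight-line sequence of moves-to-self:
+ *
+ *	fmr 0,0 ; fmr 1,1 ; ... ; fmr 31,31
+ *
+ * one per FP register, which is what forces each value through the
+ * denormalisation handling.
+ */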
+
+FTR_SECTION_ELSE
+/*
+ * To denormalise we need to move a copy of the register to itself.
+ * For POWER7 do that here for the first 32 VSX registers only.
+ */
+ mfmsr r10
+ oris r10,r10,MSR_VSX@h
+ mtmsrd r10
+ sync
+
+#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1)
+#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2)
+#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4)
+#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8)
+#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16)
+ XVCPSGNDP32(0)
+
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206)
+
+BEGIN_FTR_SECTION
+ b denorm_done
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+/*
+ * To denormalise we need to move a copy of the register to itself.
+ * For POWER8 we need to do that for all 64 VSX registers
+ */
+ XVCPSGNDP32(32)
+denorm_done:
+ mtspr SPRN_HSRR0,r11
+ mtcrf 0x80,r9
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ RESTORE_PPR_PACA(PACA_EXGEN, r10)
+BEGIN_FTR_SECTION
+ ld r10,PACA_EXGEN+EX_CFAR(r13)
+ mtspr SPRN_CFAR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ HRFID
+ b .
+#endif
+
+EXC_COMMON_HV(denorm_common, 0x1500, unknown_exception)
- STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception)
- STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception)
#ifdef CONFIG_CBE_RAS
- STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception)
- STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception)
- STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception)
-#endif /* CONFIG_CBE_RAS */
-
- .globl hmi_exception_early
-hmi_exception_early:
- EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, 0xe62)
- mr r10,r1 /* Save r1 */
- ld r1,PACAEMERGSP(r13) /* Use emergency stack */
- subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- std r9,_CCR(r1) /* save CR in stackframe */
- mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
- std r11,_NIP(r1) /* save HSRR0 in stackframe */
- mfspr r12,SPRN_HSRR1 /* Save SRR1 */
- std r12,_MSR(r1) /* save SRR1 in stackframe */
- std r10,0(r1) /* make stack chain pointer */
- std r0,GPR0(r1) /* save r0 in stackframe */
- std r10,GPR1(r1) /* save r1 in stackframe */
- EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
- EXCEPTION_PROLOG_COMMON_3(0xe60)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl hmi_exception_realmode
- /* Windup the stack. */
- /* Move original HSRR0 and HSRR1 into the respective regs */
- ld r9,_MSR(r1)
- mtspr SPRN_HSRR1,r9
- ld r3,_NIP(r1)
- mtspr SPRN_HSRR0,r3
- ld r9,_CTR(r1)
- mtctr r9
- ld r9,_XER(r1)
- mtxer r9
- ld r9,_LINK(r1)
- mtlr r9
- REST_GPR(0, r1)
- REST_8GPRS(2, r1)
- REST_GPR(10, r1)
- ld r11,_CCR(r1)
- mtcr r11
- REST_GPR(11, r1)
- REST_2GPRS(12, r1)
- /* restore original r1. */
- ld r1,GPR1(r1)
+EXC_REAL_HV(cbe_maintenance, 0x1600, 0x1700)
+EXC_VIRT_NONE(0x5600, 0x5700)
+TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1600)
+EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception)
+#else /* CONFIG_CBE_RAS */
+EXC_REAL_NONE(0x1600, 0x1700)
+EXC_VIRT_NONE(0x5600, 0x5700)
+#endif
- /*
- * Go to virtual mode and pull the HMI event information from
- * firmware.
- */
- .globl hmi_exception_after_realmode
-hmi_exception_after_realmode:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b hmi_exception_hv
+EXC_REAL(altivec_assist, 0x1700, 0x1800)
+EXC_VIRT(altivec_assist, 0x5700, 0x5800, 0x1700)
+TRAMP_KVM(PACA_EXGEN, 0x1700)
+#ifdef CONFIG_ALTIVEC
+EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception)
+#else
+EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception)
+#endif
-#define MACHINE_CHECK_HANDLER_WINDUP \
- /* Clear MSR_RI before setting SRR0 and SRR1. */\
- li r0,MSR_RI; \
- mfmsr r9; /* get MSR value */ \
- andc r9,r9,r0; \
- mtmsrd r9,1; /* Clear MSR_RI */ \
- /* Move original SRR0 and SRR1 into the respective regs */ \
- ld r9,_MSR(r1); \
- mtspr SPRN_SRR1,r9; \
- ld r3,_NIP(r1); \
- mtspr SPRN_SRR0,r3; \
- ld r9,_CTR(r1); \
- mtctr r9; \
- ld r9,_XER(r1); \
- mtxer r9; \
- ld r9,_LINK(r1); \
- mtlr r9; \
- REST_GPR(0, r1); \
- REST_8GPRS(2, r1); \
- REST_GPR(10, r1); \
- ld r11,_CCR(r1); \
- mtcr r11; \
- /* Decrement paca->in_mce. */ \
- lhz r12,PACA_IN_MCE(r13); \
- subi r12,r12,1; \
- sth r12,PACA_IN_MCE(r13); \
- REST_GPR(11, r1); \
- REST_2GPRS(12, r1); \
- /* restore original r1. */ \
- ld r1,GPR1(r1)
- /*
- * Handle machine check early in real mode. We come here with
- * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
- */
- .align 7
- .globl machine_check_handle_early
-machine_check_handle_early:
- std r0,GPR0(r1) /* Save r0 */
- EXCEPTION_PROLOG_COMMON_3(0x200)
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_early
- std r3,RESULT(r1) /* Save result */
- ld r12,_MSR(r1)
-#ifdef CONFIG_PPC_P7_NAP
- /*
- * Check if thread was in power saving mode. We come here when any
- * of the following is true:
- * a. thread wasn't in power saving mode
- * b. thread was in power saving mode with no state loss or
- * supervisor state loss
- *
- * Go back to nap again if (b) is true.
- */
- rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */
- beq 4f /* No, it wasn't */
- /* Thread was in power saving mode. Go back to nap again. */
- cmpwi r11,2
- bne 3f
- /* Supervisor state loss */
- li r0,1
- stb r0,PACA_NAPSTATELOST(r13)
-3: bl machine_check_queue_event
- MACHINE_CHECK_HANDLER_WINDUP
- GET_PACA(r13)
- ld r1,PACAR1(r13)
- li r3,PNV_THREAD_NAP
- b pnv_enter_arch207_idle_mode
-4:
+#ifdef CONFIG_CBE_RAS
+EXC_REAL_HV(cbe_thermal, 0x1800, 0x1900)
+EXC_VIRT_NONE(0x5800, 0x5900)
+TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1800)
+EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception)
+#else /* CONFIG_CBE_RAS */
+EXC_REAL_NONE(0x1800, 0x1900)
+EXC_VIRT_NONE(0x5800, 0x5900)
#endif
- /*
- * Check if we are coming from hypervisor userspace. If yes then we
- * continue in host kernel in V mode to deliver the MC event.
- */
- rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
- beq 5f
- andi. r11,r12,MSR_PR /* See if coming from user. */
- bne 9f /* continue in V mode if we are. */
-5:
+
+/*
+ * An interrupt came in while soft-disabled. We set paca->irq_happened, then:
+ * - If it was a decrementer interrupt, we bump the dec to max and return.
+ * - If it was a doorbell we return immediately since doorbells are edge
+ * triggered and won't automatically refire.
+ * - If it was a HMI we return immediately since we handled it in realmode
+ * and it won't refire.
+ * - else we hard disable and return.
+ * This is called with r10 containing the value to OR to the paca field.
+ */
+#define MASKED_INTERRUPT(_H) \
+masked_##_H##interrupt: \
+ std r11,PACA_EXGEN+EX_R11(r13); \
+ lbz r11,PACAIRQHAPPENED(r13); \
+ or r11,r11,r10; \
+ stb r11,PACAIRQHAPPENED(r13); \
+ cmpwi r10,PACA_IRQ_DEC; \
+ bne 1f; \
+ lis r10,0x7fff; \
+ ori r10,r10,0xffff; \
+ mtspr SPRN_DEC,r10; \
+ b 2f; \
+1: cmpwi r10,PACA_IRQ_DBELL; \
+ beq 2f; \
+ cmpwi r10,PACA_IRQ_HMI; \
+ beq 2f; \
+ mfspr r10,SPRN_##_H##SRR1; \
+ rldicl r10,r10,48,1; /* clear MSR_EE */ \
+ rotldi r10,r10,16; \
+ mtspr SPRN_##_H##SRR1,r10; \
+2: mtcrf 0x80,r9; \
+ ld r9,PACA_EXGEN+EX_R9(r13); \
+ ld r10,PACA_EXGEN+EX_R10(r13); \
+ ld r11,PACA_EXGEN+EX_R11(r13); \
+ GET_SCRATCH0(r13); \
+ ##_H##rfid; \
+ b .
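+
+/*
+ * Minimal C-like sketch of the policy above (illustration only, the
+ * asm macro is authoritative):
+ *
+ *	paca->irq_happened |= r10;
+ *	if (r10 == PACA_IRQ_DEC)
+ *		SPRN_DEC = 0x7fffffff;		(push the decrementer out)
+ *	else if (r10 != PACA_IRQ_DBELL && r10 != PACA_IRQ_HMI)
+ *		SRR1 &= ~MSR_EE;		(hard disable before return)
+ *	restore the scratch regs and (h)rfid back to the interrupted code;
+ */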
+
+/*
+ * Real mode exceptions actually use this too, but alternate
+ * instruction code patches (which end up in the common .text area)
+ * cannot reach these if they are put there.
+ */
+USE_FIXED_SECTION(virt_trampolines)
+ MASKED_INTERRUPT()
+ MASKED_INTERRUPT(H)
+
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
/*
- * We are coming from kernel context. Check if we are coming from
- * guest. if yes, then we can continue. We will fall through
- * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest.
- */
- lbz r11,HSTATE_IN_GUEST(r13)
- cmpwi r11,0 /* Check if coming from guest */
- bne 9f /* continue if we are. */
-#endif
- /*
- * At this point we are not sure about what context we come from.
- * Queue up the MCE event and return from the interrupt.
- * But before that, check if this is an un-recoverable exception.
- * If yes, then stay on emergency stack and panic.
- */
- andi. r11,r12,MSR_RI
- bne 2f
-1: mfspr r11,SPRN_SRR0
- ld r10,PACAKBASE(r13)
- LOAD_HANDLER(r10,unrecover_mce)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
- /*
- * We are going down. But there are chances that we might get hit by
- * another MCE during panic path and we may run into unstable state
- * with no way out. Hence, turn ME bit off while going down, so that
- * when another MCE is hit during panic path, system will checkstop
- * and hypervisor will get restarted cleanly by SP.
+ * Here all GPRs are unchanged from when the interrupt happened
+ * except for r13, which is saved in SPRG_SCRATCH0.
*/
- li r3,MSR_ME
- andc r10,r10,r3 /* Turn off MSR_ME */
- mtspr SPRN_SRR1,r10
+ mfspr r13, SPRN_SRR0
+ addi r13, r13, 4
+ mtspr SPRN_SRR0, r13
+ GET_SCRATCH0(r13)
rfid
b .
-2:
- /*
- * Check if we have successfully handled/recovered from error, if not
- * then stay on emergency stack and panic.
- */
- ld r3,RESULT(r1) /* Load result */
- cmpdi r3,0 /* see if we handled MCE successfully */
- beq 1b /* if !handled then panic */
+TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
/*
- * Return from MC interrupt.
- * Queue up the MCE event so that we can log it later, while
- * returning from kernel or opal call.
+ * Here all GPRs are unchanged from when the interrupt happened
+ * except for r13, which is saved in SPRG_SCRATCH0.
*/
- bl machine_check_queue_event
- MACHINE_CHECK_HANDLER_WINDUP
- rfid
-9:
- /* Deliver the machine check to host kernel in V mode. */
- MACHINE_CHECK_HANDLER_WINDUP
- b machine_check_pSeries
+ mfspr r13, SPRN_HSRR0
+ addi r13, r13, 4
+ mtspr SPRN_HSRR0, r13
+ GET_SCRATCH0(r13)
+ hrfid
+ b .
+#endif
-unrecover_mce:
- /* Invoke machine_check_exception to print MCE event and panic. */
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_exception
- /*
- * We will not reach here. Even if we did, there is no way out. Call
- * unrecoverable_exception and die.
- */
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b 1b
/*
- * r13 points to the PACA, r9 contains the saved CR,
- * r12 contains the saved SRR1, SRR0 is still ready for return
- * r3 has the faulting address
- * r9 - r13 are saved in paca->exslb.
- * r3 is saved in paca->slb_r3
- * We assume we aren't going to take any exceptions during this procedure.
+ * Ensure that any handlers that get invoked from the exception prologs
+ * above are below the first 64KB (0x10000) of the kernel image because
+ * the prologs assemble the addresses of these handlers using the
+ * LOAD_HANDLER macro, which uses an ori instruction.
*/
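+/*
+ * For reference (a sketch of the existing macro, mirroring the
+ * LOAD_SYSCALL_HANDLER definition above, not new code): LOAD_HANDLER
+ * expands to roughly
+ *
+ *	ld	reg,PACAKBASE(r13)
+ *	ori	reg,reg,(ABS_ADDR(label))@l
+ *
+ * and ori can only OR in a 16-bit immediate, hence the 64KB limit.
+ */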
-slb_miss_realmode:
- mflr r10
-#ifdef CONFIG_RELOCATABLE
- mtctr r11
-#endif
- stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
- std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
-
-#ifdef CONFIG_PPC_STD_MMU_64
-BEGIN_MMU_FTR_SECTION
- bl slb_allocate_realmode
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
-#endif
- /* All done -- return from exception. */
-
- ld r10,PACA_EXSLB+EX_LR(r13)
- ld r3,PACA_EXSLB+EX_R3(r13)
- lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
-
- mtlr r10
- andi. r10,r12,MSR_RI /* check for unrecoverable exception */
-BEGIN_MMU_FTR_SECTION
- beq- 2f
-FTR_SECTION_ELSE
- b 2f
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+/*** Common interrupt handlers ***/
-.machine push
-.machine "power4"
- mtcrf 0x80,r9
- mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-.machine pop
- RESTORE_PPR_PACA(PACA_EXSLB, r9)
- ld r9,PACA_EXSLB+EX_R9(r13)
- ld r10,PACA_EXSLB+EX_R10(r13)
- ld r11,PACA_EXSLB+EX_R11(r13)
- ld r12,PACA_EXSLB+EX_R12(r13)
- ld r13,PACA_EXSLB+EX_R13(r13)
- rfid
- b . /* prevent speculative execution */
+ /*
+ * Relocation-on interrupts: A subset of the interrupts can be delivered
+ * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
+ * it. Addresses are the same as the original interrupt addresses, but
+ * offset by 0xc000000000004000.
+ * It's impossible to receive interrupts below 0x300 via this mechanism.
+ * KVM: None of these traps are from the guest; anything that escalated
+ * to HV=1 from HV=0 is delivered via real mode handlers.
+ */
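+ /*
+ * Example: an external interrupt normally taken at 0x500 is, with
+ * relocation on, taken at 0xc000000000004500 (0x500 + that offset).
+ */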
-2: mfspr r11,SPRN_SRR0
- ld r10,PACAKBASE(r13)
- LOAD_HANDLER(r10,unrecov_slb)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
- mtspr SPRN_SRR1,r10
- rfid
- b .
+ /*
+ * This uses the standard macro, since the original 0x300 vector
+ * only has extra guff for STAB-based processors -- which never
+ * come here.
+ */
-unrecov_slb:
- EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
- RECONCILE_IRQ_STATE(r10, r11)
- bl save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b 1b
+EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline)
+ b __ppc64_runlatch_on
+USE_FIXED_SECTION(virt_trampolines)
+ /*
+ * The __end_interrupts marker must be past the out-of-line (OOL)
+ * handlers, so that they are copied to real address 0x100 when running
+ * a relocatable kernel. This ensures they can be reached from the short
+ * trampoline handlers (like 0x4f00, 0x4f20, etc.) which branch
+ * directly, without using LOAD_HANDLER().
+ */
+ .align 7
+ .globl __end_interrupts
+__end_interrupts:
+DEFINE_FIXED_SYMBOL(__end_interrupts)
#ifdef CONFIG_PPC_970_NAP
-power4_fixup_nap:
+TRAMP_REAL_BEGIN(power4_fixup_nap)
andc r9,r9,r10
std r9,TI_LOCAL_FLAGS(r11)
ld r10,_LINK(r1) /* make idle task do the */
@@ -1450,6 +1385,13 @@ power4_fixup_nap:
blr
#endif
+CLOSE_FIXED_SECTION(real_vectors);
+CLOSE_FIXED_SECTION(real_trampolines);
+CLOSE_FIXED_SECTION(virt_vectors);
+CLOSE_FIXED_SECTION(virt_trampolines);
+
+USE_TEXT_SECTION()
+
/*
* Hash table stuff
*/
@@ -1595,3 +1537,39 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
1: addi r3,r1,STACK_FRAME_OVERHEAD
bl kernel_bad_stack
b 1b
+
+/*
+ * Called from arch_local_irq_enable when an interrupt needs
+ * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate
+ * which kind of interrupt. MSR:EE is already off. We generate a
+ * stackframe as if a real interrupt had happened.
+ *
+ * Note: While MSR:EE is off, we need to make sure that _MSR
+ * in the generated frame has EE set to 1 or the exception
+ * handler will not properly re-enable them.
+ */
+_GLOBAL(__replay_interrupt)
+ /* We are going to jump to the exception common code which
+ * will retrieve various register values from the PACA which
+ * we don't give a damn about, so we don't bother storing them.
+ */
+ mfmsr r12
+ mflr r11
+ mfcr r9
+ ori r12,r12,MSR_EE
+ cmpwi r3,0x900
+ beq decrementer_common
+ cmpwi r3,0x500
+ beq hardware_interrupt_common
+BEGIN_FTR_SECTION
+ cmpwi r3,0xe80
+ beq h_doorbell_common
+ cmpwi r3,0xea0
+ beq h_virt_irq_common
+ cmpwi r3,0xe60
+ beq hmi_exception_common
+FTR_SECTION_ELSE
+ cmpwi r3,0xa00
+ beq doorbell_super_common
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+ blr
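+
+/*
+ * Caller-side sketch (assumed, simplified from the soft-mask replay
+ * code in irq.c; not part of this patch):
+ *
+ *	unsigned int replay = __check_irq_replay();
+ *	if (replay)
+ *		__replay_interrupt(replay);	(r3 = 0x500, 0x900, ...)
+ */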
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index b3a663333d36..8f0c7c5d93f2 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -333,6 +333,11 @@ int __init fadump_reserve_mem(void)
return 1;
}
+unsigned long __init arch_reserved_kernel_pages(void)
+{
+ return memblock_reserved_size() / PAGE_SIZE;
+}
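+
+/*
+ * (Sketch of intent, assumed rather than stated in this patch: the hook
+ * lets generic mm code see how much memory the early fadump reservation
+ * took, e.g. so large system hash sizing is not distorted by it.)
+ */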
+
/* Look for fadump= cmdline option. */
static int __init early_fadump_param(char *p)
{
@@ -778,7 +783,11 @@ static int fadump_init_elfcore_header(char *bufp)
elf->e_entry = 0;
elf->e_phoff = sizeof(struct elfhdr);
elf->e_shoff = 0;
- elf->e_flags = ELF_CORE_EFLAGS;
+#if defined(_CALL_ELF)
+ elf->e_flags = _CALL_ELF;
+#else
+ elf->e_flags = 0;
+#endif
elf->e_ehsize = sizeof(struct elfhdr);
elf->e_phentsize = sizeof(struct elf_phdr);
elf->e_phnum = 0;
@@ -1104,7 +1113,9 @@ static ssize_t fadump_release_memory_store(struct kobject *kobj,
* Take away the '/proc/vmcore'. We are releasing the dump
* memory, hence it will not be valid anymore.
*/
+#ifdef CONFIG_PROC_VMCORE
vmcore_cleanup();
+#endif
fadump_invalidate_release_mem();
} else
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 15da2b5df85e..08d14b096eb9 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -50,32 +50,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
#define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
#define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/* void do_load_up_transact_fpu(struct thread_struct *thread)
- *
- * This is similar to load_up_fpu but for the transactional version of the FP
- * register set. It doesn't mess with the task MSR or valid flags.
- * Furthermore, we don't do lazy FP with TM currently.
- */
-_GLOBAL(do_load_up_transact_fpu)
- mfmsr r6
- ori r5,r6,MSR_FP
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
- oris r5,r5,MSR_VSX@h
-END_FTR_SECTION_IFSET(CPU_FTR_VSX)
-#endif
- SYNC
- MTMSRD(r5)
-
- addi r7,r3,THREAD_TRANSACT_FPSTATE
- lfd fr0,FPSTATE_FPSCR(r7)
- MTFSF_L(fr0)
- REST_32FPVSRS(0, R4, R7)
-
- blr
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
/*
* Load state from memory into FP registers including FPSCR.
* Assumes the caller has enabled FP in the MSR.
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index cc52d9795f88..a95639b8d4ac 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -593,7 +593,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
if (!ftrace_graph_entry(&trace))
goto out;
- if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
+ if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
+ NULL) == -EBUSY)
goto out;
parent = return_hooker;
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index dc0488b6f6e1..a3f821eb7e9a 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -266,7 +266,6 @@ __secondary_hold_acknowledge:
#define EXCEPTION_PROLOG_2 \
- CLR_TOP32(r11); \
stw r10,_CCR(r11); /* save registers */ \
stw r12,GPR12(r11); \
stw r9,GPR9(r11); \
@@ -862,7 +861,6 @@ __secondary_start:
/* ptr to phys current thread */
tophys(r4,r2)
addi r4,r4,THREAD /* phys address of our thread_struct */
- CLR_TOP32(r4)
mtspr SPRN_SPRG_THREAD,r4
li r3,0
mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */
@@ -949,7 +947,6 @@ start_here:
/* ptr to phys current thread */
tophys(r4,r2)
addi r4,r4,THREAD /* init task's THREAD */
- CLR_TOP32(r4)
mtspr SPRN_SPRG_THREAD,r4
li r3,0
mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index f765b0434731..79da0641bae2 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -28,6 +28,7 @@
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/ppc_asm.h>
+#include <asm/head-64.h>
#include <asm/asm-offsets.h>
#include <asm/bug.h>
#include <asm/cputable.h>
@@ -65,9 +66,14 @@
* 2. The kernel is entered at __start
*/
- .text
- .globl _stext
-_stext:
+OPEN_FIXED_SECTION(first_256B, 0x0, 0x100)
+USE_FIXED_SECTION(first_256B)
+ /*
+ * Offsets are relative from the start of fixed section, and
+ * first_256B starts at 0. Offsets are a bit easier to use here
+ * than the fixed section entry macros.
+ */
+ . = 0x0
_GLOBAL(__start)
/* NOP this out unconditionally */
BEGIN_FTR_SECTION
@@ -104,6 +110,7 @@ __secondary_hold_acknowledge:
. = 0x5c
.globl __run_at_load
__run_at_load:
+DEFINE_FIXED_SYMBOL(__run_at_load)
.long 0x72756e30 /* "run0" -- relocate to 0 by default */
#endif
@@ -133,7 +140,7 @@ __secondary_hold:
/* Tell the master cpu we're here */
/* Relocation is off & we are located at an address less */
/* than 0x100, so only need to grab low order offset. */
- std r24,__secondary_hold_acknowledge-_stext(0)
+ std r24,(ABS_ADDR(__secondary_hold_acknowledge))(0)
sync
li r26,0
@@ -141,7 +148,7 @@ __secondary_hold:
tovirt(r26,r26)
#endif
/* All secondary cpus wait here until told to start. */
-100: ld r12,__secondary_hold_spinloop-_stext(r26)
+100: ld r12,(ABS_ADDR(__secondary_hold_spinloop))(r26)
cmpdi 0,r12,0
beq 100b
@@ -166,12 +173,13 @@ __secondary_hold:
#else
BUG_OPCODE
#endif
+CLOSE_FIXED_SECTION(first_256B)
/* This value is used to mark exception frames on the stack. */
.section ".toc","aw"
exception_marker:
.tc ID_72656773_68657265[TC],0x7265677368657265
- .text
+ .previous
/*
* On server, we include the exception vectors code here as it
@@ -180,8 +188,12 @@ exception_marker:
*/
#ifdef CONFIG_PPC_BOOK3S
#include "exceptions-64s.S"
+#else
+OPEN_TEXT_SECTION(0x100)
#endif
+USE_TEXT_SECTION()
+
#ifdef CONFIG_PPC_BOOK3E
/*
* The booting_thread_hwid holds the thread id we want to boot in cpu
@@ -558,7 +570,7 @@ __after_prom_start:
#if defined(CONFIG_PPC_BOOK3E)
tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */
#endif
- lwz r7,__run_at_load-_stext(r26)
+ lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)
#if defined(CONFIG_PPC_BOOK3E)
tophys(r26,r26)
#endif
@@ -601,7 +613,7 @@ __after_prom_start:
#if defined(CONFIG_PPC_BOOK3E)
tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */
#endif
- lwz r7,__run_at_load-_stext(r26)
+ lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)
cmplwi cr0,r7,1
bne 3f
@@ -611,28 +623,35 @@ __after_prom_start:
sub r5,r5,r11
#else
/* just copy interrupts */
- LOAD_REG_IMMEDIATE(r5, __end_interrupts - _stext)
+ LOAD_REG_IMMEDIATE(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
#endif
b 5f
3:
#endif
- lis r5,(copy_to_here - _stext)@ha
- addi r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
+ /* # bytes of memory to copy */
+ lis r5,(ABS_ADDR(copy_to_here))@ha
+ addi r5,r5,(ABS_ADDR(copy_to_here))@l
bl copy_and_flush /* copy the first n bytes */
/* this includes the code being */
/* executed here. */
- addis r8,r3,(4f - _stext)@ha /* Jump to the copy of this code */
- addi r12,r8,(4f - _stext)@l /* that we just made */
+ /* Jump to the copy of this code that we just made */
+ addis r8,r3,(ABS_ADDR(4f))@ha
+ addi r12,r8,(ABS_ADDR(4f))@l
mtctr r12
bctr
.balign 8
-p_end: .llong _end - _stext
+p_end: .llong _end - copy_to_here
-4: /* Now copy the rest of the kernel up to _end */
- addis r5,r26,(p_end - _stext)@ha
- ld r5,(p_end - _stext)@l(r5) /* get _end */
+4:
+ /*
+ * Now copy the rest of the kernel up to _end, add
+ * _end - copy_to_here to the copy limit and run again.
+ */
+ addis r8,r26,(ABS_ADDR(p_end))@ha
+ ld r8,(ABS_ADDR(p_end))@l(r8)
+ add r5,r5,r8
5: bl copy_and_flush /* copy the rest */
9: b start_here_multiplatform
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 43ddaae42baf..3a185c51ce8f 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -151,7 +151,6 @@ turn_on_mmu:
#define EXCEPTION_PROLOG_2 \
- CLR_TOP32(r11); \
stw r10,_CCR(r11); /* save registers */ \
stw r12,GPR12(r11); \
stw r9,GPR9(r11); \
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index aec9a1b1d25b..9781c69eae57 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -206,7 +206,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
/*
* Handle debug exception notifications.
*/
-int __kprobes hw_breakpoint_handler(struct die_args *args)
+int hw_breakpoint_handler(struct die_args *args)
{
int rc = NOTIFY_STOP;
struct perf_event *bp;
@@ -290,11 +290,12 @@ out:
rcu_read_unlock();
return rc;
}
+NOKPROBE_SYMBOL(hw_breakpoint_handler);
/*
* Handle single-step exceptions following a DABR hit.
*/
-static int __kprobes single_step_dabr_instruction(struct die_args *args)
+static int single_step_dabr_instruction(struct die_args *args)
{
struct pt_regs *regs = args->regs;
struct perf_event *bp = NULL;
@@ -329,11 +330,12 @@ static int __kprobes single_step_dabr_instruction(struct die_args *args)
return NOTIFY_STOP;
}
+NOKPROBE_SYMBOL(single_step_dabr_instruction);
/*
* Handle debug exception notifications.
*/
-int __kprobes hw_breakpoint_exceptions_notify(
+int hw_breakpoint_exceptions_notify(
struct notifier_block *unused, unsigned long val, void *data)
{
int ret = NOTIFY_DONE;
@@ -349,6 +351,7 @@ int __kprobes hw_breakpoint_exceptions_notify(
return ret;
}
+NOKPROBE_SYMBOL(hw_breakpoint_exceptions_notify);
/*
* Release the user breakpoints used by ptrace
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index c1ca9282f4a0..6ca9a2ffaac7 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -227,7 +227,7 @@ int ibmebus_request_irq(u32 ist, irq_handler_t handler,
{
unsigned int irq = irq_create_mapping(NULL, ist);
- if (irq == NO_IRQ)
+ if (!irq)
return -EINVAL;
return request_irq(irq, handler, irq_flags, devname, dev_id);
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index ba79d15f4ddd..bd739fed26e3 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -44,18 +44,6 @@
PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
PSSCR_MTL_MASK
-/* Idle state entry routines */
-
-#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
- /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
- std r0,0(r1); \
- ptesync; \
- ld r0,0(r1); \
-1: cmp cr0,r0,r0; \
- bne 1b; \
- IDLE_INST; \
- b .
-
.text
/*
@@ -363,8 +351,8 @@ _GLOBAL(power9_idle_stop)
* cr3 - set to gt if waking up with partial/complete hypervisor state loss
*/
_GLOBAL(pnv_restore_hyp_resource)
- ld r2,PACATOC(r13);
BEGIN_FTR_SECTION
+ ld r2,PACATOC(r13);
/*
* POWER ISA 3. Use PSSCR to determine if we
* are waking up from deep idle state
@@ -395,6 +383,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
*/
clrldi r5,r13,63
clrrdi r13,r13,1
+
+ /* Now that we are sure r13 is corrected, load TOC */
+ ld r2,PACATOC(r13);
cmpwi cr4,r5,1
mtspr SPRN_HSPRG0,r13
@@ -420,7 +411,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
*
* r13 - PACA
* cr3 - gt if waking up with partial/complete hypervisor state loss
- * cr4 - eq if waking up from complete hypervisor state loss.
+ * cr4 - gt or eq if waking up from complete hypervisor state loss.
*/
_GLOBAL(pnv_wakeup_tb_loss)
ld r1,PACAR1(r13)
@@ -462,7 +453,7 @@ lwarx_loop2:
* At this stage
* cr2 - eq if first thread to wakeup in core
* cr3- gt if waking up with partial/complete hypervisor state loss
- * cr4 - eq if waking up from complete hypervisor state loss.
+ * cr4 - gt or eq if waking up from complete hypervisor state loss.
*/
ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
@@ -490,7 +481,7 @@ first_thread_in_subcore:
* If waking up from sleep, subcore state is not lost. Hence
* skip subcore state restore
*/
- bne cr4,subcore_state_restored
+ blt cr4,subcore_state_restored
/* Restore per-subcore state */
ld r4,_SDR1(r1)
@@ -535,7 +526,7 @@ timebase_resync:
* If waking up from sleep, per core state is not lost, skip to
* clear_lock.
*/
- bne cr4,clear_lock
+ blt cr4,clear_lock
/*
* First thread in the core to wake up and its waking up with
@@ -566,7 +557,7 @@ common_exit:
* If waking up from sleep, hypervisor state is not lost. Hence
* skip hypervisor state restore.
*/
- bne cr4,hypervisor_state_restored
+ blt cr4,hypervisor_state_restored
/* Waking up from winkle */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 08887cf2b20e..3c05c311e35e 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -67,6 +67,7 @@
#include <asm/smp.h>
#include <asm/debug.h>
#include <asm/livepatch.h>
+#include <asm/asm-prototypes.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
@@ -156,6 +157,15 @@ notrace unsigned int __check_irq_replay(void)
}
/*
+ * Check if a hypervisor Maintenance interrupt happened.
+ * This is a higher priority interrupt than the others, so
+ * replay it first.
+ */
+ local_paca->irq_happened &= ~PACA_IRQ_HMI;
+ if (happened & PACA_IRQ_HMI)
+ return 0xe60;
+
+ /*
* We may have missed a decrementer interrupt. We check the
* decrementer itself rather than the paca irq_happened field
* in case we also had a rollover while hard disabled
@@ -190,11 +200,6 @@ notrace unsigned int __check_irq_replay(void)
}
#endif /* CONFIG_PPC_BOOK3E */
- /* Check if a hypervisor Maintenance interrupt happened */
- local_paca->irq_happened &= ~PACA_IRQ_HMI;
- if (happened & PACA_IRQ_HMI)
- return 0xe60;
-
/* There should be nothing left ! */
BUG_ON(local_paca->irq_happened != 0);
@@ -514,7 +519,7 @@ void __do_irq(struct pt_regs *regs)
may_hard_irq_enable();
/* And finally process it */
- if (unlikely(irq == NO_IRQ))
+ if (unlikely(!irq))
__this_cpu_inc(irq_stat.spurious_irqs);
else
generic_handle_irq(irq);
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 3ed8ec09b5c9..e785cc9e1ecd 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -29,7 +29,7 @@
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
-#include <linux/module.h>
+#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/slab.h>
#include <asm/code-patching.h>
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 7b750c4ed5c7..bc525ea0dc09 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -193,10 +193,10 @@ static int __init add_legacy_soc_port(struct device_node *np,
*/
if (tsi && !strcmp(tsi->type, "tsi-bridge"))
return add_legacy_port(np, -1, UPIO_TSI, addr, addr,
- NO_IRQ, legacy_port_flags, 0);
+ 0, legacy_port_flags, 0);
else
return add_legacy_port(np, -1, UPIO_MEM, addr, addr,
- NO_IRQ, legacy_port_flags, 0);
+ 0, legacy_port_flags, 0);
}
static int __init add_legacy_isa_port(struct device_node *np,
@@ -242,7 +242,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
/* Add port, irq will be dealt with later */
return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]),
- taddr, NO_IRQ, legacy_port_flags, 0);
+ taddr, 0, legacy_port_flags, 0);
}
@@ -314,7 +314,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
/* Add port, irq will be dealt with later. We passed a translated
* IO port value. It will be fixed up later along with the irq
*/
- return add_legacy_port(np, index, iotype, base, addr, NO_IRQ,
+ return add_legacy_port(np, index, iotype, base, addr, 0,
legacy_port_flags, np != pci_dev);
}
#endif
@@ -462,14 +462,14 @@ static void __init fixup_port_irq(int index,
DBG("fixup_port_irq(%d)\n", index);
virq = irq_of_parse_and_map(np, 0);
- if (virq == NO_IRQ && legacy_serial_infos[index].irq_check_parent) {
+ if (!virq && legacy_serial_infos[index].irq_check_parent) {
np = of_get_parent(np);
if (np == NULL)
return;
virq = irq_of_parse_and_map(np, 0);
of_node_put(np);
}
- if (virq == NO_IRQ)
+ if (!virq)
return;
port->irq = virq;
@@ -543,7 +543,7 @@ static int __init serial_dev_init(void)
struct plat_serial8250_port *port = &legacy_serial_ports[i];
struct device_node *np = legacy_serial_infos[i].np;
- if (port->irq == NO_IRQ)
+ if (!port->irq)
fixup_port_irq(i, np, port);
if (port->iotype == UPIO_PORT)
fixup_port_pio(i, np, port);
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 4c780a342282..a205fa3d9bf3 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -23,6 +23,7 @@
#include <asm/current.h>
#include <asm/machdep.h>
#include <asm/cacheflush.h>
+#include <asm/firmware.h>
#include <asm/paca.h>
#include <asm/mmu.h>
#include <asm/sections.h> /* _end */
@@ -31,21 +32,6 @@
#include <asm/hw_breakpoint.h>
#include <asm/asm-prototypes.h>
-#ifdef CONFIG_PPC_BOOK3E
-int default_machine_kexec_prepare(struct kimage *image)
-{
- int i;
- /*
- * Since we use the kernel fault handlers and paging code to
- * handle the virtual mode, we must make sure no destination
- * overlaps kernel static data or bss.
- */
- for (i = 0; i < image->nr_segments; i++)
- if (image->segment[i].mem < __pa(_end))
- return -ETXTBSY;
- return 0;
-}
-#else
int default_machine_kexec_prepare(struct kimage *image)
{
int i;
@@ -55,9 +41,6 @@ int default_machine_kexec_prepare(struct kimage *image)
const unsigned long *basep;
const unsigned int *sizep;
- if (!mmu_hash_ops.hpte_clear_all)
- return -ENOENT;
-
/*
* Since we use the kernel fault handlers and paging code to
* handle the virtual mode, we must make sure no destination
@@ -67,31 +50,6 @@ int default_machine_kexec_prepare(struct kimage *image)
if (image->segment[i].mem < __pa(_end))
return -ETXTBSY;
- /*
- * For non-LPAR, we absolutely can not overwrite the mmu hash
- * table, since we are still using the bolted entries in it to
- * do the copy. Check that here.
- *
- * It is safe if the end is below the start of the blocked
- * region (end <= low), or if the beginning is after the
- * end of the blocked region (begin >= high). Use the
- * boolean identity !(a || b) === (!a && !b).
- */
-#ifdef CONFIG_PPC_STD_MMU_64
- if (htab_address) {
- low = __pa(htab_address);
- high = low + htab_size_bytes;
-
- for (i = 0; i < image->nr_segments; i++) {
- begin = image->segment[i].mem;
- end = begin + image->segment[i].memsz;
-
- if ((begin < high) && (end > low))
- return -ETXTBSY;
- }
- }
-#endif /* CONFIG_PPC_STD_MMU_64 */
-
/* We also should not overwrite the tce tables */
for_each_node_by_type(node, "pci") {
basep = of_get_property(node, "linux,tce-base", NULL);
@@ -113,7 +71,6 @@ int default_machine_kexec_prepare(struct kimage *image)
return 0;
}
-#endif /* !CONFIG_PPC_BOOK3E */
static void copy_segments(unsigned long ind)
{
@@ -332,11 +289,14 @@ struct paca_struct kexec_paca;
/* Our assembly helper, in misc_64.S */
extern void kexec_sequence(void *newstack, unsigned long start,
void *image, void *control,
- void (*clear_all)(void)) __noreturn;
+ void (*clear_all)(void),
+ bool copy_with_mmu_off) __noreturn;
/* too late to fail here */
void default_machine_kexec(struct kimage *image)
{
+ bool copy_with_mmu_off;
+
/* prepare control code if any */
/*
@@ -374,18 +334,29 @@ void default_machine_kexec(struct kimage *image)
/* XXX: If anyone does 'dynamic lppacas' this will also need to be
* switched to a static version!
*/
+ /*
+ * On Book3S, the copy must happen with the MMU off if we are either
+ * using Radix page tables or we are not in an LPAR since we can
+ * overwrite the page tables while copying.
+ *
+ * In an LPAR, we keep the MMU on otherwise we can't access beyond
+ * the RMA. On BookE there is no real MMU off mode, so we have to
+ * keep it enabled as well (but then we have bolted TLB entries).
+ */
+#ifdef CONFIG_PPC_BOOK3E
+ copy_with_mmu_off = false;
+#else
+ copy_with_mmu_off = radix_enabled() ||
+ !(firmware_has_feature(FW_FEATURE_LPAR) ||
+ firmware_has_feature(FW_FEATURE_PS3_LV1));
+#endif
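+
+ /*
+ * (For illustration, from the expression above: radix anywhere, or
+ * hash on bare metal -> copy with the MMU off; hash under an LPAR or
+ * the PS3 hypervisor -> keep the MMU on; Book3E -> keep it on.)
+ */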
/* Some things are best done in assembly. Finding globals with
* a toc is easier in C, so pass in what we can.
*/
kexec_sequence(&kexec_stack, image->start, image,
- page_address(image->control_code_page),
-#ifdef CONFIG_PPC_STD_MMU
- mmu_hash_ops.hpte_clear_all
-#else
- NULL
-#endif
- );
+ page_address(image->control_code_page),
+ mmu_cleanup_all, copy_with_mmu_off);
/* NOTREACHED */
}
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index ef267fd9dd22..5e7ece0fda9f 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -92,7 +92,8 @@ void save_mce_event(struct pt_regs *regs, long handled,
mce->in_use = 1;
mce->initiator = MCE_INITIATOR_CPU;
- if (handled)
+ /* Mark it recovered if we have handled it and MSR(RI=1). */
+ if (handled && (regs->msr & MSR_RI))
mce->disposition = MCE_DISPOSITION_RECOVERED;
else
mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index d9c912b6e632..03756ffdcd71 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -328,7 +328,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
*
* flush_icache_range(unsigned long start, unsigned long stop)
*/
-_KPROBE(flush_icache_range)
+_GLOBAL(flush_icache_range)
BEGIN_FTR_SECTION
PURGE_PREFETCHED_INS
blr /* for 601, do nothing */
@@ -358,6 +358,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
sync /* additional sync needed on g4 */
isync
blr
+_ASM_NOKPROBE_SYMBOL(flush_icache_range)
+
/*
* Flush a particular page from the data cache to RAM.
* Note: this is necessary because the instruction cache does *not*
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index cb195157b318..9f0bed214bcb 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -66,7 +66,7 @@ PPC64_CACHES:
* flush all bytes from start through stop-1 inclusive
*/
-_KPROBE(flush_icache_range)
+_GLOBAL(flush_icache_range)
BEGIN_FTR_SECTION
PURGE_PREFETCHED_INS
blr
@@ -109,7 +109,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
bdnz 2b
isync
blr
- .previous .text
+_ASM_NOKPROBE_SYMBOL(flush_icache_range)
+
/*
* Like above, but only do the D-cache.
*
@@ -591,7 +592,8 @@ real_mode: /* assume normal blr return */
#endif
/*
- * kexec_sequence(newstack, start, image, control, clear_all())
+ * kexec_sequence(newstack, start, image, control, clear_all(),
+ * copy_with_mmu_off)
*
* does the grungy work with stack switching and real mode switches
* also does simple calls to other code
@@ -627,7 +629,7 @@ _GLOBAL(kexec_sequence)
mr r29,r5 /* image (virt) */
mr r28,r6 /* control, unused */
mr r27,r7 /* clear_all() fn desc */
- mr r26,r8 /* spare */
+ mr r26,r8 /* copy_with_mmu_off */
lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */
/* disable interrupts, we are overwriting kernel data next */
@@ -639,15 +641,24 @@ _GLOBAL(kexec_sequence)
mtmsrd r3,1
#endif
+ /* We need to turn the MMU off unless we are in hash mode
+ * under a hypervisor.
+ */
+ cmpdi r26,0
+ beq 1f
+ bl real_mode
+1:
/* copy dest pages, flush whole dest image */
mr r3,r29
bl kexec_copy_flush /* (image) */
- /* turn off mmu */
+ /* turn off mmu now if not done earlier */
+ cmpdi r26,0
+ bne 1f
bl real_mode
/* copy 0x100 bytes starting at start to 0 */
- li r3,0
+1: li r3,0
mr r4,r30 /* start, aka phys mem offset */
li r5,0x100
li r6,0
@@ -659,7 +670,9 @@ _GLOBAL(kexec_sequence)
li r6,1
stw r6,kexec_flag-1b(5)
-#ifndef CONFIG_PPC_BOOK3E
+ cmpdi r27,0
+ beq 1f
+
/* clear out hardware hash page table and tlb */
#ifdef PPC64_ELF_ABI_v1
ld r12,0(r27) /* deref function descriptor */
@@ -668,7 +681,6 @@ _GLOBAL(kexec_sequence)
#endif
mtctr r12
bctrl /* mmu_hash_ops.hpte_clear_all(void); */
-#endif /* !CONFIG_PPC_BOOK3E */
/*
* kexec image calling is:
@@ -695,7 +707,7 @@ _GLOBAL(kexec_sequence)
* are the boot cpu ?????
* other device tree differences (prop sizes, va vs pa, etc)...
*/
- mr r3,r25 # my phys cpu
+1: mr r3,r25 # my phys cpu
mr r4,r30 # start, aka phys mem offset
mtlr 4
li r5,0
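The reworked sequence moves the real_mode() switch to either side of the copy, depending on the new flag. In C-like pseudocode the control flow above is roughly (illustrative only, not a drop-in replacement for the assembly):

	if (copy_with_mmu_off)
		real_mode();		/* MMU off before the copy */
	kexec_copy_flush(image);	/* copy dest pages, flush image */
	if (!copy_with_mmu_off)
		real_mode();		/* otherwise MMU off after the copy */
	...
	if (clear_all)			/* r27 == 0 when there is nothing to clear */
		clear_all();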
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index d1f1b35bf0c7..30b89d5cbb03 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -27,7 +27,7 @@
#include <linux/sort.h>
#include <asm/setup.h>
-LIST_HEAD(module_bug_list);
+static LIST_HEAD(module_bug_list);
static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 64174bf95611..34d2c595de23 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -542,9 +542,9 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
time->tv_nsec = 0;
}
*buf = kmemdup(buff + hdr_size, length, GFP_KERNEL);
+ kfree(buff);
if (*buf == NULL)
return -ENOMEM;
- kfree(buff);
*ecc_notice_size = 0;
if (err_type == ERR_TYPE_KERNEL_PANIC_GZ)
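The reordering here is a leak fix: buff is dead as soon as kmemdup() returns, so it must be freed whether or not the duplication succeeded. The corrected ownership pattern, as a sketch:

	*buf = kmemdup(buff + hdr_size, length, GFP_KERNEL);
	kfree(buff);		/* source is finished with, success or not */
	if (*buf == NULL)
		return -ENOMEM;	/* the old code leaked buff on this path */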
@@ -851,7 +851,7 @@ static long dev_nvram_ioctl(struct file *file, unsigned int cmd,
}
}
-const struct file_operations nvram_fops = {
+static const struct file_operations nvram_fops = {
.owner = THIS_MODULE,
.llseek = dev_nvram_llseek,
.read = dev_nvram_read,
@@ -956,7 +956,7 @@ int __init nvram_remove_partition(const char *name, int sig,
/* Make partition a free partition */
part->header.signature = NVRAM_SIG_FREE;
- strncpy(part->header.name, "wwwwwwwwwwww", 12);
+ memset(part->header.name, 'w', 12);
part->header.checksum = nvram_checksum(&part->header);
rc = nvram_write_header(part);
if (rc <= 0) {
@@ -974,8 +974,8 @@ int __init nvram_remove_partition(const char *name, int sig,
}
if (prev) {
prev->header.length += part->header.length;
- prev->header.checksum = nvram_checksum(&part->header);
- rc = nvram_write_header(part);
+ prev->header.checksum = nvram_checksum(&prev->header);
+ rc = nvram_write_header(prev);
if (rc <= 0) {
printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc);
return rc;
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index a5c0153ede37..95d3769a2e26 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -78,6 +78,7 @@ EXPORT_SYMBOL(get_pci_dma_ops);
static int get_phb_number(struct device_node *dn)
{
int ret, phb_id = -1;
+ u32 prop_32;
u64 prop;
/*
@@ -86,8 +87,10 @@ static int get_phb_number(struct device_node *dn)
* reading "ibm,opal-phbid", only present in OPAL environment.
*/
ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop);
- if (ret)
- ret = of_property_read_u32_index(dn, "reg", 1, (u32 *)&prop);
+ if (ret) {
+ ret = of_property_read_u32_index(dn, "reg", 1, &prop_32);
+ prop = prop_32;
+ }
if (!ret)
phb_id = (int)(prop & (MAX_PHBS - 1));
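The cast that this hunk removes was an endian bug: writing a u32 through (u32 *)&prop stores into only half of the u64, and on big-endian 64-bit that is the most-significant half, so the PHB id came out shifted. Reading into a properly typed u32 and then widening avoids the aliasing entirely, as a sketch:

	u64 prop;
	u32 prop_32;

	/* Broken on big-endian: writes the high 32 bits of prop. */
	of_property_read_u32_index(dn, "reg", 1, (u32 *)&prop);

	/* Correct: read into a u32, then widen. */
	of_property_read_u32_index(dn, "reg", 1, &prop_32);
	prop = prop_32;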
@@ -151,6 +154,42 @@ void pcibios_free_controller(struct pci_controller *phb)
EXPORT_SYMBOL_GPL(pcibios_free_controller);
/*
+ * This function is used to call pcibios_free_controller()
+ * in a deferred manner: a callback from the PCI subsystem.
+ *
+ * _*DO NOT*_ call pcibios_free_controller() explicitly if
+ * this is used (or it may access an invalid *phb pointer).
+ *
+ * The callback occurs when all references to the root bus
+ * are dropped (e.g., child buses/devices and their users).
+ *
+ * It's called as .release_fn() of 'struct pci_host_bridge'
+ * which is associated with the 'struct pci_controller.bus'
+ * (root bus) - it expects .release_data to hold a pointer
+ * to 'struct pci_controller'.
+ *
+ * In order to use it, register .release_fn()/release_data
+ * like this:
+ *
+ * pci_set_host_bridge_release(bridge,
+ *                             pcibios_free_controller_deferred,
+ *                             (void *) phb);
+ *
+ * e.g. in the pcibios_root_bridge_prepare() callback from
+ * pci_create_root_bus().
+ */
+void pcibios_free_controller_deferred(struct pci_host_bridge *bridge)
+{
+ struct pci_controller *phb = (struct pci_controller *)
+ bridge->release_data;
+
+ pr_debug("domain %d, dynamic %d\n", phb->global_number, phb->is_dynamic);
+
+ pcibios_free_controller(phb);
+}
+EXPORT_SYMBOL_GPL(pcibios_free_controller_deferred);
+
+/*
* The function is used to return the minimal alignment
* for memory or I/O windows of the associated P2P bridge.
* By default, 4KiB alignment for I/O windows and 1MiB for
@@ -321,7 +360,7 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
line, pin);
virq = irq_create_mapping(NULL, line);
- if (virq != NO_IRQ)
+ if (virq)
irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
} else {
pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n",
@@ -330,7 +369,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
virq = irq_create_of_mapping(&oirq);
}
- if(virq == NO_IRQ) {
+
+ if (!virq) {
pr_debug(" Failed to map !\n");
return -1;
}
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 526ac6750e4d..ea3d98115b88 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -178,7 +178,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
dev->rom_base_reg = PCI_ROM_ADDRESS;
/* Maybe do a default OF mapping here */
- dev->irq = NO_IRQ;
+ dev->irq = 0;
}
of_pci_parse_addrs(node, dev);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 58ccf86415b4..9e7c10fe205f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -59,6 +59,7 @@
#include <asm/exec.h>
#include <asm/livepatch.h>
#include <asm/cpu_has_feature.h>
+#include <asm/asm-prototypes.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
@@ -88,7 +89,13 @@ static void check_if_tm_restore_required(struct task_struct *tsk)
set_thread_flag(TIF_RESTORE_TM);
}
}
+
+static inline bool msr_tm_active(unsigned long msr)
+{
+ return MSR_TM_ACTIVE(msr);
+}
#else
+static inline bool msr_tm_active(unsigned long msr) { return false; }
static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
@@ -104,7 +111,7 @@ static int __init enable_strict_msr_control(char *str)
}
early_param("ppc_strict_facility_enable", enable_strict_msr_control);
-void msr_check_and_set(unsigned long bits)
+unsigned long msr_check_and_set(unsigned long bits)
{
unsigned long oldmsr = mfmsr();
unsigned long newmsr;
@@ -118,6 +125,8 @@ void msr_check_and_set(unsigned long bits)
if (oldmsr != newmsr)
mtmsr_isync(newmsr);
+
+ return newmsr;
}
void __msr_check_and_clear(unsigned long bits)
@@ -196,19 +205,30 @@ EXPORT_SYMBOL_GPL(flush_fp_to_thread);
void enable_kernel_fp(void)
{
+ unsigned long cpumsr;
+
WARN_ON(preemptible());
- msr_check_and_set(MSR_FP);
+ cpumsr = msr_check_and_set(MSR_FP);
if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) {
check_if_tm_restore_required(current);
+ /*
+ * If a thread has already been reclaimed then the
+ * checkpointed registers are on the CPU but have definitely
+ * been saved by the reclaim code. Don't need to and *cannot*
+ * giveup as this would save to the 'live' structure not the
+ * checkpointed structure.
+ */
+ if (!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
+ return;
__giveup_fpu(current);
}
}
EXPORT_SYMBOL(enable_kernel_fp);
static int restore_fp(struct task_struct *tsk) {
- if (tsk->thread.load_fp) {
+ if (tsk->thread.load_fp || msr_tm_active(tsk->thread.regs->msr)) {
load_fp_state(&current->thread.fp_state);
current->thread.load_fp++;
return 1;
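The same guard is added to enable_kernel_fp(), enable_kernel_altivec() and enable_kernel_vsx(): if the CPU's live MSR shows no active transaction but the thread's saved MSR does, a reclaim has already saved the checkpointed registers, and calling __giveup_*() now would write the live register contents over the wrong structure. The shared pattern, sketched for the FP case:

	cpumsr = msr_check_and_set(MSR_FP);
	if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) {
		check_if_tm_restore_required(current);
		/* Reclaimed already? Then the checkpointed state is saved. */
		if (!msr_tm_active(cpumsr) &&
		    msr_tm_active(current->thread.regs->msr))
			return;
		__giveup_fpu(current);
	}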
@@ -248,12 +268,23 @@ EXPORT_SYMBOL(giveup_altivec);
void enable_kernel_altivec(void)
{
+ unsigned long cpumsr;
+
WARN_ON(preemptible());
- msr_check_and_set(MSR_VEC);
+ cpumsr = msr_check_and_set(MSR_VEC);
if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) {
check_if_tm_restore_required(current);
+ /*
+ * If a thread has already been reclaimed then the
+ * checkpointed registers are on the CPU but have definitely
+ * been saved by the reclaim code. Don't need to and *cannot*
+ * giveup as this would save to the 'live' structure not the
+ * checkpointed structure.
+ */
+ if (!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
+ return;
__giveup_altivec(current);
}
}
@@ -278,7 +309,8 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
static int restore_altivec(struct task_struct *tsk)
{
- if (cpu_has_feature(CPU_FTR_ALTIVEC) && tsk->thread.load_vec) {
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
+ (tsk->thread.load_vec || msr_tm_active(tsk->thread.regs->msr))) {
load_vr_state(&tsk->thread.vr_state);
tsk->thread.used_vr = 1;
tsk->thread.load_vec++;
@@ -321,12 +353,23 @@ static void save_vsx(struct task_struct *tsk)
void enable_kernel_vsx(void)
{
+ unsigned long cpumsr;
+
WARN_ON(preemptible());
- msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
+ cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) {
check_if_tm_restore_required(current);
+ /*
+ * If a thread has already been reclaimed then the
+ * checkpointed registers are on the CPU but have definitely
+ * been saved by the reclaim code. Don't need to and *cannot*
+ * giveup as this would save to the 'live' structure not the
+ * checkpointed structure.
+ */
+ if (!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
+ return;
if (current->thread.regs->msr & MSR_FP)
__giveup_fpu(current);
if (current->thread.regs->msr & MSR_VEC)
@@ -438,6 +481,7 @@ void giveup_all(struct task_struct *tsk)
return;
msr_check_and_set(msr_all_available);
+ check_if_tm_restore_required(tsk);
#ifdef CONFIG_PPC_FPU
if (usermsr & MSR_FP)
@@ -464,7 +508,8 @@ void restore_math(struct pt_regs *regs)
{
unsigned long msr;
- if (!current->thread.load_fp && !loadvec(current->thread))
+ if (!msr_tm_active(regs->msr) &&
+ !current->thread.load_fp && !loadvec(current->thread))
return;
msr = regs->msr;
@@ -767,29 +812,15 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+
+static inline bool tm_enabled(struct task_struct *tsk)
+{
+ return tsk && tsk->thread.regs && (tsk->thread.regs->msr & MSR_TM);
+}
+
static void tm_reclaim_thread(struct thread_struct *thr,
struct thread_info *ti, uint8_t cause)
{
- unsigned long msr_diff = 0;
-
- /*
- * If FP/VSX registers have been already saved to the
- * thread_struct, move them to the transact_fp array.
- * We clear the TIF_RESTORE_TM bit since after the reclaim
- * the thread will no longer be transactional.
- */
- if (test_ti_thread_flag(ti, TIF_RESTORE_TM)) {
- msr_diff = thr->ckpt_regs.msr & ~thr->regs->msr;
- if (msr_diff & MSR_FP)
- memcpy(&thr->transact_fp, &thr->fp_state,
- sizeof(struct thread_fp_state));
- if (msr_diff & MSR_VEC)
- memcpy(&thr->transact_vr, &thr->vr_state,
- sizeof(struct thread_vr_state));
- clear_ti_thread_flag(ti, TIF_RESTORE_TM);
- msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1;
- }
-
/*
* Use the current MSR TM suspended bit to track if we have
* checkpointed state outstanding.
@@ -808,15 +839,9 @@ static void tm_reclaim_thread(struct thread_struct *thr,
if (!MSR_TM_SUSPENDED(mfmsr()))
return;
- tm_reclaim(thr, thr->regs->msr, cause);
+ giveup_all(container_of(thr, struct task_struct, thread));
- /* Having done the reclaim, we now have the checkpointed
- * FP/VSX values in the registers. These might be valid
- * even if we have previously called enable_kernel_fp() or
- * flush_fp_to_thread(), so update thr->regs->msr to
- * indicate their current validity.
- */
- thr->regs->msr |= msr_diff;
+ tm_reclaim(thr, thr->ckpt_regs.msr, cause);
}
void tm_reclaim_current(uint8_t cause)
@@ -832,8 +857,8 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
*
* In switching we need to maintain a 2nd register state as
* oldtask->thread.ckpt_regs. We tm_reclaim(oldproc); this saves the
- * checkpointed (tbegin) state in ckpt_regs and saves the transactional
- * (current) FPRs into oldtask->thread.transact_fpr[].
+ * checkpointed (tbegin) state in ckpt_regs, ckfp_state and
+ * ckvr_state
*
* We also context switch (save) TFHAR/TEXASR/TFIAR in here.
*/
@@ -845,14 +870,6 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
if (!MSR_TM_ACTIVE(thr->regs->msr))
goto out_and_saveregs;
- /* Stash the original thread MSR, as giveup_fpu et al will
- * modify it. We hold onto it to see whether the task used
- * FP & vector regs. If the TIF_RESTORE_TM flag is set,
- * ckpt_regs.msr is already set.
- */
- if (!test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_TM))
- thr->ckpt_regs.msr = thr->regs->msr;
-
TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "
"ccr=%lx, msr=%lx, trap=%lx)\n",
tsk->pid, thr->regs->nip,
@@ -881,6 +898,9 @@ void tm_recheckpoint(struct thread_struct *thread,
{
unsigned long flags;
+ if (!(thread->regs->msr & MSR_TM))
+ return;
+
/* We really can't be interrupted here as the TEXASR registers can't
* change and later in the trecheckpoint code, we have a userspace R1.
* So let's hard disable over this region.
@@ -910,10 +930,10 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new)
* If the task was using FP, we non-lazily reload both the original and
* the speculative FP register states. This is because the kernel
* doesn't see if/when a TM rollback occurs, so if we take an FP
- * unavoidable later, we are unable to determine which set of FP regs
+ * unavailable later, we are unable to determine which set of FP regs
* need to be restored.
*/
- if (!new->thread.regs)
+ if (!tm_enabled(new))
return;
if (!MSR_TM_ACTIVE(new->thread.regs->msr)){
@@ -926,35 +946,35 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new)
"(new->msr 0x%lx, new->origmsr 0x%lx)\n",
new->pid, new->thread.regs->msr, msr);
- /* This loads the checkpointed FP/VEC state, if used */
tm_recheckpoint(&new->thread, msr);
- /* This loads the speculative FP/VEC state, if used */
- if (msr & MSR_FP) {
- do_load_up_transact_fpu(&new->thread);
- new->thread.regs->msr |=
- (MSR_FP | new->thread.fpexc_mode);
- }
-#ifdef CONFIG_ALTIVEC
- if (msr & MSR_VEC) {
- do_load_up_transact_altivec(&new->thread);
- new->thread.regs->msr |= MSR_VEC;
- }
-#endif
- /* We may as well turn on VSX too since all the state is restored now */
- if (msr & MSR_VSX)
- new->thread.regs->msr |= MSR_VSX;
+ /*
+ * The checkpointed state has been restored but the live state has
+ * not, ensure all the math functionality is turned off to trigger
+ * restore_math() to reload.
+ */
+ new->thread.regs->msr &= ~(MSR_FP | MSR_VEC | MSR_VSX);
TM_DEBUG("*** tm_recheckpoint of pid %d complete "
"(kernel msr 0x%lx)\n",
new->pid, mfmsr());
}
-static inline void __switch_to_tm(struct task_struct *prev)
+static inline void __switch_to_tm(struct task_struct *prev,
+ struct task_struct *new)
{
if (cpu_has_feature(CPU_FTR_TM)) {
- tm_enable();
- tm_reclaim_task(prev);
+ if (tm_enabled(prev) || tm_enabled(new))
+ tm_enable();
+
+ if (tm_enabled(prev)) {
+ prev->thread.load_tm++;
+ tm_reclaim_task(prev);
+ if (!MSR_TM_ACTIVE(prev->thread.regs->msr) && prev->thread.load_tm == 0)
+ prev->thread.regs->msr &= ~MSR_TM;
+ }
+
+ tm_recheckpoint_new_task(new);
}
}
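Taking both tasks lets __switch_to_tm() enable the TM facility only when one of them actually uses it, reclaim the outgoing task, and recheckpoint the incoming one in a single place. The two call sites in this patch illustrate the convention:

	__switch_to_tm(prev, new);	/* __switch_to(): reclaim prev, recheckpoint new */
	__switch_to_tm(src, src);	/* arch_dup_task_struct(): dst isn't ready yet */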
@@ -976,6 +996,12 @@ void restore_tm_state(struct pt_regs *regs)
{
unsigned long msr_diff;
+ /*
+ * This is the only moment we should clear TIF_RESTORE_TM as
+ * it is here that ckpt_regs.msr and pt_regs.msr become the same
+ * again, anything else could lead to an incorrect ckpt_msr being
+ * saved and therefore incorrect signal contexts.
+ */
clear_thread_flag(TIF_RESTORE_TM);
if (!MSR_TM_ACTIVE(regs->msr))
return;
@@ -983,6 +1009,13 @@ void restore_tm_state(struct pt_regs *regs)
msr_diff = current->thread.ckpt_regs.msr & ~regs->msr;
msr_diff &= MSR_FP | MSR_VEC | MSR_VSX;
+ /* Ensure that restore_math() will restore */
+ if (msr_diff & MSR_FP)
+ current->thread.load_fp = 1;
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) && msr_diff & MSR_VEC)
+ current->thread.load_vec = 1;
+#endif
restore_math(regs);
regs->msr |= msr_diff;
@@ -990,7 +1023,7 @@ void restore_tm_state(struct pt_regs *regs)
#else
#define tm_recheckpoint_new_task(new)
-#define __switch_to_tm(prev)
+#define __switch_to_tm(prev, new)
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
static inline void save_sprs(struct thread_struct *t)
@@ -1074,26 +1107,6 @@ static inline void restore_sprs(struct thread_struct *old_thread,
#endif
}
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-void flush_tmregs_to_thread(struct task_struct *tsk)
-{
- /*
- * Process self tracing is not yet supported through
- * ptrace interface. Ptrace generic code should have
- * prevented this from happening in the first place.
- * Warn once here with the message, if some how it
- * is attempted.
- */
- WARN_ONCE(tsk == current,
- "Not expecting ptrace on self: TM regs may be incorrect\n");
-
- /*
- * If task is not current, it should have been flushed
- * already to it's thread_struct during __switch_to().
- */
-}
-#endif
-
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *new)
{
@@ -1151,11 +1164,11 @@ struct task_struct *__switch_to(struct task_struct *prev,
*/
save_sprs(&prev->thread);
- __switch_to_tm(prev);
-
/* Save FPU, Altivec, VSX and SPE state */
giveup_all(prev);
+ __switch_to_tm(prev, new);
+
/*
* We can't take a PMU exception inside _switch() since there is a
* window where the kernel stack SLB and the kernel stack are out
@@ -1163,8 +1176,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
*/
hard_irq_disable();
- tm_recheckpoint_new_task(new);
-
/*
* Call restore_sprs() before calling _switch(). If we move it after
* _switch() then we miss out on calling it for new tasks. The reason
@@ -1399,9 +1410,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
* transitions the CPU out of TM mode. Hence we need to call
* tm_recheckpoint_new_task() (on the same task) to restore the
* checkpointed state back and the TM mode.
+ *
+ * Can't pass dst because it isn't ready. Doesn't matter, passing
+ * dst is only important for __switch_to()
*/
- __switch_to_tm(src);
- tm_recheckpoint_new_task(src);
+ __switch_to_tm(src, src);
*dst = *src;
@@ -1643,8 +1656,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.used_spe = 0;
#endif /* CONFIG_SPE */
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (cpu_has_feature(CPU_FTR_TM))
- regs->msr |= MSR_TM;
current->thread.tm_tfhar = 0;
current->thread.tm_texasr = 0;
current->thread.tm_tfiar = 0;
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 6ee4b72cda42..88ac964f4858 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -42,6 +42,7 @@
#include <asm/sections.h>
#include <asm/machdep.h>
#include <asm/opal.h>
+#include <asm/asm-prototypes.h>
#include <linux/linux_logo.h>
@@ -695,7 +696,7 @@ unsigned char ibm_architecture_vec[] = {
OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */
/* option vector 5: PAPR/OF options */
- VECTOR_LENGTH(18), /* length */
+ VECTOR_LENGTH(21), /* length */
0, /* don't ignore, don't halt */
OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) |
OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) |
@@ -726,8 +727,11 @@ unsigned char ibm_architecture_vec[] = {
0,
0,
OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) |
- OV5_FEAT(OV5_PFO_HW_842),
- OV5_FEAT(OV5_SUB_PROCESSORS),
+ OV5_FEAT(OV5_PFO_HW_842), /* Byte 17 */
+ 0, /* Byte 18 */
+ 0, /* Byte 19 */
+ 0, /* Byte 20 */
+ OV5_FEAT(OV5_SUB_PROCESSORS), /* Byte 21 */
/* option vector 6: IBM PAPR hints */
VECTOR_LENGTH(3), /* length */
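Per the byte comments above, PAPR places OV5_SUB_PROCESSORS at byte 21 of option vector 5, so the vector length grows from 18 to 21 and the three intervening bytes must be written as explicit zeros; the resulting tail of the vector is:

	byte 17: OV5_PFO_HW_RNG | OV5_PFO_HW_ENCR | OV5_PFO_HW_842
	byte 18: 0	(pad)
	byte 19: 0	(pad)
	byte 20: 0	(pad)
	byte 21: OV5_SUB_PROCESSORS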
@@ -2640,6 +2644,86 @@ static void __init fixup_device_tree_efika(void)
#define fixup_device_tree_efika()
#endif
+#ifdef CONFIG_PPC_PASEMI_NEMO
+/*
+ * CFE supplied on Nemo is broken in several ways; the biggest
+ * problem is that it reassigns ISA interrupts to unused MPIC ints.
+ * Add an interrupt-controller property for the io-bridge to use
+ * and correct the ints so we can attach them to an irq_domain.
+ */
+static void __init fixup_device_tree_pasemi(void)
+{
+ u32 interrupts[2], parent, rval, val = 0;
+ char *name, *pci_name;
+ phandle iob, node;
+
+ /* Find the root pci node */
+ name = "/pxp@0,e0000000";
+ iob = call_prom("finddevice", 1, 1, ADDR(name));
+ if (!PHANDLE_VALID(iob))
+ return;
+
+ /* check if interrupt-controller node set yet */
+ if (prom_getproplen(iob, "interrupt-controller") != PROM_ERROR)
+ return;
+
+ prom_printf("adding interrupt-controller property for SB600...\n");
+
+ prom_setprop(iob, name, "interrupt-controller", &val, 0);
+
+ pci_name = "/pxp@0,e0000000/pci@11";
+ node = call_prom("finddevice", 1, 1, ADDR(pci_name));
+ parent = ADDR(iob);
+
+ for (; prom_next_node(&node); ) {
+ /* scan each node for one with an interrupt */
+ if (!PHANDLE_VALID(node))
+ continue;
+
+ rval = prom_getproplen(node, "interrupts");
+ if (rval == 0 || rval == PROM_ERROR)
+ continue;
+
+ prom_getprop(node, "interrupts", &interrupts, sizeof(interrupts));
+ if ((interrupts[0] < 212) || (interrupts[0] > 222))
+ continue;
+
+ /* found a node, update both interrupts and interrupt-parent */
+ if ((interrupts[0] >= 212) && (interrupts[0] <= 215))
+ interrupts[0] -= 203;
+ if ((interrupts[0] >= 216) && (interrupts[0] <= 220))
+ interrupts[0] -= 213;
+ if (interrupts[0] == 221)
+ interrupts[0] = 14;
+ if (interrupts[0] == 222)
+ interrupts[0] = 8;
+
+ prom_setprop(node, pci_name, "interrupts", interrupts,
+ sizeof(interrupts));
+ prom_setprop(node, pci_name, "interrupt-parent", &parent,
+ sizeof(parent));
+ }
+
+ /*
+ * The io-bridge has device_type set to 'io-bridge'; change it to 'isa'
+ * so that generic isa-bridge code can add the SB600 and its on-board
+ * peripherals.
+ */
+ name = "/pxp@0,e0000000/io-bridge@0";
+ iob = call_prom("finddevice", 1, 1, ADDR(name));
+ if (!PHANDLE_VALID(iob))
+ return;
+
+ /* device_type is already set, just change it. */
+
+ prom_printf("Changing device_type of SB600 node...\n");
+
+ prom_setprop(iob, name, "device_type", "isa", sizeof("isa"));
+}
+#else /* !CONFIG_PPC_PASEMI_NEMO */
+static inline void fixup_device_tree_pasemi(void) { }
+#endif
+
static void __init fixup_device_tree(void)
{
fixup_device_tree_maple();
@@ -2647,6 +2731,7 @@ static void __init fixup_device_tree(void)
fixup_device_tree_chrp();
fixup_device_tree_pmac();
fixup_device_tree_efika();
+ fixup_device_tree_pasemi();
}
static void __init prom_find_boot_cpu(void)
@@ -2940,7 +3025,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
/* Don't print anything after quiesce under OPAL, it crashes OFW */
if (of_platform != PLATFORM_OPAL) {
- prom_printf("Booting Linux via __start() ...\n");
+ prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
prom_debug("->dt_header_start=0x%x\n", hdr);
}
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 4f3c5756cc09..b1ec62f2cc31 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -38,6 +38,8 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
+#include <asm/tm.h>
+#include <asm/asm-prototypes.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -118,6 +120,24 @@ static const struct pt_regs_offset regoffset_table[] = {
REG_OFFSET_END,
};
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static void flush_tmregs_to_thread(struct task_struct *tsk)
+{
+ /*
+ * If task is not current, it will have been flushed already to
+ * it's thread_struct during __switch_to().
+ *
+ * A reclaim flushes ALL the state.
+ */
+
+ if (tsk == current && MSR_TM_SUSPENDED(mfmsr()))
+ tm_reclaim_current(TM_CAUSE_SIGNAL);
+}
+#else
+static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
+#endif
+
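Because the reclaim in flush_tmregs_to_thread() flushes all register state at once, it must run before the FP/VMX/VSX flushes; every ptrace accessor later in this file is reordered to follow the same canonical sequence:

	flush_tmregs_to_thread(target);	/* may reclaim: flushes ALL state */
	flush_fp_to_thread(target);
	flush_altivec_to_thread(target);
	flush_vsx_to_thread(target);	/* where VSX is involved */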
/**
* regs_query_register_offset() - query register offset from its name
* @name: the name of a register
@@ -383,13 +403,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
}
/*
- * When the transaction is active, 'transact_fp' holds the current running
- * value of all FPR registers and 'fp_state' holds the last checkpointed
- * value of all FPR registers for the current transaction. When transaction
- * is not active 'fp_state' holds the current running state of all the FPR
- * registers. So this function which returns the current running values of
- * all the FPR registers, needs to know whether any transaction is active
- * or not.
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
*
* Userspace interface buffer layout:
*
@@ -397,13 +413,6 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
* u64 fpr[32];
* u64 fpscr;
* };
- *
- * There are two config options CONFIG_VSX and CONFIG_PPC_TRANSACTIONAL_MEM
- * which determines the final code in this function. All the combinations of
- * these two config options are possible except the one below as transactional
- * memory config pulls in CONFIG_VSX automatically.
- *
- * !defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM)
*/
static int fpr_get(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
@@ -412,50 +421,29 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,
#ifdef CONFIG_VSX
u64 buf[33];
int i;
-#endif
- flush_fp_to_thread(target);
-#if defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM)
- /* copy to local buffer then write that out */
- if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
- flush_altivec_to_thread(target);
- flush_tmregs_to_thread(target);
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.TS_TRANS_FPR(i);
- buf[32] = target->thread.transact_fp.fpscr;
- } else {
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.TS_FPR(i);
- buf[32] = target->thread.fp_state.fpscr;
- }
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
-#endif
+ flush_fp_to_thread(target);
-#if defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM)
/* copy to local buffer then write that out */
for (i = 0; i < 32 ; i++)
buf[i] = target->thread.TS_FPR(i);
buf[32] = target->thread.fp_state.fpscr;
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
-#endif
-
-#if !defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM)
+#else
BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
offsetof(struct thread_fp_state, fpr[32]));
+ flush_fp_to_thread(target);
+
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.fp_state, 0, -1);
#endif
}
/*
- * When the transaction is active, 'transact_fp' holds the current running
- * value of all FPR registers and 'fp_state' holds the last checkpointed
- * value of all FPR registers for the current transaction. When transaction
- * is not active 'fp_state' holds the current running state of all the FPR
- * registers. So this function which setss the current running values of
- * all the FPR registers, needs to know whether any transaction is active
- * or not.
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
*
* Userspace interface buffer layout:
*
@@ -464,12 +452,6 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,
* u64 fpscr;
* };
*
- * There are two config options CONFIG_VSX and CONFIG_PPC_TRANSACTIONAL_MEM
- * which determines the final code in this function. All the combinations of
- * these two config options are possible except the one below as transactional
- * memory config pulls in CONFIG_VSX automatically.
- *
- * !defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM)
*/
static int fpr_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
@@ -478,44 +460,24 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
#ifdef CONFIG_VSX
u64 buf[33];
int i;
-#endif
+
flush_fp_to_thread(target);
-#if defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM)
/* copy to local buffer then write that out */
i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
if (i)
return i;
- if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
- flush_altivec_to_thread(target);
- flush_tmregs_to_thread(target);
- for (i = 0; i < 32 ; i++)
- target->thread.TS_TRANS_FPR(i) = buf[i];
- target->thread.transact_fp.fpscr = buf[32];
- } else {
- for (i = 0; i < 32 ; i++)
- target->thread.TS_FPR(i) = buf[i];
- target->thread.fp_state.fpscr = buf[32];
- }
- return 0;
-#endif
-
-#if defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM)
- /* copy to local buffer then write that out */
- i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
- if (i)
- return i;
for (i = 0; i < 32 ; i++)
target->thread.TS_FPR(i) = buf[i];
target->thread.fp_state.fpscr = buf[32];
return 0;
-#endif
-
-#if !defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM)
+#else
BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
offsetof(struct thread_fp_state, fpr[32]));
+ flush_fp_to_thread(target);
+
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.fp_state, 0, -1);
#endif
@@ -543,13 +505,10 @@ static int vr_active(struct task_struct *target,
}
/*
- * When the transaction is active, 'transact_vr' holds the current running
- * value of all the VMX registers and 'vr_state' holds the last checkpointed
- * value of all the VMX registers for the current transaction to fall back
- * on in case it aborts. When transaction is not active 'vr_state' holds
- * the current running state of all the VMX registers. So this function which
- * gets the current running values of all the VMX registers, needs to know
- * whether any transaction is active or not.
+ * Regardless of transactions, 'vr_state' holds the current running
+ * value of all the VMX registers and 'ckvr_state' holds the last
+ * checkpointed value of all the VMX registers for the current
+ * transaction to fall back on in case it aborts.
*
* Userspace interface buffer layout:
*
@@ -563,7 +522,6 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- struct thread_vr_state *addr;
int ret;
flush_altivec_to_thread(target);
@@ -571,19 +529,8 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset,
BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
offsetof(struct thread_vr_state, vr[32]));
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
- flush_fp_to_thread(target);
- flush_tmregs_to_thread(target);
- addr = &target->thread.transact_vr;
- } else {
- addr = &target->thread.vr_state;
- }
-#else
- addr = &target->thread.vr_state;
-#endif
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- addr, 0,
+ &target->thread.vr_state, 0,
33 * sizeof(vector128));
if (!ret) {
/*
@@ -595,14 +542,7 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset,
} vrsave;
memset(&vrsave, 0, sizeof(vrsave));
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (MSR_TM_ACTIVE(target->thread.regs->msr))
- vrsave.word = target->thread.transact_vrsave;
- else
- vrsave.word = target->thread.vrsave;
-#else
vrsave.word = target->thread.vrsave;
-#endif
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
33 * sizeof(vector128), -1);
@@ -612,13 +552,10 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset,
}
/*
- * When the transaction is active, 'transact_vr' holds the current running
- * value of all the VMX registers and 'vr_state' holds the last checkpointed
- * value of all the VMX registers for the current transaction to fall back
- * on in case it aborts. When transaction is not active 'vr_state' holds
- * the current running state of all the VMX registers. So this function which
- * sets the current running values of all the VMX registers, needs to know
- * whether any transaction is active or not.
+ * Regardless of transactions, 'vr_state' holds the current running
+ * value of all the VMX registers and 'ckvr_state' holds the last
+ * checkpointed value of all the VMX registers for the current
+ * transaction to fall back on in case it aborts.
*
* Userspace interface buffer layout:
*
@@ -632,7 +569,6 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct thread_vr_state *addr;
int ret;
flush_altivec_to_thread(target);
@@ -640,19 +576,8 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset,
BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
offsetof(struct thread_vr_state, vr[32]));
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
- flush_fp_to_thread(target);
- flush_tmregs_to_thread(target);
- addr = &target->thread.transact_vr;
- } else {
- addr = &target->thread.vr_state;
- }
-#else
- addr = &target->thread.vr_state;
-#endif
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- addr, 0,
+ &target->thread.vr_state, 0,
33 * sizeof(vector128));
if (!ret && count > 0) {
/*
@@ -664,27 +589,12 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset,
} vrsave;
memset(&vrsave, 0, sizeof(vrsave));
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (MSR_TM_ACTIVE(target->thread.regs->msr))
- vrsave.word = target->thread.transact_vrsave;
- else
- vrsave.word = target->thread.vrsave;
-#else
vrsave.word = target->thread.vrsave;
-#endif
+
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
33 * sizeof(vector128), -1);
- if (!ret) {
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (MSR_TM_ACTIVE(target->thread.regs->msr))
- target->thread.transact_vrsave = vrsave.word;
- else
- target->thread.vrsave = vrsave.word;
-#else
+ if (!ret)
target->thread.vrsave = vrsave.word;
-#endif
- }
}
return ret;
@@ -706,13 +616,10 @@ static int vsr_active(struct task_struct *target,
}
/*
- * When the transaction is active, 'transact_fp' holds the current running
- * value of all FPR registers and 'fp_state' holds the last checkpointed
- * value of all FPR registers for the current transaction. When transaction
- * is not active 'fp_state' holds the current running state of all the FPR
- * registers. So this function which returns the current running values of
- * all the FPR registers, needs to know whether any transaction is active
- * or not.
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last
+ * checkpointed value of all FPR registers for the current
+ * transaction.
*
* Userspace interface buffer layout:
*
@@ -727,27 +634,14 @@ static int vsr_get(struct task_struct *target, const struct user_regset *regset,
u64 buf[32];
int ret, i;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ flush_tmregs_to_thread(target);
flush_fp_to_thread(target);
flush_altivec_to_thread(target);
- flush_tmregs_to_thread(target);
-#endif
flush_vsx_to_thread(target);
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.
- transact_fp.fpr[i][TS_VSRLOWOFFSET];
- } else {
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.
- fp_state.fpr[i][TS_VSRLOWOFFSET];
- }
-#else
for (i = 0; i < 32 ; i++)
buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
-#endif
+
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
buf, 0, 32 * sizeof(double));
@@ -755,12 +649,10 @@ static int vsr_get(struct task_struct *target, const struct user_regset *regset,
}
/*
- * When the transaction is active, 'transact_fp' holds the current running
- * value of all FPR registers and 'fp_state' holds the last checkpointed
- * value of all FPR registers for the current transaction. When transaction
- * is not active 'fp_state' holds the current running state of all the FPR
- * registers. So this function which sets the current running values of all
- * the FPR registers, needs to know whether any transaction is active or not.
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last
+ * checkpointed value of all FPR registers for the current
+ * transaction.
*
* Userspace interface buffer layout:
*
@@ -775,31 +667,16 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset,
u64 buf[32];
int ret,i;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ flush_tmregs_to_thread(target);
flush_fp_to_thread(target);
flush_altivec_to_thread(target);
- flush_tmregs_to_thread(target);
-#endif
flush_vsx_to_thread(target);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
buf, 0, 32 * sizeof(double));
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (MSR_TM_ACTIVE(target->thread.regs->msr)) {
- for (i = 0; i < 32 ; i++)
- target->thread.transact_fp.
- fpr[i][TS_VSRLOWOFFSET] = buf[i];
- } else {
+ if (!ret)
for (i = 0; i < 32 ; i++)
- target->thread.fp_state.
- fpr[i][TS_VSRLOWOFFSET] = buf[i];
- }
-#else
- for (i = 0; i < 32 ; i++)
- target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
-#endif
-
+ target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
return ret;
}
@@ -925,9 +802,9 @@ static int tm_cgpr_get(struct task_struct *target,
if (!MSR_TM_ACTIVE(target->thread.regs->msr))
return -ENODATA;
+ flush_tmregs_to_thread(target);
flush_fp_to_thread(target);
flush_altivec_to_thread(target);
- flush_tmregs_to_thread(target);
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.ckpt_regs,
@@ -990,9 +867,9 @@ static int tm_cgpr_set(struct task_struct *target,
if (!MSR_TM_ACTIVE(target->thread.regs->msr))
return -ENODATA;
+ flush_tmregs_to_thread(target);
flush_fp_to_thread(target);
flush_altivec_to_thread(target);
- flush_tmregs_to_thread(target);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.ckpt_regs,
@@ -1068,7 +945,7 @@ static int tm_cfpr_active(struct task_struct *target,
*
* This function gets in transaction checkpointed FPR registers.
*
- * When the transaction is active 'fp_state' holds the checkpointed
+ * When the transaction is active 'ckfp_state' holds the checkpointed
* values for the current transaction to fall back on if it aborts
* in between. This function gets those checkpointed FPR registers.
* The userspace interface buffer layout is as follows.
@@ -1092,14 +969,14 @@ static int tm_cfpr_get(struct task_struct *target,
if (!MSR_TM_ACTIVE(target->thread.regs->msr))
return -ENODATA;
+ flush_tmregs_to_thread(target);
flush_fp_to_thread(target);
flush_altivec_to_thread(target);
- flush_tmregs_to_thread(target);
/* copy to local buffer then write that out */
for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.TS_FPR(i);
- buf[32] = target->thread.fp_state.fpscr;
+ buf[i] = target->thread.TS_CKFPR(i);
+ buf[32] = target->thread.ckfp_state.fpscr;
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
}
@@ -1114,7 +991,7 @@ static int tm_cfpr_get(struct task_struct *target,
*
* This function sets in transaction checkpointed FPR registers.
*
- * When the transaction is active 'fp_state' holds the checkpointed
+ * When the transaction is active 'ckfp_state' holds the checkpointed
* FPR register values for the current transaction to fall back on
* if it aborts in between. This function sets these checkpointed
* FPR registers. The userspace interface buffer layout is as follows.
@@ -1138,17 +1015,17 @@ static int tm_cfpr_set(struct task_struct *target,
if (!MSR_TM_ACTIVE(target->thread.regs->msr))
return -ENODATA;
+ flush_tmregs_to_thread(target);
flush_fp_to_thread(target);
flush_altivec_to_thread(target);
- flush_tmregs_to_thread(target);
/* copy to local buffer then write that out */
i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
if (i)
return i;
for (i = 0; i < 32 ; i++)
- target->thread.TS_FPR(i) = buf[i];
- target->thread.fp_state.fpscr = buf[32];
+ target->thread.TS_CKFPR(i) = buf[i];
+ target->thread.ckfp_state.fpscr = buf[32];
return 0;
}
@@ -1183,7 +1060,7 @@ static int tm_cvmx_active(struct task_struct *target,
*
* This function gets in transaction checkpointed VMX registers.
*
- * When the transaction is active 'vr_state' and 'vr_save' hold
+ * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
* the checkpointed values for the current transaction to fall
* back on if it aborts in between. The userspace interface buffer
* layout is as follows.
@@ -1210,12 +1087,12 @@ static int tm_cvmx_get(struct task_struct *target,
return -ENODATA;
/* Flush the state */
+ flush_tmregs_to_thread(target);
flush_fp_to_thread(target);
flush_altivec_to_thread(target);
- flush_tmregs_to_thread(target);
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.vr_state, 0,
+ &target->thread.ckvr_state, 0,
33 * sizeof(vector128));
if (!ret) {
/*
@@ -1226,7 +1103,7 @@ static int tm_cvmx_get(struct task_struct *target,
u32 word;
} vrsave;
memset(&vrsave, 0, sizeof(vrsave));
- vrsave.word = target->thread.vrsave;
+ vrsave.word = target->thread.ckvrsave;
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
33 * sizeof(vector128), -1);
}
@@ -1245,7 +1122,7 @@ static int tm_cvmx_get(struct task_struct *target,
*
* This function sets in transaction checkpointed VMX registers.
*
- * When the transaction is active 'vr_state' and 'vr_save' hold
+ * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
* the checkpointed values for the current transaction to fall
* back on if it aborts in between. The userspace interface buffer
* layout is as follows.
@@ -1271,12 +1148,12 @@ static int tm_cvmx_set(struct task_struct *target,
if (!MSR_TM_ACTIVE(target->thread.regs->msr))
return -ENODATA;
+ flush_tmregs_to_thread(target);
flush_fp_to_thread(target);
flush_altivec_to_thread(target);
- flush_tmregs_to_thread(target);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.vr_state, 0,
+ &target->thread.ckvr_state, 0,
33 * sizeof(vector128));
if (!ret && count > 0) {
/*
@@ -1287,11 +1164,11 @@ static int tm_cvmx_set(struct task_struct *target,
u32 word;
} vrsave;
memset(&vrsave, 0, sizeof(vrsave));
- vrsave.word = target->thread.vrsave;
+ vrsave.word = target->thread.ckvrsave;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
33 * sizeof(vector128), -1);
if (!ret)
- target->thread.vrsave = vrsave.word;
+ target->thread.ckvrsave = vrsave.word;
}
return ret;
@@ -1329,7 +1206,7 @@ static int tm_cvsx_active(struct task_struct *target,
*
* This function gets in transaction checkpointed VSX registers.
*
- * When the transaction is active 'fp_state' holds the checkpointed
+ * When the transaction is active 'ckfp_state' holds the checkpointed
* values for the current transaction to fall back on if it aborts
* in between. This function gets those checkpointed VSX registers.
* The userspace interface buffer layout is as follows.
@@ -1353,13 +1230,13 @@ static int tm_cvsx_get(struct task_struct *target,
return -ENODATA;
/* Flush the state */
+ flush_tmregs_to_thread(target);
flush_fp_to_thread(target);
flush_altivec_to_thread(target);
- flush_tmregs_to_thread(target);
flush_vsx_to_thread(target);
for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+ buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
buf, 0, 32 * sizeof(double));
@@ -1377,7 +1254,7 @@ static int tm_cvsx_get(struct task_struct *target,
*
* This function sets in transaction checkpointed VSX registers.
*
- * When the transaction is active 'fp_state' holds the checkpointed
+ * When the transaction is active 'ckfp_state' holds the checkpointed
* VSX register values for the current transaction to fall back on
* if it aborts in between. This function sets these checkpointed
* FPR registers. The userspace interface buffer layout is as follows.
@@ -1401,15 +1278,16 @@ static int tm_cvsx_set(struct task_struct *target,
return -ENODATA;
/* Flush the state */
+ flush_tmregs_to_thread(target);
flush_fp_to_thread(target);
flush_altivec_to_thread(target);
- flush_tmregs_to_thread(target);
flush_vsx_to_thread(target);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
buf, 0, 32 * sizeof(double));
- for (i = 0; i < 32 ; i++)
- target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+ if (!ret)
+ for (i = 0; i < 32 ; i++)
+ target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
return ret;
}
@@ -1465,9 +1343,9 @@ static int tm_spr_get(struct task_struct *target,
return -ENODEV;
/* Flush the states */
+ flush_tmregs_to_thread(target);
flush_fp_to_thread(target);
flush_altivec_to_thread(target);
- flush_tmregs_to_thread(target);
/* TFHAR register */
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
@@ -1521,9 +1399,9 @@ static int tm_spr_set(struct task_struct *target,
return -ENODEV;
/* Flush the states */
+ flush_tmregs_to_thread(target);
flush_fp_to_thread(target);
flush_altivec_to_thread(target);
- flush_tmregs_to_thread(target);
/* TFHAR register */
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
@@ -2046,33 +1924,12 @@ static const struct user_regset_view user_ppc_native_view = {
static int gpr32_get_common(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf, bool tm_active)
+ void *kbuf, void __user *ubuf,
+ unsigned long *regs)
{
- const unsigned long *regs = &target->thread.regs->gpr[0];
- const unsigned long *ckpt_regs;
compat_ulong_t *k = kbuf;
compat_ulong_t __user *u = ubuf;
compat_ulong_t reg;
- int i;
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- ckpt_regs = &target->thread.ckpt_regs.gpr[0];
-#endif
- if (tm_active) {
- regs = ckpt_regs;
- } else {
- if (target->thread.regs == NULL)
- return -EIO;
-
- if (!FULL_REGS(target->thread.regs)) {
- /*
- * We have a partial register set.
- * Fill 14-31 with bogus values.
- */
- for (i = 14; i < 32; i++)
- target->thread.regs->gpr[i] = NV_REG_POISON;
- }
- }
pos /= sizeof(reg);
count /= sizeof(reg);
@@ -2114,29 +1971,13 @@ static int gpr32_get_common(struct task_struct *target,
static int gpr32_set_common(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf, bool tm_active)
+ const void *kbuf, const void __user *ubuf,
+ unsigned long *regs)
{
- unsigned long *regs = &target->thread.regs->gpr[0];
- unsigned long *ckpt_regs;
const compat_ulong_t *k = kbuf;
const compat_ulong_t __user *u = ubuf;
compat_ulong_t reg;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- ckpt_regs = &target->thread.ckpt_regs.gpr[0];
-#endif
-
- if (tm_active) {
- regs = ckpt_regs;
- } else {
- regs = &target->thread.regs->gpr[0];
-
- if (target->thread.regs == NULL)
- return -EIO;
-
- CHECK_FULL_REGS(target->thread.regs);
- }
-
pos /= sizeof(reg);
count /= sizeof(reg);
@@ -2201,7 +2042,8 @@ static int tm_cgpr32_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, 1);
+ return gpr32_get_common(target, regset, pos, count, kbuf, ubuf,
+ &target->thread.ckpt_regs.gpr[0]);
}
static int tm_cgpr32_set(struct task_struct *target,
@@ -2209,7 +2051,8 @@ static int tm_cgpr32_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, 1);
+ return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
+ &target->thread.ckpt_regs.gpr[0]);
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
@@ -2218,7 +2061,21 @@ static int gpr32_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, 0);
+ int i;
+
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ if (!FULL_REGS(target->thread.regs)) {
+ /*
+ * We have a partial register set.
+ * Fill 14-31 with bogus values.
+ */
+ for (i = 14; i < 32; i++)
+ target->thread.regs->gpr[i] = NV_REG_POISON;
+ }
+ return gpr32_get_common(target, regset, pos, count, kbuf, ubuf,
+ &target->thread.regs->gpr[0]);
}
static int gpr32_set(struct task_struct *target,
@@ -2226,7 +2083,12 @@ static int gpr32_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, 0);
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ CHECK_FULL_REGS(target->thread.regs);
+ return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
+ &target->thread.regs->gpr[0]);
}
/*
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index c3e861df4b20..24ec3ea4b3a2 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -93,15 +93,16 @@ notrace unsigned long __init early_init(unsigned long dt_ptr)
* and we are running with enough of the MMU enabled to have our
* proper kernel virtual addresses
*
- * Find out what kind of machine we're on and save any data we need
- * from the early boot process (devtree is copied on pmac by prom_init()).
- * This is called very early on the boot process, after a minimal
- * MMU environment has been set up but before MMU_init is called.
+ * We do the initial parsing of the flat device-tree and prepare
+ * for the MMU to be fully initialized.
*/
extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
notrace void __init machine_init(u64 dt_ptr)
{
+ /* Configure static keys first, now that we're relocated. */
+ setup_feature_keys();
+
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index eafb9a79e011..7ac8e6eaab5b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -300,6 +300,7 @@ void __init early_setup(unsigned long dt_ptr)
/* Apply all the dynamic patching */
apply_feature_fixups();
+ setup_feature_keys();
/* Initialize the hash table or TLB handling */
early_init_mmu();
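setup_feature_keys() has to run after apply_feature_fixups() (and, on 32-bit, after relocation) because the feature static keys are initialized from the final CPU/MMU feature masks. A simplified sketch of how a feature test consumes those keys, modelled on cpu_has_feature() (details assumed):

	static inline bool cpu_has_feature(unsigned long feature)
	{
		int i;

		if (!static_key_initialized)	/* too early: slow fallback */
			return early_cpu_has_feature(feature);

		i = __builtin_ctzl(feature);	/* one key per feature bit */
		return static_branch_likely(&cpu_feature_keys[i]);
	}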
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index cb64d6feb45a..bbe77aed198d 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -99,22 +99,24 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
}
}
-static void do_signal(struct pt_regs *regs)
+static void do_signal(struct task_struct *tsk)
{
sigset_t *oldset = sigmask_to_save();
struct ksignal ksig;
int ret;
int is32 = is_32bit_task();
+ BUG_ON(tsk != current);
+
get_signal(&ksig);
/* Is there any syscall restart business here ? */
- check_syscall_restart(regs, &ksig.ka, ksig.sig > 0);
+ check_syscall_restart(tsk->thread.regs, &ksig.ka, ksig.sig > 0);
if (ksig.sig <= 0) {
/* No signal to deliver -- put the saved sigmask back */
restore_saved_sigmask();
- regs->trap = 0;
+ tsk->thread.regs->trap = 0;
return; /* no signals delivered */
}
@@ -124,23 +126,22 @@ static void do_signal(struct pt_regs *regs)
* user space. The DABR will have been cleared if it
* triggered inside the kernel.
*/
- if (current->thread.hw_brk.address &&
- current->thread.hw_brk.type)
- __set_breakpoint(&current->thread.hw_brk);
+ if (tsk->thread.hw_brk.address && tsk->thread.hw_brk.type)
+ __set_breakpoint(&tsk->thread.hw_brk);
#endif
/* Re-enable the breakpoints for the signal stack */
- thread_change_pc(current, regs);
+ thread_change_pc(tsk, tsk->thread.regs);
if (is32) {
if (ksig.ka.sa.sa_flags & SA_SIGINFO)
- ret = handle_rt_signal32(&ksig, oldset, regs);
+ ret = handle_rt_signal32(&ksig, oldset, tsk);
else
- ret = handle_signal32(&ksig, oldset, regs);
+ ret = handle_signal32(&ksig, oldset, tsk);
} else {
- ret = handle_rt_signal64(&ksig, oldset, regs);
+ ret = handle_rt_signal64(&ksig, oldset, tsk);
}
- regs->trap = 0;
+ tsk->thread.regs->trap = 0;
signal_setup_done(ret, &ksig, test_thread_flag(TIF_SINGLESTEP));
}
@@ -151,8 +152,10 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
- if (thread_info_flags & _TIF_SIGPENDING)
- do_signal(regs);
+ if (thread_info_flags & _TIF_SIGPENDING) {
+ BUG_ON(regs != current->thread.regs);
+ do_signal(current);
+ }
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
@@ -162,7 +165,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
user_enter();
}
-unsigned long get_tm_stackpointer(struct pt_regs *regs)
+unsigned long get_tm_stackpointer(struct task_struct *tsk)
{
/* When in an active transaction that takes a signal, we need to be
* careful with the stack. It's possible that the stack has moved back
@@ -187,11 +190,13 @@ unsigned long get_tm_stackpointer(struct pt_regs *regs)
*/
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (MSR_TM_ACTIVE(regs->msr)) {
+ BUG_ON(tsk != current);
+
+ if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) {
tm_reclaim_current(TM_CAUSE_SIGNAL);
- if (MSR_TM_TRANSACTIONAL(regs->msr))
- return current->thread.ckpt_regs.gpr[1];
+ if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr))
+ return tsk->thread.ckpt_regs.gpr[1];
}
#endif
- return regs->gpr[1];
+ return tsk->thread.regs->gpr[1];
}
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index be305c858e51..7c59d88b9d86 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -16,39 +16,41 @@ extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp,
size_t frame_size, int is_32);
extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
- struct pt_regs *regs);
+ struct task_struct *tsk);
extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
- struct pt_regs *regs);
+ struct task_struct *tsk);
extern unsigned long copy_fpr_to_user(void __user *to,
struct task_struct *task);
-extern unsigned long copy_transact_fpr_to_user(void __user *to,
+extern unsigned long copy_ckfpr_to_user(void __user *to,
struct task_struct *task);
extern unsigned long copy_fpr_from_user(struct task_struct *task,
void __user *from);
-extern unsigned long copy_transact_fpr_from_user(struct task_struct *task,
+extern unsigned long copy_ckfpr_from_user(struct task_struct *task,
void __user *from);
+extern unsigned long get_tm_stackpointer(struct task_struct *tsk);
+
#ifdef CONFIG_VSX
extern unsigned long copy_vsx_to_user(void __user *to,
struct task_struct *task);
-extern unsigned long copy_transact_vsx_to_user(void __user *to,
+extern unsigned long copy_ckvsx_to_user(void __user *to,
struct task_struct *task);
extern unsigned long copy_vsx_from_user(struct task_struct *task,
void __user *from);
-extern unsigned long copy_transact_vsx_from_user(struct task_struct *task,
+extern unsigned long copy_ckvsx_from_user(struct task_struct *task,
void __user *from);
#endif
#ifdef CONFIG_PPC64
extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
- struct pt_regs *regs);
+ struct task_struct *tsk);
#else /* CONFIG_PPC64 */
static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
- struct pt_regs *regs)
+ struct task_struct *tsk)
{
return -EFAULT;
}
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index b6aa378aff63..27aa913ac91d 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -44,6 +44,7 @@
#include <asm/vdso.h>
#include <asm/switch_to.h>
#include <asm/tm.h>
+#include <asm/asm-prototypes.h>
#ifdef CONFIG_PPC64
#include "ppc32.h"
#include <asm/unistd.h>
@@ -315,7 +316,7 @@ unsigned long copy_vsx_from_user(struct task_struct *task,
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-unsigned long copy_transact_fpr_to_user(void __user *to,
+unsigned long copy_ckfpr_to_user(void __user *to,
struct task_struct *task)
{
u64 buf[ELF_NFPREG];
@@ -323,12 +324,12 @@ unsigned long copy_transact_fpr_to_user(void __user *to,
/* copy FPRs to a local buffer, then write the buffer out to userspace */
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
- buf[i] = task->thread.TS_TRANS_FPR(i);
- buf[i] = task->thread.transact_fp.fpscr;
+ buf[i] = task->thread.TS_CKFPR(i);
+ buf[i] = task->thread.ckfp_state.fpscr;
return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
}
-unsigned long copy_transact_fpr_from_user(struct task_struct *task,
+unsigned long copy_ckfpr_from_user(struct task_struct *task,
void __user *from)
{
u64 buf[ELF_NFPREG];
@@ -337,13 +338,13 @@ unsigned long copy_transact_fpr_from_user(struct task_struct *task,
if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
return 1;
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
- task->thread.TS_TRANS_FPR(i) = buf[i];
- task->thread.transact_fp.fpscr = buf[i];
+ task->thread.TS_CKFPR(i) = buf[i];
+ task->thread.ckfp_state.fpscr = buf[i];
return 0;
}
-unsigned long copy_transact_vsx_to_user(void __user *to,
+unsigned long copy_ckvsx_to_user(void __user *to,
struct task_struct *task)
{
u64 buf[ELF_NVSRHALFREG];
@@ -351,11 +352,11 @@ unsigned long copy_transact_vsx_to_user(void __user *to,
/* copy VSX low doublewords to a local buffer, then write them out to userspace */
for (i = 0; i < ELF_NVSRHALFREG; i++)
- buf[i] = task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET];
+ buf[i] = task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
}
-unsigned long copy_transact_vsx_from_user(struct task_struct *task,
+unsigned long copy_ckvsx_from_user(struct task_struct *task,
void __user *from)
{
u64 buf[ELF_NVSRHALFREG];
@@ -364,7 +365,7 @@ unsigned long copy_transact_vsx_from_user(struct task_struct *task,
if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
return 1;
for (i = 0; i < ELF_NVSRHALFREG ; i++)
- task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+ task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
return 0;
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
@@ -384,17 +385,17 @@ inline unsigned long copy_fpr_from_user(struct task_struct *task,
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-inline unsigned long copy_transact_fpr_to_user(void __user *to,
+inline unsigned long copy_ckfpr_to_user(void __user *to,
struct task_struct *task)
{
- return __copy_to_user(to, task->thread.transact_fp.fpr,
+ return __copy_to_user(to, task->thread.ckfp_state.fpr,
ELF_NFPREG * sizeof(double));
}
-inline unsigned long copy_transact_fpr_from_user(struct task_struct *task,
+inline unsigned long copy_ckfpr_from_user(struct task_struct *task,
void __user *from)
{
- return __copy_from_user(task->thread.transact_fp.fpr, from,
+ return __copy_from_user(task->thread.ckfp_state.fpr, from,
ELF_NFPREG * sizeof(double));
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
@@ -525,9 +526,6 @@ static int save_tm_user_regs(struct pt_regs *regs,
*/
regs->msr &= ~MSR_TS_MASK;
- /* Make sure floating point registers are stored in regs */
- flush_fp_to_thread(current);
-
/* Save both sets of general registers */
if (save_general_regs(&current->thread.ckpt_regs, frame)
|| save_general_regs(regs, tm_frame))
@@ -545,18 +543,17 @@ static int save_tm_user_regs(struct pt_regs *regs,
#ifdef CONFIG_ALTIVEC
/* save altivec registers */
if (current->thread.used_vr) {
- flush_altivec_to_thread(current);
- if (__copy_to_user(&frame->mc_vregs, &current->thread.vr_state,
+ if (__copy_to_user(&frame->mc_vregs, &current->thread.ckvr_state,
ELF_NVRREG * sizeof(vector128)))
return 1;
if (msr & MSR_VEC) {
if (__copy_to_user(&tm_frame->mc_vregs,
- &current->thread.transact_vr,
+ &current->thread.vr_state,
ELF_NVRREG * sizeof(vector128)))
return 1;
} else {
if (__copy_to_user(&tm_frame->mc_vregs,
- &current->thread.vr_state,
+ &current->thread.ckvr_state,
ELF_NVRREG * sizeof(vector128)))
return 1;
}
@@ -573,28 +570,28 @@ static int save_tm_user_regs(struct pt_regs *regs,
* most significant bits of that same vector. --BenH
*/
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- current->thread.vrsave = mfspr(SPRN_VRSAVE);
- if (__put_user(current->thread.vrsave,
+ current->thread.ckvrsave = mfspr(SPRN_VRSAVE);
+ if (__put_user(current->thread.ckvrsave,
(u32 __user *)&frame->mc_vregs[32]))
return 1;
if (msr & MSR_VEC) {
- if (__put_user(current->thread.transact_vrsave,
+ if (__put_user(current->thread.vrsave,
(u32 __user *)&tm_frame->mc_vregs[32]))
return 1;
} else {
- if (__put_user(current->thread.vrsave,
+ if (__put_user(current->thread.ckvrsave,
(u32 __user *)&tm_frame->mc_vregs[32]))
return 1;
}
#endif /* CONFIG_ALTIVEC */
- if (copy_fpr_to_user(&frame->mc_fregs, current))
+ if (copy_ckfpr_to_user(&frame->mc_fregs, current))
return 1;
if (msr & MSR_FP) {
- if (copy_transact_fpr_to_user(&tm_frame->mc_fregs, current))
+ if (copy_fpr_to_user(&tm_frame->mc_fregs, current))
return 1;
} else {
- if (copy_fpr_to_user(&tm_frame->mc_fregs, current))
+ if (copy_ckfpr_to_user(&tm_frame->mc_fregs, current))
return 1;
}
@@ -606,15 +603,14 @@ static int save_tm_user_regs(struct pt_regs *regs,
* contains valid data
*/
if (current->thread.used_vsr) {
- flush_vsx_to_thread(current);
- if (copy_vsx_to_user(&frame->mc_vsregs, current))
+ if (copy_ckvsx_to_user(&frame->mc_vsregs, current))
return 1;
if (msr & MSR_VSX) {
- if (copy_transact_vsx_to_user(&tm_frame->mc_vsregs,
+ if (copy_vsx_to_user(&tm_frame->mc_vsregs,
current))
return 1;
} else {
- if (copy_vsx_to_user(&tm_frame->mc_vsregs, current))
+ if (copy_ckvsx_to_user(&tm_frame->mc_vsregs, current))
return 1;
}
@@ -698,6 +694,7 @@ static long restore_user_regs(struct pt_regs *regs,
if (__copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
sizeof(sr->mc_vregs)))
return 1;
+ current->thread.used_vr = true;
} else if (current->thread.used_vr)
memset(&current->thread.vr_state, 0,
ELF_NVRREG * sizeof(vector128));
@@ -724,6 +721,7 @@ static long restore_user_regs(struct pt_regs *regs,
*/
if (copy_vsx_from_user(current, &sr->mc_vsregs))
return 1;
+ current->thread.used_vsr = true;
} else if (current->thread.used_vsr)
for (i = 0; i < 32 ; i++)
current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
@@ -743,6 +741,7 @@ static long restore_user_regs(struct pt_regs *regs,
if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
ELF_NEVRREG * sizeof(u32)))
return 1;
+ current->thread.used_spe = true;
} else if (current->thread.used_spe)
memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
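
The used_vr/used_vsr/used_spe assignments added above all follow one pattern: once register state has been copied in from a signal frame, the corresponding used_* flag must be set so that later context switches and sigcontext setups treat that state as live. A sketch of the pattern, using the Altivec case from the hunk above (names as in the diff):

    if (msr & MSR_VEC) {
            /* Pull the VMX state in from the frame... */
            if (__copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
                                 sizeof(sr->mc_vregs)))
                    return 1;
            /* ...and mark it live, or a context switch may discard it. */
            current->thread.used_vr = true;
    }
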
@@ -793,33 +792,34 @@ static long restore_tm_user_regs(struct pt_regs *regs,
regs->msr &= ~MSR_VEC;
if (msr & MSR_VEC) {
/* restore altivec registers from the stack */
- if (__copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
+ if (__copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
sizeof(sr->mc_vregs)) ||
- __copy_from_user(&current->thread.transact_vr,
+ __copy_from_user(&current->thread.vr_state,
&tm_sr->mc_vregs,
sizeof(sr->mc_vregs)))
return 1;
+ current->thread.used_vr = true;
} else if (current->thread.used_vr) {
memset(&current->thread.vr_state, 0,
ELF_NVRREG * sizeof(vector128));
- memset(&current->thread.transact_vr, 0,
+ memset(&current->thread.ckvr_state, 0,
ELF_NVRREG * sizeof(vector128));
}
/* Always get VRSAVE back */
- if (__get_user(current->thread.vrsave,
+ if (__get_user(current->thread.ckvrsave,
(u32 __user *)&sr->mc_vregs[32]) ||
- __get_user(current->thread.transact_vrsave,
+ __get_user(current->thread.vrsave,
(u32 __user *)&tm_sr->mc_vregs[32]))
return 1;
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- mtspr(SPRN_VRSAVE, current->thread.vrsave);
+ mtspr(SPRN_VRSAVE, current->thread.ckvrsave);
#endif /* CONFIG_ALTIVEC */
regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
if (copy_fpr_from_user(current, &sr->mc_fregs) ||
- copy_transact_fpr_from_user(current, &tm_sr->mc_fregs))
+ copy_ckfpr_from_user(current, &tm_sr->mc_fregs))
return 1;
#ifdef CONFIG_VSX
@@ -829,13 +829,14 @@ static long restore_tm_user_regs(struct pt_regs *regs,
* Restore VSX registers from the stack to a local
* buffer, then write this out to the thread_struct
*/
- if (copy_vsx_from_user(current, &sr->mc_vsregs) ||
- copy_transact_vsx_from_user(current, &tm_sr->mc_vsregs))
+ if (copy_vsx_from_user(current, &tm_sr->mc_vsregs) ||
+ copy_ckvsx_from_user(current, &sr->mc_vsregs))
return 1;
+ current->thread.used_vsr = true;
} else if (current->thread.used_vsr)
for (i = 0; i < 32 ; i++) {
current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
- current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0;
+ current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
}
#endif /* CONFIG_VSX */
@@ -848,6 +849,7 @@ static long restore_tm_user_regs(struct pt_regs *regs,
if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
ELF_NEVRREG * sizeof(u32)))
return 1;
+ current->thread.used_spe = true;
} else if (current->thread.used_spe)
memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
@@ -877,13 +879,14 @@ static long restore_tm_user_regs(struct pt_regs *regs,
tm_recheckpoint(&current->thread, msr);
/* This loads the speculative FP/VEC state, if used */
+ msr_check_and_set(msr & (MSR_FP | MSR_VEC));
if (msr & MSR_FP) {
- do_load_up_transact_fpu(&current->thread);
+ load_fp_state(&current->thread.fp_state);
regs->msr |= (MSR_FP | current->thread.fpexc_mode);
}
#ifdef CONFIG_ALTIVEC
if (msr & MSR_VEC) {
- do_load_up_transact_altivec(&current->thread);
+ load_vr_state(&current->thread.vr_state);
regs->msr |= MSR_VEC;
}
#endif
@@ -971,7 +974,7 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
* (one which gets siginfo).
*/
int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
- struct pt_regs *regs)
+ struct task_struct *tsk)
{
struct rt_sigframe __user *rt_sf;
struct mcontext __user *frame;
@@ -980,10 +983,13 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
unsigned long newsp = 0;
int sigret;
unsigned long tramp;
+ struct pt_regs *regs = tsk->thread.regs;
+
+ BUG_ON(tsk != current);
/* Set up Signal Frame */
/* Put a Real Time Context onto stack */
- rt_sf = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*rt_sf), 1);
+ rt_sf = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*rt_sf), 1);
addr = rt_sf;
if (unlikely(rt_sf == NULL))
goto badframe;
@@ -1000,9 +1006,9 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
/* Save user registers on the stack */
frame = &rt_sf->uc.uc_mcontext;
addr = frame;
- if (vdso32_rt_sigtramp && current->mm->context.vdso_base) {
+ if (vdso32_rt_sigtramp && tsk->mm->context.vdso_base) {
sigret = 0;
- tramp = current->mm->context.vdso_base + vdso32_rt_sigtramp;
+ tramp = tsk->mm->context.vdso_base + vdso32_rt_sigtramp;
} else {
sigret = __NR_rt_sigreturn;
tramp = (unsigned long) frame->tramp;
@@ -1029,7 +1035,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
}
regs->link = tramp;
- current->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
+ tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
/* create a stack frame for the caller of the handler */
newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16);
@@ -1054,7 +1060,7 @@ badframe:
printk_ratelimited(KERN_INFO
"%s[%d]: bad frame in handle_rt_signal32: "
"%p nip %08lx lr %08lx\n",
- current->comm, current->pid,
+ tsk->comm, tsk->pid,
addr, regs->nip, regs->link);
return 1;
@@ -1226,7 +1232,21 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
(regs->gpr[1] + __SIGNAL_FRAMESIZE + 16);
if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf)))
goto bad;
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /*
+ * If there is a transactional state then throw it away.
+ * The purpose of a sigreturn is to destroy all traces of the
+ * signal frame; this includes any transactional state created
+ * within it. We only check for suspended, as we can never be
+ * active in the kernel; if we somehow are, there is nothing
+ * better to do than go ahead and hit the Bad Thing later.
+ * The cause is not important as there will never be a
+ * recheckpoint so it's not user visible.
+ */
+ if (MSR_TM_SUSPENDED(mfmsr()))
+ tm_reclaim_current(0);
+
if (__get_user(tmp, &rt_sf->uc.uc_link))
goto bad;
uc_transact = (struct ucontext __user *)(uintptr_t)tmp;
@@ -1396,7 +1416,8 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
/*
* OK, we're invoking a handler
*/
-int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs)
+int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+ struct task_struct *tsk)
{
struct sigcontext __user *sc;
struct sigframe __user *frame;
@@ -1404,9 +1425,12 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs
unsigned long newsp = 0;
int sigret;
unsigned long tramp;
+ struct pt_regs *regs = tsk->thread.regs;
+
+ BUG_ON(tsk != current);
/* Set up Signal Frame */
- frame = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*frame), 1);
+ frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 1);
if (unlikely(frame == NULL))
goto badframe;
sc = (struct sigcontext __user *) &frame->sctx;
@@ -1425,9 +1449,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs
|| __put_user(ksig->sig, &sc->signal))
goto badframe;
- if (vdso32_sigtramp && current->mm->context.vdso_base) {
+ if (vdso32_sigtramp && tsk->mm->context.vdso_base) {
sigret = 0;
- tramp = current->mm->context.vdso_base + vdso32_sigtramp;
+ tramp = tsk->mm->context.vdso_base + vdso32_sigtramp;
} else {
sigret = __NR_sigreturn;
tramp = (unsigned long) frame->mctx.tramp;
@@ -1449,7 +1473,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs
regs->link = tramp;
- current->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
+ tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
/* create a stack frame for the caller of the handler */
newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
@@ -1469,7 +1493,7 @@ badframe:
printk_ratelimited(KERN_INFO
"%s[%d]: bad frame in handle_signal32: "
"%p nip %08lx lr %08lx\n",
- current->comm, current->pid,
+ tsk->comm, tsk->pid,
frame, regs->nip, regs->link);
return 1;
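
The transact_* -> ck* renames running through signal_32.c invert which thread_struct fields hold which state: fp_state/vr_state/vrsave now always carry the live (possibly transactional) values, while ckfp_state/ckvr_state/ckvrsave carry the checkpointed values, valid only inside a transaction. A hedged sketch of the resulting frame-selection rule (frame_fp_state() is a hypothetical helper, not part of the patch):

    static struct thread_fp_state *frame_fp_state(struct thread_struct *t,
                                                  unsigned long msr, bool tm_frame)
    {
            /* The regular frame always carries the checkpointed state. */
            if (!tm_frame)
                    return &t->ckfp_state;
            /* The transactional frame carries the live state if FP was
             * enabled in the transaction, else a copy of the checkpoint. */
            return (msr & MSR_FP) ? &t->fp_state : &t->ckfp_state;
    }
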
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 7e49984d4331..96698fdf93b4 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -35,6 +35,7 @@
#include <asm/vdso.h>
#include <asm/switch_to.h>
#include <asm/tm.h>
+#include <asm/asm-prototypes.h>
#include "signal.h"
@@ -90,9 +91,9 @@ static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc)
* Set up the sigcontext for the signal frame.
*/
-static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
- int signr, sigset_t *set, unsigned long handler,
- int ctx_has_vsx_region)
+static long setup_sigcontext(struct sigcontext __user *sc,
+ struct task_struct *tsk, int signr, sigset_t *set,
+ unsigned long handler, int ctx_has_vsx_region)
{
/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
* process never used altivec yet (MSR_VEC is zero in pt_regs of
@@ -106,17 +107,20 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
unsigned long vrsave;
#endif
+ struct pt_regs *regs = tsk->thread.regs;
unsigned long msr = regs->msr;
long err = 0;
+ BUG_ON(tsk != current);
+
#ifdef CONFIG_ALTIVEC
err |= __put_user(v_regs, &sc->v_regs);
/* save altivec registers */
- if (current->thread.used_vr) {
- flush_altivec_to_thread(current);
+ if (tsk->thread.used_vr) {
+ flush_altivec_to_thread(tsk);
/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
- err |= __copy_to_user(v_regs, &current->thread.vr_state,
+ err |= __copy_to_user(v_regs, &tsk->thread.vr_state,
33 * sizeof(vector128));
/* set MSR_VEC in the MSR value in the frame to indicate that sc->v_regs
* contains valid data.
@@ -129,16 +133,16 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
vrsave = 0;
if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
vrsave = mfspr(SPRN_VRSAVE);
- current->thread.vrsave = vrsave;
+ tsk->thread.vrsave = vrsave;
}
err |= __put_user(vrsave, (u32 __user *)&v_regs[33]);
#else /* CONFIG_ALTIVEC */
err |= __put_user(0, &sc->v_regs);
#endif /* CONFIG_ALTIVEC */
- flush_fp_to_thread(current);
+ flush_fp_to_thread(tsk);
/* copy fpr regs and fpscr */
- err |= copy_fpr_to_user(&sc->fp_regs, current);
+ err |= copy_fpr_to_user(&sc->fp_regs, tsk);
/*
* Clear the MSR VSX bit to indicate there is no valid state attached
@@ -151,10 +155,10 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
* then out to userspace. Update v_regs to point after the
* VMX data.
*/
- if (current->thread.used_vsr && ctx_has_vsx_region) {
- flush_vsx_to_thread(current);
+ if (tsk->thread.used_vsr && ctx_has_vsx_region) {
+ flush_vsx_to_thread(tsk);
v_regs += ELF_NVRREG;
- err |= copy_vsx_to_user(v_regs, current);
+ err |= copy_vsx_to_user(v_regs, tsk);
/* set MSR_VSX in the MSR value in the frame to
* indicate that sc->vs_regs contains valid data.
*/
@@ -187,7 +191,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
*/
static long setup_tm_sigcontexts(struct sigcontext __user *sc,
struct sigcontext __user *tm_sc,
- struct pt_regs *regs,
+ struct task_struct *tsk,
int signr, sigset_t *set, unsigned long handler)
{
/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
@@ -202,9 +206,12 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc);
#endif
- unsigned long msr = regs->msr;
+ struct pt_regs *regs = tsk->thread.regs;
+ unsigned long msr = tsk->thread.ckpt_regs.msr;
long err = 0;
+ BUG_ON(tsk != current);
+
BUG_ON(!MSR_TM_ACTIVE(regs->msr));
/* Remove TM bits from thread's MSR. The MSR in the sigcontext
@@ -214,28 +221,25 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
*/
regs->msr &= ~MSR_TS_MASK;
- flush_fp_to_thread(current);
-
#ifdef CONFIG_ALTIVEC
err |= __put_user(v_regs, &sc->v_regs);
err |= __put_user(tm_v_regs, &tm_sc->v_regs);
/* save altivec registers */
- if (current->thread.used_vr) {
- flush_altivec_to_thread(current);
+ if (tsk->thread.used_vr) {
/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
- err |= __copy_to_user(v_regs, &current->thread.vr_state,
+ err |= __copy_to_user(v_regs, &tsk->thread.ckvr_state,
33 * sizeof(vector128));
/* If VEC was enabled there are transactional VRs valid too,
* else they're a copy of the checkpointed VRs.
*/
if (msr & MSR_VEC)
err |= __copy_to_user(tm_v_regs,
- &current->thread.transact_vr,
+ &tsk->thread.vr_state,
33 * sizeof(vector128));
else
err |= __copy_to_user(tm_v_regs,
- &current->thread.vr_state,
+ &tsk->thread.ckvr_state,
33 * sizeof(vector128));
/* set MSR_VEC in the MSR value in the frame to indicate
@@ -247,13 +251,13 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
* use altivec.
*/
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- current->thread.vrsave = mfspr(SPRN_VRSAVE);
- err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
+ tsk->thread.ckvrsave = mfspr(SPRN_VRSAVE);
+ err |= __put_user(tsk->thread.ckvrsave, (u32 __user *)&v_regs[33]);
if (msr & MSR_VEC)
- err |= __put_user(current->thread.transact_vrsave,
+ err |= __put_user(tsk->thread.vrsave,
(u32 __user *)&tm_v_regs[33]);
else
- err |= __put_user(current->thread.vrsave,
+ err |= __put_user(tsk->thread.ckvrsave,
(u32 __user *)&tm_v_regs[33]);
#else /* CONFIG_ALTIVEC */
@@ -262,11 +266,11 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
#endif /* CONFIG_ALTIVEC */
/* copy fpr regs and fpscr */
- err |= copy_fpr_to_user(&sc->fp_regs, current);
+ err |= copy_ckfpr_to_user(&sc->fp_regs, tsk);
if (msr & MSR_FP)
- err |= copy_transact_fpr_to_user(&tm_sc->fp_regs, current);
+ err |= copy_fpr_to_user(&tm_sc->fp_regs, tsk);
else
- err |= copy_fpr_to_user(&tm_sc->fp_regs, current);
+ err |= copy_ckfpr_to_user(&tm_sc->fp_regs, tsk);
#ifdef CONFIG_VSX
/*
@@ -274,17 +278,16 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
* then out to userspace. Update v_regs to point after the
* VMX data.
*/
- if (current->thread.used_vsr) {
- flush_vsx_to_thread(current);
+ if (tsk->thread.used_vsr) {
v_regs += ELF_NVRREG;
tm_v_regs += ELF_NVRREG;
- err |= copy_vsx_to_user(v_regs, current);
+ err |= copy_ckvsx_to_user(v_regs, tsk);
if (msr & MSR_VSX)
- err |= copy_transact_vsx_to_user(tm_v_regs, current);
+ err |= copy_vsx_to_user(tm_v_regs, tsk);
else
- err |= copy_vsx_to_user(tm_v_regs, current);
+ err |= copy_ckvsx_to_user(tm_v_regs, tsk);
/* set MSR_VSX in the MSR value in the frame to
* indicate that sc->vs_regs contains valid data.
@@ -298,7 +301,7 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
WARN_ON(!FULL_REGS(regs));
err |= __copy_to_user(&tm_sc->gp_regs, regs, GP_REGS_SIZE);
err |= __copy_to_user(&sc->gp_regs,
- &current->thread.ckpt_regs, GP_REGS_SIZE);
+ &tsk->thread.ckpt_regs, GP_REGS_SIZE);
err |= __put_user(msr, &tm_sc->gp_regs[PT_MSR]);
err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
err |= __put_user(signr, &sc->signal);
@@ -314,7 +317,7 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
* Restore the sigcontext from the signal frame.
*/
-static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
+static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
struct sigcontext __user *sc)
{
#ifdef CONFIG_ALTIVEC
@@ -323,10 +326,13 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
unsigned long err = 0;
unsigned long save_r13 = 0;
unsigned long msr;
+ struct pt_regs *regs = tsk->thread.regs;
#ifdef CONFIG_VSX
int i;
#endif
+ BUG_ON(tsk != current);
+
/* If this is not a signal return, we preserve the TLS in r13 */
if (!sig)
save_r13 = regs->gpr[13];
@@ -356,7 +362,7 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
/*
* Force reload of FP/VEC.
- * This has to be done before copying stuff into current->thread.fpr/vr
+ * This has to be done before copying stuff into tsk->thread.fpr/vr
* for the reasons explained in the previous comment.
*/
regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
@@ -368,21 +374,23 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128)))
return -EFAULT;
/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
- if (v_regs != NULL && (msr & MSR_VEC) != 0)
- err |= __copy_from_user(&current->thread.vr_state, v_regs,
+ if (v_regs != NULL && (msr & MSR_VEC) != 0) {
+ err |= __copy_from_user(&tsk->thread.vr_state, v_regs,
33 * sizeof(vector128));
- else if (current->thread.used_vr)
- memset(&current->thread.vr_state, 0, 33 * sizeof(vector128));
+ tsk->thread.used_vr = true;
+ } else if (tsk->thread.used_vr) {
+ memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128));
+ }
/* Always get VRSAVE back */
if (v_regs != NULL)
- err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
+ err |= __get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
else
- current->thread.vrsave = 0;
+ tsk->thread.vrsave = 0;
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- mtspr(SPRN_VRSAVE, current->thread.vrsave);
+ mtspr(SPRN_VRSAVE, tsk->thread.vrsave);
#endif /* CONFIG_ALTIVEC */
/* restore floating point */
- err |= copy_fpr_from_user(current, &sc->fp_regs);
+ err |= copy_fpr_from_user(tsk, &sc->fp_regs);
#ifdef CONFIG_VSX
/*
* Get additional VSX data. Update v_regs to point after the
* VMX data. Copy VSX low doublewords from userspace to a local
* buffer for formatting, then into the taskstruct.
* buffer for formatting, then into the taskstruct.
*/
v_regs += ELF_NVRREG;
- if ((msr & MSR_VSX) != 0)
- err |= copy_vsx_from_user(current, v_regs);
- else
+ if ((msr & MSR_VSX) != 0) {
+ err |= copy_vsx_from_user(tsk, v_regs);
+ tsk->thread.used_vsr = true;
+ } else {
for (i = 0; i < 32 ; i++)
- current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+ tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+ }
#endif
return err;
}
@@ -404,7 +414,7 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
* Restore the two sigcontexts from the frame of a transactional process.
*/
-static long restore_tm_sigcontexts(struct pt_regs *regs,
+static long restore_tm_sigcontexts(struct task_struct *tsk,
struct sigcontext __user *sc,
struct sigcontext __user *tm_sc)
{
@@ -413,12 +423,16 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
#endif
unsigned long err = 0;
unsigned long msr;
+ struct pt_regs *regs = tsk->thread.regs;
#ifdef CONFIG_VSX
int i;
#endif
+
+ BUG_ON(tsk != current);
+
/* copy the GPRs */
err |= __copy_from_user(regs->gpr, tm_sc->gp_regs, sizeof(regs->gpr));
- err |= __copy_from_user(&current->thread.ckpt_regs, sc->gp_regs,
+ err |= __copy_from_user(&tsk->thread.ckpt_regs, sc->gp_regs,
sizeof(regs->gpr));
/*
@@ -430,7 +444,7 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
* we don't need to re-copy them here.
*/
err |= __get_user(regs->nip, &tm_sc->gp_regs[PT_NIP]);
- err |= __get_user(current->thread.tm_tfhar, &sc->gp_regs[PT_NIP]);
+ err |= __get_user(tsk->thread.tm_tfhar, &sc->gp_regs[PT_NIP]);
/* get MSR separately, transfer the LE bit if doing signal return */
err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
@@ -449,13 +463,13 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
err |= __get_user(regs->link, &tm_sc->gp_regs[PT_LNK]);
err |= __get_user(regs->xer, &tm_sc->gp_regs[PT_XER]);
err |= __get_user(regs->ccr, &tm_sc->gp_regs[PT_CCR]);
- err |= __get_user(current->thread.ckpt_regs.ctr,
+ err |= __get_user(tsk->thread.ckpt_regs.ctr,
&sc->gp_regs[PT_CTR]);
- err |= __get_user(current->thread.ckpt_regs.link,
+ err |= __get_user(tsk->thread.ckpt_regs.link,
&sc->gp_regs[PT_LNK]);
- err |= __get_user(current->thread.ckpt_regs.xer,
+ err |= __get_user(tsk->thread.ckpt_regs.xer,
&sc->gp_regs[PT_XER]);
- err |= __get_user(current->thread.ckpt_regs.ccr,
+ err |= __get_user(tsk->thread.ckpt_regs.ccr,
&sc->gp_regs[PT_CCR]);
/* These regs are not checkpointed; they can go in 'regs'. */
@@ -466,7 +480,7 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
/*
* Force reload of FP/VEC.
- * This has to be done before copying stuff into current->thread.fpr/vr
+ * This has to be done before copying stuff into tsk->thread.fpr/vr
* for the reasons explained in the previous comment.
*/
regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
@@ -483,32 +497,33 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
return -EFAULT;
/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
if (v_regs != NULL && tm_v_regs != NULL && (msr & MSR_VEC) != 0) {
- err |= __copy_from_user(&current->thread.vr_state, v_regs,
+ err |= __copy_from_user(&tsk->thread.ckvr_state, v_regs,
33 * sizeof(vector128));
- err |= __copy_from_user(&current->thread.transact_vr, tm_v_regs,
+ err |= __copy_from_user(&tsk->thread.vr_state, tm_v_regs,
33 * sizeof(vector128));
+ tsk->thread.used_vr = true;
}
- else if (current->thread.used_vr) {
- memset(&current->thread.vr_state, 0, 33 * sizeof(vector128));
- memset(&current->thread.transact_vr, 0, 33 * sizeof(vector128));
+ else if (tsk->thread.used_vr) {
+ memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128));
+ memset(&tsk->thread.ckvr_state, 0, 33 * sizeof(vector128));
}
/* Always get VRSAVE back */
if (v_regs != NULL && tm_v_regs != NULL) {
- err |= __get_user(current->thread.vrsave,
+ err |= __get_user(tsk->thread.ckvrsave,
(u32 __user *)&v_regs[33]);
- err |= __get_user(current->thread.transact_vrsave,
+ err |= __get_user(tsk->thread.vrsave,
(u32 __user *)&tm_v_regs[33]);
}
else {
- current->thread.vrsave = 0;
- current->thread.transact_vrsave = 0;
+ tsk->thread.vrsave = 0;
+ tsk->thread.ckvrsave = 0;
}
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- mtspr(SPRN_VRSAVE, current->thread.vrsave);
+ mtspr(SPRN_VRSAVE, tsk->thread.vrsave);
#endif /* CONFIG_ALTIVEC */
/* restore floating point */
- err |= copy_fpr_from_user(current, &sc->fp_regs);
- err |= copy_transact_fpr_from_user(current, &tm_sc->fp_regs);
+ err |= copy_fpr_from_user(tsk, &tm_sc->fp_regs);
+ err |= copy_ckfpr_from_user(tsk, &sc->fp_regs);
#ifdef CONFIG_VSX
/*
* Get additional VSX data. Update v_regs to point after the
@@ -518,32 +533,31 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
if (v_regs && ((msr & MSR_VSX) != 0)) {
v_regs += ELF_NVRREG;
tm_v_regs += ELF_NVRREG;
- err |= copy_vsx_from_user(current, v_regs);
- err |= copy_transact_vsx_from_user(current, tm_v_regs);
+ err |= copy_vsx_from_user(tsk, tm_v_regs);
+ err |= copy_ckvsx_from_user(tsk, v_regs);
+ tsk->thread.used_vsr = true;
} else {
for (i = 0; i < 32 ; i++) {
- current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
- current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0;
+ tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+ tsk->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
}
}
#endif
tm_enable();
/* Make sure the transaction is marked as failed */
- current->thread.tm_texasr |= TEXASR_FS;
+ tsk->thread.tm_texasr |= TEXASR_FS;
/* This loads the checkpointed FP/VEC state, if used */
- tm_recheckpoint(&current->thread, msr);
+ tm_recheckpoint(&tsk->thread, msr);
- /* This loads the speculative FP/VEC state, if used */
+ msr_check_and_set(msr & (MSR_FP | MSR_VEC));
if (msr & MSR_FP) {
- do_load_up_transact_fpu(&current->thread);
- regs->msr |= (MSR_FP | current->thread.fpexc_mode);
+ load_fp_state(&tsk->thread.fp_state);
+ regs->msr |= (MSR_FP | tsk->thread.fpexc_mode);
}
-#ifdef CONFIG_ALTIVEC
if (msr & MSR_VEC) {
- do_load_up_transact_altivec(&current->thread);
+ load_vr_state(&tsk->thread.vr_state);
regs->msr |= MSR_VEC;
}
-#endif
return err;
}
@@ -594,6 +608,8 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
unsigned long new_msr = 0;
int ctx_has_vsx_region = 0;
+ BUG_ON(regs != current->thread.regs);
+
if (new_ctx &&
get_user(new_msr, &new_ctx->uc_mcontext.gp_regs[PT_MSR]))
return -EFAULT;
@@ -616,7 +632,7 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
if (old_ctx != NULL) {
if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size)
- || setup_sigcontext(&old_ctx->uc_mcontext, regs, 0, NULL, 0,
+ || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0,
ctx_has_vsx_region)
|| __copy_to_user(&old_ctx->uc_sigmask,
&current->blocked, sizeof(sigset_t)))
@@ -644,7 +660,7 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
do_exit(SIGSEGV);
set_current_blocked(&set);
- if (restore_sigcontext(regs, NULL, 0, &new_ctx->uc_mcontext))
+ if (restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext))
do_exit(SIGSEGV);
/* This returns like rt_sigreturn */
@@ -667,6 +683,8 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
unsigned long msr;
#endif
+ BUG_ON(current->thread.regs != regs);
+
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
@@ -676,7 +694,21 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
goto badframe;
set_current_blocked(&set);
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /*
+ * If there is a transactional state then throw it away.
+ * The purpose of a sigreturn is to destroy all traces of the
+ * signal frame; this includes any transactional state created
+ * within it. We only check for suspended, as we can never be
+ * active in the kernel; if we somehow are, there is nothing
+ * better to do than go ahead and hit the Bad Thing later.
+ * The cause is not important as there will never be a
+ * recheckpoint so it's not user visible.
+ */
+ if (MSR_TM_SUSPENDED(mfmsr()))
+ tm_reclaim_current(0);
+
if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
goto badframe;
if (MSR_TM_ACTIVE(msr)) {
@@ -684,14 +716,14 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
struct ucontext __user *uc_transact;
if (__get_user(uc_transact, &uc->uc_link))
goto badframe;
- if (restore_tm_sigcontexts(regs, &uc->uc_mcontext,
+ if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
&uc_transact->uc_mcontext))
goto badframe;
}
else
/* Fall through, for non-TM restore */
#endif
- if (restore_sigcontext(regs, NULL, 1, &uc->uc_mcontext))
+ if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext))
goto badframe;
if (restore_altstack(&uc->uc_stack))
@@ -710,13 +742,17 @@ badframe:
return 0;
}
-int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
+int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
+ struct task_struct *tsk)
{
struct rt_sigframe __user *frame;
unsigned long newsp = 0;
long err = 0;
+ struct pt_regs *regs = tsk->thread.regs;
+
+ BUG_ON(tsk != current);
- frame = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*frame), 0);
+ frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 0);
if (unlikely(frame == NULL))
goto badframe;
@@ -737,14 +773,13 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs
err |= __put_user(&frame->uc_transact, &frame->uc.uc_link);
err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
&frame->uc_transact.uc_mcontext,
- regs, ksig->sig,
- NULL,
+ tsk, ksig->sig, NULL,
(unsigned long)ksig->ka.sa.sa_handler);
} else
#endif
{
err |= __put_user(0, &frame->uc.uc_link);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, ksig->sig,
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
NULL, (unsigned long)ksig->ka.sa.sa_handler,
1);
}
@@ -753,11 +788,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs
goto badframe;
/* Make sure signal handler doesn't get spurious FP exceptions */
- current->thread.fp_state.fpscr = 0;
+ tsk->thread.fp_state.fpscr = 0;
/* Set up to return from userspace. */
- if (vdso64_rt_sigtramp && current->mm->context.vdso_base) {
- regs->link = current->mm->context.vdso_base + vdso64_rt_sigtramp;
+ if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) {
+ regs->link = tsk->mm->context.vdso_base + vdso64_rt_sigtramp;
} else {
err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
if (err)
@@ -807,7 +842,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs
badframe:
if (show_unhandled_signals)
printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32,
- current->comm, current->pid, "setup_rt_frame",
+ tsk->comm, tsk->pid, "setup_rt_frame",
(long)frame, regs->nip, regs->link);
return 1;
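
Both restore paths above replace do_load_up_transact_fpu()/do_load_up_transact_altivec() with the generic loaders. The shape of the replacement, as used in the hunks, wrapped here in a hypothetical helper for illustration (msr_check_and_set() must enable FP/VEC in the kernel's MSR before the load instructions can execute):

    static void load_live_tm_state(struct task_struct *tsk, struct pt_regs *regs,
                                   unsigned long msr)
    {
            msr_check_and_set(msr & (MSR_FP | MSR_VEC));    /* enable facilities first */
            if (msr & MSR_FP) {
                    load_fp_state(&tsk->thread.fp_state);   /* live (transactional) FP */
                    regs->msr |= MSR_FP | tsk->thread.fpexc_mode;
            }
            if (msr & MSR_VEC) {
                    load_vr_state(&tsk->thread.vr_state);   /* live (transactional) VMX */
                    regs->msr |= MSR_VEC;
            }
    }
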
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 25a39052bf6b..9c6f3fd58059 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -830,7 +830,7 @@ int __cpu_disable(void)
/* Update sibling maps */
base = cpu_first_thread_sibling(cpu);
- for (i = 0; i < threads_per_core; i++) {
+ for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) {
cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
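
The added bound guards configurations where nr_cpu_ids is not a multiple of threads_per_core (for instance a nr_cpus= limit cutting a core in half); without it, base + i can index past the end of the per-CPU masks. The clamped loop shape from the hunk:

    base = cpu_first_thread_sibling(cpu);
    for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) {
            /* base + i is guaranteed to be a valid CPU id here */
            cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
            cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
    }
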
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 5fa92706444b..644cce3d8dce 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -40,6 +40,7 @@
#include <asm/syscalls.h>
#include <asm/time.h>
#include <asm/unistd.h>
+#include <asm/asm-prototypes.h>
static inline unsigned long do_mmap2(unsigned long addr, size_t len,
unsigned long prot, unsigned long flags,
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 3efbedefba6a..67859b7d1c97 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -73,6 +73,7 @@
#include <asm/vdso_datapage.h>
#include <asm/firmware.h>
#include <asm/cputime.h>
+#include <asm/asm-prototypes.h>
/* powerpc clocksource/clockevent code */
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 298afcf3bf2a..3a2d04134da9 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -108,6 +108,7 @@ _GLOBAL(tm_reclaim)
/* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */
std r3, STK_PARAM(R3)(r1)
+ std r4, STK_PARAM(R4)(r1)
SAVE_NVGPRS(r1)
/* We need to setup MSR for VSX register save instructions. */
@@ -126,43 +127,6 @@ _GLOBAL(tm_reclaim)
mtmsrd r15
std r14, TM_FRAME_L0(r1)
- /* Stash the stack pointer away for use after reclaim */
- std r1, PACAR1(r13)
-
- /* ******************** FPR/VR/VSRs ************
- * Before reclaiming, capture the current/transactional FPR/VR
- * versions /if used/.
- *
- * (If VSX used, FP and VMX are implied. Or, we don't need to look
- * at MSR.VSX as copying FP regs if .FP, vector regs if .VMX covers it.)
- *
- * We're passed the thread's MSR as parameter 2.
- *
- * We enabled VEC/FP/VSX in the msr above, so we can execute these
- * instructions!
- */
- andis. r0, r4, MSR_VEC@h
- beq dont_backup_vec
-
- addi r7, r3, THREAD_TRANSACT_VRSTATE
- SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */
- mfvscr v0
- li r6, VRSTATE_VSCR
- stvx v0, r7, r6
-dont_backup_vec:
- mfspr r0, SPRN_VRSAVE
- std r0, THREAD_TRANSACT_VRSAVE(r3)
-
- andi. r0, r4, MSR_FP
- beq dont_backup_fp
-
- addi r7, r3, THREAD_TRANSACT_FPSTATE
- SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */
-
- mffs fr0
- stfd fr0,FPSTATE_FPSCR(r7)
-
-dont_backup_fp:
/* Do sanity check on MSR to make sure we are suspended */
li r7, (MSR_TS_S)@higher
srdi r6, r14, 32
@@ -170,6 +134,9 @@ dont_backup_fp:
1: tdeqi r6, 0
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+ /* Stash the stack pointer away for use after reclaim */
+ std r1, PACAR1(r13)
+
/* Clear MSR RI since we are about to change r1, EE is already off. */
li r4, 0
mtmsrd r4, 1
@@ -273,6 +240,43 @@ dont_backup_fp:
* MSR.
*/
+
+ /* ******************** FPR/VR/VSRs ************
+ * After reclaiming, capture the checkpointed FPRs/VRs /if used/.
+ *
+ * (If VSX is used, FP and VMX are implied, so we don't need to look
+ * at MSR.VSX: copying FP regs if .FP and vector regs if .VMX covers it.)
+ *
+ * We're passed the thread's MSR as the second parameter.
+ *
+ * We enabled VEC/FP/VSX in the msr above, so we can execute these
+ * instructions!
+ */
+ ld r4, STK_PARAM(R4)(r1) /* Second parameter, MSR * */
+ mr r3, r12
+ andis. r0, r4, MSR_VEC@h
+ beq dont_backup_vec
+
+ addi r7, r3, THREAD_CKVRSTATE
+ SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 ckvr state */
+ mfvscr v0
+ li r6, VRSTATE_VSCR
+ stvx v0, r7, r6
+dont_backup_vec:
+ mfspr r0, SPRN_VRSAVE
+ std r0, THREAD_CKVRSAVE(r3)
+
+ andi. r0, r4, MSR_FP
+ beq dont_backup_fp
+
+ addi r7, r3, THREAD_CKFPSTATE
+ SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 ckfp state */
+
+ mffs fr0
+ stfd fr0,FPSTATE_FPSCR(r7)
+
+dont_backup_fp:
+
/* TM regs, incl TEXASR -- these live in thread_struct. Note they've
* been updated by the treclaim, to explain to userland the failure
* cause (aborted).
@@ -288,6 +292,7 @@ dont_backup_fp:
/* Restore original MSR/IRQ state & clear TM mode */
ld r14, TM_FRAME_L0(r1) /* Orig MSR */
+
li r15, 0
rldimi r14, r15, MSR_TS_LG, (63-MSR_TS_LG)-1
mtmsrd r14
@@ -356,28 +361,29 @@ _GLOBAL(__tm_recheckpoint)
mtmsr r5
#ifdef CONFIG_ALTIVEC
- /* FP and VEC registers: These are recheckpointed from thread.fpr[]
- * and thread.vr[] respectively. The thread.transact_fpr[] version
- * is more modern, and will be loaded subsequently by any FPUnavailable
- * trap.
+ /*
+ * FP and VEC registers: These are recheckpointed from
+ * thread.ckfp_state and thread.ckvr_state respectively. The
+ * thread.fp_state version holds the 'live' (transactional) state,
+ * and will be loaded subsequently by any FPUnavailable trap.
*/
andis. r0, r4, MSR_VEC@h
beq dont_restore_vec
- addi r8, r3, THREAD_VRSTATE
+ addi r8, r3, THREAD_CKVRSTATE
li r5, VRSTATE_VSCR
lvx v0, r8, r5
mtvscr v0
REST_32VRS(0, r5, r8) /* r5 scratch, r8 ptr */
dont_restore_vec:
- ld r5, THREAD_VRSAVE(r3)
+ ld r5, THREAD_CKVRSAVE(r3)
mtspr SPRN_VRSAVE, r5
#endif
andi. r0, r4, MSR_FP
beq dont_restore_fp
- addi r8, r3, THREAD_FPSTATE
+ addi r8, r3, THREAD_CKFPSTATE
lfd fr0, FPSTATE_FPSCR(r8)
MTFSF_L(fr0)
REST_32FPRS_VSRS(0, R4, R8)
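
The tm.S reorder moves the FPR/VR capture from before the treclaim to after it: once treclaim has run, the architected registers hold the checkpointed values, so saving them at that point is what fills ckfp_state/ckvr_state. A pseudo-C sketch of the flow the assembly above implements (treclaim(), save_vrs(), and save_fprs() are hypothetical stand-ins for the instructions, not kernel functions):

    void tm_reclaim_flow(struct thread_struct *t, unsigned long msr)
    {
            treclaim();     /* registers now hold the checkpointed values */
            if (msr & MSR_VEC)
                    save_vrs(&t->ckvr_state);       /* capture checkpointed VMX */
            if (msr & MSR_FP)
                    save_fprs(&t->ckfp_state);      /* capture checkpointed FP */
            /* The live (transactional) values are expected to have been
             * flushed to fp_state/vr_state by the giveup paths already. */
    }
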
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 2cb589264cb7..a1f8f5641e9e 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -25,7 +25,8 @@
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/init.h>
-#include <linux/module.h>
+#include <linux/extable.h>
+#include <linux/module.h> /* print_modules */
#include <linux/prctl.h>
#include <linux/delay.h>
#include <linux/kprobes.h>
@@ -116,7 +117,7 @@ static int die_owner = -1;
static unsigned int die_nest_count;
static int die_counter;
-static unsigned __kprobes long oops_begin(struct pt_regs *regs)
+static unsigned long oops_begin(struct pt_regs *regs)
{
int cpu;
unsigned long flags;
@@ -143,8 +144,9 @@ static unsigned __kprobes long oops_begin(struct pt_regs *regs)
pmac_backlight_unblank();
return flags;
}
+NOKPROBE_SYMBOL(oops_begin);
-static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs,
+static void oops_end(unsigned long flags, struct pt_regs *regs,
int signr)
{
bust_spinlocks(0);
@@ -195,8 +197,9 @@ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs,
panic("Fatal exception");
do_exit(signr);
}
+NOKPROBE_SYMBOL(oops_end);
-static int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+static int __die(const char *str, struct pt_regs *regs, long err)
{
printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
#ifdef CONFIG_PREEMPT
@@ -220,6 +223,7 @@ static int __kprobes __die(const char *str, struct pt_regs *regs, long err)
return 0;
}
+NOKPROBE_SYMBOL(__die);
void die(const char *str, struct pt_regs *regs, long err)
{
@@ -801,7 +805,7 @@ void RunModeException(struct pt_regs *regs)
_exception(SIGTRAP, regs, 0, 0);
}
-void __kprobes single_step_exception(struct pt_regs *regs)
+void single_step_exception(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
@@ -818,6 +822,7 @@ void __kprobes single_step_exception(struct pt_regs *regs)
bail:
exception_exit(prev_state);
}
+NOKPROBE_SYMBOL(single_step_exception);
/*
* After we have successfully emulated an instruction, we have to
@@ -1139,7 +1144,7 @@ static int emulate_math(struct pt_regs *regs)
static inline int emulate_math(struct pt_regs *regs) { return -1; }
#endif
-void __kprobes program_check_exception(struct pt_regs *regs)
+void program_check_exception(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
unsigned int reason = get_reason(regs);
@@ -1259,16 +1264,18 @@ sigill:
bail:
exception_exit(prev_state);
}
+NOKPROBE_SYMBOL(program_check_exception);
/*
* This occurs when running in hypervisor mode on POWER6 or later
* and an illegal instruction is encountered.
*/
-void __kprobes emulation_assist_interrupt(struct pt_regs *regs)
+void emulation_assist_interrupt(struct pt_regs *regs)
{
regs->msr |= REASON_ILLEGAL;
program_check_exception(regs);
}
+NOKPROBE_SYMBOL(emulation_assist_interrupt);
void alignment_exception(struct pt_regs *regs)
{
@@ -1309,6 +1316,18 @@ bail:
exception_exit(prev_state);
}
+void slb_miss_bad_addr(struct pt_regs *regs)
+{
+ enum ctx_state prev_state = exception_enter();
+
+ if (user_mode(regs))
+ _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
+ else
+ bad_page_fault(regs, regs->dar, SIGSEGV);
+
+ exception_exit(prev_state);
+}
+
void StackOverflow(struct pt_regs *regs)
{
printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n",
@@ -1371,6 +1390,22 @@ void vsx_unavailable_exception(struct pt_regs *regs)
}
#ifdef CONFIG_PPC64
+static void tm_unavailable(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (user_mode(regs)) {
+ current->thread.load_tm++;
+ regs->msr |= MSR_TM;
+ tm_enable();
+ tm_restore_sprs(&current->thread);
+ return;
+ }
+#endif
+ pr_emerg("Unrecoverable TM Unavailable Exception "
+ "%lx at %lx\n", regs->trap, regs->nip);
+ die("Unrecoverable TM Unavailable Exception", regs, SIGABRT);
+}
+
void facility_unavailable_exception(struct pt_regs *regs)
{
static char *facility_strings[] = {
@@ -1450,6 +1485,27 @@ void facility_unavailable_exception(struct pt_regs *regs)
return;
}
+ if (status == FSCR_TM_LG) {
+ /*
+ * If we're here then the hardware is TM aware because it
+ * generated an exception with FSCR_TM set.
+ *
+ * If cpu_has_feature(CPU_FTR_TM) is false, then either firmware
+ * told us not to do TM, or the kernel is not built with TM
+ * support.
+ *
+ * If both of those things are true, then userspace can spam the
+ * console by triggering the printk() below just by continually
+ * doing tbegin (or any TM instruction). So in that case just
+ * send the process a SIGILL immediately.
+ */
+ if (!cpu_has_feature(CPU_FTR_TM))
+ goto out;
+
+ tm_unavailable(regs);
+ return;
+ }
+
if ((status < ARRAY_SIZE(facility_strings)) &&
facility_strings[status])
facility = facility_strings[status];
@@ -1462,6 +1518,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
"%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n",
hv ? "Hypervisor " : "", facility, regs->nip, regs->msr);
+out:
if (user_mode(regs)) {
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
return;
@@ -1503,7 +1560,8 @@ void fp_unavailable_tm(struct pt_regs *regs)
/* If VMX is in use, get the transactional values back */
if (regs->msr & MSR_VEC) {
- do_load_up_transact_altivec(&current->thread);
+ msr_check_and_set(MSR_VEC);
+ load_vr_state(&current->thread.vr_state);
/* At this point all the VSX state is loaded, so enable it */
regs->msr |= MSR_VSX;
}
@@ -1524,7 +1582,8 @@ void altivec_unavailable_tm(struct pt_regs *regs)
current->thread.used_vr = 1;
if (regs->msr & MSR_FP) {
- do_load_up_transact_fpu(&current->thread);
+ msr_check_and_set(MSR_FP);
+ load_fp_state(&current->thread.fp_state);
regs->msr |= MSR_VSX;
}
}
@@ -1563,10 +1622,12 @@ void vsx_unavailable_tm(struct pt_regs *regs)
*/
tm_recheckpoint(&current->thread, regs->msr & ~orig_msr);
+ msr_check_and_set(orig_msr & (MSR_FP | MSR_VEC));
+
if (orig_msr & MSR_FP)
- do_load_up_transact_fpu(&current->thread);
+ load_fp_state(&current->thread.fp_state);
if (orig_msr & MSR_VEC)
- do_load_up_transact_altivec(&current->thread);
+ load_vr_state(&current->thread.vr_state);
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
@@ -1655,7 +1716,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
mtspr(SPRN_DBCR0, current->thread.debug.dbcr0);
}
-void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
+void DebugException(struct pt_regs *regs, unsigned long debug_status)
{
current->thread.debug.dbsr = debug_status;
@@ -1716,6 +1777,7 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
} else
handle_debug(regs, debug_status);
}
+NOKPROBE_SYMBOL(DebugException);
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
#if !defined(CONFIG_TAU_INT)
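
The traps.c hunks also convert kprobe blacklisting from the inline __kprobes attribute to NOKPROBE_SYMBOL() annotations placed after each definition, which keeps the functions in normal .text instead of a special section. The new style, shown on a hypothetical handler (demo_exception_handler() is illustrative; NOKPROBE_SYMBOL() is the real macro from linux/kprobes.h):

    #include <linux/kprobes.h>

    static void demo_exception_handler(struct pt_regs *regs)
    {
            /* work that must not itself be probed by kprobes */
    }
    NOKPROBE_SYMBOL(demo_exception_handler);
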
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 6767605ea8da..4111d30badfa 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -22,6 +22,7 @@
#include <linux/security.h>
#include <linux/memblock.h>
+#include <asm/cpu_has_feature.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/mmu.h>
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index cbabd143acae..78a7449bf489 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -30,7 +30,7 @@ CPPFLAGS_vdso32.lds += -P -C -Upowerpc
$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
$(call if_changed,vdso32ld)
# strip rule for the .so file
@@ -39,12 +39,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
# assembly rules for the .S files
-$(obj-vdso32): %.o: %.S
+$(obj-vdso32): %.o: %.S FORCE
$(call if_changed_dep,vdso32as)
# actual build commands
quiet_cmd_vdso32ld = VDSO32L $@
- cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+ cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
quiet_cmd_vdso32as = VDSO32A $@
cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index c710802b8fb6..31107bf5a61f 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -23,7 +23,7 @@ CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
$(call if_changed,vdso64ld)
# strip rule for the .so file
@@ -31,15 +31,9 @@ $(obj)/%.so: OBJCOPYFLAGS := -S
$(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
-# assembly rules for the .S files
-$(obj-vdso64): %.o: %.S
- $(call if_changed_dep,vdso64as)
-
# actual build commands
quiet_cmd_vdso64ld = VDSO64L $@
- cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
-quiet_cmd_vdso64as = VDSO64A $@
- cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+ cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
# install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
index 184a6ba7f283..abf17feffe40 100644
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -59,7 +59,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
bl V_LOCAL_FUNC(__get_datapage)
mtlr r12
addi r3,r3,CFG_SYSCALL_MAP64
- cmpli cr0,r4,0
+ cmpldi cr0,r4,0
crclr cr0*4+so
beqlr
li r0,NR_syscalls
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index a76b4af37ef2..382021324883 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -145,7 +145,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
bne cr0,99f
li r3,0
- cmpli cr0,r4,0
+ cmpldi cr0,r4,0
crclr cr0*4+so
beqlr
lis r5,CLOCK_REALTIME_RES@h
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 616a6d854638..bc85bdff4e01 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -7,31 +7,6 @@
#include <asm/page.h>
#include <asm/ptrace.h>
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/* void do_load_up_transact_altivec(struct thread_struct *thread)
- *
- * This is similar to load_up_altivec but for the transactional version of the
- * vector regs. It doesn't mess with the task MSR or valid flags.
- * Furthermore, VEC laziness is not supported with TM currently.
- */
-_GLOBAL(do_load_up_transact_altivec)
- mfmsr r6
- oris r5,r6,MSR_VEC@h
- MTMSRD(r5)
- isync
-
- li r4,1
- stw r4,THREAD_USED_VR(r3)
-
- li r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR
- lvx v0,r10,r3
- mtvscr v0
- addi r10,r3,THREAD_TRANSACT_VRSTATE
- REST_32VRS(0,r4,r10)
-
- blr
-#endif
-
/*
* Load state from memory into VMX registers including VSCR.
* Assumes the caller has enabled VMX in the MSR.
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index b5fba689fca6..8295f51c1a5f 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -44,18 +44,68 @@ SECTIONS
* Text, read only data and other permanent read-only sections
*/
- /* Text and gots */
+ _text = .;
+ _stext = .;
+
+ /*
+ * Head text.
+ * This needs to be in its own output section to avoid ld placing
+ * branch trampoline stubs randomly throughout the fixed sections,
+ * which it will do (even if the branch comes from another section)
+ * in order to optimize stub generation.
+ */
+ .head.text : AT(ADDR(.head.text) - LOAD_OFFSET) {
+#ifdef CONFIG_PPC64
+ KEEP(*(.head.text.first_256B));
+#ifdef CONFIG_PPC_BOOK3E
+# define END_FIXED 0x100
+#else
+ KEEP(*(.head.text.real_vectors));
+ *(.head.text.real_trampolines);
+ KEEP(*(.head.text.virt_vectors));
+ *(.head.text.virt_trampolines);
+# if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+ KEEP(*(.head.data.fwnmi_page));
+# define END_FIXED 0x8000
+# else
+# define END_FIXED 0x7000
+# endif
+#endif
+ ASSERT((. == END_FIXED), "vmlinux.lds.S: fixed section overflow error");
+#else /* !CONFIG_PPC64 */
+ HEAD_TEXT
+#endif
+ } :kernel
+
+ /*
+ * If the build dies here, it's likely code in head_64.S is referencing
+ * labels it can't reach, and the linker inserting stubs without the
+ * assembler's knowledge. To debug, remove the above assert and
+ * rebuild. Look for branch stubs in the fixed section region.
+ *
+ * Linker stub generation could be allowed in "trampoline"
+ * sections if absolutely necessary, but this would require
+ * some rework of the fixed sections. Before resorting to this,
+ * consider references that have sufficient addressing range,
+ * (e.g., hand coded trampolines) so the linker does not have
+ * to add stubs.
+ *
+ * Linker stubs at the top of the main text section are currently not
+ * detected, and will result in a crash at boot due to offsets being
+ * wrong.
+ */
.text : AT(ADDR(.text) - LOAD_OFFSET) {
ALIGN_FUNCTION();
- HEAD_TEXT
- _text = .;
/* careful! __ftr_alt_* sections need to be close to .text */
*(.text .fixup __ftr_alt_* .ref.text)
SCHED_TEXT
+ CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
+ MEM_KEEP(init.text)
+ MEM_KEEP(exit.text)
#ifdef CONFIG_PPC32
*(.got1)
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index c2024ac9d4e8..029be26b5a17 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -22,6 +22,9 @@ config KVM
select ANON_INODES
select HAVE_KVM_EVENTFD
select SRCU
+ select KVM_VFIO
+ select IRQ_BYPASS_MANAGER
+ select HAVE_KVM_IRQ_BYPASS
config KVM_BOOK3S_HANDLER
bool
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 1f9e5529e692..7dd89b79d038 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -7,16 +7,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
KVM := ../../../virt/kvm
-common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
- $(KVM)/eventfd.o
+common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o
common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
+common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
CFLAGS_e500_mmu.o := -I.
CFLAGS_e500_mmu_host.o := -I.
CFLAGS_emulate.o := -I.
CFLAGS_emulate_loadstore.o := -I.
-common-objs-y += powerpc.o emulate.o emulate_loadstore.o
+common-objs-y += powerpc.o emulate_loadstore.o
obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o
@@ -24,6 +24,7 @@ AFLAGS_booke_interrupts.o := -I$(objtree)/$(obj)
kvm-e500-objs := \
$(common-objs-y) \
+ emulate.o \
booke.o \
booke_emulate.o \
booke_interrupts.o \
@@ -35,6 +36,7 @@ kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs)
kvm-e500mc-objs := \
$(common-objs-y) \
+ emulate.o \
booke.o \
booke_emulate.o \
bookehv_interrupts.o \
@@ -61,9 +63,6 @@ kvm-pr-y := \
book3s_32_mmu.o
ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-kvm-book3s_64-module-objs := \
- $(KVM)/coalesced_mmio.o
-
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_rmhandlers.o
endif
@@ -78,6 +77,7 @@ kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+ book3s_hv_hmi.o \
book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
book3s_hv_ras.o \
@@ -88,11 +88,8 @@ endif
kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
-kvm-book3s_64-module-objs += \
- $(KVM)/kvm_main.o \
- $(KVM)/eventfd.o \
- powerpc.o \
- emulate_loadstore.o \
+kvm-book3s_64-module-objs := \
+ $(common-objs-y) \
book3s.o \
book3s_64_vio.o \
book3s_rtas.o \
@@ -102,6 +99,7 @@ kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
kvm-book3s_32-objs := \
$(common-objs-y) \
+ emulate.o \
fpu.o \
book3s_paired_singles.o \
book3s.o \
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 47018fcbf7d6..b6952dd23152 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -52,8 +52,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "dec", VCPU_STAT(dec_exits) },
{ "ext_intr", VCPU_STAT(ext_intr_exits) },
{ "queue_intr", VCPU_STAT(queue_intr) },
+ { "halt_poll_success_ns", VCPU_STAT(halt_poll_success_ns) },
+ { "halt_poll_fail_ns", VCPU_STAT(halt_poll_fail_ns) },
+ { "halt_wait_ns", VCPU_STAT(halt_wait_ns) },
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), },
+ { "halt_successful_wait", VCPU_STAT(halt_successful_wait) },
{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "pf_storage", VCPU_STAT(pf_storage) },
@@ -64,6 +68,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "ld_slow", VCPU_STAT(ld_slow) },
{ "st", VCPU_STAT(st) },
{ "st_slow", VCPU_STAT(st_slow) },
+ { "pthru_all", VCPU_STAT(pthru_all) },
+ { "pthru_host", VCPU_STAT(pthru_host) },
+ { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) },
{ NULL }
};
@@ -592,9 +599,6 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_BESCR:
*val = get_reg_val(id, vcpu->arch.bescr);
break;
- case KVM_REG_PPC_VTB:
- *val = get_reg_val(id, vcpu->arch.vtb);
- break;
case KVM_REG_PPC_IC:
*val = get_reg_val(id, vcpu->arch.ic);
break;
@@ -666,9 +670,6 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_BESCR:
vcpu->arch.bescr = set_reg_val(id, *val);
break;
- case KVM_REG_PPC_VTB:
- vcpu->arch.vtb = set_reg_val(id, *val);
- break;
case KVM_REG_PPC_IC:
vcpu->arch.ic = set_reg_val(id, *val);
break;
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 2afdb9c0937d..8359752b3efc 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -498,6 +498,7 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_MMCR0:
case SPRN_MMCR1:
case SPRN_MMCR2:
+ case SPRN_UMMCR2:
#endif
break;
unprivileged:
@@ -579,7 +580,7 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
*spr_val = vcpu->arch.spurr;
break;
case SPRN_VTB:
- *spr_val = vcpu->arch.vtb;
+ *spr_val = to_book3s(vcpu)->vtb;
break;
case SPRN_IC:
*spr_val = vcpu->arch.ic;
@@ -640,6 +641,7 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
case SPRN_MMCR0:
case SPRN_MMCR1:
case SPRN_MMCR2:
+ case SPRN_UMMCR2:
case SPRN_TIR:
#endif
*spr_val = 0;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2fd5580c8f6e..3686471be32b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -53,11 +53,15 @@
#include <asm/smp.h>
#include <asm/dbell.h>
#include <asm/hmi.h>
+#include <asm/pnv-pci.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
+#include <linux/kvm_irqfd.h>
+#include <linux/irqbypass.h>
#include <linux/module.h>
+#include <linux/compiler.h>
#include "book3s.h"
@@ -70,6 +74,8 @@
/* Used to indicate that a guest page fault needs to be handled */
#define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1)
+/* Used to indicate that a guest passthrough interrupt needs to be handled */
+#define RESUME_PASSTHROUGH (RESUME_GUEST | RESUME_FLAG_ARCH2)
/* Used as a "null" value for timebase values */
#define TB_NIL (~(u64)0)
@@ -89,14 +95,55 @@ static struct kernel_param_ops module_param_ops = {
.get = param_get_int,
};
+module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass,
+ S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");
+
module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif
+/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
+static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
+module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
+
+/*
+ * Factor by which the vcore halt poll interval is grown, default is to double
+ */
+static unsigned int halt_poll_ns_grow = 2;
+module_param(halt_poll_ns_grow, uint, S_IRUGO);
+MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
+
+/*
+ * Factor by which the vcore halt poll interval is shrunk, default is to reset
+ */
+static unsigned int halt_poll_ns_shrink;
+module_param(halt_poll_ns_shrink, uint, S_IRUGO);
+MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
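
When book3s_hv is built as the kvm-hv module, these knobs should surface under /sys/module/kvm_hv/parameters/ (the path is inferred from the module name, not stated in the patch), with halt_poll_max_ns additionally writable at runtime thanks to the S_IWUSR bit above.
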
+
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
+static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
+ int *ip)
+{
+ int i = *ip;
+ struct kvm_vcpu *vcpu;
+
+ while (++i < MAX_SMT_THREADS) {
+ vcpu = READ_ONCE(vc->runnable_threads[i]);
+ if (vcpu) {
+ *ip = i;
+ return vcpu;
+ }
+ }
+ return NULL;
+}
+
+/* Used to traverse the list of runnable threads for a given vcore */
+#define for_each_runnable_thread(i, vcpu, vc) \
+ for (i = -1; (vcpu = next_runnable_thread(vc, &i)); )
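
The macro above replaces the old run_list traversal with a NULL-skipping scan of the fixed-size runnable_threads array; since slots are published with WRITE_ONCE() and read with READ_ONCE(), readers never see a torn pointer. A minimal usage sketch (hypothetical helper, not part of the patch):

static int count_ceded_vcpus(struct kvmppc_vcore *vc)
{
	struct kvm_vcpu *vcpu;
	int i, n = 0;

	/* iterates occupied slots only; NULL slots are skipped */
	for_each_runnable_thread(i, vcpu, vc)
		n += vcpu->arch.ceded;
	return n;
}
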
+
static bool kvmppc_ipi_thread(int cpu)
{
/* On POWER8 for IPIs to threads in the same core, use msgsnd */
@@ -991,6 +1038,9 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
break;
+ case BOOK3S_INTERRUPT_HV_RM_HARD:
+ r = RESUME_PASSTHROUGH;
+ break;
default:
kvmppc_dump_regs(vcpu);
printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
@@ -1149,6 +1199,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_DPDES:
*val = get_reg_val(id, vcpu->arch.vcore->dpdes);
break;
+ case KVM_REG_PPC_VTB:
+ *val = get_reg_val(id, vcpu->arch.vcore->vtb);
+ break;
case KVM_REG_PPC_DAWR:
*val = get_reg_val(id, vcpu->arch.dawr);
break;
@@ -1341,6 +1394,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_DPDES:
vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
break;
+ case KVM_REG_PPC_VTB:
+ vcpu->arch.vcore->vtb = set_reg_val(id, *val);
+ break;
case KVM_REG_PPC_DAWR:
vcpu->arch.dawr = set_reg_val(id, *val);
break;
@@ -1493,7 +1549,6 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
if (vcore == NULL)
return NULL;
- INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
spin_lock_init(&vcore->stoltb_lock);
init_swait_queue_head(&vcore->wq);
@@ -1802,7 +1857,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
spin_unlock_irq(&vcpu->arch.tbacct_lock);
--vc->n_runnable;
- list_del(&vcpu->arch.run_list);
+ WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL);
}
static int kvmppc_grab_hwthread(int cpu)
@@ -2048,66 +2103,6 @@ static void init_master_vcore(struct kvmppc_vcore *vc)
vc->conferring_threads = 0;
}
-/*
- * See if the existing subcores can be split into 3 (or fewer) subcores
- * of at most two threads each, so we can fit in another vcore. This
- * assumes there are at most two subcores and at most 6 threads in total.
- */
-static bool can_split_piggybacked_subcores(struct core_info *cip)
-{
- int sub, new_sub;
- int large_sub = -1;
- int thr;
- int n_subcores = cip->n_subcores;
- struct kvmppc_vcore *vc, *vcnext;
- struct kvmppc_vcore *master_vc = NULL;
-
- for (sub = 0; sub < cip->n_subcores; ++sub) {
- if (cip->subcore_threads[sub] <= 2)
- continue;
- if (large_sub >= 0)
- return false;
- large_sub = sub;
- vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
- preempt_list);
- if (vc->num_threads > 2)
- return false;
- n_subcores += (cip->subcore_threads[sub] - 1) >> 1;
- }
- if (large_sub < 0 || !subcore_config_ok(n_subcores + 1, 2))
- return false;
-
- /*
- * Seems feasible, so go through and move vcores to new subcores.
- * Note that when we have two or more vcores in one subcore,
- * all those vcores must have only one thread each.
- */
- new_sub = cip->n_subcores;
- thr = 0;
- sub = large_sub;
- list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) {
- if (thr >= 2) {
- list_del(&vc->preempt_list);
- list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]);
- /* vc->num_threads must be 1 */
- if (++cip->subcore_threads[new_sub] == 1) {
- cip->subcore_vm[new_sub] = vc->kvm;
- init_master_vcore(vc);
- master_vc = vc;
- ++cip->n_subcores;
- } else {
- vc->master_vcore = master_vc;
- ++new_sub;
- }
- }
- thr += vc->num_threads;
- }
- cip->subcore_threads[large_sub] = 2;
- cip->max_subcore_threads = 2;
-
- return true;
-}
-
static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
{
int n_threads = vc->num_threads;
@@ -2118,23 +2113,9 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
if (n_threads < cip->max_subcore_threads)
n_threads = cip->max_subcore_threads;
- if (subcore_config_ok(cip->n_subcores + 1, n_threads)) {
- cip->max_subcore_threads = n_threads;
- } else if (cip->n_subcores <= 2 && cip->total_threads <= 6 &&
- vc->num_threads <= 2) {
- /*
- * We may be able to fit another subcore in by
- * splitting an existing subcore with 3 or 4
- * threads into two 2-thread subcores, or one
- * with 5 or 6 threads into three subcores.
- * We can only do this if those subcores have
- * piggybacked virtual cores.
- */
- if (!can_split_piggybacked_subcores(cip))
- return false;
- } else {
+ if (!subcore_config_ok(cip->n_subcores + 1, n_threads))
return false;
- }
+ cip->max_subcore_threads = n_threads;
sub = cip->n_subcores;
++cip->n_subcores;
@@ -2148,43 +2129,6 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
return true;
}
-static bool can_piggyback_subcore(struct kvmppc_vcore *pvc,
- struct core_info *cip, int sub)
-{
- struct kvmppc_vcore *vc;
- int n_thr;
-
- vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
- preempt_list);
-
- /* require same VM and same per-core reg values */
- if (pvc->kvm != vc->kvm ||
- pvc->tb_offset != vc->tb_offset ||
- pvc->pcr != vc->pcr ||
- pvc->lpcr != vc->lpcr)
- return false;
-
- /* P8 guest with > 1 thread per core would see wrong TIR value */
- if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
- (vc->num_threads > 1 || pvc->num_threads > 1))
- return false;
-
- n_thr = cip->subcore_threads[sub] + pvc->num_threads;
- if (n_thr > cip->max_subcore_threads) {
- if (!subcore_config_ok(cip->n_subcores, n_thr))
- return false;
- cip->max_subcore_threads = n_thr;
- }
-
- cip->total_threads += pvc->num_threads;
- cip->subcore_threads[sub] = n_thr;
- pvc->master_vcore = vc;
- list_del(&pvc->preempt_list);
- list_add_tail(&pvc->preempt_list, &cip->vcs[sub]);
-
- return true;
-}
-
/*
* Work out whether it is possible to piggyback the execution of
* vcore *pvc onto the execution of the other vcores described in *cip.
@@ -2192,27 +2136,18 @@ static bool can_piggyback_subcore(struct kvmppc_vcore *pvc,
static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
int target_threads)
{
- int sub;
-
if (cip->total_threads + pvc->num_threads > target_threads)
return false;
- for (sub = 0; sub < cip->n_subcores; ++sub)
- if (cip->subcore_threads[sub] &&
- can_piggyback_subcore(pvc, cip, sub))
- return true;
-
- if (can_dynamic_split(pvc, cip))
- return true;
- return false;
+ return can_dynamic_split(pvc, cip);
}
static void prepare_threads(struct kvmppc_vcore *vc)
{
- struct kvm_vcpu *vcpu, *vnext;
+ int i;
+ struct kvm_vcpu *vcpu;
- list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
- arch.run_list) {
+ for_each_runnable_thread(i, vcpu, vc) {
if (signal_pending(vcpu->arch.run_task))
vcpu->arch.ret = -EINTR;
else if (vcpu->arch.vpa.update_pending ||
@@ -2259,15 +2194,14 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
{
- int still_running = 0;
+ int still_running = 0, i;
u64 now;
long ret;
- struct kvm_vcpu *vcpu, *vnext;
+ struct kvm_vcpu *vcpu;
spin_lock(&vc->lock);
now = get_tb();
- list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
- arch.run_list) {
+ for_each_runnable_thread(i, vcpu, vc) {
/* cancel pending dec exception if dec is positive */
if (now < vcpu->arch.dec_expires &&
kvmppc_core_pending_dec(vcpu))
@@ -2307,8 +2241,8 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
}
if (vc->n_runnable > 0 && vc->runner == NULL) {
/* make sure there's a candidate runner awake */
- vcpu = list_first_entry(&vc->runnable_threads,
- struct kvm_vcpu, arch.run_list);
+ i = -1;
+ vcpu = next_runnable_thread(vc, &i);
wake_up(&vcpu->arch.cpu_run);
}
}
@@ -2361,7 +2295,7 @@ static inline void kvmppc_set_host_core(int cpu)
*/
static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
{
- struct kvm_vcpu *vcpu, *vnext;
+ struct kvm_vcpu *vcpu;
int i;
int srcu_idx;
struct core_info core_info;
@@ -2397,8 +2331,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/
if ((threads_per_core > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
- list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
- arch.run_list) {
+ for_each_runnable_thread(i, vcpu, vc) {
vcpu->arch.ret = -EBUSY;
kvmppc_remove_runnable(vc, vcpu);
wake_up(&vcpu->arch.cpu_run);
@@ -2477,8 +2410,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
active |= 1 << thr;
list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
pvc->pcpu = pcpu + thr;
- list_for_each_entry(vcpu, &pvc->runnable_threads,
- arch.run_list) {
+ for_each_runnable_thread(i, vcpu, pvc) {
kvmppc_start_thread(vcpu, pvc);
kvmppc_create_dtl_entry(vcpu, pvc);
trace_kvm_guest_enter(vcpu);
@@ -2604,34 +2536,92 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
finish_wait(&vcpu->arch.cpu_run, &wait);
}
+static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
+{
+ /* 10us base */
+ if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
+ vc->halt_poll_ns = 10000;
+ else
+ vc->halt_poll_ns *= halt_poll_ns_grow;
+
+ if (vc->halt_poll_ns > halt_poll_max_ns)
+ vc->halt_poll_ns = halt_poll_max_ns;
+}
+
+static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
+{
+ if (halt_poll_ns_shrink == 0)
+ vc->halt_poll_ns = 0;
+ else
+ vc->halt_poll_ns /= halt_poll_ns_shrink;
+}
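
Taken together, grow/shrink implement a simple adaptive controller for the per-vcore poll interval. A stand-alone user-space model of the arithmetic (the 500000 ns ceiling is assumed purely for illustration; the real default is KVM_HALT_POLL_NS_DEFAULT):

#include <stdio.h>

int main(void)
{
	unsigned int poll = 0, grow = 2, shrink = 0, max = 500000;

	for (int i = 0; i < 8; i++) {
		/* grow_halt_poll_ns(): 10us base, then multiply, then cap */
		poll = (poll == 0 && grow) ? 10000 : poll * grow;
		if (poll > max)
			poll = max;
		printf("grow %d -> %u ns\n", i + 1, poll);
	}

	/* shrink_halt_poll_ns(): shrink == 0 resets straight to zero */
	poll = shrink ? poll / shrink : 0;
	printf("shrink -> %u ns\n", poll);
	return 0;
}
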
+
+/*
+ * Check to see if any of the runnable vcpus on the vcore have pending
+ * exceptions or are no longer ceded.
+ */
+static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ for_each_runnable_thread(i, vcpu, vc) {
+ if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded)
+ return 1;
+ }
+
+ return 0;
+}
+
/*
* All the vcpus in this vcore are idle, so wait for a decrementer
* or external interrupt to one of the vcpus. vc->lock is held.
*/
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
- struct kvm_vcpu *vcpu;
+ ktime_t cur, start_poll, start_wait;
int do_sleep = 1;
+ u64 block_ns;
DECLARE_SWAITQUEUE(wait);
- prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+ /* Poll for pending exceptions and ceded state */
+ cur = start_poll = ktime_get();
+ if (vc->halt_poll_ns) {
+ ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
+ ++vc->runner->stat.halt_attempted_poll;
- /*
- * Check one last time for pending exceptions and ceded state after
- * we put ourselves on the wait queue
- */
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
- if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
- do_sleep = 0;
- break;
+ vc->vcore_state = VCORE_POLLING;
+ spin_unlock(&vc->lock);
+
+ do {
+ if (kvmppc_vcore_check_block(vc)) {
+ do_sleep = 0;
+ break;
+ }
+ cur = ktime_get();
+ } while (single_task_running() && ktime_before(cur, stop));
+
+ spin_lock(&vc->lock);
+ vc->vcore_state = VCORE_INACTIVE;
+
+ if (!do_sleep) {
+ ++vc->runner->stat.halt_successful_poll;
+ goto out;
}
}
- if (!do_sleep) {
+ prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+
+ if (kvmppc_vcore_check_block(vc)) {
finish_swait(&vc->wq, &wait);
- return;
+ do_sleep = 0;
+ /* If we polled, count this as a successful poll */
+ if (vc->halt_poll_ns)
+ ++vc->runner->stat.halt_successful_poll;
+ goto out;
}
+ start_wait = ktime_get();
+
vc->vcore_state = VCORE_SLEEPING;
trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
@@ -2640,13 +2630,52 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_vcore_blocked(vc, 1);
+ ++vc->runner->stat.halt_successful_wait;
+
+ cur = ktime_get();
+
+out:
+ block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll);
+
+ /* Attribute wait time */
+ if (do_sleep) {
+ vc->runner->stat.halt_wait_ns +=
+ ktime_to_ns(cur) - ktime_to_ns(start_wait);
+ /* Attribute failed poll time */
+ if (vc->halt_poll_ns)
+ vc->runner->stat.halt_poll_fail_ns +=
+ ktime_to_ns(start_wait) -
+ ktime_to_ns(start_poll);
+ } else {
+ /* Attribute successful poll time */
+ if (vc->halt_poll_ns)
+ vc->runner->stat.halt_poll_success_ns +=
+ ktime_to_ns(cur) -
+ ktime_to_ns(start_poll);
+ }
+
+ /* Adjust poll time */
+ if (halt_poll_max_ns) {
+ if (block_ns <= vc->halt_poll_ns)
+ ;
+ /* We slept and blocked for longer than the max halt time */
+ else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns)
+ shrink_halt_poll_ns(vc);
+ /* We slept and our poll time is too small */
+ else if (vc->halt_poll_ns < halt_poll_max_ns &&
+ block_ns < halt_poll_max_ns)
+ grow_halt_poll_ns(vc);
+ } else
+ vc->halt_poll_ns = 0;
+
+ trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
}
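
A worked example of the attribution above (numbers invented): if the vcore polls for 30 us without finding work, then sleeps and wakes 400 us after the poll began, block_ns is 400 us, halt_poll_fail_ns grows by the 30 us spent polling, and halt_wait_ns by the remaining 370 us actually spent waiting. Had work arrived inside the poll window instead, the whole elapsed time would have been credited to halt_poll_success_ns. Whether the poll interval then grows or shrinks depends only on how block_ns compares with vc->halt_poll_ns and halt_poll_max_ns.
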
static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
- int n_ceded;
+ int n_ceded, i;
struct kvmppc_vcore *vc;
- struct kvm_vcpu *v, *vn;
+ struct kvm_vcpu *v;
trace_kvmppc_run_vcpu_enter(vcpu);
@@ -2666,7 +2695,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
vcpu->arch.busy_preempt = TB_NIL;
- list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
+ WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu);
++vc->n_runnable;
/*
@@ -2706,8 +2735,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
continue;
}
- list_for_each_entry_safe(v, vn, &vc->runnable_threads,
- arch.run_list) {
+ for_each_runnable_thread(i, v, vc) {
kvmppc_core_prepare_to_enter(v);
if (signal_pending(v->arch.run_task)) {
kvmppc_remove_runnable(vc, v);
@@ -2720,7 +2748,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
break;
n_ceded = 0;
- list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
+ for_each_runnable_thread(i, v, vc) {
if (!v->arch.pending_exceptions)
n_ceded += v->arch.ceded;
else
@@ -2759,8 +2787,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
/* Wake up some vcpu to run the core */
- v = list_first_entry(&vc->runnable_threads,
- struct kvm_vcpu, arch.run_list);
+ i = -1;
+ v = next_runnable_thread(vc, &i);
wake_up(&v->arch.cpu_run);
}
@@ -2818,7 +2846,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
r = kvmppc_book3s_hv_page_fault(run, vcpu,
vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
- }
+ } else if (r == RESUME_PASSTHROUGH)
+ r = kvmppc_xics_rm_complete(vcpu, 0);
} while (is_kvmppc_resume_guest(r));
out:
@@ -3247,6 +3276,8 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
kvmppc_free_vcores(kvm);
kvmppc_free_hpt(kvm);
+
+ kvmppc_free_pimap(kvm);
}
/* We don't need to emulate any privileged instructions or dcbz */
@@ -3282,6 +3313,184 @@ static int kvmppc_core_check_processor_compat_hv(void)
return 0;
}
+#ifdef CONFIG_KVM_XICS
+
+void kvmppc_free_pimap(struct kvm *kvm)
+{
+ kfree(kvm->arch.pimap);
+}
+
+static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void)
+{
+ return kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL);
+}
+
+static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
+{
+ struct irq_desc *desc;
+ struct kvmppc_irq_map *irq_map;
+ struct kvmppc_passthru_irqmap *pimap;
+ struct irq_chip *chip;
+ int i;
+
+ if (!kvm_irq_bypass)
+ return 1;
+
+ desc = irq_to_desc(host_irq);
+ if (!desc)
+ return -EIO;
+
+ mutex_lock(&kvm->lock);
+
+ pimap = kvm->arch.pimap;
+ if (pimap == NULL) {
+ /* First call, allocate structure to hold IRQ map */
+ pimap = kvmppc_alloc_pimap();
+ if (pimap == NULL) {
+ mutex_unlock(&kvm->lock);
+ return -ENOMEM;
+ }
+ kvm->arch.pimap = pimap;
+ }
+
+ /*
+ * For now, we only support interrupts for which the EOI operation
+ * is an OPAL call followed by a write to XIRR, since that's
+ * what our real-mode EOI code does.
+ */
+ chip = irq_data_get_irq_chip(&desc->irq_data);
+ if (!chip || !is_pnv_opal_msi(chip)) {
+ pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
+ host_irq, guest_gsi);
+ mutex_unlock(&kvm->lock);
+ return -ENOENT;
+ }
+
+ /*
+ * See if we already have an entry for this guest IRQ number.
+ * If it's mapped to a hardware IRQ number, that's an error,
+ * otherwise re-use this entry.
+ */
+ for (i = 0; i < pimap->n_mapped; i++) {
+ if (guest_gsi == pimap->mapped[i].v_hwirq) {
+ if (pimap->mapped[i].r_hwirq) {
+ mutex_unlock(&kvm->lock);
+ return -EINVAL;
+ }
+ break;
+ }
+ }
+
+ if (i == KVMPPC_PIRQ_MAPPED) {
+ mutex_unlock(&kvm->lock);
+ return -EAGAIN; /* table is full */
+ }
+
+ irq_map = &pimap->mapped[i];
+
+ irq_map->v_hwirq = guest_gsi;
+ irq_map->desc = desc;
+
+ /*
+ * Order the above two stores before the next to serialize with
+ * the KVM real mode handler.
+ */
+ smp_wmb();
+ irq_map->r_hwirq = desc->irq_data.hwirq;
+
+ if (i == pimap->n_mapped)
+ pimap->n_mapped++;
+
+ kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
+
+ mutex_unlock(&kvm->lock);
+
+ return 0;
+}
+
+static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
+{
+ struct irq_desc *desc;
+ struct kvmppc_passthru_irqmap *pimap;
+ int i;
+
+ if (!kvm_irq_bypass)
+ return 0;
+
+ desc = irq_to_desc(host_irq);
+ if (!desc)
+ return -EIO;
+
+ mutex_lock(&kvm->lock);
+
+ if (kvm->arch.pimap == NULL) {
+ mutex_unlock(&kvm->lock);
+ return 0;
+ }
+ pimap = kvm->arch.pimap;
+
+ for (i = 0; i < pimap->n_mapped; i++) {
+ if (guest_gsi == pimap->mapped[i].v_hwirq)
+ break;
+ }
+
+ if (i == pimap->n_mapped) {
+ mutex_unlock(&kvm->lock);
+ return -ENODEV;
+ }
+
+ kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
+
+ /* invalidate the entry */
+ pimap->mapped[i].r_hwirq = 0;
+
+ /*
+ * We don't free this structure even when the count goes to
+ * zero. The structure is freed when we destroy the VM.
+ */
+
+ mutex_unlock(&kvm->lock);
+ return 0;
+}
+
+static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+ int ret = 0;
+ struct kvm_kernel_irqfd *irqfd =
+ container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+ irqfd->producer = prod;
+
+ ret = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
+ if (ret)
+ pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n",
+ prod->irq, irqfd->gsi, ret);
+
+ return ret;
+}
+
+static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+ int ret;
+ struct kvm_kernel_irqfd *irqfd =
+ container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+ irqfd->producer = NULL;
+
+ /*
+ * When the producer of a consumer is unregistered, we change back to
+ * the default external interrupt handling mode - KVM real mode
+ * will switch back to the host.
+ */
+ ret = kvmppc_clr_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
+ if (ret)
+ pr_warn("kvmppc_clr_passthru_irq (irq %d, gsi %d) fails: %d\n",
+ prod->irq, irqfd->gsi, ret);
+}
+#endif
+
static long kvm_arch_vm_ioctl_hv(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -3400,6 +3609,10 @@ static struct kvmppc_ops kvm_ops_hv = {
.fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
.arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
.hcall_implemented = kvmppc_hcall_impl_hv,
+#ifdef CONFIG_KVM_XICS
+ .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
+ .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
+#endif
};
static int kvm_init_subcore_bitmap(void)
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 5f0380db3eab..0c84d6bc8356 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -25,6 +25,7 @@
#include <asm/xics.h>
#include <asm/dbell.h>
#include <asm/cputhreads.h>
+#include <asm/io.h>
#define KVM_CMA_CHUNK_ORDER 18
@@ -286,3 +287,158 @@ void kvmhv_commence_exit(int trap)
struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
+
+#ifdef CONFIG_KVM_XICS
+static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap,
+ u32 xisr)
+{
+ int i;
+
+ /*
+ * We access the mapped array here without a lock. That
+ * is safe because we never reduce the number of entries
+ * in the array and we never change the v_hwirq field of
+ * an entry once it is set.
+ *
+ * We have also carefully ordered the stores in the writer
+ * and the loads here in the reader, so that if we find a matching
+ * hwirq here, the associated GSI and irq_desc fields are valid.
+ */
+ for (i = 0; i < pimap->n_mapped; i++) {
+ if (xisr == pimap->mapped[i].r_hwirq) {
+ /*
+ * Order subsequent reads in the caller to serialize
+ * with the writer.
+ */
+ smp_rmb();
+ return &pimap->mapped[i];
+ }
+ }
+ return NULL;
+}
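
The comment above refers to the store ordering in kvmppc_set_passthru_irq(), which fills in v_hwirq and desc, issues smp_wmb(), and only then publishes r_hwirq. A stripped-down model of that publish/lookup pairing (names invented, not part of the patch):

struct entry {
	u32 r_hwirq;	/* publish key: written last, matched by readers */
	u32 v_hwirq;	/* payload, valid once r_hwirq is visible */
};

static void publish(struct entry *e, u32 gsi, u32 hwirq)
{
	e->v_hwirq = gsi;	/* 1: fill in the payload */
	smp_wmb();		/* 2: order payload before the key */
	e->r_hwirq = hwirq;	/* 3: publish the entry */
}

static struct entry *lookup(struct entry *e, u32 xisr)
{
	if (e->r_hwirq != xisr)
		return NULL;
	smp_rmb();		/* pairs with smp_wmb() in publish() */
	return e;		/* payload reads now see publish()'s stores */
}
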
+
+/*
+ * If we have an interrupt that's not an IPI, check if we have a
+ * passthrough adapter and if so, check if this external interrupt
+ * is for the adapter.
+ * We will attempt to deliver the IRQ directly to the target VCPU's
+ * ICP, the virtual ICP (based on affinity - the xive value in ICS).
+ *
+ * If the delivery fails or if this is not for a passthrough adapter,
+ * return to the host to handle this interrupt. We earlier
+ * saved a copy of the XIRR in the PACA; it will be picked up by
+ * the host ICP driver.
+ */
+static int kvmppc_check_passthru(u32 xisr, __be32 xirr)
+{
+ struct kvmppc_passthru_irqmap *pimap;
+ struct kvmppc_irq_map *irq_map;
+ struct kvm_vcpu *vcpu;
+
+ vcpu = local_paca->kvm_hstate.kvm_vcpu;
+ if (!vcpu)
+ return 1;
+ pimap = kvmppc_get_passthru_irqmap(vcpu->kvm);
+ if (!pimap)
+ return 1;
+ irq_map = get_irqmap(pimap, xisr);
+ if (!irq_map)
+ return 1;
+
+ /* We're handling this interrupt, generic code doesn't need to */
+ local_paca->kvm_hstate.saved_xirr = 0;
+
+ return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap);
+}
+
+#else
+static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr)
+{
+ return 1;
+}
+#endif
+
+/*
+ * Determine what sort of external interrupt is pending (if any).
+ * Returns:
+ * 0 if no interrupt is pending
+ * 1 if an interrupt is pending that needs to be handled by the host
+ * 2 Passthrough that needs completion in the host
+ * -1 if there was a guest wakeup IPI (which has now been cleared)
+ * -2 if there is PCI passthrough external interrupt that was handled
+ */
+
+long kvmppc_read_intr(void)
+{
+ unsigned long xics_phys;
+ u32 h_xirr;
+ __be32 xirr;
+ u32 xisr;
+ u8 host_ipi;
+
+ /* see if a host IPI is pending */
+ host_ipi = local_paca->kvm_hstate.host_ipi;
+ if (host_ipi)
+ return 1;
+
+ /* Now read the interrupt from the ICP */
+ xics_phys = local_paca->kvm_hstate.xics_phys;
+ if (unlikely(!xics_phys))
+ return 1;
+
+ /*
+ * Save XIRR for later. Since we get control in reverse endian
+ * on LE systems, save it byte reversed and fetch it back in
+ * host endian. Note that xirr is the value read from the
+ * XIRR register, while h_xirr is the host endian version.
+ */
+ xirr = _lwzcix(xics_phys + XICS_XIRR);
+ h_xirr = be32_to_cpu(xirr);
+ local_paca->kvm_hstate.saved_xirr = h_xirr;
+ xisr = h_xirr & 0xffffff;
+ /*
+ * Ensure that the store/load complete to guarantee that all side
+ * effects of the load from XIRR have completed
+ */
+ smp_mb();
+
+ /* if nothing pending in the ICP */
+ if (!xisr)
+ return 0;
+
+ /* We found something in the ICP...
+ *
+ * If it is an IPI, clear the MFRR and EOI it.
+ */
+ if (xisr == XICS_IPI) {
+ _stbcix(xics_phys + XICS_MFRR, 0xff);
+ _stwcix(xics_phys + XICS_XIRR, xirr);
+ /*
+ * Need to ensure side effects of above stores
+ * complete before proceeding.
+ */
+ smp_mb();
+
+ /*
+ * We need to re-check host IPI now in case it got set in the
+ * meantime. If it's clear, we bounce the interrupt to the
+ * guest
+ */
+ host_ipi = local_paca->kvm_hstate.host_ipi;
+ if (unlikely(host_ipi != 0)) {
+ /* We raced with the host,
+ * we need to resend that IPI, bummer
+ */
+ _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY);
+ /* Let side effects complete */
+ smp_mb();
+ return 1;
+ }
+
+ /* OK, it's an IPI for us */
+ local_paca->kvm_hstate.saved_xirr = 0;
+ return -1;
+ }
+
+ return kvmppc_check_passthru(xisr, xirr);
+}
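
For reference, a C-level sketch of how callers are expected to dispatch on these return values (the real dispatch is the assembly added to book3s_hv_rmhandlers.S below; the helper name is invented):

static bool should_exit_to_host(long r, u32 *trap)
{
	if (r == 2) {			/* passthrough: host completes EOI */
		*trap = BOOK3S_INTERRUPT_HV_RM_HARD;
		return true;
	}
	if (r == 1) {			/* ordinary host interrupt */
		*trap = BOOK3S_INTERRUPT_EXTERNAL;
		return true;
	}
	return false;			/* 0, -1 or -2: back to the guest */
}
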
diff --git a/arch/powerpc/kernel/hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c
index e3f738eb1cac..e3f738eb1cac 100644
--- a/arch/powerpc/kernel/hmi.c
+++ b/arch/powerpc/kvm/book3s_hv_hmi.c
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 980d8a6f7284..82ff5de8b1e7 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
+#include <linux/kernel_stat.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
@@ -18,7 +19,10 @@
#include <asm/debug.h>
#include <asm/synch.h>
#include <asm/cputhreads.h>
+#include <asm/pgtable.h>
#include <asm/ppc-opcode.h>
+#include <asm/pnv-pci.h>
+#include <asm/opal.h>
#include "book3s_xics.h"
@@ -26,9 +30,12 @@
int h_ipi_redirect = 1;
EXPORT_SYMBOL(h_ipi_redirect);
+int kvm_irq_bypass = 1;
+EXPORT_SYMBOL(kvm_irq_bypass);
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u32 new_irq);
+static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu);
/* -- ICS routines -- */
static void ics_rm_check_resend(struct kvmppc_xics *xics,
@@ -708,10 +715,123 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
icp->rm_action |= XICS_RM_NOTIFY_EOI;
icp->rm_eoied_irq = irq;
}
+
+ if (state->host_irq) {
+ ++vcpu->stat.pthru_all;
+ if (state->intr_cpu != -1) {
+ int pcpu = raw_smp_processor_id();
+
+ pcpu = cpu_first_thread_sibling(pcpu);
+ ++vcpu->stat.pthru_host;
+ if (state->intr_cpu != pcpu) {
+ ++vcpu->stat.pthru_bad_aff;
+ xics_opal_rm_set_server(state->host_irq, pcpu);
+ }
+ state->intr_cpu = -1;
+ }
+ }
bail:
return check_too_hard(xics, icp);
}
+unsigned long eoi_rc;
+
+static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr)
+{
+ unsigned long xics_phys;
+ int64_t rc;
+
+ rc = pnv_opal_pci_msi_eoi(c, hwirq);
+
+ if (rc)
+ eoi_rc = rc;
+
+ iosync();
+
+ /* EOI it */
+ xics_phys = local_paca->kvm_hstate.xics_phys;
+ _stwcix(xics_phys + XICS_XIRR, xirr);
+}
+
+static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu)
+{
+ unsigned int mangle_cpu = get_hard_smp_processor_id(server_cpu) << 2;
+
+ return opal_rm_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY);
+}
+
+/*
+ * Increment a per-CPU 32-bit unsigned integer variable.
+ * Safe to call in real mode. Handles vmalloc'ed addresses.
+ *
+ * ToDo: Make this work for any integral type
+ */
+
+static inline void this_cpu_inc_rm(unsigned int __percpu *addr)
+{
+ unsigned long l;
+ unsigned int *raddr;
+ int cpu = smp_processor_id();
+
+ raddr = per_cpu_ptr(addr, cpu);
+ l = (unsigned long)raddr;
+
+ if (REGION_ID(l) == VMALLOC_REGION_ID) {
+ l = vmalloc_to_phys(raddr);
+ raddr = (unsigned int *)l;
+ }
+ ++*raddr;
+}
+
+/*
+ * We don't try to update the flags in the irq_desc 'istate' field in
+ * here as would happen in the normal IRQ handling path for several reasons:
+ * - state flags represent internal IRQ state and are not expected to be
+ * updated outside the IRQ subsystem
+ * - more importantly, these are useful for edge triggered interrupts,
+ * IRQ probing, etc., but we are only handling MSI/MSI-X interrupts here
+ * and these states shouldn't apply to us.
+ *
+ * However, we do update irq_stats - we somewhat duplicate the code in
+ * kstat_incr_irqs_this_cpu() for this since this function is defined
+ * in irq/internal.h which we don't want to include here.
+ * The only difference is that desc->kstat_irqs is an allocated per CPU
+ * variable and could have been vmalloc'ed, so we can't directly
+ * call __this_cpu_inc() on it. The kstat structure is a static
+ * per CPU variable and it should be accessible by real-mode KVM.
+ */
+static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc)
+{
+ this_cpu_inc_rm(desc->kstat_irqs);
+ __this_cpu_inc(kstat.irqs_sum);
+}
+
+long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
+ u32 xirr,
+ struct kvmppc_irq_map *irq_map,
+ struct kvmppc_passthru_irqmap *pimap)
+{
+ struct kvmppc_xics *xics;
+ struct kvmppc_icp *icp;
+ u32 irq;
+
+ irq = irq_map->v_hwirq;
+ xics = vcpu->kvm->arch.xics;
+ icp = vcpu->arch.icp;
+
+ kvmppc_rm_handle_irq_desc(irq_map->desc);
+ icp_rm_deliver_irq(xics, icp, irq);
+
+ /* EOI the interrupt */
+ icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr);
+
+ if (check_too_hard(xics, icp) == H_TOO_HARD)
+ return 2;
+ else
+ return -2;
+}
+
/* --- Non-real mode XICS-related built-in routines --- */
/**
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 975655573844..c3c1d1bcfc67 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -221,6 +221,13 @@ kvmppc_primary_no_guest:
li r3, 0 /* Don't wake on privileged (OS) doorbell */
b kvm_do_nap
+/*
+ * kvm_novcpu_wakeup
+ * Entered from kvm_start_guest if kvm_hstate.napping is set
+ * to NAPPING_NOVCPU
+ * r2 = kernel TOC
+ * r13 = paca
+ */
kvm_novcpu_wakeup:
ld r1, HSTATE_HOST_R1(r13)
ld r5, HSTATE_KVM_VCORE(r13)
@@ -230,6 +237,13 @@ kvm_novcpu_wakeup:
/* check the wake reason */
bl kvmppc_check_wake_reason
+ /*
+ * Restore volatile registers since we could have called
+ * a C routine in kvmppc_check_wake_reason.
+ * r5 = VCORE
+ */
+ ld r5, HSTATE_KVM_VCORE(r13)
+
/* see if any other thread is already exiting */
lwz r0, VCORE_ENTRY_EXIT(r5)
cmpwi r0, 0x100
@@ -322,6 +336,11 @@ kvm_start_guest:
/* Check the wake reason in SRR1 to see why we got here */
bl kvmppc_check_wake_reason
+ /*
+ * kvmppc_check_wake_reason could invoke a C routine, but we
+ * have no live volatile register state to restore when we return.
+ */
+
cmpdi r3, 0
bge kvm_no_guest
@@ -625,9 +644,11 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
38:
BEGIN_FTR_SECTION
- /* DPDES is shared between threads */
+ /* DPDES and VTB are shared between threads */
ld r8, VCORE_DPDES(r5)
+ ld r7, VCORE_VTB(r5)
mtspr SPRN_DPDES, r8
+ mtspr SPRN_VTB, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Mark the subcore state as inside guest */
@@ -787,10 +808,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_CIABR, r7
mtspr SPRN_TAR, r8
ld r5, VCPU_IC(r4)
- ld r6, VCPU_VTB(r4)
- mtspr SPRN_IC, r5
- mtspr SPRN_VTB, r6
ld r8, VCPU_EBBHR(r4)
+ mtspr SPRN_IC, r5
mtspr SPRN_EBBHR, r8
ld r5, VCPU_EBBRR(r4)
ld r6, VCPU_BESCR(r4)
@@ -881,6 +900,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
cmpwi r3, 512 /* 1 microsecond */
blt hdec_soon
+deliver_guest_interrupt:
ld r6, VCPU_CTR(r4)
ld r7, VCPU_XER(r4)
@@ -895,7 +915,6 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
mtspr SPRN_SRR0, r6
mtspr SPRN_SRR1, r7
-deliver_guest_interrupt:
/* r11 = vcpu->arch.msr & ~MSR_HV */
rldicl r11, r11, 63 - MSR_HV_LG, 1
rotldi r11, r11, 1 + MSR_HV_LG
@@ -1155,10 +1174,54 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* set, we know the host wants us out so let's do it now
*/
bl kvmppc_read_intr
+
+ /*
+ * Restore the active volatile registers after returning from
+ * a C function.
+ */
+ ld r9, HSTATE_KVM_VCPU(r13)
+ li r12, BOOK3S_INTERRUPT_EXTERNAL
+
+ /*
+ * kvmppc_read_intr return codes:
+ *
+ * Exit to host (r3 > 0)
+ * 1 An interrupt is pending that needs to be handled by the host
+ * Exit guest and return to host by branching to guest_exit_cont
+ *
+ * 2 Passthrough that needs completion in the host
+ * Exit guest and return to host by branching to guest_exit_cont
+ * However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
+ * to indicate to the host to complete handling the interrupt
+ *
+ * Before returning to the guest, we check if any CPU is heading out
+ * to the host and if so, we head out also. Otherwise we fall
+ * through to the checks on return values <= 0 below.
+ *
+ * Return to guest (r3 <= 0)
+ * 0 No external interrupt is pending
+ * -1 A guest wakeup IPI (which has now been cleared)
+ * In either case, we return to guest to deliver any pending
+ * guest interrupts.
+ *
+ * -2 A PCI passthrough external interrupt was handled
+ * (interrupt was delivered directly to guest)
+ * Return to guest to deliver any pending guest interrupts.
+ */
+
+ cmpdi r3, 1
+ ble 1f
+
+ /* Return code = 2 */
+ li r12, BOOK3S_INTERRUPT_HV_RM_HARD
+ stw r12, VCPU_TRAP(r9)
+ b guest_exit_cont
+
+1: /* Return code <= 1 */
cmpdi r3, 0
bgt guest_exit_cont
- /* Check if any CPU is heading out to the host, if so head out too */
+ /* Return code <= 0 */
4: ld r5, HSTATE_KVM_VCORE(r13)
lwz r0, VCORE_ENTRY_EXIT(r5)
cmpwi r0, 0x100
@@ -1271,10 +1334,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
stw r6, VCPU_PSPB(r9)
std r7, VCPU_FSCR(r9)
mfspr r5, SPRN_IC
- mfspr r6, SPRN_VTB
mfspr r7, SPRN_TAR
std r5, VCPU_IC(r9)
- std r6, VCPU_VTB(r9)
std r7, VCPU_TAR(r9)
mfspr r8, SPRN_EBBHR
std r8, VCPU_EBBHR(r9)
@@ -1501,9 +1562,11 @@ kvmhv_switch_to_host:
isync
BEGIN_FTR_SECTION
- /* DPDES is shared between threads */
+ /* DPDES and VTB are shared between threads */
mfspr r7, SPRN_DPDES
+ mfspr r8, SPRN_VTB
std r7, VCORE_DPDES(r5)
+ std r8, VCORE_VTB(r5)
/* clear DPDES so we don't get guest doorbells in the host */
li r8, 0
mtspr SPRN_DPDES, r8
@@ -2213,10 +2276,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
ld r29, VCPU_GPR(R29)(r4)
ld r30, VCPU_GPR(R30)(r4)
ld r31, VCPU_GPR(R31)(r4)
-
+
/* Check the wake reason in SRR1 to see why we got here */
bl kvmppc_check_wake_reason
+ /*
+ * Restore volatile registers since we could have called a
+ * C routine in kvmppc_check_wake_reason
+ * r4 = VCPU
+ * r3 tells us whether we need to return to the host or not
+ * WARNING: r3 gets checked further down;
+ * do not modify it until that check is done.
+ */
+ ld r4, HSTATE_KVM_VCPU(r13)
+
/* clear our bit in vcore->napping_threads */
34: ld r5,HSTATE_KVM_VCORE(r13)
lbz r7,HSTATE_PTID(r13)
@@ -2230,7 +2303,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
li r0,0
stb r0,HSTATE_NAPPING(r13)
- /* See if the wake reason means we need to exit */
+ /* See if the wake reason saved in r3 means we need to exit */
stw r12, VCPU_TRAP(r4)
mr r9, r4
cmpdi r3, 0
@@ -2297,10 +2370,14 @@ machine_check_realmode:
* 0 if nothing needs to be done
* 1 if something happened that needs to be handled by the host
* -1 if there was a guest wakeup (IPI or msgsnd)
+ * -2 if we handled a PCI passthrough interrupt (returned by
+ * kvmppc_read_intr only)
*
* Also sets r12 to the interrupt vector for any interrupt that needs
* to be handled now by the host (0x500 for external interrupt), or zero.
- * Modifies r0, r6, r7, r8.
+ * Modifies all volatile registers (since it may call a C function).
+ * This routine calls kvmppc_read_intr, a C function, if an external
+ * interrupt is pending.
*/
kvmppc_check_wake_reason:
mfspr r6, SPRN_SRR1
@@ -2310,8 +2387,7 @@ FTR_SECTION_ELSE
rlwinm r6, r6, 45-31, 0xe /* P7 wake reason field is 3 bits */
ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
cmpwi r6, 8 /* was it an external interrupt? */
- li r12, BOOK3S_INTERRUPT_EXTERNAL
- beq kvmppc_read_intr /* if so, see what it was */
+ beq 7f /* if so, see what it was */
li r3, 0
li r12, 0
cmpwi r6, 6 /* was it the decrementer? */
@@ -2350,83 +2426,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r3, 1
blr
-/*
- * Determine what sort of external interrupt is pending (if any).
- * Returns:
- * 0 if no interrupt is pending
- * 1 if an interrupt is pending that needs to be handled by the host
- * -1 if there was a guest wakeup IPI (which has now been cleared)
- * Modifies r0, r6, r7, r8, returns value in r3.
- */
-kvmppc_read_intr:
- /* see if a host IPI is pending */
- li r3, 1
- lbz r0, HSTATE_HOST_IPI(r13)
- cmpwi r0, 0
- bne 1f
+ /* external interrupt - create a stack frame so we can call C */
+7: mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -PPC_MIN_STKFRM(r1)
+ bl kvmppc_read_intr
+ nop
+ li r12, BOOK3S_INTERRUPT_EXTERNAL
+ cmpdi r3, 1
+ ble 1f
- /* Now read the interrupt from the ICP */
- ld r6, HSTATE_XICS_PHYS(r13)
- li r7, XICS_XIRR
- cmpdi r6, 0
- beq- 1f
- lwzcix r0, r6, r7
/*
- * Save XIRR for later. Since we get in in reverse endian on LE
- * systems, save it byte reversed and fetch it back in host endian.
- */
- li r3, HSTATE_SAVED_XIRR
- STWX_BE r0, r3, r13
-#ifdef __LITTLE_ENDIAN__
- lwz r3, HSTATE_SAVED_XIRR(r13)
-#else
- mr r3, r0
-#endif
- rlwinm. r3, r3, 0, 0xffffff
- sync
- beq 1f /* if nothing pending in the ICP */
-
- /* We found something in the ICP...
- *
- * If it's not an IPI, stash it in the PACA and return to
- * the host, we don't (yet) handle directing real external
- * interrupts directly to the guest
+ * Return code of 2 means PCI passthrough interrupt, but
+ * we need to return to the host to complete handling the
+ * interrupt. Trap reason is expected in r12 by guest
+ * exit code.
*/
- cmpwi r3, XICS_IPI /* if there is, is it an IPI? */
- bne 42f
-
- /* It's an IPI, clear the MFRR and EOI it */
- li r3, 0xff
- li r8, XICS_MFRR
- stbcix r3, r6, r8 /* clear the IPI */
- stwcix r0, r6, r7 /* EOI it */
- sync
-
- /* We need to re-check host IPI now in case it got set in the
- * meantime. If it's clear, we bounce the interrupt to the
- * guest
- */
- lbz r0, HSTATE_HOST_IPI(r13)
- cmpwi r0, 0
- bne- 43f
-
- /* OK, it's an IPI for us */
- li r12, 0
- li r3, -1
-1: blr
-
-42: /* It's not an IPI and it's for the host. We saved a copy of XIRR in
- * the PACA earlier, it will be picked up by the host ICP driver
- */
- li r3, 1
- b 1b
-
-43: /* We raced with the host, we need to resend that IPI, bummer */
- li r0, IPI_PRIORITY
- stbcix r0, r6, r8 /* set the IPI */
- sync
- li r3, 1
- b 1b
+ li r12, BOOK3S_INTERRUPT_HV_RM_HARD
+1:
+ ld r0, PPC_MIN_STKFRM+PPC_LR_STKOFF(r1)
+ addi r1, r1, PPC_MIN_STKFRM
+ mtlr r0
+ blr
/*
* Save away FP, VMX and VSX registers.
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index e76f79a45988..826c541a12af 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -226,7 +226,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
*/
vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb;
vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb;
- vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb;
+ to_book3s(vcpu)->vtb += get_vtb() - vcpu->arch.entry_vtb;
if (cpu_has_feature(CPU_FTR_ARCH_207S))
vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic;
svcpu->in_use = false;
@@ -448,6 +448,8 @@ void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
case PVR_POWER7:
case PVR_POWER7p:
case PVR_POWER8:
+ case PVR_POWER8E:
+ case PVR_POWER8NVL:
vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
BOOK3S_HFLAG_NEW_TLBIE;
break;
@@ -1361,6 +1363,9 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_HIOR:
*val = get_reg_val(id, to_book3s(vcpu)->hior);
break;
+ case KVM_REG_PPC_VTB:
+ *val = get_reg_val(id, to_book3s(vcpu)->vtb);
+ break;
case KVM_REG_PPC_LPCR:
case KVM_REG_PPC_LPCR_64:
/*
@@ -1397,6 +1402,9 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
to_book3s(vcpu)->hior = set_reg_val(id, *val);
to_book3s(vcpu)->hior_explicit = true;
break;
+ case KVM_REG_PPC_VTB:
+ to_book3s(vcpu)->vtb = set_reg_val(id, *val);
+ break;
case KVM_REG_PPC_LPCR:
case KVM_REG_PPC_LPCR_64:
kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val));
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index a75ba38a2d81..3bdc639157c1 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -99,6 +99,10 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
return 0;
}
+ /* Record which CPU this arrived on for passed-through interrupts */
+ if (state->host_irq)
+ state->intr_cpu = raw_smp_processor_id();
+
/* Attempt delivery */
icp_deliver_irq(xics, NULL, irq);
@@ -812,7 +816,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
return H_SUCCESS;
}
-static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
+int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
{
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;
@@ -841,6 +845,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
return H_SUCCESS;
}
+EXPORT_SYMBOL_GPL(kvmppc_xics_rm_complete);
int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
{
@@ -892,6 +897,21 @@ EXPORT_SYMBOL_GPL(kvmppc_xics_hcall);
/* -- Initialisation code etc. -- */
+static void xics_debugfs_irqmap(struct seq_file *m,
+ struct kvmppc_passthru_irqmap *pimap)
+{
+ int i;
+
+ if (!pimap)
+ return;
+ seq_printf(m, "========\nPIRQ mappings: %d maps\n===========\n",
+ pimap->n_mapped);
+ for (i = 0; i < pimap->n_mapped; i++) {
+ seq_printf(m, "r_hwirq=%x, v_hwirq=%x\n",
+ pimap->mapped[i].r_hwirq, pimap->mapped[i].v_hwirq);
+ }
+}
+
static int xics_debug_show(struct seq_file *m, void *private)
{
struct kvmppc_xics *xics = m->private;
@@ -913,6 +933,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
t_check_resend = 0;
t_reject = 0;
+ xics_debugfs_irqmap(m, kvm->arch.pimap);
+
seq_printf(m, "=========\nICP state\n=========\n");
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -1252,6 +1274,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
{
struct kvmppc_xics *xics = kvm->arch.xics;
+ if (!xics)
+ return -ENODEV;
return ics_deliver_irq(xics, irq, level);
}
@@ -1329,20 +1353,16 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
xics->kvm = kvm;
/* Already there ? */
- mutex_lock(&kvm->lock);
if (kvm->arch.xics)
ret = -EEXIST;
else
kvm->arch.xics = xics;
- mutex_unlock(&kvm->lock);
if (ret) {
kfree(xics);
return ret;
}
- xics_debugfs_init(xics);
-
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (cpu_has_feature(CPU_FTR_ARCH_206)) {
/* Enable real mode support */
@@ -1354,9 +1374,17 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
return 0;
}
+static void kvmppc_xics_init(struct kvm_device *dev)
+{
+ struct kvmppc_xics *xics = (struct kvmppc_xics *)dev->private;
+
+ xics_debugfs_init(xics);
+}
+
struct kvm_device_ops kvm_xics_ops = {
.name = "kvm-xics",
.create = kvmppc_xics_create,
+ .init = kvmppc_xics_init,
.destroy = kvmppc_xics_free,
.set_attr = xics_set_attr,
.get_attr = xics_get_attr,
@@ -1414,3 +1442,34 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
return pin;
}
+
+void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq,
+ unsigned long host_irq)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ struct kvmppc_ics *ics;
+ u16 idx;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &idx);
+ if (!ics)
+ return;
+
+ ics->irq_state[idx].host_irq = host_irq;
+ ics->irq_state[idx].intr_cpu = -1;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xics_set_mapped);
+
+void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long irq,
+ unsigned long host_irq)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ struct kvmppc_ics *ics;
+ u16 idx;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &idx);
+ if (!ics)
+ return;
+
+ ics->irq_state[idx].host_irq = 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xics_clr_mapped);
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index a46b954055c4..2a50320b55ca 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -42,6 +42,8 @@ struct ics_irq_state {
u8 lsi; /* level-sensitive interrupt */
u8 asserted; /* Only for LSI */
u8 exists;
+ int intr_cpu;
+ u32 host_irq;
};
/* Atomic ICP state, updated with a single compare & swap */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 02b4672f7347..df3f2706d3e5 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -2038,7 +2038,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
if (type == KVMPPC_DEBUG_NONE)
continue;
- if (type & !(KVMPPC_DEBUG_WATCH_READ |
+ if (type & ~(KVMPPC_DEBUG_WATCH_READ |
KVMPPC_DEBUG_WATCH_WRITE |
KVMPPC_DEBUG_BREAKPOINT))
return -EINVAL;
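
The one-character change above fixes a real bug: `!mask` evaluates to 0, so `type & !mask` was always false and undefined debug types were silently accepted, whereas `type & ~mask` is nonzero exactly when an unknown bit is set. A stand-alone demonstration (bit values invented for illustration):

#include <assert.h>

#define WATCH_READ  (1 << 0)	/* bit positions assumed for illustration */
#define WATCH_WRITE (1 << 1)
#define BREAKPOINT  (1 << 2)

int main(void)
{
	int mask = WATCH_READ | WATCH_WRITE | BREAKPOINT;
	int bogus = 1 << 5;		/* some undefined debug type bit */

	assert((bogus & !mask) == 0);	/* old test: never rejects anything */
	assert((bogus & ~mask) != 0);	/* fixed test: catches the bad bit */
	return 0;
}
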
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 29911a07bcdb..ddbf8f0284c0 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -743,7 +743,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
char *virt;
struct page **pages;
struct tlbe_priv *privs[2] = {};
- u64 *g2h_bitmap = NULL;
+ u64 *g2h_bitmap;
size_t array_len;
u32 sets;
int num_pages, ret, i;
@@ -779,41 +779,44 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) -
cfg->array / PAGE_SIZE;
- pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
+ pages = kmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
if (!pages)
return -ENOMEM;
ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
if (ret < 0)
- goto err_pages;
+ goto free_pages;
if (ret != num_pages) {
num_pages = ret;
ret = -EFAULT;
- goto err_put_page;
+ goto put_pages;
}
virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
if (!virt) {
ret = -ENOMEM;
- goto err_put_page;
+ goto put_pages;
}
- privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
- GFP_KERNEL);
- privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
- GFP_KERNEL);
+ privs[0] = kcalloc(params.tlb_sizes[0], sizeof(*privs[0]), GFP_KERNEL);
+ if (!privs[0]) {
+ ret = -ENOMEM;
+ goto put_pages;
+ }
- if (!privs[0] || !privs[1]) {
+ privs[1] = kcalloc(params.tlb_sizes[1], sizeof(*privs[1]), GFP_KERNEL);
+ if (!privs[1]) {
ret = -ENOMEM;
- goto err_privs;
+ goto free_privs_first;
}
- g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1],
- GFP_KERNEL);
+ g2h_bitmap = kcalloc(params.tlb_sizes[1],
+ sizeof(*g2h_bitmap),
+ GFP_KERNEL);
if (!g2h_bitmap) {
ret = -ENOMEM;
- goto err_privs;
+ goto free_privs_second;
}
free_gtlb(vcpu_e500);
@@ -845,16 +848,14 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;
-
-err_privs:
- kfree(privs[0]);
+ free_privs_second:
kfree(privs[1]);
-
-err_put_page:
+ free_privs_first:
+ kfree(privs[0]);
+ put_pages:
for (i = 0; i < num_pages; i++)
put_page(pages[i]);
-
-err_pages:
+ free_pages:
kfree(pages);
return ret;
}
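
The relabelled error path now follows the common kernel idiom: one label per acquired resource, named after what it releases, unwinding in strict reverse order of acquisition. In miniature (user-space sketch, names invented):

#include <stdlib.h>

static int setup_pair(void **a, void **b)
{
	*a = malloc(64);
	if (!*a)
		return -1;

	*b = malloc(64);
	if (!*b)
		goto free_a;

	return 0;

free_a:
	free(*a);
	*a = NULL;
	return -1;
}
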
@@ -904,11 +905,9 @@ static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
{
struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
- int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
- int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
if (e500_mmu_host_init(vcpu_e500))
- goto err;
+ goto free_vcpu;
vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE;
vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE;
@@ -920,37 +919,39 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE;
vcpu_e500->gtlb_params[1].sets = 1;
- vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL);
+ vcpu_e500->gtlb_arch = kmalloc_array(KVM_E500_TLB0_SIZE +
+ KVM_E500_TLB1_SIZE,
+ sizeof(*vcpu_e500->gtlb_arch),
+ GFP_KERNEL);
if (!vcpu_e500->gtlb_arch)
return -ENOMEM;
vcpu_e500->gtlb_offset[0] = 0;
vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;
- vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) *
- vcpu_e500->gtlb_params[0].entries,
+ vcpu_e500->gtlb_priv[0] = kcalloc(vcpu_e500->gtlb_params[0].entries,
+ sizeof(struct tlbe_ref),
GFP_KERNEL);
if (!vcpu_e500->gtlb_priv[0])
- goto err;
+ goto free_vcpu;
- vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) *
- vcpu_e500->gtlb_params[1].entries,
+ vcpu_e500->gtlb_priv[1] = kcalloc(vcpu_e500->gtlb_params[1].entries,
+ sizeof(struct tlbe_ref),
GFP_KERNEL);
if (!vcpu_e500->gtlb_priv[1])
- goto err;
+ goto free_vcpu;
- vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) *
- vcpu_e500->gtlb_params[1].entries,
+ vcpu_e500->g2h_tlb1_map = kcalloc(vcpu_e500->gtlb_params[1].entries,
+ sizeof(*vcpu_e500->g2h_tlb1_map),
GFP_KERNEL);
if (!vcpu_e500->g2h_tlb1_map)
- goto err;
+ goto free_vcpu;
vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;
-
-err:
+ free_vcpu:
free_gtlb(vcpu_e500);
return -1;
}
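
The allocator conversions in this file are also more than cosmetic: kcalloc() and kmalloc_array() check the n * size multiplication for overflow and fail cleanly rather than returning a silently truncated buffer (kcalloc additionally zeroes it). A sketch of the distinction (helper name invented):

static u64 *alloc_g2h_map(size_t entries)
{
	/*
	 * kzalloc(entries * sizeof(u64), GFP_KERNEL) could wrap on
	 * 32-bit when 'entries' is user-influenced; kcalloc() detects
	 * the overflow and returns NULL instead.
	 */
	return kcalloc(entries, sizeof(u64), GFP_KERNEL);
}
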
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6ce40dd6fe51..70963c845e96 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -27,6 +27,8 @@
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/module.h>
+#include <linux/irqbypass.h>
+#include <linux/kvm_irqfd.h>
#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
@@ -436,6 +438,16 @@ err_out:
return -EINVAL;
}
+bool kvm_arch_has_vcpu_debugfs(void)
+{
+ return false;
+}
+
+int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
unsigned int i;
@@ -739,6 +751,42 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
#endif
}
+/*
+ * irq_bypass_add_producer and irq_bypass_del_producer are only
+ * useful if the architecture supports PCI passthrough.
+ * irq_bypass_stop and irq_bypass_start are not needed and so
+ * kvm_ops are not defined for them.
+ */
+bool kvm_arch_has_irq_bypass(void)
+{
+ return ((kvmppc_hv_ops && kvmppc_hv_ops->irq_bypass_add_producer) ||
+ (kvmppc_pr_ops && kvmppc_pr_ops->irq_bypass_add_producer));
+}
+
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+ struct kvm_kernel_irqfd *irqfd =
+ container_of(cons, struct kvm_kernel_irqfd, consumer);
+ struct kvm *kvm = irqfd->kvm;
+
+ if (kvm->arch.kvm_ops->irq_bypass_add_producer)
+ return kvm->arch.kvm_ops->irq_bypass_add_producer(cons, prod);
+
+ return 0;
+}
+
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+ struct kvm_kernel_irqfd *irqfd =
+ container_of(cons, struct kvm_kernel_irqfd, consumer);
+ struct kvm *kvm = irqfd->kvm;
+
+ if (kvm->arch.kvm_ops->irq_bypass_del_producer)
+ kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod);
+}
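
For context on when these arch hooks fire: the irqbypass manager matches producers and consumers by token and, on a match, calls back into the consumer, which lands in the hooks above. An illustrative pairing only - the two register calls are the real linux/irqbypass.h API, but the surrounding KVM/VFIO glue is paraphrased and error handling is omitted:

#include <linux/irqbypass.h>

static int demo_add(struct irq_bypass_consumer *cons,
		    struct irq_bypass_producer *prod)
{
	return 0;	/* KVM would map prod->irq to the guest GSI here */
}

static void demo_del(struct irq_bypass_consumer *cons,
		     struct irq_bypass_producer *prod)
{
}

static struct irq_bypass_consumer demo_cons = {
	.add_producer = demo_add,
	.del_producer = demo_del,
};
static struct irq_bypass_producer demo_prod;

static void demo_pair(void *token, int host_irq)
{
	demo_cons.token = token;	/* KVM: the irqfd's eventfd ctx */
	irq_bypass_register_consumer(&demo_cons);

	demo_prod.token = token;	/* VFIO: the same eventfd ctx */
	demo_prod.irq = host_irq;
	irq_bypass_register_producer(&demo_prod);
	/* token match -> kvm_arch_irq_bypass_add_producer() above */
}
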
+
static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
@@ -1167,6 +1215,19 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return r;
}
+bool kvm_arch_intc_initialized(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_MPIC
+ if (kvm->arch.mpic)
+ return true;
+#endif
+#ifdef CONFIG_KVM_XICS
+ if (kvm->arch.xics)
+ return true;
+#endif
+ return false;
+}
+
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index 33d9daff5783..fb21990c0fb4 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -432,6 +432,28 @@ TRACE_EVENT(kvmppc_vcore_blocked,
__entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
);
+TRACE_EVENT(kvmppc_vcore_wakeup,
+ TP_PROTO(int do_sleep, __u64 ns),
+
+ TP_ARGS(do_sleep, ns),
+
+ TP_STRUCT__entry(
+ __field(__u64, ns)
+ __field(int, waited)
+ __field(pid_t, tgid)
+ ),
+
+ TP_fast_assign(
+ __entry->ns = ns;
+ __entry->waited = do_sleep;
+ __entry->tgid = current->tgid;
+ ),
+
+ TP_printk("%s time %lld ns, tgid=%d",
+ __entry->waited ? "wait" : "poll",
+ __entry->ns, __entry->tgid)
+);
+
TRACE_EVENT(kvmppc_run_vcpu_enter,
TP_PROTO(struct kvm_vcpu *vcpu),
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index ba21be15310f..ad5290005ca4 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -22,7 +22,7 @@ obj64-$(CONFIG_SMP) += locks.o
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
ifeq ($(CONFIG_GENERIC_CSUM),)
-obj-y += checksum_$(CONFIG_WORD_SIZE).o checksum_wrappers.o
+obj-y += checksum_$(BITS).o checksum_wrappers.o
endif
obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index d90870a66b60..aa8214f30c92 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -127,17 +127,19 @@ _GLOBAL(csum_partial_copy_generic)
stw r7,12(r1)
stw r8,8(r1)
- andi. r0,r4,1 /* is destination address even ? */
- cmplwi cr7,r0,0
addic r12,r6,0
addi r6,r4,-4
neg r0,r4
addi r4,r3,-4
andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
+ crset 4*cr7+eq
beq 58f
cmplw 0,r5,r0 /* is this more than total to do? */
blt 63f /* if not much to do */
+ rlwinm r7,r6,3,0x8
+ rlwnm r12,r12,r7,0,31 /* odd destination address: rotate one byte */
+ cmplwi cr7,r7,0 /* is destination address even? */
andi. r8,r0,3 /* get it word-aligned first */
mtctr r8
beq+ 61f
@@ -237,7 +239,7 @@ _GLOBAL(csum_partial_copy_generic)
66: addze r3,r12
addi r1,r1,16
beqlr+ cr7
- rlwinm r3,r3,8,0,31 /* swap bytes for odd destination */
+ rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */
blr
/* read fault */
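The three instructions added in the first hunk replace the early
andi./cmplwi pair: they pre-rotate the running checksum when the
destination is odd and record the test in cr7 for the final fixup in the
second hunk. In C terms (a sketch; rotl32 stands in for the rlwnm rotate):

	r7  = (dst & 1) << 3;	/* 8 if the destination address is odd */
	sum = rotl32(sum, r7);	/* rlwnm: rotate the incoming csum one byte */
	/* cmplwi cr7,r7,0: cr7.eq records "even", so the epilogue's
	 * "beqlr+ cr7" skips the closing one-byte rotate of the result */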
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 74145f02ad41..043415f0bdb1 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -188,7 +188,10 @@ void __init apply_feature_fixups(void)
&__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
#endif
do_final_fixups();
+}
+void __init setup_feature_keys(void)
+{
/*
* Initialise jump label. This causes all the cpu/mmu_has_feature()
* checks to take on their correct polarity based on the current set of
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 43435c6892fb..eda7a96161ab 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -37,6 +37,7 @@ _GLOBAL(memset)
clrldi r5,r5,58
mtctr r0
beq 5f
+ .balign 16
4: std r4,0(r6)
std r4,8(r6)
std r4,16(r6)
@@ -90,6 +91,7 @@ _GLOBAL(backwards_memcpy)
andi. r0,r6,3
mtctr r7
bne 5f
+ .balign 16
1: lwz r7,-4(r4)
lwzu r8,-8(r4)
stw r7,-4(r6)
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index f2cea6d5e764..1a4e570f7894 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -7,17 +7,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-y := fault.o mem.o pgtable.o mmap.o \
- init_$(CONFIG_WORD_SIZE).o \
- pgtable_$(CONFIG_WORD_SIZE).o
+ init_$(BITS).o pgtable_$(BITS).o
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
tlb_nohash_low.o
-obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o
+obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o
obj-$(CONFIG_PPC_STD_MMU_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
-obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o
ifeq ($(CONFIG_PPC_STD_MMU_64),y)
obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index a4db22f65021..d0b137d96df1 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -26,7 +26,7 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
-#include <linux/module.h>
+#include <linux/extable.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/perf_event.h>
@@ -205,7 +205,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
* The return value is 0 if the fault was handled, or the signal
* number if this is a kernel fault that can't be handled here.
*/
-int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
+int do_page_fault(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
enum ctx_state prev_state = exception_enter();
@@ -498,8 +498,8 @@ bad_area_nosemaphore:
bail:
exception_exit(prev_state);
return rc;
-
}
+NOKPROBE_SYMBOL(do_page_fault);
/*
* bad_page_fault is called when we have a bad access from the kernel.
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 0e4e9654bd2c..83ddc0e171b0 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -493,36 +493,6 @@ static void native_hugepage_invalidate(unsigned long vsid,
}
#endif
-static inline int __hpte_actual_psize(unsigned int lp, int psize)
-{
- int i, shift;
- unsigned int mask;
-
- /* start from 1 ignoring MMU_PAGE_4K */
- for (i = 1; i < MMU_PAGE_COUNT; i++) {
-
- /* invalid penc */
- if (mmu_psize_defs[psize].penc[i] == -1)
- continue;
- /*
- * encoding bits per actual page size
- * PTE LP actual page size
- * rrrr rrrz >=8KB
- * rrrr rrzz >=16KB
- * rrrr rzzz >=32KB
- * rrrr zzzz >=64KB
- * .......
- */
- shift = mmu_psize_defs[i].shift - LP_SHIFT;
- if (shift > LP_BITS)
- shift = LP_BITS;
- mask = (1 << shift) - 1;
- if ((lp & mask) == mmu_psize_defs[psize].penc[i])
- return i;
- }
- return -1;
-}
-
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
@@ -538,16 +508,8 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
size = MMU_PAGE_4K;
a_size = MMU_PAGE_4K;
} else {
- for (size = 0; size < MMU_PAGE_COUNT; size++) {
-
- /* valid entries have a shift value */
- if (!mmu_psize_defs[size].shift)
- continue;
-
- a_size = __hpte_actual_psize(lp, size);
- if (a_size != -1)
- break;
- }
+ size = hpte_page_sizes[lp] & 0xf;
+ a_size = hpte_page_sizes[lp] >> 4;
}
/* This works for all page sizes, and for 256M and 1T segments */
if (cpu_has_feature(CPU_FTR_ARCH_300))
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0821556e16f4..90480e23fd2c 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -93,6 +93,9 @@ static unsigned long _SDR1;
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
EXPORT_SYMBOL_GPL(mmu_psize_defs);
+u8 hpte_page_sizes[1 << LP_BITS];
+EXPORT_SYMBOL_GPL(hpte_page_sizes);
+
struct hash_pte *htab_address;
unsigned long htab_size_bytes;
unsigned long htab_hash_mask;
@@ -564,8 +567,60 @@ static void __init htab_scan_page_sizes(void)
#endif /* CONFIG_HUGETLB_PAGE */
}
+/*
+ * Fill in the hpte_page_sizes[] array.
+ * We go through the mmu_psize_defs[] array looking for all the
+ * supported base/actual page size combinations. Each combination
+ * has a unique pagesize encoding (penc) value in the low bits of
+ * the LP field of the HPTE. For actual page sizes less than 1MB,
+ * some of the upper LP bits are used for RPN bits, meaning that
+ * we need to fill in several entries in hpte_page_sizes[].
+ *
+ * In diagrammatic form, with r = RPN bits and z = page size bits:
+ * PTE LP actual page size
+ * rrrr rrrz >=8KB
+ * rrrr rrzz >=16KB
+ * rrrr rzzz >=32KB
+ * rrrr zzzz >=64KB
+ * ...
+ *
+ * The zzzz bits are implementation-specific but are chosen so that
+ * no encoding for a larger page size uses the same value in its
+ * low-order N bits as the encoding for the 2^(12+N) byte page size
+ * (if it exists).
+ */
+static void init_hpte_page_sizes(void)
+{
+ long int ap, bp;
+ long int shift, penc;
+
+ for (bp = 0; bp < MMU_PAGE_COUNT; ++bp) {
+ if (!mmu_psize_defs[bp].shift)
+ continue; /* not a supported page size */
+ for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) {
+ penc = mmu_psize_defs[bp].penc[ap];
+ if (penc == -1)
+ continue;
+ shift = mmu_psize_defs[ap].shift - LP_SHIFT;
+ if (shift <= 0)
+ continue; /* should never happen */
+ /*
+ * For page sizes less than 1MB, this loop
+ * replicates the entry for all possible values
+ * of the rrrr bits.
+ */
+ while (penc < (1 << LP_BITS)) {
+ hpte_page_sizes[penc] = (ap << 4) | bp;
+ penc += 1 << shift;
+ }
+ }
+ }
+}
+
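The table filled in above is consumed by the hash_native_64.c hunk earlier
with two shift/mask operations; as a self-contained sketch of that lookup
(hpte_decode_sizes is an illustrative name, not part of the patch):

	static inline void hpte_decode_sizes(unsigned int lp, int *base, int *actual)
	{
		u8 enc = hpte_page_sizes[lp];

		*base   = enc & 0xf;	/* bp: base page size index */
		*actual = enc >> 4;	/* ap: actual page size index */
	}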
static void __init htab_init_page_sizes(void)
{
+ init_hpte_page_sizes();
+
if (!debug_pagealloc_enabled()) {
/*
* Pick a size for the linear mapping. Currently, we only
@@ -711,6 +766,29 @@ int remove_section_mapping(unsigned long start, unsigned long end)
}
#endif /* CONFIG_MEMORY_HOTPLUG */
+static void update_hid_for_hash(void)
+{
+ unsigned long hid0;
+ unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */
+
+ asm volatile("ptesync": : :"memory");
+ /* prs = 0, ric = 2, rs = 0, r = 1, is = 3 */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(0), "i"(0), "i"(2), "r"(0) : "memory");
+ asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
+ /*
+ * now switch the HID
+ */
+ hid0 = mfspr(SPRN_HID0);
+ hid0 &= ~HID0_POWER9_RADIX;
+ mtspr(SPRN_HID0, hid0);
+ asm volatile("isync": : :"memory");
+
+ /* Wait for it to happen */
+ while ((mfspr(SPRN_HID0) & HID0_POWER9_RADIX))
+ cpu_relax();
+}
+
static void __init hash_init_partition_table(phys_addr_t hash_table,
unsigned long htab_size)
{
@@ -737,6 +815,8 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
*/
partition_tb->patb1 = 0;
pr_info("Partition table %p\n", partition_tb);
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ update_hid_for_hash();
/*
* update partition table control register,
* 64 K size.
@@ -1460,6 +1540,29 @@ out_exit:
local_irq_restore(flags);
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline void tm_flush_hash_page(int local)
+{
+ /*
+ * Transactions are not aborted by tlbiel, only tlbie. Without an
+ * abort, syncing a page back to a block device via PIO could pick up
+ * transactional data (bad!), so we force an abort here. Before the
+ * sync the page will be made read-only, which will call
+ * flush_hash_page. BIG ISSUE here: if the kernel uses a page from
+ * userspace without unmapping it first, it may see the speculated
+ * version.
+ */
+ if (local && cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+ MSR_TM_ACTIVE(current->thread.regs->msr)) {
+ tm_enable();
+ tm_abort(TM_CAUSE_TLBI);
+ }
+}
+#else
+static inline void tm_flush_hash_page(int local)
+{
+}
+#endif
+
/* WARNING: This is called from hash_low_64.S, if you change this prototype,
* do not forget to update the assembly call site !
*/
@@ -1486,21 +1589,7 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
ssize, local);
} pte_iterate_hashed_end();
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* Transactions are not aborted by tlbiel, only tlbie.
- * Without, syncing a page back to a block device w/ PIO could pick up
- * transactional data (bad!) so we force an abort here. Before the
- * sync the page will be made read-only, which will flush_hash_page.
- * BIG ISSUE here: if the kernel uses a page from userspace without
- * unmapping it first, it may see the speculated version.
- */
- if (local && cpu_has_feature(CPU_FTR_TM) &&
- current->thread.regs &&
- MSR_TM_ACTIVE(current->thread.regs->msr)) {
- tm_enable();
- tm_abort(TM_CAUSE_TLBI);
- }
-#endif
+ tm_flush_hash_page(local);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -1557,22 +1646,7 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
MMU_PAGE_16M, ssize, local);
}
tm_abort:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* Transactions are not aborted by tlbiel, only tlbie.
- * Without, syncing a page back to a block device w/ PIO could pick up
- * transactional data (bad!) so we force an abort here. Before the
- * sync the page will be made read-only, which will flush_hash_page.
- * BIG ISSUE here: if the kernel uses a page from userspace without
- * unmapping it first, it may see the speculated version.
- */
- if (local && cpu_has_feature(CPU_FTR_TM) &&
- current->thread.regs &&
- MSR_TM_ACTIVE(current->thread.regs->msr)) {
- tm_enable();
- tm_abort(TM_CAUSE_TLBI);
- }
-#endif
- return;
+ tm_flush_hash_page(local);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7372ee13eb1e..a5d3ecdabc44 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -1019,8 +1019,15 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
pte = READ_ONCE(*ptep);
mask = _PAGE_PRESENT | _PAGE_READ;
+
+ /*
+ * On some CPUs like the 8xx, _PAGE_RW hence _PAGE_WRITE is defined
+ * as 0 and _PAGE_RO has to be set when a page is not writable
+ */
if (write)
mask |= _PAGE_WRITE;
+ else
+ mask |= _PAGE_RO;
if ((pte_val(pte) & mask) != mask)
return 0;
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 448685fbf27c..8a7c38b8d335 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -137,7 +137,7 @@ void __init MMU_init(void)
if (memblock.memory.cnt > 1) {
#ifndef CONFIG_WII
memblock_enforce_memory_limit(memblock.memory.regions[0].size);
- printk(KERN_WARNING "Only using first contiguous memory region");
+ pr_warn("Only using first contiguous memory region\n");
#else
wii_memory_fixups();
#endif
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index da6a2168ae9e..e0f1c33601dd 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -15,6 +15,9 @@
#include <linux/rculist.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
+#include <linux/migrate.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
#include <asm/mmu_context.h>
static DEFINE_MUTEX(mem_list_mutex);
@@ -72,6 +75,55 @@ bool mm_iommu_preregistered(void)
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
+/*
+ * Taken from alloc_migrate_target with changes to remove CMA allocations
+ */
+struct page *new_iommu_non_cma_page(struct page *page, unsigned long private,
+ int **resultp)
+{
+ gfp_t gfp_mask = GFP_USER;
+ struct page *new_page;
+
+ if (PageHuge(page) || PageTransHuge(page) || PageCompound(page))
+ return NULL;
+
+ if (PageHighMem(page))
+ gfp_mask |= __GFP_HIGHMEM;
+
+ /*
+ * We don't want the allocation to force an OOM if possible
+ */
+ new_page = alloc_page(gfp_mask | __GFP_NORETRY | __GFP_NOWARN);
+ return new_page;
+}
+
+static int mm_iommu_move_page_from_cma(struct page *page)
+{
+ int ret = 0;
+ LIST_HEAD(cma_migrate_pages);
+
+ /* Ignore huge pages for now */
+ if (PageHuge(page) || PageTransHuge(page) || PageCompound(page))
+ return -EBUSY;
+
+ lru_add_drain();
+ ret = isolate_lru_page(page);
+ if (ret)
+ return ret;
+
+ list_add(&page->lru, &cma_migrate_pages);
+ put_page(page); /* Drop the gup reference */
+
+ ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page,
+ NULL, 0, MIGRATE_SYNC, MR_CMA);
+ if (ret) {
+ if (!list_empty(&cma_migrate_pages))
+ putback_movable_pages(&cma_migrate_pages);
+ }
+
+ return 0;
+}
+
long mm_iommu_get(unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem)
{
@@ -124,15 +176,36 @@ long mm_iommu_get(unsigned long ua, unsigned long entries,
for (i = 0; i < entries; ++i) {
if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
1/* pages */, 1/* iswrite */, &page)) {
+ ret = -EFAULT;
for (j = 0; j < i; ++j)
- put_page(pfn_to_page(
- mem->hpas[j] >> PAGE_SHIFT));
+ put_page(pfn_to_page(mem->hpas[j] >>
+ PAGE_SHIFT));
vfree(mem->hpas);
kfree(mem);
- ret = -EFAULT;
goto unlock_exit;
}
-
+ /*
+ * If we get a page from the CMA zone, since we are going to
+ * be pinning these entries, we might as well move them out
+ * of the CMA zone if possible. NOTE: faulting in + migration
+ * can be expensive. Batching can be considered later
+ */
+ if (get_pageblock_migratetype(page) == MIGRATE_CMA) {
+ if (mm_iommu_move_page_from_cma(page))
+ goto populate;
+ if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
+ 1/* pages */, 1/* iswrite */,
+ &page)) {
+ ret = -EFAULT;
+ for (j = 0; j < i; ++j)
+ put_page(pfn_to_page(mem->hpas[j] >>
+ PAGE_SHIFT));
+ vfree(mem->hpas);
+ kfree(mem);
+ goto unlock_exit;
+ }
+ }
+populate:
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
}
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 7d95bc402dba..c491f2c8f2b9 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -369,44 +369,34 @@ void destroy_context(struct mm_struct *mm)
}
#ifdef CONFIG_SMP
-
-static int mmu_context_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int mmu_ctx_cpu_prepare(unsigned int cpu)
{
- unsigned int cpu = (unsigned int)(long)hcpu;
-
/* We don't touch CPU 0 map, it's allocated at boot and kept
* around forever
*/
if (cpu == boot_cpuid)
- return NOTIFY_OK;
-
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
- stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
- break;
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
- kfree(stale_map[cpu]);
- stale_map[cpu] = NULL;
-
- /* We also clear the cpu_vm_mask bits of CPUs going away */
- clear_tasks_mm_cpumask(cpu);
- break;
-#endif /* CONFIG_HOTPLUG_CPU */
- }
- return NOTIFY_OK;
+ return 0;
+
+ pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
+ stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
+ return 0;
}
-static struct notifier_block mmu_context_cpu_nb = {
- .notifier_call = mmu_context_cpu_notify,
-};
+static int mmu_ctx_cpu_dead(unsigned int cpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ if (cpu == boot_cpuid)
+ return 0;
+
+ pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
+ kfree(stale_map[cpu]);
+ stale_map[cpu] = NULL;
+
+ /* We also clear the cpu_vm_mask bits of CPUs going away */
+ clear_tasks_mm_cpumask(cpu);
+#endif
+ return 0;
+}
#endif /* CONFIG_SMP */
@@ -469,7 +459,9 @@ void __init mmu_context_init(void)
#else
stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0);
- register_cpu_notifier(&mmu_context_cpu_nb);
+ cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
+ "powerpc/mmu/ctx:prepare",
+ mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
#endif
printk(KERN_INFO
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 34079302cc17..f4f437cbabf1 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -35,7 +35,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
#endif
changed = !pmd_same(*(pmdp), entry);
if (changed) {
- __ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
+ __ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry));
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
return changed;
@@ -116,3 +116,12 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
return;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/* For use by kexec */
+void mmu_cleanup_all(void)
+{
+ if (radix_enabled())
+ radix__mmu_cleanup_all();
+ else if (mmu_hash_ops.hpte_clear_all)
+ mmu_hash_ops.hpte_clear_all();
+}
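As the comment says, this dispatcher is meant for kexec; a sketch of the
intended call site (the function name here is hypothetical, the real hook
lives in the kexec machinery outside this diff):

	static void kexec_mmu_teardown(void)
	{
		/* radix: clear LPCR_UPRT/PTCR and flush; hash: hpte_clear_all() */
		mmu_cleanup_all();
	}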
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index af897d91d09f..ed7bddc456b7 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -294,6 +294,32 @@ found:
return;
}
+static void update_hid_for_radix(void)
+{
+ unsigned long hid0;
+ unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */
+
+ asm volatile("ptesync": : :"memory");
+ /* prs = 0, ric = 2, rs = 0, r = 1, is = 3 */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(1), "i"(0), "i"(2), "r"(0) : "memory");
+ /* prs = 1, ric = 2, rs = 0, r = 1, is = 3 */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory");
+ asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
+ /*
+ * now switch the HID
+ */
+ hid0 = mfspr(SPRN_HID0);
+ hid0 |= HID0_POWER9_RADIX;
+ mtspr(SPRN_HID0, hid0);
+ asm volatile("isync": : :"memory");
+
+ /* Wait for it to happen */
+ while (!(mfspr(SPRN_HID0) & HID0_POWER9_RADIX))
+ cpu_relax();
+}
+
void __init radix__early_init_mmu(void)
{
unsigned long lpcr;
@@ -345,6 +371,8 @@ void __init radix__early_init_mmu(void)
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
radix_init_native();
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ update_hid_for_radix();
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
@@ -368,6 +396,18 @@ void radix__early_init_mmu_secondary(void)
}
}
+void radix__mmu_cleanup_all(void)
+{
+ unsigned long lpcr;
+
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ lpcr = mfspr(SPRN_LPCR);
+ mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
+ mtspr(SPRN_PTCR, 0);
+ radix__flush_tlb_all();
+ }
+}
+
void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
phys_addr_t first_memblock_size)
{
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 0b6fb244d0a1..911fdfb63ec1 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -224,7 +224,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
if (changed) {
if (!is_vm_hugetlb_page(vma))
assert_pte_locked(vma->vm_mm, address);
- __ptep_set_access_flags(ptep, entry);
+ __ptep_set_access_flags(vma->vm_mm, ptep, entry);
flush_tlb_page(vma, address);
}
return changed;
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index dfdb90cb4403..e2974fcd20f1 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -113,7 +113,12 @@ BEGIN_FTR_SECTION
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
b slb_finish_load_1T
-0:
+0: /*
+ * For userspace addresses, make sure this is region 0.
+ */
+ cmpdi r9, 0
+ bne 8f
+
/* when using slices, we extract the psize off the slice bitmaps
* and then we need to get the sllp encoding off the mmu_psize_defs
* array.
@@ -173,11 +178,9 @@ BEGIN_FTR_SECTION
END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
b slb_finish_load
-8: /* invalid EA */
- li r10,0 /* BAD_VSID */
- li r9,0 /* BAD_VSID */
- li r11,SLB_VSID_USER /* flags don't much matter */
- b slb_finish_load
+8: /* invalid EA - return an error indication */
+ crset 4*cr0+eq /* indicate failure */
+ blr
/*
* Finish loading of an SLB entry and return
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 48df05ef5231..0e49ec541ab5 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -400,3 +400,27 @@ void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
+
+void radix__flush_tlb_all(void)
+{
+ unsigned long rb,prs,r,rs;
+ unsigned long ric = RIC_FLUSH_ALL;
+
+ rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
+ prs = 0; /* partition scoped */
+ r = 1; /* radix format */
+ rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
+
+ asm volatile("ptesync": : :"memory");
+ /*
+ * now flush guest entries by passing PRS = 1 and LPID != 0
+ */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
+ /*
+ * now flush host entries by passing PRS = 0 and LPID == 0
+ */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index d5301b6f20d0..89f70073dec8 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -40,6 +40,8 @@
#define PPC_BLR() EMIT(PPC_INST_BLR)
#define PPC_BLRL() EMIT(PPC_INST_BLRL)
#define PPC_MTLR(r) EMIT(PPC_INST_MTLR | ___PPC_RT(r))
+#define PPC_BCTR() EMIT(PPC_INST_BCTR)
+#define PPC_MTCTR(r) EMIT(PPC_INST_MTCTR | ___PPC_RT(r))
#define PPC_ADDI(d, a, i) EMIT(PPC_INST_ADDI | ___PPC_RT(d) | \
___PPC_RA(a) | IMM_L(i))
#define PPC_MR(d, a) PPC_OR(d, a, a)
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 5046d6f65c02..62fa7589db2b 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -16,30 +16,33 @@
/*
* Stack layout:
+ * Ensure the top half (up to local_tmp_var) stays consistent
+ * with our redzone usage.
*
* [ prev sp ] <-------------
* [ nv gpr save area ] 8*8 |
+ * [ tail_call_cnt ] 8 |
+ * [ local_tmp_var ] 8 |
* fp (r31) --> [ ebpf stack space ] 512 |
- * [ local/tmp var space ] 16 |
* [ frame header ] 32/112 |
* sp (r1) ---> [ stack pointer ] --------------
*/
-/* for bpf JIT code internal usage */
-#define BPF_PPC_STACK_LOCALS 16
/* for gpr non volatile registers BPG_REG_6 to 10, plus skb cache registers */
#define BPF_PPC_STACK_SAVE (8*8)
+/* for bpf JIT code internal usage */
+#define BPF_PPC_STACK_LOCALS 16
/* Ensure this is quadword aligned */
-#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS + \
- MAX_BPF_STACK + BPF_PPC_STACK_SAVE)
+#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + MAX_BPF_STACK + \
+ BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE)
#ifndef __ASSEMBLY__
/* BPF register usage */
-#define SKB_HLEN_REG (MAX_BPF_REG + 0)
-#define SKB_DATA_REG (MAX_BPF_REG + 1)
-#define TMP_REG_1 (MAX_BPF_REG + 2)
-#define TMP_REG_2 (MAX_BPF_REG + 3)
+#define SKB_HLEN_REG (MAX_BPF_JIT_REG + 0)
+#define SKB_DATA_REG (MAX_BPF_JIT_REG + 1)
+#define TMP_REG_1 (MAX_BPF_JIT_REG + 2)
+#define TMP_REG_2 (MAX_BPF_JIT_REG + 3)
/* BPF to ppc register mappings */
static const int b2p[] = {
@@ -59,12 +62,16 @@ static const int b2p[] = {
/* frame pointer aka BPF_REG_10 */
[BPF_REG_FP] = 31,
/* eBPF jit internal registers */
+ [BPF_REG_AX] = 2,
[SKB_HLEN_REG] = 25,
[SKB_DATA_REG] = 26,
[TMP_REG_1] = 9,
[TMP_REG_2] = 10
};
+/* PPC NVR range -- update this if we ever use NVRs below r24 */
+#define BPF_PPC_NVR_MIN 24
+
/* Assembly helpers */
#define DECLARE_LOAD_FUNC(func) u64 func(u64 r3, u64 r4); \
u64 func##_negative_offset(u64 r3, u64 r4); \
@@ -82,6 +89,7 @@ DECLARE_LOAD_FUNC(sk_load_byte);
#define SEEN_FUNC 0x1000 /* might call external helpers */
#define SEEN_STACK 0x2000 /* uses BPF stack */
#define SEEN_SKB 0x4000 /* uses sk_buff */
+#define SEEN_TAILCALL 0x8000 /* uses tail calls */
struct codegen_context {
/*
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 6073b78516f6..0fe98a567125 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -17,6 +17,7 @@
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/kprobes.h>
+#include <linux/bpf.h>
#include "bpf_jit64.h"
@@ -58,6 +59,40 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
}
+/*
+ * When not setting up our own stackframe, the redzone usage is:
+ *
+ * [ prev sp ] <-------------
+ * [ ... ] |
+ * sp (r1) ---> [ stack pointer ] --------------
+ * [ nv gpr save area ] 8*8
+ * [ tail_call_cnt ] 8
+ * [ local_tmp_var ] 8
+ * [ unused red zone ] 208 bytes protected
+ */
+static int bpf_jit_stack_local(struct codegen_context *ctx)
+{
+ if (bpf_has_stack_frame(ctx))
+ return STACK_FRAME_MIN_SIZE + MAX_BPF_STACK;
+ else
+ return -(BPF_PPC_STACK_SAVE + 16);
+}
+
+static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
+{
+ return bpf_jit_stack_local(ctx) + 8;
+}
+
+static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
+{
+ if (reg >= BPF_PPC_NVR_MIN && reg < 32)
+ return (bpf_has_stack_frame(ctx) ? BPF_PPC_STACKFRAME : 0)
+ - (8 * (32 - reg));
+
+ pr_err("BPF JIT is asking about unknown registers\n");
+ BUG();
+}
+
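Plugging in the ELFv2 numbers (STACK_FRAME_MIN_SIZE = 32, MAX_BPF_STACK =
512, BPF_PPC_STACK_SAVE = 64, so BPF_PPC_STACKFRAME = 624) shows the two
addressing modes above agree on the same slots:

	/*
	 * With a stack frame:  local_tmp_var at r1 + 544, tail_call_cnt at r1 + 552
	 * Without (redzone):   local_tmp_var at r1 - 80,  tail_call_cnt at r1 - 72
	 *
	 * The prologue's redzone store at -(BPF_PPC_STACK_SAVE + 8) = -72 names
	 * the same slot as the framed offset, since 624 - 72 == 544 + 8.
	 */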
static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx)
{
/*
@@ -73,36 +108,27 @@ static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx)
PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data));
}
-static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
+static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
-#ifdef PPC64_ELF_ABI_v1
- /* func points to the function descriptor */
- PPC_LI64(b2p[TMP_REG_2], func);
- /* Load actual entry point from function descriptor */
- PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
- /* ... and move it to LR */
- PPC_MTLR(b2p[TMP_REG_1]);
+ int i;
+
/*
- * Load TOC from function descriptor at offset 8.
- * We can clobber r2 since we get called through a
- * function pointer (so caller will save/restore r2)
- * and since we don't use a TOC ourself.
+ * Initialize tail_call_cnt if we do tail calls.
+ * Otherwise, put in NOPs so that it can be skipped when we are
+ * invoked through a tail call.
*/
- PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
-#else
- /* We can clobber r12 */
- PPC_FUNC_ADDR(12, func);
- PPC_MTLR(12);
-#endif
- PPC_BLRL();
-}
+ if (ctx->seen & SEEN_TAILCALL) {
+ PPC_LI(b2p[TMP_REG_1], 0);
+ /* this goes in the redzone */
+ PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
+ } else {
+ PPC_NOP();
+ PPC_NOP();
+ }
-static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
-{
- int i;
- bool new_stack_frame = bpf_has_stack_frame(ctx);
+#define BPF_TAILCALL_PROLOGUE_SIZE 8
- if (new_stack_frame) {
+ if (bpf_has_stack_frame(ctx)) {
/*
* We need a stack frame, but we don't necessarily need to
* save/restore LR unless we call other functions
@@ -122,9 +148,7 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
*/
for (i = BPF_REG_6; i <= BPF_REG_10; i++)
if (bpf_is_seen_register(ctx, i))
- PPC_BPF_STL(b2p[i], 1,
- (new_stack_frame ? BPF_PPC_STACKFRAME : 0) -
- (8 * (32 - b2p[i])));
+ PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
/*
* Save additional non-volatile regs if we cache skb
@@ -132,53 +156,142 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
*/
if (ctx->seen & SEEN_SKB) {
PPC_BPF_STL(b2p[SKB_HLEN_REG], 1,
- BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_HLEN_REG])));
+ bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
PPC_BPF_STL(b2p[SKB_DATA_REG], 1,
- BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_DATA_REG])));
+ bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
bpf_jit_emit_skb_loads(image, ctx);
}
/* Setup frame pointer to point to the bpf stack area */
if (bpf_is_seen_register(ctx, BPF_REG_FP))
PPC_ADDI(b2p[BPF_REG_FP], 1,
- BPF_PPC_STACKFRAME - BPF_PPC_STACK_SAVE);
+ STACK_FRAME_MIN_SIZE + MAX_BPF_STACK);
}
-static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
{
int i;
- bool new_stack_frame = bpf_has_stack_frame(ctx);
-
- /* Move result to r3 */
- PPC_MR(3, b2p[BPF_REG_0]);
/* Restore NVRs */
for (i = BPF_REG_6; i <= BPF_REG_10; i++)
if (bpf_is_seen_register(ctx, i))
- PPC_BPF_LL(b2p[i], 1,
- (new_stack_frame ? BPF_PPC_STACKFRAME : 0) -
- (8 * (32 - b2p[i])));
+ PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
/* Restore non-volatile registers used for skb cache */
if (ctx->seen & SEEN_SKB) {
PPC_BPF_LL(b2p[SKB_HLEN_REG], 1,
- BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_HLEN_REG])));
+ bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
PPC_BPF_LL(b2p[SKB_DATA_REG], 1,
- BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_DATA_REG])));
+ bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
}
/* Tear down our stack frame */
- if (new_stack_frame) {
+ if (bpf_has_stack_frame(ctx)) {
PPC_ADDI(1, 1, BPF_PPC_STACKFRAME);
if (ctx->seen & SEEN_FUNC) {
PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
PPC_MTLR(0);
}
}
+}
+
+static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+ bpf_jit_emit_common_epilogue(image, ctx);
+
+ /* Move result to r3 */
+ PPC_MR(3, b2p[BPF_REG_0]);
PPC_BLR();
}
+static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
+{
+#ifdef PPC64_ELF_ABI_v1
+ /* func points to the function descriptor */
+ PPC_LI64(b2p[TMP_REG_2], func);
+ /* Load actual entry point from function descriptor */
+ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
+ /* ... and move it to LR */
+ PPC_MTLR(b2p[TMP_REG_1]);
+ /*
+ * Load TOC from function descriptor at offset 8.
+ * We can clobber r2 since we get called through a
+ * function pointer (so caller will save/restore r2)
+ * and since we don't use a TOC ourself.
+ */
+ PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
+#else
+ /* We can clobber r12 */
+ PPC_FUNC_ADDR(12, func);
+ PPC_MTLR(12);
+#endif
+ PPC_BLRL();
+}
+
+static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
+{
+ /*
+ * By now, the eBPF program has already set up parameters in r3, r4 and r5:
+ * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
+ * r4/BPF_REG_2 - pointer to bpf_array
+ * r5/BPF_REG_3 - index in bpf_array
+ */
+ int b2p_bpf_array = b2p[BPF_REG_2];
+ int b2p_index = b2p[BPF_REG_3];
+
+ /*
+ * if (index >= array->map.max_entries)
+ * goto out;
+ */
+ PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries));
+ PPC_CMPLW(b2p_index, b2p[TMP_REG_1]);
+ PPC_BCC(COND_GE, out);
+
+ /*
+ * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
+ */
+ PPC_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
+ PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT);
+ PPC_BCC(COND_GT, out);
+
+ /*
+ * tail_call_cnt++;
+ */
+ PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1);
+ PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
+
+ /* prog = array->ptrs[index]; */
+ PPC_MULI(b2p[TMP_REG_1], b2p_index, 8);
+ PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array);
+ PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));
+
+ /*
+ * if (prog == NULL)
+ * goto out;
+ */
+ PPC_CMPLDI(b2p[TMP_REG_1], 0);
+ PPC_BCC(COND_EQ, out);
+
+ /* goto *(prog->bpf_func + prologue_size); */
+ PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
+#ifdef PPC64_ELF_ABI_v1
+ /* skip past the function descriptor */
+ PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
+ FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE);
+#else
+ PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE);
+#endif
+ PPC_MTCTR(b2p[TMP_REG_1]);
+
+ /* tear down stack, restore NVRs, ... */
+ bpf_jit_emit_common_epilogue(image, ctx);
+
+ PPC_BCTR();
+ /* out: */
+}
+
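Pulling the inline comments together, the emitted sequence behaves like this
C-level sketch (the final branch goes through CTR rather than being a call):

	if (index >= array->map.max_entries)
		goto out;
	if (tail_call_cnt > MAX_TAIL_CALL_CNT)
		goto out;
	tail_call_cnt++;
	prog = array->ptrs[index];
	if (prog == NULL)
		goto out;
	/* restore NVRs, tear down the frame, then jump to
	 * prog->bpf_func + BPF_TAILCALL_PROLOGUE_SIZE */
out:
	/* fall through to the next eBPF instruction */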
/* Assemble the body code between the prologue & epilogue */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
struct codegen_context *ctx,
@@ -200,7 +313,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
u64 imm64;
u8 *func;
u32 true_cond;
- int stack_local_off;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@@ -219,9 +331,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
* optimization but everything else should work without
* any issues.
*/
- if (dst_reg >= 24 && dst_reg <= 31)
+ if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
bpf_set_seen_register(ctx, insn[i].dst_reg);
- if (src_reg >= 24 && src_reg <= 31)
+ if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
bpf_set_seen_register(ctx, insn[i].src_reg);
switch (code) {
@@ -490,25 +602,12 @@ bpf_alu32_trunc:
* Way easier and faster(?) to store the value
* into stack and then use ldbrx
*
- * First, determine where in stack we can store
- * this:
- * - if we have allotted a stack frame, then we
- * will utilize the area set aside by
- * BPF_PPC_STACK_LOCALS
- * - else, we use the area beneath the NV GPR
- * save area
- *
* ctx->seen will be reliable in pass2, but
* the instructions generated will remain the
* same across all passes
*/
- if (bpf_has_stack_frame(ctx))
- stack_local_off = STACK_FRAME_MIN_SIZE;
- else
- stack_local_off = -(BPF_PPC_STACK_SAVE + 8);
-
- PPC_STD(dst_reg, 1, stack_local_off);
- PPC_ADDI(b2p[TMP_REG_1], 1, stack_local_off);
+ PPC_STD(dst_reg, 1, bpf_jit_stack_local(ctx));
+ PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
break;
}
@@ -668,7 +767,7 @@ emit_clear:
/* Save skb pointer if we need to re-cache skb data */
if (bpf_helper_changes_skb_data(func))
- PPC_BPF_STL(3, 1, STACK_FRAME_MIN_SIZE);
+ PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -678,7 +777,7 @@ emit_clear:
/* refresh skb cache */
if (bpf_helper_changes_skb_data(func)) {
/* reload skb pointer to r3 */
- PPC_BPF_LL(3, 1, STACK_FRAME_MIN_SIZE);
+ PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_skb_loads(image, ctx);
}
break;
@@ -837,9 +936,12 @@ common_load:
break;
/*
- * TODO: Tail call
+ * Tail call
*/
case BPF_JMP | BPF_CALL | BPF_X:
+ ctx->seen |= SEEN_TAILCALL;
+ bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
+ break;
default:
/*
@@ -872,21 +974,37 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
int pass;
int flen;
struct bpf_binary_header *bpf_hdr;
+ struct bpf_prog *org_fp = fp;
+ struct bpf_prog *tmp_fp;
+ bool bpf_blinded = false;
if (!bpf_jit_enable)
- return fp;
+ return org_fp;
+
+ tmp_fp = bpf_jit_blind_constants(org_fp);
+ if (IS_ERR(tmp_fp))
+ return org_fp;
+
+ if (tmp_fp != org_fp) {
+ bpf_blinded = true;
+ fp = tmp_fp;
+ }
flen = fp->len;
addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
- if (addrs == NULL)
- return fp;
+ if (addrs == NULL) {
+ fp = org_fp;
+ goto out;
+ }
+
+ memset(&cgctx, 0, sizeof(struct codegen_context));
- cgctx.idx = 0;
- cgctx.seen = 0;
/* Scouting faux-generate pass 0 */
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs))
+ if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
/* We hit something illegal or unsupported. */
+ fp = org_fp;
goto out;
+ }
/*
* Pretend to build prologue, given the features we've seen. This will
@@ -901,8 +1019,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
bpf_jit_fill_ill_insns);
- if (!bpf_hdr)
+ if (!bpf_hdr) {
+ fp = org_fp;
goto out;
+ }
code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
@@ -939,6 +1059,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
out:
kfree(addrs);
+
+ if (bpf_blinded)
+ bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
+
return fp;
}
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
index b129d007e7fe..b19265de9178 100644
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -43,7 +43,7 @@ static unsigned int profiling_interval;
#define SPU_PC_MASK 0xFFFF
DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck);
-unsigned long oprof_spu_smpl_arry_lck_flags;
+static unsigned long oprof_spu_smpl_arry_lck_flags;
void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
{
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index ef2142ff7dbd..83d2b4ef7f0d 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -36,7 +36,7 @@
static DEFINE_SPINLOCK(buffer_lock);
static DEFINE_SPINLOCK(cache_lock);
static int num_spu_nodes;
-int spu_prof_num_nodes;
+static int spu_prof_num_nodes;
struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE];
struct delayed_work spu_work;
@@ -88,7 +88,7 @@ static void spu_buff_add(unsigned long int value, int spu)
/* This function copies the per SPU buffers to the
* OProfile kernel buffer.
*/
-void sync_spu_buff(void)
+static void sync_spu_buff(void)
{
int spu;
unsigned long flags;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 4ed377f0f7b2..72c27b8d2cf3 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2158,7 +2158,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
irq_exit();
}
-int power_pmu_prepare_cpu(unsigned int cpu)
+static int power_pmu_prepare_cpu(unsigned int cpu)
{
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 7aa37236bb70..43fabb3cae0f 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -131,7 +131,7 @@ static const struct attribute_group *attr_groups[] = {
#define HGPCI_MAX_DATA_BYTES \
(HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params))
-DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
+static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
struct hv_gpci_request_buffer {
struct hv_get_perf_counter_info_params params;
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index a383c23a9070..7963658dbc22 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -416,7 +416,7 @@ static struct attribute *power7_pmu_format_attr[] = {
NULL,
};
-struct attribute_group power7_pmu_format_group = {
+static struct attribute_group power7_pmu_format_group = {
.name = "format",
.attrs = power7_pmu_format_attr,
};
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 5fde2b192fec..ab830d106ec5 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -204,7 +204,7 @@ static struct attribute *power8_pmu_format_attr[] = {
NULL,
};
-struct attribute_group power8_pmu_format_group = {
+static struct attribute_group power8_pmu_format_group = {
.name = "format",
.attrs = power8_pmu_format_attr,
};
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 788346303852..8e9a81967ff8 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -119,7 +119,7 @@ static struct attribute *power9_pmu_format_attr[] = {
NULL,
};
-struct attribute_group power9_pmu_format_group = {
+static struct attribute_group power9_pmu_format_group = {
.name = "format",
.attrs = power9_pmu_format_attr,
};
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index 5ecce543103e..a886c2c22097 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -204,7 +204,7 @@ static void pika_setup_critical_temp(struct device_node *np,
i2c_smbus_write_byte_data(client, 3, 0); /* Tlow */
irq = irq_of_parse_and_map(np, 0);
- if (irq == NO_IRQ) {
+ if (!irq) {
printk(KERN_ERR __FILE__ ": Unable to get ad7414 irq\n");
return;
}
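This and the many hunks that follow are the same mechanical conversion:
NO_IRQ is 0 on powerpc, so testing irq == NO_IRQ and !irq are equivalent.
The pattern, sketched:

	unsigned int irq;

	irq = irq_of_parse_and_map(np, 0);	/* returns 0 when mapping fails */
	if (!irq)				/* formerly: irq == NO_IRQ */
		return;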
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
index 0035d146df73..fe4d4eac7427 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -97,7 +97,7 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp,
status |= (ignore | mask);
if (status == 0xff)
- return NO_IRQ;
+ return 0;
cpld_irq = ffz(status) + offset;
@@ -110,14 +110,14 @@ static void cpld_pic_cascade(struct irq_desc *desc)
irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status,
&cpld_regs->pci_mask);
- if (irq != NO_IRQ) {
+ if (irq) {
generic_handle_irq(irq);
return;
}
irq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status,
&cpld_regs->misc_mask);
- if (irq != NO_IRQ) {
+ if (irq) {
generic_handle_irq(irq);
return;
}
@@ -177,7 +177,7 @@ mpc5121_ads_cpld_pic_init(void)
goto end;
cascade_irq = irq_of_parse_and_map(np, 0);
- if (cascade_irq == NO_IRQ)
+ if (!cascade_irq)
goto end;
/*
diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
index 8eb82b043dd8..cec3f88f153d 100644
--- a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
+++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
@@ -473,7 +473,7 @@ static int mpc512x_lpbfifo_probe(struct platform_device *pdev)
}
lpbfifo.irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
- if (lpbfifo.irq == NO_IRQ) {
+ if (!lpbfifo.irq) {
dev_err(&pdev->dev, "mapping irq failed\n");
ret = -ENODEV;
goto err0;
@@ -528,7 +528,6 @@ static struct platform_driver mpc512x_lpbfifo_driver = {
.remove = mpc512x_lpbfifo_remove,
.driver = {
.name = DRV_NAME,
- .owner = THIS_MODULE,
.of_match_table = mpc512x_lpbfifo_match,
},
};
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
index 4fe2074c88cb..fc98912f42cf 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
@@ -511,7 +511,7 @@ unsigned int mpc52xx_get_irq(void)
irq |= (MPC52xx_IRQ_L1_PERP << MPC52xx_IRQ_L1_OFFSET);
}
} else {
- return NO_IRQ;
+ return 0;
}
return irq_linear_revmap(mpc52xx_irqhost, irq);
diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
index 60e89fc9c753..8b065bdf7412 100644
--- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
+++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
@@ -131,7 +131,7 @@ int __init pq2ads_pci_init_irq(void)
}
irq = irq_of_parse_and_map(np, 0);
- if (irq == NO_IRQ) {
+ if (!irq) {
printk(KERN_ERR "No interrupt in pci pic node.\n");
of_node_put(np);
goto out;
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index dbcd0303afed..63c5ab6489c9 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -222,7 +222,6 @@ static const struct of_device_id mcu_of_match_table[] = {
static struct i2c_driver mcu_driver = {
.driver = {
.name = "mcu-mpc8349emitx",
- .owner = THIS_MODULE,
.of_match_table = mcu_of_match_table,
},
.probe = mcu_probe,
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index 2ef03e7d248c..0d6a62fc5864 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -89,7 +89,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk,
goto err;
ret = of_irq_to_resource(np, 0, &res[1]);
- if (ret == NO_IRQ)
+ if (!ret)
goto err;
pdev = platform_device_alloc("mpc83xx_spi", i);
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index fcbea4b51a78..24717d060008 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -352,7 +352,7 @@ static int pmc_probe(struct platform_device *ofdev)
return -ENODEV;
pmc_irq = irq_of_parse_and_map(np, 0);
- if (pmc_irq != NO_IRQ) {
+ if (pmc_irq) {
ret = request_irq(pmc_irq, pmc_irq_handler, IRQF_SHARED,
"pmc", ofdev);
@@ -400,7 +400,7 @@ out_syscr:
out_pmc:
iounmap(pmc_regs);
out:
- if (pmc_irq != NO_IRQ)
+ if (pmc_irq)
free_irq(pmc_irq, ofdev);
return ret;
diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
index 28720a4ded7b..954e5e8b14ef 100644
--- a/arch/powerpc/platforms/85xx/common.c
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -76,7 +76,7 @@ void __init mpc85xx_cpm2_pic_init(void)
return;
}
irq = irq_of_parse_and_map(np, 0);
- if (irq == NO_IRQ) {
+ if (!irq) {
of_node_put(np);
printk(KERN_ERR "PIC init: got no IRQ for cpm cascade\n");
return;
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index 62f171c71c4c..86f20156178e 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -196,7 +196,7 @@ static void mpc85xx_8259_cascade_handler(struct irq_desc *desc)
{
unsigned int cascade_irq = i8259_irq();
- if (cascade_irq != NO_IRQ)
+ if (cascade_irq)
/* handle an interrupt from the 8259 */
generic_handle_irq(cascade_irq);
@@ -247,7 +247,7 @@ static int mpc85xx_cds_8259_attach(void)
}
cascade_irq = irq_of_parse_and_map(cascade_node, 0);
- if (cascade_irq == NO_IRQ) {
+ if (!cascade_irq) {
printk(KERN_ERR "Failed to map cascade interrupt\n");
return -ENXIO;
}
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index 6bc07d837b1c..ed69c7ee1829 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -51,7 +51,7 @@ static void mpc85xx_8259_cascade(struct irq_desc *desc)
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int cascade_irq = i8259_irq();
- if (cascade_irq != NO_IRQ) {
+ if (cascade_irq) {
generic_handle_irq(cascade_irq);
}
chip->irq_eoi(&desc->irq_data);
@@ -96,7 +96,7 @@ void __init mpc85xx_ds_pic_init(void)
}
cascade_irq = irq_of_parse_and_map(cascade_node, 0);
- if (cascade_irq == NO_IRQ) {
+ if (!cascade_irq) {
printk(KERN_ERR "Failed to map cascade interrupt\n");
return;
}
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index b02d6a5bb035..82f8490b5aa7 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -78,7 +78,7 @@ static inline unsigned int socrates_fpga_pic_get_irq(unsigned int irq)
break;
}
if (i == 3)
- return NO_IRQ;
+ return 0;
raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
cause = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(i));
@@ -103,7 +103,7 @@ static void socrates_fpga_pic_cascade(struct irq_desc *desc)
*/
cascade_irq = socrates_fpga_pic_get_irq(irq);
- if (cascade_irq != NO_IRQ)
+ if (cascade_irq)
generic_handle_irq(cascade_irq);
chip->irq_eoi(&desc->irq_data);
}
@@ -292,7 +292,7 @@ void socrates_fpga_pic_init(struct device_node *pic)
for (i = 0; i < 3; i++) {
socrates_fpga_irqs[i] = irq_of_parse_and_map(pic, i);
- if (socrates_fpga_irqs[i] == NO_IRQ) {
+ if (!socrates_fpga_irqs[i]) {
pr_warning("FPGA PIC: can't get irq%d.\n", i);
continue;
}
diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c
index 845defa1fd19..a6c695fa4da0 100644
--- a/arch/powerpc/platforms/86xx/pic.c
+++ b/arch/powerpc/platforms/86xx/pic.c
@@ -22,7 +22,7 @@ static void mpc86xx_8259_cascade(struct irq_desc *desc)
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int cascade_irq = i8259_irq();
- if (cascade_irq != NO_IRQ)
+ if (cascade_irq)
generic_handle_irq(cascade_irq);
chip->irq_eoi(&desc->irq_data);
@@ -58,7 +58,7 @@ void __init mpc86xx_init_irq(void)
}
cascade_irq = irq_of_parse_and_map(cascade_node, 0);
- if (cascade_irq == NO_IRQ) {
+ if (!cascade_irq) {
printk(KERN_ERR "Failed to map cascade interrupt\n");
return;
}
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index b1ab6e96cb31..f81069f79a94 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -241,6 +241,6 @@ void __init mpc8xx_pics_init(void)
}
irq = cpm_pic_init();
- if (irq != NO_IRQ)
+ if (irq)
irq_set_chained_handler(irq, cpm_cascade);
}
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index f32edec13fd1..ca2da30ad2ab 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -74,6 +74,7 @@ config PPC_BOOK3S_64
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select ARCH_SUPPORTS_NUMA_BALANCING
select IRQ_WORK
+ select HAVE_KERNEL_XZ
config PPC_BOOK3E_64
bool "Embedded processors"
@@ -86,6 +87,7 @@ endchoice
choice
prompt "CPU selection"
depends on PPC64
+ default POWER8_CPU if CPU_LITTLE_ENDIAN
default GENERIC_CPU
help
This will create a kernel which is optimised for a particular CPU.
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index aed7714495c1..8b55c5f19d4c 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -271,7 +271,7 @@ static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
for_each_pci_msi_entry(entry, dev) {
virq = irq_create_direct_mapping(msic->irq_domain);
- if (virq == NO_IRQ) {
+ if (!virq) {
dev_warn(&dev->dev,
"axon_msi: virq allocation failed!\n");
return -1;
@@ -293,7 +293,7 @@ static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n");
for_each_pci_msi_entry(entry, dev) {
- if (entry->irq == NO_IRQ)
+ if (!entry->irq)
continue;
irq_set_msi_desc(entry->irq, NULL);
@@ -375,7 +375,7 @@ static int axon_msi_probe(struct platform_device *device)
}
virq = irq_of_parse_and_map(dn, 0);
- if (virq == NO_IRQ) {
+ if (!virq) {
printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n",
dn->full_name);
goto out_free_fifo;
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
index 1428d583c238..b926438d73af 100644
--- a/arch/powerpc/platforms/cell/cbe_regs.c
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -189,7 +189,7 @@ static struct device_node *cbe_get_be_node(int cpu_id)
return NULL;
}
-void __init cbe_fill_regs_map(struct cbe_regs_map *map)
+static void __init cbe_fill_regs_map(struct cbe_regs_map *map)
{
if(map->be_node) {
struct device_node *be, *np;
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 9f609fc8d331..a6bbbaba14a3 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -123,7 +123,7 @@ static void iic_ioexc_cascade(struct irq_desc *desc)
unsigned int cirq =
irq_linear_revmap(iic_host,
base | cascade);
- if (cirq != NO_IRQ)
+ if (cirq)
generic_handle_irq(cirq);
}
/* post-ack level interrupts */
@@ -153,10 +153,10 @@ static unsigned int iic_get_irq(void)
*(unsigned long *) &pending =
in_be64((u64 __iomem *) &iic->regs->pending_destr);
if (!(pending.flags & CBE_IIC_IRQ_VALID))
- return NO_IRQ;
+ return 0;
virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending));
- if (virq == NO_IRQ)
- return NO_IRQ;
+ if (!virq)
+ return 0;
iic->eoi_stack[++iic->eoi_ptr] = pending.prio;
BUG_ON(iic->eoi_ptr > 15);
return virq;
@@ -187,18 +187,12 @@ void iic_message_pass(int cpu, int msg)
out_be64(&per_cpu(cpu_iic, cpu).regs->generate, (0xf - msg) << 4);
}
-struct irq_domain *iic_get_irq_host(int node)
-{
- return iic_host;
-}
-EXPORT_SYMBOL_GPL(iic_get_irq_host);
-
static void iic_request_ipi(int msg)
{
int virq;
virq = irq_create_mapping(iic_host, iic_msg_to_irq(msg));
- if (virq == NO_IRQ) {
+ if (!virq) {
printk(KERN_ERR
"iic: failed to map IPI %s\n", smp_ipi_name[msg]);
return;
@@ -353,7 +347,7 @@ static int __init setup_iic(void)
cascade |= 1 << IIC_IRQ_CLASS_SHIFT;
cascade |= IIC_UNIT_IIC;
cascade = irq_create_mapping(iic_host, cascade);
- if (cascade == NO_IRQ)
+ if (!cascade)
continue;
/*
* irq_data is a generic pointer that gets passed back
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index f7d1a4953ea0..7ff51f96a00e 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -411,7 +411,7 @@ static void cell_iommu_enable_hardware(struct cbe_iommu *iommu)
virq = irq_create_mapping(NULL,
IIC_IRQ_IOEX_ATI | (iommu->nid << IIC_IRQ_NODE_SHIFT));
- BUG_ON(virq == NO_IRQ);
+ BUG_ON(!virq);
ret = request_irq(virq, ioc_interrupt, 0, iommu->name, iommu);
BUG_ON(ret);
@@ -651,7 +651,7 @@ static int dma_fixed_dma_supported(struct device *dev, u64 mask)
static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask);
-struct dma_map_ops dma_iommu_fixed_ops = {
+static struct dma_map_ops dma_iommu_fixed_ops = {
.alloc = dma_fixed_alloc_coherent,
.free = dma_fixed_free_coherent,
.map_sg = dma_fixed_map_sg,
diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c
index 348a27b12512..e3ad0c38f017 100644
--- a/arch/powerpc/platforms/cell/pmu.c
+++ b/arch/powerpc/platforms/cell/pmu.c
@@ -385,7 +385,7 @@ static int __init cbe_init_pm_irq(void)
for_each_online_node(node) {
irq = irq_create_mapping(NULL, IIC_IRQ_IOEX_PMI |
(node << IIC_IRQ_NODE_SHIFT));
- if (irq == NO_IRQ) {
+ if (!irq) {
printk("ERROR: Unable to allocate irq for node %d\n",
node);
return -EINVAL;
@@ -412,7 +412,7 @@ void cbe_sync_irq(int node)
IIC_IRQ_IOEX_PMI
| (node << IIC_IRQ_NODE_SHIFT));
- if (irq == NO_IRQ) {
+ if (!irq) {
printk(KERN_WARNING "ERROR, unable to get existing irq %d " \
"for node %d\n", irq, node);
return;
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
index 2d4f60c0119a..460ab392f0e7 100644
--- a/arch/powerpc/platforms/cell/ras.c
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -298,7 +298,7 @@ int cbe_sysreset_hack(void)
}
#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */
-int __init cbe_ptcal_init(void)
+static int __init cbe_ptcal_init(void)
{
int ret;
ptcal_start_tok = rtas_token("ibm,cbe-start-ptcal");
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index d06dcac66fcb..ff924af00e78 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -207,11 +207,11 @@ static void spider_irq_cascade(struct irq_desc *desc)
cs = in_be32(pic->regs + TIR_CS) >> 24;
if (cs == SPIDER_IRQ_INVALID)
- virq = NO_IRQ;
+ virq = 0;
else
virq = irq_linear_revmap(pic->host, cs);
- if (virq != NO_IRQ)
+ if (virq)
generic_handle_irq(virq);
chip->irq_eoi(&desc->irq_data);
@@ -245,19 +245,19 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
/* Now do the horrible hacks */
tmp = of_get_property(of_node, "#interrupt-cells", NULL);
if (tmp == NULL)
- return NO_IRQ;
+ return 0;
intsize = *tmp;
imap = of_get_property(of_node, "interrupt-map", &imaplen);
if (imap == NULL || imaplen < (intsize + 1))
- return NO_IRQ;
+ return 0;
iic = of_find_node_by_phandle(imap[intsize]);
if (iic == NULL)
- return NO_IRQ;
+ return 0;
imap += intsize + 1;
tmp = of_get_property(iic, "#interrupt-cells", NULL);
if (tmp == NULL) {
of_node_put(iic);
- return NO_IRQ;
+ return 0;
}
intsize = *tmp;
/* Assume unit is last entry of interrupt specifier */
@@ -266,7 +266,7 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
tmp = of_get_property(iic, "ibm,interrupt-server-ranges", NULL);
if (tmp == NULL) {
of_node_put(iic);
- return NO_IRQ;
+ return 0;
}
/* ugly as hell but works for now */
pic->node_id = (*tmp) >> 1;
@@ -281,7 +281,7 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
(pic->node_id << IIC_IRQ_NODE_SHIFT) |
(2 << IIC_IRQ_CLASS_SHIFT) |
unit);
- if (virq == NO_IRQ)
+ if (!virq)
printk(KERN_ERR "spider_pic: failed to map cascade !");
return virq;
}
@@ -318,7 +318,7 @@ static void __init spider_init_one(struct device_node *of_node, int chip,
/* Hook up the cascade interrupt to the iic and nodeid */
virq = spider_find_cascade_and_node(pic);
- if (virq == NO_IRQ)
+ if (!virq)
return;
irq_set_handler_data(virq, pic);
irq_set_chained_handler(virq, spider_irq_cascade);
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index bb4a8e07c229..e84d8fbc2e21 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -402,7 +402,7 @@ static int spu_request_irqs(struct spu *spu)
{
int ret = 0;
- if (spu->irqs[0] != NO_IRQ) {
+ if (spu->irqs[0]) {
snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0",
spu->number);
ret = request_irq(spu->irqs[0], spu_irq_class_0,
@@ -410,7 +410,7 @@ static int spu_request_irqs(struct spu *spu)
if (ret)
goto bail0;
}
- if (spu->irqs[1] != NO_IRQ) {
+ if (spu->irqs[1]) {
snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1",
spu->number);
ret = request_irq(spu->irqs[1], spu_irq_class_1,
@@ -418,7 +418,7 @@ static int spu_request_irqs(struct spu *spu)
if (ret)
goto bail1;
}
- if (spu->irqs[2] != NO_IRQ) {
+ if (spu->irqs[2]) {
snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2",
spu->number);
ret = request_irq(spu->irqs[2], spu_irq_class_2,
@@ -429,10 +429,10 @@ static int spu_request_irqs(struct spu *spu)
return 0;
bail2:
- if (spu->irqs[1] != NO_IRQ)
+ if (spu->irqs[1])
free_irq(spu->irqs[1], spu);
bail1:
- if (spu->irqs[0] != NO_IRQ)
+ if (spu->irqs[0])
free_irq(spu->irqs[0], spu);
bail0:
return ret;
@@ -440,11 +440,11 @@ bail0:
static void spu_free_irqs(struct spu *spu)
{
- if (spu->irqs[0] != NO_IRQ)
+ if (spu->irqs[0])
free_irq(spu->irqs[0], spu);
- if (spu->irqs[1] != NO_IRQ)
+ if (spu->irqs[1])
free_irq(spu->irqs[1], spu);
- if (spu->irqs[2] != NO_IRQ)
+ if (spu->irqs[2])
free_irq(spu->irqs[2], spu);
}
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index 21b4bfb97200..672d310dcf14 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -105,7 +105,10 @@ static int __init spu_map_interrupts_old(struct spu *spu,
spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc);
/* Right now, we only fail if class 2 failed */
- return spu->irqs[2] == NO_IRQ ? -EINVAL : 0;
+ if (!spu->irqs[2])
+ return -EINVAL;
+
+ return 0;
}
static void __iomem * __init spu_map_prop_old(struct spu *spu,
@@ -191,7 +194,7 @@ static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
pr_debug(" irq %d no 0x%x on %s\n", i, oirq.args[0],
oirq.np->full_name);
spu->irqs[i] = irq_create_of_mapping(&oirq);
- if (spu->irqs[i] == NO_IRQ) {
+ if (!spu->irqs[i]) {
pr_debug("spu_new: failed to map it !\n");
goto err;
}
@@ -202,7 +205,7 @@ err:
pr_debug("failed to map irq %x for spu %s\n", *oirq.args,
spu->name);
for (; i >= 0; i--) {
- if (spu->irqs[i] != NO_IRQ)
+ if (spu->irqs[i])
irq_dispose_mapping(spu->irqs[i]);
}
return ret;
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 5be15cff758d..2975754c65ea 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -496,8 +496,10 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
gang = alloc_spu_gang();
SPUFS_I(inode)->i_ctx = NULL;
SPUFS_I(inode)->i_gang = gang;
- if (!gang)
+ if (!gang) {
+ ret = -ENOMEM;
goto out_iput;
+ }
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index bfb300633dfe..0ce1b45f02a8 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -368,7 +368,7 @@ static void chrp_8259_cascade(struct irq_desc *desc)
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int cascade_irq = i8259_irq();
- if (cascade_irq != NO_IRQ)
+ if (cascade_irq)
generic_handle_irq(cascade_irq);
chip->irq_eoi(&desc->irq_data);
@@ -514,7 +514,7 @@ static void __init chrp_find_8259(void)
}
if (chrp_mpic != NULL) {
cascade_irq = irq_of_parse_and_map(pic, 0);
- if (cascade_irq == NO_IRQ)
+ if (!cascade_irq)
printk(KERN_ERR "i8259: failed to map cascade irq\n");
else
irq_set_chained_handler(cascade_irq,
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index b7866e01483d..ade83829d5e8 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -181,7 +181,7 @@ unsigned int flipper_pic_get_irq(void)
irq_status = in_be32(io_base + FLIPPER_ICR) &
in_be32(io_base + FLIPPER_IMR);
if (irq_status == 0)
- return NO_IRQ; /* no more IRQs pending */
+ return 0; /* no more IRQs pending */
irq = __ffs(irq_status);
return irq_linear_revmap(flipper_irq_host, irq);
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index 9b7975706bfc..89c54de88b7a 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -114,7 +114,7 @@ static unsigned int __hlwd_pic_get_irq(struct irq_domain *h)
irq_status = in_be32(io_base + HW_BROADWAY_ICR) &
in_be32(io_base + HW_BROADWAY_IMR);
if (irq_status == 0)
- return NO_IRQ; /* no more IRQs pending */
+ return 0; /* no more IRQs pending */
irq = __ffs(irq_status);
return irq_linear_revmap(h, irq);
@@ -131,7 +131,7 @@ static void hlwd_pic_irq_cascade(struct irq_desc *desc)
raw_spin_unlock(&desc->lock);
virq = __hlwd_pic_get_irq(irq_domain);
- if (virq != NO_IRQ)
+ if (virq)
generic_handle_irq(virq);
else
pr_err("spurious interrupt!\n");
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index dafba1057a47..dfd310031549 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -26,7 +26,7 @@
#include <linux/tty.h>
#include <linux/serial_core.h>
#include <linux/of_platform.h>
-#include <linux/module.h>
+#include <linux/extable.h>
#include <asm/time.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index 80804f9916ee..f97bab8e37a2 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -23,7 +23,7 @@
#include <linux/pci.h>
#include <linux/kdev_t.h>
#include <linux/console.h>
-#include <linux/module.h>
+#include <linux/extable.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/seq_file.h>
diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c
index ed7321d6772e..8e3590941960 100644
--- a/arch/powerpc/platforms/embedded6xx/mvme5100.c
+++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c
@@ -47,7 +47,7 @@ static void mvme5100_8259_cascade(struct irq_desc *desc)
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int cascade_irq = i8259_irq();
- if (cascade_irq != NO_IRQ)
+ if (cascade_irq)
generic_handle_irq(cascade_irq);
chip->irq_eoi(&desc->irq_data);
@@ -84,7 +84,7 @@ static void __init mvme5100_pic_init(void)
}
cirq = irq_of_parse_and_map(cp, 0);
- if (cirq == NO_IRQ) {
+ if (!cirq) {
pr_warn("mvme5100_pic_init: no cascade interrupt?\n");
return;
}
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index a2f89e6326ce..a0589aac4163 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -552,7 +552,7 @@ void maple_pci_irq_fixup(struct pci_dev *dev)
pci_bus_to_host(dev->bus) == u4_pcie) {
printk(KERN_DEBUG "Fixup U4 PCIe IRQ\n");
dev->irq = irq_create_mapping(NULL, 1);
- if (dev->irq != NO_IRQ)
+ if (dev->irq)
irq_set_irq_type(dev->irq, IRQ_TYPE_LEVEL_LOW);
}
@@ -562,7 +562,7 @@ void maple_pci_irq_fixup(struct pci_dev *dev)
if (dev->vendor == PCI_VENDOR_ID_AMD &&
dev->device == PCI_DEVICE_ID_AMD_8111_IDE &&
(dev->class & 5) != 5) {
- dev->irq = NO_IRQ;
+ dev->irq = 0;
}
DBG(" <- maple_pci_irq_fixup\n");
@@ -648,7 +648,7 @@ int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel)
return defirq;
}
irq = irq_of_parse_and_map(np, channel & 0x1);
- if (irq == NO_IRQ) {
+ if (!irq) {
printk("Failed to map onboard IDE interrupt for channel %d\n",
channel);
return defirq;
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index 3c30c7a4534d..b7f937563827 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -156,7 +156,7 @@ static void __noreturn maple_halt(void)
}
#ifdef CONFIG_SMP
-struct smp_ops_t maple_smp_ops = {
+static struct smp_ops_t maple_smp_ops = {
.probe = smp_mpic_probe,
.message_pass = smp_mpic_message_pass,
.kick_cpu = smp_generic_kick_cpu,
@@ -176,7 +176,7 @@ static void __init maple_use_rtas_reboot_and_halt_if_present(void)
}
}
-void __init maple_setup_arch(void)
+static void __init maple_setup_arch(void)
{
/* init to some ~sane value until calibrate_delay() runs */
loops_per_jiffy = 50000000;
diff --git a/arch/powerpc/platforms/pasemi/Kconfig b/arch/powerpc/platforms/pasemi/Kconfig
index 00d4b28cbb60..c7f1dbe94de7 100644
--- a/arch/powerpc/platforms/pasemi/Kconfig
+++ b/arch/powerpc/platforms/pasemi/Kconfig
@@ -14,6 +14,16 @@ config PPC_PASEMI
menu "PA Semi PWRficient options"
depends on PPC_PASEMI
+config PPC_PASEMI_NEMO
+	bool "Nemo motherboard support"
+ depends on PPC_PASEMI
+ select PPC_I8259
+ help
+ This option enables support for the 'Nemo' motherboard
+	  used in A-EON's AmigaOne X1000. This consists of some
+	  device tree patches and workarounds for the SB600 south
+	  bridge that provides SATA/USB/audio.
+
config PPC_PASEMI_IOMMU
bool "PA Semi IOMMU support"
depends on PPC_PASEMI
diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c
index ddf635000c6b..c23e60959aa8 100644
--- a/arch/powerpc/platforms/pasemi/gpio_mdio.c
+++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c
@@ -306,7 +306,7 @@ static struct platform_driver gpio_mdio_driver =
},
};
-int gpio_mdio_init(void)
+static int gpio_mdio_init(void)
{
struct device_node *np;
@@ -326,7 +326,7 @@ int gpio_mdio_init(void)
}
module_init(gpio_mdio_init);
-void gpio_mdio_exit(void)
+static void gpio_mdio_exit(void)
{
platform_driver_unregister(&gpio_mdio_driver);
if (gpio_regs)
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 309d9ccccd50..e74adc4e7fd8 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -187,6 +187,11 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev)
if (dev->vendor == 0x1959 && dev->device == 0xa007 &&
!firmware_has_feature(FW_FEATURE_LPAR)) {
dev->dev.archdata.dma_ops = &dma_direct_ops;
+ /*
+	 * Set the coherent DMA mask to prevent the IOMMU
+	 * from being used unnecessarily.
+ */
+ dev->dev.coherent_dma_mask = DMA_BIT_MASK(44);
return;
}
#endif
@@ -194,7 +199,7 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev)
set_iommu_table_base(&dev->dev, &iommu_table_iobmap);
}
-int __init iob_init(struct device_node *dn)
+static int __init iob_init(struct device_node *dn)
{
unsigned long tmp;
u32 regword;
diff --git a/arch/powerpc/platforms/pasemi/misc.c b/arch/powerpc/platforms/pasemi/misc.c
index e0ab299763c1..8571e7bf78b6 100644
--- a/arch/powerpc/platforms/pasemi/misc.c
+++ b/arch/powerpc/platforms/pasemi/misc.c
@@ -76,7 +76,7 @@ static int __init pasemi_register_i2c_devices(void)
}
info.irq = irq_of_parse_and_map(node, 0);
- if (info.irq == NO_IRQ)
+ if (!info.irq)
info.irq = -1;
if (find_i2c_driver(node, &info) < 0)
diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c
index d9af76342d99..d9cd510c8865 100644
--- a/arch/powerpc/platforms/pasemi/msi.c
+++ b/arch/powerpc/platforms/pasemi/msi.c
@@ -68,7 +68,7 @@ static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev)
pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev);
for_each_pci_msi_entry(entry, pdev) {
- if (entry->irq == NO_IRQ)
+ if (!entry->irq)
continue;
hwirq = virq_to_hw(entry->irq);
@@ -109,7 +109,7 @@ static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
}
virq = irq_create_mapping(msi_mpic->irqhost, hwirq);
- if (virq == NO_IRQ) {
+ if (!virq) {
pr_debug("pasemi_msi: failed mapping hwirq 0x%x\n",
hwirq);
msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq,
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
index e86c1bd08f1f..3182400cf48f 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -59,7 +59,7 @@ struct mce_regs {
static struct mce_regs mce_regs[MAX_MCE_REGS];
static int num_mce_regs;
-static int nmi_virq = NO_IRQ;
+static int nmi_virq = 0;
static void __noreturn pas_restart(char *cmd)
@@ -105,7 +105,7 @@ static void pas_take_timebase(void)
arch_spin_unlock(&timebase_lock);
}
-struct smp_ops_t pas_smp_ops = {
+static struct smp_ops_t pas_smp_ops = {
.probe = smp_mpic_probe,
.message_pass = smp_mpic_message_pass,
.kick_cpu = smp_generic_kick_cpu,
@@ -115,7 +115,7 @@ struct smp_ops_t pas_smp_ops = {
};
#endif /* CONFIG_SMP */
-void __init pas_setup_arch(void)
+static void __init pas_setup_arch(void)
{
#ifdef CONFIG_SMP
/* Setup SMP callback */
@@ -264,7 +264,7 @@ static int pas_machine_check_handler(struct pt_regs *regs)
srr0 = regs->nip;
srr1 = regs->msr;
- if (nmi_virq != NO_IRQ && mpic_get_mcirq() == nmi_virq) {
+ if (nmi_virq && mpic_get_mcirq() == nmi_virq) {
printk(KERN_ERR "NMI delivered\n");
debugger(regs);
mpic_end_irq(irq_get_irq_data(nmi_virq));
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index 6d6f277477aa..c8c217b7dd33 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -401,7 +401,7 @@ static int kw_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
{
struct pmac_i2c_host_kw *host = bus->hostdata;
u8 mode_reg = host->speed;
- int use_irq = host->irq != NO_IRQ && !bus->polled;
+ int use_irq = host->irq && !bus->polled;
/* Setup mode & subaddress if any */
switch(bus->mode) {
@@ -535,7 +535,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
break;
}
host->irq = irq_of_parse_and_map(np, 0);
- if (host->irq == NO_IRQ)
+ if (!host->irq)
printk(KERN_WARNING
"low_i2c: Failed to map interrupt for %s\n",
np->full_name);
@@ -557,7 +557,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
*/
if (request_irq(host->irq, kw_i2c_irq, IRQF_NO_SUSPEND,
"keywest i2c", host))
- host->irq = NO_IRQ;
+ host->irq = 0;
printk(KERN_INFO "KeyWest i2c @0x%08x irq %d %s\n",
*addrp, host->irq, np->full_name);
diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
index e49d07f3d542..459138ed4571 100644
--- a/arch/powerpc/platforms/powermac/pfunc_base.c
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -26,7 +26,7 @@ static irqreturn_t macio_gpio_irq(int irq, void *data)
static int macio_do_gpio_irq_enable(struct pmf_function *func)
{
unsigned int irq = irq_of_parse_and_map(func->node, 0);
- if (irq == NO_IRQ)
+ if (!irq)
return -EINVAL;
return request_irq(irq, macio_gpio_irq, 0, func->node->name, func);
}
@@ -34,7 +34,7 @@ static int macio_do_gpio_irq_enable(struct pmf_function *func)
static int macio_do_gpio_irq_disable(struct pmf_function *func)
{
unsigned int irq = irq_of_parse_and_map(func->node, 0);
- if (irq == NO_IRQ)
+ if (!irq)
return -EINVAL;
free_irq(irq, func);
return 0;
diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c
index 43075081721f..695e8c4d4224 100644
--- a/arch/powerpc/platforms/powermac/pfunc_core.c
+++ b/arch/powerpc/platforms/powermac/pfunc_core.c
@@ -804,7 +804,7 @@ void pmf_unregister_driver(struct device_node *np)
}
EXPORT_SYMBOL_GPL(pmf_unregister_driver);
-struct pmf_function *__pmf_find_function(struct device_node *target,
+static struct pmf_function *__pmf_find_function(struct device_node *target,
const char *name, u32 flags)
{
struct device_node *actor = of_node_get(target);
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 981546345033..f5f9ad7c3398 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -251,7 +251,7 @@ static unsigned int pmac_pic_get_irq(void)
}
raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
if (unlikely(irq < 0))
- return NO_IRQ;
+ return 0;
return irq_linear_revmap(pmac_pic_host, irq);
}
@@ -389,7 +389,7 @@ static void __init pmac_pic_probe_oldstyle(void)
out_le32(&pmac_irq_hw[i]->enable, 0);
/* Hookup cascade irq */
- if (slave && pmac_irq_cascade != NO_IRQ)
+ if (slave && pmac_irq_cascade)
setup_irq(pmac_irq_cascade, &gatwick_cascade_action);
printk(KERN_INFO "irq: System has %d possible interrupts\n", max_irqs);
@@ -444,7 +444,7 @@ static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic)
pswitch = of_find_node_by_name(NULL, "programmer-switch");
if (pswitch) {
nmi_irq = irq_of_parse_and_map(pswitch, 0);
- if (nmi_irq != NO_IRQ) {
+ if (nmi_irq) {
mpic_irq_set_priority(nmi_irq, 9);
setup_irq(nmi_irq, &xmon_action);
}
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 834868b9fdc9..c9eb7d6540ea 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -852,37 +852,33 @@ static void smp_core99_setup_cpu(int cpu_nr)
#ifdef CONFIG_PPC64
#ifdef CONFIG_HOTPLUG_CPU
-static int smp_core99_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static unsigned int smp_core99_host_open;
+
+static int smp_core99_cpu_prepare(unsigned int cpu)
{
int rc;
- switch(action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- /* Open i2c bus if it was used for tb sync */
- if (pmac_tb_clock_chip_host) {
- rc = pmac_i2c_open(pmac_tb_clock_chip_host, 1);
- if (rc) {
- pr_err("Failed to open i2c bus for time sync\n");
- return notifier_from_errno(rc);
- }
+ /* Open i2c bus if it was used for tb sync */
+ if (pmac_tb_clock_chip_host && !smp_core99_host_open) {
+ rc = pmac_i2c_open(pmac_tb_clock_chip_host, 1);
+ if (rc) {
+ pr_err("Failed to open i2c bus for time sync\n");
+ return notifier_from_errno(rc);
}
- break;
- case CPU_ONLINE:
- case CPU_UP_CANCELED:
- /* Close i2c bus if it was used for tb sync */
- if (pmac_tb_clock_chip_host)
- pmac_i2c_close(pmac_tb_clock_chip_host);
- break;
- default:
- break;
+ smp_core99_host_open = 1;
}
- return NOTIFY_OK;
+ return 0;
}
-static struct notifier_block smp_core99_cpu_nb = {
- .notifier_call = smp_core99_cpu_notify,
-};
+static int smp_core99_cpu_online(unsigned int cpu)
+{
+ /* Close i2c bus if it was used for tb sync */
+ if (pmac_tb_clock_chip_host && smp_core99_host_open) {
+ pmac_i2c_close(pmac_tb_clock_chip_host);
+ smp_core99_host_open = 0;
+ }
+ return 0;
+}
#endif /* CONFIG_HOTPLUG_CPU */
static void __init smp_core99_bringup_done(void)
@@ -902,7 +898,11 @@ static void __init smp_core99_bringup_done(void)
g5_phy_disable_cpu1();
}
#ifdef CONFIG_HOTPLUG_CPU
- register_cpu_notifier(&smp_core99_cpu_nb);
+ cpuhp_setup_state_nocalls(CPUHP_POWERPC_PMAC_PREPARE,
+ "powerpc/pmac:prepare", smp_core99_cpu_prepare,
+ NULL);
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "powerpc/pmac:online",
+ smp_core99_cpu_online, NULL);
#endif
if (ppc_md.progress)
@@ -979,7 +979,7 @@ static void pmac_cpu_die(void)
#endif /* CONFIG_HOTPLUG_CPU */
/* Core99 Macs (dual G4s and G5s) */
-struct smp_ops_t core99_smp_ops = {
+static struct smp_ops_t core99_smp_ops = {
.message_pass = smp_mpic_message_pass,
.probe = smp_core99_probe,
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 86544ea85dc3..2354ea51e871 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -763,7 +763,8 @@ int pnv_eeh_phb_reset(struct pci_controller *hose, int option)
* reset followed by hot reset on root bus. So we also
* need the PCI bus settlement delay.
*/
- rc = pnv_eeh_poll(phb->opal_id);
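+	/*
+	 * Only poll for completion when the reset call reports the reset
+	 * as still in progress (a positive return value).
+	 */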
+ if (rc > 0)
+ rc = pnv_eeh_poll(phb->opal_id);
if (option == EEH_RESET_DEACTIVATE) {
if (system_state < SYSTEM_RUNNING)
udelay(1000 * EEH_PE_RST_SETTLE_TIME);
@@ -806,7 +807,8 @@ static int pnv_eeh_root_reset(struct pci_controller *hose, int option)
goto out;
/* Poll state of the PHB until the request is done */
- rc = pnv_eeh_poll(phb->opal_id);
+ if (rc > 0)
+ rc = pnv_eeh_poll(phb->opal_id);
if (option == EEH_RESET_DEACTIVATE)
msleep(EEH_PE_RST_SETTLE_TIME);
out:
@@ -1090,10 +1092,16 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
}
}
- bus = eeh_pe_bus_get(pe);
if (pe->type & EEH_PE_VF)
return pnv_eeh_reset_vf_pe(pe, option);
+ bus = eeh_pe_bus_get(pe);
+ if (!bus) {
+ pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n",
+ __func__, pe->phb->global_number, pe->addr);
+ return -EIO;
+ }
+
if (pci_is_root_bus(bus) ||
pci_is_root_bus(bus->parent))
return pnv_eeh_root_reset(hose, option);
@@ -1306,7 +1314,7 @@ static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose)
return;
}
- switch (data->type) {
+ switch (be16_to_cpu(data->type)) {
case OPAL_P7IOC_DIAG_TYPE_RGC:
pr_info("P7IOC diag-data for RGC\n\n");
pnv_eeh_dump_hub_diag_common(data);
@@ -1538,7 +1546,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
/* Try best to clear it */
opal_pci_eeh_freeze_clear(phb->opal_id,
- frozen_pe_no,
+ be64_to_cpu(frozen_pe_no),
OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
ret = EEH_NEXT_ERR_NONE;
} else if ((*pe)->state & EEH_PE_ISOLATED ||
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 00e1a0195c78..aec85e778028 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -115,7 +115,7 @@ static u64 dma_npu_get_required_mask(struct device *dev)
return 0;
}
-struct dma_map_ops dma_npu_ops = {
+static struct dma_map_ops dma_npu_ops = {
.map_page = dma_npu_map_page,
.map_sg = dma_npu_map_sg,
.alloc = dma_npu_alloc,
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
index 2ee96431f736..4c827826c05e 100644
--- a/arch/powerpc/platforms/powernv/opal-dump.c
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -370,6 +370,7 @@ static irqreturn_t process_dump(int irq, void *data)
uint32_t dump_id, dump_size, dump_type;
struct dump_obj *dump;
char name[22];
+ struct kobject *kobj;
rc = dump_read_info(&dump_id, &dump_size, &dump_type);
if (rc != OPAL_SUCCESS)
@@ -381,8 +382,12 @@ static irqreturn_t process_dump(int irq, void *data)
* that gracefully and not create two conflicting
* entries.
*/
- if (kset_find_obj(dump_kset, name))
+ kobj = kset_find_obj(dump_kset, name);
+ if (kobj) {
+ /* Drop reference added by kset_find_obj() */
+ kobject_put(kobj);
return 0;
+ }
dump = create_dump_obj(dump_id, dump_size, dump_type);
if (!dump)
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
index 37f959bf392e..f2344cbd2f46 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -247,6 +247,7 @@ static irqreturn_t elog_event(int irq, void *data)
uint64_t elog_type;
int rc;
char name[2+16+1];
+ struct kobject *kobj;
rc = opal_get_elog_size(&id, &size, &type);
if (rc != OPAL_SUCCESS) {
@@ -269,8 +270,12 @@ static irqreturn_t elog_event(int irq, void *data)
* that gracefully and not create two conflicting
* entries.
*/
- if (kset_find_obj(elog_kset, name))
+ kobj = kset_find_obj(elog_kset, name);
+ if (kobj) {
+ /* Drop reference added by kset_find_obj() */
+ kobject_put(kobj);
return IRQ_HANDLED;
+ }
create_elog_obj(log_id, elog_size, elog_type);
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index e505223b4ec5..998316bf2dad 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -222,13 +222,14 @@ int __init opal_event_init(void)
/* Get hardware and virtual IRQ */
irq = be32_to_cpup(irqs);
virq = irq_create_mapping(NULL, irq);
- if (virq == NO_IRQ) {
+ if (!virq) {
pr_warn("Failed to map irq 0x%x\n", irq);
continue;
}
/* Install interrupt handler */
- rc = request_irq(virq, opal_interrupt, 0, "opal", NULL);
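+	/* The OPAL event interrupt is level sensitive (active low). */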
+ rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW,
+ "opal", NULL);
if (rc) {
irq_dispose_mapping(virq);
pr_warn("Error %d requesting irq %d (0x%x)\n",
@@ -259,7 +260,7 @@ machine_arch_initcall(powernv, opal_event_init);
int opal_event_request(unsigned int opal_event_nr)
{
if (WARN_ON_ONCE(!opal_event_irqchip.domain))
- return NO_IRQ;
+ return 0;
return irq_create_mapping(opal_event_irqchip.domain, opal_event_nr);
}
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 3d29d40eb0e9..44d2d842cee7 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -208,6 +208,7 @@ OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE);
OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD);
OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD);
OPAL_CALL(opal_set_xive, OPAL_SET_XIVE);
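+/* Real-mode variant of the above, callable with the MMU off. */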
+OPAL_CALL_REAL(opal_rm_set_xive, OPAL_SET_XIVE);
OPAL_CALL(opal_get_xive, OPAL_GET_XIVE);
OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 8b4fc68cebcb..6c9a65b52e63 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -399,6 +399,7 @@ static int opal_recover_mce(struct pt_regs *regs,
if (!(regs->msr & MSR_RI)) {
/* If MSR_RI isn't set, we cannot recover */
+ pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
recovered = 0;
} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
/* Platform corrected itself */
diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c
index 1349a099c74c..94498a04558b 100644
--- a/arch/powerpc/platforms/powernv/pci-cxl.c
+++ b/arch/powerpc/platforms/powernv/pci-cxl.c
@@ -344,7 +344,7 @@ int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
return (hwirq ? hwirq : -ENOMEM);
virq = irq_create_mapping(NULL, hwirq);
- if (virq == NO_IRQ) {
+ if (!virq) {
pr_warn("%s: Failed to map cxl mode MSI to linux irq\n",
pci_name(pdev));
return -ENOMEM;
@@ -374,7 +374,7 @@ void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
return;
for_each_pci_msi_entry(entry, pdev) {
- if (entry->irq == NO_IRQ)
+ if (!entry->irq)
continue;
hwirq = virq_to_hw(entry->irq);
irq_set_msi_desc(entry->irq, NULL);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 6b9528307f62..d4b33dd2d9e7 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -111,17 +111,44 @@ static int __init iommu_setup(char *str)
}
early_param("iommu", iommu_setup);
-static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
+static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
{
- return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
- (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
+ /*
+ * WARNING: We cannot rely on the resource flags. The Linux PCI
+ * allocation code sometimes decides to put a 64-bit prefetchable
+ * BAR in the 32-bit window, so we have to compare the addresses.
+ *
+ * For simplicity we only test resource start.
+ */
+ return (r->start >= phb->ioda.m64_base &&
+ r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
+}
+
+static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags)
+{
+ unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
+
+ return (resource_flags & flags) == flags;
}
static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
{
+ s64 rc;
+
phb->ioda.pe_array[pe_no].phb = phb;
phb->ioda.pe_array[pe_no].pe_number = pe_no;
+ /*
+ * Clear the PE frozen state as it might be put into frozen state
+ * in the last PCI remove path. It's not harmful to do so when the
+ * PE is already in unfrozen state.
+ */
+ rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+ OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+ if (rc != OPAL_SUCCESS)
+ pr_warn("%s: Error %lld unfreezing PHB#%d-PE#%d\n",
+ __func__, rc, phb->hose->global_number, pe_no);
+
return &phb->ioda.pe_array[pe_no];
}
@@ -142,7 +169,7 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
- unsigned long pe = phb->ioda.total_pe_num - 1;
+ long pe;
for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) {
if (!test_and_set_bit(pe, phb->ioda.pe_alloc))
@@ -155,11 +182,12 @@ static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
{
struct pnv_phb *phb = pe->phb;
+ unsigned int pe_num = pe->pe_number;
WARN_ON(pe->pdev);
memset(pe, 0, sizeof(struct pnv_ioda_pe));
- clear_bit(pe->pe_number, phb->ioda.pe_alloc);
+ clear_bit(pe_num, phb->ioda.pe_alloc);
}
/* The default M64 BAR is shared by all PEs */
@@ -229,7 +257,7 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
sgsz = phb->ioda.m64_segsize;
for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
r = &pdev->resource[i];
- if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags))
+ if (!r->parent || !pnv_pci_is_m64(phb, r))
continue;
start = _ALIGN_DOWN(r->start - base, sgsz);
@@ -402,7 +430,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
struct device_node *dn = hose->dn;
struct resource *res;
u32 m64_range[2], i;
- const u32 *r;
+ const __be32 *r;
u64 pci_addr;
if (phb->type != PNV_PHB_IODA1 && phb->type != PNV_PHB_IODA2) {
@@ -1877,7 +1905,7 @@ static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm,
unsigned shift, unsigned long index,
unsigned long npages)
{
- __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false);
+ __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
unsigned long start, end, inc;
/* We'll invalidate DMA address in PE scope */
@@ -2209,7 +2237,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
pnv_pci_link_table_and_group(phb->hose->node, num,
tbl, &pe->table_group);
- pnv_pci_phb3_tce_invalidate_pe(pe);
+ pnv_pci_ioda2_tce_invalidate_pe(pe);
return 0;
}
@@ -2347,7 +2375,7 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
if (ret)
pe_warn(pe, "Unmapping failed, ret = %ld\n", ret);
else
- pnv_pci_phb3_tce_invalidate_pe(pe);
+ pnv_pci_ioda2_tce_invalidate_pe(pe);
pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
@@ -2703,15 +2731,21 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
}
#ifdef CONFIG_PCI_MSI
-static void pnv_ioda2_msi_eoi(struct irq_data *d)
+int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
{
- unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
- struct irq_chip *chip = irq_data_get_irq_chip(d);
struct pnv_phb *phb = container_of(chip, struct pnv_phb,
ioda.irq_chip);
+
+ return opal_pci_msi_eoi(phb->opal_id, hw_irq);
+}
+
+static void pnv_ioda2_msi_eoi(struct irq_data *d)
+{
int64_t rc;
+ unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+ struct irq_chip *chip = irq_data_get_irq_chip(d);
- rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
+ rc = pnv_opal_pci_msi_eoi(chip, hw_irq);
WARN_ON_ONCE(rc);
icp_native_eoi(d);
@@ -2741,6 +2775,16 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
irq_set_chip(virq, &phb->ioda.irq_chip);
}
+/*
+ * Returns true iff chip is one for which pnv_opal_pci_msi_eoi()
+ * can be called.
+ */
+bool is_pnv_opal_msi(struct irq_chip *chip)
+{
+ return chip->irq_eoi == pnv_ioda2_msi_eoi;
+}
+EXPORT_SYMBOL_GPL(is_pnv_opal_msi);
+
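+/*
+ * Sketch of how a caller would pair the two exports above. KVM's XICS
+ * passthrough is the anticipated user, though that is an assumption of
+ * this note, not something the patch itself establishes:
+ *
+ *	if (is_pnv_opal_msi(irq_chip))
+ *		rc = pnv_opal_pci_msi_eoi(irq_chip, hw_irq);
+ */
+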
static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
unsigned int hwirq, unsigned int virq,
unsigned int is_64, struct msi_msg *msg)
@@ -2863,7 +2907,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
res = &pdev->resource[i + PCI_IOV_RESOURCES];
if (!res->flags || res->parent)
continue;
- if (!pnv_pci_is_mem_pref_64(res->flags)) {
+ if (!pnv_pci_is_m64_flags(res->flags)) {
dev_warn(&pdev->dev, "Don't support SR-IOV with"
" non M64 VF BAR%d: %pR. \n",
i, res);
@@ -2958,7 +3002,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
index++;
}
} else if ((res->flags & IORESOURCE_MEM) &&
- !pnv_pci_is_mem_pref_64(res->flags)) {
+ !pnv_pci_is_m64(phb, res)) {
region.start = res->start -
phb->hose->mem_offset[0] -
phb->ioda.m32_pci_base;
@@ -3018,6 +3062,38 @@ static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
}
}
+#ifdef CONFIG_DEBUG_FS
+static int pnv_pci_diag_data_set(void *data, u64 val)
+{
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ s64 ret;
+
+ if (val != 1ULL)
+ return -EINVAL;
+
+ hose = (struct pci_controller *)data;
+ if (!hose || !hose->private_data)
+ return -ENODEV;
+
+ phb = hose->private_data;
+
+ /* Retrieve the diag data from firmware */
+ ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
+ PNV_PCI_DIAG_BUF_SIZE);
+ if (ret != OPAL_SUCCESS)
+ return -EIO;
+
+ /* Print the diag data to the kernel log */
+ pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(pnv_pci_diag_data_fops, NULL,
+ pnv_pci_diag_data_set, "%llu\n");
+
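+/*
+ * Hypothetical usage, assuming debugfs is mounted at /sys/kernel/debug
+ * and a PHB with global number 0:
+ *
+ *	echo 1 > /sys/kernel/debug/powerpc/PCI0000/dump_diag_regs
+ *
+ * This dumps the PHB diagnostic data to the kernel log.
+ */
+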
+#endif /* CONFIG_DEBUG_FS */
+
static void pnv_pci_ioda_create_dbgfs(void)
{
#ifdef CONFIG_DEBUG_FS
@@ -3033,9 +3109,14 @@ static void pnv_pci_ioda_create_dbgfs(void)
sprintf(name, "PCI%04x", hose->global_number);
phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
- if (!phb->dbgfs)
+ if (!phb->dbgfs) {
pr_warning("%s: Error on creating debugfs on PHB#%x\n",
__func__, hose->global_number);
+ continue;
+ }
+
+ debugfs_create_file("dump_diag_regs", 0200, phb->dbgfs, hose,
+ &pnv_pci_diag_data_fops);
}
#endif /* CONFIG_DEBUG_FS */
}
@@ -3083,9 +3164,12 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
bridge = bridge->bus->self;
}
- /* We fail back to M32 if M64 isn't supported */
- if (phb->ioda.m64_segsize &&
- pnv_pci_is_mem_pref_64(type))
+ /*
+ * We fall back to M32 if M64 isn't supported. We enforce the M64
+	 * alignment for any 64-bit resource; PCIe doesn't care, and
+ * bridges only do 64-bit prefetchable anyway.
+ */
+ if (phb->ioda.m64_segsize && pnv_pci_is_m64_flags(type))
return phb->ioda.m64_segsize;
if (type & IORESOURCE_MEM)
return phb->ioda.m32_segsize;
@@ -3125,7 +3209,7 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
w = NULL;
if (r->flags & type & IORESOURCE_IO)
w = &hose->io_resource;
- else if (pnv_pci_is_mem_pref_64(r->flags) &&
+ else if (pnv_pci_is_m64(phb, r) &&
(type & IORESOURCE_PREFETCH) &&
phb->ioda.m64_segsize)
w = &hose->mem_resources[1];
@@ -3392,12 +3476,6 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
struct pnv_phb *phb = pe->phb;
struct pnv_ioda_pe *slave, *tmp;
- /* Release slave PEs in compound PE */
- if (pe->flags & PNV_IODA_PE_MASTER) {
- list_for_each_entry_safe(slave, tmp, &pe->slaves, list)
- pnv_ioda_release_pe(slave);
- }
-
list_del(&pe->list);
switch (phb->type) {
case PNV_PHB_IODA1:
@@ -3412,7 +3490,26 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
pnv_ioda_release_pe_seg(pe);
pnv_ioda_deconfigure_pe(pe->phb, pe);
- pnv_ioda_free_pe(pe);
+
+ /* Release slave PEs in the compound PE */
+ if (pe->flags & PNV_IODA_PE_MASTER) {
+ list_for_each_entry_safe(slave, tmp, &pe->slaves, list) {
+ list_del(&slave->list);
+ pnv_ioda_free_pe(slave);
+ }
+ }
+
+ /*
+	 * The PE for the root bus can be removed because of hotplug in EEH
+	 * recovery for a fenced PHB error. We need to mark the PE dead so
+	 * that it can be populated again in the PCI hot add path. The PE
+	 * shouldn't be destroyed as it's a globally reserved resource.
+ */
+ if (phb->ioda.root_pe_populated &&
+ phb->ioda.root_pe_idx == pe->pe_number)
+ phb->ioda.root_pe_populated = false;
+ else
+ pnv_ioda_free_pe(pe);
}
static void pnv_pci_release_device(struct pci_dev *pdev)
@@ -3428,7 +3525,17 @@ static void pnv_pci_release_device(struct pci_dev *pdev)
if (!pdn || pdn->pe_number == IODA_INVALID_PE)
return;
+ /*
+	 * PCI hotplug can happen as part of EEH error recovery, in which
+	 * case the @pdn isn't removed and re-added. We should set the PE
+	 * number in @pdn to an invalid one; otherwise the PE's device
+	 * count is decreased when devices are removed but never increased
+	 * when they are added back, leaving an unbalanced count that
+	 * eventually breaks the normal PCI hotplug path.
+ */
pe = &phb->ioda.pe_array[pdn->pe_number];
+ pdn->pe_number = IODA_INVALID_PE;
+
WARN_ON(--pe->device_count < 0);
if (pe->device_count == 0)
pnv_ioda_release_pe(pe);
@@ -3722,10 +3829,11 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
if (rc)
pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
- /* If we're running in kdump kerenl, the previous kerenl never
+ /*
+ * If we're running in kdump kernel, the previous kernel never
* shutdown PCI devices correctly. We already got IODA table
* cleaned out. So we have to issue PHB reset to stop all PCI
- * transactions from previous kerenl.
+ * transactions from previous kernel.
*/
if (is_kdump_kernel()) {
pr_info(" Issue PHB reset ...\n");
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index a21d831c1114..db7b8020f68e 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -186,7 +186,7 @@ int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
return -ENOSPC;
}
virq = irq_create_mapping(NULL, phb->msi_base + hwirq);
- if (virq == NO_IRQ) {
+ if (!virq) {
pr_warn("%s: Failed to map MSI to linux irq\n",
pci_name(pdev));
msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1);
@@ -217,7 +217,7 @@ void pnv_teardown_msi_irqs(struct pci_dev *pdev)
return;
for_each_pci_msi_entry(entry, pdev) {
- if (entry->irq == NO_IRQ)
+ if (!entry->irq)
continue;
hwirq = virq_to_hw(entry->irq);
irq_set_msi_desc(entry->irq, NULL);
@@ -309,8 +309,8 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
be64_to_cpu(data->dma1ErrorLog1));
for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
- if ((data->pestA[i] >> 63) == 0 &&
- (data->pestB[i] >> 63) == 0)
+ if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 &&
+ (be64_to_cpu(data->pestB[i]) >> 63) == 0)
continue;
pr_info("PE[%3d] A/B: %016llx %016llx\n",
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
index 57caaf11a83f..e48462447ff0 100644
--- a/arch/powerpc/platforms/ps3/device-init.c
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -62,7 +62,7 @@ static int __init ps3_register_lpm_devices(void)
&dev->lpm.rights);
if (result) {
- pr_debug("%s:%d: ps3_repository_read_lpm_privleges failed \n",
+ pr_debug("%s:%d: ps3_repository_read_lpm_privileges failed\n",
__func__, __LINE__);
goto fail_read_repo;
}
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index b831638e6f4a..98f8c3611133 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -192,7 +192,7 @@ static int ps3_virq_setup(enum ps3_cpu_binding cpu, unsigned long outlet,
*virq = irq_create_mapping(NULL, outlet);
- if (*virq == NO_IRQ) {
+ if (!*virq) {
FAIL("%s:%d: irq_create_mapping failed: outlet %lu\n",
__func__, __LINE__, outlet);
result = -ENOMEM;
@@ -339,7 +339,7 @@ int ps3_event_receive_port_setup(enum ps3_cpu_binding cpu, unsigned int *virq)
if (result) {
FAIL("%s:%d: lv1_construct_event_receive_port failed: %s\n",
__func__, __LINE__, ps3_result(result));
- *virq = NO_IRQ;
+ *virq = 0;
return result;
}
@@ -418,7 +418,7 @@ int ps3_sb_event_receive_port_setup(struct ps3_system_bus_device *dev,
" failed: %s\n", __func__, __LINE__,
ps3_result(result));
ps3_event_receive_port_destroy(*virq);
- *virq = NO_IRQ;
+ *virq = 0;
return result;
}
@@ -724,12 +724,12 @@ static unsigned int ps3_get_irq(void)
asm volatile("cntlzd %0,%1" : "=r" (plug) : "r" (x));
plug &= 0x3f;
- if (unlikely(plug == NO_IRQ)) {
+ if (unlikely(!plug)) {
DBG("%s:%d: no plug found: thread_id %llu\n", __func__,
__LINE__, pd->thread_id);
dump_bmp(&per_cpu(ps3_private, 0));
dump_bmp(&per_cpu(ps3_private, 1));
- return NO_IRQ;
+ return 0;
}
#if defined(DEBUG)
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
index 3c7707af3384..60154d08debf 100644
--- a/arch/powerpc/platforms/ps3/smp.c
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -91,7 +91,7 @@ static void __init ps3_smp_probe(void)
result = smp_request_message_ipi(virqs[i], i);
if (result)
- virqs[i] = NO_IRQ;
+ virqs[i] = 0;
else
ps3_register_ipi_irq(cpu, virqs[i]);
}
@@ -112,7 +112,7 @@ void ps3_smp_cleanup_cpu(int cpu)
for (i = 0; i < MSG_COUNT; i++) {
/* Can't call free_irq from interrupt context. */
ps3_event_receive_port_destroy(virqs[i]);
- virqs[i] = NO_IRQ;
+ virqs[i] = 0;
}
DBG(" <- %s:%d: (%d)\n", __func__, __LINE__, cpu);
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index 492b2575e0d2..b54850845466 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -284,7 +284,7 @@ fail_alloc_2:
fail_alloc_1:
ps3_spe_irq_destroy(spu->irqs[0]);
fail_alloc_0:
- spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = NO_IRQ;
+ spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = 0;
return result;
}
@@ -334,7 +334,7 @@ static int ps3_destroy_spu(struct spu *spu)
ps3_spe_irq_destroy(spu->irqs[1]);
ps3_spe_irq_destroy(spu->irqs[0]);
- spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = NO_IRQ;
+ spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = 0;
spu_unmap(spu);
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 4748124faa10..423e450efe07 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -27,7 +27,7 @@
#include <asm/uaccess.h>
#include <asm/rtas.h>
-struct workqueue_struct *pseries_hp_wq;
+static struct workqueue_struct *pseries_hp_wq;
struct pseries_hp_work {
struct work_struct work;
@@ -377,7 +377,7 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
return rc;
}
-void pseries_hp_work_fn(struct work_struct *work)
+static void pseries_hp_work_fn(struct work_struct *work)
{
struct pseries_hp_work *hp_work =
container_of(work, struct pseries_hp_work, work);
@@ -413,6 +413,7 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
queue_work(pseries_hp_wq, (struct work_struct *)work);
} else {
*rc = -ENOMEM;
+ kfree(hp_errlog_copy);
complete(hotplug_done);
}
}
diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c
index a6ddca833119..32187dc76730 100644
--- a/arch/powerpc/platforms/pseries/event_sources.c
+++ b/arch/powerpc/platforms/pseries/event_sources.c
@@ -34,7 +34,7 @@ void request_event_sources_irqs(struct device_node *np,
if (count > 15)
break;
virqs[count] = irq_create_of_mapping(&oirq);
- if (virqs[count] == NO_IRQ) {
+ if (!virqs[count]) {
pr_err("event-sources: Unable to allocate "
"interrupt number for %s\n",
np->full_name);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 43f7beb2902d..76ec104e88be 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -320,19 +320,6 @@ static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb)
return dlpar_update_device_tree_lmb(lmb);
}
-static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
-{
- unsigned long section_nr;
- struct mem_section *mem_sect;
- struct memory_block *mem_block;
-
- section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
- mem_sect = __nr_to_section(section_nr);
-
- mem_block = find_memory_block(mem_sect);
- return mem_block;
-}
-
#ifdef CONFIG_MEMORY_HOTREMOVE
static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
{
@@ -420,6 +407,19 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb)
static int dlpar_add_lmb(struct of_drconf_cell *);
+static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
+{
+ unsigned long section_nr;
+ struct mem_section *mem_sect;
+ struct memory_block *mem_block;
+
+ section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
+ mem_sect = __nr_to_section(section_nr);
+
+ mem_block = find_memory_block(mem_sect);
+ return mem_block;
+}
+
static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
{
struct memory_block *mem_block;
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 543a6386f3eb..326ef0dd6038 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -119,7 +119,7 @@ static void rtas_teardown_msi_irqs(struct pci_dev *pdev)
struct msi_desc *entry;
for_each_pci_msi_entry(entry, pdev) {
- if (entry->irq == NO_IRQ)
+ if (!entry->irq)
continue;
irq_set_msi_desc(entry->irq, NULL);
@@ -471,7 +471,7 @@ again:
virq = irq_create_mapping(NULL, hwirq);
- if (virq == NO_IRQ) {
+ if (!virq) {
pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq);
return -ENOSPC;
}
@@ -490,7 +490,7 @@ again:
static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev)
{
/* No LSI -> leave MSIs (if any) configured */
- if (pdev->irq == NO_IRQ) {
+ if (!pdev->irq) {
dev_dbg(&pdev->dev, "rtas_msi: no LSI, nothing to do.\n");
return;
}
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index fe16a50700de..09eba5a9929a 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -119,6 +119,10 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
bus = bridge->bus;
+ /* Rely on the pcibios_free_controller_deferred() callback. */
+ pci_set_host_bridge_release(bridge, pcibios_free_controller_deferred,
+ (void *) pci_bus_to_host(bus));
+
dn = pcibios_get_phb_of_node(bus);
if (!dn)
return 0;
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 906dbaa97fe2..547fd13e4f8e 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -106,8 +106,11 @@ int remove_phb_dynamic(struct pci_controller *phb)
release_resource(res);
}
- /* Free pci_controller data structure */
- pcibios_free_controller(phb);
+ /*
+ * The pci_controller data structure is freed by
+ * the pcibios_free_controller_deferred() callback;
+ * see pseries_root_bridge_prepare().
+ */
return 0;
}
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
index b502ab61aafa..7d28cabf1206 100644
--- a/arch/powerpc/platforms/pseries/scanlog.c
+++ b/arch/powerpc/platforms/pseries/scanlog.c
@@ -156,7 +156,7 @@ static int scanlog_release(struct inode * inode, struct file * file)
return 0;
}
-const struct file_operations scanlog_fops = {
+static const struct file_operations scanlog_fops = {
.owner = THIS_MODULE,
.read = scanlog_read,
.write = scanlog_write,
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 4ffcaa6f8670..97aa3f332f24 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -41,7 +41,6 @@
#include <linux/root_dev.h>
#include <linux/of.h>
#include <linux/of_pci.h>
-#include <linux/kexec.h>
#include <asm/mmu.h>
#include <asm/processor.h>
@@ -66,6 +65,7 @@
#include <asm/eeh.h>
#include <asm/reg.h>
#include <asm/plpar_wrappers.h>
+#include <asm/kexec.h>
#include "pseries.h"
@@ -114,7 +114,7 @@ static void pseries_8259_cascade(struct irq_desc *desc)
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int cascade_irq = i8259_irq();
- if (cascade_irq != NO_IRQ)
+ if (cascade_irq)
generic_handle_irq(cascade_irq);
chip->irq_eoi(&desc->irq_data);
@@ -141,7 +141,7 @@ static void __init pseries_setup_i8259_cascade(void)
}
cascade = irq_of_parse_and_map(found, 0);
- if (cascade == NO_IRQ) {
+ if (!cascade) {
printk(KERN_ERR "pic: failed to map cascade interrupt");
return;
}
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 9144204442eb..ada29eaed6e2 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -240,7 +240,7 @@ static int axon_ram_probe(struct platform_device *device)
device_add_disk(&device->dev, bank->disk);
bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0);
- if (bank->irq_id == NO_IRQ) {
+ if (!bank->irq_id) {
dev_err(&device->dev, "Cannot access ECC interrupt ID\n");
rc = -EFAULT;
goto failed;
@@ -250,7 +250,7 @@ static int axon_ram_probe(struct platform_device *device)
AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device);
if (rc != 0) {
dev_err(&device->dev, "Cannot register ECC interrupt handler\n");
- bank->irq_id = NO_IRQ;
+ bank->irq_id = 0;
rc = -EFAULT;
goto failed;
}
@@ -268,7 +268,7 @@ static int axon_ram_probe(struct platform_device *device)
failed:
if (bank != NULL) {
- if (bank->irq_id != NO_IRQ)
+ if (bank->irq_id)
free_irq(bank->irq_id, device);
if (bank->disk != NULL) {
if (bank->disk->major > 0)
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index 6c110994d902..3c0eb9b25535 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -132,7 +132,7 @@ unsigned int cpm_pic_init(void)
{
struct device_node *np = NULL;
struct resource res;
- unsigned int sirq = NO_IRQ, hwirq, eirq;
+ unsigned int sirq = 0, hwirq, eirq;
int ret;
pr_debug("cpm_pic_init\n");
@@ -154,7 +154,7 @@ unsigned int cpm_pic_init(void)
goto end;
sirq = irq_of_parse_and_map(np, 0);
- if (sirq == NO_IRQ)
+ if (!sirq)
goto end;
/* Initialize the CPM interrupt controller. */
@@ -168,7 +168,7 @@ unsigned int cpm_pic_init(void)
cpm_pic_host = irq_domain_add_linear(np, 64, &cpm_pic_host_ops, NULL);
if (cpm_pic_host == NULL) {
printk(KERN_ERR "CPM2 PIC: failed to allocate irq host!\n");
- sirq = NO_IRQ;
+ sirq = 0;
goto end;
}
@@ -182,7 +182,7 @@ unsigned int cpm_pic_init(void)
}
eirq = irq_of_parse_and_map(np, 0);
- if (eirq == NO_IRQ)
+ if (!eirq)
goto end;
if (setup_irq(eirq, &cpm_error_irqaction))
@@ -534,7 +534,8 @@ struct cpm1_gpio16_chip {
static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc)
{
- struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
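+	/*
+	 * gpiochip_get_data() cannot be used here, because
+	 * of_mm_gpiochip_add_data() calls save_regs() before the chip
+	 * data has been set up.
+	 */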
+ struct cpm1_gpio16_chip *cpm1_gc =
+ container_of(mm_gc, struct cpm1_gpio16_chip, mm_gc);
struct cpm_ioport16 __iomem *iop = mm_gc->regs;
cpm1_gc->cpdata = in_be16(&iop->dat);
@@ -649,7 +650,8 @@ struct cpm1_gpio32_chip {
static void cpm1_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc)
{
- struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+ struct cpm1_gpio32_chip *cpm1_gc =
+ container_of(mm_gc, struct cpm1_gpio32_chip, mm_gc);
struct cpm_ioport32b __iomem *iop = mm_gc->regs;
cpm1_gc->cpdata = in_be32(&iop->dat);
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index 911456d17713..947f42007734 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -94,7 +94,8 @@ struct cpm2_gpio32_chip {
static void cpm2_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc)
{
- struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(&mm_gc->gc);
+ struct cpm2_gpio32_chip *cpm2_gc =
+ container_of(mm_gc, struct cpm2_gpio32_chip, mm_gc);
struct cpm2_ioports __iomem *iop = mm_gc->regs;
cpm2_gc->cpdata = in_be32(&iop->dat);
diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c
index bffcc7a486a1..48866e6c1efb 100644
--- a/arch/powerpc/sysdev/ehv_pic.c
+++ b/arch/powerpc/sysdev/ehv_pic.c
@@ -155,7 +155,7 @@ static struct irq_chip ehv_pic_direct_eoi_irq_chip = {
.irq_set_type = ehv_pic_set_irq_type,
};
-/* Return an interrupt vector or NO_IRQ if no interrupt is pending. */
+/* Return an interrupt vector or 0 if no interrupt is pending. */
unsigned int ehv_pic_get_irq(void)
{
int irq;
@@ -168,7 +168,7 @@ unsigned int ehv_pic_get_irq(void)
ev_int_iack(0, &irq); /* legacy mode */
if (irq == 0xFFFF) /* 0xFFFF --> no irq is pending */
- return NO_IRQ;
+ return 0;
/*
* this will also setup revmap[] in the slow path for the first
diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c
index 06ac3c61b3d0..a6f0b96ce2c9 100644
--- a/arch/powerpc/sysdev/fsl_gtm.c
+++ b/arch/powerpc/sysdev/fsl_gtm.c
@@ -406,7 +406,7 @@ static int __init fsl_gtm_init(void)
unsigned int irq;
irq = irq_of_parse_and_map(np, i);
- if (irq == NO_IRQ) {
+ if (!irq) {
pr_err("%s: not enough interrupts specified\n",
np->full_name);
goto err;
diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c
index b83f32562a37..488ec453038a 100644
--- a/arch/powerpc/sysdev/fsl_mpic_err.c
+++ b/arch/powerpc/sysdev/fsl_mpic_err.c
@@ -115,8 +115,8 @@ static irqreturn_t fsl_error_int_handler(int irq, void *data)
errint = __builtin_clz(eisr);
cascade_irq = irq_linear_revmap(mpic->irqhost,
mpic->err_int_vecs[errint]);
- WARN_ON(cascade_irq == NO_IRQ);
- if (cascade_irq != NO_IRQ) {
+ WARN_ON(!cascade_irq);
+ if (cascade_irq) {
generic_handle_irq(cascade_irq);
} else {
eimr |= 1 << (31 - errint);
@@ -134,7 +134,7 @@ void mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum)
int ret;
virq = irq_create_mapping(mpic->irqhost, irqnum);
- if (virq == NO_IRQ) {
+ if (!virq) {
pr_err("Error interrupt setup failed\n");
return;
}
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 3a2be3676f43..8a244828782e 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -131,7 +131,7 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev)
irq_hw_number_t hwirq;
for_each_pci_msi_entry(entry, pdev) {
- if (entry->irq == NO_IRQ)
+ if (!entry->irq)
continue;
hwirq = virq_to_hw(entry->irq);
msi_data = irq_get_chip_data(entry->irq);
@@ -250,7 +250,7 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
virq = irq_create_mapping(msi_data->irqhost, hwirq);
- if (virq == NO_IRQ) {
+ if (!virq) {
dev_err(&pdev->dev, "fail mapping hwirq %i\n", hwirq);
msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1);
rc = -ENOSPC;
@@ -285,7 +285,7 @@ static irqreturn_t fsl_msi_cascade(int irq, void *data)
msir_index = cascade_data->index;
if (msir_index >= NR_MSI_REG_MAX)
- cascade_irq = NO_IRQ;
+ cascade_irq = 0;
switch (msi_data->feature & FSL_PIC_IP_MASK) {
case FSL_PIC_IP_MPIC:
@@ -315,7 +315,7 @@ static irqreturn_t fsl_msi_cascade(int irq, void *data)
cascade_irq = irq_linear_revmap(msi_data->irqhost,
msi_hwirq(msi_data, msir_index,
intr_index + have_shift));
- if (cascade_irq != NO_IRQ) {
+ if (cascade_irq) {
generic_handle_irq(cascade_irq);
ret = IRQ_HANDLED;
}
@@ -337,7 +337,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)
if (msi->cascade_array[i]) {
virq = msi->cascade_array[i]->virq;
- BUG_ON(virq == NO_IRQ);
+ BUG_ON(!virq);
free_irq(virq, msi->cascade_array[i]);
kfree(msi->cascade_array[i]);
@@ -362,7 +362,7 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
int virt_msir, i, ret;
virt_msir = irq_of_parse_and_map(dev->dev.of_node, irq_index);
- if (virt_msir == NO_IRQ) {
+ if (!virt_msir) {
dev_err(&dev->dev, "%s: Cannot translate IRQ index %d\n",
__func__, irq_index);
return 0;
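
The setup/teardown pairs in this file follow the generic MSI bookkeeping: a descriptor whose irq is still 0 was never mapped and must be skipped. A hedged sketch of the teardown loop, with the per-controller bitmap accounting elided (demo_teardown_msi_irqs is not from the patch):

	static void demo_teardown_msi_irqs(struct pci_dev *pdev)
	{
		struct msi_desc *entry;

		for_each_pci_msi_entry(entry, pdev) {
			if (!entry->irq)	/* never mapped: nothing to undo */
				continue;
			irq_set_msi_desc(entry->irq, NULL);
			irq_dispose_mapping(entry->irq);
		}
	}
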
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index 68e7c0dd2e45..3cc7cace194a 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -23,7 +23,7 @@
*/
#include <linux/init.h>
-#include <linux/module.h>
+#include <linux/extable.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
diff --git a/arch/powerpc/sysdev/ge/ge_pic.c b/arch/powerpc/sysdev/ge/ge_pic.c
index d57b77573068..02553a8ce191 100644
--- a/arch/powerpc/sysdev/ge/ge_pic.c
+++ b/arch/powerpc/sysdev/ge/ge_pic.c
@@ -102,7 +102,7 @@ static void gef_pic_cascade(struct irq_desc *desc)
*/
cascade_irq = gef_pic_get_irq();
- if (cascade_irq != NO_IRQ)
+ if (cascade_irq)
generic_handle_irq(cascade_irq);
chip->irq_eoi(&desc->irq_data);
@@ -206,7 +206,7 @@ void __init gef_pic_init(struct device_node *np)
/* Map controller */
gef_pic_cascade_irq = irq_of_parse_and_map(np, 0);
- if (gef_pic_cascade_irq == NO_IRQ) {
+ if (!gef_pic_cascade_irq) {
printk(KERN_ERR "SBC610: failed to map cascade interrupt");
return;
}
@@ -223,12 +223,12 @@ void __init gef_pic_init(struct device_node *np)
/*
* This is called when we receive an interrupt which apparently comes from this
- * chip - check, returning the highest interrupt generated or return NO_IRQ
+ * chip - check, returning the highest interrupt generated or 0 if none.
*/
unsigned int gef_pic_get_irq(void)
{
u32 cause, mask, active;
- unsigned int virq = NO_IRQ;
+ unsigned int virq = 0;
int hwirq;
cause = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_STATUS);
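
gef_pic_cascade() above is the standard chained-handler shape, and the same conversion repeats in tsi108 further down: a get_irq() that now returns 0 for "nothing pending" slots directly into the truth test. Roughly, with illustrative demo names (demo_pic_get_irq is hypothetical):

	static void demo_cascade(struct irq_desc *desc)
	{
		struct irq_chip *chip = irq_desc_get_chip(desc);
		unsigned int cascade_irq = demo_pic_get_irq();	/* 0 = none */

		if (cascade_irq)
			generic_handle_irq(cascade_irq);
		chip->irq_eoi(&desc->irq_data);
	}
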
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index aa2c186d3115..bafb014e1a7e 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -68,9 +68,9 @@ unsigned int i8259_irq(void)
if (!pci_intack)
outb(0x0B, 0x20); /* ISR register */
if(~inb(0x20) & 0x80)
- irq = NO_IRQ;
+ irq = 0;
} else if (irq == 0xff)
- irq = NO_IRQ;
+ irq = 0;
if (lock)
raw_spin_unlock(&i8259_lock);
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index f76ee39cb337..f267ee0afc08 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -853,7 +853,7 @@ void ipic_clear_mcp_status(u32 mask)
ipic_write(primary_ipic->regs, IPIC_SERMR, mask);
}
-/* Return an interrupt vector or NO_IRQ if no interrupt is pending. */
+/* Return an interrupt vector or 0 if no interrupt is pending. */
unsigned int ipic_get_irq(void)
{
int irq;
@@ -864,7 +864,7 @@ unsigned int ipic_get_irq(void)
irq = ipic_read(primary_ipic->regs, IPIC_SIVCR) & IPIC_SIVCR_VECTOR_MASK;
if (irq == 0) /* 0 --> no irq is pending */
- return NO_IRQ;
+ return 0;
return irq_linear_revmap(primary_ipic->irqhost, irq);
}
diff --git a/arch/powerpc/sysdev/mmio_nvram.c b/arch/powerpc/sysdev/mmio_nvram.c
index 69f5814ae6d4..c31f634f1973 100644
--- a/arch/powerpc/sysdev/mmio_nvram.c
+++ b/arch/powerpc/sysdev/mmio_nvram.c
@@ -89,7 +89,7 @@ static ssize_t mmio_nvram_write(char *buf, size_t count, loff_t *index)
return count;
}
-void mmio_nvram_write_val(int addr, unsigned char val)
+static void mmio_nvram_write_val(int addr, unsigned char val)
{
unsigned long flags;
diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c
index b7cf7abff2eb..3e828b20c21e 100644
--- a/arch/powerpc/sysdev/mpc8xx_pic.c
+++ b/arch/powerpc/sysdev/mpc8xx_pic.c
@@ -79,7 +79,7 @@ unsigned int mpc8xx_get_irq(void)
irq = in_be32(&siu_reg->sc_sivec) >> 26;
if (irq == PIC_VEC_SPURRIOUS)
- irq = NO_IRQ;
+ irq = 0;
return irq_linear_revmap(mpc8xx_pic_host, irq);
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 7de45b2df366..4d48cecfedd1 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1649,7 +1649,7 @@ void __init mpic_init(struct mpic *mpic)
/* Check if this MPIC is chained from a parent interrupt controller */
if (mpic->flags & MPIC_SECONDARY) {
int virq = irq_of_parse_and_map(mpic->node, 0);
- if (virq != NO_IRQ) {
+ if (virq) {
printk(KERN_INFO "%s: hooking up to IRQ %d\n",
mpic->node->full_name, virq);
irq_set_handler_data(virq, mpic);
@@ -1778,13 +1778,13 @@ static unsigned int _mpic_get_one_irq(struct mpic *mpic, int reg)
if (unlikely(src == mpic->spurious_vec)) {
if (mpic->flags & MPIC_SPV_EOI)
mpic_eoi(mpic);
- return NO_IRQ;
+ return 0;
}
if (unlikely(mpic->protected && test_bit(src, mpic->protected))) {
printk_ratelimited(KERN_WARNING "%s: Got protected source %d !\n",
mpic->name, (int)src);
mpic_eoi(mpic);
- return NO_IRQ;
+ return 0;
}
return irq_linear_revmap(mpic->irqhost, src);
@@ -1817,17 +1817,17 @@ unsigned int mpic_get_coreint_irq(void)
if (unlikely(src == mpic->spurious_vec)) {
if (mpic->flags & MPIC_SPV_EOI)
mpic_eoi(mpic);
- return NO_IRQ;
+ return 0;
}
if (unlikely(mpic->protected && test_bit(src, mpic->protected))) {
printk_ratelimited(KERN_WARNING "%s: Got protected source %d !\n",
mpic->name, (int)src);
- return NO_IRQ;
+ return 0;
}
return irq_linear_revmap(mpic->irqhost, src);
#else
- return NO_IRQ;
+ return 0;
#endif
}
@@ -1852,7 +1852,7 @@ void mpic_request_ipis(void)
for (i = 0; i < 4; i++) {
unsigned int vipi = irq_create_mapping(mpic->irqhost,
mpic->ipi_vecs[0] + i);
- if (vipi == NO_IRQ) {
+ if (!vipi) {
printk(KERN_ERR "Failed to map %s\n", smp_ipi_name[i]);
continue;
}
diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c
index 3f165d972a0e..db2286be5d9a 100644
--- a/arch/powerpc/sysdev/mpic_msgr.c
+++ b/arch/powerpc/sysdev/mpic_msgr.c
@@ -238,7 +238,7 @@ static int mpic_msgr_probe(struct platform_device *dev)
if (receive_mask & (1 << i)) {
msgr->irq = irq_of_parse_and_map(np, irq_index);
- if (msgr->irq == NO_IRQ) {
+ if (!msgr->irq) {
dev_err(&dev->dev,
"Missing interrupt specifier");
kfree(msgr);
@@ -246,7 +246,7 @@ static int mpic_msgr_probe(struct platform_device *dev)
}
irq_index += 1;
} else {
- msgr->irq = NO_IRQ;
+ msgr->irq = 0;
}
mpic_msgrs[reg_number] = msgr;
diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c
index 2cbc7e29b85f..cfc1c57d760f 100644
--- a/arch/powerpc/sysdev/mpic_u3msi.c
+++ b/arch/powerpc/sysdev/mpic_u3msi.c
@@ -110,7 +110,7 @@ static void u3msi_teardown_msi_irqs(struct pci_dev *pdev)
irq_hw_number_t hwirq;
for_each_pci_msi_entry(entry, pdev) {
- if (entry->irq == NO_IRQ)
+ if (!entry->irq)
continue;
hwirq = virq_to_hw(entry->irq);
@@ -155,7 +155,7 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
msg.address_hi = addr >> 32;
virq = irq_create_mapping(msi_mpic->irqhost, hwirq);
- if (virq == NO_IRQ) {
+ if (!virq) {
pr_debug("u3msi: failed mapping hwirq 0x%x\n", hwirq);
msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1);
return -ENOSPC;
diff --git a/arch/powerpc/sysdev/mv64x60_pic.c b/arch/powerpc/sysdev/mv64x60_pic.c
index 0f842dd16bcd..a79953deb489 100644
--- a/arch/powerpc/sysdev/mv64x60_pic.c
+++ b/arch/powerpc/sysdev/mv64x60_pic.c
@@ -272,7 +272,7 @@ unsigned int mv64x60_get_irq(void)
u32 cause;
int level1;
irq_hw_number_t hwirq;
- int virq = NO_IRQ;
+ int virq = 0;
cause = in_le32(mv64x60_irq_reg_base + MV64X60_IC_CPU0_SELECT_CAUSE);
if (cause & MV64X60_SELECT_CAUSE_HIGH) {
diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c
index 8a0b77a3ec0c..9ea6a221d9d5 100644
--- a/arch/powerpc/sysdev/pmi.c
+++ b/arch/powerpc/sysdev/pmi.c
@@ -158,7 +158,7 @@ static int pmi_of_probe(struct platform_device *dev)
data->dev = dev;
data->irq = irq_of_parse_and_map(np, 0);
- if (data->irq == NO_IRQ) {
+ if (!data->irq) {
printk(KERN_ERR "pmi: invalid interrupt.\n");
rc = -EFAULT;
goto error_cleanup_iomap;
diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
index 52a93dcae262..9926ad67af76 100644
--- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
+++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
@@ -60,7 +60,7 @@ static int hsta_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
}
hwirq = ppc4xx_hsta_msi.irq_map[irq];
- if (hwirq == NO_IRQ) {
+ if (!hwirq) {
pr_err("%s: Failed mapping irq %d\n", __func__, irq);
return -EINVAL;
}
@@ -110,7 +110,7 @@ static void hsta_teardown_msi_irqs(struct pci_dev *dev)
int irq;
for_each_pci_msi_entry(entry, dev) {
- if (entry->irq == NO_IRQ)
+ if (!entry->irq)
continue;
irq = hsta_find_hwirq_offset(entry->irq);
@@ -166,7 +166,7 @@ static int hsta_msi_probe(struct platform_device *pdev)
for (irq = 0; irq < irq_count; irq++) {
ppc4xx_hsta_msi.irq_map[irq] =
irq_of_parse_and_map(dev->of_node, irq);
- if (ppc4xx_hsta_msi.irq_map[irq] == NO_IRQ) {
+ if (!ppc4xx_hsta_msi.irq_map[irq]) {
dev_err(dev, "Unable to map IRQ\n");
ret = -EINVAL;
goto out2;
diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c
index 8fb806135043..590dab4f47d6 100644
--- a/arch/powerpc/sysdev/ppc4xx_msi.c
+++ b/arch/powerpc/sysdev/ppc4xx_msi.c
@@ -102,7 +102,7 @@ static int ppc4xx_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
__func__);
}
virq = irq_of_parse_and_map(msi_data->msi_dev, int_no);
- if (virq == NO_IRQ) {
+ if (!virq) {
dev_err(&dev->dev, "%s: fail mapping irq\n", __func__);
msi_bitmap_free_hwirqs(&msi_data->bitmap, int_no, 1);
return -ENOSPC;
@@ -129,7 +129,7 @@ void ppc4xx_teardown_msi_irqs(struct pci_dev *dev)
dev_dbg(&dev->dev, "PCIE-MSI: tearing down msi irqs\n");
for_each_pci_msi_entry(entry, dev) {
- if (entry->irq == NO_IRQ)
+ if (!entry->irq)
continue;
hwirq = virq_to_hw(entry->irq);
irq_set_msi_desc(entry->irq, NULL);
@@ -201,7 +201,7 @@ static int ppc4xx_of_msi_remove(struct platform_device *dev)
for (i = 0; i < msi_irqs; i++) {
virq = msi->msi_virqs[i];
- if (virq != NO_IRQ)
+ if (virq)
irq_dispose_mapping(virq);
}
diff --git a/arch/powerpc/sysdev/ppc4xx_soc.c b/arch/powerpc/sysdev/ppc4xx_soc.c
index 5c77c9ba33aa..d41134d2f786 100644
--- a/arch/powerpc/sysdev/ppc4xx_soc.c
+++ b/arch/powerpc/sysdev/ppc4xx_soc.c
@@ -109,7 +109,7 @@ static int __init ppc4xx_l2c_probe(void)
/* Get and map irq number from device tree */
irq = irq_of_parse_and_map(np, 0);
- if (irq == NO_IRQ) {
+ if (!irq) {
printk(KERN_ERR "irq_of_parse_and_map failed\n");
of_node_put(np);
return -ENODEV;
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
index 379de955aae3..57c971b7839c 100644
--- a/arch/powerpc/sysdev/tsi108_pci.c
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -433,7 +433,7 @@ void tsi108_irq_cascade(struct irq_desc *desc)
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int cascade_irq = get_pci_source();
- if (cascade_irq != NO_IRQ)
+ if (cascade_irq)
generic_handle_irq(cascade_irq);
chip->irq_eoi(&desc->irq_data);
diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c
index 6893d8f236df..a00949f3e378 100644
--- a/arch/powerpc/sysdev/uic.c
+++ b/arch/powerpc/sysdev/uic.c
@@ -319,7 +319,7 @@ void __init uic_init_tree(void)
}
}
-/* Return an interrupt vector or NO_IRQ if no interrupt is pending. */
+/* Return an interrupt vector or 0 if no interrupt is pending. */
unsigned int uic_get_irq(void)
{
u32 msr;
diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig
index 0031eda320c3..385e7aa9e273 100644
--- a/arch/powerpc/sysdev/xics/Kconfig
+++ b/arch/powerpc/sysdev/xics/Kconfig
@@ -1,6 +1,7 @@
config PPC_XICS
def_bool n
select PPC_SMP_MUXED_IPI
+ select HARDIRQS_SW_RESEND
config PPC_ICP_NATIVE
def_bool n
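
Selecting HARDIRQS_SW_RESEND is what makes the edge/retrigger changes further down meaningful: when an edge interrupt fires while masked, the core marks it pending and, on unmask, tries the chip's irq_retrigger callback, falling back to a software replay if that returns 0. A from-memory, simplified sketch of the core's decision (see kernel/irq/resend.c; not code from this series):

	static void demo_check_resend(struct irq_desc *desc)
	{
		struct irq_chip *chip = irq_desc_get_chip(desc);

		/* Level interrupts re-assert themselves in hardware; only a
		 * pending edge interrupt needs replaying on unmask. */
		if (!(desc->istate & IRQS_PENDING))
			return;
		desc->istate &= ~IRQS_PENDING;

		/* A hardware retrigger is preferred; when the chip has none,
		 * or it returns 0 (as xics_retrigger below does), the core
		 * falls back to a software replay, the part that only exists
		 * under HARDIRQS_SW_RESEND. */
		if (!chip->irq_retrigger || !chip->irq_retrigger(&desc->irq_data))
			;	/* core schedules the resend tasklet here */
	}
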
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
index c1917cf67c3d..e7fa26c4ff73 100644
--- a/arch/powerpc/sysdev/xics/icp-hv.c
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -112,10 +112,10 @@ static unsigned int icp_hv_get_irq(void)
unsigned int irq;
if (vec == XICS_IRQ_SPURIOUS)
- return NO_IRQ;
+ return 0;
irq = irq_find_mapping(xics_host, vec);
- if (likely(irq != NO_IRQ)) {
+ if (likely(irq)) {
xics_push_cppr(vec);
return irq;
}
@@ -126,7 +126,7 @@ static unsigned int icp_hv_get_irq(void)
/* We might learn about it later, so EOI it */
icp_hv_set_xirr(xirr);
- return NO_IRQ;
+ return 0;
}
static void icp_hv_set_cpu_priority(unsigned char cppr)
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index afdf62f2a695..8a6a043e239b 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -124,10 +124,10 @@ static unsigned int icp_native_get_irq(void)
unsigned int irq;
if (vec == XICS_IRQ_SPURIOUS)
- return NO_IRQ;
+ return 0;
irq = irq_find_mapping(xics_host, vec);
- if (likely(irq != NO_IRQ)) {
+ if (likely(irq)) {
xics_push_cppr(vec);
return irq;
}
@@ -138,7 +138,7 @@ static unsigned int icp_native_get_irq(void)
/* We might learn about it later, so EOI it */
icp_native_set_xirr(xirr);
- return NO_IRQ;
+ return 0;
}
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index 57d72f10a97f..d38e86fd5720 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -23,10 +23,10 @@
static void icp_opal_teardown_cpu(void)
{
- int cpu = smp_processor_id();
+ int hw_cpu = hard_smp_processor_id();
/* Clear any pending IPI */
- opal_int_set_mfrr(cpu, 0xff);
+ opal_int_set_mfrr(hw_cpu, 0xff);
}
static void icp_opal_flush_ipi(void)
@@ -51,14 +51,14 @@ static unsigned int icp_opal_get_irq(void)
rc = opal_int_get_xirr(&xirr, false);
if (rc < 0)
- return NO_IRQ;
+ return 0;
xirr = be32_to_cpu(xirr);
vec = xirr & 0x00ffffff;
if (vec == XICS_IRQ_SPURIOUS)
- return NO_IRQ;
+ return 0;
irq = irq_find_mapping(xics_host, vec);
- if (likely(irq != NO_IRQ)) {
+ if (likely(irq)) {
xics_push_cppr(vec);
return irq;
}
@@ -69,7 +69,7 @@ static unsigned int icp_opal_get_irq(void)
/* We might learn about it later, so EOI it */
opal_int_eoi(xirr);
- return NO_IRQ;
+ return 0;
}
static void icp_opal_set_cpu_priority(unsigned char cppr)
@@ -101,14 +101,16 @@ static void icp_opal_eoi(struct irq_data *d)
static void icp_opal_cause_ipi(int cpu, unsigned long data)
{
- opal_int_set_mfrr(cpu, IPI_PRIORITY);
+ int hw_cpu = get_hard_smp_processor_id(cpu);
+
+ opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
}
static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
{
- int cpu = smp_processor_id();
+ int hw_cpu = hard_smp_processor_id();
- opal_int_set_mfrr(cpu, 0xff);
+ opal_int_set_mfrr(hw_cpu, 0xff);
return smp_ipi_demux();
}
diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c
index 27c936c080a6..1c6bf4b66f56 100644
--- a/arch/powerpc/sysdev/xics/ics-opal.c
+++ b/arch/powerpc/sysdev/xics/ics-opal.c
@@ -156,7 +156,9 @@ static struct irq_chip ics_opal_irq_chip = {
.irq_mask = ics_opal_mask_irq,
.irq_unmask = ics_opal_unmask_irq,
.irq_eoi = NULL, /* Patched at init time */
- .irq_set_affinity = ics_opal_set_affinity
+ .irq_set_affinity = ics_opal_set_affinity,
+ .irq_set_type = xics_set_irq_type,
+ .irq_retrigger = xics_retrigger,
};
static int ics_opal_map(struct ics *ics, unsigned int virq);
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
index 3854dd41558d..78ee5c778ef8 100644
--- a/arch/powerpc/sysdev/xics/ics-rtas.c
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -163,7 +163,9 @@ static struct irq_chip ics_rtas_irq_chip = {
.irq_mask = ics_rtas_mask_irq,
.irq_unmask = ics_rtas_unmask_irq,
.irq_eoi = NULL, /* Patched at init time */
- .irq_set_affinity = ics_rtas_set_affinity
+ .irq_set_affinity = ics_rtas_set_affinity,
+ .irq_set_type = xics_set_irq_type,
+ .irq_retrigger = xics_retrigger,
};
static int ics_rtas_map(struct ics *ics, unsigned int virq)
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index a795a5f0301c..69d858e51ac7 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -131,7 +131,7 @@ static void xics_request_ipi(void)
unsigned int ipi;
ipi = irq_create_mapping(xics_host, XICS_IPI);
- BUG_ON(ipi == NO_IRQ);
+ BUG_ON(!ipi);
/*
* IPIs are marked IRQF_PERCPU. The handler was set in map.
@@ -328,8 +328,12 @@ static int xics_host_map(struct irq_domain *h, unsigned int virq,
pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
- /* They aren't all level sensitive but we just don't really know */
- irq_set_status_flags(virq, IRQ_LEVEL);
+ /*
+ * Mark interrupts as edge sensitive by default so that resend
+ * actually works. The device-tree parsing will turn the LSIs
+ * back to level.
+ */
+ irq_clear_status_flags(virq, IRQ_LEVEL);
/* Don't call into ICS for IPIs */
if (hw == XICS_IPI) {
@@ -351,13 +355,54 @@ static int xics_host_xlate(struct irq_domain *h, struct device_node *ct,
irq_hw_number_t *out_hwirq, unsigned int *out_flags)
{
- /* Current xics implementation translates everything
- * to level. It is not technically right for MSIs but this
- * is irrelevant at this point. We might get smarter in the future
- */
*out_hwirq = intspec[0];
- *out_flags = IRQ_TYPE_LEVEL_LOW;
+ /*
+ * If intsize is at least 2, we look for the type in the second cell;
+ * we assume its LSB indicates a level interrupt.
+ */
+ if (intsize > 1) {
+ if (intspec[1] & 1)
+ *out_flags = IRQ_TYPE_LEVEL_LOW;
+ else
+ *out_flags = IRQ_TYPE_EDGE_RISING;
+ } else
+ *out_flags = IRQ_TYPE_LEVEL_LOW;
+
+ return 0;
+}
+
+int xics_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+ /*
+ * We only support these two. Setting the type has no effect beyond
+ * updating the corresponding descriptor bits, but those in turn drive
+ * the resend path when re-enabling an edge interrupt.
+ *
+ * We set the default to edge, as explained in map().
+ */
+ if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
+ flow_type = IRQ_TYPE_EDGE_RISING;
+
+ if (flow_type != IRQ_TYPE_EDGE_RISING &&
+ flow_type != IRQ_TYPE_LEVEL_LOW)
+ return -EINVAL;
+
+ irqd_set_trigger_type(d, flow_type);
+
+ return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+int xics_retrigger(struct irq_data *data)
+{
+ /*
+ * We need to push a dummy CPPR when retriggering, since the subsequent
+ * EOI will try to pop it. Passing 0 works, as the function hard codes
+ * the priority value anyway.
+ */
+ xics_push_cppr(0);
+
+ /* Tell the core to do a soft retrigger */
return 0;
}
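
With the new callbacks wired into both ICS backends, drivers can request a trigger type and XICS accepts exactly the two it can represent. A hypothetical consumer (virq acquisition elided):

	static int demo_request(unsigned int virq)
	{
		/* Accepted: maps straight onto the descriptor bits. */
		if (irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING))
			return -EINVAL;
		/* Anything else, e.g. IRQ_TYPE_LEVEL_HIGH, gets -EINVAL
		 * back from xics_set_irq_type(). */
		return 0;
	}
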
diff --git a/arch/powerpc/xmon/spr_access.S b/arch/powerpc/xmon/spr_access.S
index 84ad74213c83..7d8b0e8ed6d9 100644
--- a/arch/powerpc/xmon/spr_access.S
+++ b/arch/powerpc/xmon/spr_access.S
@@ -2,12 +2,12 @@
/* unsigned long xmon_mfspr(sprn, default_value) */
_GLOBAL(xmon_mfspr)
- ld r5, .Lmfspr_table@got(r2)
+ PPC_LL r5, .Lmfspr_table@got(r2)
b xmon_mxspr
/* void xmon_mtspr(sprn, new_value) */
_GLOBAL(xmon_mtspr)
- ld r5, .Lmtspr_table@got(r2)
+ PPC_LL r5, .Lmtspr_table@got(r2)
b xmon_mxspr
/*
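
The xmon change swaps a hard-coded 64-bit ld for PPC_LL so the table load matches the build's word size. If memory serves, asm/asm-compat.h defines it roughly as follows; this is a hedged sketch, not a verbatim copy:

	/* Approximate PPC_LL definition from asm/asm-compat.h. */
	#ifdef __powerpc64__
	#define PPC_LL	stringify_in_c(ld)	/* 64-bit load */
	#else
	#define PPC_LL	stringify_in_c(lwz)	/* 32-bit load */
	#endif
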