summaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig2
-rw-r--r--arch/powerpc/Kconfig.debug10
-rw-r--r--arch/powerpc/boot/.gitignore1
-rw-r--r--arch/powerpc/boot/dts/p1020mbg-pc.dtsi151
-rw-r--r--arch/powerpc/boot/dts/p1020mbg-pc_32b.dts89
-rw-r--r--arch/powerpc/boot/dts/p1020mbg-pc_36b.dts89
-rw-r--r--arch/powerpc/boot/dts/p1020utm-pc.dtsi140
-rw-r--r--arch/powerpc/boot/dts/p1020utm-pc_32b.dts89
-rw-r--r--arch/powerpc/boot/dts/p1020utm-pc_36b.dts89
-rw-r--r--arch/powerpc/boot/dts/p2041rdb.dts3
-rw-r--r--arch/powerpc/boot/dts/p3041ds.dts4
-rw-r--r--arch/powerpc/boot/dts/p3060qds.dts2
-rw-r--r--arch/powerpc/boot/dts/p4080ds.dts3
-rw-r--r--arch/powerpc/boot/dts/p5020ds.dts4
-rw-r--r--arch/powerpc/configs/85xx/p1023rds_defconfig2
-rw-r--r--arch/powerpc/configs/chroma_defconfig2
-rw-r--r--arch/powerpc/configs/corenet32_smp_defconfig1
-rw-r--r--arch/powerpc/configs/corenet64_smp_defconfig4
-rw-r--r--arch/powerpc/configs/mpc85xx_defconfig3
-rw-r--r--arch/powerpc/configs/mpc85xx_smp_defconfig3
-rw-r--r--arch/powerpc/configs/ppc64_defconfig2
-rw-r--r--arch/powerpc/configs/pseries_defconfig2
-rw-r--r--arch/powerpc/include/asm/atomic.h8
-rw-r--r--arch/powerpc/include/asm/auxvec.h2
-rw-r--r--arch/powerpc/include/asm/barrier.h68
-rw-r--r--arch/powerpc/include/asm/bug.h11
-rw-r--r--arch/powerpc/include/asm/cache.h16
-rw-r--r--arch/powerpc/include/asm/cmpxchg.h309
-rw-r--r--arch/powerpc/include/asm/debug.h56
-rw-r--r--arch/powerpc/include/asm/dma.h1
-rw-r--r--arch/powerpc/include/asm/epapr_hcalls.h7
-rw-r--r--arch/powerpc/include/asm/exec.h9
-rw-r--r--arch/powerpc/include/asm/fsl_guts.h26
-rw-r--r--arch/powerpc/include/asm/hw_breakpoint.h2
-rw-r--r--arch/powerpc/include/asm/iommu.h1
-rw-r--r--arch/powerpc/include/asm/irq.h6
-rw-r--r--arch/powerpc/include/asm/kvm.h46
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h98
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_32.h6
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h180
-rw-r--r--arch/powerpc/include/asm/kvm_e500.h52
-rw-r--r--arch/powerpc/include/asm/kvm_host.h90
-rw-r--r--arch/powerpc/include/asm/kvm_para.h41
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h25
-rw-r--r--arch/powerpc/include/asm/machdep.h4
-rw-r--r--arch/powerpc/include/asm/mmu-book3e.h6
-rw-r--r--arch/powerpc/include/asm/mmu-hash64.h14
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h16
-rw-r--r--arch/powerpc/include/asm/perf_event_server.h2
-rw-r--r--arch/powerpc/include/asm/posix_types.h118
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h4
-rw-r--r--arch/powerpc/include/asm/processor.h30
-rw-r--r--arch/powerpc/include/asm/reg.h5
-rw-r--r--arch/powerpc/include/asm/reg_booke.h5
-rw-r--r--arch/powerpc/include/asm/rtas.h36
-rw-r--r--arch/powerpc/include/asm/runlatch.h45
-rw-r--r--arch/powerpc/include/asm/setup.h24
-rw-r--r--arch/powerpc/include/asm/smp.h1
-rw-r--r--arch/powerpc/include/asm/switch_to.h65
-rw-r--r--arch/powerpc/include/asm/system.h592
-rw-r--r--arch/powerpc/include/asm/udbg.h1
-rw-r--r--arch/powerpc/include/asm/vio.h10
-rw-r--r--arch/powerpc/kernel/align.c2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c16
-rw-r--r--arch/powerpc/kernel/cputable.c1
-rw-r--r--arch/powerpc/kernel/crash.c2
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S8
-rw-r--r--arch/powerpc/kernel/fadump.c2
-rw-r--r--arch/powerpc/kernel/idle.c2
-rw-r--r--arch/powerpc/kernel/irq.c10
-rw-r--r--arch/powerpc/kernel/kgdb.c1
-rw-r--r--arch/powerpc/kernel/kprobes.c1
-rw-r--r--arch/powerpc/kernel/kvm.c307
-rw-r--r--arch/powerpc/kernel/kvm_emul.S112
-rw-r--r--arch/powerpc/kernel/lparcfg.c1
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c2
-rw-r--r--arch/powerpc/kernel/process.c4
-rw-r--r--arch/powerpc/kernel/prom.c1
-rw-r--r--arch/powerpc/kernel/prom_init.c3
-rw-r--r--arch/powerpc/kernel/ptrace.c2
-rw-r--r--arch/powerpc/kernel/ptrace32.c2
-rw-r--r--arch/powerpc/kernel/rtas.c35
-rw-r--r--arch/powerpc/kernel/setup-common.c1
-rw-r--r--arch/powerpc/kernel/setup_32.c1
-rw-r--r--arch/powerpc/kernel/setup_64.c3
-rw-r--r--arch/powerpc/kernel/signal.c1
-rw-r--r--arch/powerpc/kernel/signal_32.c1
-rw-r--r--arch/powerpc/kernel/signal_64.c1
-rw-r--r--arch/powerpc/kernel/smp.c2
-rw-r--r--arch/powerpc/kernel/softemu8xx.c1
-rw-r--r--arch/powerpc/kernel/swsusp.c2
-rw-r--r--arch/powerpc/kernel/swsusp_64.c1
-rw-r--r--arch/powerpc/kernel/sys_ppc32.c1
-rw-r--r--arch/powerpc/kernel/sysfs.c1
-rw-r--r--arch/powerpc/kernel/traps.c3
-rw-r--r--arch/powerpc/kernel/udbg.c3
-rw-r--r--arch/powerpc/kernel/vdso.c5
-rw-r--r--arch/powerpc/kernel/vio.c12
-rw-r--r--arch/powerpc/kvm/Kconfig1
-rw-r--r--arch/powerpc/kvm/book3s.c57
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu_host.c21
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_host.c66
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c919
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c9
-rw-r--r--arch/powerpc/kvm/book3s_hv.c466
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c209
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c835
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S176
-rw-r--r--arch/powerpc/kvm/book3s_paired_singles.c10
-rw-r--r--arch/powerpc/kvm/book3s_pr.c179
-rw-r--r--arch/powerpc/kvm/booke.c150
-rw-r--r--arch/powerpc/kvm/booke.h4
-rw-r--r--arch/powerpc/kvm/booke_emulate.c23
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S18
-rw-r--r--arch/powerpc/kvm/e500.c32
-rw-r--r--arch/powerpc/kvm/e500_emulate.c38
-rw-r--r--arch/powerpc/kvm/e500_tlb.c775
-rw-r--r--arch/powerpc/kvm/e500_tlb.h80
-rw-r--r--arch/powerpc/kvm/emulate.c61
-rw-r--r--arch/powerpc/kvm/powerpc.c148
-rw-r--r--arch/powerpc/kvm/trace.h62
-rw-r--r--arch/powerpc/lib/alloc.c2
-rw-r--r--arch/powerpc/lib/copyuser_power7_vmx.c1
-rw-r--r--arch/powerpc/mm/44x_mmu.c1
-rw-r--r--arch/powerpc/mm/fault.c2
-rw-r--r--arch/powerpc/mm/hash_utils_64.c1
-rw-r--r--arch/powerpc/mm/hugetlbpage.c5
-rw-r--r--arch/powerpc/mm/init_32.c1
-rw-r--r--arch/powerpc/mm/init_64.c1
-rw-r--r--arch/powerpc/mm/numa.c2
-rw-r--r--arch/powerpc/mm/pgtable_32.c1
-rw-r--r--arch/powerpc/mm/pgtable_64.c1
-rw-r--r--arch/powerpc/oprofile/common.c1
-rw-r--r--arch/powerpc/oprofile/op_model_7450.c1
-rw-r--r--arch/powerpc/oprofile/op_model_cell.c1
-rw-r--r--arch/powerpc/oprofile/op_model_fsl_emb.c1
-rw-r--r--arch/powerpc/oprofile/op_model_power4.c1
-rw-r--r--arch/powerpc/oprofile/op_model_rs64.c1
-rw-r--r--arch/powerpc/perf/core-book3s.c46
-rw-r--r--arch/powerpc/perf/power4-pmu.c1
-rw-r--r--arch/powerpc/perf/ppc970-pmu.c1
-rw-r--r--arch/powerpc/platforms/52xx/lite5200_pm.c1
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_pci.c2
-rw-r--r--arch/powerpc/platforms/82xx/pq2.c1
-rw-r--r--arch/powerpc/platforms/83xx/km83xx.c1
-rw-r--r--arch/powerpc/platforms/83xx/mpc832x_mds.c1
-rw-r--r--arch/powerpc/platforms/83xx/mpc834x_itx.c1
-rw-r--r--arch/powerpc/platforms/83xx/mpc834x_mds.c1
-rw-r--r--arch/powerpc/platforms/83xx/mpc836x_mds.c1
-rw-r--r--arch/powerpc/platforms/83xx/sbc834x.c1
-rw-r--r--arch/powerpc/platforms/83xx/suspend.c1
-rw-r--r--arch/powerpc/platforms/85xx/corenet_ds.c1
-rw-r--r--arch/powerpc/platforms/85xx/ge_imp3a.c1
-rw-r--r--arch/powerpc/platforms/85xx/ksi8560.c1
-rw-r--r--arch/powerpc/platforms/85xx/mpc8536_ds.c1
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_ads.c1
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_cds.c1
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_ds.c1
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_mds.c3
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_rdb.c3
-rw-r--r--arch/powerpc/platforms/85xx/p1010rdb.c1
-rw-r--r--arch/powerpc/platforms/85xx/p1022_ds.c4
-rw-r--r--arch/powerpc/platforms/85xx/p1023_rds.c1
-rw-r--r--arch/powerpc/platforms/85xx/p2041_rdb.c1
-rw-r--r--arch/powerpc/platforms/85xx/p3041_ds.c1
-rw-r--r--arch/powerpc/platforms/85xx/p4080_ds.c1
-rw-r--r--arch/powerpc/platforms/85xx/p5020_ds.c1
-rw-r--r--arch/powerpc/platforms/85xx/sbc8548.c1
-rw-r--r--arch/powerpc/platforms/85xx/sbc8560.c1
-rw-r--r--arch/powerpc/platforms/85xx/socrates.c1
-rw-r--r--arch/powerpc/platforms/85xx/stx_gp3.c1
-rw-r--r--arch/powerpc/platforms/85xx/tqm85xx.c1
-rw-r--r--arch/powerpc/platforms/85xx/xes_mpc85xx.c1
-rw-r--r--arch/powerpc/platforms/86xx/gef_ppc9a.c1
-rw-r--r--arch/powerpc/platforms/86xx/gef_sbc310.c1
-rw-r--r--arch/powerpc/platforms/86xx/gef_sbc610.c1
-rw-r--r--arch/powerpc/platforms/86xx/mpc8610_hpcd.c3
-rw-r--r--arch/powerpc/platforms/86xx/mpc86xx_hpcn.c1
-rw-r--r--arch/powerpc/platforms/86xx/pic.c1
-rw-r--r--arch/powerpc/platforms/86xx/sbc8641d.c1
-rw-r--r--arch/powerpc/platforms/8xx/mpc86xads_setup.c1
-rw-r--r--arch/powerpc/platforms/8xx/mpc885ads_setup.c1
-rw-r--r--arch/powerpc/platforms/8xx/tqm8xx_setup.c1
-rw-r--r--arch/powerpc/platforms/cell/beat_htab.c2
-rw-r--r--arch/powerpc/platforms/cell/qpace_setup.c2
-rw-r--r--arch/powerpc/platforms/cell/setup.c2
-rw-r--r--arch/powerpc/platforms/cell/smp.c1
-rw-r--r--arch/powerpc/platforms/cell/spufs/coredump.c2
-rw-r--r--arch/powerpc/platforms/embedded6xx/c2k.c1
-rw-r--r--arch/powerpc/platforms/embedded6xx/holly.c1
-rw-r--r--arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c1
-rw-r--r--arch/powerpc/platforms/embedded6xx/prpmc2800.c1
-rw-r--r--arch/powerpc/platforms/embedded6xx/storcenter.c1
-rw-r--r--arch/powerpc/platforms/fsl_uli1575.c1
-rw-r--r--arch/powerpc/platforms/maple/setup.c1
-rw-r--r--arch/powerpc/platforms/maple/time.c1
-rw-r--r--arch/powerpc/platforms/pasemi/setup.c2
-rw-r--r--arch/powerpc/platforms/powermac/bootx_init.c1
-rw-r--r--arch/powerpc/platforms/powermac/cpufreq_32.c2
-rw-r--r--arch/powerpc/platforms/powermac/nvram.c1
-rw-r--r--arch/powerpc/platforms/powermac/setup.c1
-rw-r--r--arch/powerpc/platforms/powermac/time.c1
-rw-r--r--arch/powerpc/platforms/powernv/smp.c1
-rw-r--r--arch/powerpc/platforms/ps3/mm.c1
-rw-r--r--arch/powerpc/platforms/pseries/dtl.c2
-rw-r--r--arch/powerpc/platforms/pseries/eeh.c19
-rw-r--r--arch/powerpc/platforms/pseries/eeh_dev.c2
-rw-r--r--arch/powerpc/platforms/pseries/eeh_event.c4
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c1
-rw-r--r--arch/powerpc/platforms/pseries/io_event_irq.c68
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c29
-rw-r--r--arch/powerpc/platforms/pseries/processor_idle.c2
-rw-r--r--arch/powerpc/platforms/pseries/ras.c195
-rw-r--r--arch/powerpc/platforms/pseries/smp.c1
-rw-r--r--arch/powerpc/platforms/wsp/chroma.c1
-rw-r--r--arch/powerpc/platforms/wsp/psr2.c1
-rw-r--r--arch/powerpc/platforms/wsp/wsp_pci.c1
-rw-r--r--arch/powerpc/sysdev/cpm_common.c1
-rw-r--r--arch/powerpc/sysdev/fsl_soc.c1
-rw-r--r--arch/powerpc/sysdev/msi_bitmap.c1
-rw-r--r--arch/powerpc/sysdev/qe_lib/qe.c22
-rw-r--r--arch/powerpc/sysdev/tsi108_dev.c1
-rw-r--r--arch/powerpc/xmon/xmon.c1
223 files changed, 5854 insertions, 2275 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d219ebecabf0..feab3bad6d0f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -133,7 +133,6 @@ config PPC
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
select HAVE_GENERIC_HARDIRQS
- select HAVE_SPARSE_IRQ
select SPARSE_IRQ
select IRQ_PER_CPU
select IRQ_DOMAIN
@@ -154,6 +153,7 @@ config COMPAT
bool
default y if PPC64
select COMPAT_BINFMT_ELF
+ select ARCH_WANT_OLD_COMPAT_IPC
config SYSVIPC_COMPAT
bool
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 72d55dbc6119..e5f26890a69e 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -114,16 +114,6 @@ config DEBUGGER
depends on KGDB || XMON
default y
-config VIRQ_DEBUG
- bool "Expose hardware/virtual IRQ mapping via debugfs"
- depends on DEBUG_FS
- help
- This option will show the mapping relationship between hardware irq
- numbers and virtual irq numbers. The mapping is exposed via debugfs
- in the file powerpc/virq_mapping.
-
- If you don't know what this means you don't need it.
-
config BDI_SWITCH
bool "Include BDI-2000 user context switcher"
depends on DEBUG_KERNEL && PPC32
diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore
index 12da77ec0228..1c1aadc8c48f 100644
--- a/arch/powerpc/boot/.gitignore
+++ b/arch/powerpc/boot/.gitignore
@@ -27,7 +27,6 @@ zImage.bin.*
zImage.chrp
zImage.coff
zImage.holly
-zImage.iseries
zImage.*lds
zImage.miboot
zImage.pmac
diff --git a/arch/powerpc/boot/dts/p1020mbg-pc.dtsi b/arch/powerpc/boot/dts/p1020mbg-pc.dtsi
new file mode 100644
index 000000000000..a24699cfea9c
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020mbg-pc.dtsi
@@ -0,0 +1,151 @@
+/*
+ * P1020 MBG-PC Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {
+ nor@0,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "cfi-flash";
+ reg = <0x0 0x0 0x4000000>;
+ bank-width = <2>;
+ device-width = <1>;
+
+ partition@0 {
+ /* 128KB for DTB Image */
+ reg = <0x0 0x00020000>;
+ label = "NOR DTB Image";
+ };
+
+ partition@20000 {
+ /* 3.875 MB for Linux Kernel Image */
+ reg = <0x00020000 0x003e0000>;
+ label = "NOR Linux Kernel Image";
+ };
+
+ partition@400000 {
+ /* 58MB for Root file System */
+ reg = <0x00400000 0x03a00000>;
+ label = "NOR Root File System";
+ };
+
+ partition@3e00000 {
+ /* This location must not be altered */
+ /* 1M for Vitesse 7385 Switch firmware */
+ reg = <0x3e00000 0x00100000>;
+ label = "NOR Vitesse-7385 Firmware";
+ read-only;
+ };
+
+ partition@3f00000 {
+ /* This location must not be altered */
+ /* 512KB for u-boot Bootloader Image */
+ /* 512KB for u-boot Environment Variables */
+ reg = <0x03f00000 0x00100000>;
+ label = "NOR U-Boot Image";
+ read-only;
+ };
+ };
+
+ L2switch@2,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "vitesse-7385";
+ reg = <0x2 0x0 0x20000>;
+ };
+};
+
+&soc {
+ i2c@3000 {
+ rtc@68 {
+ compatible = "dallas,ds1339";
+ reg = <0x68>;
+ };
+ };
+
+ mdio@24000 {
+ phy0: ethernet-phy@0 {
+ interrupts = <3 1 0 0>;
+ reg = <0x0>;
+ };
+ phy1: ethernet-phy@1 {
+ interrupts = <2 1 0 0>;
+ reg = <0x1>;
+ };
+ };
+
+ mdio@25000 {
+ tbi1: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
+ };
+ };
+
+ mdio@26000 {
+ tbi2: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
+ };
+ };
+
+ enet0: ethernet@b0000 {
+ fixed-link = <1 1 1000 0 0>;
+ phy-connection-type = "rgmii-id";
+ };
+
+ enet1: ethernet@b1000 {
+ phy-handle = <&phy0>;
+ tbi-handle = <&tbi1>;
+ phy-connection-type = "sgmii";
+ };
+
+ enet2: ethernet@b2000 {
+ phy-handle = <&phy1>;
+ phy-connection-type = "rgmii-id";
+ };
+
+ usb@22000 {
+ phy_type = "ulpi";
+ };
+
+ /* USB2 is shared with localbus, so it must be disabled
+ by default. We can't put 'status = "disabled";' here
+ since U-Boot doesn't clear the status property when
+ it enables USB2. OTOH, U-Boot does create a new node
+ when there isn't any. So, just comment it out.
+ */
+ usb@23000 {
+ status = "disabled";
+ phy_type = "ulpi";
+ };
+};
diff --git a/arch/powerpc/boot/dts/p1020mbg-pc_32b.dts b/arch/powerpc/boot/dts/p1020mbg-pc_32b.dts
new file mode 100644
index 000000000000..ab8f076eae90
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020mbg-pc_32b.dts
@@ -0,0 +1,89 @@
+/*
+ * P1020 MBG-PC Device Tree Source (32-bit address map)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/p1020si-pre.dtsi"
+/ {
+ model = "fsl,P1020MBG-PC";
+ compatible = "fsl,P1020MBG-PC";
+
+ memory {
+ device_type = "memory";
+ };
+
+ lbc: localbus@ffe05000 {
+ reg = <0x0 0xffe05000 0x0 0x1000>;
+
+ /* NOR and L2 switch */
+ ranges = <0x0 0x0 0x0 0xec000000 0x04000000
+ 0x1 0x0 0x0 0xffa00000 0x00040000
+ 0x2 0x0 0x0 0xffb00000 0x00020000>;
+ };
+
+ soc: soc@ffe00000 {
+ ranges = <0x0 0x0 0xffe00000 0x100000>;
+ };
+
+ pci0: pcie@ffe09000 {
+ reg = <0x0 0xffe09000 0x0 0x1000>;
+ ranges = <0x2000000 0x0 0xe0000000 0x0 0xa0000000 0x0 0x20000000
+ 0x1000000 0x0 0x00000000 0x0 0xffc10000 0x0 0x10000>;
+ pcie@0 {
+ ranges = <0x2000000 0x0 0xe0000000
+ 0x2000000 0x0 0xe0000000
+ 0x0 0x20000000
+
+ 0x1000000 0x0 0x0
+ 0x1000000 0x0 0x0
+ 0x0 0x100000>;
+ };
+ };
+
+ pci1: pcie@ffe0a000 {
+ reg = <0x0 0xffe0a000 0x0 0x1000>;
+ ranges = <0x2000000 0x0 0xe0000000 0x0 0x80000000 0x0 0x20000000
+ 0x1000000 0x0 0x00000000 0x0 0xffc00000 0x0 0x10000>;
+ pcie@0 {
+ ranges = <0x2000000 0x0 0xe0000000
+ 0x2000000 0x0 0xe0000000
+ 0x0 0x20000000
+
+ 0x1000000 0x0 0x0
+ 0x1000000 0x0 0x0
+ 0x0 0x100000>;
+ };
+ };
+};
+
+/include/ "p1020mbg-pc.dtsi"
+/include/ "fsl/p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1020mbg-pc_36b.dts b/arch/powerpc/boot/dts/p1020mbg-pc_36b.dts
new file mode 100644
index 000000000000..9e9f401419b1
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020mbg-pc_36b.dts
@@ -0,0 +1,89 @@
+/*
+ * P1020 MBG-PC Device Tree Source (36-bit address map)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/p1020si-pre.dtsi"
+/ {
+ model = "fsl,P1020MBG-PC";
+ compatible = "fsl,P1020MBG-PC";
+
+ memory {
+ device_type = "memory";
+ };
+
+ lbc: localbus@fffe05000 {
+ reg = <0xf 0xffe05000 0x0 0x1000>;
+
+ /* NOR and L2 switch */
+ ranges = <0x0 0x0 0xf 0xec000000 0x04000000
+ 0x1 0x0 0xf 0xffa00000 0x00040000
+ 0x2 0x0 0xf 0xffb00000 0x00020000>;
+ };
+
+ soc: soc@fffe00000 {
+ ranges = <0x0 0xf 0xffe00000 0x100000>;
+ };
+
+ pci0: pcie@fffe09000 {
+ reg = <0xf 0xffe09000 0x0 0x1000>;
+ ranges = <0x2000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
+ 0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
+ pcie@0 {
+ ranges = <0x2000000 0x0 0xe0000000
+ 0x2000000 0x0 0xe0000000
+ 0x0 0x20000000
+
+ 0x1000000 0x0 0x0
+ 0x1000000 0x0 0x0
+ 0x0 0x100000>;
+ };
+ };
+
+ pci1: pcie@fffe0a000 {
+ reg = <0xf 0xffe0a000 0 0x1000>;
+ ranges = <0x2000000 0x0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+ 0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;
+ pcie@0 {
+ ranges = <0x2000000 0x0 0xe0000000
+ 0x2000000 0x0 0xe0000000
+ 0x0 0x20000000
+
+ 0x1000000 0x0 0x0
+ 0x1000000 0x0 0x0
+ 0x0 0x100000>;
+ };
+ };
+};
+
+/include/ "p1020mbg-pc.dtsi"
+/include/ "fsl/p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1020utm-pc.dtsi b/arch/powerpc/boot/dts/p1020utm-pc.dtsi
new file mode 100644
index 000000000000..7ea85eabcc5c
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020utm-pc.dtsi
@@ -0,0 +1,140 @@
+/*
+ * P1020 UTM-PC Device Tree Source stub (no addresses or top-level ranges)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&lbc {
+ nor@0,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "cfi-flash";
+ reg = <0x0 0x0 0x2000000>;
+ bank-width = <2>;
+ device-width = <1>;
+
+ partition@0 {
+ /* 256KB for DTB Image */
+ reg = <0x0 0x00040000>;
+ label = "NOR DTB Image";
+ };
+
+ partition@40000 {
+ /* 3.75 MB for Linux Kernel Image */
+ reg = <0x00040000 0x003c0000>;
+ label = "NOR Linux Kernel Image";
+ };
+
+ partition@400000 {
+ /* 27MB for Root file System */
+ reg = <0x00400000 0x01b00000>;
+ label = "NOR Root File System";
+ };
+
+ partition@1f00000 {
+ /* This location must not be altered */
+ /* 512KB for u-boot Bootloader Image */
+ /* 512KB for u-boot Environment Variables */
+ reg = <0x01f00000 0x00100000>;
+ label = "NOR U-Boot Image";
+ read-only;
+ };
+ };
+};
+
+&soc {
+ i2c@3000 {
+ rtc@68 {
+ compatible = "dallas,ds1339";
+ reg = <0x68>;
+ };
+ };
+
+ mdio@24000 {
+ phy0: ethernet-phy@0 {
+ interrupts = <3 1 0 0>;
+ reg = <0x0>;
+ };
+ phy1: ethernet-phy@1 {
+ interrupts = <2 1 0 0>;
+ reg = <0x1>;
+ };
+ phy2: ethernet-phy@2 {
+ interrupts = <1 1 0 0>;
+ reg = <0x2>;
+ };
+ };
+
+ mdio@25000 {
+ tbi1: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
+ };
+ };
+
+ mdio@26000 {
+ tbi2: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
+ };
+ };
+
+ enet0: ethernet@b0000 {
+ phy-handle = <&phy2>;
+ phy-connection-type = "rgmii-id";
+ };
+
+ enet1: ethernet@b1000 {
+ phy-handle = <&phy0>;
+ tbi-handle = <&tbi1>;
+ phy-connection-type = "sgmii";
+ };
+
+ enet2: ethernet@b2000 {
+ phy-handle = <&phy1>;
+ phy-connection-type = "rgmii-id";
+ };
+
+ usb@22000 {
+ phy_type = "ulpi";
+ };
+
+ /* USB2 is shared with localbus, so it must be disabled
+ by default. We can't put 'status = "disabled";' here
+ since U-Boot doesn't clear the status property when
+ it enables USB2. OTOH, U-Boot does create a new node
+ when there isn't any. So, just comment it out.
+ */
+ usb@23000 {
+ status = "disabled";
+ phy_type = "ulpi";
+ };
+};
diff --git a/arch/powerpc/boot/dts/p1020utm-pc_32b.dts b/arch/powerpc/boot/dts/p1020utm-pc_32b.dts
new file mode 100644
index 000000000000..4bfdd8971cdb
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020utm-pc_32b.dts
@@ -0,0 +1,89 @@
+/*
+ * P1020 UTM-PC Device Tree Source (32-bit address map)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/p1020si-pre.dtsi"
+/ {
+ model = "fsl,P1020UTM-PC";
+ compatible = "fsl,P1020UTM-PC";
+
+ memory {
+ device_type = "memory";
+ };
+
+ lbc: localbus@ffe05000 {
+ reg = <0x0 0xffe05000 0x0 0x1000>;
+
+ /* NOR */
+ ranges = <0x0 0x0 0x0 0xec000000 0x02000000
+ 0x1 0x0 0x0 0xffa00000 0x00040000
+ 0x2 0x0 0x0 0xffb00000 0x00020000>;
+ };
+
+ soc: soc@ffe00000 {
+ ranges = <0x0 0x0 0xffe00000 0x100000>;
+ };
+
+ pci0: pcie@ffe09000 {
+ reg = <0x0 0xffe09000 0x0 0x1000>;
+ ranges = <0x2000000 0x0 0xe0000000 0x0 0xa0000000 0x0 0x20000000
+ 0x1000000 0x0 0x00000000 0x0 0xffc10000 0x0 0x10000>;
+ pcie@0 {
+ ranges = <0x2000000 0x0 0xe0000000
+ 0x2000000 0x0 0xe0000000
+ 0x0 0x20000000
+
+ 0x1000000 0x0 0x0
+ 0x1000000 0x0 0x0
+ 0x0 0x100000>;
+ };
+ };
+
+ pci1: pcie@ffe0a000 {
+ reg = <0x0 0xffe0a000 0x0 0x1000>;
+ ranges = <0x2000000 0x0 0xe0000000 0x0 0x80000000 0x0 0x20000000
+ 0x1000000 0x0 0x00000000 0x0 0xffc00000 0x0 0x10000>;
+ pcie@0 {
+ ranges = <0x2000000 0x0 0xe0000000
+ 0x2000000 0x0 0xe0000000
+ 0x0 0x20000000
+
+ 0x1000000 0x0 0x0
+ 0x1000000 0x0 0x0
+ 0x0 0x100000>;
+ };
+ };
+};
+
+/include/ "p1020utm-pc.dtsi"
+/include/ "fsl/p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1020utm-pc_36b.dts b/arch/powerpc/boot/dts/p1020utm-pc_36b.dts
new file mode 100644
index 000000000000..abec53557501
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020utm-pc_36b.dts
@@ -0,0 +1,89 @@
+/*
+ * P1020 UTM-PC Device Tree Source (36-bit address map)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/p1020si-pre.dtsi"
+/ {
+ model = "fsl,P1020UTM-PC";
+ compatible = "fsl,P1020UTM-PC";
+
+ memory {
+ device_type = "memory";
+ };
+
+ lbc: localbus@fffe05000 {
+ reg = <0xf 0xffe05000 0x0 0x1000>;
+
+ /* NOR */
+ ranges = <0x0 0x0 0xf 0xec000000 0x02000000
+ 0x1 0x0 0xf 0xffa00000 0x00040000
+ 0x2 0x0 0xf 0xffb00000 0x00020000>;
+ };
+
+ soc: soc@fffe00000 {
+ ranges = <0x0 0xf 0xffe00000 0x100000>;
+ };
+
+ pci0: pcie@fffe09000 {
+ reg = <0xf 0xffe09000 0x0 0x1000>;
+ ranges = <0x2000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
+ 0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
+ pcie@0 {
+ ranges = <0x2000000 0x0 0xe0000000
+ 0x2000000 0x0 0xe0000000
+ 0x0 0x20000000
+
+ 0x1000000 0x0 0x0
+ 0x1000000 0x0 0x0
+ 0x0 0x100000>;
+ };
+ };
+
+ pci1: pcie@fffe0a000 {
+ reg = <0xf 0xffe0a000 0 0x1000>;
+ ranges = <0x2000000 0x0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+ 0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;
+ pcie@0 {
+ ranges = <0x2000000 0x0 0xe0000000
+ 0x2000000 0x0 0xe0000000
+ 0x0 0x20000000
+
+ 0x1000000 0x0 0x0
+ 0x1000000 0x0 0x0
+ 0x0 0x100000>;
+ };
+ };
+};
+
+/include/ "p1020utm-pc.dtsi"
+/include/ "fsl/p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p2041rdb.dts b/arch/powerpc/boot/dts/p2041rdb.dts
index 4f957db01230..285213976a7f 100644
--- a/arch/powerpc/boot/dts/p2041rdb.dts
+++ b/arch/powerpc/boot/dts/p2041rdb.dts
@@ -135,7 +135,6 @@
reg = <0xf 0xfe200000 0 0x1000>;
ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
- fsl,msi = <&msi0>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
@@ -151,7 +150,6 @@
reg = <0xf 0xfe201000 0 0x1000>;
ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
- fsl,msi = <&msi1>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
@@ -167,7 +165,6 @@
reg = <0xf 0xfe202000 0 0x1000>;
ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
- fsl,msi = <&msi2>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
diff --git a/arch/powerpc/boot/dts/p3041ds.dts b/arch/powerpc/boot/dts/p3041ds.dts
index f469145abaeb..22a215e94162 100644
--- a/arch/powerpc/boot/dts/p3041ds.dts
+++ b/arch/powerpc/boot/dts/p3041ds.dts
@@ -173,7 +173,6 @@
reg = <0xf 0xfe200000 0 0x1000>;
ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
- fsl,msi = <&msi0>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
@@ -189,7 +188,6 @@
reg = <0xf 0xfe201000 0 0x1000>;
ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
- fsl,msi = <&msi1>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
@@ -205,7 +203,6 @@
reg = <0xf 0xfe202000 0 0x1000>;
ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
- fsl,msi = <&msi2>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
@@ -221,7 +218,6 @@
reg = <0xf 0xfe203000 0 0x1000>;
ranges = <0x02000000 0 0xe0000000 0xc 0x60000000 0 0x20000000
0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
- fsl,msi = <&msi2>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
diff --git a/arch/powerpc/boot/dts/p3060qds.dts b/arch/powerpc/boot/dts/p3060qds.dts
index 529042e4b9a2..9ae875c8a211 100644
--- a/arch/powerpc/boot/dts/p3060qds.dts
+++ b/arch/powerpc/boot/dts/p3060qds.dts
@@ -212,7 +212,6 @@
reg = <0xf 0xfe200000 0 0x1000>;
ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
- fsl,msi = <&msi0>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
@@ -228,7 +227,6 @@
reg = <0xf 0xfe201000 0 0x1000>;
ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
- fsl,msi = <&msi1>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts
index 6d60e54e50a0..3e204609d02e 100644
--- a/arch/powerpc/boot/dts/p4080ds.dts
+++ b/arch/powerpc/boot/dts/p4080ds.dts
@@ -141,7 +141,6 @@
reg = <0xf 0xfe200000 0 0x1000>;
ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
- fsl,msi = <&msi0>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
@@ -157,7 +156,6 @@
reg = <0xf 0xfe201000 0 0x1000>;
ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
- fsl,msi = <&msi1>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
@@ -173,7 +171,6 @@
reg = <0xf 0xfe202000 0 0x1000>;
ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
- fsl,msi = <&msi2>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
diff --git a/arch/powerpc/boot/dts/p5020ds.dts b/arch/powerpc/boot/dts/p5020ds.dts
index 1c250684c902..27c07ed6adc1 100644
--- a/arch/powerpc/boot/dts/p5020ds.dts
+++ b/arch/powerpc/boot/dts/p5020ds.dts
@@ -173,7 +173,6 @@
reg = <0xf 0xfe200000 0 0x1000>;
ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
- fsl,msi = <&msi0>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
@@ -189,7 +188,6 @@
reg = <0xf 0xfe201000 0 0x1000>;
ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
- fsl,msi = <&msi1>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
@@ -205,7 +203,6 @@
reg = <0xf 0xfe202000 0 0x1000>;
ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
- fsl,msi = <&msi2>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
@@ -221,7 +218,6 @@
reg = <0xf 0xfe203000 0 0x1000>;
ranges = <0x02000000 0 0xe0000000 0xc 0x60000000 0 0x20000000
0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
- fsl,msi = <&msi2>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
diff --git a/arch/powerpc/configs/85xx/p1023rds_defconfig b/arch/powerpc/configs/85xx/p1023rds_defconfig
index c091aaf7685f..f4337bacd0e7 100644
--- a/arch/powerpc/configs/85xx/p1023rds_defconfig
+++ b/arch/powerpc/configs/85xx/p1023rds_defconfig
@@ -165,7 +165,7 @@ CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_INFO=y
# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_VIRQ_DEBUG=y
+CONFIG_IRQ_DOMAIN_DEBUG=y
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=y
diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig
index acf7fb280464..f104ccde6b53 100644
--- a/arch/powerpc/configs/chroma_defconfig
+++ b/arch/powerpc/configs/chroma_defconfig
@@ -279,7 +279,7 @@ CONFIG_FTRACE_SYSCALLS=y
CONFIG_PPC_EMULATED_STATS=y
CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
-CONFIG_VIRQ_DEBUG=y
+CONFIG_IRQ_DOMAIN_DEBUG=y
CONFIG_PPC_EARLY_DEBUG=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_CRYPTO_NULL=m
diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig
index f8aef205d222..91db656294e8 100644
--- a/arch/powerpc/configs/corenet32_smp_defconfig
+++ b/arch/powerpc/configs/corenet32_smp_defconfig
@@ -116,6 +116,7 @@ CONFIG_SERIAL_8250_RSA=y
CONFIG_HW_RANDOM=y
CONFIG_NVRAM=y
CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
CONFIG_SPI=y
CONFIG_SPI_GPIO=y
diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig
index 7ed8d4cf2719..6798343580f0 100644
--- a/arch/powerpc/configs/corenet64_smp_defconfig
+++ b/arch/powerpc/configs/corenet64_smp_defconfig
@@ -71,6 +71,8 @@ CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_DETECT_IRQ=y
CONFIG_SERIAL_8250_RSA=y
CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MPC=y
# CONFIG_HWMON is not set
CONFIG_VIDEO_OUTPUT_CONTROL=y
# CONFIG_HID_SUPPORT is not set
@@ -95,7 +97,7 @@ CONFIG_DEBUG_FS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_INFO=y
CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_VIRQ_DEBUG=y
+CONFIG_IRQ_DOMAIN_DEBUG=y
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=y
diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig
index 5fb0c8a94811..d6b6df5e8743 100644
--- a/arch/powerpc/configs/mpc85xx_defconfig
+++ b/arch/powerpc/configs/mpc85xx_defconfig
@@ -117,6 +117,7 @@ CONFIG_SERIAL_8250_RSA=y
CONFIG_SERIAL_QE=m
CONFIG_NVRAM=y
CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
CONFIG_I2C_CPM=m
CONFIG_I2C_MPC=y
CONFIG_SPI=y
@@ -214,7 +215,7 @@ CONFIG_DEBUG_FS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_INFO=y
CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_VIRQ_DEBUG=y
+CONFIG_IRQ_DOMAIN_DEBUG=y
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=y
diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig
index fb51bc90edd2..5b0e2926becd 100644
--- a/arch/powerpc/configs/mpc85xx_smp_defconfig
+++ b/arch/powerpc/configs/mpc85xx_smp_defconfig
@@ -119,6 +119,7 @@ CONFIG_SERIAL_8250_RSA=y
CONFIG_SERIAL_QE=m
CONFIG_NVRAM=y
CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
CONFIG_I2C_CPM=m
CONFIG_I2C_MPC=y
CONFIG_SPI=y
@@ -216,7 +217,7 @@ CONFIG_DEBUG_FS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_INFO=y
CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_VIRQ_DEBUG=y
+CONFIG_IRQ_DOMAIN_DEBUG=y
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 1acf65026773..c1442a3758ae 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -457,7 +457,7 @@ CONFIG_CODE_PATCHING_SELFTEST=y
CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
CONFIG_XMON=y
-CONFIG_VIRQ_DEBUG=y
+CONFIG_IRQ_DOMAIN_DEBUG=y
CONFIG_BOOTX_TEXT=y
CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_TEST=m
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 30e7d0d20e49..6608232663cb 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -340,7 +340,7 @@ CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
-CONFIG_VIRQ_DEBUG=y
+CONFIG_IRQ_DOMAIN_DEBUG=y
CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CCM=m
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 14174e838ad9..da29032ae38f 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -5,13 +5,9 @@
* PowerPC atomic operations
*/
-#include <linux/types.h>
-
#ifdef __KERNEL__
-#include <linux/compiler.h>
-#include <asm/synch.h>
-#include <asm/asm-compat.h>
-#include <asm/system.h>
+#include <linux/types.h>
+#include <asm/cmpxchg.h>
#define ATOMIC_INIT(i) { (i) }
diff --git a/arch/powerpc/include/asm/auxvec.h b/arch/powerpc/include/asm/auxvec.h
index 19a099b62cd6..ce17d2c9eb4e 100644
--- a/arch/powerpc/include/asm/auxvec.h
+++ b/arch/powerpc/include/asm/auxvec.h
@@ -16,4 +16,6 @@
*/
#define AT_SYSINFO_EHDR 33
+#define AT_VECTOR_SIZE_ARCH 6 /* entries in ARCH_DLINFO */
+
#endif
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
new file mode 100644
index 000000000000..ae782254e731
--- /dev/null
+++ b/arch/powerpc/include/asm/barrier.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _ASM_POWERPC_BARRIER_H
+#define _ASM_POWERPC_BARRIER_H
+
+/*
+ * Memory barrier.
+ * The sync instruction guarantees that all memory accesses initiated
+ * by this processor have been performed (with respect to all other
+ * mechanisms that access memory). The eieio instruction is a barrier
+ * providing an ordering (separately) for (a) cacheable stores and (b)
+ * loads and stores to non-cacheable memory (e.g. I/O devices).
+ *
+ * mb() prevents loads and stores being reordered across this point.
+ * rmb() prevents loads being reordered across this point.
+ * wmb() prevents stores being reordered across this point.
+ * read_barrier_depends() prevents data-dependent loads being reordered
+ * across this point (nop on PPC).
+ *
+ * *mb() variants without smp_ prefix must order all types of memory
+ * operations with one another. sync is the only instruction sufficient
+ * to do this.
+ *
+ * For the smp_ barriers, ordering is for cacheable memory operations
+ * only. We have to use the sync instruction for smp_mb(), since lwsync
+ * doesn't order loads with respect to previous stores. Lwsync can be
+ * used for smp_rmb() and smp_wmb().
+ *
+ * However, on CPUs that don't support lwsync, lwsync actually maps to a
+ * heavy-weight sync, so smp_wmb() can be a lighter-weight eieio.
+ */
+#define mb() __asm__ __volatile__ ("sync" : : : "memory")
+#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
+#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
+#define read_barrier_depends() do { } while(0)
+
+#define set_mb(var, value) do { var = value; mb(); } while (0)
+
+#ifdef CONFIG_SMP
+
+#ifdef __SUBARCH_HAS_LWSYNC
+# define SMPWMB LWSYNC
+#else
+# define SMPWMB eieio
+#endif
+
+#define smp_mb() mb()
+#define smp_rmb() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+#define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
+#define smp_read_barrier_depends() read_barrier_depends()
+#else
+#define smp_mb() barrier()
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#define smp_read_barrier_depends() do { } while(0)
+#endif /* CONFIG_SMP */
+
+/*
+ * This is a barrier which prevents following instructions from being
+ * started until the value of the argument x is known. For example, if
+ * x is a variable loaded from memory, this prevents following
+ * instructions from being executed until the load has been performed.
+ */
+#define data_barrier(x) \
+ asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory");
+
+#endif /* _ASM_POWERPC_BARRIER_H */
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 065c590c991d..3eb53d741070 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -126,5 +126,16 @@
#include <asm-generic/bug.h>
+#ifndef __ASSEMBLY__
+
+struct pt_regs;
+extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
+extern void bad_page_fault(struct pt_regs *, unsigned long, int);
+extern void _exception(int, struct pt_regs *, int, unsigned long);
+extern void die(const char *, struct pt_regs *, long);
+extern void print_backtrace(unsigned long *);
+
+#endif /* !__ASSEMBLY__ */
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_BUG_H */
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 4b509411ad8a..9e495c9a6a88 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -42,8 +42,24 @@ extern struct ppc64_caches ppc64_caches;
#endif /* __powerpc64__ && ! __ASSEMBLY__ */
#if !defined(__ASSEMBLY__)
+
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
+#ifdef CONFIG_6xx
+extern long _get_L2CR(void);
+extern long _get_L3CR(void);
+extern void _set_L2CR(unsigned long);
+extern void _set_L3CR(unsigned long);
+#else
+#define _get_L2CR() 0L
+#define _get_L3CR() 0L
+#define _set_L2CR(val) do { } while(0)
+#define _set_L3CR(val) do { } while(0)
#endif
+extern void cacheable_memzero(void *p, unsigned int nb);
+extern void *cacheable_memcpy(void *, const void *, unsigned int);
+
+#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_CACHE_H */
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
new file mode 100644
index 000000000000..e245aab7f191
--- /dev/null
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -0,0 +1,309 @@
+#ifndef _ASM_POWERPC_CMPXCHG_H_
+#define _ASM_POWERPC_CMPXCHG_H_
+
+#ifdef __KERNEL__
+#include <linux/compiler.h>
+#include <asm/synch.h>
+#include <asm/asm-compat.h>
+
+/*
+ * Atomic exchange
+ *
+ * Changes the memory location '*ptr' to be val and returns
+ * the previous value stored there.
+ */
+static __always_inline unsigned long
+__xchg_u32(volatile void *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+ PPC_RELEASE_BARRIER
+"1: lwarx %0,0,%2 \n"
+ PPC405_ERR77(0,%2)
+" stwcx. %3,0,%2 \n\
+ bne- 1b"
+ PPC_ACQUIRE_BARRIER
+ : "=&r" (prev), "+m" (*(volatile unsigned int *)p)
+ : "r" (p), "r" (val)
+ : "cc", "memory");
+
+ return prev;
+}
+
+/*
+ * Atomic exchange
+ *
+ * Changes the memory location '*ptr' to be val and returns
+ * the previous value stored there.
+ */
+static __always_inline unsigned long
+__xchg_u32_local(volatile void *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: lwarx %0,0,%2 \n"
+ PPC405_ERR77(0,%2)
+" stwcx. %3,0,%2 \n\
+ bne- 1b"
+ : "=&r" (prev), "+m" (*(volatile unsigned int *)p)
+ : "r" (p), "r" (val)
+ : "cc", "memory");
+
+ return prev;
+}
+
+#ifdef CONFIG_PPC64
+static __always_inline unsigned long
+__xchg_u64(volatile void *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+ PPC_RELEASE_BARRIER
+"1: ldarx %0,0,%2 \n"
+ PPC405_ERR77(0,%2)
+" stdcx. %3,0,%2 \n\
+ bne- 1b"
+ PPC_ACQUIRE_BARRIER
+ : "=&r" (prev), "+m" (*(volatile unsigned long *)p)
+ : "r" (p), "r" (val)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u64_local(volatile void *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: ldarx %0,0,%2 \n"
+ PPC405_ERR77(0,%2)
+" stdcx. %3,0,%2 \n\
+ bne- 1b"
+ : "=&r" (prev), "+m" (*(volatile unsigned long *)p)
+ : "r" (p), "r" (val)
+ : "cc", "memory");
+
+ return prev;
+}
+#endif
+
+/*
+ * This function doesn't exist, so you'll get a linker error
+ * if something tries to do an invalid xchg().
+ */
+extern void __xchg_called_with_bad_pointer(void);
+
+static __always_inline unsigned long
+__xchg(volatile void *ptr, unsigned long x, unsigned int size)
+{
+ switch (size) {
+ case 4:
+ return __xchg_u32(ptr, x);
+#ifdef CONFIG_PPC64
+ case 8:
+ return __xchg_u64(ptr, x);
+#endif
+ }
+ __xchg_called_with_bad_pointer();
+ return x;
+}
+
+static __always_inline unsigned long
+__xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
+{
+ switch (size) {
+ case 4:
+ return __xchg_u32_local(ptr, x);
+#ifdef CONFIG_PPC64
+ case 8:
+ return __xchg_u64_local(ptr, x);
+#endif
+ }
+ __xchg_called_with_bad_pointer();
+ return x;
+}
+#define xchg(ptr,x) \
+ ({ \
+ __typeof__(*(ptr)) _x_ = (x); \
+ (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \
+ })
+
+#define xchg_local(ptr,x) \
+ ({ \
+ __typeof__(*(ptr)) _x_ = (x); \
+ (__typeof__(*(ptr))) __xchg_local((ptr), \
+ (unsigned long)_x_, sizeof(*(ptr))); \
+ })
+
+/*
+ * Compare and exchange - if *p == old, set it to new,
+ * and return the old value of *p.
+ */
+#define __HAVE_ARCH_CMPXCHG 1
+
+static __always_inline unsigned long
+__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
+{
+ unsigned int prev;
+
+ __asm__ __volatile__ (
+ PPC_RELEASE_BARRIER
+"1: lwarx %0,0,%2 # __cmpxchg_u32\n\
+ cmpw 0,%0,%3\n\
+ bne- 2f\n"
+ PPC405_ERR77(0,%2)
+" stwcx. %4,0,%2\n\
+ bne- 1b"
+ PPC_ACQUIRE_BARRIER
+ "\n\
+2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u32_local(volatile unsigned int *p, unsigned long old,
+ unsigned long new)
+{
+ unsigned int prev;
+
+ __asm__ __volatile__ (
+"1: lwarx %0,0,%2 # __cmpxchg_u32\n\
+ cmpw 0,%0,%3\n\
+ bne- 2f\n"
+ PPC405_ERR77(0,%2)
+" stwcx. %4,0,%2\n\
+ bne- 1b"
+ "\n\
+2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+#ifdef CONFIG_PPC64
+static __always_inline unsigned long
+__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+ PPC_RELEASE_BARRIER
+"1: ldarx %0,0,%2 # __cmpxchg_u64\n\
+ cmpd 0,%0,%3\n\
+ bne- 2f\n\
+ stdcx. %4,0,%2\n\
+ bne- 1b"
+ PPC_ACQUIRE_BARRIER
+ "\n\
+2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u64_local(volatile unsigned long *p, unsigned long old,
+ unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: ldarx %0,0,%2 # __cmpxchg_u64\n\
+ cmpd 0,%0,%3\n\
+ bne- 2f\n\
+ stdcx. %4,0,%2\n\
+ bne- 1b"
+ "\n\
+2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+#endif
+
+/* This function doesn't exist, so you'll get a linker error
+ if something tries to do an invalid cmpxchg(). */
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+static __always_inline unsigned long
+__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
+ unsigned int size)
+{
+ switch (size) {
+ case 4:
+ return __cmpxchg_u32(ptr, old, new);
+#ifdef CONFIG_PPC64
+ case 8:
+ return __cmpxchg_u64(ptr, old, new);
+#endif
+ }
+ __cmpxchg_called_with_bad_pointer();
+ return old;
+}
+
+static __always_inline unsigned long
+__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
+ unsigned int size)
+{
+ switch (size) {
+ case 4:
+ return __cmpxchg_u32_local(ptr, old, new);
+#ifdef CONFIG_PPC64
+ case 8:
+ return __cmpxchg_u64_local(ptr, old, new);
+#endif
+ }
+ __cmpxchg_called_with_bad_pointer();
+ return old;
+}
+
+#define cmpxchg(ptr, o, n) \
+ ({ \
+ __typeof__(*(ptr)) _o_ = (o); \
+ __typeof__(*(ptr)) _n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \
+ (unsigned long)_n_, sizeof(*(ptr))); \
+ })
+
+
+#define cmpxchg_local(ptr, o, n) \
+ ({ \
+ __typeof__(*(ptr)) _o_ = (o); \
+ __typeof__(*(ptr)) _n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \
+ (unsigned long)_n_, sizeof(*(ptr))); \
+ })
+
+#ifdef CONFIG_PPC64
+#define cmpxchg64(ptr, o, n) \
+ ({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ cmpxchg((ptr), (o), (n)); \
+ })
+#define cmpxchg64_local(ptr, o, n) \
+ ({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ cmpxchg_local((ptr), (o), (n)); \
+ })
+#else
+#include <asm-generic/cmpxchg-local.h>
+#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_CMPXCHG_H_ */
diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
new file mode 100644
index 000000000000..716d2f089eb6
--- /dev/null
+++ b/arch/powerpc/include/asm/debug.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _ASM_POWERPC_DEBUG_H
+#define _ASM_POWERPC_DEBUG_H
+
+struct pt_regs;
+
+extern struct dentry *powerpc_debugfs_root;
+
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+
+extern int (*__debugger)(struct pt_regs *regs);
+extern int (*__debugger_ipi)(struct pt_regs *regs);
+extern int (*__debugger_bpt)(struct pt_regs *regs);
+extern int (*__debugger_sstep)(struct pt_regs *regs);
+extern int (*__debugger_iabr_match)(struct pt_regs *regs);
+extern int (*__debugger_dabr_match)(struct pt_regs *regs);
+extern int (*__debugger_fault_handler)(struct pt_regs *regs);
+
+#define DEBUGGER_BOILERPLATE(__NAME) \
+static inline int __NAME(struct pt_regs *regs) \
+{ \
+ if (unlikely(__ ## __NAME)) \
+ return __ ## __NAME(regs); \
+ return 0; \
+}
+
+DEBUGGER_BOILERPLATE(debugger)
+DEBUGGER_BOILERPLATE(debugger_ipi)
+DEBUGGER_BOILERPLATE(debugger_bpt)
+DEBUGGER_BOILERPLATE(debugger_sstep)
+DEBUGGER_BOILERPLATE(debugger_iabr_match)
+DEBUGGER_BOILERPLATE(debugger_dabr_match)
+DEBUGGER_BOILERPLATE(debugger_fault_handler)
+
+#else
+static inline int debugger(struct pt_regs *regs) { return 0; }
+static inline int debugger_ipi(struct pt_regs *regs) { return 0; }
+static inline int debugger_bpt(struct pt_regs *regs) { return 0; }
+static inline int debugger_sstep(struct pt_regs *regs) { return 0; }
+static inline int debugger_iabr_match(struct pt_regs *regs) { return 0; }
+static inline int debugger_dabr_match(struct pt_regs *regs) { return 0; }
+static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
+#endif
+
+extern int set_dabr(unsigned long dabr);
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+extern void do_send_trap(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code, int signal_code, int brkpt);
+#else
+extern void do_dabr(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code);
+#endif
+
+#endif /* _ASM_POWERPC_DEBUG_H */
diff --git a/arch/powerpc/include/asm/dma.h b/arch/powerpc/include/asm/dma.h
index adadb9943610..f6813e919bb2 100644
--- a/arch/powerpc/include/asm/dma.h
+++ b/arch/powerpc/include/asm/dma.h
@@ -24,7 +24,6 @@
#include <asm/io.h>
#include <linux/spinlock.h>
-#include <asm/system.h>
#ifndef MAX_DMA_CHANNELS
#define MAX_DMA_CHANNELS 8
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index f3b0c2cc9fea..976835d8f22e 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -134,10 +134,15 @@
* whether they will be clobbered.
*
* Note that r11 can be used as an output parameter.
+ *
+ * The "memory" clobber is only necessary for hcalls where the Hypervisor
+ * will read or write guest memory. However, we add it to all hcalls because
+ * the impact is minimal, and we want to ensure that it's present for the
+ * hcalls that need it.
*/
/* List of common clobbered registers. Do not use this macro. */
-#define EV_HCALL_CLOBBERS "r0", "r12", "xer", "ctr", "lr", "cc"
+#define EV_HCALL_CLOBBERS "r0", "r12", "xer", "ctr", "lr", "cc", "memory"
#define EV_HCALL_CLOBBERS8 EV_HCALL_CLOBBERS
#define EV_HCALL_CLOBBERS7 EV_HCALL_CLOBBERS8, "r10"
diff --git a/arch/powerpc/include/asm/exec.h b/arch/powerpc/include/asm/exec.h
new file mode 100644
index 000000000000..8196e9c7d7e8
--- /dev/null
+++ b/arch/powerpc/include/asm/exec.h
@@ -0,0 +1,9 @@
+/*
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _ASM_POWERPC_EXEC_H
+#define _ASM_POWERPC_EXEC_H
+
+extern unsigned long arch_align_stack(unsigned long sp);
+
+#endif /* _ASM_POWERPC_EXEC_H */
diff --git a/arch/powerpc/include/asm/fsl_guts.h b/arch/powerpc/include/asm/fsl_guts.h
index ce04530d2000..aa4c488589ce 100644
--- a/arch/powerpc/include/asm/fsl_guts.h
+++ b/arch/powerpc/include/asm/fsl_guts.h
@@ -16,15 +16,6 @@
#define __ASM_POWERPC_FSL_GUTS_H__
#ifdef __KERNEL__
-/*
- * These #ifdefs are safe because it's not possible to build a kernel that
- * runs on e500 and e600 cores.
- */
-
-#if !defined(CONFIG_PPC_85xx) && !defined(CONFIG_PPC_86xx)
-#error Only 85xx and 86xx SOCs are supported
-#endif
-
/**
* Global Utility Registers.
*
@@ -36,11 +27,7 @@
* different names. In these cases, one name is chosen to avoid extraneous
* #ifdefs.
*/
-#ifdef CONFIG_PPC_85xx
-struct ccsr_guts_85xx {
-#else
-struct ccsr_guts_86xx {
-#endif
+struct ccsr_guts {
__be32 porpllsr; /* 0x.0000 - POR PLL Ratio Status Register */
__be32 porbmsr; /* 0x.0004 - POR Boot Mode Status Register */
__be32 porimpscr; /* 0x.0008 - POR I/O Impedance Status and Control Register */
@@ -77,11 +64,8 @@ struct ccsr_guts_86xx {
u8 res0a8[0xb0 - 0xa8];
__be32 rstcr; /* 0x.00b0 - Reset Control Register */
u8 res0b4[0xc0 - 0xb4];
-#ifdef CONFIG_PPC_85xx
- __be32 iovselsr; /* 0x.00c0 - I/O voltage select status register */
-#else
- __be32 elbcvselcr; /* 0x.00c0 - eLBC Voltage Select Ctrl Reg */
-#endif
+ __be32 iovselsr; /* 0x.00c0 - I/O voltage select status register
+ Called 'elbcvselcr' on 86xx SOCs */
u8 res0c4[0x224 - 0xc4];
__be32 iodelay1; /* 0x.0224 - IO delay control register 1 */
__be32 iodelay2; /* 0x.0228 - IO delay control register 2 */
@@ -136,7 +120,7 @@ struct ccsr_guts_86xx {
* ch: The channel on the DMA controller (0, 1, 2, or 3)
* device: The device to set as the source (CCSR_GUTS_DMACR_DEV_xx)
*/
-static inline void guts_set_dmacr(struct ccsr_guts_86xx __iomem *guts,
+static inline void guts_set_dmacr(struct ccsr_guts __iomem *guts,
unsigned int co, unsigned int ch, unsigned int device)
{
unsigned int shift = 16 + (8 * (1 - co) + 2 * (3 - ch));
@@ -172,7 +156,7 @@ static inline void guts_set_dmacr(struct ccsr_guts_86xx __iomem *guts,
* ch: The channel on the DMA controller (0, 1, 2, or 3)
* value: the new value for the bit (0 or 1)
*/
-static inline void guts_set_pmuxcr_dma(struct ccsr_guts_86xx __iomem *guts,
+static inline void guts_set_pmuxcr_dma(struct ccsr_guts __iomem *guts,
unsigned int co, unsigned int ch, unsigned int value)
{
if ((ch == 0) || (ch == 3)) {
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index 80fd4d2b4a62..be04330af751 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -35,7 +35,7 @@ struct arch_hw_breakpoint {
#include <linux/kdebug.h>
#include <asm/reg.h>
-#include <asm/system.h>
+#include <asm/debug.h>
struct perf_event;
struct pmu;
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index edfc9803ec91..957a83f43646 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -112,7 +112,6 @@ extern void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
struct dma_attrs *attrs);
extern void iommu_init_early_pSeries(void);
-extern void iommu_init_early_iSeries(void);
extern void iommu_init_early_dart(void);
extern void iommu_init_early_pasemi(void);
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index fe0b09dceb7d..cf417e510736 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -27,12 +27,6 @@ extern atomic_t ppc_n_lost_interrupts;
/* This number is used when no interrupt has been assigned */
#define NO_IRQ (0)
-/* This is a special irq number to return from get_irq() to tell that
- * no interrupt happened _and_ ignore it (don't count it as bad). Some
- * platforms like iSeries rely on that.
- */
-#define NO_IRQ_IGNORE ((unsigned int)-1)
-
/* Total number of virq in the platform */
#define NR_IRQS CONFIG_NR_IRQS
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index f7727d91ac6b..b921c3f48928 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -265,12 +265,9 @@ struct kvm_debug_exit_arch {
struct kvm_guest_debug_arch {
};
-#define KVM_REG_MASK 0x001f
-#define KVM_REG_EXT_MASK 0xffe0
-#define KVM_REG_GPR 0x0000
-#define KVM_REG_FPR 0x0020
-#define KVM_REG_QPR 0x0040
-#define KVM_REG_FQPR 0x0060
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+};
#define KVM_INTERRUPT_SET -1U
#define KVM_INTERRUPT_UNSET -2U
@@ -292,4 +289,41 @@ struct kvm_allocate_rma {
__u64 rma_size;
};
+struct kvm_book3e_206_tlb_entry {
+ __u32 mas8;
+ __u32 mas1;
+ __u64 mas2;
+ __u64 mas7_3;
+};
+
+struct kvm_book3e_206_tlb_params {
+ /*
+ * For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
+ *
+ * - The number of ways of TLB0 must be a power of two between 2 and
+ * 16.
+ * - TLB1 must be fully associative.
+ * - The size of TLB0 must be a multiple of the number of ways, and
+ * the number of sets must be a power of two.
+ * - The size of TLB1 may not exceed 64 entries.
+ * - TLB0 supports 4 KiB pages.
+ * - The page sizes supported by TLB1 are as indicated by
+ * TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1)
+ * as returned by KVM_GET_SREGS.
+ * - TLB2 and TLB3 are reserved, and their entries in tlb_sizes[]
+ * and tlb_ways[] must be zero.
+ *
+ * tlb_ways[n] = tlb_sizes[n] means the array is fully associative.
+ *
+ * KVM will adjust TLBnCFG based on the sizes configured here,
+ * though arrays greater than 2048 entries will have TLBnCFG[NENTRY]
+ * set to zero.
+ */
+ __u32 tlb_sizes[4];
+ __u32 tlb_ways[4];
+ __u32 reserved[8];
+};
+
+#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
+
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 69c7377d2071..aa795ccef294 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -90,6 +90,8 @@ struct kvmppc_vcpu_book3s {
#endif
int context_id[SID_CONTEXTS];
+ bool hior_explicit; /* HIOR is set by ioctl, not PVR */
+
struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE];
struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG];
struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
@@ -119,6 +121,11 @@ extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu);
extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
+extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run,
+ struct kvm_vcpu *vcpu, unsigned long addr,
+ unsigned long status);
+extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
+ unsigned long slb_v, unsigned long valid);
extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
@@ -138,6 +145,21 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
+ unsigned long *rmap, long pte_index, int realmode);
+extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
+ unsigned long pte_index);
+void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
+ unsigned long pte_index);
+extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
+ unsigned long *nb_ret);
+extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
+extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel);
+extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel);
+extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
+ struct kvm_memory_slot *memslot);
extern void kvmppc_entry_trampoline(void);
extern void kvmppc_hv_entry_trampoline(void);
@@ -183,7 +205,9 @@ static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
if ( num < 14 ) {
- to_svcpu(vcpu)->gpr[num] = val;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ svcpu->gpr[num] = val;
+ svcpu_put(svcpu);
to_book3s(vcpu)->shadow_vcpu->gpr[num] = val;
} else
vcpu->arch.gpr[num] = val;
@@ -191,80 +215,120 @@ static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
{
- if ( num < 14 )
- return to_svcpu(vcpu)->gpr[num];
- else
+ if ( num < 14 ) {
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ ulong r = svcpu->gpr[num];
+ svcpu_put(svcpu);
+ return r;
+ } else
return vcpu->arch.gpr[num];
}
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{
- to_svcpu(vcpu)->cr = val;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ svcpu->cr = val;
+ svcpu_put(svcpu);
to_book3s(vcpu)->shadow_vcpu->cr = val;
}
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
{
- return to_svcpu(vcpu)->cr;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ u32 r;
+ r = svcpu->cr;
+ svcpu_put(svcpu);
+ return r;
}
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
{
- to_svcpu(vcpu)->xer = val;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ svcpu->xer = val;
to_book3s(vcpu)->shadow_vcpu->xer = val;
+ svcpu_put(svcpu);
}
static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
{
- return to_svcpu(vcpu)->xer;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ u32 r;
+ r = svcpu->xer;
+ svcpu_put(svcpu);
+ return r;
}
static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
{
- to_svcpu(vcpu)->ctr = val;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ svcpu->ctr = val;
+ svcpu_put(svcpu);
}
static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
{
- return to_svcpu(vcpu)->ctr;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ ulong r;
+ r = svcpu->ctr;
+ svcpu_put(svcpu);
+ return r;
}
static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
{
- to_svcpu(vcpu)->lr = val;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ svcpu->lr = val;
+ svcpu_put(svcpu);
}
static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
{
- return to_svcpu(vcpu)->lr;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ ulong r;
+ r = svcpu->lr;
+ svcpu_put(svcpu);
+ return r;
}
static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
{
- to_svcpu(vcpu)->pc = val;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ svcpu->pc = val;
+ svcpu_put(svcpu);
}
static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
{
- return to_svcpu(vcpu)->pc;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ ulong r;
+ r = svcpu->pc;
+ svcpu_put(svcpu);
+ return r;
}
static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
{
ulong pc = kvmppc_get_pc(vcpu);
- struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu);
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ u32 r;
/* Load the instruction manually if it failed to do so in the
* exit path */
if (svcpu->last_inst == KVM_INST_FETCH_FAILED)
kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false);
- return svcpu->last_inst;
+ r = svcpu->last_inst;
+ svcpu_put(svcpu);
+ return r;
}
static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
{
- return to_svcpu(vcpu)->fault_dar;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ ulong r;
+ r = svcpu->fault_dar;
+ svcpu_put(svcpu);
+ return r;
}
static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/include/asm/kvm_book3s_32.h b/arch/powerpc/include/asm/kvm_book3s_32.h
index de604db135f5..38040ff82063 100644
--- a/arch/powerpc/include/asm/kvm_book3s_32.h
+++ b/arch/powerpc/include/asm/kvm_book3s_32.h
@@ -20,11 +20,15 @@
#ifndef __ASM_KVM_BOOK3S_32_H__
#define __ASM_KVM_BOOK3S_32_H__
-static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
+static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
{
return to_book3s(vcpu)->shadow_vcpu;
}
+static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
+{
+}
+
#define PTE_SIZE 12
#define VSID_ALL 0
#define SR_INVALID 0x00000001 /* VSID 1 should always be unused */
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index d0ac94f98f9e..b0c08b142770 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -21,14 +21,56 @@
#define __ASM_KVM_BOOK3S_64_H__
#ifdef CONFIG_KVM_BOOK3S_PR
-static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
+static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
{
+ preempt_disable();
return &get_paca()->shadow_vcpu;
}
+
+static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
+{
+ preempt_enable();
+}
#endif
#define SPAPR_TCE_SHIFT 12
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+/* For now use fixed-size 16MB page table */
+#define HPT_ORDER 24
+#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
+#define HPT_NPTE (HPT_NPTEG << 3) /* 8 PTEs per PTEG */
+#define HPT_HASH_MASK (HPT_NPTEG - 1)
+#endif
+
+#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
+
+/*
+ * We use a lock bit in HPTE dword 0 to synchronize updates and
+ * accesses to each HPTE, and another bit to indicate non-present
+ * HPTEs.
+ */
+#define HPTE_V_HVLOCK 0x40UL
+#define HPTE_V_ABSENT 0x20UL
+
+static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
+{
+ unsigned long tmp, old;
+
+ asm volatile(" ldarx %0,0,%2\n"
+ " and. %1,%0,%3\n"
+ " bne 2f\n"
+ " ori %0,%0,%4\n"
+ " stdcx. %0,0,%2\n"
+ " beq+ 2f\n"
+ " li %1,%3\n"
+ "2: isync"
+ : "=&r" (tmp), "=&r" (old)
+ : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK)
+ : "cc", "memory");
+ return old == 0;
+}
+
static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
unsigned long pte_index)
{
@@ -62,4 +104,140 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
return rb;
}
+static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
+{
+ /* only handle 4k, 64k and 16M pages for now */
+ if (!(h & HPTE_V_LARGE))
+ return 1ul << 12; /* 4k page */
+ if ((l & 0xf000) == 0x1000 && cpu_has_feature(CPU_FTR_ARCH_206))
+ return 1ul << 16; /* 64k page */
+ if ((l & 0xff000) == 0)
+ return 1ul << 24; /* 16M page */
+ return 0; /* error */
+}
+
+static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
+{
+ return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
+}
+
+static inline int hpte_is_writable(unsigned long ptel)
+{
+ unsigned long pp = ptel & (HPTE_R_PP0 | HPTE_R_PP);
+
+ return pp != PP_RXRX && pp != PP_RXXX;
+}
+
+static inline unsigned long hpte_make_readonly(unsigned long ptel)
+{
+ if ((ptel & HPTE_R_PP0) || (ptel & HPTE_R_PP) == PP_RWXX)
+ ptel = (ptel & ~HPTE_R_PP) | PP_RXXX;
+ else
+ ptel |= PP_RXRX;
+ return ptel;
+}
+
+static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
+{
+ unsigned int wimg = ptel & HPTE_R_WIMG;
+
+ /* Handle SAO */
+ if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
+ cpu_has_feature(CPU_FTR_ARCH_206))
+ wimg = HPTE_R_M;
+
+ if (!io_type)
+ return wimg == HPTE_R_M;
+
+ return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type;
+}
+
+/*
+ * Lock and read a linux PTE. If it's present and writable, atomically
+ * set dirty and referenced bits and return the PTE, otherwise return 0.
+ */
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
+{
+ pte_t pte, tmp;
+
+ /* wait until _PAGE_BUSY is clear then set it atomically */
+ __asm__ __volatile__ (
+ "1: ldarx %0,0,%3\n"
+ " andi. %1,%0,%4\n"
+ " bne- 1b\n"
+ " ori %1,%0,%4\n"
+ " stdcx. %1,0,%3\n"
+ " bne- 1b"
+ : "=&r" (pte), "=&r" (tmp), "=m" (*p)
+ : "r" (p), "i" (_PAGE_BUSY)
+ : "cc");
+
+ if (pte_present(pte)) {
+ pte = pte_mkyoung(pte);
+ if (writing && pte_write(pte))
+ pte = pte_mkdirty(pte);
+ }
+
+ *p = pte; /* clears _PAGE_BUSY */
+
+ return pte;
+}
+
+/* Return HPTE cache control bits corresponding to Linux pte bits */
+static inline unsigned long hpte_cache_bits(unsigned long pte_val)
+{
+#if _PAGE_NO_CACHE == HPTE_R_I && _PAGE_WRITETHRU == HPTE_R_W
+ return pte_val & (HPTE_R_W | HPTE_R_I);
+#else
+ return ((pte_val & _PAGE_NO_CACHE) ? HPTE_R_I : 0) +
+ ((pte_val & _PAGE_WRITETHRU) ? HPTE_R_W : 0);
+#endif
+}
+
+static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
+{
+ if (key)
+ return PP_RWRX <= pp && pp <= PP_RXRX;
+ return 1;
+}
+
+static inline bool hpte_write_permission(unsigned long pp, unsigned long key)
+{
+ if (key)
+ return pp == PP_RWRW;
+ return pp <= PP_RWRW;
+}
+
+static inline int hpte_get_skey_perm(unsigned long hpte_r, unsigned long amr)
+{
+ unsigned long skey;
+
+ skey = ((hpte_r & HPTE_R_KEY_HI) >> 57) |
+ ((hpte_r & HPTE_R_KEY_LO) >> 9);
+ return (amr >> (62 - 2 * skey)) & 3;
+}
+
+static inline void lock_rmap(unsigned long *rmap)
+{
+ do {
+ while (test_bit(KVMPPC_RMAP_LOCK_BIT, rmap))
+ cpu_relax();
+ } while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmap));
+}
+
+static inline void unlock_rmap(unsigned long *rmap)
+{
+ __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmap);
+}
+
+static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
+ unsigned long pagesize)
+{
+ unsigned long mask = (pagesize >> PAGE_SHIFT) - 1;
+
+ if (pagesize <= PAGE_SIZE)
+ return 1;
+ return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
+}
+
#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
index adbfca9dd100..8cd50a514271 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -22,46 +22,55 @@
#define E500_PID_NUM 3
#define E500_TLB_NUM 2
-struct tlbe{
- u32 mas1;
- u32 mas2;
- u32 mas3;
- u32 mas7;
-};
-
#define E500_TLB_VALID 1
#define E500_TLB_DIRTY 2
-struct tlbe_priv {
+struct tlbe_ref {
pfn_t pfn;
unsigned int flags; /* E500_TLB_* */
};
+struct tlbe_priv {
+ struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */
+};
+
struct vcpu_id_table;
+struct kvmppc_e500_tlb_params {
+ int entries, ways, sets;
+};
+
struct kvmppc_vcpu_e500 {
- /* Unmodified copy of the guest's TLB. */
- struct tlbe *gtlb_arch[E500_TLB_NUM];
+ /* Unmodified copy of the guest's TLB -- shared with host userspace. */
+ struct kvm_book3e_206_tlb_entry *gtlb_arch;
+
+ /* Starting entry number in gtlb_arch[] */
+ int gtlb_offset[E500_TLB_NUM];
/* KVM internal information associated with each guest TLB entry */
struct tlbe_priv *gtlb_priv[E500_TLB_NUM];
- unsigned int gtlb_size[E500_TLB_NUM];
+ struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM];
+
unsigned int gtlb_nv[E500_TLB_NUM];
+ /*
+ * information associated with each host TLB entry --
+ * TLB1 only for now. If/when guest TLB1 entries can be
+ * mapped with host TLB0, this will be used for that too.
+ *
+ * We don't want to use this for guest TLB0 because then we'd
+ * have the overhead of doing the translation again even if
+ * the entry is still in the guest TLB (e.g. we swapped out
+ * and back, and our host TLB entries got evicted).
+ */
+ struct tlbe_ref *tlb_refs[E500_TLB_NUM];
+ unsigned int host_tlb1_nv;
+
u32 host_pid[E500_PID_NUM];
u32 pid[E500_PID_NUM];
u32 svr;
- u32 mas0;
- u32 mas1;
- u32 mas2;
- u32 mas3;
- u32 mas4;
- u32 mas5;
- u32 mas6;
- u32 mas7;
-
/* vcpu id table */
struct vcpu_id_table *idt;
@@ -73,6 +82,9 @@ struct kvmppc_vcpu_e500 {
u32 tlb1cfg;
u64 mcar;
+ struct page **shared_tlb_pages;
+ int num_shared_tlb_pages;
+
struct kvm_vcpu vcpu;
};
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index bf8af5d5d5dc..52eb9c1f4fe0 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -32,17 +32,32 @@
#include <linux/atomic.h>
#include <asm/kvm_asm.h>
#include <asm/processor.h>
+#include <asm/page.h>
#define KVM_MAX_VCPUS NR_CPUS
#define KVM_MAX_VCORES NR_CPUS
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
#ifdef CONFIG_KVM_MMIO
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#endif
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+#include <linux/mmu_notifier.h>
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+
+struct kvm;
+extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
+extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+#endif
+
/* We don't currently support large pages. */
#define KVM_HPAGE_GFN_SHIFT(x) 0
#define KVM_NR_PAGE_SIZES 1
@@ -158,34 +173,72 @@ struct kvmppc_spapr_tce_table {
struct page *pages[0];
};
-struct kvmppc_rma_info {
+struct kvmppc_linear_info {
void *base_virt;
unsigned long base_pfn;
unsigned long npages;
struct list_head list;
- atomic_t use_count;
+ atomic_t use_count;
+ int type;
+};
+
+/*
+ * The reverse mapping array has one entry for each HPTE,
+ * which stores the guest's view of the second word of the HPTE
+ * (including the guest physical address of the mapping),
+ * plus forward and backward pointers in a doubly-linked ring
+ * of HPTEs that map the same host page. The pointers in this
+ * ring are 32-bit HPTE indexes, to save space.
+ */
+struct revmap_entry {
+ unsigned long guest_rpte;
+ unsigned int forw, back;
+};
+
+/*
+ * We use the top bit of each memslot->rmap entry as a lock bit,
+ * and bit 32 as a present flag. The bottom 32 bits are the
+ * index in the guest HPT of a HPTE that points to the page.
+ */
+#define KVMPPC_RMAP_LOCK_BIT 63
+#define KVMPPC_RMAP_RC_SHIFT 32
+#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
+#define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
+#define KVMPPC_RMAP_PRESENT 0x100000000ul
+#define KVMPPC_RMAP_INDEX 0xfffffffful
+
+/* Low-order bits in kvm->arch.slot_phys[][] */
+#define KVMPPC_PAGE_ORDER_MASK 0x1f
+#define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */
+#define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */
+#define KVMPPC_GOT_PAGE 0x80
+
+struct kvm_arch_memory_slot {
};
struct kvm_arch {
#ifdef CONFIG_KVM_BOOK3S_64_HV
unsigned long hpt_virt;
- unsigned long ram_npages;
- unsigned long ram_psize;
- unsigned long ram_porder;
- struct kvmppc_pginfo *ram_pginfo;
+ struct revmap_entry *revmap;
unsigned int lpid;
unsigned int host_lpid;
unsigned long host_lpcr;
unsigned long sdr1;
unsigned long host_sdr1;
int tlbie_lock;
- int n_rma_pages;
unsigned long lpcr;
unsigned long rmor;
- struct kvmppc_rma_info *rma;
+ struct kvmppc_linear_info *rma;
+ unsigned long vrma_slb_v;
+ int rma_setup_done;
+ int using_mmu_notifiers;
struct list_head spapr_tce_tables;
+ spinlock_t slot_phys_lock;
+ unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
+ int slot_npages[KVM_MEM_SLOTS_NUM];
unsigned short last_vcpu[NR_CPUS];
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
+ struct kvmppc_linear_info *hpt_li;
#endif /* CONFIG_KVM_BOOK3S_64_HV */
};
@@ -318,10 +371,6 @@ struct kvm_vcpu_arch {
u32 vrsave; /* also USPRG0 */
u32 mmucr;
ulong shadow_msr;
- ulong sprg4;
- ulong sprg5;
- ulong sprg6;
- ulong sprg7;
ulong csrr0;
ulong csrr1;
ulong dsrr0;
@@ -329,16 +378,14 @@ struct kvm_vcpu_arch {
ulong mcsrr0;
ulong mcsrr1;
ulong mcsr;
- ulong esr;
u32 dec;
u32 decar;
u32 tbl;
u32 tbu;
u32 tcr;
- u32 tsr;
+ ulong tsr; /* we need to perform set/clr_bits() which requires ulong */
u32 ivor[64];
ulong ivpr;
- u32 pir;
u32 pvr;
u32 shadow_pid;
@@ -427,9 +474,14 @@ struct kvm_vcpu_arch {
#ifdef CONFIG_KVM_BOOK3S_64_HV
struct kvm_vcpu_arch_shared shregs;
+ unsigned long pgfault_addr;
+ long pgfault_index;
+ unsigned long pgfault_hpte[2];
+
struct list_head run_list;
struct task_struct *run_task;
struct kvm_run *kvm_run;
+ pgd_t *pgdir;
#endif
};
@@ -438,4 +490,12 @@ struct kvm_vcpu_arch {
#define KVMPPC_VCPU_BUSY_IN_HOST 1
#define KVMPPC_VCPU_RUNNABLE 2
+/* Values for vcpu->arch.io_gpr */
+#define KVM_MMIO_REG_MASK 0x001f
+#define KVM_MMIO_REG_EXT_MASK 0xffe0
+#define KVM_MMIO_REG_GPR 0x0000
+#define KVM_MMIO_REG_FPR 0x0020
+#define KVM_MMIO_REG_QPR 0x0040
+#define KVM_MMIO_REG_FQPR 0x0060
+
#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 50533f9adf40..7b754e743003 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -22,6 +22,16 @@
#include <linux/types.h>
+/*
+ * Additions to this struct must only occur at the end, and should be
+ * accompanied by a KVM_MAGIC_FEAT flag to advertise that they are present
+ * (albeit not necessarily relevant to the current target hardware platform).
+ *
+ * Struct fields are always 32 or 64 bit aligned, depending on them being 32
+ * or 64 bit wide respectively.
+ *
+ * See Documentation/virtual/kvm/ppc-pv.txt
+ */
struct kvm_vcpu_arch_shared {
__u64 scratch1;
__u64 scratch2;
@@ -33,11 +43,35 @@ struct kvm_vcpu_arch_shared {
__u64 sprg3;
__u64 srr0;
__u64 srr1;
- __u64 dar;
+ __u64 dar; /* dear on BookE */
__u64 msr;
__u32 dsisr;
__u32 int_pending; /* Tells the guest if we have an interrupt */
__u32 sr[16];
+ __u32 mas0;
+ __u32 mas1;
+ __u64 mas7_3;
+ __u64 mas2;
+ __u32 mas4;
+ __u32 mas6;
+ __u32 esr;
+ __u32 pir;
+
+ /*
+ * SPRG4-7 are user-readable, so we can only keep these consistent
+ * between the shared area and the real registers when there's an
+ * intervening exit to KVM. This also applies to SPRG3 on some
+ * chips.
+ *
+ * This suffices for access by guest userspace, since in PR-mode
+ * KVM, an exit must occur when changing the guest's MSR[PR].
+ * If the guest kernel writes to SPRG3-7 via the shared area, it
+ * must also use the shared area for reading while in kernel space.
+ */
+ __u64 sprg4;
+ __u64 sprg5;
+ __u64 sprg6;
+ __u64 sprg7;
};
#define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */
@@ -47,7 +81,10 @@ struct kvm_vcpu_arch_shared {
#define KVM_FEATURE_MAGIC_PAGE 1
-#define KVM_MAGIC_FEAT_SR (1 << 0)
+#define KVM_MAGIC_FEAT_SR (1 << 0)
+
+/* MASn, ESR, PIR, and high SPRGs */
+#define KVM_MAGIC_FEAT_MAS0_TO_SPRG7 (1 << 1)
#ifdef __KERNEL__
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 46efd1a265c9..9d6dee0f7d48 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -66,6 +66,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
+extern void kvmppc_decrementer_func(unsigned long data);
extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
/* Core-specific hooks */
@@ -94,7 +95,7 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
@@ -120,15 +121,17 @@ extern long kvmppc_alloc_hpt(struct kvm *kvm);
extern void kvmppc_free_hpt(struct kvm *kvm);
extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
-extern void kvmppc_map_vrma(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem);
+extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
+ struct kvm_memory_slot *memslot, unsigned long porder);
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
struct kvm_allocate_rma *rma);
-extern struct kvmppc_rma_info *kvm_alloc_rma(void);
-extern void kvm_release_rma(struct kvmppc_rma_info *ri);
+extern struct kvmppc_linear_info *kvm_alloc_rma(void);
+extern void kvm_release_rma(struct kvmppc_linear_info *ri);
+extern struct kvmppc_linear_info *kvm_alloc_hpt(void);
+extern void kvm_release_hpt(struct kvmppc_linear_info *li);
extern int kvmppc_core_init_vm(struct kvm *kvm);
extern void kvmppc_core_destroy_vm(struct kvm *kvm);
extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
@@ -175,6 +178,9 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
+
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
#ifdef CONFIG_KVM_BOOK3S_64_HV
@@ -183,14 +189,19 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
paca[cpu].kvm_hstate.xics_phys = addr;
}
-extern void kvm_rma_init(void);
+extern void kvm_linear_init(void);
#else
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
{}
-static inline void kvm_rma_init(void)
+static inline void kvm_linear_init(void)
{}
#endif
+int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
+ struct kvm_config_tlb *cfg);
+int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
+ struct kvm_dirty_tlb *cfg);
+
#endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index bf37931d1ad6..42ce570812c1 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -99,9 +99,7 @@ struct machdep_calls {
void (*init_IRQ)(void);
- /* Return an irq, or NO_IRQ to indicate there are none pending.
- * If for some reason there is no irq, but the interrupt
- * shouldn't be counted as spurious, return NO_IRQ_IGNORE. */
+ /* Return an irq, or NO_IRQ to indicate there are none pending. */
unsigned int (*get_irq)(void);
/* PCI stuff */
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index f5f89cafebd0..cdb5421877e2 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -41,9 +41,10 @@
/* MAS registers bit definitions */
#define MAS0_TLBSEL(x) (((x) << 28) & 0x30000000)
-#define MAS0_ESEL(x) (((x) << 16) & 0x0FFF0000)
-#define MAS0_NV(x) ((x) & 0x00000FFF)
#define MAS0_ESEL_MASK 0x0FFF0000
+#define MAS0_ESEL_SHIFT 16
+#define MAS0_ESEL(x) (((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK)
+#define MAS0_NV(x) ((x) & 0x00000FFF)
#define MAS0_HES 0x00004000
#define MAS0_WQ_ALLWAYS 0x00000000
#define MAS0_WQ_COND 0x00001000
@@ -167,6 +168,7 @@
#define TLBnCFG_MAXSIZE 0x000f0000 /* Maximum Page Size (v1.0) */
#define TLBnCFG_MAXSIZE_SHIFT 16
#define TLBnCFG_ASSOC 0xff000000 /* Associativity */
+#define TLBnCFG_ASSOC_SHIFT 24
/* TLBnPS encoding */
#define TLBnPS_4K 0x00000004
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 412ba493cb98..1c65a59881ea 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -108,11 +108,11 @@ extern char initial_stab[];
#define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000)
/* Values for PP (assumes Ks=0, Kp=1) */
-/* pp0 will always be 0 for linux */
#define PP_RWXX 0 /* Supervisor read/write, User none */
#define PP_RWRX 1 /* Supervisor read/write, User read */
#define PP_RWRW 2 /* Supervisor read/write, User read/write */
#define PP_RXRX 3 /* Supervisor read, User read */
+#define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */
#ifndef __ASSEMBLY__
@@ -267,7 +267,6 @@ extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
extern void hpte_init_native(void);
extern void hpte_init_lpar(void);
-extern void hpte_init_iSeries(void);
extern void hpte_init_beat(void);
extern void hpte_init_beat_v3(void);
@@ -325,9 +324,6 @@ extern void slb_set_size(u16 size);
* WARNING - If you change these you must make sure the asm
* implementations in slb_allocate (slb_low.S), do_stab_bolted
* (head.S) and ASM_VSID_SCRAMBLE (below) are changed accordingly.
- *
- * You'll also need to change the precomputed VSID values in head.S
- * which are used by the iSeries firmware.
*/
#define VSID_MULTIPLIER_256M ASM_CONST(200730139) /* 28-bit prime */
@@ -484,14 +480,6 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
| (ea >> SID_SHIFT_1T), 1T);
}
-/*
- * This is only used on legacy iSeries in lparmap.c,
- * hence the 256MB segment assumption.
- */
-#define VSID_SCRAMBLE(pvsid) (((pvsid) * VSID_MULTIPLIER_256M) % \
- VSID_MODULUS_256M)
-#define KERNEL_VSID(ea) VSID_SCRAMBLE(GET_ESID(ea))
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_MMU_HASH64_H_ */
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 5d487657322e..ac39e6a3b25a 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -155,14 +155,7 @@ struct pci_dn {
struct pci_dev *pcidev; /* back-pointer to the pci device */
#ifdef CONFIG_EEH
- int class_code; /* pci device class */
- int eeh_mode; /* See eeh.h for possible EEH_MODEs */
- int eeh_config_addr;
- int eeh_pe_config_addr; /* new-style partition endpoint address */
- int eeh_check_count; /* # times driver ignored error */
- int eeh_freeze_count; /* # times this device froze up. */
- int eeh_false_positives; /* # times this device reported #ff's */
- u32 config_space[16]; /* saved PCI config space */
+ struct eeh_dev *edev; /* eeh device */
#endif
#define IODA_INVALID_PE (-1)
#ifdef CONFIG_PPC_POWERNV
@@ -185,6 +178,13 @@ static inline int pci_device_from_OF_node(struct device_node *np,
return 0;
}
+#if defined(CONFIG_EEH)
+static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn)
+{
+ return PCI_DN(dn)->edev;
+}
+#endif
+
/** Find the bus corresponding to the indicated device node */
extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn);
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 1a8093fa8f71..078019b5b353 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -47,6 +47,8 @@ struct power_pmu {
*/
#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */
#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */
+#define PPMU_NO_SIPR 4 /* no SIPR/HV in MMCRA at all */
+#define PPMU_NO_CONT_SAMPLING 8 /* no continuous sampling */
/*
* Values for flags to get_alternatives()
diff --git a/arch/powerpc/include/asm/posix_types.h b/arch/powerpc/include/asm/posix_types.h
index c4e396b540df..f1393252bbda 100644
--- a/arch/powerpc/include/asm/posix_types.h
+++ b/arch/powerpc/include/asm/posix_types.h
@@ -7,122 +7,22 @@
* assume GCC is being used.
*/
-typedef unsigned long __kernel_ino_t;
-typedef unsigned int __kernel_mode_t;
-typedef long __kernel_off_t;
-typedef int __kernel_pid_t;
-typedef unsigned int __kernel_uid_t;
-typedef unsigned int __kernel_gid_t;
-typedef long __kernel_ptrdiff_t;
-typedef long __kernel_time_t;
-typedef long __kernel_clock_t;
-typedef int __kernel_timer_t;
-typedef int __kernel_clockid_t;
-typedef long __kernel_suseconds_t;
-typedef int __kernel_daddr_t;
-typedef char * __kernel_caddr_t;
-typedef unsigned short __kernel_uid16_t;
-typedef unsigned short __kernel_gid16_t;
-typedef unsigned int __kernel_uid32_t;
-typedef unsigned int __kernel_gid32_t;
-typedef unsigned int __kernel_old_uid_t;
-typedef unsigned int __kernel_old_gid_t;
-
#ifdef __powerpc64__
-typedef unsigned long __kernel_nlink_t;
-typedef int __kernel_ipc_pid_t;
-typedef unsigned long __kernel_size_t;
-typedef long __kernel_ssize_t;
typedef unsigned long __kernel_old_dev_t;
+#define __kernel_old_dev_t __kernel_old_dev_t
#else
-typedef unsigned short __kernel_nlink_t;
-typedef short __kernel_ipc_pid_t;
typedef unsigned int __kernel_size_t;
typedef int __kernel_ssize_t;
-typedef unsigned int __kernel_old_dev_t;
-#endif
-
-#ifdef __powerpc64__
-typedef long long __kernel_loff_t;
-#else
-#ifdef __GNUC__
-typedef long long __kernel_loff_t;
-#endif
-#endif
-
-typedef struct {
- int val[2];
-} __kernel_fsid_t;
-
-#ifndef __GNUC__
-
-#define __FD_SET(d, set) ((set)->fds_bits[__FDELT(d)] |= __FDMASK(d))
-#define __FD_CLR(d, set) ((set)->fds_bits[__FDELT(d)] &= ~__FDMASK(d))
-#define __FD_ISSET(d, set) (((set)->fds_bits[__FDELT(d)] & __FDMASK(d)) != 0)
-#define __FD_ZERO(set) \
- ((void) memset ((void *) (set), 0, sizeof (__kernel_fd_set)))
-
-#else /* __GNUC__ */
-
-#if defined(__KERNEL__)
-/* With GNU C, use inline functions instead so args are evaluated only once: */
-
-#undef __FD_SET
-static __inline__ void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp)
-{
- unsigned long _tmp = fd / __NFDBITS;
- unsigned long _rem = fd % __NFDBITS;
- fdsetp->fds_bits[_tmp] |= (1UL<<_rem);
-}
-
-#undef __FD_CLR
-static __inline__ void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp)
-{
- unsigned long _tmp = fd / __NFDBITS;
- unsigned long _rem = fd % __NFDBITS;
- fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem);
-}
-
-#undef __FD_ISSET
-static __inline__ int __FD_ISSET(unsigned long fd, __kernel_fd_set *p)
-{
- unsigned long _tmp = fd / __NFDBITS;
- unsigned long _rem = fd % __NFDBITS;
- return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0;
-}
-
-/*
- * This will unroll the loop for the normal constant case (8 ints,
- * for a 256-bit fd_set)
- */
-#undef __FD_ZERO
-static __inline__ void __FD_ZERO(__kernel_fd_set *p)
-{
- unsigned long *tmp = (unsigned long *)p->fds_bits;
- int i;
+typedef long __kernel_ptrdiff_t;
+#define __kernel_size_t __kernel_size_t
- if (__builtin_constant_p(__FDSET_LONGS)) {
- switch (__FDSET_LONGS) {
- case 16:
- tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
- tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
+typedef unsigned short __kernel_nlink_t;
+#define __kernel_nlink_t __kernel_nlink_t
- case 8:
- tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+typedef short __kernel_ipc_pid_t;
+#define __kernel_ipc_pid_t __kernel_ipc_pid_t
+#endif
- case 4:
- tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
- return;
- }
- }
- i = __FDSET_LONGS;
- while (i) {
- i--;
- *tmp = 0;
- tmp++;
- }
-}
+#include <asm-generic/posix_types.h>
-#endif /* defined(__KERNEL__) */
-#endif /* __GNUC__ */
#endif /* _ASM_POWERPC_POSIX_TYPES_H */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index e980faae4225..d81f99430fe7 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -45,6 +45,7 @@
#define PPC_INST_MFSPR_DSCR_MASK 0xfc1fffff
#define PPC_INST_MTSPR_DSCR 0x7c1103a6
#define PPC_INST_MTSPR_DSCR_MASK 0xfc1fffff
+#define PPC_INST_SLBFEE 0x7c0007a7
#define PPC_INST_STRING 0x7c00042a
#define PPC_INST_STRING_MASK 0xfc0007fe
@@ -183,7 +184,8 @@
__PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b))
#define PPC_ERATSX_DOT(t, a, w) stringify_in_c(.long PPC_INST_ERATSX_DOT | \
__PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b))
-
+#define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \
+ __PPC_RT(t) | __PPC_RB(b))
/*
* Define what the VSX XX1 form instructions will look like, then add
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index b585bff1a022..8e2d0371fe1e 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -385,6 +385,36 @@ static inline unsigned long get_clean_sp(struct pt_regs *regs, int is_32)
extern unsigned long cpuidle_disable;
enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
+extern int powersave_nap; /* set if nap mode can be used in idle loop */
+void cpu_idle_wait(void);
+
+#ifdef CONFIG_PSERIES_IDLE
+extern void update_smt_snooze_delay(int snooze);
+extern int pseries_notify_cpuidle_add_cpu(int cpu);
+#else
+static inline void update_smt_snooze_delay(int snooze) {}
+static inline int pseries_notify_cpuidle_add_cpu(int cpu) { return 0; }
+#endif
+
+extern void flush_instruction_cache(void);
+extern void hard_reset_now(void);
+extern void poweroff_now(void);
+extern int fix_alignment(struct pt_regs *);
+extern void cvt_fd(float *from, double *to);
+extern void cvt_df(double *from, float *to);
+extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val);
+
+#ifdef CONFIG_PPC64
+/*
+ * We handle most unaligned accesses in hardware. On the other hand
+ * unaligned DMA can be very expensive on some ppc64 IO chips (it does
+ * powers of 2 writes until it reaches sufficient alignment).
+ *
+ * Based on this we disable the IP header alignment in network drivers.
+ */
+#define NET_IP_ALIGN 0
+#endif
+
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PROCESSOR_H */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index b1a215eabef6..9d7f0fb69028 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -216,6 +216,7 @@
#define DSISR_ISSTORE 0x02000000 /* access was a store */
#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */
#define DSISR_NOSEGMENT 0x00200000 /* STAB/SLB miss */
+#define DSISR_KEYFAULT 0x00200000 /* Key fault */
#define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */
#define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */
#define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */
@@ -237,6 +238,7 @@
#define LPCR_ISL (1ul << (63-2))
#define LPCR_VC_SH (63-2)
#define LPCR_DPFD_SH (63-11)
+#define LPCR_VRMASD (0x1ful << (63-16))
#define LPCR_VRMA_L (1ul << (63-12))
#define LPCR_VRMA_LP0 (1ul << (63-15))
#define LPCR_VRMA_LP1 (1ul << (63-16))
@@ -493,6 +495,9 @@
#define SPRN_SPRG7 0x117 /* Special Purpose Register General 7 */
#define SPRN_SRR0 0x01A /* Save/Restore Register 0 */
#define SPRN_SRR1 0x01B /* Save/Restore Register 1 */
+#define SRR1_ISI_NOPT 0x40000000 /* ISI: Not found in hash */
+#define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */
+#define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */
#define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */
#define SRR1_WAKESYSERR 0x00300000 /* System error */
#define SRR1_WAKEEE 0x00200000 /* External interrupt */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 8a97aa7289d3..b86faa9107da 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -15,6 +15,11 @@
#ifndef __ASM_POWERPC_REG_BOOKE_H__
#define __ASM_POWERPC_REG_BOOKE_H__
+#ifdef CONFIG_BOOKE_WDT
+extern u32 booke_wdt_enabled;
+extern u32 booke_wdt_period;
+#endif /* CONFIG_BOOKE_WDT */
+
/* Machine State Register (MSR) Fields */
#define MSR_GS (1<<28) /* Guest state */
#define MSR_UCLE (1<<26) /* User-mode cache lock enable */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 01c143bb77ae..557cff845dee 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -74,7 +74,6 @@ struct rtas_suspend_me_data {
/* RTAS event classes */
#define RTAS_INTERNAL_ERROR 0x80000000 /* set bit 0 */
#define RTAS_EPOW_WARNING 0x40000000 /* set bit 1 */
-#define RTAS_POWERMGM_EVENTS 0x20000000 /* set bit 2 */
#define RTAS_HOTPLUG_EVENTS 0x10000000 /* set bit 3 */
#define RTAS_IO_EVENTS 0x08000000 /* set bit 4 */
#define RTAS_EVENT_SCAN_ALL_EVENTS 0xffffffff
@@ -204,6 +203,39 @@ struct rtas_ext_event_log_v6 {
/* Variable length. */
};
+/* pSeries event log format */
+
+/* Two bytes ASCII section IDs */
+#define PSERIES_ELOG_SECT_ID_PRIV_HDR (('P' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_USER_HDR (('U' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_PRIMARY_SRC (('P' << 8) | 'S')
+#define PSERIES_ELOG_SECT_ID_EXTENDED_UH (('E' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_FAILING_MTMS (('M' << 8) | 'T')
+#define PSERIES_ELOG_SECT_ID_SECONDARY_SRC (('S' << 8) | 'S')
+#define PSERIES_ELOG_SECT_ID_DUMP_LOCATOR (('D' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_FW_ERROR (('S' << 8) | 'W')
+#define PSERIES_ELOG_SECT_ID_IMPACT_PART_ID (('L' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_LOGIC_RESOURCE_ID (('L' << 8) | 'R')
+#define PSERIES_ELOG_SECT_ID_HMC_ID (('H' << 8) | 'M')
+#define PSERIES_ELOG_SECT_ID_EPOW (('E' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_IO_EVENT (('I' << 8) | 'E')
+#define PSERIES_ELOG_SECT_ID_MANUFACT_INFO (('M' << 8) | 'I')
+#define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_USER_DEF (('U' << 8) | 'D')
+
+/* Vendor specific Platform Event Log Format, Version 6, section header */
+struct pseries_errorlog {
+ uint16_t id; /* 0x00 2-byte ASCII section ID */
+ uint16_t length; /* 0x02 Section length in bytes */
+ uint8_t version; /* 0x04 Section version */
+ uint8_t subtype; /* 0x05 Section subtype */
+ uint16_t creator_component; /* 0x06 Creator component ID */
+ uint8_t data[]; /* 0x08 Start of section data */
+};
+
+struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
+ uint16_t section_id);
+
/*
* This can be set by the rtas_flash module so that it can get called
* as the absolutely last thing before the kernel terminates.
@@ -325,5 +357,7 @@ static inline int page_is_rtas_user_buf(unsigned long pfn)
static inline int page_is_rtas_user_buf(unsigned long pfn) { return 0;}
#endif
+extern int call_rtas(const char *, int, int, unsigned long *, ...);
+
#endif /* __KERNEL__ */
#endif /* _POWERPC_RTAS_H */
diff --git a/arch/powerpc/include/asm/runlatch.h b/arch/powerpc/include/asm/runlatch.h
new file mode 100644
index 000000000000..54e9b963876e
--- /dev/null
+++ b/arch/powerpc/include/asm/runlatch.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _ASM_POWERPC_RUNLATCH_H
+#define _ASM_POWERPC_RUNLATCH_H
+
+#ifdef CONFIG_PPC64
+
+extern void __ppc64_runlatch_on(void);
+extern void __ppc64_runlatch_off(void);
+
+/*
+ * We manually hard enable-disable, this is called
+ * in the idle loop and we don't want to mess up
+ * with soft-disable/enable & interrupt replay.
+ */
+#define ppc64_runlatch_off() \
+ do { \
+ if (cpu_has_feature(CPU_FTR_CTRL) && \
+ test_thread_local_flags(_TLF_RUNLATCH)) { \
+ unsigned long msr = mfmsr(); \
+ __hard_irq_disable(); \
+ __ppc64_runlatch_off(); \
+ if (msr & MSR_EE) \
+ __hard_irq_enable(); \
+ } \
+ } while (0)
+
+#define ppc64_runlatch_on() \
+ do { \
+ if (cpu_has_feature(CPU_FTR_CTRL) && \
+ !test_thread_local_flags(_TLF_RUNLATCH)) { \
+ unsigned long msr = mfmsr(); \
+ __hard_irq_disable(); \
+ __ppc64_runlatch_on(); \
+ if (msr & MSR_EE) \
+ __hard_irq_enable(); \
+ } \
+ } while (0)
+#else
+#define ppc64_runlatch_on()
+#define ppc64_runlatch_off()
+#endif /* CONFIG_PPC64 */
+
+#endif /* _ASM_POWERPC_RUNLATCH_H */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 186e0fb835bd..d084ce195fc3 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -5,6 +5,28 @@
#ifndef __ASSEMBLY__
extern void ppc_printk_progress(char *s, unsigned short hex);
-#endif
+
+extern unsigned int rtas_data;
+extern int mem_init_done; /* set on boot once kmalloc can be called */
+extern int init_bootmem_done; /* set once bootmem is available */
+extern phys_addr_t memory_limit;
+extern unsigned long klimit;
+extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
+
+extern void via_cuda_init(void);
+extern void read_rtc_time(void);
+extern void pmac_find_display(void);
+
+struct device_node;
+extern void note_scsi_host(struct device_node *, void *);
+
+/* Used in very early kernel initialization. */
+extern unsigned long reloc_offset(void);
+extern unsigned long add_reloc_offset(unsigned long);
+extern void reloc_got2(unsigned long);
+
+#define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x)))
+
+#endif /* !__ASSEMBLY__ */
#endif /* _ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index adba970ce918..ebc24dc5b1a1 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -122,7 +122,6 @@ extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
extern void smp_muxed_ipi_message_pass(int cpu, int msg);
extern irqreturn_t smp_ipi_demux(void);
-void smp_init_iSeries(void);
void smp_init_pSeries(void);
void smp_init_cell(void);
void smp_init_celleb(void);
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
new file mode 100644
index 000000000000..caf82d0a00de
--- /dev/null
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _ASM_POWERPC_SWITCH_TO_H
+#define _ASM_POWERPC_SWITCH_TO_H
+
+struct thread_struct;
+struct task_struct;
+struct pt_regs;
+
+extern struct task_struct *__switch_to(struct task_struct *,
+ struct task_struct *);
+#define switch_to(prev, next, last) ((last) = __switch_to((prev), (next)))
+
+struct thread_struct;
+extern struct task_struct *_switch(struct thread_struct *prev,
+ struct thread_struct *next);
+
+extern void giveup_fpu(struct task_struct *);
+extern void disable_kernel_fp(void);
+extern void enable_kernel_fp(void);
+extern void flush_fp_to_thread(struct task_struct *);
+extern void enable_kernel_altivec(void);
+extern void giveup_altivec(struct task_struct *);
+extern void load_up_altivec(struct task_struct *);
+extern int emulate_altivec(struct pt_regs *);
+extern void __giveup_vsx(struct task_struct *);
+extern void giveup_vsx(struct task_struct *);
+extern void enable_kernel_spe(void);
+extern void giveup_spe(struct task_struct *);
+extern void load_up_spe(struct task_struct *);
+
+#ifndef CONFIG_SMP
+extern void discard_lazy_cpu_state(void);
+#else
+static inline void discard_lazy_cpu_state(void)
+{
+}
+#endif
+
+#ifdef CONFIG_ALTIVEC
+extern void flush_altivec_to_thread(struct task_struct *);
+#else
+static inline void flush_altivec_to_thread(struct task_struct *t)
+{
+}
+#endif
+
+#ifdef CONFIG_VSX
+extern void flush_vsx_to_thread(struct task_struct *);
+#else
+static inline void flush_vsx_to_thread(struct task_struct *t)
+{
+}
+#endif
+
+#ifdef CONFIG_SPE
+extern void flush_spe_to_thread(struct task_struct *);
+#else
+static inline void flush_spe_to_thread(struct task_struct *t)
+{
+}
+#endif
+
+#endif /* _ASM_POWERPC_SWITCH_TO_H */
diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h
deleted file mode 100644
index a02883d5af43..000000000000
--- a/arch/powerpc/include/asm/system.h
+++ /dev/null
@@ -1,592 +0,0 @@
-/*
- * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
- */
-#ifndef _ASM_POWERPC_SYSTEM_H
-#define _ASM_POWERPC_SYSTEM_H
-
-#include <linux/kernel.h>
-#include <linux/irqflags.h>
-
-#include <asm/hw_irq.h>
-
-/*
- * Memory barrier.
- * The sync instruction guarantees that all memory accesses initiated
- * by this processor have been performed (with respect to all other
- * mechanisms that access memory). The eieio instruction is a barrier
- * providing an ordering (separately) for (a) cacheable stores and (b)
- * loads and stores to non-cacheable memory (e.g. I/O devices).
- *
- * mb() prevents loads and stores being reordered across this point.
- * rmb() prevents loads being reordered across this point.
- * wmb() prevents stores being reordered across this point.
- * read_barrier_depends() prevents data-dependent loads being reordered
- * across this point (nop on PPC).
- *
- * *mb() variants without smp_ prefix must order all types of memory
- * operations with one another. sync is the only instruction sufficient
- * to do this.
- *
- * For the smp_ barriers, ordering is for cacheable memory operations
- * only. We have to use the sync instruction for smp_mb(), since lwsync
- * doesn't order loads with respect to previous stores. Lwsync can be
- * used for smp_rmb() and smp_wmb().
- *
- * However, on CPUs that don't support lwsync, lwsync actually maps to a
- * heavy-weight sync, so smp_wmb() can be a lighter-weight eieio.
- */
-#define mb() __asm__ __volatile__ ("sync" : : : "memory")
-#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
-#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
-#define read_barrier_depends() do { } while(0)
-
-#define set_mb(var, value) do { var = value; mb(); } while (0)
-
-#ifdef __KERNEL__
-#define AT_VECTOR_SIZE_ARCH 6 /* entries in ARCH_DLINFO */
-#ifdef CONFIG_SMP
-
-#ifdef __SUBARCH_HAS_LWSYNC
-# define SMPWMB LWSYNC
-#else
-# define SMPWMB eieio
-#endif
-
-#define smp_mb() mb()
-#define smp_rmb() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
-#define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
-#define smp_read_barrier_depends() read_barrier_depends()
-#else
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while(0)
-#endif /* CONFIG_SMP */
-
-/*
- * This is a barrier which prevents following instructions from being
- * started until the value of the argument x is known. For example, if
- * x is a variable loaded from memory, this prevents following
- * instructions from being executed until the load has been performed.
- */
-#define data_barrier(x) \
- asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory");
-
-struct task_struct;
-struct pt_regs;
-
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
-
-extern int (*__debugger)(struct pt_regs *regs);
-extern int (*__debugger_ipi)(struct pt_regs *regs);
-extern int (*__debugger_bpt)(struct pt_regs *regs);
-extern int (*__debugger_sstep)(struct pt_regs *regs);
-extern int (*__debugger_iabr_match)(struct pt_regs *regs);
-extern int (*__debugger_dabr_match)(struct pt_regs *regs);
-extern int (*__debugger_fault_handler)(struct pt_regs *regs);
-
-#define DEBUGGER_BOILERPLATE(__NAME) \
-static inline int __NAME(struct pt_regs *regs) \
-{ \
- if (unlikely(__ ## __NAME)) \
- return __ ## __NAME(regs); \
- return 0; \
-}
-
-DEBUGGER_BOILERPLATE(debugger)
-DEBUGGER_BOILERPLATE(debugger_ipi)
-DEBUGGER_BOILERPLATE(debugger_bpt)
-DEBUGGER_BOILERPLATE(debugger_sstep)
-DEBUGGER_BOILERPLATE(debugger_iabr_match)
-DEBUGGER_BOILERPLATE(debugger_dabr_match)
-DEBUGGER_BOILERPLATE(debugger_fault_handler)
-
-#else
-static inline int debugger(struct pt_regs *regs) { return 0; }
-static inline int debugger_ipi(struct pt_regs *regs) { return 0; }
-static inline int debugger_bpt(struct pt_regs *regs) { return 0; }
-static inline int debugger_sstep(struct pt_regs *regs) { return 0; }
-static inline int debugger_iabr_match(struct pt_regs *regs) { return 0; }
-static inline int debugger_dabr_match(struct pt_regs *regs) { return 0; }
-static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
-#endif
-
-extern int set_dabr(unsigned long dabr);
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-extern void do_send_trap(struct pt_regs *regs, unsigned long address,
- unsigned long error_code, int signal_code, int brkpt);
-#else
-extern void do_dabr(struct pt_regs *regs, unsigned long address,
- unsigned long error_code);
-#endif
-extern void print_backtrace(unsigned long *);
-extern void flush_instruction_cache(void);
-extern void hard_reset_now(void);
-extern void poweroff_now(void);
-
-#ifdef CONFIG_6xx
-extern long _get_L2CR(void);
-extern long _get_L3CR(void);
-extern void _set_L2CR(unsigned long);
-extern void _set_L3CR(unsigned long);
-#else
-#define _get_L2CR() 0L
-#define _get_L3CR() 0L
-#define _set_L2CR(val) do { } while(0)
-#define _set_L3CR(val) do { } while(0)
-#endif
-
-extern void via_cuda_init(void);
-extern void read_rtc_time(void);
-extern void pmac_find_display(void);
-extern void giveup_fpu(struct task_struct *);
-extern void disable_kernel_fp(void);
-extern void enable_kernel_fp(void);
-extern void flush_fp_to_thread(struct task_struct *);
-extern void enable_kernel_altivec(void);
-extern void giveup_altivec(struct task_struct *);
-extern void load_up_altivec(struct task_struct *);
-extern int emulate_altivec(struct pt_regs *);
-extern void __giveup_vsx(struct task_struct *);
-extern void giveup_vsx(struct task_struct *);
-extern void enable_kernel_spe(void);
-extern void giveup_spe(struct task_struct *);
-extern void load_up_spe(struct task_struct *);
-extern int fix_alignment(struct pt_regs *);
-extern void cvt_fd(float *from, double *to);
-extern void cvt_df(double *from, float *to);
-
-#ifndef CONFIG_SMP
-extern void discard_lazy_cpu_state(void);
-#else
-static inline void discard_lazy_cpu_state(void)
-{
-}
-#endif
-
-#ifdef CONFIG_ALTIVEC
-extern void flush_altivec_to_thread(struct task_struct *);
-#else
-static inline void flush_altivec_to_thread(struct task_struct *t)
-{
-}
-#endif
-
-#ifdef CONFIG_VSX
-extern void flush_vsx_to_thread(struct task_struct *);
-#else
-static inline void flush_vsx_to_thread(struct task_struct *t)
-{
-}
-#endif
-
-#ifdef CONFIG_SPE
-extern void flush_spe_to_thread(struct task_struct *);
-#else
-static inline void flush_spe_to_thread(struct task_struct *t)
-{
-}
-#endif
-
-extern int call_rtas(const char *, int, int, unsigned long *, ...);
-extern void cacheable_memzero(void *p, unsigned int nb);
-extern void *cacheable_memcpy(void *, const void *, unsigned int);
-extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
-extern void bad_page_fault(struct pt_regs *, unsigned long, int);
-extern void _exception(int, struct pt_regs *, int, unsigned long);
-extern void die(const char *, struct pt_regs *, long);
-extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val);
-
-#ifdef CONFIG_BOOKE_WDT
-extern u32 booke_wdt_enabled;
-extern u32 booke_wdt_period;
-#endif /* CONFIG_BOOKE_WDT */
-
-struct device_node;
-extern void note_scsi_host(struct device_node *, void *);
-
-extern struct task_struct *__switch_to(struct task_struct *,
- struct task_struct *);
-#define switch_to(prev, next, last) ((last) = __switch_to((prev), (next)))
-
-struct thread_struct;
-extern struct task_struct *_switch(struct thread_struct *prev,
- struct thread_struct *next);
-
-extern unsigned int rtas_data;
-extern int mem_init_done; /* set on boot once kmalloc can be called */
-extern int init_bootmem_done; /* set once bootmem is available */
-extern phys_addr_t memory_limit;
-extern unsigned long klimit;
-extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
-
-extern int powersave_nap; /* set if nap mode can be used in idle loop */
-void cpu_idle_wait(void);
-
-#ifdef CONFIG_PSERIES_IDLE
-extern void update_smt_snooze_delay(int snooze);
-extern int pseries_notify_cpuidle_add_cpu(int cpu);
-#else
-static inline void update_smt_snooze_delay(int snooze) {}
-static inline int pseries_notify_cpuidle_add_cpu(int cpu) { return 0; }
-#endif
-
-/*
- * Atomic exchange
- *
- * Changes the memory location '*ptr' to be val and returns
- * the previous value stored there.
- */
-static __always_inline unsigned long
-__xchg_u32(volatile void *p, unsigned long val)
-{
- unsigned long prev;
-
- __asm__ __volatile__(
- PPC_RELEASE_BARRIER
-"1: lwarx %0,0,%2 \n"
- PPC405_ERR77(0,%2)
-" stwcx. %3,0,%2 \n\
- bne- 1b"
- PPC_ACQUIRE_BARRIER
- : "=&r" (prev), "+m" (*(volatile unsigned int *)p)
- : "r" (p), "r" (val)
- : "cc", "memory");
-
- return prev;
-}
-
-/*
- * Atomic exchange
- *
- * Changes the memory location '*ptr' to be val and returns
- * the previous value stored there.
- */
-static __always_inline unsigned long
-__xchg_u32_local(volatile void *p, unsigned long val)
-{
- unsigned long prev;
-
- __asm__ __volatile__(
-"1: lwarx %0,0,%2 \n"
- PPC405_ERR77(0,%2)
-" stwcx. %3,0,%2 \n\
- bne- 1b"
- : "=&r" (prev), "+m" (*(volatile unsigned int *)p)
- : "r" (p), "r" (val)
- : "cc", "memory");
-
- return prev;
-}
-
-#ifdef CONFIG_PPC64
-static __always_inline unsigned long
-__xchg_u64(volatile void *p, unsigned long val)
-{
- unsigned long prev;
-
- __asm__ __volatile__(
- PPC_RELEASE_BARRIER
-"1: ldarx %0,0,%2 \n"
- PPC405_ERR77(0,%2)
-" stdcx. %3,0,%2 \n\
- bne- 1b"
- PPC_ACQUIRE_BARRIER
- : "=&r" (prev), "+m" (*(volatile unsigned long *)p)
- : "r" (p), "r" (val)
- : "cc", "memory");
-
- return prev;
-}
-
-static __always_inline unsigned long
-__xchg_u64_local(volatile void *p, unsigned long val)
-{
- unsigned long prev;
-
- __asm__ __volatile__(
-"1: ldarx %0,0,%2 \n"
- PPC405_ERR77(0,%2)
-" stdcx. %3,0,%2 \n\
- bne- 1b"
- : "=&r" (prev), "+m" (*(volatile unsigned long *)p)
- : "r" (p), "r" (val)
- : "cc", "memory");
-
- return prev;
-}
-#endif
-
-/*
- * This function doesn't exist, so you'll get a linker error
- * if something tries to do an invalid xchg().
- */
-extern void __xchg_called_with_bad_pointer(void);
-
-static __always_inline unsigned long
-__xchg(volatile void *ptr, unsigned long x, unsigned int size)
-{
- switch (size) {
- case 4:
- return __xchg_u32(ptr, x);
-#ifdef CONFIG_PPC64
- case 8:
- return __xchg_u64(ptr, x);
-#endif
- }
- __xchg_called_with_bad_pointer();
- return x;
-}
-
-static __always_inline unsigned long
-__xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
-{
- switch (size) {
- case 4:
- return __xchg_u32_local(ptr, x);
-#ifdef CONFIG_PPC64
- case 8:
- return __xchg_u64_local(ptr, x);
-#endif
- }
- __xchg_called_with_bad_pointer();
- return x;
-}
-#define xchg(ptr,x) \
- ({ \
- __typeof__(*(ptr)) _x_ = (x); \
- (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \
- })
-
-#define xchg_local(ptr,x) \
- ({ \
- __typeof__(*(ptr)) _x_ = (x); \
- (__typeof__(*(ptr))) __xchg_local((ptr), \
- (unsigned long)_x_, sizeof(*(ptr))); \
- })
-
-/*
- * Compare and exchange - if *p == old, set it to new,
- * and return the old value of *p.
- */
-#define __HAVE_ARCH_CMPXCHG 1
-
-static __always_inline unsigned long
-__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
-{
- unsigned int prev;
-
- __asm__ __volatile__ (
- PPC_RELEASE_BARRIER
-"1: lwarx %0,0,%2 # __cmpxchg_u32\n\
- cmpw 0,%0,%3\n\
- bne- 2f\n"
- PPC405_ERR77(0,%2)
-" stwcx. %4,0,%2\n\
- bne- 1b"
- PPC_ACQUIRE_BARRIER
- "\n\
-2:"
- : "=&r" (prev), "+m" (*p)
- : "r" (p), "r" (old), "r" (new)
- : "cc", "memory");
-
- return prev;
-}
-
-static __always_inline unsigned long
-__cmpxchg_u32_local(volatile unsigned int *p, unsigned long old,
- unsigned long new)
-{
- unsigned int prev;
-
- __asm__ __volatile__ (
-"1: lwarx %0,0,%2 # __cmpxchg_u32\n\
- cmpw 0,%0,%3\n\
- bne- 2f\n"
- PPC405_ERR77(0,%2)
-" stwcx. %4,0,%2\n\
- bne- 1b"
- "\n\
-2:"
- : "=&r" (prev), "+m" (*p)
- : "r" (p), "r" (old), "r" (new)
- : "cc", "memory");
-
- return prev;
-}
-
-#ifdef CONFIG_PPC64
-static __always_inline unsigned long
-__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
-{
- unsigned long prev;
-
- __asm__ __volatile__ (
- PPC_RELEASE_BARRIER
-"1: ldarx %0,0,%2 # __cmpxchg_u64\n\
- cmpd 0,%0,%3\n\
- bne- 2f\n\
- stdcx. %4,0,%2\n\
- bne- 1b"
- PPC_ACQUIRE_BARRIER
- "\n\
-2:"
- : "=&r" (prev), "+m" (*p)
- : "r" (p), "r" (old), "r" (new)
- : "cc", "memory");
-
- return prev;
-}
-
-static __always_inline unsigned long
-__cmpxchg_u64_local(volatile unsigned long *p, unsigned long old,
- unsigned long new)
-{
- unsigned long prev;
-
- __asm__ __volatile__ (
-"1: ldarx %0,0,%2 # __cmpxchg_u64\n\
- cmpd 0,%0,%3\n\
- bne- 2f\n\
- stdcx. %4,0,%2\n\
- bne- 1b"
- "\n\
-2:"
- : "=&r" (prev), "+m" (*p)
- : "r" (p), "r" (old), "r" (new)
- : "cc", "memory");
-
- return prev;
-}
-#endif
-
-/* This function doesn't exist, so you'll get a linker error
- if something tries to do an invalid cmpxchg(). */
-extern void __cmpxchg_called_with_bad_pointer(void);
-
-static __always_inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
- unsigned int size)
-{
- switch (size) {
- case 4:
- return __cmpxchg_u32(ptr, old, new);
-#ifdef CONFIG_PPC64
- case 8:
- return __cmpxchg_u64(ptr, old, new);
-#endif
- }
- __cmpxchg_called_with_bad_pointer();
- return old;
-}
-
-static __always_inline unsigned long
-__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
- unsigned int size)
-{
- switch (size) {
- case 4:
- return __cmpxchg_u32_local(ptr, old, new);
-#ifdef CONFIG_PPC64
- case 8:
- return __cmpxchg_u64_local(ptr, old, new);
-#endif
- }
- __cmpxchg_called_with_bad_pointer();
- return old;
-}
-
-#define cmpxchg(ptr, o, n) \
- ({ \
- __typeof__(*(ptr)) _o_ = (o); \
- __typeof__(*(ptr)) _n_ = (n); \
- (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \
- (unsigned long)_n_, sizeof(*(ptr))); \
- })
-
-
-#define cmpxchg_local(ptr, o, n) \
- ({ \
- __typeof__(*(ptr)) _o_ = (o); \
- __typeof__(*(ptr)) _n_ = (n); \
- (__typeof__(*(ptr))) __cmpxchg_local((ptr), (unsigned long)_o_, \
- (unsigned long)_n_, sizeof(*(ptr))); \
- })
-
-#ifdef CONFIG_PPC64
-/*
- * We handle most unaligned accesses in hardware. On the other hand
- * unaligned DMA can be very expensive on some ppc64 IO chips (it does
- * powers of 2 writes until it reaches sufficient alignment).
- *
- * Based on this we disable the IP header alignment in network drivers.
- */
-#define NET_IP_ALIGN 0
-
-#define cmpxchg64(ptr, o, n) \
- ({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg((ptr), (o), (n)); \
- })
-#define cmpxchg64_local(ptr, o, n) \
- ({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg_local((ptr), (o), (n)); \
- })
-#else
-#include <asm-generic/cmpxchg-local.h>
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
-#endif
-
-extern unsigned long arch_align_stack(unsigned long sp);
-
-/* Used in very early kernel initialization. */
-extern unsigned long reloc_offset(void);
-extern unsigned long add_reloc_offset(unsigned long);
-extern void reloc_got2(unsigned long);
-
-#define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x)))
-
-extern struct dentry *powerpc_debugfs_root;
-
-#ifdef CONFIG_PPC64
-
-extern void __ppc64_runlatch_on(void);
-extern void __ppc64_runlatch_off(void);
-
-/*
- * We manually hard enable-disable, this is called
- * in the idle loop and we don't want to mess up
- * with soft-disable/enable & interrupt replay.
- */
-#define ppc64_runlatch_off() \
- do { \
- if (cpu_has_feature(CPU_FTR_CTRL) && \
- test_thread_local_flags(_TLF_RUNLATCH)) { \
- unsigned long msr = mfmsr(); \
- __hard_irq_disable(); \
- __ppc64_runlatch_off(); \
- if (msr & MSR_EE) \
- __hard_irq_enable(); \
- } \
- } while (0)
-
-#define ppc64_runlatch_on() \
- do { \
- if (cpu_has_feature(CPU_FTR_CTRL) && \
- !test_thread_local_flags(_TLF_RUNLATCH)) { \
- unsigned long msr = mfmsr(); \
- __hard_irq_disable(); \
- __ppc64_runlatch_on(); \
- if (msr & MSR_EE) \
- __hard_irq_enable(); \
- } \
- } while (0)
-#else
-#define ppc64_runlatch_on()
-#define ppc64_runlatch_off()
-#endif /* CONFIG_PPC64 */
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_SYSTEM_H */
diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h
index 8338aef5a4d3..b3038817b8dc 100644
--- a/arch/powerpc/include/asm/udbg.h
+++ b/arch/powerpc/include/asm/udbg.h
@@ -44,7 +44,6 @@ extern void __init udbg_init_debug_lpar_hvsi(void);
extern void __init udbg_init_pmac_realmode(void);
extern void __init udbg_init_maple_realmode(void);
extern void __init udbg_init_pas_realmode(void);
-extern void __init udbg_init_iseries(void);
extern void __init udbg_init_rtas_panel(void);
extern void __init udbg_init_rtas_console(void);
extern void __init udbg_init_debug_beat(void);
diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h
index 0a290a195946..6bfd5ffe1d4f 100644
--- a/arch/powerpc/include/asm/vio.h
+++ b/arch/powerpc/include/asm/vio.h
@@ -69,6 +69,7 @@ struct vio_dev {
};
struct vio_driver {
+ const char *name;
const struct vio_device_id *id_table;
int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
int (*remove)(struct vio_dev *dev);
@@ -76,10 +77,17 @@ struct vio_driver {
* be loaded in a CMO environment if it uses DMA.
*/
unsigned long (*get_desired_dma)(struct vio_dev *dev);
+ const struct dev_pm_ops *pm;
struct device_driver driver;
};
-extern int vio_register_driver(struct vio_driver *drv);
+extern int __vio_register_driver(struct vio_driver *drv, struct module *owner,
+ const char *mod_name);
+/*
+ * vio_register_driver must be a macro so that KBUILD_MODNAME can be expanded
+ */
+#define vio_register_driver(driver) \
+ __vio_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)
extern void vio_unregister_driver(struct vio_driver *drv);
extern int vio_cmo_entitlement_update(size_t);
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 8184ee97e484..ee5b690a0bed 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -21,10 +21,10 @@
#include <linux/mm.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <asm/cache.h>
#include <asm/cputable.h>
#include <asm/emulated_ops.h>
+#include <asm/switch_to.h>
struct aligninfo {
unsigned char len;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index cc492e48ddfa..34b8afe94a50 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -412,16 +412,23 @@ int main(void)
DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
#endif
- DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
- DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
- DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
- DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7));
+ DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
+ DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5));
+ DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6));
+ DEFINE(VCPU_SHARED_SPRG7, offsetof(struct kvm_vcpu_arch_shared, sprg7));
DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid));
DEFINE(VCPU_SHADOW_PID1, offsetof(struct kvm_vcpu, arch.shadow_pid1));
DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
+ DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0));
+ DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1));
+ DEFINE(VCPU_SHARED_MAS2, offsetof(struct kvm_vcpu_arch_shared, mas2));
+ DEFINE(VCPU_SHARED_MAS7_3, offsetof(struct kvm_vcpu_arch_shared, mas7_3));
+ DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4));
+ DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6));
+
/* book3s */
#ifdef CONFIG_KVM_BOOK3S_64_HV
DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
@@ -434,6 +441,7 @@ int main(void)
DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu));
DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
+ DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
#endif
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 138ae183c440..455faa389876 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -20,6 +20,7 @@
#include <asm/cputable.h>
#include <asm/prom.h> /* for PTRRELOC on ARCH=ppc */
#include <asm/mmu.h>
+#include <asm/setup.h>
struct cpu_spec* cur_cpu_spec = NULL;
EXPORT_SYMBOL(cur_cpu_spec);
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index abef75176c07..fdcd8f551aff 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -27,8 +27,8 @@
#include <asm/kdump.h>
#include <asm/prom.h>
#include <asm/smp.h>
-#include <asm/system.h>
#include <asm/setjmp.h>
+#include <asm/debug.h>
/*
* The primary CPU waits a while for all secondary CPUs to enter. This is to
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 2d0868a4e2f0..cb705fdbb458 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -101,14 +101,14 @@ data_access_not_stab:
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
#endif
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD,
- KVMTEST_PR, 0x300)
+ KVMTEST, 0x300)
. = 0x380
.globl data_access_slb_pSeries
data_access_slb_pSeries:
HMT_MEDIUM
SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
+ EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380)
std r3,PACA_EXSLB+EX_R3(r13)
mfspr r3,SPRN_DAR
#ifdef __DISABLED__
@@ -330,8 +330,8 @@ do_stab_bolted_pSeries:
EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
#endif /* CONFIG_POWER4_ONLY */
- KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x300)
- KVM_HANDLER_PR_SKIP(PACA_EXSLB, EXC_STD, 0x380)
+ KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
+ KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400)
KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480)
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900)
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index cfe7a38708c3..18bdf74fa164 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -40,6 +40,8 @@
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/fadump.h>
+#include <asm/debug.h>
+#include <asm/setup.h>
static struct fw_dump fw_dump;
static struct fadump_mem_struct fdm;
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index e8e821146f38..6d2209ac0c44 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -26,11 +26,11 @@
#include <linux/sysctl.h>
#include <linux/tick.h>
-#include <asm/system.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/time.h>
#include <asm/machdep.h>
+#include <asm/runlatch.h>
#include <asm/smp.h>
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index a3d128e94cff..243dbabfe74d 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -57,7 +57,6 @@
#include <linux/of_irq.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/irq.h>
@@ -67,6 +66,7 @@
#include <asm/machdep.h>
#include <asm/udbg.h>
#include <asm/smp.h>
+#include <asm/debug.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
@@ -208,8 +208,8 @@ notrace void arch_local_irq_restore(unsigned long en)
* we are checking the "new" CPU instead of the old one. This
* is only a problem if an event happened on the "old" CPU.
*
- * External interrupt events on non-iseries will have caused
- * interrupts to be hard-disabled, so there is no problem, we
+ * External interrupt events will have caused interrupts to
+ * be hard-disabled, so there is no problem, we
* cannot have preempted.
*/
irq_happened = get_irq_happened();
@@ -445,9 +445,9 @@ void do_IRQ(struct pt_regs *regs)
may_hard_irq_enable();
/* And finally process it */
- if (irq != NO_IRQ && irq != NO_IRQ_IGNORE)
+ if (irq != NO_IRQ)
handle_one_irq(irq);
- else if (irq != NO_IRQ_IGNORE)
+ else
__get_cpu_var(irq_stat).spurious_irqs++;
irq_exit();
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 76a6e40a6f7c..782bd0a3c2f0 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -24,6 +24,7 @@
#include <asm/current.h>
#include <asm/processor.h>
#include <asm/machdep.h>
+#include <asm/debug.h>
/*
* This table contains the mapping between PowerPC hardware trap types, and
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bc47352deb1f..e88c64331819 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -35,7 +35,6 @@
#include <asm/cacheflush.h>
#include <asm/sstep.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
#define MSR_SINGLESTEP (MSR_DE)
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 2985338d0e10..62bdf2389669 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
*
* Authors:
* Alexander Graf <agraf@suse.de>
@@ -29,6 +30,7 @@
#include <asm/sections.h>
#include <asm/cacheflush.h>
#include <asm/disassemble.h>
+#include <asm/ppc-opcode.h>
#define KVM_MAGIC_PAGE (-4096L)
#define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x)
@@ -41,34 +43,30 @@
#define KVM_INST_B 0x48000000
#define KVM_INST_B_MASK 0x03ffffff
#define KVM_INST_B_MAX 0x01ffffff
+#define KVM_INST_LI 0x38000000
#define KVM_MASK_RT 0x03e00000
#define KVM_RT_30 0x03c00000
#define KVM_MASK_RB 0x0000f800
#define KVM_INST_MFMSR 0x7c0000a6
-#define KVM_INST_MFSPR_SPRG0 0x7c1042a6
-#define KVM_INST_MFSPR_SPRG1 0x7c1142a6
-#define KVM_INST_MFSPR_SPRG2 0x7c1242a6
-#define KVM_INST_MFSPR_SPRG3 0x7c1342a6
-#define KVM_INST_MFSPR_SRR0 0x7c1a02a6
-#define KVM_INST_MFSPR_SRR1 0x7c1b02a6
-#define KVM_INST_MFSPR_DAR 0x7c1302a6
-#define KVM_INST_MFSPR_DSISR 0x7c1202a6
-
-#define KVM_INST_MTSPR_SPRG0 0x7c1043a6
-#define KVM_INST_MTSPR_SPRG1 0x7c1143a6
-#define KVM_INST_MTSPR_SPRG2 0x7c1243a6
-#define KVM_INST_MTSPR_SPRG3 0x7c1343a6
-#define KVM_INST_MTSPR_SRR0 0x7c1a03a6
-#define KVM_INST_MTSPR_SRR1 0x7c1b03a6
-#define KVM_INST_MTSPR_DAR 0x7c1303a6
-#define KVM_INST_MTSPR_DSISR 0x7c1203a6
+
+#define SPR_FROM 0
+#define SPR_TO 0x100
+
+#define KVM_INST_SPR(sprn, moveto) (0x7c0002a6 | \
+ (((sprn) & 0x1f) << 16) | \
+ (((sprn) & 0x3e0) << 6) | \
+ (moveto))
+
+#define KVM_INST_MFSPR(sprn) KVM_INST_SPR(sprn, SPR_FROM)
+#define KVM_INST_MTSPR(sprn) KVM_INST_SPR(sprn, SPR_TO)
#define KVM_INST_TLBSYNC 0x7c00046c
#define KVM_INST_MTMSRD_L0 0x7c000164
#define KVM_INST_MTMSRD_L1 0x7c010164
#define KVM_INST_MTMSR 0x7c000124
+#define KVM_INST_WRTEE 0x7c000106
#define KVM_INST_WRTEEI_0 0x7c000146
#define KVM_INST_WRTEEI_1 0x7c008146
@@ -270,26 +268,27 @@ static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
#ifdef CONFIG_BOOKE
-extern u32 kvm_emulate_wrteei_branch_offs;
-extern u32 kvm_emulate_wrteei_ee_offs;
-extern u32 kvm_emulate_wrteei_len;
-extern u32 kvm_emulate_wrteei[];
+extern u32 kvm_emulate_wrtee_branch_offs;
+extern u32 kvm_emulate_wrtee_reg_offs;
+extern u32 kvm_emulate_wrtee_orig_ins_offs;
+extern u32 kvm_emulate_wrtee_len;
+extern u32 kvm_emulate_wrtee[];
-static void kvm_patch_ins_wrteei(u32 *inst)
+static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
{
u32 *p;
int distance_start;
int distance_end;
ulong next_inst;
- p = kvm_alloc(kvm_emulate_wrteei_len * 4);
+ p = kvm_alloc(kvm_emulate_wrtee_len * 4);
if (!p)
return;
/* Find out where we are and put everything there */
distance_start = (ulong)p - (ulong)inst;
next_inst = ((ulong)inst + 4);
- distance_end = next_inst - (ulong)&p[kvm_emulate_wrteei_branch_offs];
+ distance_end = next_inst - (ulong)&p[kvm_emulate_wrtee_branch_offs];
/* Make sure we only write valid b instructions */
if (distance_start > KVM_INST_B_MAX) {
@@ -298,10 +297,65 @@ static void kvm_patch_ins_wrteei(u32 *inst)
}
/* Modify the chunk to fit the invocation */
- memcpy(p, kvm_emulate_wrteei, kvm_emulate_wrteei_len * 4);
- p[kvm_emulate_wrteei_branch_offs] |= distance_end & KVM_INST_B_MASK;
- p[kvm_emulate_wrteei_ee_offs] |= (*inst & MSR_EE);
- flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrteei_len * 4);
+ memcpy(p, kvm_emulate_wrtee, kvm_emulate_wrtee_len * 4);
+ p[kvm_emulate_wrtee_branch_offs] |= distance_end & KVM_INST_B_MASK;
+
+ if (imm_one) {
+ p[kvm_emulate_wrtee_reg_offs] =
+ KVM_INST_LI | __PPC_RT(30) | MSR_EE;
+ } else {
+ /* Make clobbered registers work too */
+ switch (get_rt(rt)) {
+ case 30:
+ kvm_patch_ins_ll(&p[kvm_emulate_wrtee_reg_offs],
+ magic_var(scratch2), KVM_RT_30);
+ break;
+ case 31:
+ kvm_patch_ins_ll(&p[kvm_emulate_wrtee_reg_offs],
+ magic_var(scratch1), KVM_RT_30);
+ break;
+ default:
+ p[kvm_emulate_wrtee_reg_offs] |= rt;
+ break;
+ }
+ }
+
+ p[kvm_emulate_wrtee_orig_ins_offs] = *inst;
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrtee_len * 4);
+
+ /* Patch the invocation */
+ kvm_patch_ins_b(inst, distance_start);
+}
+
+extern u32 kvm_emulate_wrteei_0_branch_offs;
+extern u32 kvm_emulate_wrteei_0_len;
+extern u32 kvm_emulate_wrteei_0[];
+
+static void kvm_patch_ins_wrteei_0(u32 *inst)
+{
+ u32 *p;
+ int distance_start;
+ int distance_end;
+ ulong next_inst;
+
+ p = kvm_alloc(kvm_emulate_wrteei_0_len * 4);
+ if (!p)
+ return;
+
+ /* Find out where we are and put everything there */
+ distance_start = (ulong)p - (ulong)inst;
+ next_inst = ((ulong)inst + 4);
+ distance_end = next_inst - (ulong)&p[kvm_emulate_wrteei_0_branch_offs];
+
+ /* Make sure we only write valid b instructions */
+ if (distance_start > KVM_INST_B_MAX) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ memcpy(p, kvm_emulate_wrteei_0, kvm_emulate_wrteei_0_len * 4);
+ p[kvm_emulate_wrteei_0_branch_offs] |= distance_end & KVM_INST_B_MASK;
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrteei_0_len * 4);
/* Patch the invocation */
kvm_patch_ins_b(inst, distance_start);
@@ -380,56 +434,191 @@ static void kvm_check_ins(u32 *inst, u32 features)
case KVM_INST_MFMSR:
kvm_patch_ins_ld(inst, magic_var(msr), inst_rt);
break;
- case KVM_INST_MFSPR_SPRG0:
+ case KVM_INST_MFSPR(SPRN_SPRG0):
kvm_patch_ins_ld(inst, magic_var(sprg0), inst_rt);
break;
- case KVM_INST_MFSPR_SPRG1:
+ case KVM_INST_MFSPR(SPRN_SPRG1):
kvm_patch_ins_ld(inst, magic_var(sprg1), inst_rt);
break;
- case KVM_INST_MFSPR_SPRG2:
+ case KVM_INST_MFSPR(SPRN_SPRG2):
kvm_patch_ins_ld(inst, magic_var(sprg2), inst_rt);
break;
- case KVM_INST_MFSPR_SPRG3:
+ case KVM_INST_MFSPR(SPRN_SPRG3):
kvm_patch_ins_ld(inst, magic_var(sprg3), inst_rt);
break;
- case KVM_INST_MFSPR_SRR0:
+ case KVM_INST_MFSPR(SPRN_SRR0):
kvm_patch_ins_ld(inst, magic_var(srr0), inst_rt);
break;
- case KVM_INST_MFSPR_SRR1:
+ case KVM_INST_MFSPR(SPRN_SRR1):
kvm_patch_ins_ld(inst, magic_var(srr1), inst_rt);
break;
- case KVM_INST_MFSPR_DAR:
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_DEAR):
+#else
+ case KVM_INST_MFSPR(SPRN_DAR):
+#endif
kvm_patch_ins_ld(inst, magic_var(dar), inst_rt);
break;
- case KVM_INST_MFSPR_DSISR:
+ case KVM_INST_MFSPR(SPRN_DSISR):
kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt);
break;
+#ifdef CONFIG_PPC_BOOK3E_MMU
+ case KVM_INST_MFSPR(SPRN_MAS0):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas0), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS1):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas1), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS2):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_ld(inst, magic_var(mas2), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS3):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas7_3) + 4, inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS4):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas4), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS6):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas6), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS7):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas7_3), inst_rt);
+ break;
+#endif /* CONFIG_PPC_BOOK3E_MMU */
+
+ case KVM_INST_MFSPR(SPRN_SPRG4):
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_SPRG4R):
+#endif
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_ld(inst, magic_var(sprg4), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG5):
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_SPRG5R):
+#endif
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_ld(inst, magic_var(sprg5), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG6):
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_SPRG6R):
+#endif
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_ld(inst, magic_var(sprg6), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG7):
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_SPRG7R):
+#endif
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_ld(inst, magic_var(sprg7), inst_rt);
+ break;
+
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_ESR):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(esr), inst_rt);
+ break;
+#endif
+
+ case KVM_INST_MFSPR(SPRN_PIR):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(pir), inst_rt);
+ break;
+
+
/* Stores */
- case KVM_INST_MTSPR_SPRG0:
+ case KVM_INST_MTSPR(SPRN_SPRG0):
kvm_patch_ins_std(inst, magic_var(sprg0), inst_rt);
break;
- case KVM_INST_MTSPR_SPRG1:
+ case KVM_INST_MTSPR(SPRN_SPRG1):
kvm_patch_ins_std(inst, magic_var(sprg1), inst_rt);
break;
- case KVM_INST_MTSPR_SPRG2:
+ case KVM_INST_MTSPR(SPRN_SPRG2):
kvm_patch_ins_std(inst, magic_var(sprg2), inst_rt);
break;
- case KVM_INST_MTSPR_SPRG3:
+ case KVM_INST_MTSPR(SPRN_SPRG3):
kvm_patch_ins_std(inst, magic_var(sprg3), inst_rt);
break;
- case KVM_INST_MTSPR_SRR0:
+ case KVM_INST_MTSPR(SPRN_SRR0):
kvm_patch_ins_std(inst, magic_var(srr0), inst_rt);
break;
- case KVM_INST_MTSPR_SRR1:
+ case KVM_INST_MTSPR(SPRN_SRR1):
kvm_patch_ins_std(inst, magic_var(srr1), inst_rt);
break;
- case KVM_INST_MTSPR_DAR:
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MTSPR(SPRN_DEAR):
+#else
+ case KVM_INST_MTSPR(SPRN_DAR):
+#endif
kvm_patch_ins_std(inst, magic_var(dar), inst_rt);
break;
- case KVM_INST_MTSPR_DSISR:
+ case KVM_INST_MTSPR(SPRN_DSISR):
kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt);
break;
+#ifdef CONFIG_PPC_BOOK3E_MMU
+ case KVM_INST_MTSPR(SPRN_MAS0):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas0), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS1):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas1), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS2):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_std(inst, magic_var(mas2), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS3):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas7_3) + 4, inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS4):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas4), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS6):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas6), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS7):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas7_3), inst_rt);
+ break;
+#endif /* CONFIG_PPC_BOOK3E_MMU */
+
+ case KVM_INST_MTSPR(SPRN_SPRG4):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_std(inst, magic_var(sprg4), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SPRG5):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_std(inst, magic_var(sprg5), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SPRG6):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_std(inst, magic_var(sprg6), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SPRG7):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_std(inst, magic_var(sprg7), inst_rt);
+ break;
+
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MTSPR(SPRN_ESR):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(esr), inst_rt);
+ break;
+#endif
/* Nops */
case KVM_INST_TLBSYNC:
@@ -444,6 +633,11 @@ static void kvm_check_ins(u32 *inst, u32 features)
case KVM_INST_MTMSRD_L0:
kvm_patch_ins_mtmsr(inst, inst_rt);
break;
+#ifdef CONFIG_BOOKE
+ case KVM_INST_WRTEE:
+ kvm_patch_ins_wrtee(inst, inst_rt, 0);
+ break;
+#endif
}
switch (inst_no_rt & ~KVM_MASK_RB) {
@@ -461,13 +655,19 @@ static void kvm_check_ins(u32 *inst, u32 features)
switch (_inst) {
#ifdef CONFIG_BOOKE
case KVM_INST_WRTEEI_0:
+ kvm_patch_ins_wrteei_0(inst);
+ break;
+
case KVM_INST_WRTEEI_1:
- kvm_patch_ins_wrteei(inst);
+ kvm_patch_ins_wrtee(inst, 0, 1);
break;
#endif
}
}
+extern u32 kvm_template_start[];
+extern u32 kvm_template_end[];
+
static void kvm_use_magic_page(void)
{
u32 *p;
@@ -488,8 +688,23 @@ static void kvm_use_magic_page(void)
start = (void*)_stext;
end = (void*)_etext;
- for (p = start; p < end; p++)
+ /*
+ * Being interrupted in the middle of patching would
+ * be bad for SPRG4-7, which KVM can't keep in sync
+ * with emulated accesses because reads don't trap.
+ */
+ local_irq_disable();
+
+ for (p = start; p < end; p++) {
+ /* Avoid patching the template code */
+ if (p >= kvm_template_start && p < kvm_template_end) {
+ p = kvm_template_end - 1;
+ continue;
+ }
kvm_check_ins(p, features);
+ }
+
+ local_irq_enable();
printk(KERN_INFO "KVM: Live patching for a fast VM %s\n",
kvm_patching_worked ? "worked" : "failed");
diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S
index f2b1b2523e61..e291cf3cf954 100644
--- a/arch/powerpc/kernel/kvm_emul.S
+++ b/arch/powerpc/kernel/kvm_emul.S
@@ -13,6 +13,7 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2010
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
*
* Authors: Alexander Graf <agraf@suse.de>
*/
@@ -65,6 +66,9 @@ kvm_hypercall_start:
shared->critical == r1 and r2 is always != r1 */ \
STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0);
+.global kvm_template_start
+kvm_template_start:
+
.global kvm_emulate_mtmsrd
kvm_emulate_mtmsrd:
@@ -167,6 +171,9 @@ maybe_stay_in_guest:
kvm_emulate_mtmsr_reg2:
ori r30, r0, 0
+ /* Put MSR into magic page because we don't call mtmsr */
+ STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
/* Check if we have to fetch an interrupt */
lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
cmpwi r31, 0
@@ -174,15 +181,10 @@ kvm_emulate_mtmsr_reg2:
/* Check if we may trigger an interrupt */
andi. r31, r30, MSR_EE
- beq no_mtmsr
-
- b do_mtmsr
+ bne do_mtmsr
no_mtmsr:
- /* Put MSR into magic page because we don't call mtmsr */
- STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
-
SCRATCH_RESTORE
/* Go back to caller */
@@ -210,24 +212,80 @@ kvm_emulate_mtmsr_orig_ins_offs:
kvm_emulate_mtmsr_len:
.long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4
+/* also used for wrteei 1 */
+.global kvm_emulate_wrtee
+kvm_emulate_wrtee:
+
+ SCRATCH_SAVE
+
+ /* Fetch old MSR in r31 */
+ LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ /* Insert new MSR[EE] */
+kvm_emulate_wrtee_reg:
+ ori r30, r0, 0
+ rlwimi r31, r30, 0, MSR_EE
+
+ /*
+ * If MSR[EE] is now set, check for a pending interrupt.
+ * We could skip this if MSR[EE] was already on, but that
+ * should be rare, so don't bother.
+ */
+ andi. r30, r30, MSR_EE
+
+ /* Put MSR into magic page because we don't call wrtee */
+ STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ beq no_wrtee
+
+ /* Check if we have to fetch an interrupt */
+ lwz r30, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
+ cmpwi r30, 0
+ bne do_wrtee
+
+no_wrtee:
+ SCRATCH_RESTORE
+
+ /* Go back to caller */
+kvm_emulate_wrtee_branch:
+ b .
+
+do_wrtee:
+ SCRATCH_RESTORE
+ /* Just fire off the wrtee if it's critical */
+kvm_emulate_wrtee_orig_ins:
+ wrtee r0
-.global kvm_emulate_wrteei
-kvm_emulate_wrteei:
+ b kvm_emulate_wrtee_branch
+kvm_emulate_wrtee_end:
+
+.global kvm_emulate_wrtee_branch_offs
+kvm_emulate_wrtee_branch_offs:
+ .long (kvm_emulate_wrtee_branch - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrtee_reg_offs
+kvm_emulate_wrtee_reg_offs:
+ .long (kvm_emulate_wrtee_reg - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrtee_orig_ins_offs
+kvm_emulate_wrtee_orig_ins_offs:
+ .long (kvm_emulate_wrtee_orig_ins - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrtee_len
+kvm_emulate_wrtee_len:
+ .long (kvm_emulate_wrtee_end - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrteei_0
+kvm_emulate_wrteei_0:
SCRATCH_SAVE
/* Fetch old MSR in r31 */
LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
/* Remove MSR_EE from old MSR */
- li r30, 0
- ori r30, r30, MSR_EE
- andc r31, r31, r30
-
- /* OR new MSR_EE onto the old MSR */
-kvm_emulate_wrteei_ee:
- ori r31, r31, 0
+ rlwinm r31, r31, 0, ~MSR_EE
/* Write new MSR value back */
STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
@@ -235,22 +293,17 @@ kvm_emulate_wrteei_ee:
SCRATCH_RESTORE
/* Go back to caller */
-kvm_emulate_wrteei_branch:
+kvm_emulate_wrteei_0_branch:
b .
-kvm_emulate_wrteei_end:
-
-.global kvm_emulate_wrteei_branch_offs
-kvm_emulate_wrteei_branch_offs:
- .long (kvm_emulate_wrteei_branch - kvm_emulate_wrteei) / 4
+kvm_emulate_wrteei_0_end:
-.global kvm_emulate_wrteei_ee_offs
-kvm_emulate_wrteei_ee_offs:
- .long (kvm_emulate_wrteei_ee - kvm_emulate_wrteei) / 4
-
-.global kvm_emulate_wrteei_len
-kvm_emulate_wrteei_len:
- .long (kvm_emulate_wrteei_end - kvm_emulate_wrteei) / 4
+.global kvm_emulate_wrteei_0_branch_offs
+kvm_emulate_wrteei_0_branch_offs:
+ .long (kvm_emulate_wrteei_0_branch - kvm_emulate_wrteei_0) / 4
+.global kvm_emulate_wrteei_0_len
+kvm_emulate_wrteei_0_len:
+ .long (kvm_emulate_wrteei_0_end - kvm_emulate_wrteei_0) / 4
.global kvm_emulate_mtsrin
kvm_emulate_mtsrin:
@@ -300,3 +353,6 @@ kvm_emulate_mtsrin_orig_ins_offs:
.global kvm_emulate_mtsrin_len
kvm_emulate_mtsrin_len:
.long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
+
+.global kvm_template_end
+kvm_template_end:
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index ac12bd80ad95..f5725bce9ed2 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -30,7 +30,6 @@
#include <asm/hvcall.h>
#include <asm/firmware.h>
#include <asm/rtas.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/prom.h>
#include <asm/vdso_datapage.h>
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index d3114a71dd32..786a2700ec2d 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -26,7 +26,6 @@
#include <linux/cuda.h>
#include <linux/pmu.h>
#include <asm/prom.h>
-#include <asm/system.h>
#include <asm/pci-bridge.h>
#include <asm/irq.h>
#include <asm/pmac_feature.h>
@@ -43,6 +42,7 @@
#include <asm/signal.h>
#include <asm/dcr.h>
#include <asm/ftrace.h>
+#include <asm/switch_to.h>
#ifdef CONFIG_PPC32
extern void transfer_to_handler(void);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index e40707032ac3..f88698c0f332 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -41,14 +41,16 @@
#include <asm/pgtable.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/time.h>
+#include <asm/runlatch.h>
#include <asm/syscalls.h>
+#include <asm/switch_to.h>
+#include <asm/debug.h>
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#endif
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 89e850af3dd6..f191bf02943a 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -41,7 +41,6 @@
#include <asm/io.h>
#include <asm/kdump.h>
#include <asm/smp.h>
-#include <asm/system.h>
#include <asm/mmu.h>
#include <asm/paca.h>
#include <asm/pgtable.h>
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index e2d599048142..99860273211b 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -35,7 +35,6 @@
#include <asm/irq.h>
#include <asm/io.h>
#include <asm/smp.h>
-#include <asm/system.h>
#include <asm/mmu.h>
#include <asm/pgtable.h>
#include <asm/pci.h>
@@ -447,7 +446,7 @@ static void __init __attribute__((noreturn)) prom_panic(const char *reason)
if (RELOC(of_platform) == PLATFORM_POWERMAC)
asm("trap\n");
- /* ToDo: should put up an SRC here on p/iSeries */
+ /* ToDo: should put up an SRC here on pSeries */
call_prom("exit", 0, 0);
for (;;) /* should never get here */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 5b43325402bc..8d8e028893be 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -36,7 +36,7 @@
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/pgtable.h>
-#include <asm/system.h>
+#include <asm/switch_to.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index 69c4be917d07..469349d14a97 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -32,7 +32,7 @@
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/pgtable.h>
-#include <asm/system.h>
+#include <asm/switch_to.h>
/*
* does not yet catch signals sent when the child dies.
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 9f843cdfee9e..fcec38241f79 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -33,7 +33,6 @@
#include <asm/firmware.h>
#include <asm/page.h>
#include <asm/param.h>
-#include <asm/system.h>
#include <asm/delay.h>
#include <asm/uaccess.h>
#include <asm/udbg.h>
@@ -868,6 +867,40 @@ int rtas_ibm_suspend_me(struct rtas_args *args)
}
#endif
+/**
+ * Find a specific pseries error log in an RTAS extended event log.
+ * @log: RTAS error/event log
+ * @section_id: two character section identifier
+ *
+ * Returns a pointer to the specified errorlog or NULL if not found.
+ */
+struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
+ uint16_t section_id)
+{
+ struct rtas_ext_event_log_v6 *ext_log =
+ (struct rtas_ext_event_log_v6 *)log->buffer;
+ struct pseries_errorlog *sect;
+ unsigned char *p, *log_end;
+
+ /* Check that we understand the format */
+ if (log->extended_log_length < sizeof(struct rtas_ext_event_log_v6) ||
+ ext_log->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG ||
+ ext_log->company_id != RTAS_V6EXT_COMPANY_ID_IBM)
+ return NULL;
+
+ log_end = log->buffer + log->extended_log_length;
+ p = ext_log->vendor_log;
+
+ while (p < log_end) {
+ sect = (struct pseries_errorlog *)p;
+ if (sect->id == section_id)
+ return sect;
+ p += sect->length;
+ }
+
+ return NULL;
+}
+
asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
{
struct rtas_args args;
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index b0ebdeab9494..afd4f051f3f2 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -51,7 +51,6 @@
#include <asm/btext.h>
#include <asm/nvram.h>
#include <asm/setup.h>
-#include <asm/system.h>
#include <asm/rtas.h>
#include <asm/iommu.h>
#include <asm/serial.h>
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index ac7610815113..9825f29d1faf 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -30,7 +30,6 @@
#include <asm/btext.h>
#include <asm/machdep.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <asm/pmac_feature.h>
#include <asm/sections.h>
#include <asm/nvram.h>
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 4cb8f1e9d044..389bd4f0cdb1 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -52,7 +52,6 @@
#include <asm/btext.h>
#include <asm/nvram.h>
#include <asm/setup.h>
-#include <asm/system.h>
#include <asm/rtas.h>
#include <asm/iommu.h>
#include <asm/serial.h>
@@ -598,7 +597,7 @@ void __init setup_arch(char **cmdline_p)
/* Initialize the MMU context management stuff */
mmu_context_init();
- kvm_rma_init();
+ kvm_linear_init();
ppc64_boot_msg(0x15, "Setup Done");
}
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 7006b7f4267a..651c5963662b 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -15,6 +15,7 @@
#include <asm/hw_breakpoint.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
+#include <asm/debug.h>
#include "signal.h"
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index e061ef5dd449..45eb998557f8 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -43,6 +43,7 @@
#include <asm/syscalls.h>
#include <asm/sigcontext.h>
#include <asm/vdso.h>
+#include <asm/switch_to.h>
#ifdef CONFIG_PPC64
#include "ppc32.h"
#include <asm/unistd.h>
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index a50b5ec281dc..2692efdb154e 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -33,6 +33,7 @@
#include <asm/cacheflush.h>
#include <asm/syscalls.h>
#include <asm/vdso.h>
+#include <asm/switch_to.h>
#include "signal.h"
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 46695febc09f..d9f94410fd7f 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -43,12 +43,12 @@
#include <asm/machdep.h>
#include <asm/cputhreads.h>
#include <asm/cputable.h>
-#include <asm/system.h>
#include <asm/mpic.h>
#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#endif
+#include <asm/debug.h>
#ifdef DEBUG
#include <asm/udbg.h>
diff --git a/arch/powerpc/kernel/softemu8xx.c b/arch/powerpc/kernel/softemu8xx.c
index af0e8290b4fc..29b2f81dd709 100644
--- a/arch/powerpc/kernel/softemu8xx.c
+++ b/arch/powerpc/kernel/softemu8xx.c
@@ -26,7 +26,6 @@
#include <asm/pgtable.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <asm/io.h>
/* Eventually we may need a look-up table, but this works for now.
diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c
index 641f9adc6205..eae33e10b65f 100644
--- a/arch/powerpc/kernel/swsusp.c
+++ b/arch/powerpc/kernel/swsusp.c
@@ -10,9 +10,9 @@
*/
#include <linux/sched.h>
-#include <asm/system.h>
#include <asm/current.h>
#include <asm/mmu_context.h>
+#include <asm/switch_to.h>
void save_processor_state(void)
{
diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c
index 168e88480223..0e899e47c325 100644
--- a/arch/powerpc/kernel/swsusp_64.c
+++ b/arch/powerpc/kernel/swsusp_64.c
@@ -6,7 +6,6 @@
* GPLv2
*/
-#include <asm/system.h>
#include <asm/iommu.h>
#include <linux/irq.h>
#include <linux/sched.h>
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 4e5bf1edc0f2..81c570633ead 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -50,6 +50,7 @@
#include <asm/mmu_context.h>
#include <asm/ppc-pci.h>
#include <asm/syscalls.h>
+#include <asm/switch_to.h>
asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp,
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 0c683d376b1c..3529446c2abd 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -17,7 +17,6 @@
#include <asm/machdep.h>
#include <asm/smp.h>
#include <asm/pmc.h>
-#include <asm/system.h>
#include "cacheinfo.h"
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index a750409ccc4e..6aa0c663e247 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -39,7 +39,6 @@
#include <asm/emulated_ops.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
@@ -58,6 +57,8 @@
#include <asm/ppc-opcode.h>
#include <asm/rio.h>
#include <asm/fadump.h>
+#include <asm/switch_to.h>
+#include <asm/debug.h>
#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
int (*__debugger)(struct pt_regs *regs) __read_mostly;
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index 57fa2c0a531c..c39c1ca77f46 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -46,9 +46,6 @@ void __init udbg_early_init(void)
#elif defined(CONFIG_PPC_EARLY_DEBUG_MAPLE)
/* Maple real mode debug */
udbg_init_maple_realmode();
-#elif defined(CONFIG_PPC_EARLY_DEBUG_ISERIES)
- /* For iSeries - hit Ctrl-x Ctrl-x to see the output */
- udbg_init_iseries();
#elif defined(CONFIG_PPC_EARLY_DEBUG_BEAT)
udbg_init_debug_beat();
#elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE)
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index d36ee1055f88..9eb5b9b536a7 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -24,7 +24,6 @@
#include <linux/memblock.h>
#include <asm/pgtable.h>
-#include <asm/system.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
@@ -721,10 +720,10 @@ static int __init vdso_init(void)
vdso_data->version.minor = SYSTEMCFG_MINOR;
vdso_data->processor = mfspr(SPRN_PVR);
/*
- * Fake the old platform number for pSeries and iSeries and add
+ * Fake the old platform number for pSeries and add
* in LPAR bit if necessary
*/
- vdso_data->platform = machine_is(iseries) ? 0x200 : 0x100;
+ vdso_data->platform = 0x100;
if (firmware_has_feature(FW_FEATURE_LPAR))
vdso_data->platform |= 1;
vdso_data->physicalMemorySize = memblock_phys_mem_size();
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index bca3fc427b45..b2f7c8480bf6 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1159,17 +1159,21 @@ static int vio_bus_remove(struct device *dev)
* vio_register_driver: - Register a new vio driver
* @drv: The vio_driver structure to be registered.
*/
-int vio_register_driver(struct vio_driver *viodrv)
+int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
+ const char *mod_name)
{
- printk(KERN_DEBUG "%s: driver %s registering\n", __func__,
- viodrv->driver.name);
+ pr_debug("%s: driver %s registering\n", __func__, viodrv->name);
/* fill in 'struct driver' fields */
+ viodrv->driver.name = viodrv->name;
+ viodrv->driver.pm = viodrv->pm;
viodrv->driver.bus = &vio_bus_type;
+ viodrv->driver.owner = owner;
+ viodrv->driver.mod_name = mod_name;
return driver_register(&viodrv->driver);
}
-EXPORT_SYMBOL(vio_register_driver);
+EXPORT_SYMBOL(__vio_register_driver);
/**
* vio_unregister_driver - Remove registration of vio driver.
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 78133deb4b64..8f64709ae331 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -69,6 +69,7 @@ config KVM_BOOK3S_64
config KVM_BOOK3S_64_HV
bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
depends on KVM_BOOK3S_64
+ select MMU_NOTIFIER
---help---
Support running unmodified book3s_64 guest kernels in
virtual machines on POWER7 and PPC970 processors that have
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index e41ac6f7dcf1..7d54f4ed6d96 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -258,7 +258,7 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
return true;
}
-void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
+void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
{
unsigned long *pending = &vcpu->arch.pending_exceptions;
unsigned long old_pending = vcpu->arch.pending_exceptions;
@@ -423,10 +423,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->sprg1 = vcpu->arch.shared->sprg1;
regs->sprg2 = vcpu->arch.shared->sprg2;
regs->sprg3 = vcpu->arch.shared->sprg3;
- regs->sprg4 = vcpu->arch.sprg4;
- regs->sprg5 = vcpu->arch.sprg5;
- regs->sprg6 = vcpu->arch.sprg6;
- regs->sprg7 = vcpu->arch.sprg7;
+ regs->sprg4 = vcpu->arch.shared->sprg4;
+ regs->sprg5 = vcpu->arch.shared->sprg5;
+ regs->sprg6 = vcpu->arch.shared->sprg6;
+ regs->sprg7 = vcpu->arch.shared->sprg7;
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
@@ -450,10 +450,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.shared->sprg1 = regs->sprg1;
vcpu->arch.shared->sprg2 = regs->sprg2;
vcpu->arch.shared->sprg3 = regs->sprg3;
- vcpu->arch.sprg4 = regs->sprg4;
- vcpu->arch.sprg5 = regs->sprg5;
- vcpu->arch.sprg6 = regs->sprg6;
- vcpu->arch.sprg7 = regs->sprg7;
+ vcpu->arch.shared->sprg4 = regs->sprg4;
+ vcpu->arch.shared->sprg5 = regs->sprg5;
+ vcpu->arch.shared->sprg6 = regs->sprg6;
+ vcpu->arch.shared->sprg7 = regs->sprg7;
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
@@ -477,41 +477,10 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return 0;
}
-/*
- * Get (and clear) the dirty memory log for a memory slot.
- */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
- struct kvm_dirty_log *log)
+void kvmppc_decrementer_func(unsigned long data)
{
- struct kvm_memory_slot *memslot;
- struct kvm_vcpu *vcpu;
- ulong ga, ga_end;
- int is_dirty = 0;
- int r;
- unsigned long n;
-
- mutex_lock(&kvm->slots_lock);
-
- r = kvm_get_dirty_log(kvm, log, &is_dirty);
- if (r)
- goto out;
-
- /* If nothing is dirty, don't bother messing with page tables. */
- if (is_dirty) {
- memslot = id_to_memslot(kvm->memslots, log->slot);
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
- ga = memslot->base_gfn << PAGE_SHIFT;
- ga_end = ga + (memslot->npages << PAGE_SHIFT);
-
- kvm_for_each_vcpu(n, vcpu, kvm)
- kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
-
- n = kvm_dirty_bitmap_bytes(memslot);
- memset(memslot->dirty_bitmap, 0, n);
- }
-
- r = 0;
-out:
- mutex_unlock(&kvm->slots_lock);
- return r;
+ kvmppc_core_queue_dec(vcpu);
+ kvm_vcpu_kick(vcpu);
}
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 9fecbfbce773..f922c29bb234 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -151,13 +151,15 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
bool primary = false;
bool evict = false;
struct hpte_cache *pte;
+ int r = 0;
/* Get host physical address for gpa */
hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
if (is_error_pfn(hpaddr)) {
printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
orig_pte->eaddr);
- return -EINVAL;
+ r = -EINVAL;
+ goto out;
}
hpaddr <<= PAGE_SHIFT;
@@ -249,7 +251,8 @@ next_pteg:
kvmppc_mmu_hpte_cache_map(vcpu, pte);
- return 0;
+out:
+ return r;
}
static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
@@ -297,12 +300,14 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
u64 gvsid;
u32 sr;
struct kvmppc_sid_map *map;
- struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu);
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ int r = 0;
if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
/* Invalidate an entry */
svcpu->sr[esid] = SR_INVALID;
- return -ENOENT;
+ r = -ENOENT;
+ goto out;
}
map = find_sid_vsid(vcpu, gvsid);
@@ -315,17 +320,21 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr);
- return 0;
+out:
+ svcpu_put(svcpu);
+ return r;
}
void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
{
int i;
- struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu);
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr));
for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++)
svcpu->sr[i] = SR_INVALID;
+
+ svcpu_put(svcpu);
}
void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index fa2f08434ba5..6f87f39a1ac2 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -88,12 +88,14 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
int vflags = 0;
int attempt = 0;
struct kvmppc_sid_map *map;
+ int r = 0;
/* Get host physical address for gpa */
hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
if (is_error_pfn(hpaddr)) {
printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
- return -EINVAL;
+ r = -EINVAL;
+ goto out;
}
hpaddr <<= PAGE_SHIFT;
hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
@@ -110,7 +112,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n",
vsid, orig_pte->eaddr);
WARN_ON(true);
- return -EINVAL;
+ r = -EINVAL;
+ goto out;
}
vsid = map->host_vsid;
@@ -131,8 +134,10 @@ map_again:
/* In case we tried normal mapping already, let's nuke old entries */
if (attempt > 1)
- if (ppc_md.hpte_remove(hpteg) < 0)
- return -1;
+ if (ppc_md.hpte_remove(hpteg) < 0) {
+ r = -1;
+ goto out;
+ }
ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M);
@@ -162,7 +167,8 @@ map_again:
kvmppc_mmu_hpte_cache_map(vcpu, pte);
}
- return 0;
+out:
+ return r;
}
static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
@@ -207,25 +213,30 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
{
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
int i;
int max_slb_size = 64;
int found_inval = -1;
int r;
- if (!to_svcpu(vcpu)->slb_max)
- to_svcpu(vcpu)->slb_max = 1;
+ if (!svcpu->slb_max)
+ svcpu->slb_max = 1;
/* Are we overwriting? */
- for (i = 1; i < to_svcpu(vcpu)->slb_max; i++) {
- if (!(to_svcpu(vcpu)->slb[i].esid & SLB_ESID_V))
+ for (i = 1; i < svcpu->slb_max; i++) {
+ if (!(svcpu->slb[i].esid & SLB_ESID_V))
found_inval = i;
- else if ((to_svcpu(vcpu)->slb[i].esid & ESID_MASK) == esid)
- return i;
+ else if ((svcpu->slb[i].esid & ESID_MASK) == esid) {
+ r = i;
+ goto out;
+ }
}
/* Found a spare entry that was invalidated before */
- if (found_inval > 0)
- return found_inval;
+ if (found_inval > 0) {
+ r = found_inval;
+ goto out;
+ }
/* No spare invalid entry, so create one */
@@ -233,30 +244,35 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
max_slb_size = mmu_slb_size;
/* Overflowing -> purge */
- if ((to_svcpu(vcpu)->slb_max) == max_slb_size)
+ if ((svcpu->slb_max) == max_slb_size)
kvmppc_mmu_flush_segments(vcpu);
- r = to_svcpu(vcpu)->slb_max;
- to_svcpu(vcpu)->slb_max++;
+ r = svcpu->slb_max;
+ svcpu->slb_max++;
+out:
+ svcpu_put(svcpu);
return r;
}
int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
{
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
u64 esid = eaddr >> SID_SHIFT;
u64 slb_esid = (eaddr & ESID_MASK) | SLB_ESID_V;
u64 slb_vsid = SLB_VSID_USER;
u64 gvsid;
int slb_index;
struct kvmppc_sid_map *map;
+ int r = 0;
slb_index = kvmppc_mmu_next_segment(vcpu, eaddr & ESID_MASK);
if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
/* Invalidate an entry */
- to_svcpu(vcpu)->slb[slb_index].esid = 0;
- return -ENOENT;
+ svcpu->slb[slb_index].esid = 0;
+ r = -ENOENT;
+ goto out;
}
map = find_sid_vsid(vcpu, gvsid);
@@ -269,18 +285,22 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
slb_vsid &= ~SLB_VSID_KP;
slb_esid |= slb_index;
- to_svcpu(vcpu)->slb[slb_index].esid = slb_esid;
- to_svcpu(vcpu)->slb[slb_index].vsid = slb_vsid;
+ svcpu->slb[slb_index].esid = slb_esid;
+ svcpu->slb[slb_index].vsid = slb_vsid;
trace_kvm_book3s_slbmte(slb_vsid, slb_esid);
- return 0;
+out:
+ svcpu_put(svcpu);
+ return r;
}
void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
{
- to_svcpu(vcpu)->slb_max = 1;
- to_svcpu(vcpu)->slb[0].esid = 0;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ svcpu->slb_max = 1;
+ svcpu->slb[0].esid = 0;
+ svcpu_put(svcpu);
}
void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index bc3a2ea94217..ddc485a529f2 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -23,6 +23,7 @@
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
+#include <linux/vmalloc.h>
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
@@ -33,15 +34,6 @@
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
-/* For now use fixed-size 16MB page table */
-#define HPT_ORDER 24
-#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
-#define HPT_HASH_MASK (HPT_NPTEG - 1)
-
-/* Pages in the VRMA are 16MB pages */
-#define VRMA_PAGE_ORDER 24
-#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
-
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
#define MAX_LPID_970 63
#define NR_LPIDS (LPID_RSVD + 1)
@@ -51,21 +43,41 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
{
unsigned long hpt;
unsigned long lpid;
+ struct revmap_entry *rev;
+ struct kvmppc_linear_info *li;
+
+ /* Allocate guest's hashed page table */
+ li = kvm_alloc_hpt();
+ if (li) {
+ /* using preallocated memory */
+ hpt = (ulong)li->base_virt;
+ kvm->arch.hpt_li = li;
+ } else {
+ /* using dynamic memory */
+ hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
+ __GFP_NOWARN, HPT_ORDER - PAGE_SHIFT);
+ }
- hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|__GFP_NOWARN,
- HPT_ORDER - PAGE_SHIFT);
if (!hpt) {
pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n");
return -ENOMEM;
}
kvm->arch.hpt_virt = hpt;
+ /* Allocate reverse map array */
+ rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE);
+ if (!rev) {
+ pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
+ goto out_freehpt;
+ }
+ kvm->arch.revmap = rev;
+
+ /* Allocate the guest's logical partition ID */
do {
lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS);
if (lpid >= NR_LPIDS) {
pr_err("kvm_alloc_hpt: No LPIDs free\n");
- free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
- return -ENOMEM;
+ goto out_freeboth;
}
} while (test_and_set_bit(lpid, lpid_inuse));
@@ -74,37 +86,64 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
return 0;
+
+ out_freeboth:
+ vfree(rev);
+ out_freehpt:
+ free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
+ return -ENOMEM;
}
void kvmppc_free_hpt(struct kvm *kvm)
{
clear_bit(kvm->arch.lpid, lpid_inuse);
- free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
+ vfree(kvm->arch.revmap);
+ if (kvm->arch.hpt_li)
+ kvm_release_hpt(kvm->arch.hpt_li);
+ else
+ free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
+}
+
+/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
+static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
+{
+ return (pgsize > 0x1000) ? HPTE_V_LARGE : 0;
+}
+
+/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */
+static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
+{
+ return (pgsize == 0x10000) ? 0x1000 : 0;
}
-void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
+void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
+ unsigned long porder)
{
unsigned long i;
- unsigned long npages = kvm->arch.ram_npages;
- unsigned long pfn;
- unsigned long *hpte;
- unsigned long hash;
- struct kvmppc_pginfo *pginfo = kvm->arch.ram_pginfo;
+ unsigned long npages;
+ unsigned long hp_v, hp_r;
+ unsigned long addr, hash;
+ unsigned long psize;
+ unsigned long hp0, hp1;
+ long ret;
- if (!pginfo)
- return;
+ psize = 1ul << porder;
+ npages = memslot->npages >> (porder - PAGE_SHIFT);
/* VRMA can't be > 1TB */
- if (npages > 1ul << (40 - kvm->arch.ram_porder))
- npages = 1ul << (40 - kvm->arch.ram_porder);
+ if (npages > 1ul << (40 - porder))
+ npages = 1ul << (40 - porder);
/* Can't use more than 1 HPTE per HPTEG */
if (npages > HPT_NPTEG)
npages = HPT_NPTEG;
+ hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
+ HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
+ hp1 = hpte1_pgsize_encoding(psize) |
+ HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
+
for (i = 0; i < npages; ++i) {
- pfn = pginfo[i].pfn;
- if (!pfn)
- break;
+ addr = i << porder;
/* can't use hpt_hash since va > 64 bits */
hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
/*
@@ -113,15 +152,15 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
* at most one HPTE per HPTEG, we just assume entry 7
* is available and use it.
*/
- hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 7));
- hpte += 7 * 2;
- /* HPTE low word - RPN, protection, etc. */
- hpte[1] = (pfn << PAGE_SHIFT) | HPTE_R_R | HPTE_R_C |
- HPTE_R_M | PP_RWXX;
- wmb();
- hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
- (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
- HPTE_V_LARGE | HPTE_V_VALID;
+ hash = (hash << 3) + 7;
+ hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
+ hp_r = hp1 | addr;
+ ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
+ if (ret != H_SUCCESS) {
+ pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
+ addr, ret);
+ break;
+ }
}
}
@@ -158,10 +197,814 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
}
+/*
+ * This is called to get a reference to a guest page if there isn't
+ * one already in the kvm->arch.slot_phys[][] arrays.
+ */
+static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
+ struct kvm_memory_slot *memslot,
+ unsigned long psize)
+{
+ unsigned long start;
+ long np, err;
+ struct page *page, *hpage, *pages[1];
+ unsigned long s, pgsize;
+ unsigned long *physp;
+ unsigned int is_io, got, pgorder;
+ struct vm_area_struct *vma;
+ unsigned long pfn, i, npages;
+
+ physp = kvm->arch.slot_phys[memslot->id];
+ if (!physp)
+ return -EINVAL;
+ if (physp[gfn - memslot->base_gfn])
+ return 0;
+
+ is_io = 0;
+ got = 0;
+ page = NULL;
+ pgsize = psize;
+ err = -EINVAL;
+ start = gfn_to_hva_memslot(memslot, gfn);
+
+ /* Instantiate and get the page we want access to */
+ np = get_user_pages_fast(start, 1, 1, pages);
+ if (np != 1) {
+ /* Look up the vma for the page */
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, start);
+ if (!vma || vma->vm_start > start ||
+ start + psize > vma->vm_end ||
+ !(vma->vm_flags & VM_PFNMAP))
+ goto up_err;
+ is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+ pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+ /* check alignment of pfn vs. requested page size */
+ if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
+ goto up_err;
+ up_read(&current->mm->mmap_sem);
+
+ } else {
+ page = pages[0];
+ got = KVMPPC_GOT_PAGE;
+
+ /* See if this is a large page */
+ s = PAGE_SIZE;
+ if (PageHuge(page)) {
+ hpage = compound_head(page);
+ s <<= compound_order(hpage);
+ /* Get the whole large page if slot alignment is ok */
+ if (s > psize && slot_is_aligned(memslot, s) &&
+ !(memslot->userspace_addr & (s - 1))) {
+ start &= ~(s - 1);
+ pgsize = s;
+ page = hpage;
+ }
+ }
+ if (s < psize)
+ goto out;
+ pfn = page_to_pfn(page);
+ }
+
+ npages = pgsize >> PAGE_SHIFT;
+ pgorder = __ilog2(npages);
+ physp += (gfn - memslot->base_gfn) & ~(npages - 1);
+ spin_lock(&kvm->arch.slot_phys_lock);
+ for (i = 0; i < npages; ++i) {
+ if (!physp[i]) {
+ physp[i] = ((pfn + i) << PAGE_SHIFT) +
+ got + is_io + pgorder;
+ got = 0;
+ }
+ }
+ spin_unlock(&kvm->arch.slot_phys_lock);
+ err = 0;
+
+ out:
+ if (got) {
+ if (PageHuge(page))
+ page = compound_head(page);
+ put_page(page);
+ }
+ return err;
+
+ up_err:
+ up_read(&current->mm->mmap_sem);
+ return err;
+}
+
+/*
+ * We come here on a H_ENTER call from the guest when we are not
+ * using mmu notifiers and we don't have the requested page pinned
+ * already.
+ */
+long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel)
+{
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long psize, gpa, gfn;
+ struct kvm_memory_slot *memslot;
+ long ret;
+
+ if (kvm->arch.using_mmu_notifiers)
+ goto do_insert;
+
+ psize = hpte_page_size(pteh, ptel);
+ if (!psize)
+ return H_PARAMETER;
+
+ pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
+
+ /* Find the memslot (if any) for this address */
+ gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
+ gfn = gpa >> PAGE_SHIFT;
+ memslot = gfn_to_memslot(kvm, gfn);
+ if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
+ if (!slot_is_aligned(memslot, psize))
+ return H_PARAMETER;
+ if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
+ return H_PARAMETER;
+ }
+
+ do_insert:
+ /* Protect linux PTE lookup from page table destruction */
+ rcu_read_lock_sched(); /* this disables preemption too */
+ vcpu->arch.pgdir = current->mm->pgd;
+ ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
+ rcu_read_unlock_sched();
+ if (ret == H_TOO_HARD) {
+ /* this can't happen */
+ pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
+ ret = H_RESOURCE; /* or something */
+ }
+ return ret;
+
+}
+
+static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
+ gva_t eaddr)
+{
+ u64 mask;
+ int i;
+
+ for (i = 0; i < vcpu->arch.slb_nr; i++) {
+ if (!(vcpu->arch.slb[i].orige & SLB_ESID_V))
+ continue;
+
+ if (vcpu->arch.slb[i].origv & SLB_VSID_B_1T)
+ mask = ESID_MASK_1T;
+ else
+ mask = ESID_MASK;
+
+ if (((vcpu->arch.slb[i].orige ^ eaddr) & mask) == 0)
+ return &vcpu->arch.slb[i];
+ }
+ return NULL;
+}
+
+static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
+ unsigned long ea)
+{
+ unsigned long ra_mask;
+
+ ra_mask = hpte_page_size(v, r) - 1;
+ return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask);
+}
+
static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
- struct kvmppc_pte *gpte, bool data)
+ struct kvmppc_pte *gpte, bool data)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvmppc_slb *slbe;
+ unsigned long slb_v;
+ unsigned long pp, key;
+ unsigned long v, gr;
+ unsigned long *hptep;
+ int index;
+ int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
+
+ /* Get SLB entry */
+ if (virtmode) {
+ slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr);
+ if (!slbe)
+ return -EINVAL;
+ slb_v = slbe->origv;
+ } else {
+ /* real mode access */
+ slb_v = vcpu->kvm->arch.vrma_slb_v;
+ }
+
+ /* Find the HPTE in the hash table */
+ index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v,
+ HPTE_V_VALID | HPTE_V_ABSENT);
+ if (index < 0)
+ return -ENOENT;
+ hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
+ v = hptep[0] & ~HPTE_V_HVLOCK;
+ gr = kvm->arch.revmap[index].guest_rpte;
+
+ /* Unlock the HPTE */
+ asm volatile("lwsync" : : : "memory");
+ hptep[0] = v;
+
+ gpte->eaddr = eaddr;
+ gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff);
+
+ /* Get PP bits and key for permission check */
+ pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
+ key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
+ key &= slb_v;
+
+ /* Calculate permissions */
+ gpte->may_read = hpte_read_permission(pp, key);
+ gpte->may_write = hpte_write_permission(pp, key);
+ gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));
+
+ /* Storage key permission check for POWER7 */
+ if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) {
+ int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
+ if (amrfield & 1)
+ gpte->may_read = 0;
+ if (amrfield & 2)
+ gpte->may_write = 0;
+ }
+
+ /* Get the guest physical address */
+ gpte->raddr = kvmppc_mmu_get_real_addr(v, gr, eaddr);
+ return 0;
+}
+
+/*
+ * Quick test for whether an instruction is a load or a store.
+ * If the instruction is a load or a store, then this will indicate
+ * which it is, at least on server processors. (Embedded processors
+ * have some external PID instructions that don't follow the rule
+ * embodied here.) If the instruction isn't a load or store, then
+ * this doesn't return anything useful.
+ */
+static int instruction_is_store(unsigned int instr)
+{
+ unsigned int mask;
+
+ mask = 0x10000000;
+ if ((instr & 0xfc000000) == 0x7c000000)
+ mask = 0x100; /* major opcode 31 */
+ return (instr & mask) != 0;
+}
+
+static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned long gpa, int is_store)
+{
+ int ret;
+ u32 last_inst;
+ unsigned long srr0 = kvmppc_get_pc(vcpu);
+
+ /* We try to load the last instruction. We don't let
+ * emulate_instruction do it as it doesn't check what
+ * kvmppc_ld returns.
+ * If we fail, we just return to the guest and try executing it again.
+ */
+ if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) {
+ ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
+ if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED)
+ return RESUME_GUEST;
+ vcpu->arch.last_inst = last_inst;
+ }
+
+ /*
+ * WARNING: We do not know for sure whether the instruction we just
+ * read from memory is the same that caused the fault in the first
+ * place. If the instruction we read is neither an load or a store,
+ * then it can't access memory, so we don't need to worry about
+ * enforcing access permissions. So, assuming it is a load or
+ * store, we just check that its direction (load or store) is
+ * consistent with the original fault, since that's what we
+ * checked the access permissions against. If there is a mismatch
+ * we just return and retry the instruction.
+ */
+
+ if (instruction_is_store(vcpu->arch.last_inst) != !!is_store)
+ return RESUME_GUEST;
+
+ /*
+ * Emulated accesses are emulated by looking at the hash for
+ * translation once, then performing the access later. The
+ * translation could be invalidated in the meantime in which
+ * point performing the subsequent memory access on the old
+ * physical address could possibly be a security hole for the
+ * guest (but not the host).
+ *
+ * This is less of an issue for MMIO stores since they aren't
+ * globally visible. It could be an issue for MMIO loads to
+ * a certain extent but we'll ignore it for now.
+ */
+
+ vcpu->arch.paddr_accessed = gpa;
+ return kvmppc_emulate_mmio(run, vcpu);
+}
+
+int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned long ea, unsigned long dsisr)
+{
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long *hptep, hpte[3], r;
+ unsigned long mmu_seq, psize, pte_size;
+ unsigned long gfn, hva, pfn;
+ struct kvm_memory_slot *memslot;
+ unsigned long *rmap;
+ struct revmap_entry *rev;
+ struct page *page, *pages[1];
+ long index, ret, npages;
+ unsigned long is_io;
+ unsigned int writing, write_ok;
+ struct vm_area_struct *vma;
+ unsigned long rcbits;
+
+ /*
+ * Real-mode code has already searched the HPT and found the
+ * entry we're interested in. Lock the entry and check that
+ * it hasn't changed. If it has, just return and re-execute the
+ * instruction.
+ */
+ if (ea != vcpu->arch.pgfault_addr)
+ return RESUME_GUEST;
+ index = vcpu->arch.pgfault_index;
+ hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
+ rev = &kvm->arch.revmap[index];
+ preempt_disable();
+ while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+ cpu_relax();
+ hpte[0] = hptep[0] & ~HPTE_V_HVLOCK;
+ hpte[1] = hptep[1];
+ hpte[2] = r = rev->guest_rpte;
+ asm volatile("lwsync" : : : "memory");
+ hptep[0] = hpte[0];
+ preempt_enable();
+
+ if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
+ hpte[1] != vcpu->arch.pgfault_hpte[1])
+ return RESUME_GUEST;
+
+ /* Translate the logical address and get the page */
+ psize = hpte_page_size(hpte[0], r);
+ gfn = hpte_rpn(r, psize);
+ memslot = gfn_to_memslot(kvm, gfn);
+
+ /* No memslot means it's an emulated MMIO region */
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
+ unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1));
+ return kvmppc_hv_emulate_mmio(run, vcpu, gpa,
+ dsisr & DSISR_ISSTORE);
+ }
+
+ if (!kvm->arch.using_mmu_notifiers)
+ return -EFAULT; /* should never get here */
+
+ /* used to check for invalidations in progress */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
+ is_io = 0;
+ pfn = 0;
+ page = NULL;
+ pte_size = PAGE_SIZE;
+ writing = (dsisr & DSISR_ISSTORE) != 0;
+ /* If writing != 0, then the HPTE must allow writing, if we get here */
+ write_ok = writing;
+ hva = gfn_to_hva_memslot(memslot, gfn);
+ npages = get_user_pages_fast(hva, 1, writing, pages);
+ if (npages < 1) {
+ /* Check if it's an I/O mapping */
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, hva);
+ if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
+ (vma->vm_flags & VM_PFNMAP)) {
+ pfn = vma->vm_pgoff +
+ ((hva - vma->vm_start) >> PAGE_SHIFT);
+ pte_size = psize;
+ is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+ write_ok = vma->vm_flags & VM_WRITE;
+ }
+ up_read(&current->mm->mmap_sem);
+ if (!pfn)
+ return -EFAULT;
+ } else {
+ page = pages[0];
+ if (PageHuge(page)) {
+ page = compound_head(page);
+ pte_size <<= compound_order(page);
+ }
+ /* if the guest wants write access, see if that is OK */
+ if (!writing && hpte_is_writable(r)) {
+ pte_t *ptep, pte;
+
+ /*
+ * We need to protect against page table destruction
+ * while looking up and updating the pte.
+ */
+ rcu_read_lock_sched();
+ ptep = find_linux_pte_or_hugepte(current->mm->pgd,
+ hva, NULL);
+ if (ptep && pte_present(*ptep)) {
+ pte = kvmppc_read_update_linux_pte(ptep, 1);
+ if (pte_write(pte))
+ write_ok = 1;
+ }
+ rcu_read_unlock_sched();
+ }
+ pfn = page_to_pfn(page);
+ }
+
+ ret = -EFAULT;
+ if (psize > pte_size)
+ goto out_put;
+
+ /* Check WIMG vs. the actual page we're accessing */
+ if (!hpte_cache_flags_ok(r, is_io)) {
+ if (is_io)
+ return -EFAULT;
+ /*
+ * Allow guest to map emulated device memory as
+ * uncacheable, but actually make it cacheable.
+ */
+ r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;
+ }
+
+ /* Set the HPTE to point to pfn */
+ r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT);
+ if (hpte_is_writable(r) && !write_ok)
+ r = hpte_make_readonly(r);
+ ret = RESUME_GUEST;
+ preempt_disable();
+ while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+ cpu_relax();
+ if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] ||
+ rev->guest_rpte != hpte[2])
+ /* HPTE has been changed under us; let the guest retry */
+ goto out_unlock;
+ hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
+
+ rmap = &memslot->rmap[gfn - memslot->base_gfn];
+ lock_rmap(rmap);
+
+ /* Check if we might have been invalidated; let the guest retry if so */
+ ret = RESUME_GUEST;
+ if (mmu_notifier_retry(vcpu, mmu_seq)) {
+ unlock_rmap(rmap);
+ goto out_unlock;
+ }
+
+ /* Only set R/C in real HPTE if set in both *rmap and guest_rpte */
+ rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
+ r &= rcbits | ~(HPTE_R_R | HPTE_R_C);
+
+ if (hptep[0] & HPTE_V_VALID) {
+ /* HPTE was previously valid, so we need to invalidate it */
+ unlock_rmap(rmap);
+ hptep[0] |= HPTE_V_ABSENT;
+ kvmppc_invalidate_hpte(kvm, hptep, index);
+ /* don't lose previous R and C bits */
+ r |= hptep[1] & (HPTE_R_R | HPTE_R_C);
+ } else {
+ kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
+ }
+
+ hptep[1] = r;
+ eieio();
+ hptep[0] = hpte[0];
+ asm volatile("ptesync" : : : "memory");
+ preempt_enable();
+ if (page && hpte_is_writable(r))
+ SetPageDirty(page);
+
+ out_put:
+ if (page)
+ put_page(page);
+ return ret;
+
+ out_unlock:
+ hptep[0] &= ~HPTE_V_HVLOCK;
+ preempt_enable();
+ goto out_put;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+ int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long gfn))
+{
+ int ret;
+ int retval = 0;
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, slots) {
+ unsigned long start = memslot->userspace_addr;
+ unsigned long end;
+
+ end = start + (memslot->npages << PAGE_SHIFT);
+ if (hva >= start && hva < end) {
+ gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+
+ ret = handler(kvm, &memslot->rmap[gfn_offset],
+ memslot->base_gfn + gfn_offset);
+ retval |= ret;
+ }
+ }
+
+ return retval;
+}
+
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long gfn)
+{
+ struct revmap_entry *rev = kvm->arch.revmap;
+ unsigned long h, i, j;
+ unsigned long *hptep;
+ unsigned long ptel, psize, rcbits;
+
+ for (;;) {
+ lock_rmap(rmapp);
+ if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+ unlock_rmap(rmapp);
+ break;
+ }
+
+ /*
+ * To avoid an ABBA deadlock with the HPTE lock bit,
+ * we can't spin on the HPTE lock while holding the
+ * rmap chain lock.
+ */
+ i = *rmapp & KVMPPC_RMAP_INDEX;
+ hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+ if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
+ /* unlock rmap before spinning on the HPTE lock */
+ unlock_rmap(rmapp);
+ while (hptep[0] & HPTE_V_HVLOCK)
+ cpu_relax();
+ continue;
+ }
+ j = rev[i].forw;
+ if (j == i) {
+ /* chain is now empty */
+ *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
+ } else {
+ /* remove i from chain */
+ h = rev[i].back;
+ rev[h].forw = j;
+ rev[j].back = h;
+ rev[i].forw = rev[i].back = i;
+ *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
+ }
+
+ /* Now check and modify the HPTE */
+ ptel = rev[i].guest_rpte;
+ psize = hpte_page_size(hptep[0], ptel);
+ if ((hptep[0] & HPTE_V_VALID) &&
+ hpte_rpn(ptel, psize) == gfn) {
+ hptep[0] |= HPTE_V_ABSENT;
+ kvmppc_invalidate_hpte(kvm, hptep, i);
+ /* Harvest R and C */
+ rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
+ *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+ rev[i].guest_rpte = ptel | rcbits;
+ }
+ unlock_rmap(rmapp);
+ hptep[0] &= ~HPTE_V_HVLOCK;
+ }
+ return 0;
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+ if (kvm->arch.using_mmu_notifiers)
+ kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+ return 0;
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long gfn)
+{
+ struct revmap_entry *rev = kvm->arch.revmap;
+ unsigned long head, i, j;
+ unsigned long *hptep;
+ int ret = 0;
+
+ retry:
+ lock_rmap(rmapp);
+ if (*rmapp & KVMPPC_RMAP_REFERENCED) {
+ *rmapp &= ~KVMPPC_RMAP_REFERENCED;
+ ret = 1;
+ }
+ if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+ unlock_rmap(rmapp);
+ return ret;
+ }
+
+ i = head = *rmapp & KVMPPC_RMAP_INDEX;
+ do {
+ hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+ j = rev[i].forw;
+
+ /* If this HPTE isn't referenced, ignore it */
+ if (!(hptep[1] & HPTE_R_R))
+ continue;
+
+ if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
+ /* unlock rmap before spinning on the HPTE lock */
+ unlock_rmap(rmapp);
+ while (hptep[0] & HPTE_V_HVLOCK)
+ cpu_relax();
+ goto retry;
+ }
+
+ /* Now check and modify the HPTE */
+ if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) {
+ kvmppc_clear_ref_hpte(kvm, hptep, i);
+ rev[i].guest_rpte |= HPTE_R_R;
+ ret = 1;
+ }
+ hptep[0] &= ~HPTE_V_HVLOCK;
+ } while ((i = j) != head);
+
+ unlock_rmap(rmapp);
+ return ret;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ if (!kvm->arch.using_mmu_notifiers)
+ return 0;
+ return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+}
+
+static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long gfn)
+{
+ struct revmap_entry *rev = kvm->arch.revmap;
+ unsigned long head, i, j;
+ unsigned long *hp;
+ int ret = 1;
+
+ if (*rmapp & KVMPPC_RMAP_REFERENCED)
+ return 1;
+
+ lock_rmap(rmapp);
+ if (*rmapp & KVMPPC_RMAP_REFERENCED)
+ goto out;
+
+ if (*rmapp & KVMPPC_RMAP_PRESENT) {
+ i = head = *rmapp & KVMPPC_RMAP_INDEX;
+ do {
+ hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4));
+ j = rev[i].forw;
+ if (hp[1] & HPTE_R_R)
+ goto out;
+ } while ((i = j) != head);
+ }
+ ret = 0;
+
+ out:
+ unlock_rmap(rmapp);
+ return ret;
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ if (!kvm->arch.using_mmu_notifiers)
+ return 0;
+ return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
- return -ENOENT;
+ if (!kvm->arch.using_mmu_notifiers)
+ return;
+ kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+}
+
+static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
+{
+ struct revmap_entry *rev = kvm->arch.revmap;
+ unsigned long head, i, j;
+ unsigned long *hptep;
+ int ret = 0;
+
+ retry:
+ lock_rmap(rmapp);
+ if (*rmapp & KVMPPC_RMAP_CHANGED) {
+ *rmapp &= ~KVMPPC_RMAP_CHANGED;
+ ret = 1;
+ }
+ if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
+ unlock_rmap(rmapp);
+ return ret;
+ }
+
+ i = head = *rmapp & KVMPPC_RMAP_INDEX;
+ do {
+ hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+ j = rev[i].forw;
+
+ if (!(hptep[1] & HPTE_R_C))
+ continue;
+
+ if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
+ /* unlock rmap before spinning on the HPTE lock */
+ unlock_rmap(rmapp);
+ while (hptep[0] & HPTE_V_HVLOCK)
+ cpu_relax();
+ goto retry;
+ }
+
+ /* Now check and modify the HPTE */
+ if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) {
+ /* need to make it temporarily absent to clear C */
+ hptep[0] |= HPTE_V_ABSENT;
+ kvmppc_invalidate_hpte(kvm, hptep, i);
+ hptep[1] &= ~HPTE_R_C;
+ eieio();
+ hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
+ rev[i].guest_rpte |= HPTE_R_C;
+ ret = 1;
+ }
+ hptep[0] &= ~HPTE_V_HVLOCK;
+ } while ((i = j) != head);
+
+ unlock_rmap(rmapp);
+ return ret;
+}
+
+long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+ unsigned long i;
+ unsigned long *rmapp, *map;
+
+ preempt_disable();
+ rmapp = memslot->rmap;
+ map = memslot->dirty_bitmap;
+ for (i = 0; i < memslot->npages; ++i) {
+ if (kvm_test_clear_dirty(kvm, rmapp))
+ __set_bit_le(i, map);
+ ++rmapp;
+ }
+ preempt_enable();
+ return 0;
+}
+
+void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
+ unsigned long *nb_ret)
+{
+ struct kvm_memory_slot *memslot;
+ unsigned long gfn = gpa >> PAGE_SHIFT;
+ struct page *page, *pages[1];
+ int npages;
+ unsigned long hva, psize, offset;
+ unsigned long pa;
+ unsigned long *physp;
+
+ memslot = gfn_to_memslot(kvm, gfn);
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+ return NULL;
+ if (!kvm->arch.using_mmu_notifiers) {
+ physp = kvm->arch.slot_phys[memslot->id];
+ if (!physp)
+ return NULL;
+ physp += gfn - memslot->base_gfn;
+ pa = *physp;
+ if (!pa) {
+ if (kvmppc_get_guest_page(kvm, gfn, memslot,
+ PAGE_SIZE) < 0)
+ return NULL;
+ pa = *physp;
+ }
+ page = pfn_to_page(pa >> PAGE_SHIFT);
+ } else {
+ hva = gfn_to_hva_memslot(memslot, gfn);
+ npages = get_user_pages_fast(hva, 1, 1, pages);
+ if (npages < 1)
+ return NULL;
+ page = pages[0];
+ }
+ psize = PAGE_SIZE;
+ if (PageHuge(page)) {
+ page = compound_head(page);
+ psize <<= compound_order(page);
+ }
+ if (!kvm->arch.using_mmu_notifiers)
+ get_page(page);
+ offset = gpa & (psize - 1);
+ if (nb_ret)
+ *nb_ret = psize - offset;
+ return page_address(page) + offset;
+}
+
+void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
+{
+ struct page *page = virt_to_page(va);
+
+ page = compound_head(page);
+ put_page(page);
}
void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 0c9dc62532d0..135663a3e4fc 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -21,6 +21,7 @@
#include <asm/disassemble.h>
#include <asm/kvm_book3s.h>
#include <asm/reg.h>
+#include <asm/switch_to.h>
#define OP_19_XOP_RFID 18
#define OP_19_XOP_RFI 50
@@ -230,9 +231,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = kvmppc_st(vcpu, &addr, 32, zeros, true);
if ((r == -ENOENT) || (r == -EPERM)) {
+ struct kvmppc_book3s_shadow_vcpu *svcpu;
+
+ svcpu = svcpu_get(vcpu);
*advance = 0;
vcpu->arch.shared->dar = vaddr;
- to_svcpu(vcpu)->fault_dar = vaddr;
+ svcpu->fault_dar = vaddr;
dsisr = DSISR_ISSTORE;
if (r == -ENOENT)
@@ -241,7 +245,8 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
dsisr |= DSISR_PROTFAULT;
vcpu->arch.shared->dsisr = dsisr;
- to_svcpu(vcpu)->fault_dsisr = dsisr;
+ svcpu->fault_dsisr = dsisr;
+ svcpu_put(svcpu);
kvmppc_book3s_queue_irqprio(vcpu,
BOOK3S_INTERRUPT_DATA_STORAGE);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index a7267167a550..01294a5099dd 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -45,25 +45,18 @@
#include <asm/cputhreads.h>
#include <asm/page.h>
#include <asm/hvcall.h>
+#include <asm/switch_to.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
-
-/*
- * For now, limit memory to 64GB and require it to be large pages.
- * This value is chosen because it makes the ram_pginfo array be
- * 64kB in size, which is about as large as we want to be trying
- * to allocate with kmalloc.
- */
-#define MAX_MEM_ORDER 36
-
-#define LARGE_PAGE_ORDER 24 /* 16MB pages */
+#include <linux/hugetlb.h>
/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
+static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu);
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
@@ -146,10 +139,10 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
unsigned long vcpuid, unsigned long vpa)
{
struct kvm *kvm = vcpu->kvm;
- unsigned long pg_index, ra, len;
- unsigned long pg_offset;
+ unsigned long len, nb;
void *va;
struct kvm_vcpu *tvcpu;
+ int err = H_PARAMETER;
tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
if (!tvcpu)
@@ -162,45 +155,41 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
if (flags < 4) {
if (vpa & 0x7f)
return H_PARAMETER;
+ if (flags >= 2 && !tvcpu->arch.vpa)
+ return H_RESOURCE;
/* registering new area; convert logical addr to real */
- pg_index = vpa >> kvm->arch.ram_porder;
- pg_offset = vpa & (kvm->arch.ram_psize - 1);
- if (pg_index >= kvm->arch.ram_npages)
+ va = kvmppc_pin_guest_page(kvm, vpa, &nb);
+ if (va == NULL)
return H_PARAMETER;
- if (kvm->arch.ram_pginfo[pg_index].pfn == 0)
- return H_PARAMETER;
- ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT;
- ra |= pg_offset;
- va = __va(ra);
if (flags <= 1)
len = *(unsigned short *)(va + 4);
else
len = *(unsigned int *)(va + 4);
- if (pg_offset + len > kvm->arch.ram_psize)
- return H_PARAMETER;
+ if (len > nb)
+ goto out_unpin;
switch (flags) {
case 1: /* register VPA */
if (len < 640)
- return H_PARAMETER;
+ goto out_unpin;
+ if (tvcpu->arch.vpa)
+ kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa);
tvcpu->arch.vpa = va;
init_vpa(vcpu, va);
break;
case 2: /* register DTL */
if (len < 48)
- return H_PARAMETER;
- if (!tvcpu->arch.vpa)
- return H_RESOURCE;
+ goto out_unpin;
len -= len % 48;
+ if (tvcpu->arch.dtl)
+ kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl);
tvcpu->arch.dtl = va;
tvcpu->arch.dtl_end = va + len;
break;
case 3: /* register SLB shadow buffer */
- if (len < 8)
- return H_PARAMETER;
- if (!tvcpu->arch.vpa)
- return H_RESOURCE;
- tvcpu->arch.slb_shadow = va;
- len = (len - 16) / 16;
+ if (len < 16)
+ goto out_unpin;
+ if (tvcpu->arch.slb_shadow)
+ kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow);
tvcpu->arch.slb_shadow = va;
break;
}
@@ -209,17 +198,30 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
case 5: /* unregister VPA */
if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
return H_RESOURCE;
+ if (!tvcpu->arch.vpa)
+ break;
+ kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa);
tvcpu->arch.vpa = NULL;
break;
case 6: /* unregister DTL */
+ if (!tvcpu->arch.dtl)
+ break;
+ kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl);
tvcpu->arch.dtl = NULL;
break;
case 7: /* unregister SLB shadow buffer */
+ if (!tvcpu->arch.slb_shadow)
+ break;
+ kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow);
tvcpu->arch.slb_shadow = NULL;
break;
}
}
return H_SUCCESS;
+
+ out_unpin:
+ kvmppc_unpin_guest_page(kvm, va);
+ return err;
}
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
@@ -229,6 +231,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
struct kvm_vcpu *tvcpu;
switch (req) {
+ case H_ENTER:
+ ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ break;
case H_CEDE:
break;
case H_PROD:
@@ -318,20 +326,19 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
}
/*
- * We get these next two if the guest does a bad real-mode access,
- * as we have enabled VRMA (virtualized real mode area) mode in the
- * LPCR. We just generate an appropriate DSI/ISI to the guest.
+ * We get these next two if the guest accesses a page which it thinks
+ * it has mapped but which is not actually present, either because
+ * it is for an emulated I/O device or because the corresonding
+ * host page has been paged out. Any other HDSI/HISI interrupts
+ * have been handled already.
*/
case BOOK3S_INTERRUPT_H_DATA_STORAGE:
- vcpu->arch.shregs.dsisr = vcpu->arch.fault_dsisr;
- vcpu->arch.shregs.dar = vcpu->arch.fault_dar;
- kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0);
- r = RESUME_GUEST;
+ r = kvmppc_book3s_hv_page_fault(run, vcpu,
+ vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
break;
case BOOK3S_INTERRUPT_H_INST_STORAGE:
- kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE,
- 0x08000000);
- r = RESUME_GUEST;
+ r = kvmppc_book3s_hv_page_fault(run, vcpu,
+ kvmppc_get_pc(vcpu), 0);
break;
/*
* This occurs if the guest executes an illegal instruction.
@@ -391,6 +398,42 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return 0;
}
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r = -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_PPC_HIOR:
+ r = put_user(0, (u64 __user *)reg->addr);
+ break;
+ default:
+ break;
+ }
+
+ return r;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r = -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_PPC_HIOR:
+ {
+ u64 hior;
+ /* Only allow this to be set to zero */
+ r = get_user(hior, (u64 __user *)reg->addr);
+ if (!r && (hior != 0))
+ r = -EINVAL;
+ break;
+ }
+ default:
+ break;
+ }
+
+ return r;
+}
+
int kvmppc_core_check_processor_compat(void)
{
if (cpu_has_feature(CPU_FTR_HVMODE))
@@ -410,7 +453,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
goto out;
err = -ENOMEM;
- vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu)
goto out;
@@ -462,15 +505,21 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
return vcpu;
free_vcpu:
- kfree(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
return ERR_PTR(err);
}
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
+ if (vcpu->arch.dtl)
+ kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl);
+ if (vcpu->arch.slb_shadow)
+ kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow);
+ if (vcpu->arch.vpa)
+ kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa);
kvm_vcpu_uninit(vcpu);
- kfree(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
@@ -481,7 +530,7 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
if (now > vcpu->arch.dec_expires) {
/* decrementer has already gone negative */
kvmppc_core_queue_dec(vcpu);
- kvmppc_core_deliver_interrupts(vcpu);
+ kvmppc_core_prepare_to_enter(vcpu);
return;
}
dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
@@ -796,7 +845,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
list_for_each_entry_safe(v, vn, &vc->runnable_threads,
arch.run_list) {
- kvmppc_core_deliver_interrupts(v);
+ kvmppc_core_prepare_to_enter(v);
if (signal_pending(v->arch.run_task)) {
kvmppc_remove_runnable(vc, v);
v->stat.signal_exits++;
@@ -835,20 +884,26 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
return -EINVAL;
}
+ kvmppc_core_prepare_to_enter(vcpu);
+
/* No need to go into the guest when all we'll do is come back out */
if (signal_pending(current)) {
run->exit_reason = KVM_EXIT_INTR;
return -EINTR;
}
- /* On PPC970, check that we have an RMA region */
- if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
- return -EPERM;
+ /* On the first time here, set up VRMA or RMA */
+ if (!vcpu->kvm->arch.rma_setup_done) {
+ r = kvmppc_hv_setup_rma(vcpu);
+ if (r)
+ return r;
+ }
flush_fp_to_thread(current);
flush_altivec_to_thread(current);
flush_vsx_to_thread(current);
vcpu->arch.wqp = &vcpu->arch.vcore->wq;
+ vcpu->arch.pgdir = current->mm->pgd;
do {
r = kvmppc_run_vcpu(run, vcpu);
@@ -856,7 +911,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
!(vcpu->arch.shregs.msr & MSR_PR)) {
r = kvmppc_pseries_do_hcall(vcpu);
- kvmppc_core_deliver_interrupts(vcpu);
+ kvmppc_core_prepare_to_enter(vcpu);
}
} while (r == RESUME_GUEST);
return r;
@@ -1000,7 +1055,7 @@ static inline int lpcr_rmls(unsigned long rma_size)
static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- struct kvmppc_rma_info *ri = vma->vm_file->private_data;
+ struct kvmppc_linear_info *ri = vma->vm_file->private_data;
struct page *page;
if (vmf->pgoff >= ri->npages)
@@ -1025,7 +1080,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
static int kvm_rma_release(struct inode *inode, struct file *filp)
{
- struct kvmppc_rma_info *ri = filp->private_data;
+ struct kvmppc_linear_info *ri = filp->private_data;
kvm_release_rma(ri);
return 0;
@@ -1038,7 +1093,7 @@ static struct file_operations kvm_rma_fops = {
long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
{
- struct kvmppc_rma_info *ri;
+ struct kvmppc_linear_info *ri;
long fd;
ri = kvm_alloc_rma();
@@ -1053,89 +1108,189 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
return fd;
}
-static struct page *hva_to_page(unsigned long addr)
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
- struct page *page[1];
- int npages;
+ struct kvm_memory_slot *memslot;
+ int r;
+ unsigned long n;
- might_sleep();
+ mutex_lock(&kvm->slots_lock);
- npages = get_user_pages_fast(addr, 1, 1, page);
+ r = -EINVAL;
+ if (log->slot >= KVM_MEMORY_SLOTS)
+ goto out;
- if (unlikely(npages != 1))
- return 0;
+ memslot = id_to_memslot(kvm->memslots, log->slot);
+ r = -ENOENT;
+ if (!memslot->dirty_bitmap)
+ goto out;
+
+ n = kvm_dirty_bitmap_bytes(memslot);
+ memset(memslot->dirty_bitmap, 0, n);
+
+ r = kvmppc_hv_get_dirty_log(kvm, memslot);
+ if (r)
+ goto out;
- return page[0];
+ r = -EFAULT;
+ if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
+ goto out;
+
+ r = 0;
+out:
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+}
+
+static unsigned long slb_pgsize_encoding(unsigned long psize)
+{
+ unsigned long senc = 0;
+
+ if (psize > 0x1000) {
+ senc = SLB_VSID_L;
+ if (psize == 0x10000)
+ senc |= SLB_VSID_LP_01;
+ }
+ return senc;
}
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)
{
- unsigned long psize, porder;
- unsigned long i, npages, totalpages;
- unsigned long pg_ix;
- struct kvmppc_pginfo *pginfo;
- unsigned long hva;
- struct kvmppc_rma_info *ri = NULL;
+ unsigned long npages;
+ unsigned long *phys;
+
+ /* Allocate a slot_phys array */
+ phys = kvm->arch.slot_phys[mem->slot];
+ if (!kvm->arch.using_mmu_notifiers && !phys) {
+ npages = mem->memory_size >> PAGE_SHIFT;
+ phys = vzalloc(npages * sizeof(unsigned long));
+ if (!phys)
+ return -ENOMEM;
+ kvm->arch.slot_phys[mem->slot] = phys;
+ kvm->arch.slot_npages[mem->slot] = npages;
+ }
+
+ return 0;
+}
+
+static void unpin_slot(struct kvm *kvm, int slot_id)
+{
+ unsigned long *physp;
+ unsigned long j, npages, pfn;
struct page *page;
- /* For now, only allow 16MB pages */
- porder = LARGE_PAGE_ORDER;
- psize = 1ul << porder;
- if ((mem->memory_size & (psize - 1)) ||
- (mem->guest_phys_addr & (psize - 1))) {
- pr_err("bad memory_size=%llx @ %llx\n",
- mem->memory_size, mem->guest_phys_addr);
- return -EINVAL;
+ physp = kvm->arch.slot_phys[slot_id];
+ npages = kvm->arch.slot_npages[slot_id];
+ if (physp) {
+ spin_lock(&kvm->arch.slot_phys_lock);
+ for (j = 0; j < npages; j++) {
+ if (!(physp[j] & KVMPPC_GOT_PAGE))
+ continue;
+ pfn = physp[j] >> PAGE_SHIFT;
+ page = pfn_to_page(pfn);
+ if (PageHuge(page))
+ page = compound_head(page);
+ SetPageDirty(page);
+ put_page(page);
+ }
+ kvm->arch.slot_phys[slot_id] = NULL;
+ spin_unlock(&kvm->arch.slot_phys_lock);
+ vfree(physp);
}
+}
- npages = mem->memory_size >> porder;
- totalpages = (mem->guest_phys_addr + mem->memory_size) >> porder;
+void kvmppc_core_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem)
+{
+}
- /* More memory than we have space to track? */
- if (totalpages > (1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER)))
- return -EINVAL;
+static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
+{
+ int err = 0;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvmppc_linear_info *ri = NULL;
+ unsigned long hva;
+ struct kvm_memory_slot *memslot;
+ struct vm_area_struct *vma;
+ unsigned long lpcr, senc;
+ unsigned long psize, porder;
+ unsigned long rma_size;
+ unsigned long rmls;
+ unsigned long *physp;
+ unsigned long i, npages;
- /* Do we already have an RMA registered? */
- if (mem->guest_phys_addr == 0 && kvm->arch.rma)
- return -EINVAL;
+ mutex_lock(&kvm->lock);
+ if (kvm->arch.rma_setup_done)
+ goto out; /* another vcpu beat us to it */
- if (totalpages > kvm->arch.ram_npages)
- kvm->arch.ram_npages = totalpages;
+ /* Look up the memslot for guest physical address 0 */
+ memslot = gfn_to_memslot(kvm, 0);
+
+ /* We must have some memory at 0 by now */
+ err = -EINVAL;
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+ goto out;
+
+ /* Look up the VMA for the start of this memory slot */
+ hva = memslot->userspace_addr;
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma(current->mm, hva);
+ if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
+ goto up_out;
+
+ psize = vma_kernel_pagesize(vma);
+ porder = __ilog2(psize);
/* Is this one of our preallocated RMAs? */
- if (mem->guest_phys_addr == 0) {
- struct vm_area_struct *vma;
-
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, mem->userspace_addr);
- if (vma && vma->vm_file &&
- vma->vm_file->f_op == &kvm_rma_fops &&
- mem->userspace_addr == vma->vm_start)
- ri = vma->vm_file->private_data;
- up_read(&current->mm->mmap_sem);
- if (!ri && cpu_has_feature(CPU_FTR_ARCH_201)) {
- pr_err("CPU requires an RMO\n");
- return -EINVAL;
+ if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
+ hva == vma->vm_start)
+ ri = vma->vm_file->private_data;
+
+ up_read(&current->mm->mmap_sem);
+
+ if (!ri) {
+ /* On POWER7, use VRMA; on PPC970, give up */
+ err = -EPERM;
+ if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+ pr_err("KVM: CPU requires an RMO\n");
+ goto out;
}
- }
- if (ri) {
- unsigned long rma_size;
- unsigned long lpcr;
- long rmls;
+ /* We can handle 4k, 64k or 16M pages in the VRMA */
+ err = -EINVAL;
+ if (!(psize == 0x1000 || psize == 0x10000 ||
+ psize == 0x1000000))
+ goto out;
+
+ /* Update VRMASD field in the LPCR */
+ senc = slb_pgsize_encoding(psize);
+ kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+ (VRMA_VSID << SLB_VSID_SHIFT_1T);
+ lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
+ lpcr |= senc << (LPCR_VRMASD_SH - 4);
+ kvm->arch.lpcr = lpcr;
- rma_size = ri->npages << PAGE_SHIFT;
- if (rma_size > mem->memory_size)
- rma_size = mem->memory_size;
+ /* Create HPTEs in the hash page table for the VRMA */
+ kvmppc_map_vrma(vcpu, memslot, porder);
+
+ } else {
+ /* Set up to use an RMO region */
+ rma_size = ri->npages;
+ if (rma_size > memslot->npages)
+ rma_size = memslot->npages;
+ rma_size <<= PAGE_SHIFT;
rmls = lpcr_rmls(rma_size);
+ err = -EINVAL;
if (rmls < 0) {
- pr_err("Can't use RMA of 0x%lx bytes\n", rma_size);
- return -EINVAL;
+ pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
+ goto out;
}
atomic_inc(&ri->use_count);
kvm->arch.rma = ri;
- kvm->arch.n_rma_pages = rma_size >> porder;
/* Update LPCR and RMOR */
lpcr = kvm->arch.lpcr;
@@ -1155,53 +1310,35 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
}
kvm->arch.lpcr = lpcr;
- pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n",
+ pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
- }
- pg_ix = mem->guest_phys_addr >> porder;
- pginfo = kvm->arch.ram_pginfo + pg_ix;
- for (i = 0; i < npages; ++i, ++pg_ix) {
- if (ri && pg_ix < kvm->arch.n_rma_pages) {
- pginfo[i].pfn = ri->base_pfn +
- (pg_ix << (porder - PAGE_SHIFT));
- continue;
- }
- hva = mem->userspace_addr + (i << porder);
- page = hva_to_page(hva);
- if (!page) {
- pr_err("oops, no pfn for hva %lx\n", hva);
- goto err;
- }
- /* Check it's a 16MB page */
- if (!PageHead(page) ||
- compound_order(page) != (LARGE_PAGE_ORDER - PAGE_SHIFT)) {
- pr_err("page at %lx isn't 16MB (o=%d)\n",
- hva, compound_order(page));
- goto err;
- }
- pginfo[i].pfn = page_to_pfn(page);
+ /* Initialize phys addrs of pages in RMO */
+ npages = ri->npages;
+ porder = __ilog2(npages);
+ physp = kvm->arch.slot_phys[memslot->id];
+ spin_lock(&kvm->arch.slot_phys_lock);
+ for (i = 0; i < npages; ++i)
+ physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder;
+ spin_unlock(&kvm->arch.slot_phys_lock);
}
- return 0;
-
- err:
- return -EINVAL;
-}
+ /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
+ smp_wmb();
+ kvm->arch.rma_setup_done = 1;
+ err = 0;
+ out:
+ mutex_unlock(&kvm->lock);
+ return err;
-void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
-{
- if (mem->guest_phys_addr == 0 && mem->memory_size != 0 &&
- !kvm->arch.rma)
- kvmppc_map_vrma(kvm, mem);
+ up_out:
+ up_read(&current->mm->mmap_sem);
+ goto out;
}
int kvmppc_core_init_vm(struct kvm *kvm)
{
long r;
- unsigned long npages = 1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER);
- long err = -ENOMEM;
unsigned long lpcr;
/* Allocate hashed page table */
@@ -1211,19 +1348,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
- kvm->arch.ram_pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo),
- GFP_KERNEL);
- if (!kvm->arch.ram_pginfo) {
- pr_err("kvmppc_core_init_vm: couldn't alloc %lu bytes\n",
- npages * sizeof(struct kvmppc_pginfo));
- goto out_free;
- }
-
- kvm->arch.ram_npages = 0;
- kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
- kvm->arch.ram_porder = LARGE_PAGE_ORDER;
kvm->arch.rma = NULL;
- kvm->arch.n_rma_pages = 0;
kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
@@ -1241,30 +1366,25 @@ int kvmppc_core_init_vm(struct kvm *kvm)
kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
lpcr &= LPCR_PECE | LPCR_LPES;
lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
- LPCR_VPM0 | LPCR_VRMA_L;
+ LPCR_VPM0 | LPCR_VPM1;
+ kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
+ (VRMA_VSID << SLB_VSID_SHIFT_1T);
}
kvm->arch.lpcr = lpcr;
+ kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
+ spin_lock_init(&kvm->arch.slot_phys_lock);
return 0;
-
- out_free:
- kvmppc_free_hpt(kvm);
- return err;
}
void kvmppc_core_destroy_vm(struct kvm *kvm)
{
- struct kvmppc_pginfo *pginfo;
unsigned long i;
- if (kvm->arch.ram_pginfo) {
- pginfo = kvm->arch.ram_pginfo;
- kvm->arch.ram_pginfo = NULL;
- for (i = kvm->arch.n_rma_pages; i < kvm->arch.ram_npages; ++i)
- if (pginfo[i].pfn)
- put_page(pfn_to_page(pginfo[i].pfn));
- kfree(pginfo);
- }
+ if (!kvm->arch.using_mmu_notifiers)
+ for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
+ unpin_slot(kvm, i);
+
if (kvm->arch.rma) {
kvm_release_rma(kvm->arch.rma);
kvm->arch.rma = NULL;
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index a795a13f4a70..bed1279aa6a8 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -18,6 +18,15 @@
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
+#define KVM_LINEAR_RMA 0
+#define KVM_LINEAR_HPT 1
+
+static void __init kvm_linear_init_one(ulong size, int count, int type);
+static struct kvmppc_linear_info *kvm_alloc_linear(int type);
+static void kvm_release_linear(struct kvmppc_linear_info *ri);
+
+/*************** RMA *************/
+
/*
* This maintains a list of RMAs (real mode areas) for KVM guests to use.
* Each RMA has to be physically contiguous and of a size that the
@@ -29,32 +38,6 @@
static unsigned long kvm_rma_size = 64 << 20; /* 64MB */
static unsigned long kvm_rma_count;
-static int __init early_parse_rma_size(char *p)
-{
- if (!p)
- return 1;
-
- kvm_rma_size = memparse(p, &p);
-
- return 0;
-}
-early_param("kvm_rma_size", early_parse_rma_size);
-
-static int __init early_parse_rma_count(char *p)
-{
- if (!p)
- return 1;
-
- kvm_rma_count = simple_strtoul(p, NULL, 0);
-
- return 0;
-}
-early_param("kvm_rma_count", early_parse_rma_count);
-
-static struct kvmppc_rma_info *rma_info;
-static LIST_HEAD(free_rmas);
-static DEFINE_SPINLOCK(rma_lock);
-
/* Work out RMLS (real mode limit selector) field value for a given RMA size.
Assumes POWER7 or PPC970. */
static inline int lpcr_rmls(unsigned long rma_size)
@@ -81,45 +64,106 @@ static inline int lpcr_rmls(unsigned long rma_size)
}
}
+static int __init early_parse_rma_size(char *p)
+{
+ if (!p)
+ return 1;
+
+ kvm_rma_size = memparse(p, &p);
+
+ return 0;
+}
+early_param("kvm_rma_size", early_parse_rma_size);
+
+static int __init early_parse_rma_count(char *p)
+{
+ if (!p)
+ return 1;
+
+ kvm_rma_count = simple_strtoul(p, NULL, 0);
+
+ return 0;
+}
+early_param("kvm_rma_count", early_parse_rma_count);
+
+struct kvmppc_linear_info *kvm_alloc_rma(void)
+{
+ return kvm_alloc_linear(KVM_LINEAR_RMA);
+}
+EXPORT_SYMBOL_GPL(kvm_alloc_rma);
+
+void kvm_release_rma(struct kvmppc_linear_info *ri)
+{
+ kvm_release_linear(ri);
+}
+EXPORT_SYMBOL_GPL(kvm_release_rma);
+
+/*************** HPT *************/
+
/*
- * Called at boot time while the bootmem allocator is active,
- * to allocate contiguous physical memory for the real memory
- * areas for guests.
+ * This maintains a list of big linear HPT tables that contain the GVA->HPA
+ * memory mappings. If we don't reserve those early on, we might not be able
+ * to get a big (usually 16MB) linear memory region from the kernel anymore.
*/
-void __init kvm_rma_init(void)
+
+static unsigned long kvm_hpt_count;
+
+static int __init early_parse_hpt_count(char *p)
+{
+ if (!p)
+ return 1;
+
+ kvm_hpt_count = simple_strtoul(p, NULL, 0);
+
+ return 0;
+}
+early_param("kvm_hpt_count", early_parse_hpt_count);
+
+struct kvmppc_linear_info *kvm_alloc_hpt(void)
+{
+ return kvm_alloc_linear(KVM_LINEAR_HPT);
+}
+EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
+
+void kvm_release_hpt(struct kvmppc_linear_info *li)
+{
+ kvm_release_linear(li);
+}
+EXPORT_SYMBOL_GPL(kvm_release_hpt);
+
+/*************** generic *************/
+
+static LIST_HEAD(free_linears);
+static DEFINE_SPINLOCK(linear_lock);
+
+static void __init kvm_linear_init_one(ulong size, int count, int type)
{
unsigned long i;
unsigned long j, npages;
- void *rma;
+ void *linear;
struct page *pg;
+ const char *typestr;
+ struct kvmppc_linear_info *linear_info;
- /* Only do this on PPC970 in HV mode */
- if (!cpu_has_feature(CPU_FTR_HVMODE) ||
- !cpu_has_feature(CPU_FTR_ARCH_201))
- return;
-
- if (!kvm_rma_size || !kvm_rma_count)
+ if (!count)
return;
- /* Check that the requested size is one supported in hardware */
- if (lpcr_rmls(kvm_rma_size) < 0) {
- pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
- return;
- }
-
- npages = kvm_rma_size >> PAGE_SHIFT;
- rma_info = alloc_bootmem(kvm_rma_count * sizeof(struct kvmppc_rma_info));
- for (i = 0; i < kvm_rma_count; ++i) {
- rma = alloc_bootmem_align(kvm_rma_size, kvm_rma_size);
- pr_info("Allocated KVM RMA at %p (%ld MB)\n", rma,
- kvm_rma_size >> 20);
- rma_info[i].base_virt = rma;
- rma_info[i].base_pfn = __pa(rma) >> PAGE_SHIFT;
- rma_info[i].npages = npages;
- list_add_tail(&rma_info[i].list, &free_rmas);
- atomic_set(&rma_info[i].use_count, 0);
-
- pg = pfn_to_page(rma_info[i].base_pfn);
+ typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
+
+ npages = size >> PAGE_SHIFT;
+ linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
+ for (i = 0; i < count; ++i) {
+ linear = alloc_bootmem_align(size, size);
+ pr_info("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
+ size >> 20);
+ linear_info[i].base_virt = linear;
+ linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
+ linear_info[i].npages = npages;
+ linear_info[i].type = type;
+ list_add_tail(&linear_info[i].list, &free_linears);
+ atomic_set(&linear_info[i].use_count, 0);
+
+ pg = pfn_to_page(linear_info[i].base_pfn);
for (j = 0; j < npages; ++j) {
atomic_inc(&pg->_count);
++pg;
@@ -127,30 +171,59 @@ void __init kvm_rma_init(void)
}
}
-struct kvmppc_rma_info *kvm_alloc_rma(void)
+static struct kvmppc_linear_info *kvm_alloc_linear(int type)
{
- struct kvmppc_rma_info *ri;
+ struct kvmppc_linear_info *ri;
ri = NULL;
- spin_lock(&rma_lock);
- if (!list_empty(&free_rmas)) {
- ri = list_first_entry(&free_rmas, struct kvmppc_rma_info, list);
+ spin_lock(&linear_lock);
+ list_for_each_entry(ri, &free_linears, list) {
+ if (ri->type != type)
+ continue;
+
list_del(&ri->list);
atomic_inc(&ri->use_count);
+ break;
}
- spin_unlock(&rma_lock);
+ spin_unlock(&linear_lock);
+ memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
return ri;
}
-EXPORT_SYMBOL_GPL(kvm_alloc_rma);
-void kvm_release_rma(struct kvmppc_rma_info *ri)
+static void kvm_release_linear(struct kvmppc_linear_info *ri)
{
if (atomic_dec_and_test(&ri->use_count)) {
- spin_lock(&rma_lock);
- list_add_tail(&ri->list, &free_rmas);
- spin_unlock(&rma_lock);
+ spin_lock(&linear_lock);
+ list_add_tail(&ri->list, &free_linears);
+ spin_unlock(&linear_lock);
}
}
-EXPORT_SYMBOL_GPL(kvm_release_rma);
+/*
+ * Called at boot time while the bootmem allocator is active,
+ * to allocate contiguous physical memory for the hash page
+ * tables for guests.
+ */
+void __init kvm_linear_init(void)
+{
+ /* HPT */
+ kvm_linear_init_one(1 << HPT_ORDER, kvm_hpt_count, KVM_LINEAR_HPT);
+
+ /* RMA */
+ /* Only do this on PPC970 in HV mode */
+ if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+ !cpu_has_feature(CPU_FTR_ARCH_201))
+ return;
+
+ if (!kvm_rma_size || !kvm_rma_count)
+ return;
+
+ /* Check that the requested size is one supported in hardware */
+ if (lpcr_rmls(kvm_rma_size) < 0) {
+ pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
+ return;
+ }
+
+ kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index bacb0cfa3602..def880aea63a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -11,6 +11,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/hugetlb.h>
+#include <linux/module.h>
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
@@ -20,95 +21,307 @@
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
-/* For now use fixed-size 16MB page table */
-#define HPT_ORDER 24
-#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
-#define HPT_HASH_MASK (HPT_NPTEG - 1)
+/* Translate address of a vmalloc'd thing to a linear map address */
+static void *real_vmalloc_addr(void *x)
+{
+ unsigned long addr = (unsigned long) x;
+ pte_t *p;
-#define HPTE_V_HVLOCK 0x40UL
+ p = find_linux_pte(swapper_pg_dir, addr);
+ if (!p || !pte_present(*p))
+ return NULL;
+ /* assume we don't have huge pages in vmalloc space... */
+ addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
+ return __va(addr);
+}
-static inline long lock_hpte(unsigned long *hpte, unsigned long bits)
+/*
+ * Add this HPTE into the chain for the real page.
+ * Must be called with the chain locked; it unlocks the chain.
+ */
+void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
+ unsigned long *rmap, long pte_index, int realmode)
{
- unsigned long tmp, old;
+ struct revmap_entry *head, *tail;
+ unsigned long i;
- asm volatile(" ldarx %0,0,%2\n"
- " and. %1,%0,%3\n"
- " bne 2f\n"
- " ori %0,%0,%4\n"
- " stdcx. %0,0,%2\n"
- " beq+ 2f\n"
- " li %1,%3\n"
- "2: isync"
- : "=&r" (tmp), "=&r" (old)
- : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK)
- : "cc", "memory");
- return old == 0;
+ if (*rmap & KVMPPC_RMAP_PRESENT) {
+ i = *rmap & KVMPPC_RMAP_INDEX;
+ head = &kvm->arch.revmap[i];
+ if (realmode)
+ head = real_vmalloc_addr(head);
+ tail = &kvm->arch.revmap[head->back];
+ if (realmode)
+ tail = real_vmalloc_addr(tail);
+ rev->forw = i;
+ rev->back = head->back;
+ tail->forw = pte_index;
+ head->back = pte_index;
+ } else {
+ rev->forw = rev->back = pte_index;
+ i = pte_index;
+ }
+ smp_wmb();
+ *rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
+}
+EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
+
+/* Remove this HPTE from the chain for a real page */
+static void remove_revmap_chain(struct kvm *kvm, long pte_index,
+ struct revmap_entry *rev,
+ unsigned long hpte_v, unsigned long hpte_r)
+{
+ struct revmap_entry *next, *prev;
+ unsigned long gfn, ptel, head;
+ struct kvm_memory_slot *memslot;
+ unsigned long *rmap;
+ unsigned long rcbits;
+
+ rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
+ ptel = rev->guest_rpte |= rcbits;
+ gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
+ memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+ return;
+
+ rmap = real_vmalloc_addr(&memslot->rmap[gfn - memslot->base_gfn]);
+ lock_rmap(rmap);
+
+ head = *rmap & KVMPPC_RMAP_INDEX;
+ next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
+ prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
+ next->back = rev->back;
+ prev->forw = rev->forw;
+ if (head == pte_index) {
+ head = rev->forw;
+ if (head == pte_index)
+ *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
+ else
+ *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
+ }
+ *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+ unlock_rmap(rmap);
+}
+
+static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
+ int writing, unsigned long *pte_sizep)
+{
+ pte_t *ptep;
+ unsigned long ps = *pte_sizep;
+ unsigned int shift;
+
+ ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift);
+ if (!ptep)
+ return __pte(0);
+ if (shift)
+ *pte_sizep = 1ul << shift;
+ else
+ *pte_sizep = PAGE_SIZE;
+ if (ps > *pte_sizep)
+ return __pte(0);
+ if (!pte_present(*ptep))
+ return __pte(0);
+ return kvmppc_read_update_linux_pte(ptep, writing);
+}
+
+static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
+{
+ asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+ hpte[0] = hpte_v;
}
long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel)
{
- unsigned long porder;
struct kvm *kvm = vcpu->kvm;
- unsigned long i, lpn, pa;
+ unsigned long i, pa, gpa, gfn, psize;
+ unsigned long slot_fn, hva;
unsigned long *hpte;
+ struct revmap_entry *rev;
+ unsigned long g_ptel = ptel;
+ struct kvm_memory_slot *memslot;
+ unsigned long *physp, pte_size;
+ unsigned long is_io;
+ unsigned long *rmap;
+ pte_t pte;
+ unsigned int writing;
+ unsigned long mmu_seq;
+ unsigned long rcbits;
+ bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
- /* only handle 4k, 64k and 16M pages for now */
- porder = 12;
- if (pteh & HPTE_V_LARGE) {
- if (cpu_has_feature(CPU_FTR_ARCH_206) &&
- (ptel & 0xf000) == 0x1000) {
- /* 64k page */
- porder = 16;
- } else if ((ptel & 0xff000) == 0) {
- /* 16M page */
- porder = 24;
- /* lowest AVA bit must be 0 for 16M pages */
- if (pteh & 0x80)
- return H_PARAMETER;
- } else
+ psize = hpte_page_size(pteh, ptel);
+ if (!psize)
+ return H_PARAMETER;
+ writing = hpte_is_writable(ptel);
+ pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
+
+ /* used later to detect if we might have been invalidated */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
+ /* Find the memslot (if any) for this address */
+ gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
+ gfn = gpa >> PAGE_SHIFT;
+ memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
+ pa = 0;
+ is_io = ~0ul;
+ rmap = NULL;
+ if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
+ /* PPC970 can't do emulated MMIO */
+ if (!cpu_has_feature(CPU_FTR_ARCH_206))
return H_PARAMETER;
+ /* Emulated MMIO - mark this with key=31 */
+ pteh |= HPTE_V_ABSENT;
+ ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
+ goto do_insert;
}
- lpn = (ptel & HPTE_R_RPN) >> kvm->arch.ram_porder;
- if (lpn >= kvm->arch.ram_npages || porder > kvm->arch.ram_porder)
- return H_PARAMETER;
- pa = kvm->arch.ram_pginfo[lpn].pfn << PAGE_SHIFT;
- if (!pa)
+
+ /* Check if the requested page fits entirely in the memslot. */
+ if (!slot_is_aligned(memslot, psize))
return H_PARAMETER;
- /* Check WIMG */
- if ((ptel & HPTE_R_WIMG) != HPTE_R_M &&
- (ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M))
+ slot_fn = gfn - memslot->base_gfn;
+ rmap = &memslot->rmap[slot_fn];
+
+ if (!kvm->arch.using_mmu_notifiers) {
+ physp = kvm->arch.slot_phys[memslot->id];
+ if (!physp)
+ return H_PARAMETER;
+ physp += slot_fn;
+ if (realmode)
+ physp = real_vmalloc_addr(physp);
+ pa = *physp;
+ if (!pa)
+ return H_TOO_HARD;
+ is_io = pa & (HPTE_R_I | HPTE_R_W);
+ pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
+ pa &= PAGE_MASK;
+ } else {
+ /* Translate to host virtual address */
+ hva = gfn_to_hva_memslot(memslot, gfn);
+
+ /* Look up the Linux PTE for the backing page */
+ pte_size = psize;
+ pte = lookup_linux_pte(vcpu, hva, writing, &pte_size);
+ if (pte_present(pte)) {
+ if (writing && !pte_write(pte))
+ /* make the actual HPTE be read-only */
+ ptel = hpte_make_readonly(ptel);
+ is_io = hpte_cache_bits(pte_val(pte));
+ pa = pte_pfn(pte) << PAGE_SHIFT;
+ }
+ }
+ if (pte_size < psize)
return H_PARAMETER;
- pteh &= ~0x60UL;
- ptel &= ~(HPTE_R_PP0 - kvm->arch.ram_psize);
+ if (pa && pte_size > psize)
+ pa |= gpa & (pte_size - 1);
+
+ ptel &= ~(HPTE_R_PP0 - psize);
ptel |= pa;
- if (pte_index >= (HPT_NPTEG << 3))
+
+ if (pa)
+ pteh |= HPTE_V_VALID;
+ else
+ pteh |= HPTE_V_ABSENT;
+
+ /* Check WIMG */
+ if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
+ if (is_io)
+ return H_PARAMETER;
+ /*
+ * Allow guest to map emulated device memory as
+ * uncacheable, but actually make it cacheable.
+ */
+ ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
+ ptel |= HPTE_R_M;
+ }
+
+ /* Find and lock the HPTEG slot to use */
+ do_insert:
+ if (pte_index >= HPT_NPTE)
return H_PARAMETER;
if (likely((flags & H_EXACT) == 0)) {
pte_index &= ~7UL;
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
- for (i = 0; ; ++i) {
- if (i == 8)
- return H_PTEG_FULL;
+ for (i = 0; i < 8; ++i) {
if ((*hpte & HPTE_V_VALID) == 0 &&
- lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID))
+ try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
+ HPTE_V_ABSENT))
break;
hpte += 2;
}
+ if (i == 8) {
+ /*
+ * Since try_lock_hpte doesn't retry (not even stdcx.
+ * failures), it could be that there is a free slot
+ * but we transiently failed to lock it. Try again,
+ * actually locking each slot and checking it.
+ */
+ hpte -= 16;
+ for (i = 0; i < 8; ++i) {
+ while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+ cpu_relax();
+ if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)))
+ break;
+ *hpte &= ~HPTE_V_HVLOCK;
+ hpte += 2;
+ }
+ if (i == 8)
+ return H_PTEG_FULL;
+ }
+ pte_index += i;
} else {
- i = 0;
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
- if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID))
- return H_PTEG_FULL;
+ if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
+ HPTE_V_ABSENT)) {
+ /* Lock the slot and check again */
+ while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+ cpu_relax();
+ if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+ *hpte &= ~HPTE_V_HVLOCK;
+ return H_PTEG_FULL;
+ }
+ }
}
+
+ /* Save away the guest's idea of the second HPTE dword */
+ rev = &kvm->arch.revmap[pte_index];
+ if (realmode)
+ rev = real_vmalloc_addr(rev);
+ if (rev)
+ rev->guest_rpte = g_ptel;
+
+ /* Link HPTE into reverse-map chain */
+ if (pteh & HPTE_V_VALID) {
+ if (realmode)
+ rmap = real_vmalloc_addr(rmap);
+ lock_rmap(rmap);
+ /* Check for pending invalidations under the rmap chain lock */
+ if (kvm->arch.using_mmu_notifiers &&
+ mmu_notifier_retry(vcpu, mmu_seq)) {
+ /* inval in progress, write a non-present HPTE */
+ pteh |= HPTE_V_ABSENT;
+ pteh &= ~HPTE_V_VALID;
+ unlock_rmap(rmap);
+ } else {
+ kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
+ realmode);
+ /* Only set R/C in real HPTE if already set in *rmap */
+ rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
+ ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
+ }
+ }
+
hpte[1] = ptel;
+
+ /* Write the first HPTE dword, unlocking the HPTE and making it valid */
eieio();
hpte[0] = pteh;
asm volatile("ptesync" : : : "memory");
- atomic_inc(&kvm->arch.ram_pginfo[lpn].refcnt);
- vcpu->arch.gpr[4] = pte_index + i;
+
+ vcpu->arch.gpr[4] = pte_index;
return H_SUCCESS;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_enter);
#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
@@ -137,37 +350,46 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
struct kvm *kvm = vcpu->kvm;
unsigned long *hpte;
unsigned long v, r, rb;
+ struct revmap_entry *rev;
- if (pte_index >= (HPT_NPTEG << 3))
+ if (pte_index >= HPT_NPTE)
return H_PARAMETER;
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
- while (!lock_hpte(hpte, HPTE_V_HVLOCK))
+ while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
- if ((hpte[0] & HPTE_V_VALID) == 0 ||
+ if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) ||
((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) {
hpte[0] &= ~HPTE_V_HVLOCK;
return H_NOT_FOUND;
}
- if (atomic_read(&kvm->online_vcpus) == 1)
- flags |= H_LOCAL;
- vcpu->arch.gpr[4] = v = hpte[0] & ~HPTE_V_HVLOCK;
- vcpu->arch.gpr[5] = r = hpte[1];
- rb = compute_tlbie_rb(v, r, pte_index);
- hpte[0] = 0;
- if (!(flags & H_LOCAL)) {
- while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
- cpu_relax();
- asm volatile("ptesync" : : : "memory");
- asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
- : : "r" (rb), "r" (kvm->arch.lpid));
- asm volatile("ptesync" : : : "memory");
- kvm->arch.tlbie_lock = 0;
- } else {
- asm volatile("ptesync" : : : "memory");
- asm volatile("tlbiel %0" : : "r" (rb));
- asm volatile("ptesync" : : : "memory");
+
+ rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ v = hpte[0] & ~HPTE_V_HVLOCK;
+ if (v & HPTE_V_VALID) {
+ hpte[0] &= ~HPTE_V_VALID;
+ rb = compute_tlbie_rb(v, hpte[1], pte_index);
+ if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) {
+ while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+ : : "r" (rb), "r" (kvm->arch.lpid));
+ asm volatile("ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0;
+ } else {
+ asm volatile("ptesync" : : : "memory");
+ asm volatile("tlbiel %0" : : "r" (rb));
+ asm volatile("ptesync" : : : "memory");
+ }
+ /* Read PTE low word after tlbie to get final R/C values */
+ remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
}
+ r = rev->guest_rpte;
+ unlock_hpte(hpte, 0);
+
+ vcpu->arch.gpr[4] = v;
+ vcpu->arch.gpr[5] = r;
return H_SUCCESS;
}
@@ -175,78 +397,117 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
unsigned long *args = &vcpu->arch.gpr[4];
- unsigned long *hp, tlbrb[4];
- long int i, found;
- long int n_inval = 0;
- unsigned long flags, req, pte_index;
+ unsigned long *hp, *hptes[4], tlbrb[4];
+ long int i, j, k, n, found, indexes[4];
+ unsigned long flags, req, pte_index, rcbits;
long int local = 0;
long int ret = H_SUCCESS;
+ struct revmap_entry *rev, *revs[4];
if (atomic_read(&kvm->online_vcpus) == 1)
local = 1;
- for (i = 0; i < 4; ++i) {
- pte_index = args[i * 2];
- flags = pte_index >> 56;
- pte_index &= ((1ul << 56) - 1);
- req = flags >> 6;
- flags &= 3;
- if (req == 3)
- break;
- if (req != 1 || flags == 3 ||
- pte_index >= (HPT_NPTEG << 3)) {
- /* parameter error */
- args[i * 2] = ((0xa0 | flags) << 56) + pte_index;
- ret = H_PARAMETER;
- break;
- }
- hp = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
- while (!lock_hpte(hp, HPTE_V_HVLOCK))
- cpu_relax();
- found = 0;
- if (hp[0] & HPTE_V_VALID) {
- switch (flags & 3) {
- case 0: /* absolute */
- found = 1;
+ for (i = 0; i < 4 && ret == H_SUCCESS; ) {
+ n = 0;
+ for (; i < 4; ++i) {
+ j = i * 2;
+ pte_index = args[j];
+ flags = pte_index >> 56;
+ pte_index &= ((1ul << 56) - 1);
+ req = flags >> 6;
+ flags &= 3;
+ if (req == 3) { /* no more requests */
+ i = 4;
break;
- case 1: /* andcond */
- if (!(hp[0] & args[i * 2 + 1]))
- found = 1;
+ }
+ if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) {
+ /* parameter error */
+ args[j] = ((0xa0 | flags) << 56) + pte_index;
+ ret = H_PARAMETER;
break;
- case 2: /* AVPN */
- if ((hp[0] & ~0x7fUL) == args[i * 2 + 1])
+ }
+ hp = (unsigned long *)
+ (kvm->arch.hpt_virt + (pte_index << 4));
+ /* to avoid deadlock, don't spin except for first */
+ if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
+ if (n)
+ break;
+ while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
+ cpu_relax();
+ }
+ found = 0;
+ if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
+ switch (flags & 3) {
+ case 0: /* absolute */
found = 1;
- break;
+ break;
+ case 1: /* andcond */
+ if (!(hp[0] & args[j + 1]))
+ found = 1;
+ break;
+ case 2: /* AVPN */
+ if ((hp[0] & ~0x7fUL) == args[j + 1])
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ hp[0] &= ~HPTE_V_HVLOCK;
+ args[j] = ((0x90 | flags) << 56) + pte_index;
+ continue;
}
+
+ args[j] = ((0x80 | flags) << 56) + pte_index;
+ rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+
+ if (!(hp[0] & HPTE_V_VALID)) {
+ /* insert R and C bits from PTE */
+ rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+ args[j] |= rcbits << (56 - 5);
+ continue;
+ }
+
+ hp[0] &= ~HPTE_V_VALID; /* leave it locked */
+ tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
+ indexes[n] = j;
+ hptes[n] = hp;
+ revs[n] = rev;
+ ++n;
+ }
+
+ if (!n)
+ break;
+
+ /* Now that we've collected a batch, do the tlbies */
+ if (!local) {
+ while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ asm volatile("ptesync" : : : "memory");
+ for (k = 0; k < n; ++k)
+ asm volatile(PPC_TLBIE(%1,%0) : :
+ "r" (tlbrb[k]),
+ "r" (kvm->arch.lpid));
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0;
+ } else {
+ asm volatile("ptesync" : : : "memory");
+ for (k = 0; k < n; ++k)
+ asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
+ asm volatile("ptesync" : : : "memory");
}
- if (!found) {
- hp[0] &= ~HPTE_V_HVLOCK;
- args[i * 2] = ((0x90 | flags) << 56) + pte_index;
- continue;
+
+ /* Read PTE low words after tlbie to get final R/C values */
+ for (k = 0; k < n; ++k) {
+ j = indexes[k];
+ pte_index = args[j] & ((1ul << 56) - 1);
+ hp = hptes[k];
+ rev = revs[k];
+ remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]);
+ rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+ args[j] |= rcbits << (56 - 5);
+ hp[0] = 0;
}
- /* insert R and C bits from PTE */
- flags |= (hp[1] >> 5) & 0x0c;
- args[i * 2] = ((0x80 | flags) << 56) + pte_index;
- tlbrb[n_inval++] = compute_tlbie_rb(hp[0], hp[1], pte_index);
- hp[0] = 0;
- }
- if (n_inval == 0)
- return ret;
-
- if (!local) {
- while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
- cpu_relax();
- asm volatile("ptesync" : : : "memory");
- for (i = 0; i < n_inval; ++i)
- asm volatile(PPC_TLBIE(%1,%0)
- : : "r" (tlbrb[i]), "r" (kvm->arch.lpid));
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
- kvm->arch.tlbie_lock = 0;
- } else {
- asm volatile("ptesync" : : : "memory");
- for (i = 0; i < n_inval; ++i)
- asm volatile("tlbiel %0" : : "r" (tlbrb[i]));
- asm volatile("ptesync" : : : "memory");
}
+
return ret;
}
@@ -256,40 +517,55 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
{
struct kvm *kvm = vcpu->kvm;
unsigned long *hpte;
- unsigned long v, r, rb;
+ struct revmap_entry *rev;
+ unsigned long v, r, rb, mask, bits;
- if (pte_index >= (HPT_NPTEG << 3))
+ if (pte_index >= HPT_NPTE)
return H_PARAMETER;
+
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
- while (!lock_hpte(hpte, HPTE_V_HVLOCK))
+ while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
- if ((hpte[0] & HPTE_V_VALID) == 0 ||
+ if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) {
hpte[0] &= ~HPTE_V_HVLOCK;
return H_NOT_FOUND;
}
+
if (atomic_read(&kvm->online_vcpus) == 1)
flags |= H_LOCAL;
v = hpte[0];
- r = hpte[1] & ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
- HPTE_R_KEY_HI | HPTE_R_KEY_LO);
- r |= (flags << 55) & HPTE_R_PP0;
- r |= (flags << 48) & HPTE_R_KEY_HI;
- r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
- rb = compute_tlbie_rb(v, r, pte_index);
- hpte[0] = v & ~HPTE_V_VALID;
- if (!(flags & H_LOCAL)) {
- while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
- cpu_relax();
- asm volatile("ptesync" : : : "memory");
- asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
- : : "r" (rb), "r" (kvm->arch.lpid));
- asm volatile("ptesync" : : : "memory");
- kvm->arch.tlbie_lock = 0;
- } else {
- asm volatile("ptesync" : : : "memory");
- asm volatile("tlbiel %0" : : "r" (rb));
- asm volatile("ptesync" : : : "memory");
+ bits = (flags << 55) & HPTE_R_PP0;
+ bits |= (flags << 48) & HPTE_R_KEY_HI;
+ bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
+
+ /* Update guest view of 2nd HPTE dword */
+ mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
+ HPTE_R_KEY_HI | HPTE_R_KEY_LO;
+ rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ if (rev) {
+ r = (rev->guest_rpte & ~mask) | bits;
+ rev->guest_rpte = r;
+ }
+ r = (hpte[1] & ~mask) | bits;
+
+ /* Update HPTE */
+ if (v & HPTE_V_VALID) {
+ rb = compute_tlbie_rb(v, r, pte_index);
+ hpte[0] = v & ~HPTE_V_VALID;
+ if (!(flags & H_LOCAL)) {
+ while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+ : : "r" (rb), "r" (kvm->arch.lpid));
+ asm volatile("ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0;
+ } else {
+ asm volatile("ptesync" : : : "memory");
+ asm volatile("tlbiel %0" : : "r" (rb));
+ asm volatile("ptesync" : : : "memory");
+ }
}
hpte[1] = r;
eieio();
@@ -298,40 +574,243 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
return H_SUCCESS;
}
-static unsigned long reverse_xlate(struct kvm *kvm, unsigned long realaddr)
-{
- long int i;
- unsigned long offset, rpn;
-
- offset = realaddr & (kvm->arch.ram_psize - 1);
- rpn = (realaddr - offset) >> PAGE_SHIFT;
- for (i = 0; i < kvm->arch.ram_npages; ++i)
- if (rpn == kvm->arch.ram_pginfo[i].pfn)
- return (i << PAGE_SHIFT) + offset;
- return HPTE_R_RPN; /* all 1s in the RPN field */
-}
-
long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
{
struct kvm *kvm = vcpu->kvm;
- unsigned long *hpte, r;
+ unsigned long *hpte, v, r;
int i, n = 1;
+ struct revmap_entry *rev = NULL;
- if (pte_index >= (HPT_NPTEG << 3))
+ if (pte_index >= HPT_NPTE)
return H_PARAMETER;
if (flags & H_READ_4) {
pte_index &= ~3;
n = 4;
}
+ rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
for (i = 0; i < n; ++i, ++pte_index) {
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
+ v = hpte[0] & ~HPTE_V_HVLOCK;
r = hpte[1];
- if ((flags & H_R_XLATE) && (hpte[0] & HPTE_V_VALID))
- r = reverse_xlate(kvm, r & HPTE_R_RPN) |
- (r & ~HPTE_R_RPN);
- vcpu->arch.gpr[4 + i * 2] = hpte[0];
+ if (v & HPTE_V_ABSENT) {
+ v &= ~HPTE_V_ABSENT;
+ v |= HPTE_V_VALID;
+ }
+ if (v & HPTE_V_VALID)
+ r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
+ vcpu->arch.gpr[4 + i * 2] = v;
vcpu->arch.gpr[5 + i * 2] = r;
}
return H_SUCCESS;
}
+
+void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
+ unsigned long pte_index)
+{
+ unsigned long rb;
+
+ hptep[0] &= ~HPTE_V_VALID;
+ rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
+ while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+ : : "r" (rb), "r" (kvm->arch.lpid));
+ asm volatile("ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
+
+void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
+ unsigned long pte_index)
+{
+ unsigned long rb;
+ unsigned char rbyte;
+
+ rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
+ rbyte = (hptep[1] & ~HPTE_R_R) >> 8;
+ /* modify only the second-last byte, which contains the ref bit */
+ *((char *)hptep + 14) = rbyte;
+ while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+ : : "r" (rb), "r" (kvm->arch.lpid));
+ asm volatile("ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
+
+static int slb_base_page_shift[4] = {
+ 24, /* 16M */
+ 16, /* 64k */
+ 34, /* 16G */
+ 20, /* 1M, unsupported */
+};
+
+long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
+ unsigned long valid)
+{
+ unsigned int i;
+ unsigned int pshift;
+ unsigned long somask;
+ unsigned long vsid, hash;
+ unsigned long avpn;
+ unsigned long *hpte;
+ unsigned long mask, val;
+ unsigned long v, r;
+
+ /* Get page shift, work out hash and AVPN etc. */
+ mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
+ val = 0;
+ pshift = 12;
+ if (slb_v & SLB_VSID_L) {
+ mask |= HPTE_V_LARGE;
+ val |= HPTE_V_LARGE;
+ pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
+ }
+ if (slb_v & SLB_VSID_B_1T) {
+ somask = (1UL << 40) - 1;
+ vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
+ vsid ^= vsid << 25;
+ } else {
+ somask = (1UL << 28) - 1;
+ vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
+ }
+ hash = (vsid ^ ((eaddr & somask) >> pshift)) & HPT_HASH_MASK;
+ avpn = slb_v & ~(somask >> 16); /* also includes B */
+ avpn |= (eaddr & somask) >> 16;
+
+ if (pshift >= 24)
+ avpn &= ~((1UL << (pshift - 16)) - 1);
+ else
+ avpn &= ~0x7fUL;
+ val |= avpn;
+
+ for (;;) {
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7));
+
+ for (i = 0; i < 16; i += 2) {
+ /* Read the PTE racily */
+ v = hpte[i] & ~HPTE_V_HVLOCK;
+
+ /* Check valid/absent, hash, segment size and AVPN */
+ if (!(v & valid) || (v & mask) != val)
+ continue;
+
+ /* Lock the PTE and read it under the lock */
+ while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
+ cpu_relax();
+ v = hpte[i] & ~HPTE_V_HVLOCK;
+ r = hpte[i+1];
+
+ /*
+ * Check the HPTE again, including large page size
+ * Since we don't currently allow any MPSS (mixed
+ * page-size segment) page sizes, it is sufficient
+ * to check against the actual page size.
+ */
+ if ((v & valid) && (v & mask) == val &&
+ hpte_page_size(v, r) == (1ul << pshift))
+ /* Return with the HPTE still locked */
+ return (hash << 3) + (i >> 1);
+
+ /* Unlock and move on */
+ hpte[i] = v;
+ }
+
+ if (val & HPTE_V_SECONDARY)
+ break;
+ val |= HPTE_V_SECONDARY;
+ hash = hash ^ HPT_HASH_MASK;
+ }
+ return -1;
+}
+EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
+
+/*
+ * Called in real mode to check whether an HPTE not found fault
+ * is due to accessing a paged-out page or an emulated MMIO page,
+ * or if a protection fault is due to accessing a page that the
+ * guest wanted read/write access to but which we made read-only.
+ * Returns a possibly modified status (DSISR) value if not
+ * (i.e. pass the interrupt to the guest),
+ * -1 to pass the fault up to host kernel mode code, -2 to do that
+ * and also load the instruction word (for MMIO emulation),
+ * or 0 if we should make the guest retry the access.
+ */
+long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
+ unsigned long slb_v, unsigned int status, bool data)
+{
+ struct kvm *kvm = vcpu->kvm;
+ long int index;
+ unsigned long v, r, gr;
+ unsigned long *hpte;
+ unsigned long valid;
+ struct revmap_entry *rev;
+ unsigned long pp, key;
+
+ /* For protection fault, expect to find a valid HPTE */
+ valid = HPTE_V_VALID;
+ if (status & DSISR_NOHPTE)
+ valid |= HPTE_V_ABSENT;
+
+ index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
+ if (index < 0) {
+ if (status & DSISR_NOHPTE)
+ return status; /* there really was no HPTE */
+ return 0; /* for prot fault, HPTE disappeared */
+ }
+ hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
+ v = hpte[0] & ~HPTE_V_HVLOCK;
+ r = hpte[1];
+ rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
+ gr = rev->guest_rpte;
+
+ unlock_hpte(hpte, v);
+
+ /* For not found, if the HPTE is valid by now, retry the instruction */
+ if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
+ return 0;
+
+ /* Check access permissions to the page */
+ pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
+ key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
+ status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */
+ if (!data) {
+ if (gr & (HPTE_R_N | HPTE_R_G))
+ return status | SRR1_ISI_N_OR_G;
+ if (!hpte_read_permission(pp, slb_v & key))
+ return status | SRR1_ISI_PROT;
+ } else if (status & DSISR_ISSTORE) {
+ /* check write permission */
+ if (!hpte_write_permission(pp, slb_v & key))
+ return status | DSISR_PROTFAULT;
+ } else {
+ if (!hpte_read_permission(pp, slb_v & key))
+ return status | DSISR_PROTFAULT;
+ }
+
+ /* Check storage key, if applicable */
+ if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
+ unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
+ if (status & DSISR_ISSTORE)
+ perm >>= 1;
+ if (perm & 1)
+ return status | DSISR_KEYFAULT;
+ }
+
+ /* Save HPTE info for virtual-mode handler */
+ vcpu->arch.pgfault_addr = addr;
+ vcpu->arch.pgfault_index = index;
+ vcpu->arch.pgfault_hpte[0] = v;
+ vcpu->arch.pgfault_hpte[1] = r;
+
+ /* Check the storage key to see if it is possibly emulated MMIO */
+ if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
+ (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
+ (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
+ return -2; /* MMIO emulation - load instr word */
+
+ return -1; /* send fault up to host kernel mode */
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 5c8b26183f50..b70bf22a3ff3 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -601,6 +601,30 @@ kvmppc_interrupt:
stw r12,VCPU_TRAP(r9)
+ /* Save HEIR (HV emulation assist reg) in last_inst
+ if this is an HEI (HV emulation interrupt, e40) */
+ li r3,KVM_INST_FETCH_FAILED
+BEGIN_FTR_SECTION
+ cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
+ bne 11f
+ mfspr r3,SPRN_HEIR
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+11: stw r3,VCPU_LAST_INST(r9)
+
+ /* these are volatile across C function calls */
+ mfctr r3
+ mfxer r4
+ std r3, VCPU_CTR(r9)
+ stw r4, VCPU_XER(r9)
+
+BEGIN_FTR_SECTION
+ /* If this is a page table miss then see if it's theirs or ours */
+ cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
+ beq kvmppc_hdsi
+ cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE
+ beq kvmppc_hisi
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
/* See if this is a leftover HDEC interrupt */
cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
bne 2f
@@ -608,7 +632,7 @@ kvmppc_interrupt:
cmpwi r3,0
bge ignore_hdec
2:
- /* See if this is something we can handle in real mode */
+ /* See if this is an hcall we can handle in real mode */
cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
beq hcall_try_real_mode
@@ -624,6 +648,7 @@ BEGIN_FTR_SECTION
1:
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+nohpte_cont:
hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
/* Save DEC */
mfspr r5,SPRN_DEC
@@ -632,36 +657,21 @@ hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
add r5,r5,r6
std r5,VCPU_DEC_EXPIRES(r9)
- /* Save HEIR (HV emulation assist reg) in last_inst
- if this is an HEI (HV emulation interrupt, e40) */
- li r3,-1
-BEGIN_FTR_SECTION
- cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
- bne 11f
- mfspr r3,SPRN_HEIR
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-11: stw r3,VCPU_LAST_INST(r9)
-
/* Save more register state */
- mfxer r5
mfdar r6
mfdsisr r7
- mfctr r8
-
- stw r5, VCPU_XER(r9)
std r6, VCPU_DAR(r9)
stw r7, VCPU_DSISR(r9)
- std r8, VCPU_CTR(r9)
- /* grab HDAR & HDSISR if HV data storage interrupt (HDSI) */
BEGIN_FTR_SECTION
+ /* don't overwrite fault_dar/fault_dsisr if HDSI */
cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
beq 6f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-7: std r6, VCPU_FAULT_DAR(r9)
+ std r6, VCPU_FAULT_DAR(r9)
stw r7, VCPU_FAULT_DSISR(r9)
/* Save guest CTRL register, set runlatch to 1 */
- mfspr r6,SPRN_CTRLF
+6: mfspr r6,SPRN_CTRLF
stw r6,VCPU_CTRL(r9)
andi. r0,r6,1
bne 4f
@@ -1094,9 +1104,131 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
mtspr SPRN_HSRR1, r7
ba 0x500
-6: mfspr r6,SPRN_HDAR
- mfspr r7,SPRN_HDSISR
- b 7b
+/*
+ * Check whether an HDSI is an HPTE not found fault or something else.
+ * If it is an HPTE not found fault that is due to the guest accessing
+ * a page that they have mapped but which we have paged out, then
+ * we continue on with the guest exit path. In all other cases,
+ * reflect the HDSI to the guest as a DSI.
+ */
+kvmppc_hdsi:
+ mfspr r4, SPRN_HDAR
+ mfspr r6, SPRN_HDSISR
+ /* HPTE not found fault or protection fault? */
+ andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
+ beq 1f /* if not, send it to the guest */
+ andi. r0, r11, MSR_DR /* data relocation enabled? */
+ beq 3f
+ clrrdi r0, r4, 28
+ PPC_SLBFEE_DOT(r5, r0) /* if so, look up SLB */
+ bne 1f /* if no SLB entry found */
+4: std r4, VCPU_FAULT_DAR(r9)
+ stw r6, VCPU_FAULT_DSISR(r9)
+
+ /* Search the hash table. */
+ mr r3, r9 /* vcpu pointer */
+ li r7, 1 /* data fault */
+ bl .kvmppc_hpte_hv_fault
+ ld r9, HSTATE_KVM_VCPU(r13)
+ ld r10, VCPU_PC(r9)
+ ld r11, VCPU_MSR(r9)
+ li r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
+ cmpdi r3, 0 /* retry the instruction */
+ beq 6f
+ cmpdi r3, -1 /* handle in kernel mode */
+ beq nohpte_cont
+ cmpdi r3, -2 /* MMIO emulation; need instr word */
+ beq 2f
+
+ /* Synthesize a DSI for the guest */
+ ld r4, VCPU_FAULT_DAR(r9)
+ mr r6, r3
+1: mtspr SPRN_DAR, r4
+ mtspr SPRN_DSISR, r6
+ mtspr SPRN_SRR0, r10
+ mtspr SPRN_SRR1, r11
+ li r10, BOOK3S_INTERRUPT_DATA_STORAGE
+ li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
+ rotldi r11, r11, 63
+6: ld r7, VCPU_CTR(r9)
+ lwz r8, VCPU_XER(r9)
+ mtctr r7
+ mtxer r8
+ mr r4, r9
+ b fast_guest_return
+
+3: ld r5, VCPU_KVM(r9) /* not relocated, use VRMA */
+ ld r5, KVM_VRMA_SLB_V(r5)
+ b 4b
+
+ /* If this is for emulated MMIO, load the instruction word */
+2: li r8, KVM_INST_FETCH_FAILED /* In case lwz faults */
+
+ /* Set guest mode to 'jump over instruction' so if lwz faults
+ * we'll just continue at the next IP. */
+ li r0, KVM_GUEST_MODE_SKIP
+ stb r0, HSTATE_IN_GUEST(r13)
+
+ /* Do the access with MSR:DR enabled */
+ mfmsr r3
+ ori r4, r3, MSR_DR /* Enable paging for data */
+ mtmsrd r4
+ lwz r8, 0(r10)
+ mtmsrd r3
+
+ /* Store the result */
+ stw r8, VCPU_LAST_INST(r9)
+
+ /* Unset guest mode. */
+ li r0, KVM_GUEST_MODE_NONE
+ stb r0, HSTATE_IN_GUEST(r13)
+ b nohpte_cont
+
+/*
+ * Similarly for an HISI, reflect it to the guest as an ISI unless
+ * it is an HPTE not found fault for a page that we have paged out.
+ */
+kvmppc_hisi:
+ andis. r0, r11, SRR1_ISI_NOPT@h
+ beq 1f
+ andi. r0, r11, MSR_IR /* instruction relocation enabled? */
+ beq 3f
+ clrrdi r0, r10, 28
+ PPC_SLBFEE_DOT(r5, r0) /* if so, look up SLB */
+ bne 1f /* if no SLB entry found */
+4:
+ /* Search the hash table. */
+ mr r3, r9 /* vcpu pointer */
+ mr r4, r10
+ mr r6, r11
+ li r7, 0 /* instruction fault */
+ bl .kvmppc_hpte_hv_fault
+ ld r9, HSTATE_KVM_VCPU(r13)
+ ld r10, VCPU_PC(r9)
+ ld r11, VCPU_MSR(r9)
+ li r12, BOOK3S_INTERRUPT_H_INST_STORAGE
+ cmpdi r3, 0 /* retry the instruction */
+ beq 6f
+ cmpdi r3, -1 /* handle in kernel mode */
+ beq nohpte_cont
+
+ /* Synthesize an ISI for the guest */
+ mr r11, r3
+1: mtspr SPRN_SRR0, r10
+ mtspr SPRN_SRR1, r11
+ li r10, BOOK3S_INTERRUPT_INST_STORAGE
+ li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
+ rotldi r11, r11, 63
+6: ld r7, VCPU_CTR(r9)
+ lwz r8, VCPU_XER(r9)
+ mtctr r7
+ mtxer r8
+ mr r4, r9
+ b fast_guest_return
+
+3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */
+ ld r5, KVM_VRMA_SLB_V(r6)
+ b 4b
/*
* Try to handle an hcall in real mode.
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index 7b0ee96c1bed..a59a25a13218 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -24,6 +24,7 @@
#include <asm/kvm_fpu.h>
#include <asm/reg.h>
#include <asm/cacheflush.h>
+#include <asm/switch_to.h>
#include <linux/vmalloc.h>
/* #define DEBUG */
@@ -196,7 +197,8 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_inject_pf(vcpu, addr, false);
goto done_load;
} else if (r == EMULATE_DO_MMIO) {
- emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, len, 1);
+ emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
+ len, 1);
goto done_load;
}
@@ -286,11 +288,13 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_inject_pf(vcpu, addr, false);
goto done_load;
} else if ((r == EMULATE_DO_MMIO) && w) {
- emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, 4, 1);
+ emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
+ 4, 1);
vcpu->arch.qpr[rs] = tmp[1];
goto done_load;
} else if (r == EMULATE_DO_MMIO) {
- emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FQPR | rs, 8, 1);
+ emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FQPR | rs,
+ 8, 1);
goto done_load;
}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 220fcdf26978..642d88574b07 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -33,6 +33,7 @@
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
+#include <asm/switch_to.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
@@ -51,15 +52,19 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
#define MSR_USER32 MSR_USER
#define MSR_USER64 MSR_USER
#define HW_PAGE_SIZE PAGE_SIZE
+#define __hard_irq_disable local_irq_disable
+#define __hard_irq_enable local_irq_enable
#endif
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
#ifdef CONFIG_PPC_BOOK3S_64
- memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb));
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
sizeof(get_paca()->shadow_vcpu));
- to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max;
+ svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
+ svcpu_put(svcpu);
#endif
#ifdef CONFIG_PPC_BOOK3S_32
@@ -70,10 +75,12 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_PPC_BOOK3S_64
- memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb));
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
sizeof(get_paca()->shadow_vcpu));
- to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max;
+ to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
+ svcpu_put(svcpu);
#endif
kvmppc_giveup_ext(vcpu, MSR_FP);
@@ -151,14 +158,16 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
#ifdef CONFIG_PPC_BOOK3S_64
if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
kvmppc_mmu_book3s_64_init(vcpu);
- to_book3s(vcpu)->hior = 0xfff00000;
+ if (!to_book3s(vcpu)->hior_explicit)
+ to_book3s(vcpu)->hior = 0xfff00000;
to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
vcpu->arch.cpu_type = KVM_CPU_3S_64;
} else
#endif
{
kvmppc_mmu_book3s_32_init(vcpu);
- to_book3s(vcpu)->hior = 0;
+ if (!to_book3s(vcpu)->hior_explicit)
+ to_book3s(vcpu)->hior = 0;
to_book3s(vcpu)->msr_mask = 0xffffffffULL;
vcpu->arch.cpu_type = KVM_CPU_3S_32;
}
@@ -308,19 +317,22 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (page_found == -ENOENT) {
/* Page not found in guest PTE entries */
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
- vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
+ vcpu->arch.shared->dsisr = svcpu->fault_dsisr;
vcpu->arch.shared->msr |=
- (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
+ (svcpu->shadow_srr1 & 0x00000000f8000000ULL);
+ svcpu_put(svcpu);
kvmppc_book3s_queue_irqprio(vcpu, vec);
} else if (page_found == -EPERM) {
/* Storage protection */
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
- vcpu->arch.shared->dsisr =
- to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
+ vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE;
vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
vcpu->arch.shared->msr |=
- (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
+ svcpu->shadow_srr1 & 0x00000000f8000000ULL;
+ svcpu_put(svcpu);
kvmppc_book3s_queue_irqprio(vcpu, vec);
} else if (page_found == -EINVAL) {
/* Page not found in guest SLB */
@@ -517,24 +529,29 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
run->ready_for_interrupt_injection = 1;
trace_kvm_book3s_exit(exit_nr, vcpu);
+ preempt_enable();
kvm_resched(vcpu);
switch (exit_nr) {
case BOOK3S_INTERRUPT_INST_STORAGE:
+ {
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ ulong shadow_srr1 = svcpu->shadow_srr1;
vcpu->stat.pf_instruc++;
#ifdef CONFIG_PPC_BOOK3S_32
/* We set segments as unused segments when invalidating them. So
* treat the respective fault as segment fault. */
- if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]
- == SR_INVALID) {
+ if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) {
kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
r = RESUME_GUEST;
+ svcpu_put(svcpu);
break;
}
#endif
+ svcpu_put(svcpu);
/* only care about PTEG not found errors, but leave NX alone */
- if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) {
+ if (shadow_srr1 & 0x40000000) {
r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
vcpu->stat.sp_instruc++;
} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
@@ -547,33 +564,37 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
r = RESUME_GUEST;
} else {
- vcpu->arch.shared->msr |=
- to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
+ vcpu->arch.shared->msr |= shadow_srr1 & 0x58000000;
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
r = RESUME_GUEST;
}
break;
+ }
case BOOK3S_INTERRUPT_DATA_STORAGE:
{
ulong dar = kvmppc_get_fault_dar(vcpu);
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ u32 fault_dsisr = svcpu->fault_dsisr;
vcpu->stat.pf_storage++;
#ifdef CONFIG_PPC_BOOK3S_32
/* We set segments as unused segments when invalidating them. So
* treat the respective fault as segment fault. */
- if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) {
+ if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) {
kvmppc_mmu_map_segment(vcpu, dar);
r = RESUME_GUEST;
+ svcpu_put(svcpu);
break;
}
#endif
+ svcpu_put(svcpu);
/* The only case we need to handle is missing shadow PTEs */
- if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
+ if (fault_dsisr & DSISR_NOHPTE) {
r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
} else {
vcpu->arch.shared->dar = dar;
- vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
+ vcpu->arch.shared->dsisr = fault_dsisr;
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
r = RESUME_GUEST;
}
@@ -609,10 +630,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
case BOOK3S_INTERRUPT_PROGRAM:
{
enum emulation_result er;
+ struct kvmppc_book3s_shadow_vcpu *svcpu;
ulong flags;
program_interrupt:
- flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
+ svcpu = svcpu_get(vcpu);
+ flags = svcpu->shadow_srr1 & 0x1f0000ull;
+ svcpu_put(svcpu);
if (vcpu->arch.shared->msr & MSR_PR) {
#ifdef EXIT_DEBUG
@@ -740,20 +764,33 @@ program_interrupt:
r = RESUME_GUEST;
break;
default:
+ {
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+ ulong shadow_srr1 = svcpu->shadow_srr1;
+ svcpu_put(svcpu);
/* Ugh - bork here! What did we get? */
printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
- exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1);
+ exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
r = RESUME_HOST;
BUG();
break;
}
-
+ }
if (!(r & RESUME_HOST)) {
/* To avoid clobbering exit_reason, only check for signals if
* we aren't already exiting to userspace for some other
* reason. */
+
+ /*
+ * Interrupts could be timers for the guest which we have to
+ * inject again, so let's postpone them until we're in the guest
+ * and if we really did time things so badly, then we just exit
+ * again due to a host external interrupt.
+ */
+ __hard_irq_disable();
if (signal_pending(current)) {
+ __hard_irq_enable();
#ifdef EXIT_DEBUG
printk(KERN_EMERG "KVM: Going back to host\n");
#endif
@@ -761,10 +798,12 @@ program_interrupt:
run->exit_reason = KVM_EXIT_INTR;
r = -EINTR;
} else {
+ preempt_disable();
+
/* In case an interrupt came in that was triggered
* from userspace (like DEC), we need to check what
* to inject now! */
- kvmppc_core_deliver_interrupts(vcpu);
+ kvmppc_core_prepare_to_enter(vcpu);
}
}
@@ -836,6 +875,38 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return 0;
}
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r = -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_PPC_HIOR:
+ r = put_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr);
+ break;
+ default:
+ break;
+ }
+
+ return r;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r = -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_PPC_HIOR:
+ r = get_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr);
+ if (!r)
+ to_book3s(vcpu)->hior_explicit = true;
+ break;
+ default:
+ break;
+ }
+
+ return r;
+}
+
int kvmppc_core_check_processor_compat(void)
{
return 0;
@@ -923,16 +994,31 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
#endif
ulong ext_msr;
+ preempt_disable();
+
/* Check if we can run the vcpu at all */
if (!vcpu->arch.sane) {
kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
+ kvmppc_core_prepare_to_enter(vcpu);
+
+ /*
+ * Interrupts could be timers for the guest which we have to inject
+ * again, so let's postpone them until we're in the guest and if we
+ * really did time things so badly, then we just exit again due to
+ * a host external interrupt.
+ */
+ __hard_irq_disable();
+
/* No need to go into the guest when all we do is going out */
if (signal_pending(current)) {
+ __hard_irq_enable();
kvm_run->exit_reason = KVM_EXIT_INTR;
- return -EINTR;
+ ret = -EINTR;
+ goto out;
}
/* Save FPU state in stack */
@@ -974,8 +1060,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvm_guest_exit();
- local_irq_disable();
-
current->thread.regs->msr = ext_msr;
/* Make sure we save the guest FPU/Altivec/VSX state */
@@ -1002,9 +1086,50 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
current->thread.used_vsr = used_vsr;
#endif
+out:
+ preempt_enable();
return ret;
}
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+ struct kvm_dirty_log *log)
+{
+ struct kvm_memory_slot *memslot;
+ struct kvm_vcpu *vcpu;
+ ulong ga, ga_end;
+ int is_dirty = 0;
+ int r;
+ unsigned long n;
+
+ mutex_lock(&kvm->slots_lock);
+
+ r = kvm_get_dirty_log(kvm, log, &is_dirty);
+ if (r)
+ goto out;
+
+ /* If nothing is dirty, don't bother messing with page tables. */
+ if (is_dirty) {
+ memslot = id_to_memslot(kvm->memslots, log->slot);
+
+ ga = memslot->base_gfn << PAGE_SHIFT;
+ ga_end = ga + (memslot->npages << PAGE_SHIFT);
+
+ kvm_for_each_vcpu(n, vcpu, kvm)
+ kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
+
+ n = kvm_dirty_bitmap_bytes(memslot);
+ memset(memslot->dirty_bitmap, 0, n);
+ }
+
+ r = 0;
+out:
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+}
+
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)
{
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index bb6c988f010a..ee9e1ee9c858 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -124,12 +124,6 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
vcpu->arch.shared->msr = new_msr;
kvmppc_mmu_msr_notify(vcpu, old_msr);
-
- if (vcpu->arch.shared->msr & MSR_WE) {
- kvm_vcpu_block(vcpu);
- kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
- };
-
kvmppc_vcpu_sync_spe(vcpu);
}
@@ -258,9 +252,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
allowed = vcpu->arch.shared->msr & MSR_ME;
msr_mask = 0;
break;
- case BOOKE_IRQPRIO_EXTERNAL:
case BOOKE_IRQPRIO_DECREMENTER:
case BOOKE_IRQPRIO_FIT:
+ keep_irq = true;
+ /* fall through */
+ case BOOKE_IRQPRIO_EXTERNAL:
allowed = vcpu->arch.shared->msr & MSR_EE;
allowed = allowed && !crit;
msr_mask = MSR_CE|MSR_ME|MSR_DE;
@@ -276,7 +272,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
vcpu->arch.shared->srr1 = vcpu->arch.shared->msr;
vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
if (update_esr == true)
- vcpu->arch.esr = vcpu->arch.queued_esr;
+ vcpu->arch.shared->esr = vcpu->arch.queued_esr;
if (update_dear == true)
vcpu->arch.shared->dar = vcpu->arch.queued_dear;
kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask);
@@ -288,13 +284,26 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
return allowed;
}
-/* Check pending exceptions and deliver one, if possible. */
-void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
+static void update_timer_ints(struct kvm_vcpu *vcpu)
+{
+ if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
+ kvmppc_core_queue_dec(vcpu);
+ else
+ kvmppc_core_dequeue_dec(vcpu);
+}
+
+static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
{
unsigned long *pending = &vcpu->arch.pending_exceptions;
- unsigned long old_pending = vcpu->arch.pending_exceptions;
unsigned int priority;
+ if (vcpu->requests) {
+ if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
+ smp_mb();
+ update_timer_ints(vcpu);
+ }
+ }
+
priority = __ffs(*pending);
while (priority <= BOOKE_IRQPRIO_MAX) {
if (kvmppc_booke_irqprio_deliver(vcpu, priority))
@@ -306,10 +315,24 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
}
/* Tell the guest about our interrupt status */
- if (*pending)
- vcpu->arch.shared->int_pending = 1;
- else if (old_pending)
- vcpu->arch.shared->int_pending = 0;
+ vcpu->arch.shared->int_pending = !!*pending;
+}
+
+/* Check pending exceptions and deliver one, if possible. */
+void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(!irqs_disabled());
+
+ kvmppc_core_check_exceptions(vcpu);
+
+ if (vcpu->arch.shared->msr & MSR_WE) {
+ local_irq_enable();
+ kvm_vcpu_block(vcpu);
+ local_irq_disable();
+
+ kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
+ kvmppc_core_check_exceptions(vcpu);
+ };
}
int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
@@ -322,11 +345,21 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
}
local_irq_disable();
+
+ kvmppc_core_prepare_to_enter(vcpu);
+
+ if (signal_pending(current)) {
+ kvm_run->exit_reason = KVM_EXIT_INTR;
+ ret = -EINTR;
+ goto out;
+ }
+
kvm_guest_enter();
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
kvm_guest_exit();
- local_irq_enable();
+out:
+ local_irq_enable();
return ret;
}
@@ -603,7 +636,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
local_irq_disable();
- kvmppc_core_deliver_interrupts(vcpu);
+ kvmppc_core_prepare_to_enter(vcpu);
if (!(r & RESUME_HOST)) {
/* To avoid clobbering exit_reason, only check for signals if
@@ -628,6 +661,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.pc = 0;
vcpu->arch.shared->msr = 0;
vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
+ vcpu->arch.shared->pir = vcpu->vcpu_id;
kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
vcpu->arch.shadow_pid = 1;
@@ -662,10 +696,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->sprg1 = vcpu->arch.shared->sprg1;
regs->sprg2 = vcpu->arch.shared->sprg2;
regs->sprg3 = vcpu->arch.shared->sprg3;
- regs->sprg4 = vcpu->arch.sprg4;
- regs->sprg5 = vcpu->arch.sprg5;
- regs->sprg6 = vcpu->arch.sprg6;
- regs->sprg7 = vcpu->arch.sprg7;
+ regs->sprg4 = vcpu->arch.shared->sprg4;
+ regs->sprg5 = vcpu->arch.shared->sprg5;
+ regs->sprg6 = vcpu->arch.shared->sprg6;
+ regs->sprg7 = vcpu->arch.shared->sprg7;
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
@@ -690,10 +724,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.shared->sprg1 = regs->sprg1;
vcpu->arch.shared->sprg2 = regs->sprg2;
vcpu->arch.shared->sprg3 = regs->sprg3;
- vcpu->arch.sprg4 = regs->sprg4;
- vcpu->arch.sprg5 = regs->sprg5;
- vcpu->arch.sprg6 = regs->sprg6;
- vcpu->arch.sprg7 = regs->sprg7;
+ vcpu->arch.shared->sprg4 = regs->sprg4;
+ vcpu->arch.shared->sprg5 = regs->sprg5;
+ vcpu->arch.shared->sprg6 = regs->sprg6;
+ vcpu->arch.shared->sprg7 = regs->sprg7;
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
@@ -711,7 +745,7 @@ static void get_sregs_base(struct kvm_vcpu *vcpu,
sregs->u.e.csrr0 = vcpu->arch.csrr0;
sregs->u.e.csrr1 = vcpu->arch.csrr1;
sregs->u.e.mcsr = vcpu->arch.mcsr;
- sregs->u.e.esr = vcpu->arch.esr;
+ sregs->u.e.esr = vcpu->arch.shared->esr;
sregs->u.e.dear = vcpu->arch.shared->dar;
sregs->u.e.tsr = vcpu->arch.tsr;
sregs->u.e.tcr = vcpu->arch.tcr;
@@ -729,28 +763,19 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
vcpu->arch.csrr0 = sregs->u.e.csrr0;
vcpu->arch.csrr1 = sregs->u.e.csrr1;
vcpu->arch.mcsr = sregs->u.e.mcsr;
- vcpu->arch.esr = sregs->u.e.esr;
+ vcpu->arch.shared->esr = sregs->u.e.esr;
vcpu->arch.shared->dar = sregs->u.e.dear;
vcpu->arch.vrsave = sregs->u.e.vrsave;
- vcpu->arch.tcr = sregs->u.e.tcr;
+ kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
- if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
+ if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) {
vcpu->arch.dec = sregs->u.e.dec;
-
- kvmppc_emulate_dec(vcpu);
+ kvmppc_emulate_dec(vcpu);
+ }
if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
- /*
- * FIXME: existing KVM timer handling is incomplete.
- * TSR cannot be read by the guest, and its value in
- * vcpu->arch is always zero. For now, just handle
- * the case where the caller is trying to inject a
- * decrementer interrupt.
- */
-
- if ((sregs->u.e.tsr & TSR_DIS) &&
- (vcpu->arch.tcr & TCR_DIE))
- kvmppc_core_queue_dec(vcpu);
+ vcpu->arch.tsr = sregs->u.e.tsr;
+ update_timer_ints(vcpu);
}
return 0;
@@ -761,7 +786,7 @@ static void get_sregs_arch206(struct kvm_vcpu *vcpu,
{
sregs->u.e.features |= KVM_SREGS_E_ARCH206;
- sregs->u.e.pir = 0;
+ sregs->u.e.pir = vcpu->vcpu_id;
sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0;
sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1;
sregs->u.e.decar = vcpu->arch.decar;
@@ -774,7 +799,7 @@ static int set_sregs_arch206(struct kvm_vcpu *vcpu,
if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206))
return 0;
- if (sregs->u.e.pir != 0)
+ if (sregs->u.e.pir != vcpu->vcpu_id)
return -EINVAL;
vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0;
@@ -862,6 +887,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return kvmppc_core_set_sregs(vcpu, sregs);
}
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ return -EINVAL;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ return -EINVAL;
+}
+
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
return -ENOTSUPP;
@@ -906,6 +941,33 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
{
}
+void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
+{
+ vcpu->arch.tcr = new_tcr;
+ update_timer_ints(vcpu);
+}
+
+void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
+{
+ set_bits(tsr_bits, &vcpu->arch.tsr);
+ smp_wmb();
+ kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+ kvm_vcpu_kick(vcpu);
+}
+
+void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
+{
+ clear_bits(tsr_bits, &vcpu->arch.tsr);
+ update_timer_ints(vcpu);
+}
+
+void kvmppc_decrementer_func(unsigned long data)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+
+ kvmppc_set_tsr_bits(vcpu, TSR_DIS);
+}
+
int __init kvmppc_booke_init(void)
{
unsigned long ivor[16];
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 8e1fe33d64e5..2fe202705a3f 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -55,6 +55,10 @@ extern unsigned long kvmppc_booke_handlers;
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
+void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
+void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
+void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
+
int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance);
int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 1260f5f24c0c..3e652da36534 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -13,6 +13,7 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
+ * Copyright 2011 Freescale Semiconductor, Inc.
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
@@ -107,7 +108,7 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_DEAR:
vcpu->arch.shared->dar = spr_val; break;
case SPRN_ESR:
- vcpu->arch.esr = spr_val; break;
+ vcpu->arch.shared->esr = spr_val; break;
case SPRN_DBCR0:
vcpu->arch.dbcr0 = spr_val; break;
case SPRN_DBCR1:
@@ -115,23 +116,23 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_DBSR:
vcpu->arch.dbsr &= ~spr_val; break;
case SPRN_TSR:
- vcpu->arch.tsr &= ~spr_val; break;
+ kvmppc_clr_tsr_bits(vcpu, spr_val);
+ break;
case SPRN_TCR:
- vcpu->arch.tcr = spr_val;
- kvmppc_emulate_dec(vcpu);
+ kvmppc_set_tcr(vcpu, spr_val);
break;
/* Note: SPRG4-7 are user-readable. These values are
* loaded into the real SPRGs when resuming the
* guest. */
case SPRN_SPRG4:
- vcpu->arch.sprg4 = spr_val; break;
+ vcpu->arch.shared->sprg4 = spr_val; break;
case SPRN_SPRG5:
- vcpu->arch.sprg5 = spr_val; break;
+ vcpu->arch.shared->sprg5 = spr_val; break;
case SPRN_SPRG6:
- vcpu->arch.sprg6 = spr_val; break;
+ vcpu->arch.shared->sprg6 = spr_val; break;
case SPRN_SPRG7:
- vcpu->arch.sprg7 = spr_val; break;
+ vcpu->arch.shared->sprg7 = spr_val; break;
case SPRN_IVPR:
vcpu->arch.ivpr = spr_val;
@@ -202,13 +203,17 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
case SPRN_DEAR:
kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break;
case SPRN_ESR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->esr); break;
case SPRN_DBCR0:
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break;
case SPRN_DBCR1:
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break;
case SPRN_DBSR:
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break;
+ case SPRN_TSR:
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.tsr); break;
+ case SPRN_TCR:
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.tcr); break;
case SPRN_IVOR0:
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 42f2fb1f66e9..10d8ef602e5c 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -402,19 +402,25 @@ lightweight_exit:
/* Save vcpu pointer for the exception handlers. */
mtspr SPRN_SPRG_WVCPU, r4
+ lwz r5, VCPU_SHARED(r4)
+
/* Can't switch the stack pointer until after IVPR is switched,
* because host interrupt handlers would get confused. */
lwz r1, VCPU_GPR(r1)(r4)
- /* Host interrupt handlers may have clobbered these guest-readable
- * SPRGs, so we need to reload them here with the guest's values. */
- lwz r3, VCPU_SPRG4(r4)
+ /*
+ * Host interrupt handlers may have clobbered these
+ * guest-readable SPRGs, or the guest kernel may have
+ * written directly to the shared area, so we
+ * need to reload them here with the guest's values.
+ */
+ lwz r3, VCPU_SHARED_SPRG4(r5)
mtspr SPRN_SPRG4W, r3
- lwz r3, VCPU_SPRG5(r4)
+ lwz r3, VCPU_SHARED_SPRG5(r5)
mtspr SPRN_SPRG5W, r3
- lwz r3, VCPU_SPRG6(r4)
+ lwz r3, VCPU_SHARED_SPRG6(r5)
mtspr SPRN_SPRG6W, r3
- lwz r3, VCPU_SPRG7(r4)
+ lwz r3, VCPU_SHARED_SPRG7(r5)
mtspr SPRN_SPRG7W, r3
#ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 8c0d45a6faf7..ddcd896fa2ff 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -71,9 +71,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.pvr = mfspr(SPRN_PVR);
vcpu_e500->svr = mfspr(SPRN_SVR);
- /* Since booke kvm only support one core, update all vcpus' PIR to 0 */
- vcpu->vcpu_id = 0;
-
vcpu->arch.cpu_type = KVM_CPU_E500V2;
return 0;
@@ -118,12 +115,12 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
- sregs->u.e.mas0 = vcpu_e500->mas0;
- sregs->u.e.mas1 = vcpu_e500->mas1;
- sregs->u.e.mas2 = vcpu_e500->mas2;
- sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3;
- sregs->u.e.mas4 = vcpu_e500->mas4;
- sregs->u.e.mas6 = vcpu_e500->mas6;
+ sregs->u.e.mas0 = vcpu->arch.shared->mas0;
+ sregs->u.e.mas1 = vcpu->arch.shared->mas1;
+ sregs->u.e.mas2 = vcpu->arch.shared->mas2;
+ sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3;
+ sregs->u.e.mas4 = vcpu->arch.shared->mas4;
+ sregs->u.e.mas6 = vcpu->arch.shared->mas6;
sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG);
sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg;
@@ -151,13 +148,12 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
}
if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
- vcpu_e500->mas0 = sregs->u.e.mas0;
- vcpu_e500->mas1 = sregs->u.e.mas1;
- vcpu_e500->mas2 = sregs->u.e.mas2;
- vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32;
- vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3;
- vcpu_e500->mas4 = sregs->u.e.mas4;
- vcpu_e500->mas6 = sregs->u.e.mas6;
+ vcpu->arch.shared->mas0 = sregs->u.e.mas0;
+ vcpu->arch.shared->mas1 = sregs->u.e.mas1;
+ vcpu->arch.shared->mas2 = sregs->u.e.mas2;
+ vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3;
+ vcpu->arch.shared->mas4 = sregs->u.e.mas4;
+ vcpu->arch.shared->mas6 = sregs->u.e.mas6;
}
if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
@@ -233,6 +229,10 @@ static int __init kvmppc_e500_init(void)
unsigned long ivor[3];
unsigned long max_ivor = 0;
+ r = kvmppc_core_check_processor_compat();
+ if (r)
+ return r;
+
r = kvmppc_booke_init();
if (r)
return r;
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index d48ae396f41e..6d0b2bd54fb0 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -89,19 +89,23 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
return EMULATE_FAIL;
vcpu_e500->pid[2] = spr_val; break;
case SPRN_MAS0:
- vcpu_e500->mas0 = spr_val; break;
+ vcpu->arch.shared->mas0 = spr_val; break;
case SPRN_MAS1:
- vcpu_e500->mas1 = spr_val; break;
+ vcpu->arch.shared->mas1 = spr_val; break;
case SPRN_MAS2:
- vcpu_e500->mas2 = spr_val; break;
+ vcpu->arch.shared->mas2 = spr_val; break;
case SPRN_MAS3:
- vcpu_e500->mas3 = spr_val; break;
+ vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff;
+ vcpu->arch.shared->mas7_3 |= spr_val;
+ break;
case SPRN_MAS4:
- vcpu_e500->mas4 = spr_val; break;
+ vcpu->arch.shared->mas4 = spr_val; break;
case SPRN_MAS6:
- vcpu_e500->mas6 = spr_val; break;
+ vcpu->arch.shared->mas6 = spr_val; break;
case SPRN_MAS7:
- vcpu_e500->mas7 = spr_val; break;
+ vcpu->arch.shared->mas7_3 &= (u64)0xffffffff;
+ vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32;
+ break;
case SPRN_L1CSR0:
vcpu_e500->l1csr0 = spr_val;
vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
@@ -143,6 +147,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int emulated = EMULATE_DONE;
+ unsigned long val;
switch (sprn) {
case SPRN_PID:
@@ -152,20 +157,23 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
case SPRN_PID2:
kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break;
case SPRN_MAS0:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas0); break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas0); break;
case SPRN_MAS1:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas1); break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas1); break;
case SPRN_MAS2:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas2); break;
case SPRN_MAS3:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break;
+ val = (u32)vcpu->arch.shared->mas7_3;
+ kvmppc_set_gpr(vcpu, rt, val);
+ break;
case SPRN_MAS4:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas4); break;
case SPRN_MAS6:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas6); break;
case SPRN_MAS7:
- kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break;
-
+ val = vcpu->arch.shared->mas7_3 >> 32;
+ kvmppc_set_gpr(vcpu, rt, val);
+ break;
case SPRN_TLB0CFG:
kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
case SPRN_TLB1CFG:
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 13c432ea2fa8..6e53e4164de1 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -12,12 +12,19 @@
* published by the Free Software Foundation.
*/
+#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
+#include <linux/log2.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <linux/rwsem.h>
+#include <linux/vmalloc.h>
+#include <linux/hugetlb.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_e500.h>
@@ -26,7 +33,7 @@
#include "trace.h"
#include "timing.h"
-#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
+#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1)
struct id {
unsigned long val;
@@ -63,7 +70,14 @@ static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids);
* The valid range of shadow ID is [1..255] */
static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid);
-static unsigned int tlb1_entry_num;
+static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
+
+static struct kvm_book3e_206_tlb_entry *get_entry(
+ struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry)
+{
+ int offset = vcpu_e500->gtlb_offset[tlbsel];
+ return &vcpu_e500->gtlb_arch[offset + entry];
+}
/*
* Allocate a free shadow id and setup a valid sid mapping in given entry.
@@ -116,13 +130,11 @@ static inline int local_sid_lookup(struct id *entry)
return -1;
}
-/* Invalidate all id mappings on local core */
+/* Invalidate all id mappings on local core -- call with preempt disabled */
static inline void local_sid_destroy_all(void)
{
- preempt_disable();
__get_cpu_var(pcpu_last_used_sid) = 0;
memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids)));
- preempt_enable();
}
static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
@@ -218,34 +230,13 @@ void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500)
preempt_enable();
}
-void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
-{
- struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- struct tlbe *tlbe;
- int i, tlbsel;
-
- printk("| %8s | %8s | %8s | %8s | %8s |\n",
- "nr", "mas1", "mas2", "mas3", "mas7");
-
- for (tlbsel = 0; tlbsel < 2; tlbsel++) {
- printk("Guest TLB%d:\n", tlbsel);
- for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) {
- tlbe = &vcpu_e500->gtlb_arch[tlbsel][i];
- if (tlbe->mas1 & MAS1_VALID)
- printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n",
- tlbsel, i, tlbe->mas1, tlbe->mas2,
- tlbe->mas3, tlbe->mas7);
- }
- }
-}
-
-static inline unsigned int tlb0_get_next_victim(
+static inline unsigned int gtlb0_get_next_victim(
struct kvmppc_vcpu_e500 *vcpu_e500)
{
unsigned int victim;
victim = vcpu_e500->gtlb_nv[0]++;
- if (unlikely(vcpu_e500->gtlb_nv[0] >= KVM_E500_TLB0_WAY_NUM))
+ if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb_params[0].ways))
vcpu_e500->gtlb_nv[0] = 0;
return victim;
@@ -254,12 +245,12 @@ static inline unsigned int tlb0_get_next_victim(
static inline unsigned int tlb1_max_shadow_size(void)
{
/* reserve one entry for magic page */
- return tlb1_entry_num - tlbcam_index - 1;
+ return host_tlb_params[1].entries - tlbcam_index - 1;
}
-static inline int tlbe_is_writable(struct tlbe *tlbe)
+static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
{
- return tlbe->mas3 & (MAS3_SW|MAS3_UW);
+ return tlbe->mas7_3 & (MAS3_SW|MAS3_UW);
}
static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
@@ -290,40 +281,66 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
/*
* writing shadow tlb entry to host TLB
*/
-static inline void __write_host_tlbe(struct tlbe *stlbe, uint32_t mas0)
+static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
+ uint32_t mas0)
{
unsigned long flags;
local_irq_save(flags);
mtspr(SPRN_MAS0, mas0);
mtspr(SPRN_MAS1, stlbe->mas1);
- mtspr(SPRN_MAS2, stlbe->mas2);
- mtspr(SPRN_MAS3, stlbe->mas3);
- mtspr(SPRN_MAS7, stlbe->mas7);
+ mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2);
+ mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
+ mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
asm volatile("isync; tlbwe" : : : "memory");
local_irq_restore(flags);
+
+ trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1,
+ stlbe->mas2, stlbe->mas7_3);
+}
+
+/*
+ * Acquire a mas0 with victim hint, as if we just took a TLB miss.
+ *
+ * We don't care about the address we're searching for, other than that it's
+ * in the right set and is not present in the TLB. Using a zero PID and a
+ * userspace address means we don't have to set and then restore MAS5, or
+ * calculate a proper MAS6 value.
+ */
+static u32 get_host_mas0(unsigned long eaddr)
+{
+ unsigned long flags;
+ u32 mas0;
+
+ local_irq_save(flags);
+ mtspr(SPRN_MAS6, 0);
+ asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET));
+ mas0 = mfspr(SPRN_MAS0);
+ local_irq_restore(flags);
+
+ return mas0;
}
+/* sesel is for tlb1 only */
static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
- int tlbsel, int esel, struct tlbe *stlbe)
+ int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe)
{
+ u32 mas0;
+
if (tlbsel == 0) {
- __write_host_tlbe(stlbe,
- MAS0_TLBSEL(0) |
- MAS0_ESEL(esel & (KVM_E500_TLB0_WAY_NUM - 1)));
+ mas0 = get_host_mas0(stlbe->mas2);
+ __write_host_tlbe(stlbe, mas0);
} else {
__write_host_tlbe(stlbe,
MAS0_TLBSEL(1) |
- MAS0_ESEL(to_htlb1_esel(esel)));
+ MAS0_ESEL(to_htlb1_esel(sesel)));
}
- trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2,
- stlbe->mas3, stlbe->mas7);
}
void kvmppc_map_magic(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- struct tlbe magic;
+ struct kvm_book3e_206_tlb_entry magic;
ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
unsigned int stid;
pfn_t pfn;
@@ -337,9 +354,9 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) |
MAS1_TSIZE(BOOK3E_PAGESZ_4K);
magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M;
- magic.mas3 = (pfn << PAGE_SHIFT) |
- MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
- magic.mas7 = pfn >> (32 - PAGE_SHIFT);
+ magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) |
+ MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
+ magic.mas8 = 0;
__write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
preempt_enable();
@@ -357,10 +374,11 @@ void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
{
}
-static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
- int tlbsel, int esel)
+static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel, int esel)
{
- struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+ struct kvm_book3e_206_tlb_entry *gtlbe =
+ get_entry(vcpu_e500, tlbsel, esel);
struct vcpu_id_table *idt = vcpu_e500->idt;
unsigned int pr, tid, ts, pid;
u32 val, eaddr;
@@ -414,25 +432,57 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
preempt_enable();
}
+static int tlb0_set_base(gva_t addr, int sets, int ways)
+{
+ int set_base;
+
+ set_base = (addr >> PAGE_SHIFT) & (sets - 1);
+ set_base *= ways;
+
+ return set_base;
+}
+
+static int gtlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr)
+{
+ return tlb0_set_base(addr, vcpu_e500->gtlb_params[0].sets,
+ vcpu_e500->gtlb_params[0].ways);
+}
+
+static unsigned int get_tlb_esel(struct kvm_vcpu *vcpu, int tlbsel)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int esel = get_tlb_esel_bit(vcpu);
+
+ if (tlbsel == 0) {
+ esel &= vcpu_e500->gtlb_params[0].ways - 1;
+ esel += gtlb0_set_base(vcpu_e500, vcpu->arch.shared->mas2);
+ } else {
+ esel &= vcpu_e500->gtlb_params[tlbsel].entries - 1;
+ }
+
+ return esel;
+}
+
/* Search the guest TLB for a matching entry. */
static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
gva_t eaddr, int tlbsel, unsigned int pid, int as)
{
- int size = vcpu_e500->gtlb_size[tlbsel];
- int set_base;
+ int size = vcpu_e500->gtlb_params[tlbsel].entries;
+ unsigned int set_base, offset;
int i;
if (tlbsel == 0) {
- int mask = size / KVM_E500_TLB0_WAY_NUM - 1;
- set_base = (eaddr >> PAGE_SHIFT) & mask;
- set_base *= KVM_E500_TLB0_WAY_NUM;
- size = KVM_E500_TLB0_WAY_NUM;
+ set_base = gtlb0_set_base(vcpu_e500, eaddr);
+ size = vcpu_e500->gtlb_params[0].ways;
} else {
set_base = 0;
}
+ offset = vcpu_e500->gtlb_offset[tlbsel];
+
for (i = 0; i < size; i++) {
- struct tlbe *tlbe = &vcpu_e500->gtlb_arch[tlbsel][set_base + i];
+ struct kvm_book3e_206_tlb_entry *tlbe =
+ &vcpu_e500->gtlb_arch[offset + set_base + i];
unsigned int tid;
if (eaddr < get_tlb_eaddr(tlbe))
@@ -457,27 +507,55 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
return -1;
}
-static inline void kvmppc_e500_priv_setup(struct tlbe_priv *priv,
- struct tlbe *gtlbe,
- pfn_t pfn)
+static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
+ struct kvm_book3e_206_tlb_entry *gtlbe,
+ pfn_t pfn)
{
- priv->pfn = pfn;
- priv->flags = E500_TLB_VALID;
+ ref->pfn = pfn;
+ ref->flags = E500_TLB_VALID;
if (tlbe_is_writable(gtlbe))
- priv->flags |= E500_TLB_DIRTY;
+ ref->flags |= E500_TLB_DIRTY;
}
-static inline void kvmppc_e500_priv_release(struct tlbe_priv *priv)
+static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
{
- if (priv->flags & E500_TLB_VALID) {
- if (priv->flags & E500_TLB_DIRTY)
- kvm_release_pfn_dirty(priv->pfn);
+ if (ref->flags & E500_TLB_VALID) {
+ if (ref->flags & E500_TLB_DIRTY)
+ kvm_release_pfn_dirty(ref->pfn);
else
- kvm_release_pfn_clean(priv->pfn);
+ kvm_release_pfn_clean(ref->pfn);
+
+ ref->flags = 0;
+ }
+}
+
+static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ int tlbsel = 0;
+ int i;
+
+ for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) {
+ struct tlbe_ref *ref =
+ &vcpu_e500->gtlb_priv[tlbsel][i].ref;
+ kvmppc_e500_ref_release(ref);
+ }
+}
+
+static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ int stlbsel = 1;
+ int i;
+
+ kvmppc_e500_id_table_reset_all(vcpu_e500);
- priv->flags = 0;
+ for (i = 0; i < host_tlb_params[stlbsel].entries; i++) {
+ struct tlbe_ref *ref =
+ &vcpu_e500->tlb_refs[stlbsel][i];
+ kvmppc_e500_ref_release(ref);
}
+
+ clear_tlb_privs(vcpu_e500);
}
static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
@@ -488,59 +566,54 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
int tlbsel;
/* since we only have two TLBs, only lower bit is used. */
- tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
- victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
- pidsel = (vcpu_e500->mas4 >> 16) & 0xf;
- tsized = (vcpu_e500->mas4 >> 7) & 0x1f;
+ tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1;
+ victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0;
+ pidsel = (vcpu->arch.shared->mas4 >> 16) & 0xf;
+ tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f;
- vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
+ vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
- vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
+ vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
| MAS1_TID(vcpu_e500->pid[pidsel])
| MAS1_TSIZE(tsized);
- vcpu_e500->mas2 = (eaddr & MAS2_EPN)
- | (vcpu_e500->mas4 & MAS2_ATTRIB_MASK);
- vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
- vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1)
+ vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN)
+ | (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK);
+ vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
+ vcpu->arch.shared->mas6 = (vcpu->arch.shared->mas6 & MAS6_SPID1)
| (get_cur_pid(vcpu) << 16)
| (as ? MAS6_SAS : 0);
- vcpu_e500->mas7 = 0;
}
-static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
- struct tlbe *gtlbe, int tsize,
- struct tlbe_priv *priv,
- u64 gvaddr, struct tlbe *stlbe)
+/* TID must be supplied by the caller */
+static inline void kvmppc_e500_setup_stlbe(
+ struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe,
+ int tsize, struct tlbe_ref *ref, u64 gvaddr,
+ struct kvm_book3e_206_tlb_entry *stlbe)
{
- pfn_t pfn = priv->pfn;
- unsigned int stid;
+ pfn_t pfn = ref->pfn;
- stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe),
- get_tlb_tid(gtlbe),
- get_cur_pr(&vcpu_e500->vcpu), 0);
+ BUG_ON(!(ref->flags & E500_TLB_VALID));
/* Force TS=1 IPROT=0 for all guest mappings. */
- stlbe->mas1 = MAS1_TSIZE(tsize)
- | MAS1_TID(stid) | MAS1_TS | MAS1_VALID;
+ stlbe->mas1 = MAS1_TSIZE(tsize) | MAS1_TS | MAS1_VALID;
stlbe->mas2 = (gvaddr & MAS2_EPN)
| e500_shadow_mas2_attrib(gtlbe->mas2,
vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
- stlbe->mas3 = ((pfn << PAGE_SHIFT) & MAS3_RPN)
- | e500_shadow_mas3_attrib(gtlbe->mas3,
+ stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT)
+ | e500_shadow_mas3_attrib(gtlbe->mas7_3,
vcpu_e500->vcpu.arch.shared->msr & MSR_PR);
- stlbe->mas7 = (pfn >> (32 - PAGE_SHIFT)) & MAS7_RPN;
}
-
static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
- u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel,
- struct tlbe *stlbe)
+ u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
+ int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe,
+ struct tlbe_ref *ref)
{
struct kvm_memory_slot *slot;
unsigned long pfn, hva;
int pfnmap = 0;
int tsize = BOOK3E_PAGESZ_4K;
- struct tlbe_priv *priv;
/*
* Translate guest physical to true physical, acquiring
@@ -621,12 +694,31 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
pfn &= ~(tsize_pages - 1);
break;
}
+ } else if (vma && hva >= vma->vm_start &&
+ (vma->vm_flags & VM_HUGETLB)) {
+ unsigned long psize = vma_kernel_pagesize(vma);
+
+ tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
+ MAS1_TSIZE_SHIFT;
+
+ /*
+ * Take the largest page size that satisfies both host
+ * and guest mapping
+ */
+ tsize = min(__ilog2(psize) - 10, tsize);
+
+ /*
+ * e500 doesn't implement the lowest tsize bit,
+ * or 1K pages.
+ */
+ tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
}
up_read(&current->mm->mmap_sem);
}
if (likely(!pfnmap)) {
+ unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn);
if (is_error_pfn(pfn)) {
printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
@@ -634,45 +726,52 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
kvm_release_pfn_clean(pfn);
return;
}
+
+ /* Align guest and physical address to page map boundaries */
+ pfn &= ~(tsize_pages - 1);
+ gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
}
- /* Drop old priv and setup new one. */
- priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
- kvmppc_e500_priv_release(priv);
- kvmppc_e500_priv_setup(priv, gtlbe, pfn);
+ /* Drop old ref and setup new one. */
+ kvmppc_e500_ref_release(ref);
+ kvmppc_e500_ref_setup(ref, gtlbe, pfn);
- kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, priv, gvaddr, stlbe);
+ kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, ref, gvaddr, stlbe);
}
/* XXX only map the one-one case, for now use TLB0 */
-static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
- int esel, struct tlbe *stlbe)
+static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+ int esel,
+ struct kvm_book3e_206_tlb_entry *stlbe)
{
- struct tlbe *gtlbe;
+ struct kvm_book3e_206_tlb_entry *gtlbe;
+ struct tlbe_ref *ref;
- gtlbe = &vcpu_e500->gtlb_arch[0][esel];
+ gtlbe = get_entry(vcpu_e500, 0, esel);
+ ref = &vcpu_e500->gtlb_priv[0][esel].ref;
kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
- gtlbe, 0, esel, stlbe);
-
- return esel;
+ gtlbe, 0, stlbe, ref);
}
/* Caller must ensure that the specified guest TLB entry is safe to insert into
* the shadow TLB. */
/* XXX for both one-one and one-to-many , for now use TLB1 */
static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
- u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, struct tlbe *stlbe)
+ u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe,
+ struct kvm_book3e_206_tlb_entry *stlbe)
{
+ struct tlbe_ref *ref;
unsigned int victim;
- victim = vcpu_e500->gtlb_nv[1]++;
+ victim = vcpu_e500->host_tlb1_nv++;
- if (unlikely(vcpu_e500->gtlb_nv[1] >= tlb1_max_shadow_size()))
- vcpu_e500->gtlb_nv[1] = 0;
+ if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size()))
+ vcpu_e500->host_tlb1_nv = 0;
- kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim, stlbe);
+ ref = &vcpu_e500->tlb_refs[1][victim];
+ kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref);
return victim;
}
@@ -689,7 +788,8 @@ static inline int kvmppc_e500_gtlbe_invalidate(
struct kvmppc_vcpu_e500 *vcpu_e500,
int tlbsel, int esel)
{
- struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+ struct kvm_book3e_206_tlb_entry *gtlbe =
+ get_entry(vcpu_e500, tlbsel, esel);
if (unlikely(get_tlb_iprot(gtlbe)))
return -1;
@@ -704,10 +804,10 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
int esel;
if (value & MMUCSR0_TLB0FI)
- for (esel = 0; esel < vcpu_e500->gtlb_size[0]; esel++)
+ for (esel = 0; esel < vcpu_e500->gtlb_params[0].entries; esel++)
kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel);
if (value & MMUCSR0_TLB1FI)
- for (esel = 0; esel < vcpu_e500->gtlb_size[1]; esel++)
+ for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++)
kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel);
/* Invalidate all vcpu id mappings */
@@ -732,7 +832,8 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
if (ia) {
/* invalidate all entries */
- for (esel = 0; esel < vcpu_e500->gtlb_size[tlbsel]; esel++)
+ for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries;
+ esel++)
kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
} else {
ea &= 0xfffff000;
@@ -752,18 +853,17 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int tlbsel, esel;
- struct tlbe *gtlbe;
+ struct kvm_book3e_206_tlb_entry *gtlbe;
- tlbsel = get_tlb_tlbsel(vcpu_e500);
- esel = get_tlb_esel(vcpu_e500, tlbsel);
+ tlbsel = get_tlb_tlbsel(vcpu);
+ esel = get_tlb_esel(vcpu, tlbsel);
- gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
- vcpu_e500->mas0 &= ~MAS0_NV(~0);
- vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
- vcpu_e500->mas1 = gtlbe->mas1;
- vcpu_e500->mas2 = gtlbe->mas2;
- vcpu_e500->mas3 = gtlbe->mas3;
- vcpu_e500->mas7 = gtlbe->mas7;
+ gtlbe = get_entry(vcpu_e500, tlbsel, esel);
+ vcpu->arch.shared->mas0 &= ~MAS0_NV(~0);
+ vcpu->arch.shared->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
+ vcpu->arch.shared->mas1 = gtlbe->mas1;
+ vcpu->arch.shared->mas2 = gtlbe->mas2;
+ vcpu->arch.shared->mas7_3 = gtlbe->mas7_3;
return EMULATE_DONE;
}
@@ -771,10 +871,10 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- int as = !!get_cur_sas(vcpu_e500);
- unsigned int pid = get_cur_spid(vcpu_e500);
+ int as = !!get_cur_sas(vcpu);
+ unsigned int pid = get_cur_spid(vcpu);
int esel, tlbsel;
- struct tlbe *gtlbe = NULL;
+ struct kvm_book3e_206_tlb_entry *gtlbe = NULL;
gva_t ea;
ea = kvmppc_get_gpr(vcpu, rb);
@@ -782,70 +882,90 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
for (tlbsel = 0; tlbsel < 2; tlbsel++) {
esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
if (esel >= 0) {
- gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+ gtlbe = get_entry(vcpu_e500, tlbsel, esel);
break;
}
}
if (gtlbe) {
- vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
+ esel &= vcpu_e500->gtlb_params[tlbsel].ways - 1;
+
+ vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
- vcpu_e500->mas1 = gtlbe->mas1;
- vcpu_e500->mas2 = gtlbe->mas2;
- vcpu_e500->mas3 = gtlbe->mas3;
- vcpu_e500->mas7 = gtlbe->mas7;
+ vcpu->arch.shared->mas1 = gtlbe->mas1;
+ vcpu->arch.shared->mas2 = gtlbe->mas2;
+ vcpu->arch.shared->mas7_3 = gtlbe->mas7_3;
} else {
int victim;
/* since we only have two TLBs, only lower bit is used. */
- tlbsel = vcpu_e500->mas4 >> 28 & 0x1;
- victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
+ tlbsel = vcpu->arch.shared->mas4 >> 28 & 0x1;
+ victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0;
- vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
+ vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel)
+ | MAS0_ESEL(victim)
| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
- vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0)
- | (vcpu_e500->mas6 & (MAS6_SAS ? MAS1_TS : 0))
- | (vcpu_e500->mas4 & MAS4_TSIZED(~0));
- vcpu_e500->mas2 &= MAS2_EPN;
- vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK;
- vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
- vcpu_e500->mas7 = 0;
+ vcpu->arch.shared->mas1 =
+ (vcpu->arch.shared->mas6 & MAS6_SPID0)
+ | (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0))
+ | (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0));
+ vcpu->arch.shared->mas2 &= MAS2_EPN;
+ vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 &
+ MAS2_ATTRIB_MASK;
+ vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 |
+ MAS3_U2 | MAS3_U3;
}
kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
return EMULATE_DONE;
}
+/* sesel is for tlb1 only */
+static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
+ struct kvm_book3e_206_tlb_entry *gtlbe,
+ struct kvm_book3e_206_tlb_entry *stlbe,
+ int stlbsel, int sesel)
+{
+ int stid;
+
+ preempt_disable();
+ stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe),
+ get_tlb_tid(gtlbe),
+ get_cur_pr(&vcpu_e500->vcpu), 0);
+
+ stlbe->mas1 |= MAS1_TID(stid);
+ write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe);
+ preempt_enable();
+}
+
int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- struct tlbe *gtlbe;
+ struct kvm_book3e_206_tlb_entry *gtlbe;
int tlbsel, esel;
- tlbsel = get_tlb_tlbsel(vcpu_e500);
- esel = get_tlb_esel(vcpu_e500, tlbsel);
+ tlbsel = get_tlb_tlbsel(vcpu);
+ esel = get_tlb_esel(vcpu, tlbsel);
- gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+ gtlbe = get_entry(vcpu_e500, tlbsel, esel);
if (get_tlb_v(gtlbe))
- kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
+ inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
- gtlbe->mas1 = vcpu_e500->mas1;
- gtlbe->mas2 = vcpu_e500->mas2;
- gtlbe->mas3 = vcpu_e500->mas3;
- gtlbe->mas7 = vcpu_e500->mas7;
+ gtlbe->mas1 = vcpu->arch.shared->mas1;
+ gtlbe->mas2 = vcpu->arch.shared->mas2;
+ gtlbe->mas7_3 = vcpu->arch.shared->mas7_3;
- trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2,
- gtlbe->mas3, gtlbe->mas7);
+ trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1,
+ gtlbe->mas2, gtlbe->mas7_3);
/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
if (tlbe_is_host_safe(vcpu, gtlbe)) {
- struct tlbe stlbe;
+ struct kvm_book3e_206_tlb_entry stlbe;
int stlbsel, sesel;
u64 eaddr;
u64 raddr;
- preempt_disable();
switch (tlbsel) {
case 0:
/* TLB0 */
@@ -853,7 +973,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
stlbsel = 0;
- sesel = kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
+ kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
+ sesel = 0; /* unused */
break;
@@ -874,8 +995,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
default:
BUG();
}
- write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
- preempt_enable();
+
+ write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
}
kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
@@ -914,9 +1035,11 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
gva_t eaddr)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
- struct tlbe *gtlbe =
- &vcpu_e500->gtlb_arch[tlbsel_of(index)][esel_of(index)];
- u64 pgmask = get_tlb_bytes(gtlbe) - 1;
+ struct kvm_book3e_206_tlb_entry *gtlbe;
+ u64 pgmask;
+
+ gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index));
+ pgmask = get_tlb_bytes(gtlbe) - 1;
return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
}
@@ -930,22 +1053,21 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
struct tlbe_priv *priv;
- struct tlbe *gtlbe, stlbe;
+ struct kvm_book3e_206_tlb_entry *gtlbe, stlbe;
int tlbsel = tlbsel_of(index);
int esel = esel_of(index);
int stlbsel, sesel;
- gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel];
+ gtlbe = get_entry(vcpu_e500, tlbsel, esel);
- preempt_disable();
switch (tlbsel) {
case 0:
stlbsel = 0;
- sesel = esel;
- priv = &vcpu_e500->gtlb_priv[stlbsel][sesel];
+ sesel = 0; /* unused */
+ priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K,
- priv, eaddr, &stlbe);
+ &priv->ref, eaddr, &stlbe);
break;
case 1: {
@@ -962,8 +1084,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
break;
}
- write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe);
- preempt_enable();
+ write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
}
int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
@@ -993,85 +1114,279 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
{
- struct tlbe *tlbe;
+ struct kvm_book3e_206_tlb_entry *tlbe;
/* Insert large initial mapping for guest. */
- tlbe = &vcpu_e500->gtlb_arch[1][0];
+ tlbe = get_entry(vcpu_e500, 1, 0);
tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
tlbe->mas2 = 0;
- tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
- tlbe->mas7 = 0;
+ tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK;
/* 4K map for serial output. Used by kernel wrapper. */
- tlbe = &vcpu_e500->gtlb_arch[1][1];
+ tlbe = get_entry(vcpu_e500, 1, 1);
tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
- tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
- tlbe->mas7 = 0;
+ tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
+}
+
+static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ int i;
+
+ clear_tlb_refs(vcpu_e500);
+ kfree(vcpu_e500->gtlb_priv[0]);
+ kfree(vcpu_e500->gtlb_priv[1]);
+
+ if (vcpu_e500->shared_tlb_pages) {
+ vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch,
+ PAGE_SIZE)));
+
+ for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++) {
+ set_page_dirty_lock(vcpu_e500->shared_tlb_pages[i]);
+ put_page(vcpu_e500->shared_tlb_pages[i]);
+ }
+
+ vcpu_e500->num_shared_tlb_pages = 0;
+ vcpu_e500->shared_tlb_pages = NULL;
+ } else {
+ kfree(vcpu_e500->gtlb_arch);
+ }
+
+ vcpu_e500->gtlb_arch = NULL;
+}
+
+int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
+ struct kvm_config_tlb *cfg)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct kvm_book3e_206_tlb_params params;
+ char *virt;
+ struct page **pages;
+ struct tlbe_priv *privs[2] = {};
+ size_t array_len;
+ u32 sets;
+ int num_pages, ret, i;
+
+ if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV)
+ return -EINVAL;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)cfg->params,
+ sizeof(params)))
+ return -EFAULT;
+
+ if (params.tlb_sizes[1] > 64)
+ return -EINVAL;
+ if (params.tlb_ways[1] != params.tlb_sizes[1])
+ return -EINVAL;
+ if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0)
+ return -EINVAL;
+ if (params.tlb_ways[2] != 0 || params.tlb_ways[3] != 0)
+ return -EINVAL;
+
+ if (!is_power_of_2(params.tlb_ways[0]))
+ return -EINVAL;
+
+ sets = params.tlb_sizes[0] >> ilog2(params.tlb_ways[0]);
+ if (!is_power_of_2(sets))
+ return -EINVAL;
+
+ array_len = params.tlb_sizes[0] + params.tlb_sizes[1];
+ array_len *= sizeof(struct kvm_book3e_206_tlb_entry);
+
+ if (cfg->array_len < array_len)
+ return -EINVAL;
+
+ num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) -
+ cfg->array / PAGE_SIZE;
+ pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
+ if (ret < 0)
+ goto err_pages;
+
+ if (ret != num_pages) {
+ num_pages = ret;
+ ret = -EFAULT;
+ goto err_put_page;
+ }
+
+ virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
+ if (!virt)
+ goto err_put_page;
+
+ privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
+ GFP_KERNEL);
+ privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
+ GFP_KERNEL);
+
+ if (!privs[0] || !privs[1])
+ goto err_put_page;
+
+ free_gtlb(vcpu_e500);
+
+ vcpu_e500->gtlb_priv[0] = privs[0];
+ vcpu_e500->gtlb_priv[1] = privs[1];
+
+ vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *)
+ (virt + (cfg->array & (PAGE_SIZE - 1)));
+
+ vcpu_e500->gtlb_params[0].entries = params.tlb_sizes[0];
+ vcpu_e500->gtlb_params[1].entries = params.tlb_sizes[1];
+
+ vcpu_e500->gtlb_offset[0] = 0;
+ vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0];
+
+ vcpu_e500->tlb0cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ if (params.tlb_sizes[0] <= 2048)
+ vcpu_e500->tlb0cfg |= params.tlb_sizes[0];
+ vcpu_e500->tlb0cfg |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT;
+
+ vcpu_e500->tlb1cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ vcpu_e500->tlb1cfg |= params.tlb_sizes[1];
+ vcpu_e500->tlb1cfg |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT;
+
+ vcpu_e500->shared_tlb_pages = pages;
+ vcpu_e500->num_shared_tlb_pages = num_pages;
+
+ vcpu_e500->gtlb_params[0].ways = params.tlb_ways[0];
+ vcpu_e500->gtlb_params[0].sets = sets;
+
+ vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1];
+ vcpu_e500->gtlb_params[1].sets = 1;
+
+ return 0;
+
+err_put_page:
+ kfree(privs[0]);
+ kfree(privs[1]);
+
+ for (i = 0; i < num_pages; i++)
+ put_page(pages[i]);
+
+err_pages:
+ kfree(pages);
+ return ret;
+}
+
+int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
+ struct kvm_dirty_tlb *dirty)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ clear_tlb_refs(vcpu_e500);
+ return 0;
}
int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
{
- tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
-
- vcpu_e500->gtlb_size[0] = KVM_E500_TLB0_SIZE;
- vcpu_e500->gtlb_arch[0] =
- kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
- if (vcpu_e500->gtlb_arch[0] == NULL)
- goto err_out;
-
- vcpu_e500->gtlb_size[1] = KVM_E500_TLB1_SIZE;
- vcpu_e500->gtlb_arch[1] =
- kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
- if (vcpu_e500->gtlb_arch[1] == NULL)
- goto err_out_guest0;
-
- vcpu_e500->gtlb_priv[0] = (struct tlbe_priv *)
- kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
- if (vcpu_e500->gtlb_priv[0] == NULL)
- goto err_out_guest1;
- vcpu_e500->gtlb_priv[1] = (struct tlbe_priv *)
- kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
-
- if (vcpu_e500->gtlb_priv[1] == NULL)
- goto err_out_priv0;
+ int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
+ int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
+
+ host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY;
+ host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
+
+ /*
+ * This should never happen on real e500 hardware, but is
+ * architecturally possible -- e.g. in some weird nested
+ * virtualization case.
+ */
+ if (host_tlb_params[0].entries == 0 ||
+ host_tlb_params[1].entries == 0) {
+ pr_err("%s: need to know host tlb size\n", __func__);
+ return -ENODEV;
+ }
+
+ host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >>
+ TLBnCFG_ASSOC_SHIFT;
+ host_tlb_params[1].ways = host_tlb_params[1].entries;
+
+ if (!is_power_of_2(host_tlb_params[0].entries) ||
+ !is_power_of_2(host_tlb_params[0].ways) ||
+ host_tlb_params[0].entries < host_tlb_params[0].ways ||
+ host_tlb_params[0].ways == 0) {
+ pr_err("%s: bad tlb0 host config: %u entries %u ways\n",
+ __func__, host_tlb_params[0].entries,
+ host_tlb_params[0].ways);
+ return -ENODEV;
+ }
+
+ host_tlb_params[0].sets =
+ host_tlb_params[0].entries / host_tlb_params[0].ways;
+ host_tlb_params[1].sets = 1;
+
+ vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE;
+ vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE;
+
+ vcpu_e500->gtlb_params[0].ways = KVM_E500_TLB0_WAY_NUM;
+ vcpu_e500->gtlb_params[0].sets =
+ KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM;
+
+ vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE;
+ vcpu_e500->gtlb_params[1].sets = 1;
+
+ vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL);
+ if (!vcpu_e500->gtlb_arch)
+ return -ENOMEM;
+
+ vcpu_e500->gtlb_offset[0] = 0;
+ vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;
+
+ vcpu_e500->tlb_refs[0] =
+ kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries,
+ GFP_KERNEL);
+ if (!vcpu_e500->tlb_refs[0])
+ goto err;
+
+ vcpu_e500->tlb_refs[1] =
+ kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries,
+ GFP_KERNEL);
+ if (!vcpu_e500->tlb_refs[1])
+ goto err;
+
+ vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) *
+ vcpu_e500->gtlb_params[0].entries,
+ GFP_KERNEL);
+ if (!vcpu_e500->gtlb_priv[0])
+ goto err;
+
+ vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) *
+ vcpu_e500->gtlb_params[1].entries,
+ GFP_KERNEL);
+ if (!vcpu_e500->gtlb_priv[1])
+ goto err;
if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
- goto err_out_priv1;
+ goto err;
/* Init TLB configuration register */
- vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
- vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_size[0];
- vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL;
- vcpu_e500->tlb1cfg |= vcpu_e500->gtlb_size[1];
+ vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) &
+ ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[0].entries;
+ vcpu_e500->tlb0cfg |=
+ vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT;
+
+ vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) &
+ ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC);
+ vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[1].entries;
+ vcpu_e500->tlb0cfg |=
+ vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT;
return 0;
-err_out_priv1:
- kfree(vcpu_e500->gtlb_priv[1]);
-err_out_priv0:
- kfree(vcpu_e500->gtlb_priv[0]);
-err_out_guest1:
- kfree(vcpu_e500->gtlb_arch[1]);
-err_out_guest0:
- kfree(vcpu_e500->gtlb_arch[0]);
-err_out:
+err:
+ free_gtlb(vcpu_e500);
+ kfree(vcpu_e500->tlb_refs[0]);
+ kfree(vcpu_e500->tlb_refs[1]);
return -1;
}
void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
{
- int stlbsel, i;
-
- /* release all privs */
- for (stlbsel = 0; stlbsel < 2; stlbsel++)
- for (i = 0; i < vcpu_e500->gtlb_size[stlbsel]; i++) {
- struct tlbe_priv *priv =
- &vcpu_e500->gtlb_priv[stlbsel][i];
- kvmppc_e500_priv_release(priv);
- }
-
+ free_gtlb(vcpu_e500);
kvmppc_e500_id_table_free(vcpu_e500);
- kfree(vcpu_e500->gtlb_arch[1]);
- kfree(vcpu_e500->gtlb_arch[0]);
+
+ kfree(vcpu_e500->tlb_refs[0]);
+ kfree(vcpu_e500->tlb_refs[1]);
}
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
index 59b88e99a235..5c6d2d7bf058 100644
--- a/arch/powerpc/kvm/e500_tlb.h
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -20,13 +20,9 @@
#include <asm/tlb.h>
#include <asm/kvm_e500.h>
-#define KVM_E500_TLB0_WAY_SIZE_BIT 7 /* Fixed */
-#define KVM_E500_TLB0_WAY_SIZE (1UL << KVM_E500_TLB0_WAY_SIZE_BIT)
-#define KVM_E500_TLB0_WAY_SIZE_MASK (KVM_E500_TLB0_WAY_SIZE - 1)
-
-#define KVM_E500_TLB0_WAY_NUM_BIT 1 /* No greater than 7 */
-#define KVM_E500_TLB0_WAY_NUM (1UL << KVM_E500_TLB0_WAY_NUM_BIT)
-#define KVM_E500_TLB0_WAY_NUM_MASK (KVM_E500_TLB0_WAY_NUM - 1)
+/* This geometry is the legacy default -- can be overridden by userspace */
+#define KVM_E500_TLB0_WAY_SIZE 128
+#define KVM_E500_TLB0_WAY_NUM 2
#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
#define KVM_E500_TLB1_SIZE 16
@@ -58,50 +54,54 @@ extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *);
/* TLB helper functions */
-static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
{
return (tlbe->mas1 >> 7) & 0x1f;
}
-static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
+static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
{
return tlbe->mas2 & 0xfffff000;
}
-static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
+static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
{
unsigned int pgsize = get_tlb_size(tlbe);
return 1ULL << 10 << pgsize;
}
-static inline gva_t get_tlb_end(const struct tlbe *tlbe)
+static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe)
{
u64 bytes = get_tlb_bytes(tlbe);
return get_tlb_eaddr(tlbe) + bytes - 1;
}
-static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
+static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe)
{
- u64 rpn = tlbe->mas7;
- return (rpn << 32) | (tlbe->mas3 & 0xfffff000);
+ return tlbe->mas7_3 & ~0xfffULL;
}
-static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe)
{
return (tlbe->mas1 >> 16) & 0xff;
}
-static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe)
{
return (tlbe->mas1 >> 12) & 0x1;
}
-static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe)
{
return (tlbe->mas1 >> 31) & 0x1;
}
-static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe)
+static inline unsigned int
+get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe)
{
return (tlbe->mas1 >> 30) & 0x1;
}
@@ -121,59 +121,37 @@ static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu)
return !!(vcpu->arch.shared->msr & MSR_PR);
}
-static inline unsigned int get_cur_spid(
- const struct kvmppc_vcpu_e500 *vcpu_e500)
+static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu)
{
- return (vcpu_e500->mas6 >> 16) & 0xff;
+ return (vcpu->arch.shared->mas6 >> 16) & 0xff;
}
-static inline unsigned int get_cur_sas(
- const struct kvmppc_vcpu_e500 *vcpu_e500)
+static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu)
{
- return vcpu_e500->mas6 & 0x1;
+ return vcpu->arch.shared->mas6 & 0x1;
}
-static inline unsigned int get_tlb_tlbsel(
- const struct kvmppc_vcpu_e500 *vcpu_e500)
+static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu)
{
/*
* Manual says that tlbsel has 2 bits wide.
* Since we only have two TLBs, only lower bit is used.
*/
- return (vcpu_e500->mas0 >> 28) & 0x1;
-}
-
-static inline unsigned int get_tlb_nv_bit(
- const struct kvmppc_vcpu_e500 *vcpu_e500)
-{
- return vcpu_e500->mas0 & 0xfff;
+ return (vcpu->arch.shared->mas0 >> 28) & 0x1;
}
-static inline unsigned int get_tlb_esel_bit(
- const struct kvmppc_vcpu_e500 *vcpu_e500)
+static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu)
{
- return (vcpu_e500->mas0 >> 16) & 0xfff;
+ return vcpu->arch.shared->mas0 & 0xfff;
}
-static inline unsigned int get_tlb_esel(
- const struct kvmppc_vcpu_e500 *vcpu_e500,
- int tlbsel)
+static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu)
{
- unsigned int esel = get_tlb_esel_bit(vcpu_e500);
-
- if (tlbsel == 0) {
- esel &= KVM_E500_TLB0_WAY_NUM_MASK;
- esel |= ((vcpu_e500->mas2 >> 12) & KVM_E500_TLB0_WAY_SIZE_MASK)
- << KVM_E500_TLB0_WAY_NUM_BIT;
- } else {
- esel &= KVM_E500_TLB1_SIZE - 1;
- }
-
- return esel;
+ return (vcpu->arch.shared->mas0 >> 16) & 0xfff;
}
static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
- const struct tlbe *tlbe)
+ const struct kvm_book3e_206_tlb_entry *tlbe)
{
gpa_t gpa;
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 141dce3c6810..968f40101883 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -13,6 +13,7 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2007
+ * Copyright 2011 Freescale Semiconductor, Inc.
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
@@ -69,54 +70,55 @@
#define OP_STH 44
#define OP_STHU 45
-#ifdef CONFIG_PPC_BOOK3S
-static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu)
-{
- return 1;
-}
-#else
-static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.tcr & TCR_DIE;
-}
-#endif
-
void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
{
unsigned long dec_nsec;
+ unsigned long long dec_time;
pr_debug("mtDEC: %x\n", vcpu->arch.dec);
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+
#ifdef CONFIG_PPC_BOOK3S
/* mtdec lowers the interrupt line when positive. */
kvmppc_core_dequeue_dec(vcpu);
/* POWER4+ triggers a dec interrupt if the value is < 0 */
if (vcpu->arch.dec & 0x80000000) {
- hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
kvmppc_core_queue_dec(vcpu);
return;
}
#endif
- if (kvmppc_dec_enabled(vcpu)) {
- /* The decrementer ticks at the same rate as the timebase, so
- * that's how we convert the guest DEC value to the number of
- * host ticks. */
-
- hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
- dec_nsec = vcpu->arch.dec;
- dec_nsec *= 1000;
- dec_nsec /= tb_ticks_per_usec;
- hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
- HRTIMER_MODE_REL);
- vcpu->arch.dec_jiffies = get_tb();
- } else {
- hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
- }
+
+#ifdef CONFIG_BOOKE
+ /* On BOOKE, DEC = 0 is as good as decrementer not enabled */
+ if (vcpu->arch.dec == 0)
+ return;
+#endif
+
+ /*
+ * The decrementer ticks at the same rate as the timebase, so
+ * that's how we convert the guest DEC value to the number of
+ * host ticks.
+ */
+
+ dec_time = vcpu->arch.dec;
+ dec_time *= 1000;
+ do_div(dec_time, tb_ticks_per_usec);
+ dec_nsec = do_div(dec_time, NSEC_PER_SEC);
+ hrtimer_start(&vcpu->arch.dec_timer,
+ ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL);
+ vcpu->arch.dec_jiffies = get_tb();
}
u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
{
u64 jd = tb - vcpu->arch.dec_jiffies;
+
+#ifdef CONFIG_BOOKE
+ if (vcpu->arch.dec < jd)
+ return 0;
+#endif
+
return vcpu->arch.dec - jd;
}
@@ -159,7 +161,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case OP_TRAP_64:
kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
#else
- kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR);
+ kvmppc_core_queue_program(vcpu,
+ vcpu->arch.shared->esr | ESR_PTR);
#endif
advance = 0;
break;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 607fbdf24b84..00d7e345b3fe 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -39,7 +39,8 @@
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
return !(v->arch.shared->msr & MSR_WE) ||
- !!(v->arch.pending_exceptions);
+ !!(v->arch.pending_exceptions) ||
+ v->requests;
}
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
@@ -66,7 +67,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
vcpu->arch.magic_page_pa = param1;
vcpu->arch.magic_page_ea = param2;
- r2 = KVM_MAGIC_FEAT_SR;
+ r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
r = HC_EV_SUCCESS;
break;
@@ -171,8 +172,11 @@ void kvm_arch_check_processor_compat(void *rtn)
*(int *)rtn = kvmppc_core_check_processor_compat();
}
-int kvm_arch_init_vm(struct kvm *kvm)
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
+ if (type)
+ return -EINVAL;
+
return kvmppc_core_init_vm(kvm);
}
@@ -208,17 +212,22 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PPC_BOOKE_SREGS:
#else
case KVM_CAP_PPC_SEGSTATE:
+ case KVM_CAP_PPC_HIOR:
case KVM_CAP_PPC_PAPR:
#endif
case KVM_CAP_PPC_UNSET_IRQ:
case KVM_CAP_PPC_IRQ_LEVEL:
case KVM_CAP_ENABLE_CAP:
+ case KVM_CAP_ONE_REG:
r = 1;
break;
#ifndef CONFIG_KVM_BOOK3S_64_HV
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_OSI:
case KVM_CAP_PPC_GET_PVINFO:
+#ifdef CONFIG_KVM_E500
+ case KVM_CAP_SW_TLB:
+#endif
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -238,7 +247,26 @@ int kvm_dev_ioctl_check_extension(long ext)
if (cpu_has_feature(CPU_FTR_ARCH_201))
r = 2;
break;
+ case KVM_CAP_SYNC_MMU:
+ r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
+ break;
#endif
+ case KVM_CAP_NR_VCPUS:
+ /*
+ * Recommending a number of CPUs is somewhat arbitrary; we
+ * return the number of present CPUs for -HV (since a host
+ * will have secondary threads "offline"), and for other KVM
+ * implementations just count online CPUs.
+ */
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ r = num_present_cpus();
+#else
+ r = num_online_cpus();
+#endif
+ break;
+ case KVM_CAP_MAX_VCPUS:
+ r = KVM_MAX_VCPUS;
+ break;
default:
r = 0;
break;
@@ -253,6 +281,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
+void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+}
+
+int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+{
+ return 0;
+}
+
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
@@ -279,9 +317,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvm_vcpu *vcpu;
vcpu = kvmppc_core_vcpu_create(kvm, id);
- vcpu->arch.wqp = &vcpu->wq;
- if (!IS_ERR(vcpu))
+ if (!IS_ERR(vcpu)) {
+ vcpu->arch.wqp = &vcpu->wq;
kvmppc_create_vcpu_debugfs(vcpu, id);
+ }
return vcpu;
}
@@ -305,18 +344,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return kvmppc_core_pending_dec(vcpu);
}
-static void kvmppc_decrementer_func(unsigned long data)
-{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
-
- kvmppc_core_queue_dec(vcpu);
-
- if (waitqueue_active(vcpu->arch.wqp)) {
- wake_up_interruptible(vcpu->arch.wqp);
- vcpu->stat.halt_wakeup++;
- }
-}
-
/*
* low level hrtimer wake routine. Because this runs in hardirq context
* we schedule a tasklet to do the real work.
@@ -431,20 +458,20 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
- switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) {
- case KVM_REG_GPR:
+ switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) {
+ case KVM_MMIO_REG_GPR:
kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
break;
- case KVM_REG_FPR:
- vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
+ case KVM_MMIO_REG_FPR:
+ vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
break;
#ifdef CONFIG_PPC_BOOK3S
- case KVM_REG_QPR:
- vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
+ case KVM_MMIO_REG_QPR:
+ vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
break;
- case KVM_REG_FQPR:
- vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
- vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
+ case KVM_MMIO_REG_FQPR:
+ vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
+ vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
break;
#endif
default:
@@ -553,8 +580,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
vcpu->arch.hcall_needed = 0;
}
- kvmppc_core_deliver_interrupts(vcpu);
-
r = kvmppc_vcpu_run(run, vcpu);
if (vcpu->sigset_active)
@@ -563,6 +588,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
return r;
}
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+ int me;
+ int cpu = vcpu->cpu;
+
+ me = get_cpu();
+ if (waitqueue_active(vcpu->arch.wqp)) {
+ wake_up_interruptible(vcpu->arch.wqp);
+ vcpu->stat.halt_wakeup++;
+ } else if (cpu != me && cpu != -1) {
+ smp_send_reschedule(vcpu->cpu);
+ }
+ put_cpu();
+}
+
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
{
if (irq->irq == KVM_INTERRUPT_UNSET) {
@@ -571,13 +611,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
}
kvmppc_core_queue_external(vcpu, irq);
-
- if (waitqueue_active(vcpu->arch.wqp)) {
- wake_up_interruptible(vcpu->arch.wqp);
- vcpu->stat.halt_wakeup++;
- } else if (vcpu->cpu != -1) {
- smp_send_reschedule(vcpu->cpu);
- }
+ kvm_vcpu_kick(vcpu);
return 0;
}
@@ -599,6 +633,19 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
r = 0;
vcpu->arch.papr_enabled = true;
break;
+#ifdef CONFIG_KVM_E500
+ case KVM_CAP_SW_TLB: {
+ struct kvm_config_tlb cfg;
+ void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0];
+
+ r = -EFAULT;
+ if (copy_from_user(&cfg, user_ptr, sizeof(cfg)))
+ break;
+
+ r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg);
+ break;
+ }
+#endif
default:
r = -EINVAL;
break;
@@ -648,6 +695,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
break;
}
+
+ case KVM_SET_ONE_REG:
+ case KVM_GET_ONE_REG:
+ {
+ struct kvm_one_reg reg;
+ r = -EFAULT;
+ if (copy_from_user(&reg, argp, sizeof(reg)))
+ goto out;
+ if (ioctl == KVM_SET_ONE_REG)
+ r = kvm_vcpu_ioctl_set_one_reg(vcpu, &reg);
+ else
+ r = kvm_vcpu_ioctl_get_one_reg(vcpu, &reg);
+ break;
+ }
+
+#ifdef CONFIG_KVM_E500
+ case KVM_DIRTY_TLB: {
+ struct kvm_dirty_tlb dirty;
+ r = -EFAULT;
+ if (copy_from_user(&dirty, argp, sizeof(dirty)))
+ goto out;
+ r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
+ break;
+ }
+#endif
+
default:
r = -EINVAL;
}
@@ -656,6 +729,11 @@ out:
return r;
}
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+ return VM_FAULT_SIGBUS;
+}
+
static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
{
u32 inst_lis = 0x3c000000;
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index b135d3d397db..877186b7b1c3 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -118,11 +118,14 @@ TRACE_EVENT(kvm_book3s_exit,
),
TP_fast_assign(
+ struct kvmppc_book3s_shadow_vcpu *svcpu;
__entry->exit_nr = exit_nr;
__entry->pc = kvmppc_get_pc(vcpu);
__entry->dar = kvmppc_get_fault_dar(vcpu);
__entry->msr = vcpu->arch.shared->msr;
- __entry->srr1 = to_svcpu(vcpu)->shadow_srr1;
+ svcpu = svcpu_get(vcpu);
+ __entry->srr1 = svcpu->shadow_srr1;
+ svcpu_put(svcpu);
),
TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx",
@@ -337,6 +340,63 @@ TRACE_EVENT(kvm_book3s_slbmte,
#endif /* CONFIG_PPC_BOOK3S */
+
+/*************************************************************************
+ * Book3E trace points *
+ *************************************************************************/
+
+#ifdef CONFIG_BOOKE
+
+TRACE_EVENT(kvm_booke206_stlb_write,
+ TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3),
+ TP_ARGS(mas0, mas8, mas1, mas2, mas7_3),
+
+ TP_STRUCT__entry(
+ __field( __u32, mas0 )
+ __field( __u32, mas8 )
+ __field( __u32, mas1 )
+ __field( __u64, mas2 )
+ __field( __u64, mas7_3 )
+ ),
+
+ TP_fast_assign(
+ __entry->mas0 = mas0;
+ __entry->mas8 = mas8;
+ __entry->mas1 = mas1;
+ __entry->mas2 = mas2;
+ __entry->mas7_3 = mas7_3;
+ ),
+
+ TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx",
+ __entry->mas0, __entry->mas8, __entry->mas1,
+ __entry->mas2, __entry->mas7_3)
+);
+
+TRACE_EVENT(kvm_booke206_gtlb_write,
+ TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3),
+ TP_ARGS(mas0, mas1, mas2, mas7_3),
+
+ TP_STRUCT__entry(
+ __field( __u32, mas0 )
+ __field( __u32, mas1 )
+ __field( __u64, mas2 )
+ __field( __u64, mas7_3 )
+ ),
+
+ TP_fast_assign(
+ __entry->mas0 = mas0;
+ __entry->mas1 = mas1;
+ __entry->mas2 = mas2;
+ __entry->mas7_3 = mas7_3;
+ ),
+
+ TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx",
+ __entry->mas0, __entry->mas1,
+ __entry->mas2, __entry->mas7_3)
+);
+
+#endif
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
index 13b676c20d12..da22c84a8fed 100644
--- a/arch/powerpc/lib/alloc.c
+++ b/arch/powerpc/lib/alloc.c
@@ -3,8 +3,8 @@
#include <linux/slab.h>
#include <linux/bootmem.h>
#include <linux/string.h>
+#include <asm/setup.h>
-#include <asm/system.h>
void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask)
{
diff --git a/arch/powerpc/lib/copyuser_power7_vmx.c b/arch/powerpc/lib/copyuser_power7_vmx.c
index 6e1efadac48b..bf2654f2b68e 100644
--- a/arch/powerpc/lib/copyuser_power7_vmx.c
+++ b/arch/powerpc/lib/copyuser_power7_vmx.c
@@ -20,6 +20,7 @@
*/
#include <linux/uaccess.h>
#include <linux/hardirq.h>
+#include <asm/switch_to.h>
int enter_vmx_copy(void)
{
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index 388b95e1a009..2c9441ee6bb8 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -27,7 +27,6 @@
#include <linux/memblock.h>
#include <asm/mmu.h>
-#include <asm/system.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 19f2f9498b27..08ffcf52a856 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -38,10 +38,10 @@
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/siginfo.h>
+#include <asm/debug.h>
#include <mm/mmu_decl.h>
#include "icswx.h"
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 3e8c37a4e395..377e5cbedbbb 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -40,7 +40,6 @@
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/types.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/machdep.h>
#include <asm/prom.h>
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 57c7465e656e..fb05b123218f 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -12,6 +12,7 @@
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
+#include <linux/export.h>
#include <linux/of_fdt.h>
#include <linux/memblock.h>
#include <linux/bootmem.h>
@@ -103,6 +104,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
*shift = hugepd_shift(*hpdp);
return hugepte_offset(hpdp, ea, pdshift);
}
+EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
@@ -310,7 +312,8 @@ void __init reserve_hugetlb_gpages(void)
int i;
strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
- parse_args("hugetlb gpages", cmdline, NULL, 0, &do_gpage_early_setup);
+ parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0,
+ &do_gpage_early_setup);
/*
* Walk gpage list in reverse, allocating larger page sizes first.
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 6157be2a7049..01e2db97a210 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -45,7 +45,6 @@
#include <asm/btext.h>
#include <asm/tlb.h>
#include <asm/sections.h>
-#include <asm/system.h>
#include <asm/hugetlb.h>
#include "mmu_decl.h"
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index e94b57fb79a0..620b7acd2fdf 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -61,7 +61,6 @@
#include <asm/mmzone.h>
#include <asm/cputable.h>
#include <asm/sections.h>
-#include <asm/system.h>
#include <asm/iommu.h>
#include <asm/abs_addr.h>
#include <asm/vdso.h>
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 3feefc3842a8..b6edbb3b4a54 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -24,11 +24,11 @@
#include <linux/node.h>
#include <asm/sparsemem.h>
#include <asm/prom.h>
-#include <asm/system.h>
#include <asm/smp.h>
#include <asm/firmware.h>
#include <asm/paca.h>
#include <asm/hvcall.h>
+#include <asm/setup.h>
static int numa_enabled = 1;
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 0907f92ce309..6c856fb8c15b 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -33,6 +33,7 @@
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
#include <asm/io.h>
+#include <asm/setup.h>
#include "mmu_decl.h"
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index ad36ede469cc..249a0631c4db 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -51,7 +51,6 @@
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/sections.h>
-#include <asm/system.h>
#include <asm/abs_addr.h>
#include <asm/firmware.h>
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index 6f01624f317f..4f51025f5b00 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -18,7 +18,6 @@
#include <linux/smp.h>
#include <linux/errno.h>
#include <asm/ptrace.h>
-#include <asm/system.h>
#include <asm/pmc.h>
#include <asm/cputable.h>
#include <asm/oprofile_impl.h>
diff --git a/arch/powerpc/oprofile/op_model_7450.c b/arch/powerpc/oprofile/op_model_7450.c
index f8d36f940e88..ff617246d128 100644
--- a/arch/powerpc/oprofile/op_model_7450.c
+++ b/arch/powerpc/oprofile/op_model_7450.c
@@ -19,7 +19,6 @@
#include <linux/init.h>
#include <linux/smp.h>
#include <asm/ptrace.h>
-#include <asm/system.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/page.h>
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index cb515cff745c..b9589c19ccda 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -34,7 +34,6 @@
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/rtas.h>
-#include <asm/system.h>
#include <asm/cell-regs.h>
#include "../platforms/cell/interrupt.h"
diff --git a/arch/powerpc/oprofile/op_model_fsl_emb.c b/arch/powerpc/oprofile/op_model_fsl_emb.c
index d4e6507277b5..ccc1daa33aed 100644
--- a/arch/powerpc/oprofile/op_model_fsl_emb.c
+++ b/arch/powerpc/oprofile/op_model_fsl_emb.c
@@ -17,7 +17,6 @@
#include <linux/init.h>
#include <linux/smp.h>
#include <asm/ptrace.h>
-#include <asm/system.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/reg_fsl_emb.h>
diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c
index e6bec74be131..95ae77dec3f6 100644
--- a/arch/powerpc/oprofile/op_model_power4.c
+++ b/arch/powerpc/oprofile/op_model_power4.c
@@ -14,7 +14,6 @@
#include <linux/smp.h>
#include <asm/firmware.h>
#include <asm/ptrace.h>
-#include <asm/system.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/rtas.h>
diff --git a/arch/powerpc/oprofile/op_model_rs64.c b/arch/powerpc/oprofile/op_model_rs64.c
index a20afe45d936..9b801b8c8c5a 100644
--- a/arch/powerpc/oprofile/op_model_rs64.c
+++ b/arch/powerpc/oprofile/op_model_rs64.c
@@ -11,7 +11,6 @@
#include <linux/init.h>
#include <linux/smp.h>
#include <asm/ptrace.h>
-#include <asm/system.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/oprofile_impl.h>
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index c2e27ede07ec..02aee03e713c 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -116,14 +116,45 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
*addrp = mfspr(SPRN_SDAR);
}
+static inline u32 perf_flags_from_msr(struct pt_regs *regs)
+{
+ if (regs->msr & MSR_PR)
+ return PERF_RECORD_MISC_USER;
+ if ((regs->msr & MSR_HV) && freeze_events_kernel != MMCR0_FCHV)
+ return PERF_RECORD_MISC_HYPERVISOR;
+ return PERF_RECORD_MISC_KERNEL;
+}
+
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
unsigned long mmcra = regs->dsisr;
unsigned long sihv = MMCRA_SIHV;
unsigned long sipr = MMCRA_SIPR;
+ /* Not a PMU interrupt: Make up flags from regs->msr */
if (TRAP(regs) != 0xf00)
- return 0; /* not a PMU interrupt */
+ return perf_flags_from_msr(regs);
+
+ /*
+ * If we don't support continuous sampling and this
+ * is not a marked event, same deal
+ */
+ if ((ppmu->flags & PPMU_NO_CONT_SAMPLING) &&
+ !(mmcra & MMCRA_SAMPLE_ENABLE))
+ return perf_flags_from_msr(regs);
+
+ /*
+ * If we don't have flags in MMCRA, rather than using
+ * the MSR, we intuit the flags from the address in
+ * SIAR which should give slightly more reliable
+ * results
+ */
+ if (ppmu->flags & PPMU_NO_SIPR) {
+ unsigned long siar = mfspr(SPRN_SIAR);
+ if (siar >= PAGE_OFFSET)
+ return PERF_RECORD_MISC_KERNEL;
+ return PERF_RECORD_MISC_USER;
+ }
if (ppmu->flags & PPMU_ALT_SIPR) {
sihv = POWER6_MMCRA_SIHV;
@@ -1299,13 +1330,18 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
*/
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
- unsigned long ip;
+ unsigned long mmcra = regs->dsisr;
+ /* Not a PMU interrupt */
if (TRAP(regs) != 0xf00)
- return regs->nip; /* not a PMU interrupt */
+ return regs->nip;
+
+ /* Processor doesn't support sampling non marked events */
+ if ((ppmu->flags & PPMU_NO_CONT_SAMPLING) &&
+ !(mmcra & MMCRA_SAMPLE_ENABLE))
+ return regs->nip;
- ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
- return ip;
+ return mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
}
static bool pmc_overflow(unsigned long val)
diff --git a/arch/powerpc/perf/power4-pmu.c b/arch/powerpc/perf/power4-pmu.c
index b4f1dda4d089..9103a1de864d 100644
--- a/arch/powerpc/perf/power4-pmu.c
+++ b/arch/powerpc/perf/power4-pmu.c
@@ -607,6 +607,7 @@ static struct power_pmu power4_pmu = {
.n_generic = ARRAY_SIZE(p4_generic_events),
.generic_events = p4_generic_events,
.cache_events = &power4_cache_events,
+ .flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
};
static int __init init_power4_pmu(void)
diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c
index 111eb25bb0b6..20139ceeacf6 100644
--- a/arch/powerpc/perf/ppc970-pmu.c
+++ b/arch/powerpc/perf/ppc970-pmu.c
@@ -487,6 +487,7 @@ static struct power_pmu ppc970_pmu = {
.n_generic = ARRAY_SIZE(ppc970_generic_events),
.generic_events = ppc970_generic_events,
.cache_events = &ppc970_cache_events,
+ .flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
};
static int __init init_ppc970_pmu(void)
diff --git a/arch/powerpc/platforms/52xx/lite5200_pm.c b/arch/powerpc/platforms/52xx/lite5200_pm.c
index eda0fc2a3914..870b70f5d1bd 100644
--- a/arch/powerpc/platforms/52xx/lite5200_pm.c
+++ b/arch/powerpc/platforms/52xx/lite5200_pm.c
@@ -3,6 +3,7 @@
#include <asm/io.h>
#include <asm/time.h>
#include <asm/mpc52xx.h>
+#include <asm/switch_to.h>
/* defined in lite5200_sleep.S and only used here */
extern void lite5200_low_power(void __iomem *sram, void __iomem *mbar);
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
index bfb11e01133e..e2d401ad8fbb 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pci.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
@@ -93,7 +93,7 @@ struct mpc52xx_pci {
};
/* MPC5200 device tree match tables */
-const struct of_device_id mpc52xx_pci_ids[] __initdata = {
+const struct of_device_id mpc52xx_pci_ids[] __initconst = {
{ .type = "pci", .compatible = "fsl,mpc5200-pci", },
{ .type = "pci", .compatible = "mpc5200-pci", },
{}
diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c
index d111b024eafd..fb94d10e5a4d 100644
--- a/arch/powerpc/platforms/82xx/pq2.c
+++ b/arch/powerpc/platforms/82xx/pq2.c
@@ -17,7 +17,6 @@
#include <asm/cpm2.h>
#include <asm/io.h>
#include <asm/pci-bridge.h>
-#include <asm/system.h>
#include <platforms/82xx/pq2.h>
diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c
index 65eb792a0d00..a266ba876863 100644
--- a/arch/powerpc/platforms/83xx/km83xx.c
+++ b/arch/powerpc/platforms/83xx/km83xx.c
@@ -27,7 +27,6 @@
#include <linux/of_platform.h>
#include <linux/of_device.h>
-#include <asm/system.h>
#include <linux/atomic.h>
#include <asm/time.h>
#include <asm/io.h>
diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c
index e36bc611dd6e..d440435e055c 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c
@@ -26,7 +26,6 @@
#include <linux/of_platform.h>
#include <linux/of_device.h>
-#include <asm/system.h>
#include <linux/atomic.h>
#include <asm/time.h>
#include <asm/io.h>
diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c
index 39849dd1b5bb..a494fa57bdf9 100644
--- a/arch/powerpc/platforms/83xx/mpc834x_itx.c
+++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c
@@ -25,7 +25,6 @@
#include <linux/root_dev.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <linux/atomic.h>
#include <asm/time.h>
#include <asm/io.h>
diff --git a/arch/powerpc/platforms/83xx/mpc834x_mds.c b/arch/powerpc/platforms/83xx/mpc834x_mds.c
index 5828d8e97c37..553e793a4a93 100644
--- a/arch/powerpc/platforms/83xx/mpc834x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc834x_mds.c
@@ -25,7 +25,6 @@
#include <linux/root_dev.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <linux/atomic.h>
#include <asm/time.h>
#include <asm/io.h>
diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c
index ad8e4bcd7d55..1b1f6c8a1a12 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c
@@ -33,7 +33,6 @@
#include <linux/of_platform.h>
#include <linux/of_device.h>
-#include <asm/system.h>
#include <linux/atomic.h>
#include <asm/time.h>
#include <asm/io.h>
diff --git a/arch/powerpc/platforms/83xx/sbc834x.c b/arch/powerpc/platforms/83xx/sbc834x.c
index 8a81d7640b1f..26cb3e934722 100644
--- a/arch/powerpc/platforms/83xx/sbc834x.c
+++ b/arch/powerpc/platforms/83xx/sbc834x.c
@@ -27,7 +27,6 @@
#include <linux/root_dev.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <linux/atomic.h>
#include <asm/time.h>
#include <asm/io.h>
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index edf66870d978..1a046715e461 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -27,6 +27,7 @@
#include <asm/io.h>
#include <asm/time.h>
#include <asm/mpc6xx.h>
+#include <asm/switch_to.h>
#include <sysdev/fsl_soc.h>
diff --git a/arch/powerpc/platforms/85xx/corenet_ds.c b/arch/powerpc/platforms/85xx/corenet_ds.c
index df69e99e511c..dd3617c531d7 100644
--- a/arch/powerpc/platforms/85xx/corenet_ds.c
+++ b/arch/powerpc/platforms/85xx/corenet_ds.c
@@ -18,7 +18,6 @@
#include <linux/interrupt.h>
#include <linux/memblock.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c
index d50056f424f6..18014629416d 100644
--- a/arch/powerpc/platforms/85xx/ge_imp3a.c
+++ b/arch/powerpc/platforms/85xx/ge_imp3a.c
@@ -24,7 +24,6 @@
#include <linux/of_platform.h>
#include <linux/memblock.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c
index 60120e55da41..3dc1bda3ddc3 100644
--- a/arch/powerpc/platforms/85xx/ksi8560.c
+++ b/arch/powerpc/platforms/85xx/ksi8560.c
@@ -20,7 +20,6 @@
#include <linux/seq_file.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c
index f58872688d8f..585bd22b1406 100644
--- a/arch/powerpc/platforms/85xx/mpc8536_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -19,7 +19,6 @@
#include <linux/of_platform.h>
#include <linux/memblock.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
index d19f675cb369..29ee8fcd75a2 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
@@ -19,7 +19,6 @@
#include <linux/seq_file.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index ab5f0bf19454..11156fb53d83 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -27,7 +27,6 @@
#include <linux/fsl_devices.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <linux/atomic.h>
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index 6e23e3e34bd9..1fd91e9e0ffb 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -22,7 +22,6 @@
#include <linux/of_platform.h>
#include <linux/memblock.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index f33662b46b8d..9a6f04406e0d 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -35,7 +35,6 @@
#include <linux/phy.h>
#include <linux/memblock.h>
-#include <asm/system.h>
#include <linux/atomic.h>
#include <asm/time.h>
#include <asm/io.h>
@@ -271,7 +270,7 @@ static void __init mpc85xx_mds_qe_init(void)
if (machine_is(p1021_mds)) {
- struct ccsr_guts_85xx __iomem *guts;
+ struct ccsr_guts __iomem *guts;
np = of_find_node_by_name(NULL, "global-utilities");
if (np) {
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index db214cd4c822..313fce4f5574 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -18,7 +18,6 @@
#include <linux/interrupt.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
@@ -128,7 +127,7 @@ static void __init mpc85xx_rdb_setup_arch(void)
#if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE)
if (machine_is(p1025_rdb)) {
- struct ccsr_guts_85xx __iomem *guts;
+ struct ccsr_guts __iomem *guts;
np = of_find_node_by_name(NULL, "global-utilities");
if (np) {
diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c
index d8bd6563d9ca..dbaf44354f0d 100644
--- a/arch/powerpc/platforms/85xx/p1010rdb.c
+++ b/arch/powerpc/platforms/85xx/p1010rdb.c
@@ -16,7 +16,6 @@
#include <linux/interrupt.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 0fe88e39945e..e74b7cde9aee 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -150,7 +150,7 @@ static void p1022ds_set_monitor_port(enum fsl_diu_monitor_port port)
{
struct device_node *guts_node;
struct device_node *indirect_node = NULL;
- struct ccsr_guts_85xx __iomem *guts;
+ struct ccsr_guts __iomem *guts;
u8 __iomem *lbc_lcs0_ba = NULL;
u8 __iomem *lbc_lcs1_ba = NULL;
u8 b;
@@ -269,7 +269,7 @@ exit:
void p1022ds_set_pixel_clock(unsigned int pixclock)
{
struct device_node *guts_np = NULL;
- struct ccsr_guts_85xx __iomem *guts;
+ struct ccsr_guts __iomem *guts;
unsigned long freq;
u64 temp;
u32 pxclk;
diff --git a/arch/powerpc/platforms/85xx/p1023_rds.c b/arch/powerpc/platforms/85xx/p1023_rds.c
index 6b07398e4369..2990e8b13dc9 100644
--- a/arch/powerpc/platforms/85xx/p1023_rds.c
+++ b/arch/powerpc/platforms/85xx/p1023_rds.c
@@ -22,7 +22,6 @@
#include <linux/of_platform.h>
#include <linux/of_device.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/p2041_rdb.c b/arch/powerpc/platforms/85xx/p2041_rdb.c
index eda6ed5683e1..6541fa2630c0 100644
--- a/arch/powerpc/platforms/85xx/p2041_rdb.c
+++ b/arch/powerpc/platforms/85xx/p2041_rdb.c
@@ -16,7 +16,6 @@
#include <linux/interrupt.h>
#include <linux/phy.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/p3041_ds.c b/arch/powerpc/platforms/85xx/p3041_ds.c
index 96d99a374dcf..f238efa75891 100644
--- a/arch/powerpc/platforms/85xx/p3041_ds.c
+++ b/arch/powerpc/platforms/85xx/p3041_ds.c
@@ -18,7 +18,6 @@
#include <linux/interrupt.h>
#include <linux/phy.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/p4080_ds.c b/arch/powerpc/platforms/85xx/p4080_ds.c
index d1b21d7663e3..c92417dc6574 100644
--- a/arch/powerpc/platforms/85xx/p4080_ds.c
+++ b/arch/powerpc/platforms/85xx/p4080_ds.c
@@ -17,7 +17,6 @@
#include <linux/delay.h>
#include <linux/interrupt.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/p5020_ds.c b/arch/powerpc/platforms/85xx/p5020_ds.c
index e8cba5004fd8..17bef15a85ed 100644
--- a/arch/powerpc/platforms/85xx/p5020_ds.c
+++ b/arch/powerpc/platforms/85xx/p5020_ds.c
@@ -18,7 +18,6 @@
#include <linux/interrupt.h>
#include <linux/phy.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c
index 1677b8a22677..cd3a66bdb54b 100644
--- a/arch/powerpc/platforms/85xx/sbc8548.c
+++ b/arch/powerpc/platforms/85xx/sbc8548.c
@@ -30,7 +30,6 @@
#include <linux/fsl_devices.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <linux/atomic.h>
diff --git a/arch/powerpc/platforms/85xx/sbc8560.c b/arch/powerpc/platforms/85xx/sbc8560.c
index 3c3bbcc27566..b1be632ede43 100644
--- a/arch/powerpc/platforms/85xx/sbc8560.c
+++ b/arch/powerpc/platforms/85xx/sbc8560.c
@@ -21,7 +21,6 @@
#include <linux/seq_file.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c
index b71919217756..b9c6daa07b66 100644
--- a/arch/powerpc/platforms/85xx/socrates.c
+++ b/arch/powerpc/platforms/85xx/socrates.c
@@ -29,7 +29,6 @@
#include <linux/seq_file.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c
index 27ca3a7b04ab..e0508002b086 100644
--- a/arch/powerpc/platforms/85xx/stx_gp3.c
+++ b/arch/powerpc/platforms/85xx/stx_gp3.c
@@ -28,7 +28,6 @@
#include <linux/seq_file.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c
index d7504cefe016..4d786c25d3e5 100644
--- a/arch/powerpc/platforms/85xx/tqm85xx.c
+++ b/arch/powerpc/platforms/85xx/tqm85xx.c
@@ -26,7 +26,6 @@
#include <linux/seq_file.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
index 503c21596c63..41c687550ea7 100644
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -21,7 +21,6 @@
#include <linux/interrupt.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c
index ed58b6cfd60c..1fca663f1b25 100644
--- a/arch/powerpc/platforms/86xx/gef_ppc9a.c
+++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c
@@ -24,7 +24,6 @@
#include <linux/seq_file.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c
index 710db69bd523..14e0e576bcbd 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc310.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc310.c
@@ -24,7 +24,6 @@
#include <linux/seq_file.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c
index 4a13d2f4ac20..1638f43599f0 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc610.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc610.c
@@ -24,7 +24,6 @@
#include <linux/seq_file.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index 13fa9a6403e6..62cd3c555bfb 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -25,7 +25,6 @@
#include <linux/seq_file.h>
#include <linux/of.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
@@ -226,7 +225,7 @@ void mpc8610hpcd_set_monitor_port(enum fsl_diu_monitor_port port)
void mpc8610hpcd_set_pixel_clock(unsigned int pixclock)
{
struct device_node *guts_np = NULL;
- struct ccsr_guts_86xx __iomem *guts;
+ struct ccsr_guts __iomem *guts;
unsigned long freq;
u64 temp;
u32 pxclk;
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index 569262ca499a..3755e61d7ecf 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -21,7 +21,6 @@
#include <linux/of_platform.h>
#include <linux/memblock.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c
index 22cc3571ae19..9982f57c98b9 100644
--- a/arch/powerpc/platforms/86xx/pic.c
+++ b/arch/powerpc/platforms/86xx/pic.c
@@ -12,7 +12,6 @@
#include <linux/interrupt.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/mpic.h>
#include <asm/i8259.h>
diff --git a/arch/powerpc/platforms/86xx/sbc8641d.c b/arch/powerpc/platforms/86xx/sbc8641d.c
index 51c8f331b671..e7007d0d949e 100644
--- a/arch/powerpc/platforms/86xx/sbc8641d.c
+++ b/arch/powerpc/platforms/86xx/sbc8641d.c
@@ -21,7 +21,6 @@
#include <linux/seq_file.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
index caaec29796b7..866feff83c91 100644
--- a/arch/powerpc/platforms/8xx/mpc86xads_setup.c
+++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
@@ -19,7 +19,6 @@
#include <asm/io.h>
#include <asm/machdep.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/8xx_immap.h>
#include <asm/cpm1.h>
diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
index 45ed6cdc1310..5d98398c2f5e 100644
--- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c
+++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
@@ -32,7 +32,6 @@
#include <asm/machdep.h>
#include <asm/page.h>
#include <asm/processor.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/mpc8xx.h>
#include <asm/8xx_immap.h>
diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
index 528e00ddef31..8d21ab70e06c 100644
--- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
@@ -35,7 +35,6 @@
#include <asm/machdep.h>
#include <asm/page.h>
#include <asm/processor.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/mpc8xx.h>
#include <asm/8xx_immap.h>
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index 2516c1cf8467..943c9d39aa16 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -95,7 +95,6 @@ static long beat_lpar_hpte_insert(unsigned long hpte_group,
unsigned long lpar_rc;
u64 hpte_v, hpte_r, slot;
- /* same as iseries */
if (vflags & HPTE_V_SECONDARY)
return -1;
@@ -319,7 +318,6 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
unsigned long lpar_rc;
u64 hpte_v, hpte_r, slot;
- /* same as iseries */
if (vflags & HPTE_V_SECONDARY)
return -1;
diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c
index 7f9b6742f8b6..6e3409d590ac 100644
--- a/arch/powerpc/platforms/cell/qpace_setup.c
+++ b/arch/powerpc/platforms/cell/qpace_setup.c
@@ -61,7 +61,7 @@ static void qpace_progress(char *s, unsigned short hex)
printk("*** %04x : %s\n", hex, s ? s : "");
}
-static const struct of_device_id qpace_bus_ids[] __initdata = {
+static const struct of_device_id qpace_bus_ids[] __initconst = {
{ .type = "soc", },
{ .compatible = "soc", },
{ .type = "spider", },
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index fa3e294fd343..4ab087671185 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -140,7 +140,7 @@ static int __devinit cell_setup_phb(struct pci_controller *phb)
return 0;
}
-static const struct of_device_id cell_bus_ids[] __initdata = {
+static const struct of_device_id cell_bus_ids[] __initconst = {
{ .type = "soc", },
{ .compatible = "soc", },
{ .type = "spider", },
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index 4a255cf8cd17..49a65e2dfc71 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -38,7 +38,6 @@
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
-#include <asm/system.h>
#include <asm/rtas.h>
#include <asm/cputhreads.h>
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index 03c5fce2a5b3..c2c5b078ba80 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -122,7 +122,7 @@ static struct spu_context *coredump_next_context(int *fd)
struct spu_context *ctx = NULL;
for (; *fd < fdt->max_fds; (*fd)++) {
- if (!FD_ISSET(*fd, fdt->open_fds))
+ if (!fd_is_open(*fd, fdt))
continue;
file = fcheck(*fd);
diff --git a/arch/powerpc/platforms/embedded6xx/c2k.c b/arch/powerpc/platforms/embedded6xx/c2k.c
index 8cab5731850f..ebd3963fdf91 100644
--- a/arch/powerpc/platforms/embedded6xx/c2k.c
+++ b/arch/powerpc/platforms/embedded6xx/c2k.c
@@ -23,7 +23,6 @@
#include <asm/machdep.h>
#include <asm/prom.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <mm/mmu_decl.h>
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index ab51b21b4bd7..8c305c7c8977 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -28,7 +28,6 @@
#include <linux/of_platform.h>
#include <linux/module.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/prom.h>
diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index 74ccce36baed..beeaf4a173e1 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -32,7 +32,6 @@
#include <linux/tty.h>
#include <linux/serial_core.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/prom.h>
diff --git a/arch/powerpc/platforms/embedded6xx/prpmc2800.c b/arch/powerpc/platforms/embedded6xx/prpmc2800.c
index 670035f49a69..d455f08bea53 100644
--- a/arch/powerpc/platforms/embedded6xx/prpmc2800.c
+++ b/arch/powerpc/platforms/embedded6xx/prpmc2800.c
@@ -17,7 +17,6 @@
#include <asm/machdep.h>
#include <asm/prom.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <mm/mmu_decl.h>
diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c
index e0ed3c71d69b..c458b60d14c4 100644
--- a/arch/powerpc/platforms/embedded6xx/storcenter.c
+++ b/arch/powerpc/platforms/embedded6xx/storcenter.c
@@ -16,7 +16,6 @@
#include <linux/initrd.h>
#include <linux/of_platform.h>
-#include <asm/system.h>
#include <asm/time.h>
#include <asm/prom.h>
#include <asm/mpic.h>
diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c
index 8b0c2082a783..64fde058e545 100644
--- a/arch/powerpc/platforms/fsl_uli1575.c
+++ b/arch/powerpc/platforms/fsl_uli1575.c
@@ -15,7 +15,6 @@
#include <linux/interrupt.h>
#include <linux/mc146818rtc.h>
-#include <asm/system.h>
#include <asm/pci-bridge.h>
#define ULI_PIRQA 0x08
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index 3b7545a51aa9..cb1b0b35a0c6 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -47,7 +47,6 @@
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/prom.h>
-#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/io.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c
index eac569dee27c..b4a369dac3a8 100644
--- a/arch/powerpc/platforms/maple/time.c
+++ b/arch/powerpc/platforms/maple/time.c
@@ -27,7 +27,6 @@
#include <asm/sections.h>
#include <asm/prom.h>
-#include <asm/system.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
index e777ad471a48..2ed9212d7d59 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -32,13 +32,13 @@
#include <linux/gfp.h>
#include <asm/prom.h>
-#include <asm/system.h>
#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/mpic.h>
#include <asm/smp.h>
#include <asm/time.h>
#include <asm/mmu.h>
+#include <asm/debug.h>
#include <pcmcia/ss.h>
#include <pcmcia/cistpl.h>
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index 84d7fd9bcc69..3e91ef538114 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -19,6 +19,7 @@
#include <asm/bootx.h>
#include <asm/btext.h>
#include <asm/io.h>
+#include <asm/setup.h>
#undef DEBUG
#define SET_BOOT_BAT
diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/arch/powerpc/platforms/powermac/cpufreq_32.c
index 1fc386a23f18..64171198535c 100644
--- a/arch/powerpc/platforms/powermac/cpufreq_32.c
+++ b/arch/powerpc/platforms/powermac/cpufreq_32.c
@@ -33,9 +33,9 @@
#include <asm/sections.h>
#include <asm/cputable.h>
#include <asm/time.h>
-#include <asm/system.h>
#include <asm/mpic.h>
#include <asm/keylargo.h>
+#include <asm/switch_to.h>
/* WARNING !!! This will cause calibrate_delay() to be called,
* but this is an __init function ! So you MUST go edit
diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c
index da18b26dcc6f..014d06e6d46b 100644
--- a/arch/powerpc/platforms/powermac/nvram.c
+++ b/arch/powerpc/platforms/powermac/nvram.c
@@ -23,7 +23,6 @@
#include <linux/spinlock.h>
#include <asm/sections.h>
#include <asm/io.h>
-#include <asm/system.h>
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/nvram.h>
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 970ea1de4298..141f8899a633 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -57,7 +57,6 @@
#include <asm/reg.h>
#include <asm/sections.h>
#include <asm/prom.h>
-#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/io.h>
#include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index 11c9fce43b5b..8680bb69795d 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -26,7 +26,6 @@
#include <asm/sections.h>
#include <asm/prom.h>
-#include <asm/system.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 17210c526c52..3ef46254c35b 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -25,7 +25,6 @@
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
-#include <asm/system.h>
#include <asm/rtas.h>
#include <asm/vdso_datapage.h>
#include <asm/cputhreads.h>
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 8bd6ba542691..de2aea421707 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -29,6 +29,7 @@
#include <asm/prom.h>
#include <asm/udbg.h>
#include <asm/lv1call.h>
+#include <asm/setup.h>
#include "platform.h"
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 0e8656370063..a7648543c59e 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -25,10 +25,10 @@
#include <linux/debugfs.h>
#include <linux/spinlock.h>
#include <asm/smp.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/firmware.h>
#include <asm/lppaca.h>
+#include <asm/debug.h>
#include "plpar_wrappers.h"
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 8011088392d3..309d38ef7322 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -984,7 +984,8 @@ int __exit eeh_ops_unregister(const char *name)
*/
void __init eeh_init(void)
{
- struct device_node *phb, *np;
+ struct pci_controller *hose, *tmp;
+ struct device_node *phb;
int ret;
/* call platform initialization function */
@@ -1000,19 +1001,9 @@ void __init eeh_init(void)
raw_spin_lock_init(&confirm_error_lock);
- np = of_find_node_by_path("/rtas");
- if (np == NULL)
- return;
-
- /* Enable EEH for all adapters. Note that eeh requires buid's */
- for (phb = of_find_node_by_name(NULL, "pci"); phb;
- phb = of_find_node_by_name(phb, "pci")) {
- unsigned long buid;
-
- buid = get_phb_buid(phb);
- if (buid == 0 || !of_node_to_eeh_dev(phb))
- continue;
-
+ /* Enable EEH for all adapters */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ phb = hose->dn;
traverse_pci_devices(phb, eeh_early_enable, NULL);
}
diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c
index f3aed7dcae95..c4507d095900 100644
--- a/arch/powerpc/platforms/pseries/eeh_dev.c
+++ b/arch/powerpc/platforms/pseries/eeh_dev.c
@@ -62,7 +62,7 @@ void * __devinit eeh_dev_init(struct device_node *dn, void *data)
}
/* Associate EEH device with OF node */
- dn->edev = edev;
+ PCI_DN(dn)->edev = edev;
edev->dn = dn;
edev->phb = phb;
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index 4a4752565856..4cb375c0f8d1 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -59,8 +59,7 @@ static int eeh_event_handler(void * dummy)
struct eeh_event *event;
struct eeh_dev *edev;
- daemonize("eehd");
- set_current_state(TASK_INTERRUPTIBLE);
+ set_task_comm(current, "eehd");
spin_lock_irqsave(&eeh_eventlist_lock, flags);
event = NULL;
@@ -83,6 +82,7 @@ static int eeh_event_handler(void * dummy)
printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n",
eeh_pci_name(edev->pdev));
+ set_current_state(TASK_INTERRUPTIBLE); /* Don't add to load average */
edev = handle_eeh_events(event);
eeh_clear_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index c986d08d0807..64c97d8ac0c5 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -23,7 +23,6 @@
#include <linux/delay.h>
#include <linux/sched.h> /* for idle_task_exit */
#include <linux/cpu.h>
-#include <asm/system.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/firmware.h>
diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c
index 1a709bc48ce1..ef9d9d84c7d5 100644
--- a/arch/powerpc/platforms/pseries/io_event_irq.c
+++ b/arch/powerpc/platforms/pseries/io_event_irq.c
@@ -63,73 +63,9 @@ EXPORT_SYMBOL_GPL(pseries_ioei_notifier_list);
static int ioei_check_exception_token;
-/* pSeries event log format */
-
-/* Two bytes ASCII section IDs */
-#define PSERIES_ELOG_SECT_ID_PRIV_HDR (('P' << 8) | 'H')
-#define PSERIES_ELOG_SECT_ID_USER_HDR (('U' << 8) | 'H')
-#define PSERIES_ELOG_SECT_ID_PRIMARY_SRC (('P' << 8) | 'S')
-#define PSERIES_ELOG_SECT_ID_EXTENDED_UH (('E' << 8) | 'H')
-#define PSERIES_ELOG_SECT_ID_FAILING_MTMS (('M' << 8) | 'T')
-#define PSERIES_ELOG_SECT_ID_SECONDARY_SRC (('S' << 8) | 'S')
-#define PSERIES_ELOG_SECT_ID_DUMP_LOCATOR (('D' << 8) | 'H')
-#define PSERIES_ELOG_SECT_ID_FW_ERROR (('S' << 8) | 'W')
-#define PSERIES_ELOG_SECT_ID_IMPACT_PART_ID (('L' << 8) | 'P')
-#define PSERIES_ELOG_SECT_ID_LOGIC_RESOURCE_ID (('L' << 8) | 'R')
-#define PSERIES_ELOG_SECT_ID_HMC_ID (('H' << 8) | 'M')
-#define PSERIES_ELOG_SECT_ID_EPOW (('E' << 8) | 'P')
-#define PSERIES_ELOG_SECT_ID_IO_EVENT (('I' << 8) | 'E')
-#define PSERIES_ELOG_SECT_ID_MANUFACT_INFO (('M' << 8) | 'I')
-#define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H')
-#define PSERIES_ELOG_SECT_ID_USER_DEF (('U' << 8) | 'D')
-
-/* Vendor specific Platform Event Log Format, Version 6, section header */
-struct pseries_elog_section {
- uint16_t id; /* 0x00 2-byte ASCII section ID */
- uint16_t length; /* 0x02 Section length in bytes */
- uint8_t version; /* 0x04 Section version */
- uint8_t subtype; /* 0x05 Section subtype */
- uint16_t creator_component; /* 0x06 Creator component ID */
- uint8_t data[]; /* 0x08 Start of section data */
-};
-
static char ioei_rtas_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned;
/**
- * Find data portion of a specific section in RTAS extended event log.
- * @elog: RTAS error/event log.
- * @sect_id: secsion ID.
- *
- * Return:
- * pointer to the section data of the specified section
- * NULL if not found
- */
-static struct pseries_elog_section *find_xelog_section(struct rtas_error_log *elog,
- uint16_t sect_id)
-{
- struct rtas_ext_event_log_v6 *xelog =
- (struct rtas_ext_event_log_v6 *) elog->buffer;
- struct pseries_elog_section *sect;
- unsigned char *p, *log_end;
-
- /* Check that we understand the format */
- if (elog->extended_log_length < sizeof(struct rtas_ext_event_log_v6) ||
- xelog->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG ||
- xelog->company_id != RTAS_V6EXT_COMPANY_ID_IBM)
- return NULL;
-
- log_end = elog->buffer + elog->extended_log_length;
- p = xelog->vendor_log;
- while (p < log_end) {
- sect = (struct pseries_elog_section *)p;
- if (sect->id == sect_id)
- return sect;
- p += sect->length;
- }
- return NULL;
-}
-
-/**
* Find the data portion of an IO Event section from event log.
* @elog: RTAS error/event log.
*
@@ -138,7 +74,7 @@ static struct pseries_elog_section *find_xelog_section(struct rtas_error_log *el
*/
static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog)
{
- struct pseries_elog_section *sect;
+ struct pseries_errorlog *sect;
/* We should only ever get called for io-event interrupts, but if
* we do get called for another type then something went wrong so
@@ -152,7 +88,7 @@ static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog)
return NULL;
}
- sect = find_xelog_section(elog, PSERIES_ELOG_SECT_ID_IO_EVENT);
+ sect = get_pseries_errorlog(elog, PSERIES_ELOG_SECT_ID_IO_EVENT);
if (unlikely(!sect)) {
printk_once(KERN_WARNING "io_event_irq: RTAS extended event "
"log does not contain an IO Event section. "
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index c442f2b1980f..0915b1ad66ce 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -809,8 +809,7 @@ machine_arch_initcall(pseries, find_existing_ddw_windows);
static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
struct ddw_query_response *query)
{
- struct device_node *dn;
- struct pci_dn *pcidn;
+ struct eeh_dev *edev;
u32 cfg_addr;
u64 buid;
int ret;
@@ -821,12 +820,12 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
* Retrieve them from the pci device, not the node with the
* dma-window property
*/
- dn = pci_device_to_OF_node(dev);
- pcidn = PCI_DN(dn);
- cfg_addr = pcidn->eeh_config_addr;
- if (pcidn->eeh_pe_config_addr)
- cfg_addr = pcidn->eeh_pe_config_addr;
- buid = pcidn->phb->buid;
+ edev = pci_dev_to_eeh_dev(dev);
+ cfg_addr = edev->config_addr;
+ if (edev->pe_config_addr)
+ cfg_addr = edev->pe_config_addr;
+ buid = edev->phb->buid;
+
ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
cfg_addr, BUID_HI(buid), BUID_LO(buid));
dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
@@ -839,8 +838,7 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
struct ddw_create_response *create, int page_shift,
int window_shift)
{
- struct device_node *dn;
- struct pci_dn *pcidn;
+ struct eeh_dev *edev;
u32 cfg_addr;
u64 buid;
int ret;
@@ -851,12 +849,11 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
* Retrieve them from the pci device, not the node with the
* dma-window property
*/
- dn = pci_device_to_OF_node(dev);
- pcidn = PCI_DN(dn);
- cfg_addr = pcidn->eeh_config_addr;
- if (pcidn->eeh_pe_config_addr)
- cfg_addr = pcidn->eeh_pe_config_addr;
- buid = pcidn->phb->buid;
+ edev = pci_dev_to_eeh_dev(dev);
+ cfg_addr = edev->config_addr;
+ if (edev->pe_config_addr)
+ cfg_addr = edev->pe_config_addr;
+ buid = edev->phb->buid;
do {
/* extra outputs are LIOBN and dma-addr (hi, lo) */
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c
index a12e95af6933..41a34bc4a9a2 100644
--- a/arch/powerpc/platforms/pseries/processor_idle.c
+++ b/arch/powerpc/platforms/pseries/processor_idle.c
@@ -14,9 +14,9 @@
#include <asm/paca.h>
#include <asm/reg.h>
-#include <asm/system.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
+#include <asm/runlatch.h>
#include "plpar_wrappers.h"
#include "pseries.h"
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 086d2ae4e06a..c4dfccd3a3d9 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -16,37 +16,15 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-/* Change Activity:
- * 2001/09/21 : engebret : Created with minimal EPOW and HW exception support.
- * End Change Activity
- */
-
-#include <linux/errno.h>
-#include <linux/threads.h>
-#include <linux/kernel_stat.h>
-#include <linux/signal.h>
#include <linux/sched.h>
-#include <linux/ioport.h>
#include <linux/interrupt.h>
-#include <linux/timex.h>
-#include <linux/init.h>
-#include <linux/delay.h>
#include <linux/irq.h>
-#include <linux/random.h>
-#include <linux/sysrq.h>
-#include <linux/bitops.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/irq.h>
-#include <asm/cache.h>
-#include <asm/prom.h>
-#include <asm/ptrace.h>
+#include <linux/of.h>
+#include <linux/fs.h>
+#include <linux/reboot.h>
+
#include <asm/machdep.h>
#include <asm/rtas.h>
-#include <asm/udbg.h>
#include <asm/firmware.h>
#include "pseries.h"
@@ -57,7 +35,6 @@ static DEFINE_SPINLOCK(ras_log_buf_lock);
static char global_mce_data_buf[RTAS_ERROR_LOG_MAX];
static DEFINE_PER_CPU(__u64, mce_data_buf);
-static int ras_get_sensor_state_token;
static int ras_check_exception_token;
#define EPOW_SENSOR_TOKEN 9
@@ -75,7 +52,6 @@ static int __init init_ras_IRQ(void)
{
struct device_node *np;
- ras_get_sensor_state_token = rtas_token("get-sensor-state");
ras_check_exception_token = rtas_token("check-exception");
/* Internal Errors */
@@ -95,26 +71,126 @@ static int __init init_ras_IRQ(void)
return 0;
}
-__initcall(init_ras_IRQ);
+subsys_initcall(init_ras_IRQ);
-/*
- * Handle power subsystem events (EPOW).
- *
- * Presently we just log the event has occurred. This should be fixed
- * to examine the type of power failure and take appropriate action where
- * the time horizon permits something useful to be done.
- */
+#define EPOW_SHUTDOWN_NORMAL 1
+#define EPOW_SHUTDOWN_ON_UPS 2
+#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS 3
+#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH 4
+
+static void handle_system_shutdown(char event_modifier)
+{
+ switch (event_modifier) {
+ case EPOW_SHUTDOWN_NORMAL:
+ pr_emerg("Firmware initiated power off");
+ orderly_poweroff(1);
+ break;
+
+ case EPOW_SHUTDOWN_ON_UPS:
+ pr_emerg("Loss of power reported by firmware, system is "
+ "running on UPS/battery");
+ break;
+
+ case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
+ pr_emerg("Loss of system critical functions reported by "
+ "firmware");
+ pr_emerg("Check RTAS error log for details");
+ orderly_poweroff(1);
+ break;
+
+ case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
+ pr_emerg("Ambient temperature too high reported by firmware");
+ pr_emerg("Check RTAS error log for details");
+ orderly_poweroff(1);
+ break;
+
+ default:
+ pr_err("Unknown power/cooling shutdown event (modifier %d)",
+ event_modifier);
+ }
+}
+
+struct epow_errorlog {
+ unsigned char sensor_value;
+ unsigned char event_modifier;
+ unsigned char extended_modifier;
+ unsigned char reserved;
+ unsigned char platform_reason;
+};
+
+#define EPOW_RESET 0
+#define EPOW_WARN_COOLING 1
+#define EPOW_WARN_POWER 2
+#define EPOW_SYSTEM_SHUTDOWN 3
+#define EPOW_SYSTEM_HALT 4
+#define EPOW_MAIN_ENCLOSURE 5
+#define EPOW_POWER_OFF 7
+
+void rtas_parse_epow_errlog(struct rtas_error_log *log)
+{
+ struct pseries_errorlog *pseries_log;
+ struct epow_errorlog *epow_log;
+ char action_code;
+ char modifier;
+
+ pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
+ if (pseries_log == NULL)
+ return;
+
+ epow_log = (struct epow_errorlog *)pseries_log->data;
+ action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */
+ modifier = epow_log->event_modifier & 0xF; /* bottom 4 bits */
+
+ switch (action_code) {
+ case EPOW_RESET:
+ pr_err("Non critical power or cooling issue cleared");
+ break;
+
+ case EPOW_WARN_COOLING:
+ pr_err("Non critical cooling issue reported by firmware");
+ pr_err("Check RTAS error log for details");
+ break;
+
+ case EPOW_WARN_POWER:
+ pr_err("Non critical power issue reported by firmware");
+ pr_err("Check RTAS error log for details");
+ break;
+
+ case EPOW_SYSTEM_SHUTDOWN:
+ handle_system_shutdown(epow_log->event_modifier);
+ break;
+
+ case EPOW_SYSTEM_HALT:
+ pr_emerg("Firmware initiated power off");
+ orderly_poweroff(1);
+ break;
+
+ case EPOW_MAIN_ENCLOSURE:
+ case EPOW_POWER_OFF:
+ pr_emerg("Critical power/cooling issue reported by firmware");
+ pr_emerg("Check RTAS error log for details");
+ pr_emerg("Immediate power off");
+ emergency_sync();
+ kernel_power_off();
+ break;
+
+ default:
+ pr_err("Unknown power/cooling event (action code %d)",
+ action_code);
+ }
+}
+
+/* Handle environmental and power warning (EPOW) interrupts. */
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
{
- int status = 0xdeadbeef;
- int state = 0;
+ int status;
+ int state;
int critical;
- status = rtas_call(ras_get_sensor_state_token, 2, 2, &state,
- EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX);
+ status = rtas_get_sensor(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state);
if (state > 3)
- critical = 1; /* Time Critical */
+ critical = 1; /* Time Critical */
else
critical = 0;
@@ -123,18 +199,14 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
status = rtas_call(ras_check_exception_token, 6, 1, NULL,
RTAS_VECTOR_EXTERNAL_INTERRUPT,
virq_to_hw(irq),
- RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS,
+ RTAS_EPOW_WARNING,
critical, __pa(&ras_log_buf),
rtas_get_error_log_max());
- udbg_printf("EPOW <0x%lx 0x%x 0x%x>\n",
- *((unsigned long *)&ras_log_buf), status, state);
- printk(KERN_WARNING "EPOW <0x%lx 0x%x 0x%x>\n",
- *((unsigned long *)&ras_log_buf), status, state);
-
- /* format and print the extended information */
log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
+ rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
+
spin_unlock(&ras_log_buf_lock);
return IRQ_HANDLED;
}
@@ -150,7 +222,7 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
{
struct rtas_error_log *rtas_elog;
- int status = 0xdeadbeef;
+ int status;
int fatal;
spin_lock(&ras_log_buf_lock);
@@ -158,7 +230,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
status = rtas_call(ras_check_exception_token, 6, 1, NULL,
RTAS_VECTOR_EXTERNAL_INTERRUPT,
virq_to_hw(irq),
- RTAS_INTERNAL_ERROR, 1 /*Time Critical */,
+ RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
__pa(&ras_log_buf),
rtas_get_error_log_max());
@@ -173,24 +245,13 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
if (fatal) {
- udbg_printf("Fatal HW Error <0x%lx 0x%x>\n",
- *((unsigned long *)&ras_log_buf), status);
- printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n",
- *((unsigned long *)&ras_log_buf), status);
-
-#ifndef DEBUG_RTAS_POWER_OFF
- /* Don't actually power off when debugging so we can test
- * without actually failing while injecting errors.
- * Error data will not be logged to syslog.
- */
- ppc_md.power_off();
-#endif
+ pr_emerg("Fatal hardware error reported by firmware");
+ pr_emerg("Check RTAS error log for details");
+ pr_emerg("Immediate power off");
+ emergency_sync();
+ kernel_power_off();
} else {
- udbg_printf("Recoverable HW Error <0x%lx 0x%x>\n",
- *((unsigned long *)&ras_log_buf), status);
- printk(KERN_WARNING
- "Warning: Recoverable hardware error <0x%lx 0x%x>\n",
- *((unsigned long *)&ras_log_buf), status);
+ pr_err("Recoverable hardware error reported by firmware");
}
spin_unlock(&ras_log_buf_lock);
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index eadba9521a35..e16bb8d48550 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -37,7 +37,6 @@
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
-#include <asm/system.h>
#include <asm/rtas.h>
#include <asm/pSeries_reconfig.h>
#include <asm/mpic.h>
diff --git a/arch/powerpc/platforms/wsp/chroma.c b/arch/powerpc/platforms/wsp/chroma.c
index ca6fa26f6e63..8ef53bc2e70e 100644
--- a/arch/powerpc/platforms/wsp/chroma.c
+++ b/arch/powerpc/platforms/wsp/chroma.c
@@ -17,7 +17,6 @@
#include <linux/time.h>
#include <asm/machdep.h>
-#include <asm/system.h>
#include <asm/udbg.h>
#include "ics.h"
diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c
index 0c1ae06d0be1..508ec8282b96 100644
--- a/arch/powerpc/platforms/wsp/psr2.c
+++ b/arch/powerpc/platforms/wsp/psr2.c
@@ -17,7 +17,6 @@
#include <linux/time.h>
#include <asm/machdep.h>
-#include <asm/system.h>
#include <asm/udbg.h>
#include "ics.h"
diff --git a/arch/powerpc/platforms/wsp/wsp_pci.c b/arch/powerpc/platforms/wsp/wsp_pci.c
index 763014cd1e62..1526551f9fe6 100644
--- a/arch/powerpc/platforms/wsp/wsp_pci.c
+++ b/arch/powerpc/platforms/wsp/wsp_pci.c
@@ -27,6 +27,7 @@
#include <asm/ppc-pci.h>
#include <asm/iommu.h>
#include <asm/io-workarounds.h>
+#include <asm/debug.h>
#include "wsp.h"
#include "wsp_pci.h"
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index bf6c7cc0a6af..4dd534194ae8 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -26,7 +26,6 @@
#include <asm/udbg.h>
#include <asm/io.h>
-#include <asm/system.h>
#include <asm/rheap.h>
#include <asm/cpm.h>
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index e8f385fbf549..c449dbd1c938 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -31,7 +31,6 @@
#include <linux/fs_enet_pd.h>
#include <linux/fs_uart_pd.h>
-#include <asm/system.h>
#include <linux/atomic.h>
#include <asm/io.h>
#include <asm/irq.h>
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
index 5287e95cec3a..0968b66b4cf9 100644
--- a/arch/powerpc/sysdev/msi_bitmap.c
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/bitmap.h>
#include <asm/msi_bitmap.h>
+#include <asm/setup.h>
int msi_bitmap_alloc_hwirqs(struct msi_bitmap *bmp, int num)
{
diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c
index ceb09cbd2329..818e763f8265 100644
--- a/arch/powerpc/sysdev/qe_lib/qe.c
+++ b/arch/powerpc/sysdev/qe_lib/qe.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006 Freescale Semicondutor, Inc. All rights reserved.
+ * Copyright (C) 2006-2010 Freescale Semicondutor, Inc. All rights reserved.
*
* Authors: Shlomi Gridish <gridish@freescale.com>
* Li Yang <leoli@freescale.com>
@@ -266,7 +266,19 @@ EXPORT_SYMBOL(qe_clock_source);
static void qe_snums_init(void)
{
int i;
- static const u8 snum_init[] = {
+ static const u8 snum_init_76[] = {
+ 0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D,
+ 0x24, 0x25, 0x2C, 0x2D, 0x34, 0x35, 0x88, 0x89,
+ 0x98, 0x99, 0xA8, 0xA9, 0xB8, 0xB9, 0xC8, 0xC9,
+ 0xD8, 0xD9, 0xE8, 0xE9, 0x44, 0x45, 0x4C, 0x4D,
+ 0x54, 0x55, 0x5C, 0x5D, 0x64, 0x65, 0x6C, 0x6D,
+ 0x74, 0x75, 0x7C, 0x7D, 0x84, 0x85, 0x8C, 0x8D,
+ 0x94, 0x95, 0x9C, 0x9D, 0xA4, 0xA5, 0xAC, 0xAD,
+ 0xB4, 0xB5, 0xBC, 0xBD, 0xC4, 0xC5, 0xCC, 0xCD,
+ 0xD4, 0xD5, 0xDC, 0xDD, 0xE4, 0xE5, 0xEC, 0xED,
+ 0xF4, 0xF5, 0xFC, 0xFD,
+ };
+ static const u8 snum_init_46[] = {
0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D,
0x24, 0x25, 0x2C, 0x2D, 0x34, 0x35, 0x88, 0x89,
0x98, 0x99, 0xA8, 0xA9, 0xB8, 0xB9, 0xC8, 0xC9,
@@ -274,9 +286,15 @@ static void qe_snums_init(void)
0x28, 0x29, 0x38, 0x39, 0x48, 0x49, 0x58, 0x59,
0x68, 0x69, 0x78, 0x79, 0x80, 0x81,
};
+ static const u8 *snum_init;
qe_num_of_snum = qe_get_num_of_snums();
+ if (qe_num_of_snum == 76)
+ snum_init = snum_init_76;
+ else
+ snum_init = snum_init_46;
+
for (i = 0; i < qe_num_of_snum; i++) {
snums[i].num = snum_init[i];
snums[i].state = QE_SNUM_STATE_FREE;
diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c
index 2370e1c63379..1fd0717ade02 100644
--- a/arch/powerpc/sysdev/tsi108_dev.c
+++ b/arch/powerpc/sysdev/tsi108_dev.c
@@ -22,7 +22,6 @@
#include <linux/of_net.h>
#include <asm/tsi108.h>
-#include <asm/system.h>
#include <linux/atomic.h>
#include <asm/io.h>
#include <asm/irq.h>
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 68a9cbbab450..0f3ab06d2222 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -41,6 +41,7 @@
#include <asm/spu_priv1.h>
#include <asm/setjmp.h>
#include <asm/reg.h>
+#include <asm/debug.h>
#ifdef CONFIG_PPC64
#include <asm/hvcall.h>