diff options
Diffstat (limited to 'arch/powerpc')
54 files changed, 1101 insertions, 229 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a3128ca0fe11..2f6a22e8e935 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -140,6 +140,8 @@ config PPC select IRQ_PER_CPU select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL + select HAVE_RCU_TABLE_FREE if SMP + select HAVE_SYSCALL_TRACEPOINTS config EARLY_PRINTK bool diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index a597dd77b903..e72dcf6a421d 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -35,27 +35,6 @@ config DEBUG_STACKOVERFLOW This option will cause messages to be printed if free stack space drops below a certain limit. -config DEBUG_STACK_USAGE - bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL - help - Enables the display of the minimum amount of free stack which each - task has ever had available in the sysrq-T and sysrq-P debug output. - - This option will slow down process creation somewhat. - -config DEBUG_PER_CPU_MAPS - bool "Debug access to per_cpu maps" - depends on DEBUG_KERNEL - depends on SMP - default n - ---help--- - Say Y to verify that the per_cpu map being accessed has - been setup. Adds a fair amount of code to kernel memory - and decreases performance. - - Say N if unsure. - config HCALL_STATS bool "Hypervisor call instrumentation" depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS diff --git a/arch/powerpc/boot/dts/canyonlands.dts b/arch/powerpc/boot/dts/canyonlands.dts index 2779f08313a5..22dd6ae84da0 100644 --- a/arch/powerpc/boot/dts/canyonlands.dts +++ b/arch/powerpc/boot/dts/canyonlands.dts @@ -530,5 +530,23 @@ 0x0 0x0 0x0 0x3 &UIC3 0x12 0x4 /* swizzled int C */ 0x0 0x0 0x0 0x4 &UIC3 0x13 0x4 /* swizzled int D */>; }; + + MSI: ppc4xx-msi@C10000000 { + compatible = "amcc,ppc4xx-msi", "ppc4xx-msi"; + reg = < 0xC 0x10000000 0x100>; + sdr-base = <0x36C>; + msi-data = <0x00000000>; + msi-mask = <0x44440000>; + interrupt-count = <3>; + interrupts = <0 1 2 3>; + interrupt-parent = <&UIC3>; + #interrupt-cells = <1>; + #address-cells = <0>; + #size-cells = <0>; + interrupt-map = <0 &UIC3 0x18 1 + 1 &UIC3 0x19 1 + 2 &UIC3 0x1A 1 + 3 &UIC3 0x1B 1>; + }; }; }; diff --git a/arch/powerpc/boot/dts/katmai.dts b/arch/powerpc/boot/dts/katmai.dts index 7c3be5e45748..f913dbe25d35 100644 --- a/arch/powerpc/boot/dts/katmai.dts +++ b/arch/powerpc/boot/dts/katmai.dts @@ -442,6 +442,24 @@ 0x0 0x0 0x0 0x4 &UIC3 0xb 0x4 /* swizzled int D */>; }; + MSI: ppc4xx-msi@400300000 { + compatible = "amcc,ppc4xx-msi", "ppc4xx-msi"; + reg = < 0x4 0x00300000 0x100>; + sdr-base = <0x3B0>; + msi-data = <0x00000000>; + msi-mask = <0x44440000>; + interrupt-count = <3>; + interrupts =<0 1 2 3>; + interrupt-parent = <&UIC0>; + #interrupt-cells = <1>; + #address-cells = <0>; + #size-cells = <0>; + interrupt-map = <0 &UIC0 0xC 1 + 1 &UIC0 0x0D 1 + 2 &UIC0 0x0E 1 + 3 &UIC0 0x0F 1>; + }; + I2O: i2o@400100000 { compatible = "ibm,i2o-440spe"; reg = <0x00000004 0x00100000 0x100>; diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts index 89edb16649c3..1613d6e4049e 100644 --- a/arch/powerpc/boot/dts/kilauea.dts +++ b/arch/powerpc/boot/dts/kilauea.dts @@ -403,5 +403,33 @@ 0x0 0x0 0x0 0x3 &UIC2 0xd 0x4 /* swizzled int C */ 0x0 0x0 0x0 0x4 &UIC2 0xe 0x4 /* swizzled int D */>; }; + + MSI: ppc4xx-msi@C10000000 { + compatible = "amcc,ppc4xx-msi", "ppc4xx-msi"; + reg = < 0x0 0xEF620000 0x100>; + sdr-base = <0x4B0>; + msi-data = <0x00000000>; + msi-mask = <0x44440000>; + interrupt-count = <12>; + interrupts = <0 1 2 3 4 5 6 7 8 9 0xA 0xB 0xC 0xD>; + interrupt-parent = <&UIC2>; + #interrupt-cells = <1>; + #address-cells = <0>; + #size-cells = <0>; + interrupt-map = <0 &UIC2 0x10 1 + 1 &UIC2 0x11 1 + 2 &UIC2 0x12 1 + 2 &UIC2 0x13 1 + 2 &UIC2 0x14 1 + 2 &UIC2 0x15 1 + 2 &UIC2 0x16 1 + 2 &UIC2 0x17 1 + 2 &UIC2 0x18 1 + 2 &UIC2 0x19 1 + 2 &UIC2 0x1A 1 + 2 &UIC2 0x1B 1 + 2 &UIC2 0x1C 1 + 3 &UIC2 0x1D 1>; + }; }; }; diff --git a/arch/powerpc/boot/dts/mpc8313erdb.dts b/arch/powerpc/boot/dts/mpc8313erdb.dts index 761faa7b6964..ac1eb320c7b4 100644 --- a/arch/powerpc/boot/dts/mpc8313erdb.dts +++ b/arch/powerpc/boot/dts/mpc8313erdb.dts @@ -176,6 +176,19 @@ sleep = <&pmc 0x00300000>; }; + ptp_clock@24E00 { + compatible = "fsl,etsec-ptp"; + reg = <0x24E00 0xB0>; + interrupts = <12 0x8 13 0x8>; + interrupt-parent = < &ipic >; + fsl,tclk-period = <10>; + fsl,tmr-prsc = <100>; + fsl,tmr-add = <0x999999A4>; + fsl,tmr-fiper1 = <0x3B9AC9F6>; + fsl,tmr-fiper2 = <0x00018696>; + fsl,max-adj = <659999998>; + }; + enet0: ethernet@24000 { #address-cells = <1>; #size-cells = <1>; diff --git a/arch/powerpc/boot/dts/mpc8572ds.dts b/arch/powerpc/boot/dts/mpc8572ds.dts index cafc1285c140..f6c04d25e916 100644 --- a/arch/powerpc/boot/dts/mpc8572ds.dts +++ b/arch/powerpc/boot/dts/mpc8572ds.dts @@ -324,6 +324,19 @@ }; }; + ptp_clock@24E00 { + compatible = "fsl,etsec-ptp"; + reg = <0x24E00 0xB0>; + interrupts = <68 2 69 2 70 2 71 2>; + interrupt-parent = < &mpic >; + fsl,tclk-period = <5>; + fsl,tmr-prsc = <200>; + fsl,tmr-add = <0xAAAAAAAB>; + fsl,tmr-fiper1 = <0x3B9AC9FB>; + fsl,tmr-fiper2 = <0x3B9AC9FB>; + fsl,max-adj = <499999999>; + }; + enet0: ethernet@24000 { #address-cells = <1>; #size-cells = <1>; diff --git a/arch/powerpc/boot/dts/p2020ds.dts b/arch/powerpc/boot/dts/p2020ds.dts index 2bcf3683d223..dae403100f2f 100644 --- a/arch/powerpc/boot/dts/p2020ds.dts +++ b/arch/powerpc/boot/dts/p2020ds.dts @@ -178,6 +178,19 @@ }; + ptp_clock@24E00 { + compatible = "fsl,etsec-ptp"; + reg = <0x24E00 0xB0>; + interrupts = <68 2 69 2 70 2>; + interrupt-parent = < &mpic >; + fsl,tclk-period = <5>; + fsl,tmr-prsc = <200>; + fsl,tmr-add = <0xCCCCCCCD>; + fsl,tmr-fiper1 = <0x3B9AC9FB>; + fsl,tmr-fiper2 = <0x0001869B>; + fsl,max-adj = <249999999>; + }; + enet0: ethernet@24000 { tbi-handle = <&tbi0>; phy-handle = <&phy0>; diff --git a/arch/powerpc/boot/dts/p2020rdb.dts b/arch/powerpc/boot/dts/p2020rdb.dts index 3782a58f13be..1d7a05f3021e 100644 --- a/arch/powerpc/boot/dts/p2020rdb.dts +++ b/arch/powerpc/boot/dts/p2020rdb.dts @@ -224,6 +224,19 @@ status = "disabled"; }; + ptp_clock@24E00 { + compatible = "fsl,etsec-ptp"; + reg = <0x24E00 0xB0>; + interrupts = <68 2 69 2 70 2>; + interrupt-parent = < &mpic >; + fsl,tclk-period = <5>; + fsl,tmr-prsc = <200>; + fsl,tmr-add = <0xCCCCCCCD>; + fsl,tmr-fiper1 = <0x3B9AC9FB>; + fsl,tmr-fiper2 = <0x0001869B>; + fsl,max-adj = <249999999>; + }; + enet0: ethernet@24000 { fixed-link = <1 1 1000 0 0>; phy-connection-type = "rgmii-id"; diff --git a/arch/powerpc/boot/dts/redwood.dts b/arch/powerpc/boot/dts/redwood.dts index 81636c01d906..d86a3a498118 100644 --- a/arch/powerpc/boot/dts/redwood.dts +++ b/arch/powerpc/boot/dts/redwood.dts @@ -358,8 +358,28 @@ 0x0 0x0 0x0 0x4 &UIC3 0xb 0x4 /* swizzled int D */>; }; + MSI: ppc4xx-msi@400300000 { + compatible = "amcc,ppc4xx-msi", "ppc4xx-msi"; + reg = < 0x4 0x00300000 0x100 + 0x4 0x00300000 0x100>; + sdr-base = <0x3B0>; + msi-data = <0x00000000>; + msi-mask = <0x44440000>; + interrupt-count = <3>; + interrupts =<0 1 2 3>; + interrupt-parent = <&UIC0>; + #interrupt-cells = <1>; + #address-cells = <0>; + #size-cells = <0>; + interrupt-map = <0 &UIC0 0xC 1 + 1 &UIC0 0x0D 1 + 2 &UIC0 0x0E 1 + 3 &UIC0 0x0F 1>; + }; + }; + chosen { linux,stdout-path = "/plb/opb/serial@ef600200"; }; diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index dde1296b8b41..169d039ed402 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -60,4 +60,18 @@ struct dyn_arch_ftrace { #endif +#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__) +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME +static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) +{ + /* + * Compare the symbol name with the system call name. Skip the .sys or .SyS + * prefix from the symbol name and the sys prefix from the system call name and + * just match the rest. This is only needed on ppc64 since symbol names on + * 32bit do not start with a period so the generic function will work. + */ + return !strcmp(sym + 4, name + 3); +} +#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 && !__ASSEMBLY__ */ + #endif /* _ASM_POWERPC_FTRACE */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 852b8c1c09db..fd8201dddd4b 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -236,7 +236,7 @@ #define H_HOME_NODE_ASSOCIATIVITY 0x2EC #define H_BEST_ENERGY 0x2F4 #define H_GET_MPP_X 0x314 -#define MAX_HCALL_OPCODE H_BEST_ENERGY +#define MAX_HCALL_OPCODE H_GET_MPP_X #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 18ea6963ad77..d2ca5ed3877b 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -45,6 +45,114 @@ struct kvm_regs { __u64 gpr[32]; }; +#define KVM_SREGS_E_IMPL_NONE 0 +#define KVM_SREGS_E_IMPL_FSL 1 + +#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ + +/* + * Feature bits indicate which sections of the sregs struct are valid, + * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers + * corresponding to unset feature bits will not be modified. This allows + * restoring a checkpoint made without that feature, while keeping the + * default values of the new registers. + * + * KVM_SREGS_E_BASE contains: + * CSRR0/1 (refers to SRR2/3 on 40x) + * ESR + * DEAR + * MCSR + * TSR + * TCR + * DEC + * TB + * VRSAVE (USPRG0) + */ +#define KVM_SREGS_E_BASE (1 << 0) + +/* + * KVM_SREGS_E_ARCH206 contains: + * + * PIR + * MCSRR0/1 + * DECAR + * IVPR + */ +#define KVM_SREGS_E_ARCH206 (1 << 1) + +/* + * Contains EPCR, plus the upper half of 64-bit registers + * that are 32-bit on 32-bit implementations. + */ +#define KVM_SREGS_E_64 (1 << 2) + +#define KVM_SREGS_E_SPRG8 (1 << 3) +#define KVM_SREGS_E_MCIVPR (1 << 4) + +/* + * IVORs are used -- contains IVOR0-15, plus additional IVORs + * in combination with an appropriate feature bit. + */ +#define KVM_SREGS_E_IVOR (1 << 5) + +/* + * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG. + * Also TLBnPS if MMUCFG[MAVN] = 1. + */ +#define KVM_SREGS_E_ARCH206_MMU (1 << 6) + +/* DBSR, DBCR, IAC, DAC, DVC */ +#define KVM_SREGS_E_DEBUG (1 << 7) + +/* Enhanced debug -- DSRR0/1, SPRG9 */ +#define KVM_SREGS_E_ED (1 << 8) + +/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_SPE (1 << 9) + +/* External Proxy (EXP) -- EPR */ +#define KVM_SREGS_EXP (1 << 10) + +/* External PID (E.PD) -- EPSC/EPLC */ +#define KVM_SREGS_E_PD (1 << 11) + +/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PC (1 << 12) + +/* Page table (E.PT) -- EPTCFG */ +#define KVM_SREGS_E_PT (1 << 13) + +/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PM (1 << 14) + +/* + * Special updates: + * + * Some registers may change even while a vcpu is not running. + * To avoid losing these changes, by default these registers are + * not updated by KVM_SET_SREGS. To force an update, set the bit + * in u.e.update_special corresponding to the register to be updated. + * + * The update_special field is zero on return from KVM_GET_SREGS. + * + * When restoring a checkpoint, the caller can set update_special + * to 0xffffffff to ensure that everything is restored, even new features + * that the caller doesn't know about. + */ +#define KVM_SREGS_E_UPDATE_MCSR (1 << 0) +#define KVM_SREGS_E_UPDATE_TSR (1 << 1) +#define KVM_SREGS_E_UPDATE_DEC (1 << 2) +#define KVM_SREGS_E_UPDATE_DBSR (1 << 3) + +/* + * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a + * previous KVM_GET_REGS. + * + * Unless otherwise indicated, setting any register with KVM_SET_SREGS + * directly sets its value. It does not trigger any special semantics such + * as write-one-to-clear. Calling KVM_SET_SREGS on an unmodified struct + * just received from KVM_GET_SREGS is always a no-op. + */ struct kvm_sregs { __u32 pvr; union { @@ -62,6 +170,82 @@ struct kvm_sregs { __u64 dbat[8]; } ppc32; } s; + struct { + union { + struct { /* KVM_SREGS_E_IMPL_FSL */ + __u32 features; /* KVM_SREGS_E_FSL_ */ + __u32 svr; + __u64 mcar; + __u32 hid0; + + /* KVM_SREGS_E_FSL_PIDn */ + __u32 pid1, pid2; + } fsl; + __u8 pad[256]; + } impl; + + __u32 features; /* KVM_SREGS_E_ */ + __u32 impl_id; /* KVM_SREGS_E_IMPL_ */ + __u32 update_special; /* KVM_SREGS_E_UPDATE_ */ + __u32 pir; /* read-only */ + __u64 sprg8; + __u64 sprg9; /* E.ED */ + __u64 csrr0; + __u64 dsrr0; /* E.ED */ + __u64 mcsrr0; + __u32 csrr1; + __u32 dsrr1; /* E.ED */ + __u32 mcsrr1; + __u32 esr; + __u64 dear; + __u64 ivpr; + __u64 mcivpr; + __u64 mcsr; /* KVM_SREGS_E_UPDATE_MCSR */ + + __u32 tsr; /* KVM_SREGS_E_UPDATE_TSR */ + __u32 tcr; + __u32 decar; + __u32 dec; /* KVM_SREGS_E_UPDATE_DEC */ + + /* + * Userspace can read TB directly, but the + * value reported here is consistent with "dec". + * + * Read-only. + */ + __u64 tb; + + __u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */ + __u32 dbcr[3]; + __u32 iac[4]; + __u32 dac[2]; + __u32 dvc[2]; + __u8 num_iac; /* read-only */ + __u8 num_dac; /* read-only */ + __u8 num_dvc; /* read-only */ + __u8 pad; + + __u32 epr; /* EXP */ + __u32 vrsave; /* a.k.a. USPRG0 */ + __u32 epcr; /* KVM_SREGS_E_64 */ + + __u32 mas0; + __u32 mas1; + __u64 mas2; + __u64 mas7_3; + __u32 mas4; + __u32 mas6; + + __u32 ivor_low[16]; /* IVOR0-15 */ + __u32 ivor_high[18]; /* IVOR32+, plus room to expand */ + + __u32 mmucfg; /* read-only */ + __u32 eptcfg; /* E.PT, read-only */ + __u32 tlbcfg[4];/* read-only */ + __u32 tlbps[4]; /* read-only */ + + __u32 eplc, epsc; /* E.PD */ + } e; __u8 pad[1020]; } u; }; diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h index d22d39942a92..a0e57618ff33 100644 --- a/arch/powerpc/include/asm/kvm_44x.h +++ b/arch/powerpc/include/asm/kvm_44x.h @@ -61,7 +61,6 @@ static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); } -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index 7fea26fffb25..7a2a565f88c4 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h @@ -43,6 +43,7 @@ struct kvmppc_vcpu_e500 { u32 host_pid[E500_PID_NUM]; u32 pid[E500_PID_NUM]; + u32 svr; u32 mas0; u32 mas1; @@ -58,6 +59,7 @@ struct kvmppc_vcpu_e500 { u32 hid1; u32 tlb0cfg; u32 tlb1cfg; + u64 mcar; struct kvm_vcpu vcpu; }; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bba3b9b72a39..186f150b9b89 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -223,6 +223,7 @@ struct kvm_vcpu_arch { ulong hflags; ulong guest_owned_ext; #endif + u32 vrsave; /* also USPRG0 */ u32 mmucr; ulong sprg4; ulong sprg5; @@ -232,6 +233,9 @@ struct kvm_vcpu_arch { ulong csrr1; ulong dsrr0; ulong dsrr1; + ulong mcsrr0; + ulong mcsrr1; + ulong mcsr; ulong esr; u32 dec; u32 decar; @@ -255,6 +259,7 @@ struct kvm_vcpu_arch { u32 dbsr; #ifdef CONFIG_KVM_EXIT_TIMING + struct mutex exit_timing_lock; struct kvmppc_exit_timing timing_exit; struct kvmppc_exit_timing timing_last_enter; u32 last_exit_type; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index ecb3bc74c344..9345238edecf 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -61,6 +61,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); +extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); /* Core-specific hooks */ @@ -142,4 +143,12 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) return r; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + +void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index abe8532bd14e..bf301ac62f35 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -31,14 +31,29 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) #endif #ifdef CONFIG_SMP -extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift); -extern void pte_free_finish(void); +struct mmu_gather; +extern void tlb_remove_table(struct mmu_gather *, void *); + +static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) +{ + unsigned long pgf = (unsigned long)table; + BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); + pgf |= shift; + tlb_remove_table(tlb, (void *)pgf); +} + +static inline void __tlb_remove_table(void *_table) +{ + void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE); + unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE; + + pgtable_free(table, shift); +} #else /* CONFIG_SMP */ static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift) { pgtable_free(table, shift); } -static inline void pte_free_finish(void) { } #endif /* !CONFIG_SMP */ static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage, diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h index c4490f9c67c4..59247e816ac5 100644 --- a/arch/powerpc/include/asm/pte-hash64-64k.h +++ b/arch/powerpc/include/asm/pte-hash64-64k.h @@ -22,7 +22,7 @@ #define _PAGE_HASHPTE _PAGE_HPTE_SUB /* Note the full page bits must be in the same location as for normal - * 4k pages as the same asssembly will be used to insert 64K pages + * 4k pages as the same assembly will be used to insert 64K pages * wether the kernel has CONFIG_PPC_64K_PAGES or not */ #define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 880b8c1e6e53..11eb404b5606 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -191,8 +191,6 @@ extern unsigned long __secondary_hold_spinloop; extern unsigned long __secondary_hold_acknowledge; extern char __secondary_hold; -extern irqreturn_t debug_ipi_action(int irq, void *data); - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 23913e902fc3..b54b2add07be 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -15,6 +15,11 @@ #include <linux/sched.h> +/* ftrace syscalls requires exporting the sys_call_table */ +#ifdef CONFIG_FTRACE_SYSCALLS +extern const unsigned long *sys_call_table; +#endif /* CONFIG_FTRACE_SYSCALLS */ + static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index d8529ef13b23..836f231ec1f0 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -110,7 +110,8 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NOERROR 12 /* Force successful syscall return */ #define TIF_NOTIFY_RESUME 13 /* callback before returning to user */ #define TIF_FREEZE 14 /* Freezing for suspend */ -#define TIF_RUNLATCH 15 /* Is the runlatch enabled? */ +#define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */ +#define TIF_RUNLATCH 16 /* Is the runlatch enabled? */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -127,8 +128,10 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NOERROR (1<<TIF_NOERROR) #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_FREEZE (1<<TIF_FREEZE) +#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) #define _TIF_RUNLATCH (1<<TIF_RUNLATCH) -#define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP) +#define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_NOTIFY_RESUME) @@ -139,10 +142,12 @@ static inline struct thread_info *current_thread_info(void) #define TLF_NAPPING 0 /* idle thread enabled NAP mode */ #define TLF_SLEEPING 1 /* suspend code enabled SLEEP mode */ #define TLF_RESTORE_SIGMASK 2 /* Restore signal mask in do_signal */ +#define TLF_LAZY_MMU 3 /* tlb_batch is active */ #define _TLF_NAPPING (1 << TLF_NAPPING) #define _TLF_SLEEPING (1 << TLF_SLEEPING) #define _TLF_RESTORE_SIGMASK (1 << TLF_RESTORE_SIGMASK) +#define _TLF_LAZY_MMU (1 << TLF_LAZY_MMU) #ifndef __ASSEMBLY__ #define HAVE_SET_RESTORE_SIGMASK 1 diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 9aab36312572..e8b981897d44 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -109,6 +109,7 @@ obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o +obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6887661ac072..36e1c8a29be8 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -396,6 +396,7 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); + DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index ce1f3e44c24f..bf99cfa6bbfe 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -22,6 +22,7 @@ #include <asm/cacheflush.h> #include <asm/code-patching.h> #include <asm/ftrace.h> +#include <asm/syscall.h> #ifdef CONFIG_DYNAMIC_FTRACE @@ -600,3 +601,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) +unsigned long __init arch_syscall_addr(int nr) +{ + return sys_call_table[nr*2]; +} +#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index a24d37d4cf51..5b428e308666 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -295,17 +295,20 @@ static inline void handle_one_irq(unsigned int irq) unsigned long saved_sp_limit; struct irq_desc *desc; + desc = irq_to_desc(irq); + if (!desc) + return; + /* Switch to the irq stack to handle this */ curtp = current_thread_info(); irqtp = hardirq_ctx[smp_processor_id()]; if (curtp == irqtp) { /* We're already on the irq stack, just handle it */ - generic_handle_irq(irq); + desc->handle_irq(irq, desc); return; } - desc = irq_to_desc(irq); saved_sp_limit = current->thread.ksp_limit; irqtp->task = curtp->task; @@ -557,15 +560,8 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, if (revmap_type == IRQ_HOST_MAP_LEGACY) { if (irq_map[0].host != NULL) { raw_spin_unlock_irqrestore(&irq_big_lock, flags); - /* If we are early boot, we can't free the structure, - * too bad... - * this will be fixed once slab is made available early - * instead of the current cruft - */ - if (mem_init_done) { - of_node_put(host->of_node); - kfree(host); - } + of_node_put(host->of_node); + kfree(host); return NULL; } irq_map[0].host = host; @@ -727,9 +723,7 @@ unsigned int irq_create_mapping(struct irq_host *host, } pr_debug("irq: -> using host @%p\n", host); - /* Check if mapping already exist, if it does, call - * host->ops->map() to update the flags - */ + /* Check if mapping already exists */ virq = irq_find_mapping(host, hwirq); if (virq != NO_IRQ) { pr_debug("irq: -> existing mapping on virq %d\n", virq); @@ -899,10 +893,13 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host, return irq_find_mapping(host, hwirq); /* - * No rcu_read_lock(ing) needed, the ptr returned can't go under us - * as it's referencing an entry in the static irq_map table. + * The ptr returned references the static global irq_map. + * but freeing an irq can delete nodes along the path to + * do the lookup via call_rcu. */ + rcu_read_lock(); ptr = radix_tree_lookup(&host->revmap_data.tree, hwirq); + rcu_read_unlock(); /* * If found in radix tree, then fine. @@ -1010,14 +1007,23 @@ void irq_free_virt(unsigned int virq, unsigned int count) WARN_ON (virq < NUM_ISA_INTERRUPTS); WARN_ON (count == 0 || (virq + count) > irq_virq_count); + if (virq < NUM_ISA_INTERRUPTS) { + if (virq + count < NUM_ISA_INTERRUPTS) + return; + count =- NUM_ISA_INTERRUPTS - virq; + virq = NUM_ISA_INTERRUPTS; + } + + if (count > irq_virq_count || virq > irq_virq_count - count) { + if (virq > irq_virq_count) + return; + count = irq_virq_count - virq; + } + raw_spin_lock_irqsave(&irq_big_lock, flags); for (i = virq; i < (virq + count); i++) { struct irq_host *host; - if (i < NUM_ISA_INTERRUPTS || - (virq + count) > irq_virq_count) - continue; - host = irq_map[i].host; irq_map[i].hwirq = host->inval_irq; smp_wmb(); diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index bd9d35f59cf4..76a6e40a6f7c 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -142,7 +142,7 @@ static int kgdb_singlestep(struct pt_regs *regs) return 0; /* - * On Book E and perhaps other processsors, singlestep is handled on + * On Book E and perhaps other processors, singlestep is handled on * the critical exception stack. This causes current_thread_info() * to fail, since it it locates the thread_info by masking off * the low bits of the current stack pointer. We work around diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 095043d79946..91e52df3d81d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -395,6 +395,9 @@ struct task_struct *__switch_to(struct task_struct *prev, struct thread_struct *new_thread, *old_thread; unsigned long flags; struct task_struct *last; +#ifdef CONFIG_PPC_BOOK3S_64 + struct ppc64_tlb_batch *batch; +#endif #ifdef CONFIG_SMP /* avoid complexity of lazy save/restore of fpu @@ -513,7 +516,17 @@ struct task_struct *__switch_to(struct task_struct *prev, old_thread->accum_tb += (current_tb - start_tb); new_thread->start_tb = current_tb; } -#endif +#endif /* CONFIG_PPC64 */ + +#ifdef CONFIG_PPC_BOOK3S_64 + batch = &__get_cpu_var(ppc64_tlb_batch); + if (batch->active) { + current_thread_info()->local_flags |= _TLF_LAZY_MMU; + if (batch->index) + __flush_tlb_pending(batch); + batch->active = 0; + } +#endif /* CONFIG_PPC_BOOK3S_64 */ local_irq_save(flags); @@ -528,6 +541,14 @@ struct task_struct *__switch_to(struct task_struct *prev, hard_irq_disable(); last = _switch(old_thread, new_thread); +#ifdef CONFIG_PPC_BOOK3S_64 + if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { + current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; + batch = &__get_cpu_var(ppc64_tlb_batch); + batch->active = 1; + } +#endif /* CONFIG_PPC_BOOK3S_64 */ + local_irq_restore(flags); return last; diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index a6ae1cfad86c..cb22024f2b42 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -29,6 +29,7 @@ #include <linux/signal.h> #include <linux/seccomp.h> #include <linux/audit.h> +#include <trace/syscall.h> #ifdef CONFIG_PPC32 #include <linux/module.h> #endif @@ -40,6 +41,9 @@ #include <asm/pgtable.h> #include <asm/system.h> +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + /* * The parameter save area on the stack is used to store arguments being passed * to callee function and is located at fixed offset from stack pointer. @@ -1710,6 +1714,9 @@ long do_syscall_trace_enter(struct pt_regs *regs) */ ret = -1L; + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->gpr[0]); + if (unlikely(current->audit_context)) { #ifdef CONFIG_PPC64 if (!is_32bit_task()) @@ -1738,6 +1745,9 @@ void do_syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit((regs->ccr&0x10000000)?AUDITSC_FAILURE:AUDITSC_SUCCESS, regs->result); + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->result); + step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 4a6f2ec7e761..8ebc6700b98d 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -129,7 +129,7 @@ static irqreturn_t call_function_single_action(int irq, void *data) return IRQ_HANDLED; } -irqreturn_t debug_ipi_action(int irq, void *data) +static irqreturn_t debug_ipi_action(int irq, void *data) { if (crash_ipi_function_ptr) { crash_ipi_function_ptr(get_irq_regs()); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index b9150f07d266..920276c0f6a1 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -160,7 +160,7 @@ SECTIONS INIT_RAM_FS } - PERCPU(L1_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L1_CACHE_BYTES) . = ALIGN(8); .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) { diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 74d0e7421143..da3a1225c0ac 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -107,6 +107,16 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_44x *vcpu_44x; diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 65ea083a5b27..549bb2c9a47a 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -158,7 +158,6 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); } - kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); return emulated; } @@ -179,7 +178,6 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); } - kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); return emulated; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ef76acb455c3..8462b3a1c1c7 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -569,6 +569,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvmppc_set_msr(vcpu, regs->msr); vcpu->arch.shared->srr0 = regs->srr0; vcpu->arch.shared->srr1 = regs->srr1; + kvmppc_set_pid(vcpu, regs->pid); vcpu->arch.shared->sprg0 = regs->sprg0; vcpu->arch.shared->sprg1 = regs->sprg1; vcpu->arch.shared->sprg2 = regs->sprg2; @@ -584,16 +585,165 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } +static void get_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + u64 tb = get_tb(); + + sregs->u.e.features |= KVM_SREGS_E_BASE; + + sregs->u.e.csrr0 = vcpu->arch.csrr0; + sregs->u.e.csrr1 = vcpu->arch.csrr1; + sregs->u.e.mcsr = vcpu->arch.mcsr; + sregs->u.e.esr = vcpu->arch.esr; + sregs->u.e.dear = vcpu->arch.shared->dar; + sregs->u.e.tsr = vcpu->arch.tsr; + sregs->u.e.tcr = vcpu->arch.tcr; + sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); + sregs->u.e.tb = tb; + sregs->u.e.vrsave = vcpu->arch.vrsave; +} + +static int set_sregs_base(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_BASE)) + return 0; + + vcpu->arch.csrr0 = sregs->u.e.csrr0; + vcpu->arch.csrr1 = sregs->u.e.csrr1; + vcpu->arch.mcsr = sregs->u.e.mcsr; + vcpu->arch.esr = sregs->u.e.esr; + vcpu->arch.shared->dar = sregs->u.e.dear; + vcpu->arch.vrsave = sregs->u.e.vrsave; + vcpu->arch.tcr = sregs->u.e.tcr; + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) + vcpu->arch.dec = sregs->u.e.dec; + + kvmppc_emulate_dec(vcpu); + + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { + /* + * FIXME: existing KVM timer handling is incomplete. + * TSR cannot be read by the guest, and its value in + * vcpu->arch is always zero. For now, just handle + * the case where the caller is trying to inject a + * decrementer interrupt. + */ + + if ((sregs->u.e.tsr & TSR_DIS) && + (vcpu->arch.tcr & TCR_DIE)) + kvmppc_core_queue_dec(vcpu); + } + + return 0; +} + +static void get_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_ARCH206; + + sregs->u.e.pir = 0; + sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0; + sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1; + sregs->u.e.decar = vcpu->arch.decar; + sregs->u.e.ivpr = vcpu->arch.ivpr; +} + +static int set_sregs_arch206(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206)) + return 0; + + if (sregs->u.e.pir != 0) + return -EINVAL; + + vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0; + vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1; + vcpu->arch.decar = sregs->u.e.decar; + vcpu->arch.ivpr = sregs->u.e.ivpr; + + return 0; +} + +void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + sregs->u.e.features |= KVM_SREGS_E_IVOR; + + sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; + sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; + sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; + sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; + sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; + sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; + sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; + sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; + sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; + sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; + sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; + sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; + sregs->u.e.ivor_low[12] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; + sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; + sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; + sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; +} + +int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2]; + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3]; + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5]; + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8]; + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10]; + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11]; + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13]; + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15]; + + return 0; +} + int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return -ENOTSUPP; + sregs->pvr = vcpu->arch.pvr; + + get_sregs_base(vcpu, sregs); + get_sregs_arch206(vcpu, sregs); + kvmppc_core_get_sregs(vcpu, sregs); + return 0; } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return -ENOTSUPP; + int ret; + + if (vcpu->arch.pvr != sregs->pvr) + return -EINVAL; + + ret = set_sregs_base(vcpu, sregs); + if (ret < 0) + return ret; + + ret = set_sregs_arch206(vcpu, sregs); + if (ret < 0) + return ret; + + return kvmppc_core_set_sregs(vcpu, sregs); } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 1cc471faac2d..b58ccae95904 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -380,7 +380,6 @@ lightweight_exit: * because host interrupt handlers would get confused. */ lwz r1, VCPU_GPR(r1)(r4) - /* XXX handle USPRG0 */ /* Host interrupt handlers may have clobbered these guest-readable * SPRGs, so we need to reload them here with the guest's values. */ lwz r3, VCPU_SPRG4(r4) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index e3768ee9b595..318dbc61ba44 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -63,6 +63,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) /* Registers init */ vcpu->arch.pvr = mfspr(SPRN_PVR); + vcpu_e500->svr = mfspr(SPRN_SVR); /* Since booke kvm only support one core, update all vcpus' PIR to 0 */ vcpu->vcpu_id = 0; @@ -96,6 +97,81 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, return 0; } +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE | + KVM_SREGS_E_PM; + sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL; + + sregs->u.e.impl.fsl.features = 0; + sregs->u.e.impl.fsl.svr = vcpu_e500->svr; + sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; + sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; + + sregs->u.e.mas0 = vcpu_e500->mas0; + sregs->u.e.mas1 = vcpu_e500->mas1; + sregs->u.e.mas2 = vcpu_e500->mas2; + sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3; + sregs->u.e.mas4 = vcpu_e500->mas4; + sregs->u.e.mas6 = vcpu_e500->mas6; + + sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); + sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; + sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg; + sregs->u.e.tlbcfg[2] = 0; + sregs->u.e.tlbcfg[3] = 0; + + sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; + sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; + sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; + sregs->u.e.ivor_high[3] = + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; + + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { + vcpu_e500->svr = sregs->u.e.impl.fsl.svr; + vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0; + vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; + } + + if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { + vcpu_e500->mas0 = sregs->u.e.mas0; + vcpu_e500->mas1 = sregs->u.e.mas1; + vcpu_e500->mas2 = sregs->u.e.mas2; + vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32; + vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3; + vcpu_e500->mas4 = sregs->u.e.mas4; + vcpu_e500->mas6 = sregs->u.e.mas6; + } + + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + if (sregs->u.e.features & KVM_SREGS_E_SPE) { + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = + sregs->u.e.ivor_high[0]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = + sregs->u.e.ivor_high[1]; + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = + sregs->u.e.ivor_high[2]; + } + + if (sregs->u.e.features & KVM_SREGS_E_PM) { + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = + sregs->u.e.ivor_high[3]; + } + + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_e500 *vcpu_e500; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 8e3edfbc9634..69cd665a0caf 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, <yu.liu@freescale.com> * @@ -78,8 +78,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) switch (sprn) { case SPRN_PID: - vcpu_e500->pid[0] = vcpu->arch.shadow_pid = - vcpu->arch.pid = spr_val; + kvmppc_set_pid(vcpu, spr_val); break; case SPRN_PID1: vcpu_e500->pid[1] = spr_val; break; @@ -175,6 +174,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break; case SPRN_HID1: kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break; + case SPRN_SVR: + kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break; case SPRN_MMUCSR0: kvmppc_set_gpr(vcpu, rt, 0); break; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index d6d6d47a75a9..b18fe353397d 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, yu.liu@freescale.com * @@ -24,6 +24,7 @@ #include "../mm/mmu_decl.h" #include "e500_tlb.h" #include "trace.h" +#include "timing.h" #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) @@ -506,6 +507,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) vcpu_e500->mas7 = 0; } + kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); return EMULATE_DONE; } @@ -571,6 +573,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) write_host_tlbe(vcpu_e500, stlbsel, sesel); } + kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); return EMULATE_DONE; } @@ -672,6 +675,14 @@ int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, return -1; } +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + vcpu_e500->pid[0] = vcpu->arch.shadow_pid = + vcpu->arch.pid = pid; +} + void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) { struct tlbe *tlbe; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index c64fd2909bb2..141dce3c6810 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -114,6 +114,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) } } +u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) +{ + u64 jd = tb - vcpu->arch.dec_jiffies; + return vcpu->arch.dec - jd; +} + /* XXX to do: * lhax * lhaux @@ -279,11 +285,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_DEC: { - u64 jd = get_tb() - vcpu->arch.dec_jiffies; - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd); - pr_debug("mfDEC: %x - %llx = %lx\n", - vcpu->arch.dec, jd, - kvmppc_get_gpr(vcpu, rt)); + kvmppc_set_gpr(vcpu, rt, + kvmppc_get_dec(vcpu, get_tb())); break; } default: @@ -294,6 +297,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } break; } + kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); break; case OP_31_XOP_STHX: @@ -363,6 +367,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) printk("mtspr: unknown spr %x\n", sprn); break; } + kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); break; case OP_31_XOP_DCBI: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 99758460efde..616dd516ca1f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -175,7 +175,11 @@ int kvm_dev_ioctl_check_extension(long ext) int r; switch (ext) { +#ifdef CONFIG_BOOKE + case KVM_CAP_PPC_BOOKE_SREGS: +#else case KVM_CAP_PPC_SEGSTATE: +#endif case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: @@ -284,6 +288,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; +#ifdef CONFIG_KVM_EXIT_TIMING + mutex_init(&vcpu->arch.exit_timing_lock); +#endif + return 0; } @@ -294,12 +302,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { +#ifdef CONFIG_BOOKE + /* + * vrsave (formerly usprg0) isn't used by Linux, but may + * be used by the guest. + * + * On non-booke this is associated with Altivec and + * is handled by code in book3s.c. + */ + mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); +#endif kvmppc_core_vcpu_load(vcpu, cpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvmppc_core_vcpu_put(vcpu); +#ifdef CONFIG_BOOKE + vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); +#endif } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index a021f5827a33..319177df9587 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -34,8 +34,8 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) { int i; - /* pause guest execution to avoid concurrent updates */ - mutex_lock(&vcpu->mutex); + /* Take a lock to avoid concurrent updates */ + mutex_lock(&vcpu->arch.exit_timing_lock); vcpu->arch.last_exit_type = 0xDEAD; for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { @@ -49,7 +49,7 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) vcpu->arch.timing_exit.tv64 = 0; vcpu->arch.timing_last_enter.tv64 = 0; - mutex_unlock(&vcpu->mutex); + mutex_unlock(&vcpu->arch.exit_timing_lock); } static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) @@ -65,6 +65,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) return; } + mutex_lock(&vcpu->arch.exit_timing_lock); + vcpu->arch.timing_count_type[type]++; /* sum */ @@ -93,6 +95,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) vcpu->arch.timing_min_duration[type] = duration; if (unlikely(duration > vcpu->arch.timing_max_duration[type])) vcpu->arch.timing_max_duration[type] = duration; + + mutex_unlock(&vcpu->arch.exit_timing_lock); } void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) @@ -147,17 +151,30 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private) { struct kvm_vcpu *vcpu = m->private; int i; + u64 min, max, sum, sum_quad; seq_printf(m, "%s", "type count min max sum sum_squared\n"); + for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { + + min = vcpu->arch.timing_min_duration[i]; + do_div(min, tb_ticks_per_usec); + max = vcpu->arch.timing_max_duration[i]; + do_div(max, tb_ticks_per_usec); + sum = vcpu->arch.timing_sum_duration[i]; + do_div(sum, tb_ticks_per_usec); + sum_quad = vcpu->arch.timing_sum_quad_duration[i]; + do_div(sum_quad, tb_ticks_per_usec); + seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n", kvm_exit_names[i], vcpu->arch.timing_count_type[i], - vcpu->arch.timing_min_duration[i], - vcpu->arch.timing_max_duration[i], - vcpu->arch.timing_sum_duration[i], - vcpu->arch.timing_sum_quad_duration[i]); + min, + max, + sum, + sum_quad); + } return 0; } diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 6a3997f98dfb..af40c8768a78 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -33,110 +33,6 @@ #include "mmu_decl.h" -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - -#ifdef CONFIG_SMP - -/* - * Handle batching of page table freeing on SMP. Page tables are - * queued up and send to be freed later by RCU in order to avoid - * freeing a page table page that is being walked without locks - */ - -static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); -static unsigned long pte_freelist_forced_free; - -struct pte_freelist_batch -{ - struct rcu_head rcu; - unsigned int index; - unsigned long tables[0]; -}; - -#define PTE_FREELIST_SIZE \ - ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ - / sizeof(unsigned long)) - -static void pte_free_smp_sync(void *arg) -{ - /* Do nothing, just ensure we sync with all CPUs */ -} - -/* This is only called when we are critically out of memory - * (and fail to get a page in pte_free_tlb). - */ -static void pgtable_free_now(void *table, unsigned shift) -{ - pte_freelist_forced_free++; - - smp_call_function(pte_free_smp_sync, NULL, 1); - - pgtable_free(table, shift); -} - -static void pte_free_rcu_callback(struct rcu_head *head) -{ - struct pte_freelist_batch *batch = - container_of(head, struct pte_freelist_batch, rcu); - unsigned int i; - - for (i = 0; i < batch->index; i++) { - void *table = (void *)(batch->tables[i] & ~MAX_PGTABLE_INDEX_SIZE); - unsigned shift = batch->tables[i] & MAX_PGTABLE_INDEX_SIZE; - - pgtable_free(table, shift); - } - - free_page((unsigned long)batch); -} - -static void pte_free_submit(struct pte_freelist_batch *batch) -{ - call_rcu_sched(&batch->rcu, pte_free_rcu_callback); -} - -void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift) -{ - /* This is safe since tlb_gather_mmu has disabled preemption */ - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); - unsigned long pgf; - - if (atomic_read(&tlb->mm->mm_users) < 2 || - cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){ - pgtable_free(table, shift); - return; - } - - if (*batchp == NULL) { - *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); - if (*batchp == NULL) { - pgtable_free_now(table, shift); - return; - } - (*batchp)->index = 0; - } - BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); - pgf = (unsigned long)table | shift; - (*batchp)->tables[(*batchp)->index++] = pgf; - if ((*batchp)->index == PTE_FREELIST_SIZE) { - pte_free_submit(*batchp); - *batchp = NULL; - } -} - -void pte_free_finish(void) -{ - /* This is safe since tlb_gather_mmu has disabled preemption */ - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); - - if (*batchp == NULL) - return; - pte_free_submit(*batchp); - *batchp = NULL; -} - -#endif /* CONFIG_SMP */ - static inline int is_exec_fault(void) { return current->thread.regs && TRAP(current->thread.regs) == 0x400; diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c index 690566b66e8e..27b863c14941 100644 --- a/arch/powerpc/mm/tlb_hash32.c +++ b/arch/powerpc/mm/tlb_hash32.c @@ -71,9 +71,6 @@ void tlb_flush(struct mmu_gather *tlb) */ _tlbia(); } - - /* Push out batch of freed page tables */ - pte_free_finish(); } /* diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index c14d09f614f3..31f18207970b 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -155,7 +155,7 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch) void tlb_flush(struct mmu_gather *tlb) { - struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch); /* If there's a TLB batch pending, then we must flush it because the * pages are going to be freed and we really don't want to have a CPU @@ -164,8 +164,7 @@ void tlb_flush(struct mmu_gather *tlb) if (tlbbatch->index) __flush_tlb_pending(tlbbatch); - /* Push out batch of freed page tables */ - pte_free_finish(); + put_cpu_var(ppc64_tlb_batch); } /** diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 2a030d89bbc6..0bdad3aecc67 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -299,9 +299,6 @@ EXPORT_SYMBOL(flush_tlb_range); void tlb_flush(struct mmu_gather *tlb) { flush_tlb_mm(tlb->mm); - - /* Push out batch of freed page tables */ - pte_free_finish(); } /* diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c index 8ee51a252cf1..e6bec74be131 100644 --- a/arch/powerpc/oprofile/op_model_power4.c +++ b/arch/powerpc/oprofile/op_model_power4.c @@ -261,6 +261,28 @@ static int get_kernel(unsigned long pc, unsigned long mmcra) return is_kernel; } +static bool pmc_overflow(unsigned long val) +{ + if ((int)val < 0) + return true; + + /* + * Events on POWER7 can roll back if a speculative event doesn't + * eventually complete. Unfortunately in some rare cases they will + * raise a performance monitor exception. We need to catch this to + * ensure we reset the PMC. In all cases the PMC will be 256 or less + * cycles from overflow. + * + * We only do this if the first pass fails to find any overflowing + * PMCs because a user might set a period of less than 256 and we + * don't want to mistakenly reset them. + */ + if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256)) + return true; + + return false; +} + static void power4_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) { @@ -281,7 +303,7 @@ static void power4_handle_interrupt(struct pt_regs *regs, for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) { val = classic_ctr_read(i); - if (val < 0) { + if (pmc_overflow(val)) { if (oprofile_running && ctr[i].enabled) { oprofile_add_ext_sample(pc, regs, i, is_kernel); classic_ctr_write(i, reset_value[i]); diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig index b72176434ebe..d733d7ca939c 100644 --- a/arch/powerpc/platforms/40x/Kconfig +++ b/arch/powerpc/platforms/40x/Kconfig @@ -57,6 +57,8 @@ config KILAUEA select 405EX select PPC40x_SIMPLE select PPC4xx_PCI_EXPRESS + select PCI_MSI + select PPC4xx_MSI help This option enables support for the AMCC PPC405EX evaluation board. diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index f485fc5f6d5e..e958b6f48ec2 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -74,6 +74,8 @@ config KATMAI select 440SPe select PCI select PPC4xx_PCI_EXPRESS + select PCI_MSI + select PCC4xx_MSI help This option enables support for the AMCC PPC440SPe evaluation board. @@ -118,6 +120,8 @@ config CANYONLANDS select 460EX select PCI select PPC4xx_PCI_EXPRESS + select PCI_MSI + select PPC4xx_MSI select IBM_NEW_EMAC_RGMII select IBM_NEW_EMAC_ZMII help @@ -144,6 +148,8 @@ config REDWOOD select 460SX select PCI select PPC4xx_PCI_EXPRESS + select PCI_MSI + select PPC4xx_MSI help This option enables support for the AMCC PPC460SX Redwood board. diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 449c08c15862..3e4eba603e6b 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -176,14 +176,14 @@ EXPORT_SYMBOL_GPL(iic_get_target_id); #ifdef CONFIG_SMP /* Use the highest interrupt priorities for IPI */ -static inline int iic_ipi_to_irq(int ipi) +static inline int iic_msg_to_irq(int msg) { - return IIC_IRQ_TYPE_IPI + 0xf - ipi; + return IIC_IRQ_TYPE_IPI + 0xf - msg; } -void iic_cause_IPI(int cpu, int mesg) +void iic_message_pass(int cpu, int msg) { - out_be64(&per_cpu(cpu_iic, cpu).regs->generate, (0xf - mesg) << 4); + out_be64(&per_cpu(cpu_iic, cpu).regs->generate, (0xf - msg) << 4); } struct irq_host *iic_get_irq_host(int node) @@ -192,50 +192,31 @@ struct irq_host *iic_get_irq_host(int node) } EXPORT_SYMBOL_GPL(iic_get_irq_host); -static irqreturn_t iic_ipi_action(int irq, void *dev_id) -{ - int ipi = (int)(long)dev_id; - - switch(ipi) { - case PPC_MSG_CALL_FUNCTION: - generic_smp_call_function_interrupt(); - break; - case PPC_MSG_RESCHEDULE: - scheduler_ipi(); - break; - case PPC_MSG_CALL_FUNC_SINGLE: - generic_smp_call_function_single_interrupt(); - break; - case PPC_MSG_DEBUGGER_BREAK: - debug_ipi_action(0, NULL); - break; - } - return IRQ_HANDLED; -} -static void iic_request_ipi(int ipi, const char *name) +static void iic_request_ipi(int msg) { int virq; - virq = irq_create_mapping(iic_host, iic_ipi_to_irq(ipi)); + virq = irq_create_mapping(iic_host, iic_msg_to_irq(msg)); if (virq == NO_IRQ) { printk(KERN_ERR - "iic: failed to map IPI %s\n", name); + "iic: failed to map IPI %s\n", smp_ipi_name[msg]); return; } - if (request_irq(virq, iic_ipi_action, IRQF_DISABLED, name, - (void *)(long)ipi)) - printk(KERN_ERR - "iic: failed to request IPI %s\n", name); + + /* + * If smp_request_message_ipi encounters an error it will notify + * the error. If a message is not needed it will return non-zero. + */ + if (smp_request_message_ipi(virq, msg)) + irq_dispose_mapping(virq); } void iic_request_IPIs(void) { - iic_request_ipi(PPC_MSG_CALL_FUNCTION, "IPI-call"); - iic_request_ipi(PPC_MSG_RESCHEDULE, "IPI-resched"); - iic_request_ipi(PPC_MSG_CALL_FUNC_SINGLE, "IPI-call-single"); -#ifdef CONFIG_DEBUGGER - iic_request_ipi(PPC_MSG_DEBUGGER_BREAK, "IPI-debug"); -#endif /* CONFIG_DEBUGGER */ + iic_request_ipi(PPC_MSG_CALL_FUNCTION); + iic_request_ipi(PPC_MSG_RESCHEDULE); + iic_request_ipi(PPC_MSG_CALL_FUNC_SINGLE); + iic_request_ipi(PPC_MSG_DEBUGGER_BREAK); } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h index 942dc39d6045..4f60ae6ca358 100644 --- a/arch/powerpc/platforms/cell/interrupt.h +++ b/arch/powerpc/platforms/cell/interrupt.h @@ -75,7 +75,7 @@ enum { }; extern void iic_init_IRQ(void); -extern void iic_cause_IPI(int cpu, int mesg); +extern void iic_message_pass(int cpu, int msg); extern void iic_request_IPIs(void); extern void iic_setup_cpu(void); diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index d176e6148e3f..dbb641ea90dd 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -152,7 +152,7 @@ static int smp_cell_cpu_bootable(unsigned int nr) return 1; } static struct smp_ops_t bpa_iic_smp_ops = { - .message_pass = iic_cause_IPI, + .message_pass = iic_message_pass, .probe = smp_iic_probe, .kick_cpu = smp_cell_kick_cpu, .setup_cpu = smp_cell_setup_cpu, diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig index d775fd148d13..7b4df37ac381 100644 --- a/arch/powerpc/sysdev/Kconfig +++ b/arch/powerpc/sysdev/Kconfig @@ -7,11 +7,18 @@ config PPC4xx_PCI_EXPRESS depends on PCI && 4xx default n +config PPC4xx_MSI + bool + depends on PCI_MSI + depends on PCI && 4xx + default n + config PPC_MSI_BITMAP bool depends on PCI_MSI default y if MPIC default y if FSL_PCI + default y if PPC4xx_MSI source "arch/powerpc/sysdev/xics/Kconfig" diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 6076e0074a87..0efa990e3344 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_OF_RTC) += of_rtc.o ifeq ($(CONFIG_PCI),y) obj-$(CONFIG_4xx) += ppc4xx_pci.o endif +obj-$(CONFIG_PPC4xx_MSI) += ppc4xx_msi.o obj-$(CONFIG_PPC4xx_CPM) += ppc4xx_cpm.o obj-$(CONFIG_PPC4xx_GPIO) += ppc4xx_gpio.o diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c new file mode 100644 index 000000000000..367af0241851 --- /dev/null +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -0,0 +1,276 @@ +/* + * Adding PCI-E MSI support for PPC4XX SoCs. + * + * Copyright (c) 2010, Applied Micro Circuits Corporation + * Authors: Tirumala R Marri <tmarri@apm.com> + * Feng Kan <fkan@apm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + */ + +#include <linux/irq.h> +#include <linux/bootmem.h> +#include <linux/pci.h> +#include <linux/msi.h> +#include <linux/of_platform.h> +#include <linux/interrupt.h> +#include <asm/prom.h> +#include <asm/hw_irq.h> +#include <asm/ppc-pci.h> +#include <boot/dcr.h> +#include <asm/dcr-regs.h> +#include <asm/msi_bitmap.h> + +#define PEIH_TERMADH 0x00 +#define PEIH_TERMADL 0x08 +#define PEIH_MSIED 0x10 +#define PEIH_MSIMK 0x18 +#define PEIH_MSIASS 0x20 +#define PEIH_FLUSH0 0x30 +#define PEIH_FLUSH1 0x38 +#define PEIH_CNTRST 0x48 +#define NR_MSI_IRQS 4 + +struct ppc4xx_msi { + u32 msi_addr_lo; + u32 msi_addr_hi; + void __iomem *msi_regs; + int msi_virqs[NR_MSI_IRQS]; + struct msi_bitmap bitmap; + struct device_node *msi_dev; +}; + +static struct ppc4xx_msi ppc4xx_msi; + +static int ppc4xx_msi_init_allocator(struct platform_device *dev, + struct ppc4xx_msi *msi_data) +{ + int err; + + err = msi_bitmap_alloc(&msi_data->bitmap, NR_MSI_IRQS, + dev->dev.of_node); + if (err) + return err; + + err = msi_bitmap_reserve_dt_hwirqs(&msi_data->bitmap); + if (err < 0) { + msi_bitmap_free(&msi_data->bitmap); + return err; + } + + return 0; +} + +static int ppc4xx_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int int_no = -ENOMEM; + unsigned int virq; + struct msi_msg msg; + struct msi_desc *entry; + struct ppc4xx_msi *msi_data = &ppc4xx_msi; + + list_for_each_entry(entry, &dev->msi_list, list) { + int_no = msi_bitmap_alloc_hwirqs(&msi_data->bitmap, 1); + if (int_no >= 0) + break; + if (int_no < 0) { + pr_debug("%s: fail allocating msi interrupt\n", + __func__); + } + virq = irq_of_parse_and_map(msi_data->msi_dev, int_no); + if (virq == NO_IRQ) { + dev_err(&dev->dev, "%s: fail mapping irq\n", __func__); + msi_bitmap_free_hwirqs(&msi_data->bitmap, int_no, 1); + return -ENOSPC; + } + dev_dbg(&dev->dev, "%s: virq = %d\n", __func__, virq); + + /* Setup msi address space */ + msg.address_hi = msi_data->msi_addr_hi; + msg.address_lo = msi_data->msi_addr_lo; + + irq_set_msi_desc(virq, entry); + msg.data = int_no; + write_msi_msg(virq, &msg); + } + return 0; +} + +void ppc4xx_teardown_msi_irqs(struct pci_dev *dev) +{ + struct msi_desc *entry; + struct ppc4xx_msi *msi_data = &ppc4xx_msi; + + dev_dbg(&dev->dev, "PCIE-MSI: tearing down msi irqs\n"); + + list_for_each_entry(entry, &dev->msi_list, list) { + if (entry->irq == NO_IRQ) + continue; + irq_set_msi_desc(entry->irq, NULL); + msi_bitmap_free_hwirqs(&msi_data->bitmap, + virq_to_hw(entry->irq), 1); + irq_dispose_mapping(entry->irq); + } +} + +static int ppc4xx_msi_check_device(struct pci_dev *pdev, int nvec, int type) +{ + dev_dbg(&pdev->dev, "PCIE-MSI:%s called. vec %x type %d\n", + __func__, nvec, type); + if (type == PCI_CAP_ID_MSIX) + pr_debug("ppc4xx msi: MSI-X untested, trying anyway.\n"); + + return 0; +} + +static int ppc4xx_setup_pcieh_hw(struct platform_device *dev, + struct resource res, struct ppc4xx_msi *msi) +{ + const u32 *msi_data; + const u32 *msi_mask; + const u32 *sdr_addr; + dma_addr_t msi_phys; + void *msi_virt; + + sdr_addr = of_get_property(dev->dev.of_node, "sdr-base", NULL); + if (!sdr_addr) + return -1; + + SDR0_WRITE(sdr_addr, (u64)res.start >> 32); /*HIGH addr */ + SDR0_WRITE(sdr_addr + 1, res.start & 0xFFFFFFFF); /* Low addr */ + + + msi->msi_dev = of_find_node_by_name(NULL, "ppc4xx-msi"); + if (msi->msi_dev) + return -ENODEV; + + msi->msi_regs = of_iomap(msi->msi_dev, 0); + if (!msi->msi_regs) { + dev_err(&dev->dev, "of_iomap problem failed\n"); + return -ENOMEM; + } + dev_dbg(&dev->dev, "PCIE-MSI: msi register mapped 0x%x 0x%x\n", + (u32) (msi->msi_regs + PEIH_TERMADH), (u32) (msi->msi_regs)); + + msi_virt = dma_alloc_coherent(&dev->dev, 64, &msi_phys, GFP_KERNEL); + msi->msi_addr_hi = 0x0; + msi->msi_addr_lo = (u32) msi_phys; + dev_dbg(&dev->dev, "PCIE-MSI: msi address 0x%x\n", msi->msi_addr_lo); + + /* Progam the Interrupt handler Termination addr registers */ + out_be32(msi->msi_regs + PEIH_TERMADH, msi->msi_addr_hi); + out_be32(msi->msi_regs + PEIH_TERMADL, msi->msi_addr_lo); + + msi_data = of_get_property(dev->dev.of_node, "msi-data", NULL); + if (!msi_data) + return -1; + msi_mask = of_get_property(dev->dev.of_node, "msi-mask", NULL); + if (!msi_mask) + return -1; + /* Program MSI Expected data and Mask bits */ + out_be32(msi->msi_regs + PEIH_MSIED, *msi_data); + out_be32(msi->msi_regs + PEIH_MSIMK, *msi_mask); + + return 0; +} + +static int ppc4xx_of_msi_remove(struct platform_device *dev) +{ + struct ppc4xx_msi *msi = dev->dev.platform_data; + int i; + int virq; + + for (i = 0; i < NR_MSI_IRQS; i++) { + virq = msi->msi_virqs[i]; + if (virq != NO_IRQ) + irq_dispose_mapping(virq); + } + + if (msi->bitmap.bitmap) + msi_bitmap_free(&msi->bitmap); + iounmap(msi->msi_regs); + of_node_put(msi->msi_dev); + kfree(msi); + + return 0; +} + +static int __devinit ppc4xx_msi_probe(struct platform_device *dev) +{ + struct ppc4xx_msi *msi; + struct resource res; + int err = 0; + + msi = &ppc4xx_msi;/*keep the msi data for further use*/ + + dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n"); + + msi = kzalloc(sizeof(struct ppc4xx_msi), GFP_KERNEL); + if (!msi) { + dev_err(&dev->dev, "No memory for MSI structure\n"); + return -ENOMEM; + } + dev->dev.platform_data = msi; + + /* Get MSI ranges */ + err = of_address_to_resource(dev->dev.of_node, 0, &res); + if (err) { + dev_err(&dev->dev, "%s resource error!\n", + dev->dev.of_node->full_name); + goto error_out; + } + + if (ppc4xx_setup_pcieh_hw(dev, res, msi)) + goto error_out; + + err = ppc4xx_msi_init_allocator(dev, msi); + if (err) { + dev_err(&dev->dev, "Error allocating MSI bitmap\n"); + goto error_out; + } + + ppc_md.setup_msi_irqs = ppc4xx_setup_msi_irqs; + ppc_md.teardown_msi_irqs = ppc4xx_teardown_msi_irqs; + ppc_md.msi_check_device = ppc4xx_msi_check_device; + return err; + +error_out: + ppc4xx_of_msi_remove(dev); + return err; +} +static const struct of_device_id ppc4xx_msi_ids[] = { + { + .compatible = "amcc,ppc4xx-msi", + }, + {} +}; +static struct platform_driver ppc4xx_msi_driver = { + .probe = ppc4xx_msi_probe, + .remove = ppc4xx_of_msi_remove, + .driver = { + .name = "ppc4xx-msi", + .owner = THIS_MODULE, + .of_match_table = ppc4xx_msi_ids, + }, + +}; + +static __init int ppc4xx_msi_init(void) +{ + return platform_driver_register(&ppc4xx_msi_driver); +} + +subsys_initcall(ppc4xx_msi_init); |