diff options
Diffstat (limited to 'arch/powerpc')
351 files changed, 7309 insertions, 6035 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d003409b5f98..98ae8b714d31 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -111,6 +111,7 @@ config PPC select HAVE_DMA_API_DEBUG select HAVE_OPROFILE select HAVE_DEBUG_KMEMLEAK + select ARCH_HAS_SG_CHAIN select GENERIC_ATOMIC64 if PPC32 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select HAVE_PERF_EVENTS @@ -145,6 +146,8 @@ config PPC select HAVE_IRQ_EXIT_ON_IRQ_STACK select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select HAVE_ARCH_AUDITSYSCALL + select ARCH_SUPPORTS_ATOMIC_RMW + select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN @@ -180,7 +183,7 @@ config SCHED_OMIT_FRAME_POINTER config ARCH_MAY_HAVE_PC_FDC bool - default !PPC_PSERIES || PCI + default PCI config PPC_OF def_bool y @@ -418,7 +421,7 @@ config KEXEC config CRASH_DUMP bool "Build a kdump crash kernel" depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP) - select RELOCATABLE if PPC64 || 44x || FSL_BOOKE + select RELOCATABLE if (PPC64 && !COMPILE_TEST) || 44x || FSL_BOOKE help Build a kernel suitable for use as a kdump capture kernel. The same kernel binary can be used as production kernel and dump @@ -1021,6 +1024,7 @@ endmenu if PPC64 config RELOCATABLE bool "Build a relocatable kernel" + depends on !COMPILE_TEST select NONSTATIC_KERNEL help This builds a kernel image that is capable of running anywhere diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 790352f93700..ec2e40f2cc11 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -202,9 +202,7 @@ config PPC_EARLY_DEBUG_BEAT config PPC_EARLY_DEBUG_44x bool "Early serial debugging for IBM/AMCC 44x CPUs" - # PPC_EARLY_DEBUG on 440 leaves AS=1 mappings above the TLB high water - # mark, which doesn't work with current 440 KVM. - depends on 44x && !KVM + depends on 44x help Select this to enable early debugging for IBM 44x chips via the inbuilt serial port. If you enable this, ensure you set @@ -303,7 +301,6 @@ config PPC_EARLY_DEBUG_OPAL_VTERMNO This correspond to which /dev/hvcN you want to use for early debug. - On OPAL v1 (takeover) this should always be 0 On OPAL v2, this will be 0 for network console and 1 or 2 for the machine built-in serial ports. diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5687e299d0a5..132d9c681d6a 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -135,6 +135,7 @@ CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4) CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5) CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6) CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7) +CFLAGS-$(CONFIG_POWER8_CPU) += $(call cc-option,-mcpu=power8) # Altivec option not allowed with e500mc64 in GCC. ifeq ($(CONFIG_ALTIVEC),y) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index ccc25eddbcb8..8a5bc1cfc6aa 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -389,7 +389,12 @@ $(obj)/zImage: $(addprefix $(obj)/, $(image-y)) $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y)) @rm -f $@; ln $< $@ +# Only install the vmlinux install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) + sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" + +# Install the vmlinux and other built boot targets. +zInstall: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $^ # anything not in $(targets) diff --git a/arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi index f75b4f820c3c..7d4a6a2354f4 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-sec6.0-0.dtsi @@ -32,7 +32,8 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - compatible = "fsl,sec-v6.0"; + compatible = "fsl,sec-v6.0", "fsl,sec-v5.0", + "fsl,sec-v4.0"; fsl,sec-era = <6>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/powerpc/boot/dts/mpc5121.dtsi b/arch/powerpc/boot/dts/mpc5121.dtsi index 2c0e1552d20b..7f9d14f5c4da 100644 --- a/arch/powerpc/boot/dts/mpc5121.dtsi +++ b/arch/powerpc/boot/dts/mpc5121.dtsi @@ -498,6 +498,7 @@ compatible = "fsl,mpc5121-dma"; reg = <0x14000 0x1800>; interrupts = <65 0x8>; + #dma-cells = <1>; }; }; diff --git a/arch/powerpc/boot/gunzip_util.c b/arch/powerpc/boot/gunzip_util.c index ef2aed0f63ca..9dc52501de83 100644 --- a/arch/powerpc/boot/gunzip_util.c +++ b/arch/powerpc/boot/gunzip_util.c @@ -112,10 +112,10 @@ int gunzip_partial(struct gunzip_state *state, void *dst, int dstlen) r = zlib_inflate(&state->s, Z_FULL_FLUSH); if (r != Z_OK && r != Z_STREAM_END) fatal("inflate returned %d msg: %s\n\r", r, state->s.msg); - len = state->s.next_out - (unsigned char *)dst; + len = state->s.next_out - (Byte *)dst; } else { /* uncompressed image */ - len = min(state->s.avail_in, (unsigned)dstlen); + len = min(state->s.avail_in, (uLong)dstlen); memcpy(dst, state->s.next_in, len); state->s.next_in += len; state->s.avail_in -= len; diff --git a/arch/powerpc/boot/io.h b/arch/powerpc/boot/io.h index 7c09f4861fe1..394da5500466 100644 --- a/arch/powerpc/boot/io.h +++ b/arch/powerpc/boot/io.h @@ -1,5 +1,5 @@ #ifndef _IO_H -#define __IO_H +#define _IO_H #include "types.h" diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig index c05310a913be..c936fab9ec4a 100644 --- a/arch/powerpc/configs/52xx/motionpro_defconfig +++ b/arch/powerpc/configs/52xx/motionpro_defconfig @@ -43,7 +43,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_MISC_DEVICES=y CONFIG_EEPROM_LEGACY=y -CONFIG_SCSI_TGT=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y CONFIG_ATA=y diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig index e9a81e5ba273..e362d588dfbf 100644 --- a/arch/powerpc/configs/85xx/kmp204x_defconfig +++ b/arch/powerpc/configs/85xx/kmp204x_defconfig @@ -192,7 +192,6 @@ CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=y CONFIG_STAGING=y -# CONFIG_NET_VENDOR_SILICOM is not set CONFIG_CLK_PPC_CORENET=y CONFIG_EXT2_FS=y CONFIG_NTFS_FS=y diff --git a/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig b/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig index bcbe74716689..9b192bb6bd3d 100644 --- a/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig +++ b/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig @@ -54,7 +54,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 CONFIG_IDE=y -CONFIG_SCSI_TGT=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y CONFIG_ATA=y diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 4bee1a6d41d0..7a7b3c879f96 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -5,6 +5,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=4 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_FHANDLE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 @@ -17,6 +18,7 @@ CONFIG_OPROFILE=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_PS3=y diff --git a/arch/powerpc/configs/celleb_defconfig b/arch/powerpc/configs/celleb_defconfig index 6d7b22f41b50..acccbfde8a50 100644 --- a/arch/powerpc/configs/celleb_defconfig +++ b/arch/powerpc/configs/celleb_defconfig @@ -5,6 +5,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=4 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_FHANDLE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 @@ -14,6 +15,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_CELLEB=y diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index 2eb27046434e..6db97e4414b2 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -4,6 +4,7 @@ CONFIG_ALTIVEC=y CONFIG_SMP=y CONFIG_NR_CPUS=24 CONFIG_SYSVIPC=y +CONFIG_FHANDLE=y CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 3c72fa615bd9..6fab06f7f411 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -5,6 +5,7 @@ CONFIG_NR_CPUS=4 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_BLK_DEV_INITRD=y @@ -15,6 +16,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 95e545d9f25c..fbd9e4163311 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -4,6 +4,7 @@ CONFIG_NR_CPUS=4 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_COMPAT_BRK is not set @@ -15,6 +16,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_BLK_DEV_BSG is not set +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_MAPLE=y diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig index 530601e8ccfe..69fd8adf9f5e 100644 --- a/arch/powerpc/configs/mpc5200_defconfig +++ b/arch/powerpc/configs/mpc5200_defconfig @@ -47,7 +47,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_EEPROM_AT24=y -CONFIG_SCSI_TGT=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y CONFIG_ATA=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index cec044a3ff69..3e72c8c06a0d 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -3,6 +3,7 @@ CONFIG_ALTIVEC=y CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_SYSVIPC=y +CONFIG_FHANDLE=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y @@ -13,6 +14,7 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_MAC_PARTITION=y +# CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_PASEMI=y diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index ccf66b9060a6..924e10df1844 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -127,4 +127,3 @@ CONFIG_CRYPTO_PCBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set CONFIG_VIRTUALIZATION=y -CONFIG_KVM_440=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index f26b267eb71f..b9b769e4a23e 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -4,6 +4,7 @@ CONFIG_VSX=y CONFIG_SMP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y @@ -49,6 +50,7 @@ CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m CONFIG_PPC_TRANSACTIONAL_MEM=y CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_SCHED_SMT=y diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 438e813dc9cb..587f5514f9b1 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -3,6 +3,7 @@ CONFIG_PPC_BOOK3E_64=y CONFIG_SMP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_TASKSTATS=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index c91066944842..fec5870f1818 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -425,10 +425,8 @@ CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y CONFIG_SCSI_SPI_ATTRS=m CONFIG_SCSI_SRP_ATTRS=m -CONFIG_SCSI_SRP_TGT_ATTRS=y CONFIG_SCSI_MESH=m CONFIG_SCSI_MAC53C94=m -CONFIG_SCSI_SRP=m CONFIG_SCSI_LOWLEVEL_PCMCIA=y CONFIG_SCSI_DH=y CONFIG_SCSI_DH_RDAC=m diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index fdee37fab81c..2e637c881d2b 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -5,6 +5,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y CONFIG_RD_LZMA=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index a905063281cc..50375f1f59e7 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -5,6 +5,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=2048 CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_IRQ_DOMAIN_DEBUG=y diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig index 58e3dbf43ca4..4428ee428f4e 100644 --- a/arch/powerpc/configs/pseries_le_defconfig +++ b/arch/powerpc/configs/pseries_le_defconfig @@ -6,6 +6,7 @@ CONFIG_NR_CPUS=2048 CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_IRQ_DOMAIN_DEBUG=y diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 3fb1bc432f4f..7f23f162ce9c 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -4,5 +4,6 @@ generic-y += hash.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += rwsem.h +generic-y += scatterlist.h generic-y += trace_clock.h generic-y += vtime.h diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 4b237aa35660..21be8ae8f809 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -34,10 +34,14 @@ #define PPC_MIN_STKFRM 112 #ifdef __BIG_ENDIAN__ +#define LWZX_BE stringify_in_c(lwzx) #define LDX_BE stringify_in_c(ldx) +#define STWX_BE stringify_in_c(stwx) #define STDX_BE stringify_in_c(stdx) #else +#define LWZX_BE stringify_in_c(lwbrx) #define LDX_BE stringify_in_c(ldbrx) +#define STWX_BE stringify_in_c(stwbrx) #define STDX_BE stringify_in_c(stdbrx) #endif diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 3eb53d741070..3a39283333c3 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -133,7 +133,6 @@ extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void die(const char *, struct pt_regs *, long); -extern void print_backtrace(unsigned long *); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index ed0afc1e44a4..34a05a1a990b 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -3,6 +3,7 @@ #ifdef __KERNEL__ +#include <asm/reg.h> /* bytes per L1 cache line */ #if defined(CONFIG_8xx) || defined(CONFIG_403GCX) @@ -39,6 +40,12 @@ struct ppc64_caches { }; extern struct ppc64_caches ppc64_caches; + +static inline void logmpp(u64 x) +{ + asm volatile(PPC_LOGMPP(R1) : : "r" (x)); +} + #endif /* __powerpc64__ && ! __ASSEMBLY__ */ #if defined(__ASSEMBLY__) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 37991e154ef8..840a5509b3f1 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -88,4 +88,15 @@ static inline unsigned long ppc_function_entry(void *func) #endif } +static inline unsigned long ppc_global_function_entry(void *func) +{ +#if defined(CONFIG_PPC64) && defined(_CALL_ELF) && _CALL_ELF == 2 + /* PPC64 ABIv2 the global entry point is at the address */ + return (unsigned long)func; +#else + /* All other cases there is no change vs ppc_function_entry() */ + return ppc_function_entry(func); +#endif +} + #endif /* _ASM_POWERPC_CODE_PATCHING_H */ diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index e91dec89644a..daa5af91163c 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -195,8 +195,7 @@ extern const char *powerpc_base_platform; #define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN) -#define MMU_FTR_PPCAS_ARCH_V2 (MMU_FTR_SLB | MMU_FTR_TLBIEL | \ - MMU_FTR_16M_PAGE) +#define MMU_FTR_PPCAS_ARCH_V2 (MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE) /* We only set the altivec features if the kernel was compiled with altivec * support @@ -268,10 +267,6 @@ extern const char *powerpc_base_platform; #define CPU_FTR_MAYBE_CAN_NAP 0 #endif -#define CLASSIC_PPC (!defined(CONFIG_8xx) && !defined(CONFIG_4xx) && \ - !defined(CONFIG_POWER3) && !defined(CONFIG_POWER4) && \ - !defined(CONFIG_BOOKE)) - #define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE) #define CPU_FTRS_603 (CPU_FTR_COMMON | \ @@ -400,11 +395,6 @@ extern const char *powerpc_base_platform; #define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ -#define CPU_FTRS_POWER3 (CPU_FTR_USE_TB | \ - CPU_FTR_IABR | CPU_FTR_PPC_LE) -#define CPU_FTRS_RS64 (CPU_FTR_USE_TB | \ - CPU_FTR_IABR | \ - CPU_FTR_MMCRA | CPU_FTR_CTRL) #define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \ @@ -447,6 +437,7 @@ extern const char *powerpc_base_platform; CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) +#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -466,15 +457,15 @@ extern const char *powerpc_base_platform; #define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500 | CPU_FTRS_A2) #else #define CPU_FTRS_POSSIBLE \ - (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 | \ - CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 | \ - CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \ - CPU_FTRS_CELL | CPU_FTRS_PA6T | CPU_FTR_VSX) + (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ + CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \ + CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \ + CPU_FTRS_PA6T | CPU_FTR_VSX) #endif #else enum { CPU_FTRS_POSSIBLE = -#if CLASSIC_PPC +#ifdef CONFIG_PPC_BOOK3S_32 CPU_FTRS_PPC601 | CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU | CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 | CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX | @@ -517,14 +508,15 @@ enum { #define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500 & CPU_FTRS_A2) #else #define CPU_FTRS_ALWAYS \ - (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 & \ - CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & CPU_FTRS_POWER6 & \ - CPU_FTRS_POWER7 & CPU_FTRS_CELL & CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE) + (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \ + CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \ + CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \ + CPU_FTRS_POWER8_DD1 & CPU_FTRS_POSSIBLE) #endif #else enum { CPU_FTRS_ALWAYS = -#if CLASSIC_PPC +#ifdef CONFIG_PPC_BOOK3S_32 CPU_FTRS_PPC601 & CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU & CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 & CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX & diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 150866b2a3fe..894d538f3567 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -135,6 +135,7 @@ static inline int dma_supported(struct device *dev, u64 mask) extern int dma_set_mask(struct device *dev, u64 dma_mask); extern int __dma_set_mask(struct device *dev, u64 dma_mask); +extern u64 __dma_get_required_mask(struct device *dev); #define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index fab7743c2640..3b260efbfbf9 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -25,6 +25,7 @@ #include <linux/list.h> #include <linux/string.h> #include <linux/time.h> +#include <linux/atomic.h> struct pci_dev; struct pci_bus; @@ -33,10 +34,11 @@ struct device_node; #ifdef CONFIG_EEH /* EEH subsystem flags */ -#define EEH_ENABLED 0x1 /* EEH enabled */ -#define EEH_FORCE_DISABLED 0x2 /* EEH disabled */ -#define EEH_PROBE_MODE_DEV 0x4 /* From PCI device */ -#define EEH_PROBE_MODE_DEVTREE 0x8 /* From device tree */ +#define EEH_ENABLED 0x01 /* EEH enabled */ +#define EEH_FORCE_DISABLED 0x02 /* EEH disabled */ +#define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */ +#define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */ +#define EEH_ENABLE_IO_FOR_LOG 0x10 /* Enable IO for log */ /* * Delay for PE reset, all in ms @@ -84,7 +86,9 @@ struct eeh_pe { int freeze_count; /* Times of froze up */ struct timeval tstamp; /* Time on first-time freeze */ int false_positives; /* Times of reported #ff's */ + atomic_t pass_dev_cnt; /* Count of passed through devs */ struct eeh_pe *parent; /* Parent PE */ + void *data; /* PE auxillary data */ struct list_head child_list; /* Link PE to the child list */ struct list_head edevs; /* Link list of EEH devices */ struct list_head child; /* Child PEs */ @@ -93,6 +97,11 @@ struct eeh_pe { #define eeh_pe_for_each_dev(pe, edev, tmp) \ list_for_each_entry_safe(edev, tmp, &pe->edevs, list) +static inline bool eeh_pe_passed(struct eeh_pe *pe) +{ + return pe ? !!atomic_read(&pe->pass_dev_cnt) : false; +} + /* * The struct is used to trace EEH state for the associated * PCI device node or PCI device. In future, it might @@ -137,6 +146,11 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) return edev ? edev->pdev : NULL; } +static inline struct eeh_pe *eeh_dev_to_pe(struct eeh_dev* edev) +{ + return edev ? edev->pe : NULL; +} + /* Return values from eeh_ops::next_error */ enum { EEH_NEXT_ERR_NONE = 0, @@ -158,6 +172,7 @@ enum { #define EEH_OPT_ENABLE 1 /* EEH enable */ #define EEH_OPT_THAW_MMIO 2 /* MMIO enable */ #define EEH_OPT_THAW_DMA 3 /* DMA enable */ +#define EEH_OPT_FREEZE_PE 4 /* Freeze PE */ #define EEH_STATE_UNAVAILABLE (1 << 0) /* State unavailable */ #define EEH_STATE_NOT_SUPPORT (1 << 1) /* EEH not supported */ #define EEH_STATE_RESET_ACTIVE (1 << 2) /* Active reset */ @@ -165,6 +180,11 @@ enum { #define EEH_STATE_DMA_ACTIVE (1 << 4) /* Active DMA */ #define EEH_STATE_MMIO_ENABLED (1 << 5) /* MMIO enabled */ #define EEH_STATE_DMA_ENABLED (1 << 6) /* DMA enabled */ +#define EEH_PE_STATE_NORMAL 0 /* Normal state */ +#define EEH_PE_STATE_RESET 1 /* PE reset asserted */ +#define EEH_PE_STATE_STOPPED_IO_DMA 2 /* Frozen PE */ +#define EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA, Enabled IO */ +#define EEH_PE_STATE_UNAVAIL 5 /* Unavailable */ #define EEH_RESET_DEACTIVATE 0 /* Deactivate the PE reset */ #define EEH_RESET_HOT 1 /* Hot reset */ #define EEH_RESET_FUNDAMENTAL 3 /* Fundamental reset */ @@ -184,6 +204,8 @@ struct eeh_ops { int (*wait_state)(struct eeh_pe *pe, int max_wait); int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len); int (*configure_bridge)(struct eeh_pe *pe); + int (*err_inject)(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask); int (*read_config)(struct device_node *dn, int where, int size, u32 *val); int (*write_config)(struct device_node *dn, int where, int size, u32 val); int (*next_error)(struct eeh_pe **pe); @@ -194,36 +216,28 @@ extern int eeh_subsystem_flags; extern struct eeh_ops *eeh_ops; extern raw_spinlock_t confirm_error_lock; -static inline bool eeh_enabled(void) +static inline void eeh_add_flag(int flag) { - if ((eeh_subsystem_flags & EEH_FORCE_DISABLED) || - !(eeh_subsystem_flags & EEH_ENABLED)) - return false; - - return true; + eeh_subsystem_flags |= flag; } -static inline void eeh_set_enable(bool mode) +static inline void eeh_clear_flag(int flag) { - if (mode) - eeh_subsystem_flags |= EEH_ENABLED; - else - eeh_subsystem_flags &= ~EEH_ENABLED; + eeh_subsystem_flags &= ~flag; } -static inline void eeh_probe_mode_set(int flag) +static inline bool eeh_has_flag(int flag) { - eeh_subsystem_flags |= flag; + return !!(eeh_subsystem_flags & flag); } -static inline int eeh_probe_mode_devtree(void) +static inline bool eeh_enabled(void) { - return (eeh_subsystem_flags & EEH_PROBE_MODE_DEVTREE); -} + if (eeh_has_flag(EEH_FORCE_DISABLED) || + !eeh_has_flag(EEH_ENABLED)) + return false; -static inline int eeh_probe_mode_dev(void) -{ - return (eeh_subsystem_flags & EEH_PROBE_MODE_DEV); + return true; } static inline void eeh_serialize_lock(unsigned long *flags) @@ -243,6 +257,7 @@ static inline void eeh_serialize_unlock(unsigned long flags) #define EEH_MAX_ALLOWED_FREEZES 5 typedef void *(*eeh_traverse_func)(void *data, void *flag); +void eeh_set_pe_aux_size(int size); int eeh_phb_pe_create(struct pci_controller *phb); struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); struct eeh_pe *eeh_pe_get(struct eeh_dev *edev); @@ -262,8 +277,7 @@ void eeh_dev_phb_init_dynamic(struct pci_controller *phb); int eeh_init(void); int __init eeh_ops_register(struct eeh_ops *ops); int __exit eeh_ops_unregister(const char *name); -unsigned long eeh_check_failure(const volatile void __iomem *token, - unsigned long val); +int eeh_check_failure(const volatile void __iomem *token); int eeh_dev_check_failure(struct eeh_dev *edev); void eeh_addr_cache_build(void); void eeh_add_device_early(struct device_node *); @@ -272,6 +286,15 @@ void eeh_add_device_late(struct pci_dev *); void eeh_add_device_tree_late(struct pci_bus *); void eeh_add_sysfs_files(struct pci_bus *); void eeh_remove_device(struct pci_dev *); +int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state); +int eeh_pe_reset_and_recover(struct eeh_pe *pe); +int eeh_dev_open(struct pci_dev *pdev); +void eeh_dev_release(struct pci_dev *pdev); +struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group); +int eeh_pe_set_option(struct eeh_pe *pe, int option); +int eeh_pe_get_state(struct eeh_pe *pe); +int eeh_pe_reset(struct eeh_pe *pe, int option); +int eeh_pe_configure(struct eeh_pe *pe); /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. @@ -295,8 +318,6 @@ static inline bool eeh_enabled(void) return false; } -static inline void eeh_set_enable(bool mode) { } - static inline int eeh_init(void) { return 0; @@ -309,9 +330,9 @@ static inline void *eeh_dev_init(struct device_node *dn, void *data) static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { } -static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) +static inline int eeh_check_failure(const volatile void __iomem *token) { - return val; + return 0; } #define eeh_dev_check_failure(x) (0) @@ -342,7 +363,7 @@ static inline u8 eeh_readb(const volatile void __iomem *addr) { u8 val = in_8(addr); if (EEH_POSSIBLE_ERROR(val, u8)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -350,7 +371,7 @@ static inline u16 eeh_readw(const volatile void __iomem *addr) { u16 val = in_le16(addr); if (EEH_POSSIBLE_ERROR(val, u16)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -358,7 +379,7 @@ static inline u32 eeh_readl(const volatile void __iomem *addr) { u32 val = in_le32(addr); if (EEH_POSSIBLE_ERROR(val, u32)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -366,7 +387,7 @@ static inline u64 eeh_readq(const volatile void __iomem *addr) { u64 val = in_le64(addr); if (EEH_POSSIBLE_ERROR(val, u64)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -374,7 +395,7 @@ static inline u16 eeh_readw_be(const volatile void __iomem *addr) { u16 val = in_be16(addr); if (EEH_POSSIBLE_ERROR(val, u16)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -382,7 +403,7 @@ static inline u32 eeh_readl_be(const volatile void __iomem *addr) { u32 val = in_be32(addr); if (EEH_POSSIBLE_ERROR(val, u32)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -390,7 +411,7 @@ static inline u64 eeh_readq_be(const volatile void __iomem *addr) { u64 val = in_be64(addr); if (EEH_POSSIBLE_ERROR(val, u64)) - return eeh_check_failure(addr, val); + eeh_check_failure(addr); return val; } @@ -404,7 +425,7 @@ static inline void eeh_memcpy_fromio(void *dest, const * were copied. Check all four bytes. */ if (n >= 4 && EEH_POSSIBLE_ERROR(*((u32 *)(dest + n - 4)), u32)) - eeh_check_failure(src, *((u32 *)(dest + n - 4))); + eeh_check_failure(src); } /* in-string eeh macros */ @@ -413,7 +434,7 @@ static inline void eeh_readsb(const volatile void __iomem *addr, void * buf, { _insb(addr, buf, ns); if (EEH_POSSIBLE_ERROR((*(((u8*)buf)+ns-1)), u8)) - eeh_check_failure(addr, *(u8*)buf); + eeh_check_failure(addr); } static inline void eeh_readsw(const volatile void __iomem *addr, void * buf, @@ -421,7 +442,7 @@ static inline void eeh_readsw(const volatile void __iomem *addr, void * buf, { _insw(addr, buf, ns); if (EEH_POSSIBLE_ERROR((*(((u16*)buf)+ns-1)), u16)) - eeh_check_failure(addr, *(u16*)buf); + eeh_check_failure(addr); } static inline void eeh_readsl(const volatile void __iomem *addr, void * buf, @@ -429,7 +450,7 @@ static inline void eeh_readsl(const volatile void __iomem *addr, void * buf, { _insl(addr, buf, nl); if (EEH_POSSIBLE_ERROR((*(((u32*)buf)+nl-1)), u32)) - eeh_check_failure(addr, *(u32*)buf); + eeh_check_failure(addr); } #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 8f35cd7d59cc..77f52b26dad6 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -425,6 +425,8 @@ label##_relon_hv: \ #define SOFTEN_VALUE_0xa00 PACA_IRQ_DBELL #define SOFTEN_VALUE_0xe80 PACA_IRQ_DBELL #define SOFTEN_VALUE_0xe82 PACA_IRQ_DBELL +#define SOFTEN_VALUE_0xe60 PACA_IRQ_HMI +#define SOFTEN_VALUE_0xe62 PACA_IRQ_HMI #define __SOFTEN_TEST(h, vec) \ lbz r10,PACASOFTIRQEN(r13); \ @@ -513,8 +515,11 @@ label##_relon_hv: \ * runlatch, etc... */ -/* Exception addition: Hard disable interrupts */ -#define DISABLE_INTS RECONCILE_IRQ_STATE(r10,r11) +/* + * This addition reconciles our actual IRQ state with the various software + * flags that track it. This may call C code. + */ +#define ADD_RECONCILE RECONCILE_IRQ_STATE(r10,r11) #define ADD_NVGPRS \ bl save_nvgprs @@ -532,6 +537,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL) .globl label##_common; \ label##_common: \ EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ + /* Volatile regs are potentially clobbered here */ \ additions; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ bl hdlr; \ @@ -539,7 +545,7 @@ label##_common: \ #define STD_EXCEPTION_COMMON(trap, label, hdlr) \ EXCEPTION_COMMON(trap, label, hdlr, ret_from_except, \ - ADD_NVGPRS;DISABLE_INTS) + ADD_NVGPRS;ADD_RECONCILE) /* * Like STD_EXCEPTION_COMMON, but for exceptions that can occur @@ -548,7 +554,7 @@ label##_common: \ */ #define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \ EXCEPTION_COMMON(trap, label, hdlr, ret_from_except_lite, \ - FINISH_NAP;DISABLE_INTS;RUNLATCH_ON) + FINISH_NAP;ADD_RECONCILE;RUNLATCH_ON) /* * When the idle code in power4_idle puts the CPU into NAP mode, diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 418fb654370d..1bbb3013d6aa 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -11,6 +11,7 @@ typedef struct { unsigned int pmu_irqs; unsigned int mce_exceptions; unsigned int spurious_irqs; + unsigned int hmi_exceptions; #ifdef CONFIG_PPC_DOORBELL unsigned int doorbell_irqs; #endif diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 5dbbb29f5c3e..85bc8c0d257b 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -279,6 +279,12 @@ #define H_GET_24X7_DATA 0xF07C #define H_GET_PERF_COUNTER_INFO 0xF080 +/* Values for 2nd argument to H_SET_MODE */ +#define H_SET_MODE_RESOURCE_SET_CIABR 1 +#define H_SET_MODE_RESOURCE_SET_DAWR 2 +#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 +#define H_SET_MODE_RESOURCE_LE 4 + #ifndef __ASSEMBLY__ /** diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 10be1dd01c6b..b59ac27a6b7d 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -25,6 +25,7 @@ #define PACA_IRQ_EE 0x04 #define PACA_IRQ_DEC 0x08 /* Or FIT */ #define PACA_IRQ_EE_EDGE 0x10 /* BookE only */ +#define PACA_IRQ_HMI 0x20 #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h index 5b0c98bd46ab..1cb39c96d155 100644 --- a/arch/powerpc/include/asm/hydra.h +++ b/arch/powerpc/include/asm/hydra.h @@ -95,7 +95,6 @@ extern volatile struct Hydra __iomem *Hydra; #define HYDRA_INT_SPARE 19 extern int hydra_init(void); -extern void macio_adb_init(void); #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 41f13cec8a8f..e8e3a0a04eb0 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -31,11 +31,6 @@ extern atomic_t ppc_n_lost_interrupts; extern irq_hw_number_t virq_to_hw(unsigned int virq); -/** - * irq_early_init - Init irq remapping subsystem - */ -extern void irq_early_init(void); - static __inline__ int irq_canonicalize(int irq) { return irq; diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index e20eb95429a8..f2149066fe5d 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -32,9 +32,8 @@ #endif /* - * Most of the CPU's IRQ-state tracing is done from assembly code; we - * have to call a C function so call a wrapper that saves all the - * C-clobbered registers. + * These are calls to C code, so the caller must be prepared for volatiles to + * be clobbered. */ #define TRACE_ENABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_on) #define TRACE_DISABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_off) @@ -42,6 +41,9 @@ /* * This is used by assembly code to soft-disable interrupts first and * reconcile irq state. + * + * NB: This may call C code, so the caller must be prepared for volatiles to + * be clobbered. */ #define RECONCILE_IRQ_STATE(__rA, __rB) \ lbz __rA,PACASOFTIRQEN(r13); \ diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index f016bb699b5f..efbf9a322a23 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -10,6 +10,7 @@ * 2 of the License, or (at your option) any later version. */ +#ifndef __ASSEMBLY__ #include <linux/types.h> #include <asm/feature-fixups.h> @@ -42,4 +43,12 @@ struct jump_entry { jump_label_t key; }; +#else +#define ARCH_STATIC_BRANCH(LABEL, KEY) \ +1098: nop; \ + .pushsection __jump_table, "aw"; \ + FTR_ENTRY_LONG 1098b, LABEL, KEY; \ + .popsection +#endif + #endif /* _ASM_POWERPC_JUMP_LABEL_H */ diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 16d7e33d35e9..19c36cba37c4 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -81,7 +81,6 @@ extern void default_machine_crash_shutdown(struct pt_regs *regs); extern int crash_shutdown_register(crash_shutdown_t handler); extern int crash_shutdown_unregister(crash_shutdown_t handler); -extern void machine_kexec_simple(struct kimage *image); extern void crash_kexec_secondary(struct pt_regs *regs); extern int overlaps_crashkernel(unsigned long start, unsigned long size); extern void reserve_crashkernel(void); diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h deleted file mode 100644 index a0e57618ff33..000000000000 --- a/arch/powerpc/include/asm/kvm_44x.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - */ - -#ifndef __ASM_44X_H__ -#define __ASM_44X_H__ - -#include <linux/kvm_host.h> - -#define PPC44x_TLB_SIZE 64 - -/* If the guest is expecting it, this can be as large as we like; we'd just - * need to find some way of advertising it. */ -#define KVM44x_GUEST_TLB_SIZE 64 - -struct kvmppc_44x_tlbe { - u32 tid; /* Only the low 8 bits are used. */ - u32 word0; - u32 word1; - u32 word2; -}; - -struct kvmppc_44x_shadow_ref { - struct page *page; - u16 gtlb_index; - u8 writeable; - u8 tid; -}; - -struct kvmppc_vcpu_44x { - /* Unmodified copy of the guest's TLB. */ - struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE]; - - /* References to guest pages in the hardware TLB. */ - struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE]; - - /* State of the shadow TLB at guest context switch time. */ - struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE]; - u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; - - struct kvm_vcpu vcpu; -}; - -static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) -{ - return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); -} - -void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); -void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); - -#endif /* __ASM_44X_H__ */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 9601741080e5..465dfcb82c92 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -33,7 +33,6 @@ /* IVPR must be 64KiB-aligned. */ #define VCPU_SIZE_ORDER 4 #define VCPU_SIZE_LOG (VCPU_SIZE_ORDER + 12) -#define VCPU_TLB_PGSZ PPC44x_TLB_64K #define VCPU_SIZE_BYTES (1<<VCPU_SIZE_LOG) #define BOOKE_INTERRUPT_CRITICAL 0 @@ -98,6 +97,7 @@ #define BOOK3S_INTERRUPT_H_DATA_STORAGE 0xe00 #define BOOK3S_INTERRUPT_H_INST_STORAGE 0xe20 #define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40 +#define BOOK3S_INTERRUPT_HMI 0xe60 #define BOOK3S_INTERRUPT_H_DOORBELL 0xe80 #define BOOK3S_INTERRUPT_PERFMON 0xf00 #define BOOK3S_INTERRUPT_ALTIVEC 0xf20 @@ -131,6 +131,7 @@ #define BOOK3S_HFLAG_NATIVE_PS 0x8 #define BOOK3S_HFLAG_MULTI_PGSIZE 0x10 #define BOOK3S_HFLAG_NEW_TLBIE 0x20 +#define BOOK3S_HFLAG_SPLIT_HACK 0x40 #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index f52f65694527..6acf0c2a0f99 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -83,8 +83,6 @@ struct kvmppc_vcpu_book3s { u64 sdr1; u64 hior; u64 msr_mask; - u64 purr_offset; - u64 spurr_offset; #ifdef CONFIG_PPC_BOOK3S_32 u32 vsid_pool[VSID_POOL_SIZE]; u32 vsid_next; @@ -148,9 +146,10 @@ extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache * extern int kvmppc_mmu_hpte_sysinit(void); extern void kvmppc_mmu_hpte_sysexit(void); extern int kvmppc_mmu_hv_init(void); +extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); +/* XXX remove this export when load_last_inst() is generic */ extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); -extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); @@ -159,13 +158,13 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); -extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, +extern pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode); -extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, +extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index); -void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, +void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index); extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); @@ -183,12 +182,16 @@ extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map); extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask); +extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr); extern void kvmppc_entry_trampoline(void); extern void kvmppc_hv_entry_trampoline(void); extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd); +extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm); +extern int kvmppc_hcall_impl_pr(unsigned long cmd); +extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd); extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, @@ -274,32 +277,6 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) return (kvmppc_get_msr(vcpu) & MSR_LE) != (MSR_KERNEL & MSR_LE); } -static inline u32 kvmppc_get_last_inst_internal(struct kvm_vcpu *vcpu, ulong pc) -{ - /* Load the instruction manually if it failed to do so in the - * exit path */ - if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) - kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false); - - return kvmppc_need_byteswap(vcpu) ? swab32(vcpu->arch.last_inst) : - vcpu->arch.last_inst; -} - -static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) -{ - return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu)); -} - -/* - * Like kvmppc_get_last_inst(), but for fetching a sc instruction. - * Because the sc instruction sets SRR0 to point to the following - * instruction, we have to fetch from pc - 4. - */ -static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu) -{ - return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu) - 4); -} - static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault_dar; @@ -310,6 +287,13 @@ static inline bool is_kvmppc_resume_guest(int r) return (r == RESUME_GUEST || r == RESUME_GUEST_NV); } +static inline bool is_kvmppc_hv_enabled(struct kvm *kvm); +static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu) +{ + /* Only PR KVM supports the magic page */ + return !is_kvmppc_hv_enabled(vcpu->kvm); +} + /* Magic register values loaded into r3 and r4 before the 'sc' assembly * instruction for the OSI hypercalls */ #define OSI_SC_MAGIC_R3 0x113724FA @@ -322,4 +306,7 @@ static inline bool is_kvmppc_resume_guest(int r) /* LPIDs we support with this build -- runtime limit may be lower */ #define KVMPPC_NR_LPIDS (LPID_RSVD + 1) +#define SPLIT_HACK_MASK 0xff000000 +#define SPLIT_HACK_OFFS 0xfb000000 + #endif /* __ASM_KVM_BOOK3S_H__ */ diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fddb72b48ce9..0aa817933e6a 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -59,20 +59,29 @@ extern unsigned long kvm_rma_pages; /* These bits are reserved in the guest view of the HPTE */ #define HPTE_GR_RESERVED HPTE_GR_MODIFIED -static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits) +static inline long try_lock_hpte(__be64 *hpte, unsigned long bits) { unsigned long tmp, old; + __be64 be_lockbit, be_bits; + + /* + * We load/store in native endian, but the HTAB is in big endian. If + * we byte swap all data we apply on the PTE we're implicitly correct + * again. + */ + be_lockbit = cpu_to_be64(HPTE_V_HVLOCK); + be_bits = cpu_to_be64(bits); asm volatile(" ldarx %0,0,%2\n" " and. %1,%0,%3\n" " bne 2f\n" - " ori %0,%0,%4\n" + " or %0,%0,%4\n" " stdcx. %0,0,%2\n" " beq+ 2f\n" " mr %1,%3\n" "2: isync" : "=&r" (tmp), "=&r" (old) - : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK) + : "r" (hpte), "r" (be_bits), "r" (be_lockbit) : "cc", "memory"); return old == 0; } @@ -110,16 +119,12 @@ static inline int __hpte_actual_psize(unsigned int lp, int psize) static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, unsigned long pte_index) { - int b_psize, a_psize; + int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K; unsigned int penc; unsigned long rb = 0, va_low, sllp; unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1); - if (!(v & HPTE_V_LARGE)) { - /* both base and actual psize is 4k */ - b_psize = MMU_PAGE_4K; - a_psize = MMU_PAGE_4K; - } else { + if (v & HPTE_V_LARGE) { for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) { /* valid entries have a shift value */ @@ -142,6 +147,8 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, */ /* This covers 14..54 bits of va*/ rb = (v & ~0x7fUL) << 16; /* AVA field */ + + rb |= v >> (62 - 8); /* B field */ /* * AVA in v had cleared lower 23 bits. We need to derive * that from pteg index @@ -172,10 +179,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, { int aval_shift; /* - * remaining 7bits of AVA/LP fields + * remaining bits of AVA/LP fields * Also contain the rr bits of LP */ - rb |= (va_low & 0x7f) << 16; + rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000; /* * Now clear not needed LP bits based on actual psize */ @@ -198,8 +205,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, return rb; } -static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l, + bool is_base_size) { + int size, a_psize; /* Look at the 8 bit LP value */ unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1); @@ -214,14 +223,27 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) continue; a_psize = __hpte_actual_psize(lp, size); - if (a_psize != -1) + if (a_psize != -1) { + if (is_base_size) + return 1ul << mmu_psize_defs[size].shift; return 1ul << mmu_psize_defs[a_psize].shift; + } } } return 0; } +static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 0); +} + +static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 1); +} + static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize) { return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index c7aed6105ff9..f7aa5cc395c4 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -69,11 +69,6 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) return false; } -static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.last_inst; -} - static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) { vcpu->arch.ctr = val; @@ -108,4 +103,14 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault_dear; } + +static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu) +{ + /* Magic page is only supported on e500v2 */ +#ifdef CONFIG_KVM_E500V2 + return true; +#else + return false; +#endif +} #endif /* __ASM_KVM_BOOKE_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bb66d8b8efdf..98d9dd50d063 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -34,6 +34,7 @@ #include <asm/processor.h> #include <asm/page.h> #include <asm/cacheflush.h> +#include <asm/hvcall.h> #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS @@ -48,7 +49,6 @@ #define KVM_NR_IRQCHIPS 1 #define KVM_IRQCHIP_NUM_PINS 256 -#if !defined(CONFIG_KVM_440) #include <linux/mmu_notifier.h> #define KVM_ARCH_WANT_MMU_NOTIFIER @@ -61,8 +61,6 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); -#endif - #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 #define HPTEG_HASH_BITS_PTE_LONG 12 @@ -96,7 +94,6 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u32 sum_exits; u32 mmio_exits; - u32 dcr_exits; u32 signal_exits; u32 light_exits; /* Account for special types of light exits: */ @@ -113,22 +110,21 @@ struct kvm_vcpu_stat { u32 halt_wakeup; u32 dbell_exits; u32 gdbell_exits; + u32 ld; + u32 st; #ifdef CONFIG_PPC_BOOK3S u32 pf_storage; u32 pf_instruc; u32 sp_storage; u32 sp_instruc; u32 queue_intr; - u32 ld; u32 ld_slow; - u32 st; u32 st_slow; #endif }; enum kvm_exit_types { MMIO_EXITS, - DCR_EXITS, SIGNAL_EXITS, ITLB_REAL_MISS_EXITS, ITLB_VIRT_MISS_EXITS, @@ -254,7 +250,6 @@ struct kvm_arch { atomic_t hpte_mod_interest; spinlock_t slot_phys_lock; cpumask_t need_tlb_flush; - struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; int hpt_cma_alloc; #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -263,6 +258,7 @@ struct kvm_arch { #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; struct list_head rtas_tokens; + DECLARE_BITMAP(enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); #endif #ifdef CONFIG_KVM_MPIC struct openpic *mpic; @@ -271,6 +267,10 @@ struct kvm_arch { struct kvmppc_xics *xics; #endif struct kvmppc_ops *kvm_ops; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* This array can grow quite large, keep it at the end */ + struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; +#endif }; /* @@ -305,6 +305,8 @@ struct kvmppc_vcore { u32 arch_compat; ulong pcr; ulong dpdes; /* doorbell state (POWER8) */ + void *mpp_buffer; /* Micro Partition Prefetch buffer */ + bool mpp_buffer_is_valid; }; #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) @@ -503,8 +505,10 @@ struct kvm_vcpu_arch { #ifdef CONFIG_BOOKE u32 decar; #endif - u32 tbl; - u32 tbu; + /* Time base value when we entered the guest */ + u64 entry_tb; + u64 entry_vtb; + u64 entry_ic; u32 tcr; ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ u32 ivor[64]; @@ -580,6 +584,8 @@ struct kvm_vcpu_arch { u32 mmucfg; u32 eptcfg; u32 epr; + u64 sprg9; + u32 pwrmgtcr0; u32 crit_save; /* guest debug registers*/ struct debug_reg dbg_reg; @@ -593,8 +599,6 @@ struct kvm_vcpu_arch { u8 io_gpr; /* GPR used as IO source/target */ u8 mmio_is_bigendian; u8 mmio_sign_extend; - u8 dcr_needed; - u8 dcr_is_write; u8 osi_needed; u8 osi_enabled; u8 papr_enabled; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9c89cdd067a6..fb86a2299d8a 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -41,12 +41,26 @@ enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ - EMULATE_DO_DCR, /* kvm_run filled with DCR request */ EMULATE_FAIL, /* can't emulate this instruction */ EMULATE_AGAIN, /* something went wrong. go again */ EMULATE_EXIT_USER, /* emulation requires exit to user-space */ }; +enum instruction_type { + INST_GENERIC, + INST_SC, /* system call */ +}; + +enum xlate_instdata { + XLATE_INST, /* translate instruction address */ + XLATE_DATA /* translate data address */ +}; + +enum xlate_readwrite { + XLATE_READ, /* check for read permissions */ + XLATE_WRITE /* check for write permissions */ +}; + extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern void kvmppc_handler_highmem(void); @@ -62,8 +76,16 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u64 val, unsigned int bytes, int is_default_endian); +extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, + enum instruction_type type, u32 *inst); + +extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data); +extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data); extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); @@ -86,6 +108,9 @@ extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, gva_t eaddr); extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu); +extern int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, + enum xlate_instdata xlid, enum xlate_readwrite xlrw, + struct kvmppc_pte *pte); extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id); @@ -106,6 +131,14 @@ extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, ulong dear_flags, + ulong esr_flags); +extern void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, + ulong dear_flags, + ulong esr_flags); +extern void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, + ulong esr_flags); extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu); @@ -228,12 +261,35 @@ struct kvmppc_ops { void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu); long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl, unsigned long arg); - + int (*hcall_implemented)(unsigned long hcall); }; extern struct kvmppc_ops *kvmppc_hv_ops; extern struct kvmppc_ops *kvmppc_pr_ops; +static inline int kvmppc_get_last_inst(struct kvm_vcpu *vcpu, + enum instruction_type type, u32 *inst) +{ + int ret = EMULATE_DONE; + u32 fetched_inst; + + /* Load the instruction manually if it failed to do so in the + * exit path */ + if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) + ret = kvmppc_load_last_inst(vcpu, type, &vcpu->arch.last_inst); + + /* Write fetch_failed unswapped if the fetch failed */ + if (ret == EMULATE_DONE) + fetched_inst = kvmppc_need_byteswap(vcpu) ? + swab32(vcpu->arch.last_inst) : + vcpu->arch.last_inst; + else + fetched_inst = vcpu->arch.last_inst; + + *inst = fetched_inst; + return ret; +} + static inline bool is_kvmppc_hv_enabled(struct kvm *kvm) { return kvm->arch.kvm_ops == kvmppc_hv_ops; @@ -392,6 +448,17 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) { return 0; } #endif +static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_KVM_BOOKE_HV + return mfspr(SPRN_GEPR); +#elif defined(CONFIG_BOOKE) + return vcpu->arch.epr; +#else + return 0; +#endif +} + static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) { #ifdef CONFIG_KVM_BOOKE_HV @@ -472,8 +539,20 @@ static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu) #endif } +#define SPRNG_WRAPPER_GET(reg, bookehv_spr) \ +static inline ulong kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ +{ \ + return mfspr(bookehv_spr); \ +} \ + +#define SPRNG_WRAPPER_SET(reg, bookehv_spr) \ +static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, ulong val) \ +{ \ + mtspr(bookehv_spr, val); \ +} \ + #define SHARED_WRAPPER_GET(reg, size) \ -static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ +static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ { \ if (kvmppc_shared_big_endian(vcpu)) \ return be##size##_to_cpu(vcpu->arch.shared->reg); \ @@ -494,14 +573,31 @@ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \ SHARED_WRAPPER_GET(reg, size) \ SHARED_WRAPPER_SET(reg, size) \ +#define SPRNG_WRAPPER(reg, bookehv_spr) \ + SPRNG_WRAPPER_GET(reg, bookehv_spr) \ + SPRNG_WRAPPER_SET(reg, bookehv_spr) \ + +#ifdef CONFIG_KVM_BOOKE_HV + +#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \ + SPRNG_WRAPPER(reg, bookehv_spr) \ + +#else + +#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \ + SHARED_WRAPPER(reg, size) \ + +#endif + SHARED_WRAPPER(critical, 64) -SHARED_WRAPPER(sprg0, 64) -SHARED_WRAPPER(sprg1, 64) -SHARED_WRAPPER(sprg2, 64) -SHARED_WRAPPER(sprg3, 64) -SHARED_WRAPPER(srr0, 64) -SHARED_WRAPPER(srr1, 64) -SHARED_WRAPPER(dar, 64) +SHARED_SPRNG_WRAPPER(sprg0, 64, SPRN_GSPRG0) +SHARED_SPRNG_WRAPPER(sprg1, 64, SPRN_GSPRG1) +SHARED_SPRNG_WRAPPER(sprg2, 64, SPRN_GSPRG2) +SHARED_SPRNG_WRAPPER(sprg3, 64, SPRN_GSPRG3) +SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0) +SHARED_SPRNG_WRAPPER(srr1, 64, SPRN_GSRR1) +SHARED_SPRNG_WRAPPER(dar, 64, SPRN_GDEAR) +SHARED_SPRNG_WRAPPER(esr, 64, SPRN_GESR) SHARED_WRAPPER_GET(msr, 64) static inline void kvmppc_set_msr_fast(struct kvm_vcpu *vcpu, u64 val) { diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index f92b0b54e921..902ab203aeb2 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -57,10 +57,10 @@ struct machdep_calls { void (*hpte_removebolted)(unsigned long ea, int psize, int ssize); void (*flush_hash_range)(unsigned long number, int local); - void (*hugepage_invalidate)(struct mm_struct *mm, + void (*hugepage_invalidate)(unsigned long vsid, + unsigned long addr, unsigned char *hpte_slot_array, - unsigned long addr, int psize); - + int psize, int ssize); /* special for kexec, to be called in real mode, linear mapping is * destroyed as well */ void (*hpte_clear_all)(void); @@ -174,6 +174,10 @@ struct machdep_calls { /* Exception handlers */ int (*system_reset_exception)(struct pt_regs *regs); int (*machine_check_exception)(struct pt_regs *regs); + int (*handle_hmi_exception)(struct pt_regs *regs); + + /* Early exception handlers called in realmode */ + int (*hmi_exception_early)(struct pt_regs *regs); /* Called during machine check exception to retrive fixup address. */ bool (*mce_check_early_recovery)(struct pt_regs *regs); @@ -326,8 +330,6 @@ extern struct machdep_calls *machine_id; extern void probe_machine(void); -extern char cmd_line[COMMAND_LINE_SIZE]; - #ifdef CONFIG_PPC_PMAC /* * Power macintoshes have either a CUDA, PMU or SMU controlling @@ -366,6 +368,7 @@ static inline void log_error(char *buf, unsigned int err_type, int fatal) } \ __define_initcall(__machine_initcall_##mach##_##fn, id); +#define machine_early_initcall(mach, fn) __define_machine_initcall(mach, fn, early) #define machine_core_initcall(mach, fn) __define_machine_initcall(mach, fn, 1) #define machine_core_initcall_sync(mach, fn) __define_machine_initcall(mach, fn, 1s) #define machine_postcore_initcall(mach, fn) __define_machine_initcall(mach, fn, 2) diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index d0918e09557f..cd4f04a74802 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -40,7 +40,11 @@ /* MAS registers bit definitions */ -#define MAS0_TLBSEL(x) (((x) << 28) & 0x30000000) +#define MAS0_TLBSEL_MASK 0x30000000 +#define MAS0_TLBSEL_SHIFT 28 +#define MAS0_TLBSEL(x) (((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK) +#define MAS0_GET_TLBSEL(mas0) (((mas0) & MAS0_TLBSEL_MASK) >> \ + MAS0_TLBSEL_SHIFT) #define MAS0_ESEL_MASK 0x0FFF0000 #define MAS0_ESEL_SHIFT 16 #define MAS0_ESEL(x) (((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK) @@ -58,6 +62,7 @@ #define MAS1_TSIZE_MASK 0x00000f80 #define MAS1_TSIZE_SHIFT 7 #define MAS1_TSIZE(x) (((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK) +#define MAS1_GET_TSIZE(mas1) (((mas1) & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT) #define MAS2_EPN (~0xFFFUL) #define MAS2_X0 0x00000040 @@ -86,6 +91,7 @@ #define MAS3_SPSIZE 0x0000003e #define MAS3_SPSIZE_SHIFT 1 +#define MAS4_TLBSEL_MASK MAS0_TLBSEL_MASK #define MAS4_TLBSELD(x) MAS0_TLBSEL(x) #define MAS4_INDD 0x00008000 /* Default IND */ #define MAS4_TSIZED(x) MAS1_TSIZE(x) diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 807014dde821..92bc3a637923 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -22,26 +22,7 @@ */ #include <asm/pgtable-ppc64.h> #include <asm/bug.h> - -/* - * Segment table - */ - -#define STE_ESID_V 0x80 -#define STE_ESID_KS 0x20 -#define STE_ESID_KP 0x10 -#define STE_ESID_N 0x08 - -#define STE_VSID_SHIFT 12 - -/* Location of cpu0's segment table */ -#define STAB0_PAGE 0x8 -#define STAB0_OFFSET (STAB0_PAGE << 12) -#define STAB0_PHYS_ADDR (STAB0_OFFSET + PHYSICAL_START) - -#ifndef __ASSEMBLY__ -extern char initial_stab[]; -#endif /* ! __ASSEMBLY */ +#include <asm/processor.h> /* * SLB @@ -361,6 +342,8 @@ extern void hash_failure_debug(unsigned long ea, unsigned long access, extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, unsigned long pstart, unsigned long prot, int psize, int ssize); +int htab_remove_mapping(unsigned long vstart, unsigned long vend, + int psize, int ssize); extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); @@ -369,10 +352,8 @@ extern void hpte_init_lpar(void); extern void hpte_init_beat(void); extern void hpte_init_beat_v3(void); -extern void stabs_alloc(void); extern void slb_initialize(void); extern void slb_flush_and_rebolt(void); -extern void stab_initialize(unsigned long stab); extern void slb_vmalloc_update(void); extern void slb_set_size(u16 size); @@ -496,7 +477,7 @@ extern void slb_set_size(u16 size); */ struct subpage_prot_table { unsigned long maxaddr; /* only addresses < this are protected */ - unsigned int **protptrs[2]; + unsigned int **protptrs[(TASK_SIZE_USER64 >> 43)]; unsigned int *low_prot[4]; }; diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index f8d1d6dcf7db..3d5abfe6ba67 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -19,8 +19,7 @@ #define MMU_FTR_TYPE_40x ASM_CONST(0x00000004) #define MMU_FTR_TYPE_44x ASM_CONST(0x00000008) #define MMU_FTR_TYPE_FSL_E ASM_CONST(0x00000010) -#define MMU_FTR_TYPE_3E ASM_CONST(0x00000020) -#define MMU_FTR_TYPE_47x ASM_CONST(0x00000040) +#define MMU_FTR_TYPE_47x ASM_CONST(0x00000020) /* * This is individual features @@ -65,9 +64,9 @@ */ #define MMU_FTR_USE_PAIRED_MAS ASM_CONST(0x01000000) -/* MMU is SLB-based +/* Doesn't support the B bit (1T segment) in SLBIE */ -#define MMU_FTR_SLB ASM_CONST(0x02000000) +#define MMU_FTR_NO_SLBIE_B ASM_CONST(0x02000000) /* Support 16M large pages */ @@ -89,10 +88,6 @@ */ #define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000) -/* Doesn't support the B bit (1T segment) in SLBIE - */ -#define MMU_FTR_NO_SLBIE_B ASM_CONST(0x80000000) - /* MMU feature bit sets for various CPUs */ #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 @@ -106,13 +101,6 @@ MMU_FTR_CI_LARGE_PAGE #define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \ MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B -#define MMU_FTRS_A2 MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | \ - MMU_FTR_USE_TLBIVAX_BCAST | \ - MMU_FTR_LOCK_BCAST_INVAL | \ - MMU_FTR_USE_TLBRSRV | \ - MMU_FTR_USE_PAIRED_MAS | \ - MMU_FTR_TLBIEL | \ - MMU_FTR_16M_PAGE #ifndef __ASSEMBLY__ #include <asm/cputable.h> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index b467530e2485..73382eba02dc 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -18,7 +18,6 @@ extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm); extern void destroy_context(struct mm_struct *mm); extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); -extern void switch_stab(struct task_struct *tsk, struct mm_struct *mm); extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm); extern void set_context(unsigned long id, pgd_t *pgd); @@ -77,10 +76,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * sub architectures. */ #ifdef CONFIG_PPC_STD_MMU_64 - if (mmu_has_feature(MMU_FTR_SLB)) - switch_slb(tsk, next); - else - switch_stab(tsk, next); + switch_slb(tsk, next); #else /* Out of line for now */ switch_mmu_context(prev, next); diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 460018889ba9..a084cf5bd76c 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -12,27 +12,7 @@ #ifndef __OPAL_H #define __OPAL_H -/****** Takeover interface ********/ - -/* PAPR H-Call used to querty the HAL existence and/or instanciate - * it from within pHyp (tech preview only). - * - * This is exclusively used in prom_init.c - */ - #ifndef __ASSEMBLY__ - -struct opal_takeover_args { - u64 k_image; /* r4 */ - u64 k_size; /* r5 */ - u64 k_entry; /* r6 */ - u64 k_entry2; /* r7 */ - u64 hal_addr; /* r8 */ - u64 rd_image; /* r9 */ - u64 rd_size; /* r10 */ - u64 rd_loc; /* r11 */ -}; - /* * SG entry * @@ -55,15 +35,6 @@ struct opal_sg_list { /* We calculate number of sg entries based on PAGE_SIZE */ #define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry)) -extern long opal_query_takeover(u64 *hal_size, u64 *hal_align); - -extern long opal_do_takeover(struct opal_takeover_args *args); - -struct rtas_args; -extern int opal_enter_rtas(struct rtas_args *args, - unsigned long data, - unsigned long entry); - #endif /* __ASSEMBLY__ */ /****** OPAL APIs ******/ @@ -164,6 +135,7 @@ extern int opal_enter_rtas(struct rtas_args *args, #define OPAL_FLASH_MANAGE 77 #define OPAL_FLASH_UPDATE 78 #define OPAL_RESYNC_TIMEBASE 79 +#define OPAL_CHECK_TOKEN 80 #define OPAL_DUMP_INIT 81 #define OPAL_DUMP_INFO 82 #define OPAL_DUMP_READ 83 @@ -176,6 +148,11 @@ extern int opal_enter_rtas(struct rtas_args *args, #define OPAL_SET_PARAM 90 #define OPAL_DUMP_RESEND 91 #define OPAL_DUMP_INFO2 94 +#define OPAL_PCI_ERR_INJECT 96 +#define OPAL_PCI_EEH_FREEZE_SET 97 +#define OPAL_HANDLE_HMI 98 +#define OPAL_REGISTER_DUMP_REGION 101 +#define OPAL_UNREGISTER_DUMP_REGION 102 #ifndef __ASSEMBLY__ @@ -199,7 +176,11 @@ enum OpalFreezeState { enum OpalEehFreezeActionToken { OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1, OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2, - OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3 + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3, + + OPAL_EEH_ACTION_SET_FREEZE_MMIO = 1, + OPAL_EEH_ACTION_SET_FREEZE_DMA = 2, + OPAL_EEH_ACTION_SET_FREEZE_ALL = 3 }; enum OpalPciStatusToken { @@ -220,6 +201,35 @@ enum OpalPciErrorSeverity { OPAL_EEH_SEV_INF = 5 }; +enum OpalErrinjectType { + OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR = 0, + OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64 = 1, +}; + +enum OpalErrinjectFunc { + /* IOA bus specific errors */ + OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR = 0, + OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA = 1, + OPAL_ERR_INJECT_FUNC_IOA_LD_IO_ADDR = 2, + OPAL_ERR_INJECT_FUNC_IOA_LD_IO_DATA = 3, + OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR = 4, + OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA = 5, + OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR = 6, + OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA = 7, + OPAL_ERR_INJECT_FUNC_IOA_ST_IO_ADDR = 8, + OPAL_ERR_INJECT_FUNC_IOA_ST_IO_DATA = 9, + OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR = 10, + OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA = 11, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR = 12, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA = 13, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER = 14, + OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET = 15, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR = 16, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA = 17, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER = 18, + OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET = 19, +}; + enum OpalShpcAction { OPAL_SHPC_GET_LINK_STATE = 0, OPAL_SHPC_GET_SLOT_STATE = 1 @@ -269,6 +279,7 @@ enum OpalMessageType { OPAL_MSG_MEM_ERR, OPAL_MSG_EPOW, OPAL_MSG_SHUTDOWN, + OPAL_MSG_HMI_EVT, OPAL_MSG_TYPE_MAX, }; @@ -369,10 +380,19 @@ enum OpalMveEnableAction { OPAL_ENABLE_MVE = 1 }; +enum OpalM64EnableAction { + OPAL_DISABLE_M64 = 0, + OPAL_ENABLE_M64_SPLIT = 1, + OPAL_ENABLE_M64_NON_SPLIT = 2 +}; + enum OpalPciResetScope { - OPAL_PHB_COMPLETE = 1, OPAL_PCI_LINK = 2, OPAL_PHB_ERROR = 3, - OPAL_PCI_HOT_RESET = 4, OPAL_PCI_FUNDAMENTAL_RESET = 5, - OPAL_PCI_IODA_TABLE_RESET = 6, + OPAL_RESET_PHB_COMPLETE = 1, + OPAL_RESET_PCI_LINK = 2, + OPAL_RESET_PHB_ERROR = 3, + OPAL_RESET_PCI_HOT = 4, + OPAL_RESET_PCI_FUNDAMENTAL = 5, + OPAL_RESET_PCI_IODA_TABLE = 6 }; enum OpalPciReinitScope { @@ -531,6 +551,50 @@ struct OpalMemoryErrorData { } u; }; +/* HMI interrupt event */ +enum OpalHMI_Version { + OpalHMIEvt_V1 = 1, +}; + +enum OpalHMI_Severity { + OpalHMI_SEV_NO_ERROR = 0, + OpalHMI_SEV_WARNING = 1, + OpalHMI_SEV_ERROR_SYNC = 2, + OpalHMI_SEV_FATAL = 3, +}; + +enum OpalHMI_Disposition { + OpalHMI_DISPOSITION_RECOVERED = 0, + OpalHMI_DISPOSITION_NOT_RECOVERED = 1, +}; + +enum OpalHMI_ErrType { + OpalHMI_ERROR_MALFUNC_ALERT = 0, + OpalHMI_ERROR_PROC_RECOV_DONE, + OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN, + OpalHMI_ERROR_PROC_RECOV_MASKED, + OpalHMI_ERROR_TFAC, + OpalHMI_ERROR_TFMR_PARITY, + OpalHMI_ERROR_HA_OVERFLOW_WARN, + OpalHMI_ERROR_XSCOM_FAIL, + OpalHMI_ERROR_XSCOM_DONE, + OpalHMI_ERROR_SCOM_FIR, + OpalHMI_ERROR_DEBUG_TRIG_FIR, + OpalHMI_ERROR_HYP_RESOURCE, +}; + +struct OpalHMIEvent { + uint8_t version; /* 0x00 */ + uint8_t severity; /* 0x01 */ + uint8_t type; /* 0x02 */ + uint8_t disposition; /* 0x03 */ + uint8_t reserved_1[4]; /* 0x04 */ + + __be64 hmer; + /* TFMR register. Valid only for TFAC and TFMR_PARITY error type. */ + __be64 tfmr; +}; + enum { OPAL_P7IOC_DIAG_TYPE_NONE = 0, OPAL_P7IOC_DIAG_TYPE_RGC = 1, @@ -542,40 +606,40 @@ enum { }; struct OpalIoP7IOCErrorData { - uint16_t type; + __be16 type; /* GEM */ - uint64_t gemXfir; - uint64_t gemRfir; - uint64_t gemRirqfir; - uint64_t gemMask; - uint64_t gemRwof; + __be64 gemXfir; + __be64 gemRfir; + __be64 gemRirqfir; + __be64 gemMask; + __be64 gemRwof; /* LEM */ - uint64_t lemFir; - uint64_t lemErrMask; - uint64_t lemAction0; - uint64_t lemAction1; - uint64_t lemWof; + __be64 lemFir; + __be64 lemErrMask; + __be64 lemAction0; + __be64 lemAction1; + __be64 lemWof; union { struct OpalIoP7IOCRgcErrorData { - uint64_t rgcStatus; /* 3E1C10 */ - uint64_t rgcLdcp; /* 3E1C18 */ + __be64 rgcStatus; /* 3E1C10 */ + __be64 rgcLdcp; /* 3E1C18 */ }rgc; struct OpalIoP7IOCBiErrorData { - uint64_t biLdcp0; /* 3C0100, 3C0118 */ - uint64_t biLdcp1; /* 3C0108, 3C0120 */ - uint64_t biLdcp2; /* 3C0110, 3C0128 */ - uint64_t biFenceStatus; /* 3C0130, 3C0130 */ + __be64 biLdcp0; /* 3C0100, 3C0118 */ + __be64 biLdcp1; /* 3C0108, 3C0120 */ + __be64 biLdcp2; /* 3C0110, 3C0128 */ + __be64 biFenceStatus; /* 3C0130, 3C0130 */ - uint8_t biDownbound; /* BI Downbound or Upbound */ + u8 biDownbound; /* BI Downbound or Upbound */ }bi; struct OpalIoP7IOCCiErrorData { - uint64_t ciPortStatus; /* 3Dn008 */ - uint64_t ciPortLdcp; /* 3Dn010 */ + __be64 ciPortStatus; /* 3Dn008 */ + __be64 ciPortLdcp; /* 3Dn010 */ - uint8_t ciPort; /* Index of CI port: 0/1 */ + u8 ciPort; /* Index of CI port: 0/1 */ }ci; }; }; @@ -607,60 +671,60 @@ struct OpalIoPhbErrorCommon { struct OpalIoP7IOCPhbErrorData { struct OpalIoPhbErrorCommon common; - uint32_t brdgCtl; + __be32 brdgCtl; // P7IOC utl regs - uint32_t portStatusReg; - uint32_t rootCmplxStatus; - uint32_t busAgentStatus; + __be32 portStatusReg; + __be32 rootCmplxStatus; + __be32 busAgentStatus; // P7IOC cfg regs - uint32_t deviceStatus; - uint32_t slotStatus; - uint32_t linkStatus; - uint32_t devCmdStatus; - uint32_t devSecStatus; + __be32 deviceStatus; + __be32 slotStatus; + __be32 linkStatus; + __be32 devCmdStatus; + __be32 devSecStatus; // cfg AER regs - uint32_t rootErrorStatus; - uint32_t uncorrErrorStatus; - uint32_t corrErrorStatus; - uint32_t tlpHdr1; - uint32_t tlpHdr2; - uint32_t tlpHdr3; - uint32_t tlpHdr4; - uint32_t sourceId; + __be32 rootErrorStatus; + __be32 uncorrErrorStatus; + __be32 corrErrorStatus; + __be32 tlpHdr1; + __be32 tlpHdr2; + __be32 tlpHdr3; + __be32 tlpHdr4; + __be32 sourceId; - uint32_t rsv3; + __be32 rsv3; // Record data about the call to allocate a buffer. - uint64_t errorClass; - uint64_t correlator; + __be64 errorClass; + __be64 correlator; //P7IOC MMIO Error Regs - uint64_t p7iocPlssr; // n120 - uint64_t p7iocCsr; // n110 - uint64_t lemFir; // nC00 - uint64_t lemErrorMask; // nC18 - uint64_t lemWOF; // nC40 - uint64_t phbErrorStatus; // nC80 - uint64_t phbFirstErrorStatus; // nC88 - uint64_t phbErrorLog0; // nCC0 - uint64_t phbErrorLog1; // nCC8 - uint64_t mmioErrorStatus; // nD00 - uint64_t mmioFirstErrorStatus; // nD08 - uint64_t mmioErrorLog0; // nD40 - uint64_t mmioErrorLog1; // nD48 - uint64_t dma0ErrorStatus; // nD80 - uint64_t dma0FirstErrorStatus; // nD88 - uint64_t dma0ErrorLog0; // nDC0 - uint64_t dma0ErrorLog1; // nDC8 - uint64_t dma1ErrorStatus; // nE00 - uint64_t dma1FirstErrorStatus; // nE08 - uint64_t dma1ErrorLog0; // nE40 - uint64_t dma1ErrorLog1; // nE48 - uint64_t pestA[OPAL_P7IOC_NUM_PEST_REGS]; - uint64_t pestB[OPAL_P7IOC_NUM_PEST_REGS]; + __be64 p7iocPlssr; // n120 + __be64 p7iocCsr; // n110 + __be64 lemFir; // nC00 + __be64 lemErrorMask; // nC18 + __be64 lemWOF; // nC40 + __be64 phbErrorStatus; // nC80 + __be64 phbFirstErrorStatus; // nC88 + __be64 phbErrorLog0; // nCC0 + __be64 phbErrorLog1; // nCC8 + __be64 mmioErrorStatus; // nD00 + __be64 mmioFirstErrorStatus; // nD08 + __be64 mmioErrorLog0; // nD40 + __be64 mmioErrorLog1; // nD48 + __be64 dma0ErrorStatus; // nD80 + __be64 dma0FirstErrorStatus; // nD88 + __be64 dma0ErrorLog0; // nDC0 + __be64 dma0ErrorLog1; // nDC8 + __be64 dma1ErrorStatus; // nE00 + __be64 dma1FirstErrorStatus; // nE08 + __be64 dma1ErrorLog0; // nE40 + __be64 dma1ErrorLog1; // nE48 + __be64 pestA[OPAL_P7IOC_NUM_PEST_REGS]; + __be64 pestB[OPAL_P7IOC_NUM_PEST_REGS]; }; struct OpalIoPhb3ErrorData { @@ -787,6 +851,10 @@ int64_t opal_pci_eeh_freeze_status(uint64_t phb_id, uint64_t pe_number, __be64 *phb_status); int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number, uint64_t eeh_action_token); +int64_t opal_pci_eeh_freeze_set(uint64_t phb_id, uint64_t pe_number, + uint64_t eeh_action_token); +int64_t opal_pci_err_inject(uint64_t phb_id, uint32_t pe_no, uint32_t type, + uint32_t func, uint64_t addr, uint64_t mask); int64_t opal_pci_shpc(uint64_t phb_id, uint64_t shpc_action, uint8_t *state); @@ -797,7 +865,7 @@ int64_t opal_pci_set_phb_mem_window(uint64_t phb_id, uint16_t window_type, uint16_t window_num, uint64_t starting_real_address, uint64_t starting_pci_address, - uint16_t segment_size); + uint64_t size); int64_t opal_pci_map_pe_mmio_window(uint64_t phb_id, uint16_t pe_number, uint16_t window_type, uint16_t window_num, uint16_t segment_num); @@ -855,6 +923,7 @@ int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe, __be16 *pci_error_type, __be16 *severity); int64_t opal_pci_poll(uint64_t phb_id); int64_t opal_return_cpu(void); +int64_t opal_check_token(uint64_t token); int64_t opal_reinit_cpus(uint64_t flags); int64_t opal_xscom_read(uint32_t gcid, uint64_t pcb_addr, __be64 *val); @@ -889,6 +958,9 @@ int64_t opal_get_param(uint64_t token, uint32_t param_id, uint64_t buffer, int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer, uint64_t length); int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data); +int64_t opal_handle_hmi(void); +int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); +int64_t opal_unregister_dump_region(uint32_t id); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, @@ -931,6 +1003,8 @@ extern void opal_msglog_init(void); extern int opal_machine_check(struct pt_regs *regs); extern bool opal_mce_check_early_recovery(struct pt_regs *regs); +extern int opal_hmi_exception_early(struct pt_regs *regs); +extern int opal_handle_hmi_exception(struct pt_regs *regs); extern void opal_shutdown(void); extern int opal_resync_timebase(void); @@ -941,6 +1015,13 @@ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, unsigned long vmalloc_size); void opal_free_sg_list(struct opal_sg_list *sg); +/* + * Dump region ID range usable by the OS + */ +#define OPAL_DUMP_REGION_HOST_START 0x80 +#define OPAL_DUMP_REGION_LOG_BUF 0x80 +#define OPAL_DUMP_REGION_HOST_END 0xFF + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_H */ diff --git a/arch/powerpc/include/asm/oprofile_impl.h b/arch/powerpc/include/asm/oprofile_impl.h index d697b08994c9..61fe5d6f18e1 100644 --- a/arch/powerpc/include/asm/oprofile_impl.h +++ b/arch/powerpc/include/asm/oprofile_impl.h @@ -61,7 +61,6 @@ struct op_powerpc_model { }; extern struct op_powerpc_model op_model_fsl_emb; -extern struct op_powerpc_model op_model_rs64; extern struct op_powerpc_model op_model_power4; extern struct op_powerpc_model op_model_7450; extern struct op_powerpc_model op_model_cell; diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index bb0bd25f20d0..a5139ea6910b 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -78,10 +78,6 @@ struct paca_struct { u64 kernel_toc; /* Kernel TOC address */ u64 kernelbase; /* Base address of kernel */ u64 kernel_msr; /* MSR while running in kernel */ -#ifdef CONFIG_PPC_STD_MMU_64 - u64 stab_real; /* Absolute address of segment table */ - u64 stab_addr; /* Virtual address of segment table */ -#endif /* CONFIG_PPC_STD_MMU_64 */ void *emergency_sp; /* pointer to emergency stack */ u64 data_offset; /* per cpu data offset */ s16 hw_cpu_id; /* Physical processor number */ @@ -171,6 +167,7 @@ struct paca_struct { * and already using emergency stack. */ u16 in_mce; + u8 hmi_event_available; /* HMI event is available */ #endif /* Stuff for accurate time accounting */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 32e4e212b9c1..26fe1ae15212 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -48,9 +48,6 @@ extern unsigned int HPAGE_SHIFT; #define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1) #endif -/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ -#define __HAVE_ARCH_GATE_AREA 1 - /* * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we * assign PAGE_MASK to a larger type it gets extended the way we want diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 88693cef4f3d..d908a46d05c0 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -42,20 +42,40 @@ typedef unsigned long pte_basic_t; -static __inline__ void clear_page(void *addr) +static inline void clear_page(void *addr) { - unsigned long lines, line_size; - - line_size = ppc64_caches.dline_size; - lines = ppc64_caches.dlines_per_page; - - __asm__ __volatile__( + unsigned long iterations; + unsigned long onex, twox, fourx, eightx; + + iterations = ppc64_caches.dlines_per_page / 8; + + /* + * Some verisions of gcc use multiply instructions to + * calculate the offsets so lets give it a hand to + * do better. + */ + onex = ppc64_caches.dline_size; + twox = onex << 1; + fourx = onex << 2; + eightx = onex << 3; + + asm volatile( "mtctr %1 # clear_page\n\ -1: dcbz 0,%0\n\ - add %0,%0,%3\n\ + .balign 16\n\ +1: dcbz 0,%0\n\ + dcbz %3,%0\n\ + dcbz %4,%0\n\ + dcbz %5,%0\n\ + dcbz %6,%0\n\ + dcbz %7,%0\n\ + dcbz %8,%0\n\ + dcbz %9,%0\n\ + add %0,%0,%10\n\ bdnz+ 1b" - : "=r" (addr) - : "r" (lines), "0" (addr), "r" (line_size) + : "=&r" (addr) + : "r" (iterations), "0" (addr), "b" (onex), "b" (twox), + "b" (twox+onex), "b" (fourx), "b" (fourx+onex), + "b" (twox+fourx), "b" (eightx-onex), "r" (eightx) : "ctr", "memory"); } @@ -104,7 +124,6 @@ extern unsigned long slice_get_unmapped_area(unsigned long addr, extern unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr); -extern void slice_init_context(struct mm_struct *mm, unsigned int psize); extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize); extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned int psize); diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 9ed737146dbb..814622146d5a 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -19,6 +19,8 @@ #define MAX_EVENT_ALTERNATIVES 8 #define MAX_LIMITED_HWCOUNTERS 2 +struct perf_event; + /* * This struct provides the constants and functions needed to * describe the PMU on a particular POWER-family CPU. @@ -30,7 +32,8 @@ struct power_pmu { unsigned long add_fields; unsigned long test_adder; int (*compute_mmcr)(u64 events[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]); + unsigned int hwc[], unsigned long mmcr[], + struct perf_event *pevents[]); int (*get_constraint)(u64 event_id, unsigned long *mskp, unsigned long *valp); int (*get_alternatives)(u64 event_id, unsigned int flags, @@ -61,8 +64,7 @@ struct power_pmu { #define PPMU_SIAR_VALID 0x00000010 /* Processor has SIAR Valid bit */ #define PPMU_HAS_SSLOT 0x00000020 /* Has sampled slot in MMCRA */ #define PPMU_HAS_SIER 0x00000040 /* Has SIER */ -#define PPMU_BHRB 0x00000080 /* has BHRB feature enabled */ -#define PPMU_EBB 0x00000100 /* supports event based branch */ +#define PPMU_ARCH_207S 0x00000080 /* PMC is architecture v2.07S */ /* * Values for flags to get_alternatives() diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 47edde8c3556..945e47adf7db 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -8,8 +8,6 @@ #include <linux/threads.h> #include <asm/io.h> /* For sub-arch specific PPC_PIN_SIZE */ -extern unsigned long va_to_phys(unsigned long address); -extern pte_t *va_to_pte(unsigned long address); extern unsigned long ioremap_bot; #ifdef CONFIG_44x @@ -50,10 +48,10 @@ extern int icache_44x_need_flush; #define FIRST_USER_ADDRESS 0 #define pte_ERROR(e) \ - printk("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ + pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ (unsigned long long)pte_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) /* * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h index 12798c9d4b4b..7b935683f268 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h @@ -64,7 +64,7 @@ (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) #define pud_ERROR(e) \ - printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) + pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) /* * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index eb9261024f51..ae153c40ab7c 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -328,11 +328,11 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) #define pte_ERROR(e) \ - printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) + pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) + pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) /* Encode and de-code a swap entry */ #define __swp_type(entry) (((entry).val >> 1) & 0x3f) @@ -413,7 +413,7 @@ static inline char *get_hpte_slot_array(pmd_t *pmdp) } extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp); + pmd_t *pmdp, unsigned long old_pmd); #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index e316dad6ba76..6f8536208049 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -139,6 +139,7 @@ #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LDARX 0x7c0000a8 +#define PPC_INST_LOGMPP 0x7c0007e4 #define PPC_INST_LSWI 0x7c0004aa #define PPC_INST_LSWX 0x7c00042a #define PPC_INST_LWARX 0x7c000028 @@ -277,6 +278,20 @@ #define __PPC_EH(eh) 0 #endif +/* POWER8 Micro Partition Prefetch (MPP) parameters */ +/* Address mask is common for LOGMPP instruction and MPPR SPR */ +#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000 + +/* Bits 60 and 61 of MPP SPR should be set to one of the following */ +/* Aborting the fetch is indeed setting 00 in the table size bits */ +#define PPC_MPPR_FETCH_ABORT (0x0ULL << 60) +#define PPC_MPPR_FETCH_WHOLE_TABLE (0x2ULL << 60) + +/* Bits 54 and 55 of register for LOGMPP instruction should be set to: */ +#define PPC_LOGMPP_LOG_L2 (0x02ULL << 54) +#define PPC_LOGMPP_LOG_L2L3 (0x01ULL << 54) +#define PPC_LOGMPP_LOG_ABORT (0x03ULL << 54) + /* Deal with instructions that older assemblers aren't aware of */ #define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \ __PPC_RA(a) | __PPC_RB(b)) @@ -285,6 +300,8 @@ #define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LDARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_LOGMPP(b) stringify_in_c(.long PPC_INST_LOGMPP | \ + __PPC_RB(b)) #define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LWARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 9ea266eae33e..7e4612528546 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -277,6 +277,8 @@ n: .globl n; \ n: +#define _GLOBAL_TOC(name) _GLOBAL(name) + #define _KPROBE(n) \ .section ".kprobes.text","a"; \ .globl n; \ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 6d59072e13a7..dda7ac4c80bd 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -400,6 +400,8 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #define cpu_relax() barrier() #endif +#define cpu_relax_lowlatency() cpu_relax() + /* Check that a certain kernel stack pointer is valid in task_struct p */ int validate_sp(unsigned long sp, struct task_struct *p, unsigned long nbytes); diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 74b79f07f041..7f436ba1b56f 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -76,8 +76,6 @@ void of_parse_dma_window(struct device_node *dn, const __be32 *dma_window, unsigned long *busno, unsigned long *phys, unsigned long *size); -extern void kdump_move_device_tree(void); - extern void of_instantiate_rtc(void); extern int of_get_ibm_chip_id(struct device_node *np); diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h index d836d945068d..4f4ec2ab45c9 100644 --- a/arch/powerpc/include/asm/pte-hash64-64k.h +++ b/arch/powerpc/include/asm/pte-hash64-64k.h @@ -46,11 +46,31 @@ * in order to deal with 64K made of 4K HW pages. Thus we override the * generic accessors and iterators here */ -#define __real_pte(e,p) ((real_pte_t) { \ - (e), (pte_val(e) & _PAGE_COMBO) ? \ - (pte_val(*((p) + PTRS_PER_PTE))) : 0 }) -#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \ - (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf)) +#define __real_pte __real_pte +static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) +{ + real_pte_t rpte; + + rpte.pte = pte; + rpte.hidx = 0; + if (pte_val(pte) & _PAGE_COMBO) { + /* + * Make sure we order the hidx load against the _PAGE_COMBO + * check. The store side ordering is done in __hash_page_4K + */ + smp_rmb(); + rpte.hidx = pte_val(*((ptep) + PTRS_PER_PTE)); + } + return rpte; +} + +static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) +{ + if ((pte_val(rpte.pte) & _PAGE_COMBO)) + return (rpte.hidx >> (index<<2)) & 0xf; + return (pte_val(rpte.pte) >> 12) & 0xf; +} + #define __rpte_to_pte(r) ((r).pte) #define __rpte_sub_valid(rpte, index) \ (pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index))) @@ -75,7 +95,8 @@ (((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) #define remap_4k_pfn(vma, addr, pfn, prot) \ - remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, \ - __pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)) + (WARN_ON(((pfn) >= (1UL << (64 - PTE_RPN_SHIFT)))) ? -EINVAL : \ + remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, \ + __pgprot(pgprot_val((prot)) | _PAGE_4K_PFN))) #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 279b80f3bb29..c0c61fa9cd9e 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -47,6 +47,12 @@ STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE) #define STACK_FRAME_MARKER 12 +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define STACK_FRAME_MIN_SIZE 32 +#else +#define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD +#endif + /* Size of dummy stack frame allocated when calling signal handler. */ #define __SIGNAL_FRAMESIZE 128 #define __SIGNAL_FRAMESIZE32 64 @@ -60,6 +66,7 @@ #define STACK_FRAME_REGS_MARKER ASM_CONST(0x72656773) #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_FRAME_MARKER 2 +#define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD /* Size of stack frame allocated when calling signal handler. */ #define __SIGNAL_FRAMESIZE 64 diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 1ff8ba94b9f4..fe3f9488f321 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -213,9 +213,8 @@ #define SPRN_ACOP 0x1F /* Available Coprocessor Register */ #define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */ #define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */ -#define TEXASR_FS __MASK(63-36) /* Transaction Failure Summary */ #define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ -#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */ +#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ #define SPRN_CTRLF 0x088 #define SPRN_CTRLT 0x098 @@ -225,6 +224,7 @@ #define CTRL_TE 0x00c00000 /* thread enable */ #define CTRL_RUNLATCH 0x1 #define SPRN_DAWR 0xB4 +#define SPRN_MPPR 0xB8 /* Micro Partition Prefetch Register */ #define SPRN_RPR 0xBA /* Relative Priority Register */ #define SPRN_CIABR 0xBB #define CIABR_PRIV 0x3 @@ -254,7 +254,7 @@ #define DSISR_PROTFAULT 0x08000000 /* protection fault */ #define DSISR_ISSTORE 0x02000000 /* access was a store */ #define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ -#define DSISR_NOSEGMENT 0x00200000 /* STAB/SLB miss */ +#define DSISR_NOSEGMENT 0x00200000 /* SLB miss */ #define DSISR_KEYFAULT 0x00200000 /* Key fault */ #define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */ #define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ @@ -944,9 +944,6 @@ * readable variant for reads, which can avoid a fault * with KVM type virtualization. * - * (*) Under KVM, the host SPRG1 is used to point to - * the current VCPU data structure - * * 32-bit 8xx: * - SPRG0 scratch for exception vectors * - SPRG1 scratch for exception vectors @@ -1204,6 +1201,15 @@ : "r" ((unsigned long)(v)) \ : "memory") +static inline unsigned long mfvtb (void) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return mfspr(SPRN_VTB); +#endif + return 0; +} + #ifdef __powerpc64__ #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E) #define mftb() ({unsigned long rval; \ diff --git a/arch/powerpc/include/asm/rio.h b/arch/powerpc/include/asm/rio.h index b1d2deceeedb..ec800f28fec5 100644 --- a/arch/powerpc/include/asm/rio.h +++ b/arch/powerpc/include/asm/rio.h @@ -13,7 +13,6 @@ #ifndef ASM_PPC_RIO_H #define ASM_PPC_RIO_H -extern void platform_rio_init(void); #ifdef CONFIG_FSL_RIO extern int fsl_rio_mcheck_exception(struct pt_regs *); #else diff --git a/arch/powerpc/include/asm/scatterlist.h b/arch/powerpc/include/asm/scatterlist.h deleted file mode 100644 index de1f620bd5c9..000000000000 --- a/arch/powerpc/include/asm/scatterlist.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _ASM_POWERPC_SCATTERLIST_H -#define _ASM_POWERPC_SCATTERLIST_H -/* - * Copyright (C) 2001 PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/dma.h> -#include <asm-generic/scatterlist.h> - -#define ARCH_HAS_SG_CHAIN - -#endif /* _ASM_POWERPC_SCATTERLIST_H */ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 35aa339410bd..4dbe072eecbe 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -61,6 +61,7 @@ static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { + smp_mb(); return !arch_spin_value_unlocked(*lock); } diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index f593b0f9b627..d3a42cc45a82 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -25,3 +25,65 @@ struct pt_regs; /* Emulate instructions that cause a transfer of control. */ extern int emulate_step(struct pt_regs *regs, unsigned int instr); + +enum instruction_type { + COMPUTE, /* arith/logical/CR op, etc. */ + LOAD, + LOAD_MULTI, + LOAD_FP, + LOAD_VMX, + LOAD_VSX, + STORE, + STORE_MULTI, + STORE_FP, + STORE_VMX, + STORE_VSX, + LARX, + STCX, + BRANCH, + MFSPR, + MTSPR, + CACHEOP, + BARRIER, + SYSCALL, + MFMSR, + MTMSR, + RFI, + INTERRUPT, + UNKNOWN +}; + +#define INSTR_TYPE_MASK 0x1f + +/* Load/store flags, ORed in with type */ +#define SIGNEXT 0x20 +#define UPDATE 0x40 /* matches bit in opcode 31 instructions */ +#define BYTEREV 0x80 + +/* Cacheop values, ORed in with type */ +#define CACHEOP_MASK 0x700 +#define DCBST 0 +#define DCBF 0x100 +#define DCBTST 0x200 +#define DCBT 0x300 +#define ICBI 0x400 + +/* Size field in type word */ +#define SIZE(n) ((n) << 8) +#define GETSIZE(w) ((w) >> 8) + +#define MKOP(t, f, s) ((t) | (f) | SIZE(s)) + +struct instruction_op { + int type; + int reg; + unsigned long val; + /* For LOAD/STORE/LARX/STCX */ + unsigned long ea; + int update_reg; + /* For MFSPR */ + int spr; +}; + +extern int analyse_instr(struct instruction_op *op, struct pt_regs *regs, + unsigned int instr); diff --git a/arch/powerpc/include/asm/swab.h b/arch/powerpc/include/asm/swab.h index b9bd1ca944d0..96f59de61855 100644 --- a/arch/powerpc/include/asm/swab.h +++ b/arch/powerpc/include/asm/swab.h @@ -9,10 +9,6 @@ #include <uapi/asm/swab.h> -#ifdef __GNUC__ -#ifndef __powerpc64__ -#endif /* __powerpc64__ */ - static __inline__ __u16 ld_le16(const volatile __u16 *addr) { __u16 val; @@ -20,19 +16,12 @@ static __inline__ __u16 ld_le16(const volatile __u16 *addr) __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr)); return val; } -#define __arch_swab16p ld_le16 static __inline__ void st_le16(volatile __u16 *addr, const __u16 val) { __asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); } -static inline void __arch_swab16s(__u16 *addr) -{ - st_le16(addr, *addr); -} -#define __arch_swab16s __arch_swab16s - static __inline__ __u32 ld_le32(const volatile __u32 *addr) { __u32 val; @@ -40,42 +29,10 @@ static __inline__ __u32 ld_le32(const volatile __u32 *addr) __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr)); return val; } -#define __arch_swab32p ld_le32 static __inline__ void st_le32(volatile __u32 *addr, const __u32 val) { __asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); } -static inline void __arch_swab32s(__u32 *addr) -{ - st_le32(addr, *addr); -} -#define __arch_swab32s __arch_swab32s - -static inline __attribute_const__ __u16 __arch_swab16(__u16 value) -{ - __u16 result; - - __asm__("rlwimi %0,%1,8,16,23" - : "=r" (result) - : "r" (value), "0" (value >> 8)); - return result; -} -#define __arch_swab16 __arch_swab16 - -static inline __attribute_const__ __u32 __arch_swab32(__u32 value) -{ - __u32 result; - - __asm__("rlwimi %0,%1,24,16,23\n\t" - "rlwimi %0,%1,8,8,15\n\t" - "rlwimi %0,%1,24,0,7" - : "=r" (result) - : "r" (value), "0" (value >> 24)); - return result; -} -#define __arch_swab32 __arch_swab32 - -#endif /* __GNUC__ */ #endif /* _ASM_POWERPC_SWAB_H */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index babbeca6850f..7d8a60068805 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -77,10 +77,10 @@ SYSCALL_SPU(setreuid) SYSCALL_SPU(setregid) #define compat_sys_sigsuspend sys_sigsuspend SYS32ONLY(sigsuspend) -COMPAT_SYS(sigpending) +SYSX(sys_ni_syscall,compat_sys_sigpending,sys_sigpending) SYSCALL_SPU(sethostname) COMPAT_SYS_SPU(setrlimit) -COMPAT_SYS(old_getrlimit) +SYSX(sys_ni_syscall,compat_sys_old_getrlimit,sys_old_getrlimit) COMPAT_SYS_SPU(getrusage) COMPAT_SYS_SPU(gettimeofday) COMPAT_SYS_SPU(settimeofday) @@ -362,3 +362,6 @@ SYSCALL(ni_syscall) /* sys_kcmp */ SYSCALL_SPU(sched_setattr) SYSCALL_SPU(sched_getattr) SYSCALL_SPU(renameat2) +SYSCALL_SPU(seccomp) +SYSCALL_SPU(getrandom) +SYSCALL_SPU(memfd_create) diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 1d428e6007ca..03cbada59d3a 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -102,6 +102,15 @@ static inline u64 get_rtc(void) return (u64)hi * 1000000000 + lo; } +static inline u64 get_vtb(void) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return mfvtb(); +#endif + return 0; +} + #ifdef CONFIG_PPC64 static inline u64 get_tb(void) { diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index 5712f06905a9..c15da6073cb8 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -99,6 +99,51 @@ TRACE_EVENT_FN(hcall_exit, ); #endif +#ifdef CONFIG_PPC_POWERNV +extern void opal_tracepoint_regfunc(void); +extern void opal_tracepoint_unregfunc(void); + +TRACE_EVENT_FN(opal_entry, + + TP_PROTO(unsigned long opcode, unsigned long *args), + + TP_ARGS(opcode, args), + + TP_STRUCT__entry( + __field(unsigned long, opcode) + ), + + TP_fast_assign( + __entry->opcode = opcode; + ), + + TP_printk("opcode=%lu", __entry->opcode), + + opal_tracepoint_regfunc, opal_tracepoint_unregfunc +); + +TRACE_EVENT_FN(opal_exit, + + TP_PROTO(unsigned long opcode, unsigned long retval), + + TP_ARGS(opcode, retval), + + TP_STRUCT__entry( + __field(unsigned long, opcode) + __field(unsigned long, retval) + ), + + TP_fast_assign( + __entry->opcode = opcode; + __entry->retval = retval; + ), + + TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval), + + opal_tracepoint_regfunc, opal_tracepoint_unregfunc +); +#endif + #endif /* _TRACE_POWERPC_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/powerpc/include/asm/tsi108.h b/arch/powerpc/include/asm/tsi108.h index f8b60793b7a9..d531d9e173ef 100644 --- a/arch/powerpc/include/asm/tsi108.h +++ b/arch/powerpc/include/asm/tsi108.h @@ -84,10 +84,6 @@ extern u32 tsi108_pci_cfg_base; /* Exported functions */ -extern int tsi108_bridge_init(struct pci_controller *hose, uint phys_csr_base); -extern unsigned long tsi108_get_mem_size(void); -extern unsigned long tsi108_get_cpu_clk(void); -extern unsigned long tsi108_get_sdc_clk(void); extern int tsi108_direct_write_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 val); extern int tsi108_direct_read_config(struct pci_bus *bus, unsigned int devfn, diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index b51fba10e733..78f2675f2aac 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -52,7 +52,6 @@ extern void __init udbg_init_44x_as1(void); extern void __init udbg_init_40x_realmode(void); extern void __init udbg_init_cpm(void); extern void __init udbg_init_usbgecko(void); -extern void __init udbg_init_wsp(void); extern void __init udbg_init_memcons(void); extern void __init udbg_init_ehv_bc(void); extern void __init udbg_init_ps3gelic(void); diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 5ce5552ab9f5..4e9af3fd43e7 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include <uapi/asm/unistd.h> -#define __NR_syscalls 358 +#define __NR_syscalls 361 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index 9a5c928bb3c6..5b3a903adae6 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -42,32 +42,65 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct #else +#ifdef CONFIG_64BIT + +/* unused */ struct word_at_a_time { - const unsigned long one_bits, high_bits; }; -#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } +#define WORD_AT_A_TIME_CONSTANTS { } -#ifdef CONFIG_64BIT +/* This will give us 0xff for a NULL char and 0x00 elsewhere */ +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) +{ + unsigned long ret; + unsigned long zero = 0; -/* Alan Modra's little-endian strlen tail for 64-bit */ -#define create_zero_mask(mask) (mask) + asm("cmpb %0,%1,%2" : "=r" (ret) : "r" (a), "r" (zero)); + *bits = ret; -static inline unsigned long find_zero(unsigned long mask) + return ret; +} + +static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c) +{ + return bits; +} + +/* Alan Modra's little-endian strlen tail for 64-bit */ +static inline unsigned long create_zero_mask(unsigned long bits) { unsigned long leading_zero_bits; long trailing_zero_bit_mask; - asm ("addi %1,%2,-1\n\t" - "andc %1,%1,%2\n\t" - "popcntd %0,%1" - : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask) - : "r" (mask)); - return leading_zero_bits >> 3; + asm("addi %1,%2,-1\n\t" + "andc %1,%1,%2\n\t" + "popcntd %0,%1" + : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask) + : "r" (bits)); + + return leading_zero_bits; +} + +static inline unsigned long find_zero(unsigned long mask) +{ + return mask >> 3; +} + +/* This assumes that we never ask for an all 1s bitmask */ +static inline unsigned long zero_bytemask(unsigned long mask) +{ + return (1UL << mask) - 1; } #else /* 32-bit case */ +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } + /* * This is largely generic for little-endian machines, but the * optimal byte mask counting is probably going to be something @@ -96,8 +129,6 @@ static inline unsigned long find_zero(unsigned long mask) return count_masked_bytes(mask); } -#endif - /* Return nonzero if it has a zero */ static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) { @@ -114,6 +145,59 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, /* The mask we created is directly usable as a bytemask */ #define zero_bytemask(mask) (mask) +#endif /* CONFIG_64BIT */ + +#endif /* __BIG_ENDIAN__ */ + +/* + * We use load_unaligned_zero() in a selftest, which builds a userspace + * program. Some linker scripts seem to discard the .fixup section, so allow + * the test code to use a different section name. + */ +#ifndef FIXUP_SECTION +#define FIXUP_SECTION ".fixup" +#endif + +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret, offset, tmp; + + asm( + "1: " PPC_LL "%[ret], 0(%[addr])\n" + "2:\n" + ".section " FIXUP_SECTION ",\"ax\"\n" + "3: " +#ifdef __powerpc64__ + "clrrdi %[tmp], %[addr], 3\n\t" + "clrlsldi %[offset], %[addr], 61, 3\n\t" + "ld %[ret], 0(%[tmp])\n\t" +#ifdef __BIG_ENDIAN__ + "sld %[ret], %[ret], %[offset]\n\t" +#else + "srd %[ret], %[ret], %[offset]\n\t" #endif +#else + "clrrwi %[tmp], %[addr], 2\n\t" + "clrlslwi %[offset], %[addr], 30, 3\n\t" + "lwz %[ret], 0(%[tmp])\n\t" +#ifdef __BIG_ENDIAN__ + "slw %[ret], %[ret], %[offset]\n\t" +#else + "srw %[ret], %[ret], %[offset]\n\t" +#endif +#endif + "b 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n\t" + PPC_LONG_ALIGN "\n\t" + PPC_LONG "1b,3b\n" + ".previous" + : [tmp] "=&b" (tmp), [offset] "=&r" (offset), [ret] "=&r" (ret) + : [addr] "b" (addr), "m" (*(unsigned long *)addr)); + + return ret; +} + +#undef FIXUP_SECTION #endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 282d43a0c855..0d050ea37a04 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -29,6 +29,7 @@ /* Native ICP */ #ifdef CONFIG_PPC_ICP_NATIVE extern int icp_native_init(void); +extern void icp_native_flush_interrupt(void); #else static inline int icp_native_init(void) { return -ENODEV; } #endif diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 2bc4a9409a93..e0e49dbb145d 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -548,6 +548,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_VRSAVE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4) #define KVM_REG_PPC_LPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5) +#define KVM_REG_PPC_LPCR_64 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb5) #define KVM_REG_PPC_PPR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb6) /* Architecture compatibility level */ @@ -555,6 +556,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8) #define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9) +#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 2d526f7b48da..0688fc06e183 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -380,5 +380,8 @@ #define __NR_sched_setattr 355 #define __NR_sched_getattr 356 #define __NR_renameat2 357 +#define __NR_seccomp 358 +#define __NR_getrandom 359 +#define __NR_memfd_create 360 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 670c312d914e..502cf69b6c89 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -93,6 +93,9 @@ obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_MODULES) += ppc_ksyms.o +ifeq ($(CONFIG_PPC32),y) +obj-$(CONFIG_MODULES) += ppc_ksyms_32.o +endif obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KPROBES) += kprobes.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f5995a912213..9d7dede2847c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -216,8 +216,6 @@ int main(void) #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_STD_MMU_64 - DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real)); - DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr)); DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp)); @@ -493,6 +491,7 @@ int main(void) DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); + DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor)); DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); @@ -667,6 +666,7 @@ int main(void) DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); + DEFINE(VCPU_SPRG9, offsetof(struct kvm_vcpu, arch.sprg9)); DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 965291b4c2fa..9b6dcaaec1a3 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -123,96 +123,6 @@ extern void __restore_cpu_e6500(void); static struct cpu_spec __initdata cpu_specs[] = { #ifdef CONFIG_PPC_BOOK3S_64 - { /* Power3 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00400000, - .cpu_name = "POWER3 (630)", - .cpu_features = CPU_FTRS_POWER3, - .cpu_user_features = COMMON_USER_PPC64|PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power3", - .oprofile_type = PPC_OPROFILE_RS64, - .platform = "power3", - }, - { /* Power3+ */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00410000, - .cpu_name = "POWER3 (630+)", - .cpu_features = CPU_FTRS_POWER3, - .cpu_user_features = COMMON_USER_PPC64|PPC_FEATURE_PPC_LE, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power3", - .oprofile_type = PPC_OPROFILE_RS64, - .platform = "power3", - }, - { /* Northstar */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00330000, - .cpu_name = "RS64-II (northstar)", - .cpu_features = CPU_FTRS_RS64, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/rs64", - .oprofile_type = PPC_OPROFILE_RS64, - .platform = "rs64", - }, - { /* Pulsar */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00340000, - .cpu_name = "RS64-III (pulsar)", - .cpu_features = CPU_FTRS_RS64, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/rs64", - .oprofile_type = PPC_OPROFILE_RS64, - .platform = "rs64", - }, - { /* I-star */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00360000, - .cpu_name = "RS64-III (icestar)", - .cpu_features = CPU_FTRS_RS64, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/rs64", - .oprofile_type = PPC_OPROFILE_RS64, - .platform = "rs64", - }, - { /* S-star */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00370000, - .cpu_name = "RS64-IV (sstar)", - .cpu_features = CPU_FTRS_RS64, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_HPTE_TABLE, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/rs64", - .oprofile_type = PPC_OPROFILE_RS64, - .platform = "rs64", - }, { /* Power4 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00350000, @@ -527,6 +437,26 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* Power8 DD1: Does not support doorbell IPIs */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x004d0100, + .cpu_name = "POWER8 (raw)", + .cpu_features = CPU_FTRS_POWER8_DD1, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power8", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, { /* Power8 */ .pvr_mask = 0xffff0000, .pvr_value = 0x004d0000, @@ -597,7 +527,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC32 -#if CLASSIC_PPC +#ifdef CONFIG_PPC_BOOK3S_32 { /* 601 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00010000, @@ -1237,7 +1167,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc603", }, -#endif /* CLASSIC_PPC */ +#endif /* CONFIG_PPC_BOOK3S_32 */ #ifdef CONFIG_8xx { /* 8xx */ .pvr_mask = 0xffff0000, diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 7a13f378ca2c..c78e6dac4d7d 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -13,6 +13,7 @@ #include <linux/crash_dump.h> #include <linux/bootmem.h> +#include <linux/io.h> #include <linux/memblock.h> #include <asm/code-patching.h> #include <asm/kdump.h> diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index a7b0156586dd..adac9dc54aee 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -235,6 +235,7 @@ int __dma_set_mask(struct device *dev, u64 dma_mask) *dev->dma_mask = dma_mask; return 0; } + int dma_set_mask(struct device *dev, u64 dma_mask) { if (ppc_md.dma_set_mask) @@ -243,13 +244,10 @@ int dma_set_mask(struct device *dev, u64 dma_mask) } EXPORT_SYMBOL(dma_set_mask); -u64 dma_get_required_mask(struct device *dev) +u64 __dma_get_required_mask(struct device *dev) { struct dma_map_ops *dma_ops = get_dma_ops(dev); - if (ppc_md.dma_get_required_mask) - return ppc_md.dma_get_required_mask(dev); - if (unlikely(dma_ops == NULL)) return 0; @@ -258,6 +256,14 @@ u64 dma_get_required_mask(struct device *dev) return DMA_BIT_MASK(8 * sizeof(dma_addr_t)); } + +u64 dma_get_required_mask(struct device *dev) +{ + if (ppc_md.dma_get_required_mask) + return ppc_md.dma_get_required_mask(dev); + + return __dma_get_required_mask(dev); +} EXPORT_SYMBOL_GPL(dma_get_required_mask); static int __init dma_init(void) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 86e25702aaca..d543e4179c18 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -27,6 +27,7 @@ #include <linux/init.h> #include <linux/list.h> #include <linux/pci.h> +#include <linux/iommu.h> #include <linux/proc_fs.h> #include <linux/rbtree.h> #include <linux/reboot.h> @@ -40,6 +41,7 @@ #include <asm/eeh.h> #include <asm/eeh_event.h> #include <asm/io.h> +#include <asm/iommu.h> #include <asm/machdep.h> #include <asm/ppc-pci.h> #include <asm/rtas.h> @@ -108,11 +110,14 @@ struct eeh_ops *eeh_ops = NULL; /* Lock to avoid races due to multiple reports of an error */ DEFINE_RAW_SPINLOCK(confirm_error_lock); +/* Lock to protect passed flags */ +static DEFINE_MUTEX(eeh_dev_mutex); + /* Buffer for reporting pci register dumps. Its here in BSS, and * not dynamically alloced, so that it ends up in RMO where RTAS * can access it. */ -#define EEH_PCI_REGS_LOG_LEN 4096 +#define EEH_PCI_REGS_LOG_LEN 8192 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; /* @@ -137,27 +142,24 @@ static struct eeh_stats eeh_stats; static int __init eeh_setup(char *str) { if (!strcmp(str, "off")) - eeh_subsystem_flags |= EEH_FORCE_DISABLED; + eeh_add_flag(EEH_FORCE_DISABLED); return 1; } __setup("eeh=", eeh_setup); -/** - * eeh_gather_pci_data - Copy assorted PCI config space registers to buff - * @edev: device to report data for - * @buf: point to buffer in which to log - * @len: amount of room in buffer - * - * This routine captures assorted PCI configuration space data, - * and puts them into a buffer for RTAS error logging. +/* + * This routine captures assorted PCI configuration space data + * for the indicated PCI device, and puts them into a buffer + * for RTAS error logging. */ -static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) +static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) { struct device_node *dn = eeh_dev_to_of_node(edev); u32 cfg; int cap, i; - int n = 0; + int n = 0, l = 0; + char buffer[128]; n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); pr_warn("EEH: of node=%s\n", dn->full_name); @@ -202,8 +204,22 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) for (i=0; i<=8; i++) { eeh_ops->read_config(dn, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); - pr_warn("EEH: PCI-E %02x: %08x\n", i, cfg); + + if ((i % 4) == 0) { + if (i != 0) + pr_warn("%s\n", buffer); + + l = scnprintf(buffer, sizeof(buffer), + "EEH: PCI-E %02x: %08x ", + 4*i, cfg); + } else { + l += scnprintf(buffer+l, sizeof(buffer)-l, + "%08x ", cfg); + } + } + + pr_warn("%s\n", buffer); } /* If AER capable, dump it */ @@ -212,16 +228,42 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) n += scnprintf(buf+n, len-n, "pci-e AER:\n"); pr_warn("EEH: PCI-E AER capability register set follows:\n"); - for (i=0; i<14; i++) { + for (i=0; i<=13; i++) { eeh_ops->read_config(dn, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); - pr_warn("EEH: PCI-E AER %02x: %08x\n", i, cfg); + + if ((i % 4) == 0) { + if (i != 0) + pr_warn("%s\n", buffer); + + l = scnprintf(buffer, sizeof(buffer), + "EEH: PCI-E AER %02x: %08x ", + 4*i, cfg); + } else { + l += scnprintf(buffer+l, sizeof(buffer)-l, + "%08x ", cfg); + } } + + pr_warn("%s\n", buffer); } return n; } +static void *eeh_dump_pe_log(void *data, void *flag) +{ + struct eeh_pe *pe = data; + struct eeh_dev *edev, *tmp; + size_t *plen = flag; + + eeh_pe_for_each_dev(pe, edev, tmp) + *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, + EEH_PCI_REGS_LOG_LEN - *plen); + + return NULL; +} + /** * eeh_slot_error_detail - Generate combined log including driver log and error log * @pe: EEH PE @@ -235,7 +277,6 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) void eeh_slot_error_detail(struct eeh_pe *pe, int severity) { size_t loglen = 0; - struct eeh_dev *edev, *tmp; /* * When the PHB is fenced or dead, it's pointless to collect @@ -247,16 +288,13 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) * 0xFF's is always returned from PCI config space. */ if (!(pe->type & EEH_PE_PHB)) { - if (eeh_probe_mode_devtree()) + if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG)) eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); pci_regs_buf[0] = 0; - eeh_pe_for_each_dev(pe, edev, tmp) { - loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen, - EEH_PCI_REGS_LOG_LEN - loglen); - } + eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); } eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); @@ -298,14 +336,14 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) unsigned long flags; int ret; - if (!eeh_probe_mode_dev()) + if (!eeh_has_flag(EEH_PROBE_MODE_DEV)) return -EPERM; /* Find the PHB PE */ phb_pe = eeh_phb_pe_get(pe->phb); if (!phb_pe) { - pr_warning("%s Can't find PE for PHB#%d\n", - __func__, pe->phb->global_number); + pr_warn("%s Can't find PE for PHB#%d\n", + __func__, pe->phb->global_number); return -EEXIST; } @@ -377,7 +415,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) } dn = eeh_dev_to_of_node(edev); dev = eeh_dev_to_pci_dev(edev); - pe = edev->pe; + pe = eeh_dev_to_pe(edev); /* Access to IO BARs might get this far and still not want checking. */ if (!pe) { @@ -400,6 +438,14 @@ int eeh_dev_check_failure(struct eeh_dev *edev) if (ret > 0) return ret; + /* + * If the PE isn't owned by us, we shouldn't check the + * state. Instead, let the owner handle it if the PE has + * been frozen. + */ + if (eeh_pe_passed(pe)) + return 0; + /* If we already have a pending isolation event for this * slot, we know it's bad already, we don't need to check. * Do this checking under a lock; as multiple PCI devices @@ -501,17 +547,16 @@ EXPORT_SYMBOL_GPL(eeh_dev_check_failure); /** * eeh_check_failure - Check if all 1's data is due to EEH slot freeze - * @token: I/O token, should be address in the form 0xA.... - * @val: value, should be all 1's (XXX why do we need this arg??) + * @token: I/O address * - * Check for an EEH failure at the given token address. Call this + * Check for an EEH failure at the given I/O address. Call this * routine if the result of a read was all 0xff's and you want to - * find out if this is due to an EEH slot freeze event. This routine + * find out if this is due to an EEH slot freeze event. This routine * will query firmware for the EEH status. * * Note this routine is safe to call in an interrupt context. */ -unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) +int eeh_check_failure(const volatile void __iomem *token) { unsigned long addr; struct eeh_dev *edev; @@ -521,13 +566,11 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon edev = eeh_addr_cache_get_dev(addr); if (!edev) { eeh_stats.no_device++; - return val; + return 0; } - eeh_dev_check_failure(edev); - return val; + return eeh_dev_check_failure(edev); } - EXPORT_SYMBOL(eeh_check_failure); @@ -541,25 +584,51 @@ EXPORT_SYMBOL(eeh_check_failure); */ int eeh_pci_enable(struct eeh_pe *pe, int function) { - int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + int active_flag, rc; /* * pHyp doesn't allow to enable IO or DMA on unfrozen PE. * Also, it's pointless to enable them on unfrozen PE. So - * we have the check here. + * we have to check before enabling IO or DMA. + */ + switch (function) { + case EEH_OPT_THAW_MMIO: + active_flag = EEH_STATE_MMIO_ACTIVE; + break; + case EEH_OPT_THAW_DMA: + active_flag = EEH_STATE_DMA_ACTIVE; + break; + case EEH_OPT_DISABLE: + case EEH_OPT_ENABLE: + case EEH_OPT_FREEZE_PE: + active_flag = 0; + break; + default: + pr_warn("%s: Invalid function %d\n", + __func__, function); + return -EINVAL; + } + + /* + * Check if IO or DMA has been enabled before + * enabling them. */ - if (function == EEH_OPT_THAW_MMIO || - function == EEH_OPT_THAW_DMA) { + if (active_flag) { rc = eeh_ops->get_state(pe, NULL); if (rc < 0) return rc; - /* Needn't to enable or already enabled */ - if ((rc == EEH_STATE_NOT_SUPPORT) || - ((rc & flags) == flags)) + /* Needn't enable it at all */ + if (rc == EEH_STATE_NOT_SUPPORT) + return 0; + + /* It's already enabled */ + if (rc & active_flag) return 0; } + + /* Issue the request */ rc = eeh_ops->set_option(pe, function); if (rc) pr_warn("%s: Unexpected state change %d on " @@ -567,17 +636,17 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) __func__, function, pe->phb->global_number, pe->addr, rc); - rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (rc <= 0) - return rc; + /* Check if the request is finished successfully */ + if (active_flag) { + rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); + if (rc <= 0) + return rc; - if ((function == EEH_OPT_THAW_MMIO) && - (rc & EEH_STATE_MMIO_ENABLED)) - return 0; + if (rc & active_flag) + return 0; - if ((function == EEH_OPT_THAW_DMA) && - (rc & EEH_STATE_DMA_ENABLED)) - return 0; + return -EIO; + } return rc; } @@ -593,7 +662,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) { struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - struct eeh_pe *pe = edev->pe; + struct eeh_pe *pe = eeh_dev_to_pe(edev); if (!pe) { pr_err("%s: No PE found on PCI device %s\n", @@ -604,14 +673,18 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat switch (state) { case pcie_deassert_reset: eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); + eeh_pe_state_clear(pe, EEH_PE_RESET); break; case pcie_hot_reset: + eeh_pe_state_mark(pe, EEH_PE_RESET); eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: + eeh_pe_state_mark(pe, EEH_PE_RESET); eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; default: + eeh_pe_state_clear(pe, EEH_PE_RESET); return -EINVAL; }; @@ -746,13 +819,13 @@ void eeh_save_bars(struct eeh_dev *edev) int __init eeh_ops_register(struct eeh_ops *ops) { if (!ops->name) { - pr_warning("%s: Invalid EEH ops name for %p\n", + pr_warn("%s: Invalid EEH ops name for %p\n", __func__, ops); return -EINVAL; } if (eeh_ops && eeh_ops != ops) { - pr_warning("%s: EEH ops of platform %s already existing (%s)\n", + pr_warn("%s: EEH ops of platform %s already existing (%s)\n", __func__, eeh_ops->name, ops->name); return -EEXIST; } @@ -772,7 +845,7 @@ int __init eeh_ops_register(struct eeh_ops *ops) int __exit eeh_ops_unregister(const char *name) { if (!name || !strlen(name)) { - pr_warning("%s: Invalid EEH ops name\n", + pr_warn("%s: Invalid EEH ops name\n", __func__); return -EINVAL; } @@ -788,7 +861,7 @@ int __exit eeh_ops_unregister(const char *name) static int eeh_reboot_notifier(struct notifier_block *nb, unsigned long action, void *unused) { - eeh_set_enable(false); + eeh_clear_flag(EEH_ENABLED); return NOTIFY_DONE; } @@ -837,11 +910,11 @@ int eeh_init(void) /* call platform initialization function */ if (!eeh_ops) { - pr_warning("%s: Platform EEH operation not found\n", + pr_warn("%s: Platform EEH operation not found\n", __func__); return -EEXIST; } else if ((ret = eeh_ops->init())) { - pr_warning("%s: Failed to call platform init function (%d)\n", + pr_warn("%s: Failed to call platform init function (%d)\n", __func__, ret); return ret; } @@ -852,13 +925,13 @@ int eeh_init(void) return ret; /* Enable EEH for all adapters */ - if (eeh_probe_mode_devtree()) { + if (eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) { list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->dn; traverse_pci_devices(phb, eeh_ops->of_probe, NULL); } - } else if (eeh_probe_mode_dev()) { + } else if (eeh_has_flag(EEH_PROBE_MODE_DEV)) { list_for_each_entry_safe(hose, tmp, &hose_list, list_node) pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL); @@ -882,7 +955,7 @@ int eeh_init(void) if (eeh_enabled()) pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); else - pr_warning("EEH: No capable adapters found\n"); + pr_warn("EEH: No capable adapters found\n"); return ret; } @@ -910,7 +983,7 @@ void eeh_add_device_early(struct device_node *dn) * would delay the probe until late stage because * the PCI device isn't available this moment. */ - if (!eeh_probe_mode_devtree()) + if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) return; if (!of_node_to_eeh_dev(dn)) @@ -996,7 +1069,7 @@ void eeh_add_device_late(struct pci_dev *dev) * We have to do the EEH probe here because the PCI device * hasn't been created yet in the early stage. */ - if (eeh_probe_mode_dev()) + if (eeh_has_flag(EEH_PROBE_MODE_DEV)) eeh_ops->dev_probe(dev, NULL); eeh_addr_cache_insert_dev(dev); @@ -1100,6 +1173,404 @@ void eeh_remove_device(struct pci_dev *dev) edev->mode &= ~EEH_DEV_SYSFS; } +int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) +{ + int ret; + + ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + if (ret) { + pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n", + __func__, ret, pe->phb->global_number, pe->addr); + return ret; + } + + ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); + if (ret) { + pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n", + __func__, ret, pe->phb->global_number, pe->addr); + return ret; + } + + /* Clear software isolated state */ + if (sw_state && (pe->state & EEH_PE_ISOLATED)) + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + + return ret; +} + + +static struct pci_device_id eeh_reset_ids[] = { + { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */ + { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */ + { 0 } +}; + +static int eeh_pe_change_owner(struct eeh_pe *pe) +{ + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; + struct pci_device_id *id; + int flags, ret; + + /* Check PE state */ + flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + ret = eeh_ops->get_state(pe, NULL); + if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT) + return 0; + + /* Unfrozen PE, nothing to do */ + if ((ret & flags) == flags) + return 0; + + /* Frozen PE, check if it needs PE level reset */ + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; + + for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) { + if (id->vendor != PCI_ANY_ID && + id->vendor != pdev->vendor) + continue; + if (id->device != PCI_ANY_ID && + id->device != pdev->device) + continue; + if (id->subvendor != PCI_ANY_ID && + id->subvendor != pdev->subsystem_vendor) + continue; + if (id->subdevice != PCI_ANY_ID && + id->subdevice != pdev->subsystem_device) + continue; + + goto reset; + } + } + + return eeh_unfreeze_pe(pe, true); + +reset: + return eeh_pe_reset_and_recover(pe); +} + +/** + * eeh_dev_open - Increase count of pass through devices for PE + * @pdev: PCI device + * + * Increase count of passed through devices for the indicated + * PE. In the result, the EEH errors detected on the PE won't be + * reported. The PE owner will be responsible for detection + * and recovery. + */ +int eeh_dev_open(struct pci_dev *pdev) +{ + struct eeh_dev *edev; + int ret = -ENODEV; + + mutex_lock(&eeh_dev_mutex); + + /* No PCI device ? */ + if (!pdev) + goto out; + + /* No EEH device or PE ? */ + edev = pci_dev_to_eeh_dev(pdev); + if (!edev || !edev->pe) + goto out; + + /* + * The PE might have been put into frozen state, but we + * didn't detect that yet. The passed through PCI devices + * in frozen PE won't work properly. Clear the frozen state + * in advance. + */ + ret = eeh_pe_change_owner(edev->pe); + if (ret) + goto out; + + /* Increase PE's pass through count */ + atomic_inc(&edev->pe->pass_dev_cnt); + mutex_unlock(&eeh_dev_mutex); + + return 0; +out: + mutex_unlock(&eeh_dev_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(eeh_dev_open); + +/** + * eeh_dev_release - Decrease count of pass through devices for PE + * @pdev: PCI device + * + * Decrease count of pass through devices for the indicated PE. If + * there is no passed through device in PE, the EEH errors detected + * on the PE will be reported and handled as usual. + */ +void eeh_dev_release(struct pci_dev *pdev) +{ + struct eeh_dev *edev; + + mutex_lock(&eeh_dev_mutex); + + /* No PCI device ? */ + if (!pdev) + goto out; + + /* No EEH device ? */ + edev = pci_dev_to_eeh_dev(pdev); + if (!edev || !edev->pe || !eeh_pe_passed(edev->pe)) + goto out; + + /* Decrease PE's pass through count */ + atomic_dec(&edev->pe->pass_dev_cnt); + WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0); + eeh_pe_change_owner(edev->pe); +out: + mutex_unlock(&eeh_dev_mutex); +} +EXPORT_SYMBOL(eeh_dev_release); + +#ifdef CONFIG_IOMMU_API + +static int dev_has_iommu_table(struct device *dev, void *data) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_dev **ppdev = data; + struct iommu_table *tbl; + + if (!dev) + return 0; + + tbl = get_iommu_table_base(dev); + if (tbl && tbl->it_group) { + *ppdev = pdev; + return 1; + } + + return 0; +} + +/** + * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE + * @group: IOMMU group + * + * The routine is called to convert IOMMU group to EEH PE. + */ +struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group) +{ + struct pci_dev *pdev = NULL; + struct eeh_dev *edev; + int ret; + + /* No IOMMU group ? */ + if (!group) + return NULL; + + ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table); + if (!ret || !pdev) + return NULL; + + /* No EEH device or PE ? */ + edev = pci_dev_to_eeh_dev(pdev); + if (!edev || !edev->pe) + return NULL; + + return edev->pe; +} +EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe); + +#endif /* CONFIG_IOMMU_API */ + +/** + * eeh_pe_set_option - Set options for the indicated PE + * @pe: EEH PE + * @option: requested option + * + * The routine is called to enable or disable EEH functionality + * on the indicated PE, to enable IO or DMA for the frozen PE. + */ +int eeh_pe_set_option(struct eeh_pe *pe, int option) +{ + int ret = 0; + + /* Invalid PE ? */ + if (!pe) + return -ENODEV; + + /* + * EEH functionality could possibly be disabled, just + * return error for the case. And the EEH functinality + * isn't expected to be disabled on one specific PE. + */ + switch (option) { + case EEH_OPT_ENABLE: + if (eeh_enabled()) { + ret = eeh_pe_change_owner(pe); + break; + } + ret = -EIO; + break; + case EEH_OPT_DISABLE: + break; + case EEH_OPT_THAW_MMIO: + case EEH_OPT_THAW_DMA: + if (!eeh_ops || !eeh_ops->set_option) { + ret = -ENOENT; + break; + } + + ret = eeh_pci_enable(pe, option); + break; + default: + pr_debug("%s: Option %d out of range (%d, %d)\n", + __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA); + ret = -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL_GPL(eeh_pe_set_option); + +/** + * eeh_pe_get_state - Retrieve PE's state + * @pe: EEH PE + * + * Retrieve the PE's state, which includes 3 aspects: enabled + * DMA, enabled IO and asserted reset. + */ +int eeh_pe_get_state(struct eeh_pe *pe) +{ + int result, ret = 0; + bool rst_active, dma_en, mmio_en; + + /* Existing PE ? */ + if (!pe) + return -ENODEV; + + if (!eeh_ops || !eeh_ops->get_state) + return -ENOENT; + + result = eeh_ops->get_state(pe, NULL); + rst_active = !!(result & EEH_STATE_RESET_ACTIVE); + dma_en = !!(result & EEH_STATE_DMA_ENABLED); + mmio_en = !!(result & EEH_STATE_MMIO_ENABLED); + + if (rst_active) + ret = EEH_PE_STATE_RESET; + else if (dma_en && mmio_en) + ret = EEH_PE_STATE_NORMAL; + else if (!dma_en && !mmio_en) + ret = EEH_PE_STATE_STOPPED_IO_DMA; + else if (!dma_en && mmio_en) + ret = EEH_PE_STATE_STOPPED_DMA; + else + ret = EEH_PE_STATE_UNAVAIL; + + return ret; +} +EXPORT_SYMBOL_GPL(eeh_pe_get_state); + +static int eeh_pe_reenable_devices(struct eeh_pe *pe) +{ + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; + int ret = 0; + + /* Restore config space */ + eeh_pe_restore_bars(pe); + + /* + * Reenable PCI devices as the devices passed + * through are always enabled before the reset. + */ + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; + + ret = pci_reenable_device(pdev); + if (ret) { + pr_warn("%s: Failure %d reenabling %s\n", + __func__, ret, pci_name(pdev)); + return ret; + } + } + + /* The PE is still in frozen state */ + return eeh_unfreeze_pe(pe, true); +} + +/** + * eeh_pe_reset - Issue PE reset according to specified type + * @pe: EEH PE + * @option: reset type + * + * The routine is called to reset the specified PE with the + * indicated type, either fundamental reset or hot reset. + * PE reset is the most important part for error recovery. + */ +int eeh_pe_reset(struct eeh_pe *pe, int option) +{ + int ret = 0; + + /* Invalid PE ? */ + if (!pe) + return -ENODEV; + + if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset) + return -ENOENT; + + switch (option) { + case EEH_RESET_DEACTIVATE: + ret = eeh_ops->reset(pe, option); + eeh_pe_state_clear(pe, EEH_PE_RESET); + if (ret) + break; + + ret = eeh_pe_reenable_devices(pe); + break; + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + /* + * Proactively freeze the PE to drop all MMIO access + * during reset, which should be banned as it's always + * cause recursive EEH error. + */ + eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); + + eeh_pe_state_mark(pe, EEH_PE_RESET); + ret = eeh_ops->reset(pe, option); + break; + default: + pr_debug("%s: Unsupported option %d\n", + __func__, option); + ret = -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL_GPL(eeh_pe_reset); + +/** + * eeh_pe_configure - Configure PCI bridges after PE reset + * @pe: EEH PE + * + * The routine is called to restore the PCI config space for + * those PCI devices, especially PCI bridges affected by PE + * reset issued previously. + */ +int eeh_pe_configure(struct eeh_pe *pe) +{ + int ret = 0; + + /* Invalid PE ? */ + if (!pe) + return -ENODEV; + + return ret; +} +EXPORT_SYMBOL_GPL(eeh_pe_configure); + static int proc_eeh_show(struct seq_file *m, void *v) { if (!eeh_enabled()) { @@ -1143,9 +1614,9 @@ static const struct file_operations proc_eeh_operations = { static int eeh_enable_dbgfs_set(void *data, u64 val) { if (val) - eeh_subsystem_flags &= ~EEH_FORCE_DISABLED; + eeh_clear_flag(EEH_FORCE_DISABLED); else - eeh_subsystem_flags |= EEH_FORCE_DISABLED; + eeh_add_flag(EEH_FORCE_DISABLED); /* Notify the backend */ if (eeh_ops->post_init) diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index e8c9fd546a5c..07d8a2423a61 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -143,7 +143,7 @@ eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo, } else { if (dev != piar->pcidev || alo != piar->addr_lo || ahi != piar->addr_hi) { - pr_warning("PIAR: overlapping address range\n"); + pr_warn("PIAR: overlapping address range\n"); } return piar; } @@ -177,19 +177,20 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); if (!dn) { - pr_warning("PCI: no pci dn found for dev=%s\n", pci_name(dev)); + pr_warn("PCI: no pci dn found for dev=%s\n", + pci_name(dev)); return; } edev = of_node_to_eeh_dev(dn); if (!edev) { - pr_warning("PCI: no EEH dev found for dn=%s\n", + pr_warn("PCI: no EEH dev found for dn=%s\n", dn->full_name); return; } /* Skip any devices for which EEH is not enabled. */ - if (!eeh_probe_mode_dev() && !edev->pe) { + if (!edev->pe) { #ifdef DEBUG pr_info("PCI: skip building address cache for=%s - %s\n", pci_name(dev), dn->full_name); diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index 1efa28f5fc54..e5274ee9a75f 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -57,7 +57,8 @@ void *eeh_dev_init(struct device_node *dn, void *data) /* Allocate EEH device */ edev = kzalloc(sizeof(*edev), GFP_KERNEL); if (!edev) { - pr_warning("%s: out of memory\n", __func__); + pr_warn("%s: out of memory\n", + __func__); return NULL; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 420da61d4ce0..3fd514f8e4b2 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -180,6 +180,22 @@ static bool eeh_dev_removed(struct eeh_dev *edev) return false; } +static void *eeh_dev_save_state(void *data, void *userdata) +{ + struct eeh_dev *edev = data; + struct pci_dev *pdev; + + if (!edev) + return NULL; + + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + return NULL; + + pci_save_state(pdev); + return NULL; +} + /** * eeh_report_error - Report pci error to each device driver * @data: eeh device @@ -303,6 +319,22 @@ static void *eeh_report_reset(void *data, void *userdata) return NULL; } +static void *eeh_dev_restore_state(void *data, void *userdata) +{ + struct eeh_dev *edev = data; + struct pci_dev *pdev; + + if (!edev) + return NULL; + + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + return NULL; + + pci_restore_state(pdev); + return NULL; +} + /** * eeh_report_resume - Tell device to resume normal operations * @data: eeh device @@ -450,38 +482,82 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) static void *__eeh_clear_pe_frozen_state(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; - int i, rc; + bool *clear_sw_state = flag; + int i, rc = 1; - for (i = 0; i < 3; i++) { - rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); - if (rc) - continue; - rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); - if (!rc) - break; - } + for (i = 0; rc && i < 3; i++) + rc = eeh_unfreeze_pe(pe, clear_sw_state); - /* The PE has been isolated, clear it */ + /* Stop immediately on any errors */ if (rc) { - pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n", - __func__, pe->phb->global_number, pe->addr, rc); + pr_warn("%s: Failure %d unfreezing PHB#%x-PE#%x\n", + __func__, rc, pe->phb->global_number, pe->addr); return (void *)pe; } return NULL; } -static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) +static int eeh_clear_pe_frozen_state(struct eeh_pe *pe, + bool clear_sw_state) { void *rc; - rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL); + rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state); if (!rc) eeh_pe_state_clear(pe, EEH_PE_ISOLATED); return rc ? -EIO : 0; } +int eeh_pe_reset_and_recover(struct eeh_pe *pe) +{ + int result, ret; + + /* Bail if the PE is being recovered */ + if (pe->state & EEH_PE_RECOVERING) + return 0; + + /* Put the PE into recovery mode */ + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + + /* Save states */ + eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL); + + /* Report error */ + eeh_pe_dev_traverse(pe, eeh_report_error, &result); + + /* Issue reset */ + eeh_pe_state_mark(pe, EEH_PE_RESET); + ret = eeh_reset_pe(pe); + if (ret) { + eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_RESET); + return ret; + } + eeh_pe_state_clear(pe, EEH_PE_RESET); + + /* Unfreeze the PE */ + ret = eeh_clear_pe_frozen_state(pe, true); + if (ret) { + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + return ret; + } + + /* Notify completion of reset */ + eeh_pe_dev_traverse(pe, eeh_report_reset, &result); + + /* Restore device state */ + eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL); + + /* Resume */ + eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); + + /* Clear recovery mode */ + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + + return 0; +} + /** * eeh_reset_device - Perform actual reset of a pci slot * @pe: EEH PE @@ -540,7 +616,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) eeh_pe_state_clear(pe, EEH_PE_RESET); /* Clear frozen state */ - rc = eeh_clear_pe_frozen_state(pe); + rc = eeh_clear_pe_frozen_state(pe, false); if (rc) return rc; @@ -599,7 +675,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) pe->freeze_count++; if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) goto excess_failures; - pr_warning("EEH: This PCI device has failed %d times in the last hour\n", + pr_warn("EEH: This PCI device has failed %d times in the last hour\n", pe->freeze_count); /* Walk the various device drivers attached to this slot through @@ -616,7 +692,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) */ rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { - pr_warning("EEH: Permanent failure\n"); + pr_warn("EEH: Permanent failure\n"); goto hard_fail; } @@ -635,8 +711,8 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Reset with hotplug activity\n"); rc = eeh_reset_device(pe, frozen_bus); if (rc) { - pr_warning("%s: Unable to reset, err=%d\n", - __func__, rc); + pr_warn("%s: Unable to reset, err=%d\n", + __func__, rc); goto hard_fail; } } @@ -678,7 +754,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) /* If any device has a hard failure, then shut off everything. */ if (result == PCI_ERS_RESULT_DISCONNECT) { - pr_warning("EEH: Device driver gave up\n"); + pr_warn("EEH: Device driver gave up\n"); goto hard_fail; } @@ -687,8 +763,8 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Reset without hotplug activity\n"); rc = eeh_reset_device(pe, NULL); if (rc) { - pr_warning("%s: Cannot reset, err=%d\n", - __func__, rc); + pr_warn("%s: Cannot reset, err=%d\n", + __func__, rc); goto hard_fail; } @@ -701,7 +777,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) /* All devices should claim they have recovered by now. */ if ((result != PCI_ERS_RESULT_RECOVERED) && (result != PCI_ERS_RESULT_NONE)) { - pr_warning("EEH: Not recovered\n"); + pr_warn("EEH: Not recovered\n"); goto hard_fail; } diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index fbd01eba4473..53dd0915e690 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -32,9 +32,24 @@ #include <asm/pci-bridge.h> #include <asm/ppc-pci.h> +static int eeh_pe_aux_size = 0; static LIST_HEAD(eeh_phb_pe); /** + * eeh_set_pe_aux_size - Set PE auxillary data size + * @size: PE auxillary data size + * + * Set PE auxillary data size + */ +void eeh_set_pe_aux_size(int size) +{ + if (size < 0) + return; + + eeh_pe_aux_size = size; +} + +/** * eeh_pe_alloc - Allocate PE * @phb: PCI controller * @type: PE type @@ -44,9 +59,16 @@ static LIST_HEAD(eeh_phb_pe); static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type) { struct eeh_pe *pe; + size_t alloc_size; + + alloc_size = sizeof(struct eeh_pe); + if (eeh_pe_aux_size) { + alloc_size = ALIGN(alloc_size, cache_line_size()); + alloc_size += eeh_pe_aux_size; + } /* Allocate PHB PE */ - pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL); + pe = kzalloc(alloc_size, GFP_KERNEL); if (!pe) return NULL; /* Initialize PHB PE */ @@ -56,6 +78,8 @@ static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type) INIT_LIST_HEAD(&pe->child); INIT_LIST_HEAD(&pe->edevs); + pe->data = (void *)pe + ALIGN(sizeof(struct eeh_pe), + cache_line_size()); return pe; } @@ -179,7 +203,8 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root, void *ret; if (!root) { - pr_warning("%s: Invalid PE %p\n", __func__, root); + pr_warn("%s: Invalid PE %p\n", + __func__, root); return NULL; } @@ -351,17 +376,6 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) pe->config_addr = edev->config_addr; /* - * While doing PE reset, we probably hot-reset the - * upstream bridge. However, the PCI devices including - * the associated EEH devices might be removed when EEH - * core is doing recovery. So that won't safe to retrieve - * the bridge through downstream EEH device. We have to - * trace the parent PCI bus, then the upstream bridge. - */ - if (eeh_probe_mode_dev()) - pe->bus = eeh_dev_to_pci_dev(edev)->bus; - - /* * Put the new EEH PE into hierarchy tree. If the parent * can't be found, the newly created PE will be attached * to PHB directly. Otherwise, we have to associate the @@ -414,7 +428,7 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) } /* Remove the EEH device */ - pe = edev->pe; + pe = eeh_dev_to_pe(edev); edev->pe = NULL; list_del(&edev->list); @@ -570,6 +584,8 @@ static void *__eeh_pe_state_clear(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; /* Keep the state of permanently removed PE intact */ if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && @@ -578,9 +594,22 @@ static void *__eeh_pe_state_clear(void *data, void *flag) pe->state &= ~state; - /* Clear check count since last isolation */ - if (state & EEH_PE_ISOLATED) - pe->check_count = 0; + /* + * Special treatment on clearing isolated state. Clear + * check count since last isolation and put all affected + * devices to normal state. + */ + if (!(state & EEH_PE_ISOLATED)) + return NULL; + + pe->check_count = 0; + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; + + pdev->error_state = pci_channel_io_normal; + } return NULL; } @@ -802,53 +831,33 @@ void eeh_pe_restore_bars(struct eeh_pe *pe) */ const char *eeh_pe_loc_get(struct eeh_pe *pe) { - struct pci_controller *hose; struct pci_bus *bus = eeh_pe_bus_get(pe); - struct pci_dev *pdev; - struct device_node *dn; - const char *loc; + struct device_node *dn = pci_bus_to_OF_node(bus); + const char *loc = NULL; - if (!bus) - return "N/A"; + if (!dn) + goto out; /* PHB PE or root PE ? */ if (pci_is_root_bus(bus)) { - hose = pci_bus_to_host(bus); - loc = of_get_property(hose->dn, - "ibm,loc-code", NULL); + loc = of_get_property(dn, "ibm,loc-code", NULL); + if (!loc) + loc = of_get_property(dn, "ibm,io-base-loc-code", NULL); if (loc) - return loc; - loc = of_get_property(hose->dn, - "ibm,io-base-loc-code", NULL); - if (loc) - return loc; - - pdev = pci_get_slot(bus, 0x0); - } else { - pdev = bus->self; - } - - if (!pdev) { - loc = "N/A"; - goto out; - } + goto out; - dn = pci_device_to_OF_node(pdev); - if (!dn) { - loc = "N/A"; - goto out; + /* Check the root port */ + dn = dn->child; + if (!dn) + goto out; } loc = of_get_property(dn, "ibm,loc-code", NULL); if (!loc) loc = of_get_property(dn, "ibm,slot-location-code", NULL); - if (!loc) - loc = "N/A"; out: - if (pci_is_root_bus(bus) && pdev) - pci_dev_put(pdev); - return loc; + return loc ? loc : "N/A"; } /** diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index e2595ba4b720..f19b1e5cb060 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -54,6 +54,43 @@ EEH_SHOW_ATTR(eeh_mode, mode, "0x%x"); EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x"); EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); +static ssize_t eeh_pe_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + int state; + + if (!edev || !edev->pe) + return -ENODEV; + + state = eeh_ops->get_state(edev->pe, NULL); + return sprintf(buf, "%0x08x %0x08x\n", + state, edev->pe->state); +} + +static ssize_t eeh_pe_state_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + + if (!edev || !edev->pe) + return -ENODEV; + + /* Nothing to do if it's not frozen */ + if (!(edev->pe->state & EEH_PE_ISOLATED)) + return count; + + if (eeh_unfreeze_pe(edev->pe, true)) + return -EIO; + + return count; +} + +static DEVICE_ATTR_RW(eeh_pe_state); + void eeh_sysfs_add_device(struct pci_dev *pdev) { struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); @@ -68,9 +105,10 @@ void eeh_sysfs_add_device(struct pci_dev *pdev) rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); + rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state); if (rc) - printk(KERN_WARNING "EEH: Unable to create sysfs entries\n"); + pr_warn("EEH: Unable to create sysfs entries\n"); else if (edev) edev->mode |= EEH_DEV_SYSFS; } @@ -92,6 +130,7 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev) device_remove_file(&pdev->dev, &dev_attr_eeh_mode); device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); + device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state); if (edev) edev->mode &= ~EEH_DEV_SYSFS; diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 6528c5e2cc44..5bbd1bc8c3b0 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -482,16 +482,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS) ld r8,KSP(r4) /* new stack pointer */ #ifdef CONFIG_PPC_BOOK3S BEGIN_FTR_SECTION - BEGIN_FTR_SECTION_NESTED(95) clrrdi r6,r8,28 /* get its ESID */ clrrdi r9,r1,28 /* get current sp ESID */ - FTR_SECTION_ELSE_NESTED(95) +FTR_SECTION_ELSE clrrdi r6,r8,40 /* get its 1T ESID */ clrrdi r9,r1,40 /* get current sp 1T ESID */ - ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95) -FTR_SECTION_ELSE - b 2f -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB) +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_1T_SEGMENT) clrldi. r0,r6,2 /* is new ESID c00000000? */ cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */ cror eq,4*cr1+eq,eq @@ -919,6 +915,11 @@ restore_check_irq_replay: addi r3,r1,STACK_FRAME_OVERHEAD; bl do_IRQ b ret_from_except +1: cmpwi cr0,r3,0xe60 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl handle_hmi_exception + b ret_from_except 1: cmpwi cr0,r3,0x900 bne 1f addi r3,r1,STACK_FRAME_OVERHEAD; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a7d36b19221d..050f79a4a168 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -188,10 +188,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) data_access_pSeries: HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) -BEGIN_FTR_SECTION - b data_access_check_stab -data_access_not_stab: -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD, KVMTEST, 0x300) @@ -339,7 +335,7 @@ emulation_assist_trampoline: hv_exception_trampoline: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXGEN) - b hmi_exception_hv + b hmi_exception_early . = 0xe80 hv_doorbell_trampoline: @@ -514,34 +510,6 @@ machine_check_pSeries_0: EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST, 0x200) EXCEPTION_PROLOG_PSERIES_1(machine_check_common, EXC_STD) KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200) - - /* moved from 0x300 */ -data_access_check_stab: - GET_PACA(r13) - std r9,PACA_EXSLB+EX_R9(r13) - std r10,PACA_EXSLB+EX_R10(r13) - mfspr r10,SPRN_DAR - mfspr r9,SPRN_DSISR - srdi r10,r10,60 - rlwimi r10,r9,16,0x20 -#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE - lbz r9,HSTATE_IN_GUEST(r13) - rlwimi r10,r9,8,0x300 -#endif - mfcr r9 - cmpwi r10,0x2c - beq do_stab_bolted_pSeries - mtcrf 0x80,r9 - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - b data_access_not_stab -do_stab_bolted_pSeries: - std r11,PACA_EXSLB+EX_R11(r13) - std r12,PACA_EXSLB+EX_R12(r13) - GET_SCRATCH0(r10) - std r10,PACA_EXSLB+EX_R13(r13) - EXCEPTION_PROLOG_PSERIES_1(do_stab_bolted, EXC_STD) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300) KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400) @@ -621,8 +589,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22) STD_EXCEPTION_HV_OOL(0xe42, emulation_assist) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42) - STD_EXCEPTION_HV_OOL(0xe62, hmi_exception) /* need to flush cache ? */ + MASKABLE_EXCEPTION_HV_OOL(0xe62, hmi_exception) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62) + MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82) @@ -643,6 +612,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) * - If it was a decrementer interrupt, we bump the dec to max and and return. * - If it was a doorbell we return immediately since doorbells are edge * triggered and won't automatically refire. + * - If it was a HMI we return immediately since we handled it in realmode + * and it won't refire. * - else we hard disable and return. * This is called with r10 containing the value to OR to the paca field. */ @@ -660,6 +631,8 @@ masked_##_H##interrupt: \ b 2f; \ 1: cmpwi r10,PACA_IRQ_DBELL; \ beq 2f; \ + cmpwi r10,PACA_IRQ_HMI; \ + beq 2f; \ mfspr r10,SPRN_##_H##SRR1; \ rldicl r10,r10,48,1; /* clear MSR_EE */ \ rotldi r10,r10,16; \ @@ -799,7 +772,7 @@ kvmppc_skip_Hinterrupt: STD_EXCEPTION_COMMON(0xd00, single_step, single_step_exception) STD_EXCEPTION_COMMON(0xe00, trap_0e, unknown_exception) STD_EXCEPTION_COMMON(0xe40, emulation_assist, emulation_assist_interrupt) - STD_EXCEPTION_COMMON(0xe60, hmi_exception, unknown_exception) + STD_EXCEPTION_COMMON_ASYNC(0xe60, hmi_exception, handle_hmi_exception) #ifdef CONFIG_PPC_DOORBELL STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, doorbell_exception) #else @@ -985,66 +958,6 @@ ppc64_runlatch_on_trampoline: b __ppc64_runlatch_on /* - * Here we have detected that the kernel stack pointer is bad. - * R9 contains the saved CR, r13 points to the paca, - * r10 contains the (bad) kernel stack pointer, - * r11 and r12 contain the saved SRR0 and SRR1. - * We switch to using an emergency stack, save the registers there, - * and call kernel_bad_stack(), which panics. - */ -bad_stack: - ld r1,PACAEMERGSP(r13) - subi r1,r1,64+INT_FRAME_SIZE - std r9,_CCR(r1) - std r10,GPR1(r1) - std r11,_NIP(r1) - std r12,_MSR(r1) - mfspr r11,SPRN_DAR - mfspr r12,SPRN_DSISR - std r11,_DAR(r1) - std r12,_DSISR(r1) - mflr r10 - mfctr r11 - mfxer r12 - std r10,_LINK(r1) - std r11,_CTR(r1) - std r12,_XER(r1) - SAVE_GPR(0,r1) - SAVE_GPR(2,r1) - ld r10,EX_R3(r3) - std r10,GPR3(r1) - SAVE_GPR(4,r1) - SAVE_4GPRS(5,r1) - ld r9,EX_R9(r3) - ld r10,EX_R10(r3) - SAVE_2GPRS(9,r1) - ld r9,EX_R11(r3) - ld r10,EX_R12(r3) - ld r11,EX_R13(r3) - std r9,GPR11(r1) - std r10,GPR12(r1) - std r11,GPR13(r1) -BEGIN_FTR_SECTION - ld r10,EX_CFAR(r3) - std r10,ORIG_GPR3(r1) -END_FTR_SECTION_IFSET(CPU_FTR_CFAR) - SAVE_8GPRS(14,r1) - SAVE_10GPRS(22,r1) - lhz r12,PACA_TRAP_SAVE(r13) - std r12,_TRAP(r1) - addi r11,r1,INT_FRAME_SIZE - std r11,0(r1) - li r12,0 - std r12,0(r11) - ld r2,PACATOC(r13) - ld r11,exception_marker@toc(r2) - std r12,RESULT(r1) - std r11,STACK_FRAME_OVERHEAD-16(r1) -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl kernel_bad_stack - b 1b - -/* * Here r13 points to the paca, r9 contains the saved CR, * SRR0 and SRR1 are saved in r11 and r12, * r9 - r13 are saved in paca->exgen. @@ -1057,7 +970,7 @@ data_access_common: mfspr r10,SPRN_DSISR stw r10,PACA_EXGEN+EX_DSISR(r13) EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) ld r12,_MSR(r1) ld r3,PACA_EXGEN+EX_DAR(r13) lwz r4,PACA_EXGEN+EX_DSISR(r13) @@ -1073,7 +986,7 @@ h_data_storage_common: stw r10,PACA_EXGEN+EX_DSISR(r13) EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception b ret_from_except @@ -1082,7 +995,7 @@ h_data_storage_common: .globl instruction_access_common instruction_access_common: EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) ld r12,_MSR(r1) ld r3,_NIP(r1) andis. r4,r12,0x5820 @@ -1146,7 +1059,7 @@ slb_miss_fault: unrecov_user_slb: EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN) - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) bl save_nvgprs 1: addi r3,r1,STACK_FRAME_OVERHEAD bl unrecoverable_exception @@ -1169,7 +1082,7 @@ machine_check_common: stw r10,PACA_EXGEN+EX_DSISR(r13) EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) FINISH_NAP - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) ld r3,PACA_EXGEN+EX_DAR(r13) lwz r4,PACA_EXGEN+EX_DSISR(r13) std r3,_DAR(r1) @@ -1192,7 +1105,7 @@ alignment_common: std r3,_DAR(r1) std r4,_DSISR(r1) bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl alignment_exception b ret_from_except @@ -1202,7 +1115,7 @@ alignment_common: program_check_common: EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl program_check_exception b ret_from_except @@ -1213,7 +1126,7 @@ fp_unavailable_common: EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) bne 1f /* if from user, just load it up */ bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl kernel_fp_unavailable_exception BUG_OPCODE @@ -1232,7 +1145,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl fp_unavailable_tm b ret_from_except @@ -1258,7 +1171,7 @@ BEGIN_FTR_SECTION #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_tm b ret_from_except @@ -1267,7 +1180,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception b ret_from_except @@ -1292,7 +1205,7 @@ BEGIN_FTR_SECTION #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_tm b ret_from_except @@ -1301,7 +1214,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif bl save_nvgprs - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_exception b ret_from_except @@ -1338,11 +1251,60 @@ fwnmi_data_area: . = 0x8000 #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ -/* Space for CPU0's segment table */ - .balign 4096 - .globl initial_stab -initial_stab: - .space 4096 + .globl hmi_exception_early +hmi_exception_early: + EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60) + mr r10,r1 /* Save r1 */ + ld r1,PACAEMERGSP(r13) /* Use emergency stack */ + subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ + std r9,_CCR(r1) /* save CR in stackframe */ + mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ + std r11,_NIP(r1) /* save HSRR0 in stackframe */ + mfspr r12,SPRN_HSRR1 /* Save SRR1 */ + std r12,_MSR(r1) /* save SRR1 in stackframe */ + std r10,0(r1) /* make stack chain pointer */ + std r0,GPR0(r1) /* save r0 in stackframe */ + std r10,GPR1(r1) /* save r1 in stackframe */ + EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) + EXCEPTION_PROLOG_COMMON_3(0xe60) + addi r3,r1,STACK_FRAME_OVERHEAD + bl hmi_exception_realmode + /* Windup the stack. */ + /* Clear MSR_RI before setting SRR0 and SRR1. */ + li r0,MSR_RI + mfmsr r9 /* get MSR value */ + andc r9,r9,r0 + mtmsrd r9,1 /* Clear MSR_RI */ + /* Move original HSRR0 and HSRR1 into the respective regs */ + ld r9,_MSR(r1) + mtspr SPRN_HSRR1,r9 + ld r3,_NIP(r1) + mtspr SPRN_HSRR0,r3 + ld r9,_CTR(r1) + mtctr r9 + ld r9,_XER(r1) + mtxer r9 + ld r9,_LINK(r1) + mtlr r9 + REST_GPR(0, r1) + REST_8GPRS(2, r1) + REST_GPR(10, r1) + ld r11,_CCR(r1) + mtcr r11 + REST_GPR(11, r1) + REST_2GPRS(12, r1) + /* restore original r1. */ + ld r1,GPR1(r1) + + /* + * Go to virtual mode and pull the HMI event information from + * firmware. + */ + .globl hmi_exception_after_realmode +hmi_exception_after_realmode: + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b hmi_exception_hv #ifdef CONFIG_PPC_POWERNV _GLOBAL(opal_mc_secondary_handler) @@ -1566,7 +1528,7 @@ slb_miss_realmode: unrecov_slb: EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) - DISABLE_INTS + RECONCILE_IRQ_STATE(r10, r11) bl save_nvgprs 1: addi r3,r1,STACK_FRAME_OVERHEAD bl unrecoverable_exception @@ -1594,12 +1556,6 @@ do_hash_page: bne- handle_page_fault /* if not, try to insert a HPTE */ andis. r0,r4,DSISR_DABRMATCH@h bne- handle_dabr_fault - -BEGIN_FTR_SECTION - andis. r0,r4,0x0020 /* Is it a segment table fault? */ - bne- do_ste_alloc /* If so handle it */ -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) - CURRENT_THREAD_INFO(r11, r1) lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ @@ -1681,113 +1637,62 @@ handle_dabr_fault: bl bad_page_fault b ret_from_except - /* here we have a segment miss */ -do_ste_alloc: - bl ste_allocate /* try to insert stab entry */ - cmpdi r3,0 - bne- handle_page_fault - b fast_exception_return - /* - * r13 points to the PACA, r9 contains the saved CR, + * Here we have detected that the kernel stack pointer is bad. + * R9 contains the saved CR, r13 points to the paca, + * r10 contains the (bad) kernel stack pointer, * r11 and r12 contain the saved SRR0 and SRR1. - * r9 - r13 are saved in paca->exslb. - * We assume we aren't going to take any exceptions during this procedure. - * We assume (DAR >> 60) == 0xc. + * We switch to using an emergency stack, save the registers there, + * and call kernel_bad_stack(), which panics. */ - .align 7 -do_stab_bolted: - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ - mfspr r11,SPRN_DAR /* ea */ - - /* - * check for bad kernel/user address - * (ea & ~REGION_MASK) >= PGTABLE_RANGE - */ - rldicr. r9,r11,4,(63 - 46 - 4) - li r9,0 /* VSID = 0 for bad address */ - bne- 0f - - /* - * Calculate VSID: - * This is the kernel vsid, we take the top for context from - * the range. context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 - * Here we know that (ea >> 60) == 0xc - */ - lis r9,(MAX_USER_CONTEXT + 1)@ha - addi r9,r9,(MAX_USER_CONTEXT + 1)@l - - srdi r10,r11,SID_SHIFT - rldimi r10,r9,ESID_BITS,0 /* proto vsid */ - ASM_VSID_SCRAMBLE(r10, r9, 256M) - rldic r9,r10,12,16 /* r9 = vsid << 12 */ - -0: - /* Hash to the primary group */ - ld r10,PACASTABVIRT(r13) - srdi r11,r11,SID_SHIFT - rldimi r10,r11,7,52 /* r10 = first ste of the group */ - - /* Search the primary group for a free entry */ -1: ld r11,0(r10) /* Test valid bit of the current ste */ - andi. r11,r11,0x80 - beq 2f - addi r10,r10,16 - andi. r11,r10,0x70 - bne 1b - - /* Stick for only searching the primary group for now. */ - /* At least for now, we use a very simple random castout scheme */ - /* Use the TB as a random number ; OR in 1 to avoid entry 0 */ - mftb r11 - rldic r11,r11,4,57 /* r11 = (r11 << 4) & 0x70 */ - ori r11,r11,0x10 - - /* r10 currently points to an ste one past the group of interest */ - /* make it point to the randomly selected entry */ - subi r10,r10,128 - or r10,r10,r11 /* r10 is the entry to invalidate */ - - isync /* mark the entry invalid */ - ld r11,0(r10) - rldicl r11,r11,56,1 /* clear the valid bit */ - rotldi r11,r11,8 - std r11,0(r10) - sync - - clrrdi r11,r11,28 /* Get the esid part of the ste */ - slbie r11 - -2: std r9,8(r10) /* Store the vsid part of the ste */ - eieio - - mfspr r11,SPRN_DAR /* Get the new esid */ - clrrdi r11,r11,28 /* Permits a full 32b of ESID */ - ori r11,r11,0x90 /* Turn on valid and kp */ - std r11,0(r10) /* Put new entry back into the stab */ - - sync - - /* All done -- return from exception. */ - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ - ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */ - - andi. r10,r12,MSR_RI - beq- unrecov_slb - - mtcrf 0x80,r9 /* restore CR */ - - mfmsr r10 - clrrdi r10,r10,2 - mtmsrd r10,1 - - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - rfid - b . /* prevent speculative execution */ +bad_stack: + ld r1,PACAEMERGSP(r13) + subi r1,r1,64+INT_FRAME_SIZE + std r9,_CCR(r1) + std r10,GPR1(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + mfspr r11,SPRN_DAR + mfspr r12,SPRN_DSISR + std r11,_DAR(r1) + std r12,_DSISR(r1) + mflr r10 + mfctr r11 + mfxer r12 + std r10,_LINK(r1) + std r11,_CTR(r1) + std r12,_XER(r1) + SAVE_GPR(0,r1) + SAVE_GPR(2,r1) + ld r10,EX_R3(r3) + std r10,GPR3(r1) + SAVE_GPR(4,r1) + SAVE_4GPRS(5,r1) + ld r9,EX_R9(r3) + ld r10,EX_R10(r3) + SAVE_2GPRS(9,r1) + ld r9,EX_R11(r3) + ld r10,EX_R12(r3) + ld r11,EX_R13(r3) + std r9,GPR11(r1) + std r10,GPR12(r1) + std r11,GPR13(r1) +BEGIN_FTR_SECTION + ld r10,EX_CFAR(r3) + std r10,ORIG_GPR3(r1) +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + SAVE_8GPRS(14,r1) + SAVE_10GPRS(22,r1) + lhz r12,PACA_TRAP_SAVE(r13) + std r12,_TRAP(r1) + addi r11,r1,INT_FRAME_SIZE + std r11,0(r1) + li r12,0 + std r12,0(r11) + ld r2,PACATOC(r13) + ld r11,exception_marker@toc(r2) + std r12,RESULT(r1) + std r11,STACK_FRAME_OVERHEAD-16(r1) +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl kernel_bad_stack + b 1b diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index f202d0731b06..390311c0f03d 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -10,6 +10,8 @@ * */ +#define pr_fmt(fmt) "ftrace-powerpc: " fmt + #include <linux/spinlock.h> #include <linux/hardirq.h> #include <linux/uaccess.h> @@ -105,7 +107,7 @@ __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned int op; - unsigned long ptr; + unsigned long entry, ptr; unsigned long ip = rec->ip; void *tramp; @@ -115,7 +117,7 @@ __ftrace_make_nop(struct module *mod, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - printk(KERN_ERR "Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %x\n", op); return -EINVAL; } @@ -125,21 +127,21 @@ __ftrace_make_nop(struct module *mod, pr_devel("ip:%lx jumps to %p", ip, tramp); if (!is_module_trampoline(tramp)) { - printk(KERN_ERR "Not a trampoline\n"); + pr_err("Not a trampoline\n"); return -EINVAL; } if (module_trampoline_target(mod, tramp, &ptr)) { - printk(KERN_ERR "Failed to get trampoline target\n"); + pr_err("Failed to get trampoline target\n"); return -EFAULT; } pr_devel("trampoline target %lx", ptr); + entry = ppc_global_function_entry((void *)addr); /* This should match what was called */ - if (ptr != ppc_function_entry((void *)addr)) { - printk(KERN_ERR "addr %lx does not match expected %lx\n", - ptr, ppc_function_entry((void *)addr)); + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); return -EINVAL; } @@ -179,7 +181,7 @@ __ftrace_make_nop(struct module *mod, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - printk(KERN_ERR "Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %x\n", op); return -EINVAL; } @@ -198,7 +200,7 @@ __ftrace_make_nop(struct module *mod, /* Find where the trampoline jumps to */ if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { - printk(KERN_ERR "Failed to read %lx\n", tramp); + pr_err("Failed to read %lx\n", tramp); return -EFAULT; } @@ -209,7 +211,7 @@ __ftrace_make_nop(struct module *mod, ((jmp[1] & 0xffff0000) != 0x398c0000) || (jmp[2] != 0x7d8903a6) || (jmp[3] != 0x4e800420)) { - printk(KERN_ERR "Not a trampoline\n"); + pr_err("Not a trampoline\n"); return -EINVAL; } @@ -221,8 +223,7 @@ __ftrace_make_nop(struct module *mod, pr_devel(" %lx ", tramp); if (tramp != addr) { - printk(KERN_ERR - "Trampoline location %08lx does not match addr\n", + pr_err("Trampoline location %08lx does not match addr\n", tramp); return -EINVAL; } @@ -263,15 +264,13 @@ int ftrace_make_nop(struct module *mod, */ if (!rec->arch.mod) { if (!mod) { - printk(KERN_ERR "No module loaded addr=%lx\n", - addr); + pr_err("No module loaded addr=%lx\n", addr); return -EFAULT; } rec->arch.mod = mod; } else if (mod) { if (mod != rec->arch.mod) { - printk(KERN_ERR - "Record mod %p not equal to passed in mod %p\n", + pr_err("Record mod %p not equal to passed in mod %p\n", rec->arch.mod, mod); return -EINVAL; } @@ -307,26 +306,25 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * The load offset is different depending on the ABI. For simplicity * just mask it out when doing the compare. */ - if ((op[0] != 0x48000008) || ((op[1] & 0xffff00000) != 0xe8410000)) { - printk(KERN_ERR "Unexpected call sequence: %x %x\n", - op[0], op[1]); + if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) { + pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]); return -EINVAL; } /* If we never set up a trampoline to ftrace_caller, then bail */ if (!rec->arch.mod->arch.tramp) { - printk(KERN_ERR "No ftrace trampoline\n"); + pr_err("No ftrace trampoline\n"); return -EINVAL; } /* Ensure branch is within 24 bits */ - if (create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) { - printk(KERN_ERR "Branch out of range"); + if (!create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) { + pr_err("Branch out of range\n"); return -EINVAL; } if (patch_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) { - printk(KERN_ERR "REL24 out of range!\n"); + pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -345,13 +343,13 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) /* It should be pointing to a nop */ if (op != PPC_INST_NOP) { - printk(KERN_ERR "Expected NOP but have %x\n", op); + pr_err("Expected NOP but have %x\n", op); return -EINVAL; } /* If we never set up a trampoline to ftrace_caller, then bail */ if (!rec->arch.mod->arch.tramp) { - printk(KERN_ERR "No ftrace trampoline\n"); + pr_err("No ftrace trampoline\n"); return -EINVAL; } @@ -359,7 +357,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) op = create_branch((unsigned int *)ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK); if (!op) { - printk(KERN_ERR "REL24 out of range!\n"); + pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -397,7 +395,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * already have a module defined. */ if (!rec->arch.mod) { - printk(KERN_ERR "No module loaded\n"); + pr_err("No module loaded\n"); return -EINVAL; } @@ -527,6 +525,9 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long)&return_to_handler; + if (unlikely(ftrace_graph_is_dead())) + return; + if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index c334f53453f7..b5061abbd2e0 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -1210,10 +1210,12 @@ clear_utlb_entry: /* We configure icbi to invalidate 128 bytes at a time since the * current 32-bit kernel code isn't too happy with icache != dcache - * block size + * block size. We also disable the BTAC as this can cause errors + * in some circumstances (see IBM Erratum 47). */ mfspr r3,SPRN_CCR0 oris r3,r3,0x0020 + ori r3,r3,0x0040 mtspr SPRN_CCR0,r3 isync diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 36ff6f03f903..d48125d0c048 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -640,7 +640,7 @@ __secondary_start: addi r14,r14,THREAD_SIZE-STACK_FRAME_OVERHEAD std r14,PACAKSAVE(r13) - /* Do early setup for that CPU (stab, slb, hash table pointer) */ + /* Do early setup for that CPU (SLB and hash table pointer) */ bl early_setup_secondary /* @@ -793,8 +793,10 @@ start_here_multiplatform: li r0,0 stdu r0,-STACK_FRAME_OVERHEAD(r1) - /* Do very early kernel initializations, including initial hash table, - * stab and slb setup before we turn on relocation. */ + /* + * Do very early kernel initializations, including initial hash table + * and SLB setup before we turn on relocation. + */ /* Restore parameters passed from prom_init/kexec */ mr r3,r31 diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 0bb5918faaaf..1f7d84e2e8b2 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -293,7 +293,7 @@ out: /* * Handle single-step exceptions following a DABR hit. */ -int __kprobes single_step_dabr_instruction(struct die_args *args) +static int __kprobes single_step_dabr_instruction(struct die_args *args) { struct pt_regs *regs = args->regs; struct perf_event *bp = NULL; diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 1114d13ac19f..ac86c53e2542 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -55,7 +55,7 @@ static struct device ibmebus_bus_device = { /* fake "parent" device */ struct bus_type ibmebus_bus_type; /* These devices will automatically be added to the bus during init */ -static struct of_device_id __initdata ibmebus_matches[] = { +static const struct of_device_id ibmebus_matches[] __initconst = { { .compatible = "IBM,lhca" }, { .compatible = "IBM,lhea" }, {}, diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 2480256272d4..c0754bbf8118 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -73,7 +73,7 @@ _GLOBAL(power7_powersave_common) /* Check if something happened while soft-disabled */ lbz r0,PACAIRQHAPPENED(r13) - cmpwi cr0,r0,0 + andi. r0,r0,~PACA_IRQ_HARD_DIS@l beq 1f cmpwi cr0,r4,0 beq 1f @@ -131,21 +131,72 @@ _GLOBAL(power7_nap) _GLOBAL(power7_sleep) li r3,1 - li r4,0 + li r4,1 b power7_powersave_common /* No return */ +/* + * Make opal call in realmode. This is a generic function to be called + * from realmode from reset vector. It handles endianess. + * + * r13 - paca pointer + * r1 - stack pointer + * r3 - opal token + */ +opal_call_realmode: + mflr r12 + std r12,_LINK(r1) + ld r2,PACATOC(r13) + /* Set opal return address */ + LOAD_REG_ADDR(r0,return_from_opal_call) + mtlr r0 + /* Handle endian-ness */ + li r0,MSR_LE + mfmsr r12 + andc r12,r12,r0 + mtspr SPRN_HSRR1,r12 + mr r0,r3 /* Move opal token to r0 */ + LOAD_REG_ADDR(r11,opal) + ld r12,8(r11) + ld r2,0(r11) + mtspr SPRN_HSRR0,r12 + hrfid + +return_from_opal_call: + FIXUP_ENDIAN + ld r0,_LINK(r1) + mtlr r0 + blr + +#define CHECK_HMI_INTERRUPT \ + mfspr r0,SPRN_SRR1; \ +BEGIN_FTR_SECTION_NESTED(66); \ + rlwinm r0,r0,45-31,0xf; /* extract wake reason field (P8) */ \ +FTR_SECTION_ELSE_NESTED(66); \ + rlwinm r0,r0,45-31,0xe; /* P7 wake reason field is 3 bits */ \ +ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ + cmpwi r0,0xa; /* Hypervisor maintenance ? */ \ + bne 20f; \ + /* Invoke opal call to handle hmi */ \ + ld r2,PACATOC(r13); \ + ld r1,PACAR1(r13); \ + std r3,ORIG_GPR3(r1); /* Save original r3 */ \ + li r3,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ + bl opal_call_realmode; \ + ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ +20: nop; + + _GLOBAL(power7_wakeup_tb_loss) ld r2,PACATOC(r13); ld r1,PACAR1(r13) +BEGIN_FTR_SECTION + CHECK_HMI_INTERRUPT +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) /* Time base re-sync */ - li r0,OPAL_RESYNC_TIMEBASE - LOAD_REG_ADDR(r11,opal); - ld r12,8(r11); - ld r2,0(r11); - mtctr r12 - bctrl + li r3,OPAL_RESYNC_TIMEBASE + bl opal_call_realmode; /* TODO: Check r3 for failure */ @@ -163,6 +214,9 @@ _GLOBAL(power7_wakeup_tb_loss) _GLOBAL(power7_wakeup_loss) ld r1,PACAR1(r13) +BEGIN_FTR_SECTION + CHECK_HMI_INTERRUPT +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) REST_NVGPRS(r1) REST_GPR(2, r1) ld r3,_CCR(r1) @@ -178,6 +232,9 @@ _GLOBAL(power7_wakeup_noloss) lbz r0,PACA_NAPSTATELOST(r13) cmpwi r0,0 bne power7_wakeup_loss +BEGIN_FTR_SECTION + CHECK_HMI_INTERRUPT +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) ld r1,PACAR1(r13) ld r4,_MSR(r1) ld r5,_NIP(r1) diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index b82227e7e21b..12e48d56f771 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -23,7 +23,7 @@ unsigned int ioread16(void __iomem *addr) } unsigned int ioread16be(void __iomem *addr) { - return in_be16(addr); + return readw_be(addr); } unsigned int ioread32(void __iomem *addr) { @@ -31,7 +31,7 @@ unsigned int ioread32(void __iomem *addr) } unsigned int ioread32be(void __iomem *addr) { - return in_be32(addr); + return readl_be(addr); } EXPORT_SYMBOL(ioread8); EXPORT_SYMBOL(ioread16); @@ -49,7 +49,7 @@ void iowrite16(u16 val, void __iomem *addr) } void iowrite16be(u16 val, void __iomem *addr) { - out_be16(addr, val); + writew_be(val, addr); } void iowrite32(u32 val, void __iomem *addr) { @@ -57,7 +57,7 @@ void iowrite32(u32 val, void __iomem *addr) } void iowrite32be(u32 val, void __iomem *addr) { - out_be32(addr, val); + writel_be(val, addr); } EXPORT_SYMBOL(iowrite8); EXPORT_SYMBOL(iowrite16); @@ -75,15 +75,15 @@ EXPORT_SYMBOL(iowrite32be); */ void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) { - _insb((u8 __iomem *) addr, dst, count); + readsb(addr, dst, count); } void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) { - _insw_ns((u16 __iomem *) addr, dst, count); + readsw(addr, dst, count); } void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) { - _insl_ns((u32 __iomem *) addr, dst, count); + readsl(addr, dst, count); } EXPORT_SYMBOL(ioread8_rep); EXPORT_SYMBOL(ioread16_rep); @@ -91,15 +91,15 @@ EXPORT_SYMBOL(ioread32_rep); void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) { - _outsb((u8 __iomem *) addr, src, count); + writesb(addr, src, count); } void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) { - _outsw_ns((u16 __iomem *) addr, src, count); + writesw(addr, src, count); } void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) { - _outsl_ns((u32 __iomem *) addr, src, count); + writesl(addr, src, count); } EXPORT_SYMBOL(iowrite8_rep); EXPORT_SYMBOL(iowrite16_rep); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 88e3ec6e1d96..a10642a0d861 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1037,7 +1037,7 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, /* if (unlikely(ret)) pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", - __func__, hwaddr, entry << IOMMU_PAGE_SHIFT(tbl), + __func__, hwaddr, entry << tbl->it_page_shift, hwaddr, ret); */ return ret; @@ -1056,7 +1056,7 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, direction != DMA_TO_DEVICE, &page); if (unlikely(ret != 1)) { /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n", - tce, entry << IOMMU_PAGE_SHIFT(tbl), ret); */ + tce, entry << tbl->it_page_shift, ret); */ return -EFAULT; } hwaddr = (unsigned long) page_address(page) + offset; @@ -1120,37 +1120,41 @@ EXPORT_SYMBOL_GPL(iommu_release_ownership); int iommu_add_device(struct device *dev) { struct iommu_table *tbl; - int ret = 0; - if (WARN_ON(dev->iommu_group)) { - pr_warn("iommu_tce: device %s is already in iommu group %d, skipping\n", - dev_name(dev), - iommu_group_id(dev->iommu_group)); + /* + * The sysfs entries should be populated before + * binding IOMMU group. If sysfs entries isn't + * ready, we simply bail. + */ + if (!device_is_registered(dev)) + return -ENOENT; + + if (dev->iommu_group) { + pr_debug("%s: Skipping device %s with iommu group %d\n", + __func__, dev_name(dev), + iommu_group_id(dev->iommu_group)); return -EBUSY; } tbl = get_iommu_table_base(dev); if (!tbl || !tbl->it_group) { - pr_debug("iommu_tce: skipping device %s with no tbl\n", - dev_name(dev)); + pr_debug("%s: Skipping device %s with no tbl\n", + __func__, dev_name(dev)); return 0; } - pr_debug("iommu_tce: adding %s to iommu group %d\n", - dev_name(dev), iommu_group_id(tbl->it_group)); + pr_debug("%s: Adding %s to iommu group %d\n", + __func__, dev_name(dev), + iommu_group_id(tbl->it_group)); if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) { - pr_err("iommu_tce: unsupported iommu page size."); - pr_err("%s has not been added\n", dev_name(dev)); + pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n", + __func__, IOMMU_PAGE_SIZE(tbl), + PAGE_SIZE, dev_name(dev)); return -EINVAL; } - ret = iommu_group_add_device(tbl->it_group, dev); - if (ret < 0) - pr_err("iommu_tce: %s has not been added, ret=%d\n", - dev_name(dev), ret); - - return ret; + return iommu_group_add_device(tbl->it_group, dev); } EXPORT_SYMBOL_GPL(iommu_add_device); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 248ee7e5bebd..8eb857f216c1 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -189,6 +189,11 @@ notrace unsigned int __check_irq_replay(void) } #endif /* CONFIG_PPC_BOOK3E */ + /* Check if an hypervisor Maintenance interrupt happened */ + local_paca->irq_happened &= ~PACA_IRQ_HMI; + if (happened & PACA_IRQ_HMI) + return 0xe60; + /* There should be nothing left ! */ BUG_ON(local_paca->irq_happened != 0); @@ -377,6 +382,14 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); seq_printf(p, " Machine check exceptions\n"); + if (cpu_has_feature(CPU_FTR_HVMODE)) { + seq_printf(p, "%*s: ", prec, "HMI"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat, j).hmi_exceptions); + seq_printf(p, " Hypervisor Maintenance Interrupts\n"); + } + #ifdef CONFIG_PPC_DOORBELL if (cpu_has_feature(CPU_FTR_DBELL)) { seq_printf(p, "%*s: ", prec, "DBL"); @@ -400,6 +413,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += per_cpu(irq_stat, cpu).mce_exceptions; sum += per_cpu(irq_stat, cpu).spurious_irqs; sum += per_cpu(irq_stat, cpu).timer_irqs_others; + sum += per_cpu(irq_stat, cpu).hmi_exceptions; #ifdef CONFIG_PPC_DOORBELL sum += per_cpu(irq_stat, cpu).doorbell_irqs; #endif @@ -430,13 +444,13 @@ void migrate_irqs(void) cpumask_and(mask, data->affinity, map); if (cpumask_any(mask) >= nr_cpu_ids) { - printk("Breaking affinity for irq %i\n", irq); + pr_warn("Breaking affinity for irq %i\n", irq); cpumask_copy(mask, map); } if (chip->irq_set_affinity) chip->irq_set_affinity(data, mask, true); else if (desc->action && !(warned++)) - printk("Cannot set affinity for irq %i\n", irq); + pr_err("Cannot set affinity for irq %i\n", irq); } free_cpumask_var(mask); @@ -456,7 +470,7 @@ static inline void check_stack_overflow(void) /* check for stack overflow: is there less than 2KB free? */ if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { - printk("do_IRQ: stack overflow: %ld\n", + pr_err("do_IRQ: stack overflow: %ld\n", sp - sizeof(struct thread_info)); dump_stack(); } diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 90fab64d911d..2f72af82513c 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -32,6 +32,7 @@ #include <linux/module.h> #include <linux/kdebug.h> #include <linux/slab.h> +#include <asm/code-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/uaccess.h> @@ -491,12 +492,10 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, return ret; } -#ifdef CONFIG_PPC64 unsigned long arch_deref_entry_point(void *entry) { - return ((func_descr_t *)entry)->entry; + return ppc_global_function_entry(entry); } -#endif int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { @@ -508,8 +507,12 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) /* setup return addr to the jprobe handler routine */ regs->nip = arch_deref_entry_point(jp->entry); #ifdef CONFIG_PPC64 +#if defined(_CALL_ELF) && _CALL_ELF == 2 + regs->gpr[12] = (unsigned long)jp->entry; +#else regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); #endif +#endif return 1; } diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 936258881c98..7b750c4ed5c7 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -35,7 +35,7 @@ static struct legacy_serial_info { phys_addr_t taddr; } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS]; -static struct of_device_id legacy_serial_parents[] __initdata = { +static const struct of_device_id legacy_serial_parents[] __initconst = { {.type = "soc",}, {.type = "tsi-bridge",}, {.type = "opb", }, diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 6cff040bf456..c94d2e018d84 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -15,6 +15,9 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/moduleloader.h> #include <linux/elf.h> @@ -28,12 +31,6 @@ #include <linux/sort.h> #include <asm/setup.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt , ...) -#endif - /* Count how many different relocations (different symbol, different addend) */ static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num) @@ -121,8 +118,8 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr, continue; if (sechdrs[i].sh_type == SHT_RELA) { - DEBUGP("Found relocations in section %u\n", i); - DEBUGP("Ptr: %p. Number: %u\n", + pr_debug("Found relocations in section %u\n", i); + pr_debug("Ptr: %p. Number: %u\n", (void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size / sizeof(Elf32_Rela)); @@ -161,7 +158,7 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr, me->arch.core_plt_section = i; } if (!me->arch.core_plt_section || !me->arch.init_plt_section) { - printk("Module doesn't contain .plt or .init.plt sections.\n"); + pr_err("Module doesn't contain .plt or .init.plt sections.\n"); return -ENOEXEC; } @@ -189,7 +186,7 @@ static uint32_t do_plt_call(void *location, { struct ppc_plt_entry *entry; - DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); + pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); /* Init, or core PLT? */ if (location >= mod->module_core && location < mod->module_core + mod->core_size) @@ -208,7 +205,7 @@ static uint32_t do_plt_call(void *location, entry->jump[2] = 0x7d8903a6; /* mtctr r12 */ entry->jump[3] = 0x4e800420; /* bctr */ - DEBUGP("Initialized plt for 0x%x at %p\n", val, entry); + pr_debug("Initialized plt for 0x%x at %p\n", val, entry); return (uint32_t)entry; } @@ -224,7 +221,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, uint32_t *location; uint32_t value; - DEBUGP("Applying ADD relocate section %u to %u\n", relsec, + pr_debug("Applying ADD relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { /* This is where to make the change */ @@ -268,17 +265,17 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, sechdrs, module); /* Only replace bits 2 through 26 */ - DEBUGP("REL24 value = %08X. location = %08X\n", + pr_debug("REL24 value = %08X. location = %08X\n", value, (uint32_t)location); - DEBUGP("Location before: %08X.\n", + pr_debug("Location before: %08X.\n", *(uint32_t *)location); *(uint32_t *)location = (*(uint32_t *)location & ~0x03fffffc) | ((value - (uint32_t)location) & 0x03fffffc); - DEBUGP("Location after: %08X.\n", + pr_debug("Location after: %08X.\n", *(uint32_t *)location); - DEBUGP("ie. jump to %08X+%08X = %08X\n", + pr_debug("ie. jump to %08X+%08X = %08X\n", *(uint32_t *)location & 0x03fffffc, (uint32_t)location, (*(uint32_t *)location & 0x03fffffc) @@ -291,7 +288,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, break; default: - printk("%s: unknown ADD relocation: %u\n", + pr_err("%s: unknown ADD relocation: %u\n", module->name, ELF32_R_TYPE(rela[i].r_info)); return -ENOEXEC; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 077d2ce6c5a7..68384514506b 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -15,6 +15,9 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/elf.h> #include <linux/moduleloader.h> @@ -36,11 +39,6 @@ Using a magic allocator which places modules within 32MB solves this, and makes other things simpler. Anton? --RR. */ -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt , ...) -#endif #if defined(_CALL_ELF) && _CALL_ELF == 2 #define R2_STACK_OFFSET 24 @@ -279,8 +277,8 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, /* Every relocated section... */ for (i = 1; i < hdr->e_shnum; i++) { if (sechdrs[i].sh_type == SHT_RELA) { - DEBUGP("Found relocations in section %u\n", i); - DEBUGP("Ptr: %p. Number: %lu\n", + pr_debug("Found relocations in section %u\n", i); + pr_debug("Ptr: %p. Number: %Lu\n", (void *)sechdrs[i].sh_addr, sechdrs[i].sh_size / sizeof(Elf64_Rela)); @@ -304,7 +302,7 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, relocs++; #endif - DEBUGP("Looks like a total of %lu stubs, max\n", relocs); + pr_debug("Looks like a total of %lu stubs, max\n", relocs); return relocs * sizeof(struct ppc64_stub_entry); } @@ -315,8 +313,17 @@ static void dedotify_versions(struct modversion_info *vers, struct modversion_info *end; for (end = (void *)vers + size; vers < end; vers++) - if (vers->name[0] == '.') + if (vers->name[0] == '.') { memmove(vers->name, vers->name+1, strlen(vers->name)); +#ifdef ARCH_RELOCATES_KCRCTAB + /* The TOC symbol has no CRC computed. To avoid CRC + * check failing, we must force it to the expected + * value (see CRC check in module.c). + */ + if (!strcmp(vers->name, "TOC.")) + vers->crc = -(unsigned long)reloc_start; +#endif + } } /* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */ @@ -381,7 +388,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, } if (!me->arch.stubs_section) { - printk("%s: doesn't contain .stubs.\n", me->name); + pr_err("%s: doesn't contain .stubs.\n", me->name); return -ENOEXEC; } @@ -425,11 +432,11 @@ static inline int create_stub(Elf64_Shdr *sechdrs, /* Stub uses address relative to r2. */ reladdr = (unsigned long)entry - my_r2(sechdrs, me); if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { - printk("%s: Address %p of stub out of range of %p.\n", + pr_err("%s: Address %p of stub out of range of %p.\n", me->name, (void *)reladdr, (void *)my_r2); return 0; } - DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr); + pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); entry->jump[0] |= PPC_HA(reladdr); entry->jump[1] |= PPC_LO(reladdr); @@ -468,7 +475,7 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs, static int restore_r2(u32 *instruction, struct module *me) { if (*instruction != PPC_INST_NOP) { - printk("%s: Expect noop after relocate, got %08x\n", + pr_err("%s: Expect noop after relocate, got %08x\n", me->name, *instruction); return 0; } @@ -489,7 +496,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, unsigned long *location; unsigned long value; - DEBUGP("Applying ADD relocate section %u to %u\n", relsec, + pr_debug("Applying ADD relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); /* First time we're called, we can fix up .TOC. */ @@ -510,7 +517,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + ELF64_R_SYM(rela[i].r_info); - DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n", + pr_debug("RELOC at %p: %li-type as %s (0x%lx) + %li\n", location, (long)ELF64_R_TYPE(rela[i].r_info), strtab + sym->st_name, (unsigned long)sym->st_value, (long)rela[i].r_addend); @@ -537,7 +544,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Subtract TOC pointer */ value -= my_r2(sechdrs, me); if (value + 0x8000 > 0xffff) { - printk("%s: bad TOC16 relocation (%lu)\n", + pr_err("%s: bad TOC16 relocation (0x%lx)\n", me->name, value); return -ENOEXEC; } @@ -558,7 +565,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Subtract TOC pointer */ value -= my_r2(sechdrs, me); if ((value & 3) != 0 || value + 0x8000 > 0xffff) { - printk("%s: bad TOC16_DS relocation (%lu)\n", + pr_err("%s: bad TOC16_DS relocation (0x%lx)\n", me->name, value); return -ENOEXEC; } @@ -571,7 +578,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Subtract TOC pointer */ value -= my_r2(sechdrs, me); if ((value & 3) != 0) { - printk("%s: bad TOC16_LO_DS relocation (%lu)\n", + pr_err("%s: bad TOC16_LO_DS relocation (0x%lx)\n", me->name, value); return -ENOEXEC; } @@ -604,7 +611,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Convert value to relative */ value -= (unsigned long)location; if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){ - printk("%s: REL24 %li out of range!\n", + pr_err("%s: REL24 %li out of range!\n", me->name, (long int)value); return -ENOEXEC; } @@ -646,7 +653,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, break; default: - printk("%s: Unknown ADD relocation: %lu\n", + pr_err("%s: Unknown ADD relocation: %lu\n", me->name, (unsigned long)ELF64_R_TYPE(rela[i].r_info)); return -ENOEXEC; diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 28b898e68185..34f7c9b7cd96 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -567,7 +567,7 @@ static int __init nvram_init(void) return rc; } -void __exit nvram_cleanup(void) +static void __exit nvram_cleanup(void) { misc_deregister( &nvram_dev ); } diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index a7b743076720..f87bc1b4bdda 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -97,7 +97,7 @@ static int of_pci_phb_probe(struct platform_device *dev) return 0; } -static struct of_device_id of_pci_phb_ids[] = { +static const struct of_device_id of_pci_phb_ids[] = { { .type = "pci", }, { .type = "pcix", }, { .type = "pcie", }, diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index b49c72fd7f16..bd70a51d5747 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -123,21 +123,12 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus, void pcibios_reset_secondary_bus(struct pci_dev *dev) { - u16 ctrl; - if (ppc_md.pcibios_reset_secondary_bus) { ppc_md.pcibios_reset_secondary_bus(dev); return; } - pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); - ctrl |= PCI_BRIDGE_CTL_BUS_RESET; - pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); - msleep(2); - - ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; - pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); - ssleep(1); + pci_reset_secondary_bus(dev); } static resource_size_t pcibios_io_size(const struct pci_controller *hose) @@ -1149,7 +1140,7 @@ static int reparent_resources(struct resource *parent, * as well. */ -void pcibios_allocate_bus_resources(struct pci_bus *bus) +static void pcibios_allocate_bus_resources(struct pci_bus *bus) { struct pci_bus *b; int i; @@ -1570,7 +1561,6 @@ EARLY_PCI_OP(write, byte, u8) EARLY_PCI_OP(write, word, u16) EARLY_PCI_OP(write, dword, u32) -extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, int cap); int early_find_capability(struct pci_controller *hose, int bus, int devfn, int cap) { diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 44562aa97f16..e6245e9c7d8d 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -38,7 +38,7 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def) * @addr0: value of 1st cell of a device tree PCI address. * @bridge: Set this flag if the address is from a bridge 'ranges' property */ -unsigned int pci_parse_of_flags(u32 addr0, int bridge) +static unsigned int pci_parse_of_flags(u32 addr0, int bridge) { unsigned int flags = 0; diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 48d17d6fca5b..c4dfff6c2719 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -1,207 +1,41 @@ -#include <linux/export.h> -#include <linux/threads.h> -#include <linux/smp.h> -#include <linux/sched.h> -#include <linux/elfcore.h> -#include <linux/string.h> -#include <linux/interrupt.h> -#include <linux/screen_info.h> -#include <linux/vt_kern.h> -#include <linux/nvram.h> -#include <linux/irq.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/bitops.h> +#include <linux/ftrace.h> +#include <linux/mm.h> -#include <asm/page.h> #include <asm/processor.h> -#include <asm/cacheflush.h> -#include <asm/uaccess.h> -#include <asm/io.h> -#include <linux/atomic.h> -#include <asm/checksum.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#include <linux/adb.h> -#include <linux/cuda.h> -#include <linux/pmu.h> -#include <asm/prom.h> -#include <asm/pci-bridge.h> -#include <asm/irq.h> -#include <asm/pmac_feature.h> -#include <asm/dma.h> -#include <asm/machdep.h> -#include <asm/hw_irq.h> -#include <asm/nvram.h> -#include <asm/mmu_context.h> -#include <asm/backlight.h> -#include <asm/time.h> -#include <asm/cputable.h> -#include <asm/btext.h> -#include <asm/div64.h> -#include <asm/signal.h> -#include <asm/dcr.h> -#include <asm/ftrace.h> #include <asm/switch_to.h> +#include <asm/cacheflush.h> #include <asm/epapr_hcalls.h> -#ifdef CONFIG_PPC32 -extern void transfer_to_handler(void); -extern void do_IRQ(struct pt_regs *regs); -extern void machine_check_exception(struct pt_regs *regs); -extern void alignment_exception(struct pt_regs *regs); -extern void program_check_exception(struct pt_regs *regs); -extern void single_step_exception(struct pt_regs *regs); -extern int sys_sigreturn(struct pt_regs *regs); +EXPORT_SYMBOL(flush_dcache_range); +EXPORT_SYMBOL(flush_icache_range); -EXPORT_SYMBOL(clear_pages); -EXPORT_SYMBOL(ISA_DMA_THRESHOLD); -EXPORT_SYMBOL(DMA_MODE_READ); -EXPORT_SYMBOL(DMA_MODE_WRITE); +EXPORT_SYMBOL(empty_zero_page); -EXPORT_SYMBOL(transfer_to_handler); -EXPORT_SYMBOL(do_IRQ); -EXPORT_SYMBOL(machine_check_exception); -EXPORT_SYMBOL(alignment_exception); -EXPORT_SYMBOL(program_check_exception); -EXPORT_SYMBOL(single_step_exception); -EXPORT_SYMBOL(sys_sigreturn); -#endif +long long __bswapdi2(long long); +EXPORT_SYMBOL(__bswapdi2); #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); #endif -EXPORT_SYMBOL(strcpy); -EXPORT_SYMBOL(strncpy); -EXPORT_SYMBOL(strcat); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strcmp); -EXPORT_SYMBOL(strncmp); - -#ifndef CONFIG_GENERIC_CSUM -EXPORT_SYMBOL(csum_partial); -EXPORT_SYMBOL(csum_partial_copy_generic); -EXPORT_SYMBOL(ip_fast_csum); -EXPORT_SYMBOL(csum_tcpudp_magic); -#endif - -EXPORT_SYMBOL(__copy_tofrom_user); -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(copy_page); - -#if defined(CONFIG_PCI) && defined(CONFIG_PPC32) -EXPORT_SYMBOL(isa_io_base); -EXPORT_SYMBOL(isa_mem_base); -EXPORT_SYMBOL(pci_dram_offset); -#endif /* CONFIG_PCI */ - -EXPORT_SYMBOL(start_thread); - #ifdef CONFIG_PPC_FPU EXPORT_SYMBOL(giveup_fpu); EXPORT_SYMBOL(load_fp_state); EXPORT_SYMBOL(store_fp_state); #endif + #ifdef CONFIG_ALTIVEC EXPORT_SYMBOL(giveup_altivec); EXPORT_SYMBOL(load_vr_state); EXPORT_SYMBOL(store_vr_state); -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_VSX -EXPORT_SYMBOL(giveup_vsx); -EXPORT_SYMBOL_GPL(__giveup_vsx); -#endif /* CONFIG_VSX */ -#ifdef CONFIG_SPE -EXPORT_SYMBOL(giveup_spe); -#endif /* CONFIG_SPE */ - -#ifndef CONFIG_PPC64 -EXPORT_SYMBOL(flush_instruction_cache); #endif -EXPORT_SYMBOL(flush_dcache_range); -EXPORT_SYMBOL(flush_icache_range); -#ifdef CONFIG_SMP -#ifdef CONFIG_PPC32 -EXPORT_SYMBOL(smp_hw_index); -#endif -#endif - -#ifdef CONFIG_ADB -EXPORT_SYMBOL(adb_request); -EXPORT_SYMBOL(adb_register); -EXPORT_SYMBOL(adb_unregister); -EXPORT_SYMBOL(adb_poll); -EXPORT_SYMBOL(adb_try_handler_change); -#endif /* CONFIG_ADB */ -#ifdef CONFIG_ADB_CUDA -EXPORT_SYMBOL(cuda_request); -EXPORT_SYMBOL(cuda_poll); -#endif /* CONFIG_ADB_CUDA */ -EXPORT_SYMBOL(to_tm); - -#ifdef CONFIG_PPC32 -long long __ashrdi3(long long, int); -long long __ashldi3(long long, int); -long long __lshrdi3(long long, int); -EXPORT_SYMBOL(__ashrdi3); -EXPORT_SYMBOL(__ashldi3); -EXPORT_SYMBOL(__lshrdi3); -int __ucmpdi2(unsigned long long, unsigned long long); -EXPORT_SYMBOL(__ucmpdi2); -int __cmpdi2(long long, long long); -EXPORT_SYMBOL(__cmpdi2); -#endif -long long __bswapdi2(long long); -EXPORT_SYMBOL(__bswapdi2); -EXPORT_SYMBOL(memcpy); -EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(memcmp); -EXPORT_SYMBOL(memchr); - -#if defined(CONFIG_FB_VGA16_MODULE) -EXPORT_SYMBOL(screen_info); -#endif - -#ifdef CONFIG_PPC32 -EXPORT_SYMBOL(timer_interrupt); -EXPORT_SYMBOL(tb_ticks_per_jiffy); -EXPORT_SYMBOL(cacheable_memcpy); -EXPORT_SYMBOL(cacheable_memzero); -#endif - -#ifdef CONFIG_PPC32 -EXPORT_SYMBOL(switch_mmu_context); -#endif - -#ifdef CONFIG_PPC_STD_MMU_32 -extern long mol_trampoline; -EXPORT_SYMBOL(mol_trampoline); /* For MOL */ -EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ -#ifdef CONFIG_SMP -extern int mmu_hash_lock; -EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ -#endif /* CONFIG_SMP */ -extern long *intercept_table; -EXPORT_SYMBOL(intercept_table); -#endif /* CONFIG_PPC_STD_MMU_32 */ -#ifdef CONFIG_PPC_DCR_NATIVE -EXPORT_SYMBOL(__mtdcr); -EXPORT_SYMBOL(__mfdcr); -#endif -EXPORT_SYMBOL(empty_zero_page); - -#ifdef CONFIG_PPC64 -EXPORT_SYMBOL(__arch_hweight8); -EXPORT_SYMBOL(__arch_hweight16); -EXPORT_SYMBOL(__arch_hweight32); -EXPORT_SYMBOL(__arch_hweight64); +#ifdef CONFIG_VSX +EXPORT_SYMBOL_GPL(__giveup_vsx); #endif -#ifdef CONFIG_PPC_BOOK3S_64 -EXPORT_SYMBOL_GPL(mmu_psize_defs); +#ifdef CONFIG_SPE +EXPORT_SYMBOL(giveup_spe); #endif #ifdef CONFIG_EPAPR_PARAVIRT diff --git a/arch/powerpc/kernel/ppc_ksyms_32.c b/arch/powerpc/kernel/ppc_ksyms_32.c new file mode 100644 index 000000000000..30ddd8a24eee --- /dev/null +++ b/arch/powerpc/kernel/ppc_ksyms_32.c @@ -0,0 +1,61 @@ +#include <linux/export.h> +#include <linux/smp.h> + +#include <asm/page.h> +#include <asm/dma.h> +#include <asm/io.h> +#include <asm/hw_irq.h> +#include <asm/time.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/dcr.h> + +EXPORT_SYMBOL(clear_pages); +EXPORT_SYMBOL(ISA_DMA_THRESHOLD); +EXPORT_SYMBOL(DMA_MODE_READ); +EXPORT_SYMBOL(DMA_MODE_WRITE); + +#if defined(CONFIG_PCI) +EXPORT_SYMBOL(isa_io_base); +EXPORT_SYMBOL(isa_mem_base); +EXPORT_SYMBOL(pci_dram_offset); +#endif + +#ifdef CONFIG_SMP +EXPORT_SYMBOL(smp_hw_index); +#endif + +long long __ashrdi3(long long, int); +long long __ashldi3(long long, int); +long long __lshrdi3(long long, int); +int __ucmpdi2(unsigned long long, unsigned long long); +int __cmpdi2(long long, long long); +EXPORT_SYMBOL(__ashrdi3); +EXPORT_SYMBOL(__ashldi3); +EXPORT_SYMBOL(__lshrdi3); +EXPORT_SYMBOL(__ucmpdi2); +EXPORT_SYMBOL(__cmpdi2); + +EXPORT_SYMBOL(timer_interrupt); +EXPORT_SYMBOL(tb_ticks_per_jiffy); + +EXPORT_SYMBOL(switch_mmu_context); + +#ifdef CONFIG_PPC_STD_MMU_32 +extern long mol_trampoline; +EXPORT_SYMBOL(mol_trampoline); /* For MOL */ +EXPORT_SYMBOL(flush_hash_pages); /* For MOL */ +#ifdef CONFIG_SMP +extern int mmu_hash_lock; +EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */ +#endif /* CONFIG_SMP */ +extern long *intercept_table; +EXPORT_SYMBOL(intercept_table); +#endif /* CONFIG_PPC_STD_MMU_32 */ + +#ifdef CONFIG_PPC_DCR_NATIVE +EXPORT_SYMBOL(__mtdcr); +EXPORT_SYMBOL(__mfdcr); +#endif + +EXPORT_SYMBOL(flush_instruction_cache); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index be99774d3f44..aa1df89c8b2a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -228,6 +228,7 @@ void giveup_vsx(struct task_struct *tsk) giveup_altivec_maybe_transactional(tsk); __giveup_vsx(tsk); } +EXPORT_SYMBOL(giveup_vsx); void flush_vsx_to_thread(struct task_struct *tsk) { @@ -1095,6 +1096,23 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } +static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) +{ +#ifdef CONFIG_PPC_STD_MMU_64 + unsigned long sp_vsid; + unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; + + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) + sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T) + << SLB_VSID_SHIFT_1T; + else + sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_256M) + << SLB_VSID_SHIFT; + sp_vsid |= SLB_VSID_KERNEL | llp; + p->thread.ksp_vsid = sp_vsid; +#endif +} + /* * Copy a thread.. */ @@ -1174,21 +1192,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.vr_save_area = NULL; #endif -#ifdef CONFIG_PPC_STD_MMU_64 - if (mmu_has_feature(MMU_FTR_SLB)) { - unsigned long sp_vsid; - unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; + setup_ksp_vsid(p, sp); - if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) - sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T) - << SLB_VSID_SHIFT_1T; - else - sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_256M) - << SLB_VSID_SHIFT; - sp_vsid |= SLB_VSID_KERNEL | llp; - p->thread.ksp_vsid = sp_vsid; - } -#endif /* CONFIG_PPC_STD_MMU_64 */ #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_DSCR)) { p->thread.dscr_inherit = current->thread.dscr_inherit; @@ -1312,6 +1317,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.tm_tfiar = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ } +EXPORT_SYMBOL(start_thread); #define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \ | PR_FP_EXC_RES | PR_FP_EXC_INV) @@ -1577,7 +1583,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); lr = regs->link; - printk("--- Exception: %lx at %pS\n LR = %pS\n", + printk("--- interrupt: %lx at %pS\n LR = %pS\n", regs->trap, (void *)regs->nip, (void *)lr); firstframe = 1; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 0448b1e9c0ae..099f27e6d1b0 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -155,7 +155,6 @@ static struct ibm_pa_feature { } ibm_pa_features[] __initdata = { {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, - {0, MMU_FTR_SLB, 0, 0, 2, 0}, {CPU_FTR_CTRL, 0, 0, 0, 3, 0}, {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0}, {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1}, @@ -387,8 +386,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, return 0; } -int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname, - int depth, void *data) +static int __init early_init_dt_scan_chosen_ppc(unsigned long node, + const char *uname, + int depth, void *data) { const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */ @@ -642,6 +642,10 @@ void __init early_init_devtree(void *params) DBG(" -> early_init_devtree(%p)\n", params); + /* Too early to BUG_ON(), do it by hand */ + if (!early_init_dt_verify(params)) + panic("BUG: Failed verifying flat device tree, bad version?"); + /* Setup flat device-tree pointer */ initial_boot_params = params; @@ -660,25 +664,16 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL); #endif - /* Pre-initialize the cmd_line with the content of boot_commmand_line, - * which will be empty except when the content of the variable has - * been overriden by a bootloading mechanism. This happens typically - * with HAL takeover - */ - strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); - /* Retrieve various informations from the /chosen node of the * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... */ - of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line); + of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line); /* Scan memory nodes and rebuild MEMBLOCKs */ of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); - /* Save command line for /proc/cmdline and then parse parameters */ - strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); parse_early_param(); /* make sure we've parsed cmdline for mem= before this */ @@ -826,76 +821,6 @@ int cpu_to_chip_id(int cpu) } EXPORT_SYMBOL(cpu_to_chip_id); -#ifdef CONFIG_PPC_PSERIES -/* - * Fix up the uninitialized fields in a new device node: - * name, type and pci-specific fields - */ - -static int of_finish_dynamic_node(struct device_node *node) -{ - struct device_node *parent = of_get_parent(node); - int err = 0; - const phandle *ibm_phandle; - - node->name = of_get_property(node, "name", NULL); - node->type = of_get_property(node, "device_type", NULL); - - if (!node->name) - node->name = "<NULL>"; - if (!node->type) - node->type = "<NULL>"; - - if (!parent) { - err = -ENODEV; - goto out; - } - - /* We don't support that function on PowerMac, at least - * not yet - */ - if (machine_is(powermac)) - return -ENODEV; - - /* fix up new node's phandle field */ - if ((ibm_phandle = of_get_property(node, "ibm,phandle", NULL))) - node->phandle = *ibm_phandle; - -out: - of_node_put(parent); - return err; -} - -static int prom_reconfig_notifier(struct notifier_block *nb, - unsigned long action, void *node) -{ - int err; - - switch (action) { - case OF_RECONFIG_ATTACH_NODE: - err = of_finish_dynamic_node(node); - if (err < 0) - printk(KERN_ERR "finish_node returned %d\n", err); - break; - default: - err = 0; - break; - } - return notifier_from_errno(err); -} - -static struct notifier_block prom_reconfig_nb = { - .notifier_call = prom_reconfig_notifier, - .priority = 10, /* This one needs to run first */ -}; - -static int __init prom_reconfig_setup(void) -{ - return of_reconfig_notifier_register(&prom_reconfig_nb); -} -__initcall(prom_reconfig_setup); -#endif - bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { return (int)phys_id == get_hard_smp_processor_id(cpu); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 078145acf7fb..1a85d8f96739 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1268,201 +1268,6 @@ static u64 __initdata prom_opal_base; static u64 __initdata prom_opal_entry; #endif -#ifdef __BIG_ENDIAN__ -/* XXX Don't change this structure without updating opal-takeover.S */ -static struct opal_secondary_data { - s64 ack; /* 0 */ - u64 go; /* 8 */ - struct opal_takeover_args args; /* 16 */ -} opal_secondary_data; - -static u64 __initdata prom_opal_align; -static u64 __initdata prom_opal_size; -static int __initdata prom_rtas_start_cpu; -static u64 __initdata prom_rtas_data; -static u64 __initdata prom_rtas_entry; - -extern char opal_secondary_entry; - -static void __init prom_query_opal(void) -{ - long rc; - - /* We must not query for OPAL presence on a machine that - * supports TNK takeover (970 blades), as this uses the same - * h-call with different arguments and will crash - */ - if (PHANDLE_VALID(call_prom("finddevice", 1, 1, - ADDR("/tnk-memory-map")))) { - prom_printf("TNK takeover detected, skipping OPAL check\n"); - return; - } - - prom_printf("Querying for OPAL presence... "); - - rc = opal_query_takeover(&prom_opal_size, - &prom_opal_align); - prom_debug("(rc = %ld) ", rc); - if (rc != 0) { - prom_printf("not there.\n"); - return; - } - of_platform = PLATFORM_OPAL; - prom_printf(" there !\n"); - prom_debug(" opal_size = 0x%lx\n", prom_opal_size); - prom_debug(" opal_align = 0x%lx\n", prom_opal_align); - if (prom_opal_align < 0x10000) - prom_opal_align = 0x10000; -} - -static int __init prom_rtas_call(int token, int nargs, int nret, - int *outputs, ...) -{ - struct rtas_args rtas_args; - va_list list; - int i; - - rtas_args.token = token; - rtas_args.nargs = nargs; - rtas_args.nret = nret; - rtas_args.rets = (rtas_arg_t *)&(rtas_args.args[nargs]); - va_start(list, outputs); - for (i = 0; i < nargs; ++i) - rtas_args.args[i] = va_arg(list, rtas_arg_t); - va_end(list); - - for (i = 0; i < nret; ++i) - rtas_args.rets[i] = 0; - - opal_enter_rtas(&rtas_args, prom_rtas_data, - prom_rtas_entry); - - if (nret > 1 && outputs != NULL) - for (i = 0; i < nret-1; ++i) - outputs[i] = rtas_args.rets[i+1]; - return (nret > 0)? rtas_args.rets[0]: 0; -} - -static void __init prom_opal_hold_cpus(void) -{ - int i, cnt, cpu, rc; - long j; - phandle node; - char type[64]; - u32 servers[8]; - void *entry = (unsigned long *)&opal_secondary_entry; - struct opal_secondary_data *data = &opal_secondary_data; - - prom_debug("prom_opal_hold_cpus: start...\n"); - prom_debug(" - entry = 0x%x\n", entry); - prom_debug(" - data = 0x%x\n", data); - - data->ack = -1; - data->go = 0; - - /* look for cpus */ - for (node = 0; prom_next_node(&node); ) { - type[0] = 0; - prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, "cpu") != 0) - continue; - - /* Skip non-configured cpus. */ - if (prom_getprop(node, "status", type, sizeof(type)) > 0) - if (strcmp(type, "okay") != 0) - continue; - - cnt = prom_getprop(node, "ibm,ppc-interrupt-server#s", servers, - sizeof(servers)); - if (cnt == PROM_ERROR) - break; - cnt >>= 2; - for (i = 0; i < cnt; i++) { - cpu = servers[i]; - prom_debug("CPU %d ... ", cpu); - if (cpu == prom.cpu) { - prom_debug("booted !\n"); - continue; - } - prom_debug("starting ... "); - - /* Init the acknowledge var which will be reset by - * the secondary cpu when it awakens from its OF - * spinloop. - */ - data->ack = -1; - rc = prom_rtas_call(prom_rtas_start_cpu, 3, 1, - NULL, cpu, entry, data); - prom_debug("rtas rc=%d ...", rc); - - for (j = 0; j < 100000000 && data->ack == -1; j++) { - HMT_low(); - mb(); - } - HMT_medium(); - if (data->ack != -1) - prom_debug("done, PIR=0x%x\n", data->ack); - else - prom_debug("timeout !\n"); - } - } - prom_debug("prom_opal_hold_cpus: end...\n"); -} - -static void __init prom_opal_takeover(void) -{ - struct opal_secondary_data *data = &opal_secondary_data; - struct opal_takeover_args *args = &data->args; - u64 align = prom_opal_align; - u64 top_addr, opal_addr; - - args->k_image = (u64)_stext; - args->k_size = _end - _stext; - args->k_entry = 0; - args->k_entry2 = 0x60; - - top_addr = _ALIGN_UP(args->k_size, align); - - if (prom_initrd_start != 0) { - args->rd_image = prom_initrd_start; - args->rd_size = prom_initrd_end - args->rd_image; - args->rd_loc = top_addr; - top_addr = _ALIGN_UP(args->rd_loc + args->rd_size, align); - } - - /* Pickup an address for the HAL. We want to go really high - * up to avoid problem with future kexecs. On the other hand - * we don't want to be all over the TCEs on P5IOC2 machines - * which are going to be up there too. We assume the machine - * has plenty of memory, and we ask for the HAL for now to - * be just below the 1G point, or above the initrd - */ - opal_addr = _ALIGN_DOWN(0x40000000 - prom_opal_size, align); - if (opal_addr < top_addr) - opal_addr = top_addr; - args->hal_addr = opal_addr; - - /* Copy the command line to the kernel image */ - strlcpy(boot_command_line, prom_cmd_line, - COMMAND_LINE_SIZE); - - prom_debug(" k_image = 0x%lx\n", args->k_image); - prom_debug(" k_size = 0x%lx\n", args->k_size); - prom_debug(" k_entry = 0x%lx\n", args->k_entry); - prom_debug(" k_entry2 = 0x%lx\n", args->k_entry2); - prom_debug(" hal_addr = 0x%lx\n", args->hal_addr); - prom_debug(" rd_image = 0x%lx\n", args->rd_image); - prom_debug(" rd_size = 0x%lx\n", args->rd_size); - prom_debug(" rd_loc = 0x%lx\n", args->rd_loc); - prom_printf("Performing OPAL takeover,this can take a few minutes..\n"); - prom_close_stdin(); - mb(); - data->go = 1; - for (;;) - opal_do_takeover(args); -} -#endif /* __BIG_ENDIAN__ */ - /* * Allocate room for and instantiate OPAL */ @@ -1597,12 +1402,6 @@ static void __init prom_instantiate_rtas(void) &val, sizeof(val)) != PROM_ERROR) rtas_has_query_cpu_stopped = true; -#if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__) - /* PowerVN takeover hack */ - prom_rtas_data = base; - prom_rtas_entry = entry; - prom_getprop(rtas_node, "start-cpu", &prom_rtas_start_cpu, 4); -#endif prom_debug("rtas base = 0x%x\n", base); prom_debug("rtas entry = 0x%x\n", entry); prom_debug("rtas size = 0x%x\n", (long)size); @@ -3027,16 +2826,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, prom_instantiate_rtas(); #ifdef CONFIG_PPC_POWERNV -#ifdef __BIG_ENDIAN__ - /* Detect HAL and try instanciating it & doing takeover */ - if (of_platform == PLATFORM_PSERIES_LPAR) { - prom_query_opal(); - if (of_platform == PLATFORM_OPAL) { - prom_opal_hold_cpus(); - prom_opal_takeover(); - } - } else -#endif /* __BIG_ENDIAN__ */ if (of_platform == PLATFORM_OPAL) prom_instantiate_opal(); #endif /* CONFIG_PPC_POWERNV */ diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 77aa1e95e904..12640f7e726b 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -21,9 +21,7 @@ _end enter_prom memcpy memset reloc_offset __secondary_hold __secondary_hold_acknowledge __secondary_hold_spinloop __start strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224 reloc_got2 kernstart_addr memstart_addr linux_banner _stext -opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry -boot_command_line __prom_init_toc_start __prom_init_toc_end -btext_setup_display TOC." +__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC." NM="$1" OBJ="$2" @@ -52,24 +50,14 @@ do done # ignore register save/restore funcitons - if [ "${UNDEF:0:9}" = "_restgpr_" ]; then + case $UNDEF in + _restgpr_*|_restgpr0_*|_rest32gpr_*) OK=1 - fi - if [ "${UNDEF:0:10}" = "_restgpr0_" ]; then - OK=1 - fi - if [ "${UNDEF:0:11}" = "_rest32gpr_" ]; then - OK=1 - fi - if [ "${UNDEF:0:9}" = "_savegpr_" ]; then + ;; + _savegpr_*|_savegpr0_*|_save32gpr_*) OK=1 - fi - if [ "${UNDEF:0:10}" = "_savegpr0_" ]; then - OK=1 - fi - if [ "${UNDEF:0:11}" = "_save32gpr_" ]; then - OK=1 - fi + ;; + esac if [ $OK -eq 0 ]; then ERROR=1 diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 2e3d2bf536c5..cdb404ea3468 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -932,7 +932,7 @@ void ptrace_triggered(struct perf_event *bp, } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ -int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, +static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 658e89d2025b..db2b482af658 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -611,17 +611,19 @@ static void rtas_flash_firmware(int reboot_type) for (f = flist; f; f = next) { /* Translate data addrs to absolute */ for (i = 0; i < f->num_blocks; i++) { - f->blocks[i].data = (char *)__pa(f->blocks[i].data); + f->blocks[i].data = (char *)cpu_to_be64(__pa(f->blocks[i].data)); image_size += f->blocks[i].length; + f->blocks[i].length = cpu_to_be64(f->blocks[i].length); } next = f->next; /* Don't translate NULL pointer for last entry */ if (f->next) - f->next = (struct flash_block_list *)__pa(f->next); + f->next = (struct flash_block_list *)cpu_to_be64(__pa(f->next)); else f->next = NULL; /* make num_blocks into the version/length field */ f->num_blocks = (FLASH_BLOCK_LIST_VERSION << 56) | ((f->num_blocks+1)*16); + f->num_blocks = cpu_to_be64(f->num_blocks); } printk(KERN_ALERT "FLASH: flash image is %ld bytes\n", image_size); diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index e736387fee6a..5a2c049c1c61 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -286,7 +286,7 @@ static void prrn_work_fn(struct work_struct *work) static DECLARE_WORK(prrn_work, prrn_work_fn); -void prrn_schedule_update(u32 scope) +static void prrn_schedule_update(u32 scope) { flush_work(&prrn_work); prrn_update_scope = scope; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 1bb4dcde0dcc..1362cd62b3fa 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -81,8 +81,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid); unsigned long klimit = (unsigned long) _end; -char cmd_line[COMMAND_LINE_SIZE]; - /* * This still seems to be needed... -- paulus */ @@ -94,6 +92,9 @@ struct screen_info screen_info = { .orig_video_isVGA = 1, .orig_video_points = 16 }; +#if defined(CONFIG_FB_VGA16_MODULE) +EXPORT_SYMBOL(screen_info); +#endif /* Variables required to store legacy IO irq routing */ int of_i8042_kbd_irq; @@ -382,7 +383,7 @@ void __init check_for_initrd(void) initrd_start = initrd_end = 0; if (initrd_start) - printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); + pr_info("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); DBG(" <- check_for_initrd()\n"); #endif /* CONFIG_BLK_DEV_INITRD */ @@ -471,9 +472,17 @@ void __init smp_setup_cpu_maps(void) nthreads = len / sizeof(int); for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { + bool avail; + DBG(" thread %d -> cpu %d (hard id %d)\n", j, cpu, be32_to_cpu(intserv[j])); - set_cpu_present(cpu, of_device_is_available(dn)); + + avail = of_device_is_available(dn); + if (!avail) + avail = !of_property_match_string(dn, + "enable-method", "spin-table"); + + set_cpu_present(cpu, avail); set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j])); set_cpu_possible(cpu, true); cpu++; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index ea4fda60e57b..07831ed0d9ef 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -268,7 +268,7 @@ static void __init exc_lvl_early_init(void) /* Warning, IO base is not yet inited */ void __init setup_arch(char **cmdline_p) { - *cmdline_p = cmd_line; + *cmdline_p = boot_command_line; /* so udelay does something sensible, assume <= 1000 bogomips */ loops_per_jiffy = 500000000 / HZ; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6d06947e7d21..cd07d79ad21c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -149,13 +149,13 @@ static void check_smt_enabled(void) else if (!strcmp(smt_enabled_cmdline, "off")) smt_enabled_at_boot = 0; else { - long smt; + int smt; int rc; - rc = strict_strtol(smt_enabled_cmdline, 10, &smt); + rc = kstrtoint(smt_enabled_cmdline, 10, &smt); if (!rc) smt_enabled_at_boot = - min(threads_per_core, (int)smt); + min(threads_per_core, smt); } } else { dn = of_find_node_by_path("/options"); @@ -201,7 +201,11 @@ static void cpu_ready_for_interrupts(void) /* Set IR and DR in PACA MSR */ get_paca()->kernel_msr = MSR_KERNEL; - /* Enable AIL if supported */ + /* + * Enable AIL if supported, and we are in hypervisor mode. If we are + * not in hypervisor mode, we enable relocation-on interrupts later + * in pSeries_setup_arch() using the H_SET_MODE hcall. + */ if (cpu_has_feature(CPU_FTR_HVMODE) && cpu_has_feature(CPU_FTR_ARCH_207S)) { unsigned long lpcr = mfspr(SPRN_LPCR); @@ -521,21 +525,31 @@ void __init setup_system(void) printk("Starting Linux PPC64 %s\n", init_utsname()->version); printk("-----------------------------------------------------\n"); - printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); - printk("physicalMemorySize = 0x%llx\n", memblock_phys_mem_size()); + printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); + printk("phys_mem_size = 0x%llx\n", memblock_phys_mem_size()); + if (ppc64_caches.dline_size != 0x80) - printk("ppc64_caches.dcache_line_size = 0x%x\n", - ppc64_caches.dline_size); + printk("dcache_line_size = 0x%x\n", ppc64_caches.dline_size); if (ppc64_caches.iline_size != 0x80) - printk("ppc64_caches.icache_line_size = 0x%x\n", - ppc64_caches.iline_size); + printk("icache_line_size = 0x%x\n", ppc64_caches.iline_size); + + printk("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); + printk(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE); + printk(" always = 0x%016lx\n", CPU_FTRS_ALWAYS); + printk("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features, + cur_cpu_spec->cpu_user_features2); + printk("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); + printk("firmware_features = 0x%016lx\n", powerpc_firmware_features); + #ifdef CONFIG_PPC_STD_MMU_64 if (htab_address) - printk("htab_address = 0x%p\n", htab_address); - printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); -#endif /* CONFIG_PPC_STD_MMU_64 */ + printk("htab_address = 0x%p\n", htab_address); + + printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); +#endif + if (PHYSICAL_START > 0) - printk("physical_start = 0x%llx\n", + printk("physical_start = 0x%llx\n", (unsigned long long)PHYSICAL_START); printk("-----------------------------------------------------\n"); @@ -653,7 +667,7 @@ void __init setup_arch(char **cmdline_p) { ppc64_boot_msg(0x12, "Setup Arch"); - *cmdline_p = cmd_line; + *cmdline_p = boot_command_line; /* * Set cache line size based on type of cpu as a default. @@ -677,9 +691,6 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); -#ifdef CONFIG_PPC_STD_MMU_64 - stabs_alloc(); -#endif /* set up the bootmem stuff with available memory */ do_init_bootmem(); sparse_init(); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 1c794cef2883..cf8c7e4e0b21 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -31,20 +31,14 @@ int show_unhandled_signals = 1; /* * Allocate space for the signal frame */ -void __user * get_sigframe(struct k_sigaction *ka, unsigned long sp, +void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, size_t frame_size, int is_32) { unsigned long oldsp, newsp; /* Default to using normal stack */ oldsp = get_clean_sp(sp, is_32); - - /* Check for alt stack */ - if ((ka->sa.sa_flags & SA_ONSTACK) && - current->sas_ss_size && !on_sig_stack(oldsp)) - oldsp = (current->sas_ss_sp + current->sas_ss_size); - - /* Get aligned frame */ + oldsp = sigsp(oldsp, ksig); newsp = (oldsp - frame_size) & ~0xFUL; /* Check access */ @@ -105,25 +99,23 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, } } -static int do_signal(struct pt_regs *regs) +static void do_signal(struct pt_regs *regs) { sigset_t *oldset = sigmask_to_save(); - siginfo_t info; - int signr; - struct k_sigaction ka; + struct ksignal ksig; int ret; int is32 = is_32bit_task(); - signr = get_signal_to_deliver(&info, &ka, regs, NULL); + get_signal(&ksig); /* Is there any syscall restart business here ? */ - check_syscall_restart(regs, &ka, signr > 0); + check_syscall_restart(regs, &ksig.ka, ksig.sig > 0); - if (signr <= 0) { + if (ksig.sig <= 0) { /* No signal to deliver -- put the saved sigmask back */ restore_saved_sigmask(); regs->trap = 0; - return 0; /* no signals delivered */ + return; /* no signals delivered */ } #ifndef CONFIG_PPC_ADV_DEBUG_REGS @@ -140,23 +132,16 @@ static int do_signal(struct pt_regs *regs) thread_change_pc(current, regs); if (is32) { - if (ka.sa.sa_flags & SA_SIGINFO) - ret = handle_rt_signal32(signr, &ka, &info, oldset, - regs); + if (ksig.ka.sa.sa_flags & SA_SIGINFO) + ret = handle_rt_signal32(&ksig, oldset, regs); else - ret = handle_signal32(signr, &ka, &info, oldset, - regs); + ret = handle_signal32(&ksig, oldset, regs); } else { - ret = handle_rt_signal64(signr, &ka, &info, oldset, regs); + ret = handle_rt_signal64(&ksig, oldset, regs); } regs->trap = 0; - if (ret) { - signal_delivered(signr, &info, &ka, regs, - test_thread_flag(TIF_SINGLESTEP)); - } - - return ret; + signal_setup_done(ret, &ksig, test_thread_flag(TIF_SINGLESTEP)); } void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index c69b9aeb9f23..51b274199dd9 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -12,15 +12,13 @@ extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); -extern void __user * get_sigframe(struct k_sigaction *ka, unsigned long sp, +extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, size_t frame_size, int is_32); -extern int handle_signal32(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, +extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs); -extern int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, +extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs); extern unsigned long copy_fpr_to_user(void __user *to, @@ -44,14 +42,12 @@ extern unsigned long copy_transact_vsx_from_user(struct task_struct *task, #ifdef CONFIG_PPC64 -extern int handle_rt_signal64(int signr, struct k_sigaction *ka, - siginfo_t *info, sigset_t *set, +extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs); #else /* CONFIG_PPC64 */ -static inline int handle_rt_signal64(int signr, struct k_sigaction *ka, - siginfo_t *info, sigset_t *set, +static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { return -EFAULT; diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 4e47db686b5d..b171001698ff 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -54,7 +54,6 @@ #include "signal.h" -#undef DEBUG_SIG #ifdef CONFIG_PPC64 #define sys_rt_sigreturn compat_sys_rt_sigreturn @@ -982,9 +981,8 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) * Set up a signal frame for a "real-time" signal handler * (one which gets siginfo). */ -int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, - struct pt_regs *regs) +int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, + struct pt_regs *regs) { struct rt_sigframe __user *rt_sf; struct mcontext __user *frame; @@ -996,13 +994,13 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, /* Set up Signal Frame */ /* Put a Real Time Context onto stack */ - rt_sf = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*rt_sf), 1); + rt_sf = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*rt_sf), 1); addr = rt_sf; if (unlikely(rt_sf == NULL)) goto badframe; /* Put the siginfo & fill in most of the ucontext */ - if (copy_siginfo_to_user(&rt_sf->info, info) + if (copy_siginfo_to_user(&rt_sf->info, &ksig->info) || __put_user(0, &rt_sf->uc.uc_flags) || __save_altstack(&rt_sf->uc.uc_stack, regs->gpr[1]) || __put_user(to_user_ptr(&rt_sf->uc.uc_mcontext), @@ -1052,21 +1050,17 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, /* Fill registers for signal handler */ regs->gpr[1] = newsp; - regs->gpr[3] = sig; + regs->gpr[3] = ksig->sig; regs->gpr[4] = (unsigned long) &rt_sf->info; regs->gpr[5] = (unsigned long) &rt_sf->uc; regs->gpr[6] = (unsigned long) rt_sf; - regs->nip = (unsigned long) ka->sa.sa_handler; + regs->nip = (unsigned long) ksig->ka.sa.sa_handler; /* enter the signal handler in native-endian mode */ regs->msr &= ~MSR_LE; regs->msr |= (MSR_KERNEL & MSR_LE); - return 1; + return 0; badframe: -#ifdef DEBUG_SIG - printk("badframe in handle_rt_signal, regs=%p frame=%p newsp=%lx\n", - regs, frame, newsp); -#endif if (show_unhandled_signals) printk_ratelimited(KERN_INFO "%s[%d]: bad frame in handle_rt_signal32: " @@ -1074,8 +1068,7 @@ badframe: current->comm, current->pid, addr, regs->nip, regs->link); - force_sigsegv(sig, current); - return 0; + return 1; } static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int sig) @@ -1414,8 +1407,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, /* * OK, we're invoking a handler */ -int handle_signal32(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) +int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs) { struct sigcontext __user *sc; struct sigframe __user *frame; @@ -1425,7 +1417,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, unsigned long tramp; /* Set up Signal Frame */ - frame = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*frame), 1); + frame = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*frame), 1); if (unlikely(frame == NULL)) goto badframe; sc = (struct sigcontext __user *) &frame->sctx; @@ -1433,7 +1425,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, #if _NSIG != 64 #error "Please adjust handle_signal()" #endif - if (__put_user(to_user_ptr(ka->sa.sa_handler), &sc->handler) + if (__put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler) || __put_user(oldset->sig[0], &sc->oldmask) #ifdef CONFIG_PPC64 || __put_user((oldset->sig[0] >> 32), &sc->_unused[3]) @@ -1441,7 +1433,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, || __put_user(oldset->sig[1], &sc->_unused[3]) #endif || __put_user(to_user_ptr(&frame->mctx), &sc->regs) - || __put_user(sig, &sc->signal)) + || __put_user(ksig->sig, &sc->signal)) goto badframe; if (vdso32_sigtramp && current->mm->context.vdso_base) { @@ -1476,18 +1468,14 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, goto badframe; regs->gpr[1] = newsp; - regs->gpr[3] = sig; + regs->gpr[3] = ksig->sig; regs->gpr[4] = (unsigned long) sc; - regs->nip = (unsigned long) ka->sa.sa_handler; + regs->nip = (unsigned long) (unsigned long)ksig->ka.sa.sa_handler; /* enter the signal handler in big-endian mode */ regs->msr &= ~MSR_LE; - return 1; + return 0; badframe: -#ifdef DEBUG_SIG - printk("badframe in handle_signal, regs=%p frame=%p newsp=%lx\n", - regs, frame, newsp); -#endif if (show_unhandled_signals) printk_ratelimited(KERN_INFO "%s[%d]: bad frame in handle_signal32: " @@ -1495,8 +1483,7 @@ badframe: current->comm, current->pid, frame, regs->nip, regs->link); - force_sigsegv(sig, current); - return 0; + return 1; } /* diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index d501dc4dc3e6..2cb0c94cafa5 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -38,7 +38,6 @@ #include "signal.h" -#define DEBUG_SIG 0 #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) #define FP_REGS_SIZE sizeof(elf_fpregset_t) @@ -700,10 +699,6 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, return 0; badframe: -#if DEBUG_SIG - printk("badframe in sys_rt_sigreturn, regs=%p uc=%p &uc->uc_mcontext=%p\n", - regs, uc, &uc->uc_mcontext); -#endif if (show_unhandled_signals) printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, "rt_sigreturn", @@ -713,20 +708,19 @@ badframe: return 0; } -int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs) +int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; unsigned long newsp = 0; long err = 0; - frame = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*frame), 0); + frame = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*frame), 0); if (unlikely(frame == NULL)) goto badframe; err |= __put_user(&frame->info, &frame->pinfo); err |= __put_user(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); + err |= copy_siginfo_to_user(&frame->info, &ksig->info); if (err) goto badframe; @@ -741,15 +735,15 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(&frame->uc_transact, &frame->uc.uc_link); err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext, &frame->uc_transact.uc_mcontext, - regs, signr, + regs, ksig->sig, NULL, - (unsigned long)ka->sa.sa_handler); + (unsigned long)ksig->ka.sa.sa_handler); } else #endif { err |= __put_user(0, &frame->uc.uc_link); - err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, signr, - NULL, (unsigned long)ka->sa.sa_handler, + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, ksig->sig, + NULL, (unsigned long)ksig->ka.sa.sa_handler, 1); } err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); @@ -775,7 +769,7 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, /* Set up "regs" so we "return" to the signal handler. */ if (is_elf2_task()) { - regs->nip = (unsigned long) ka->sa.sa_handler; + regs->nip = (unsigned long) ksig->ka.sa.sa_handler; regs->gpr[12] = regs->nip; } else { /* Handler is *really* a pointer to the function descriptor for @@ -784,7 +778,7 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, * entry is the TOC value we need to use. */ func_descr_t __user *funct_desc_ptr = - (func_descr_t __user *) ka->sa.sa_handler; + (func_descr_t __user *) ksig->ka.sa.sa_handler; err |= get_user(regs->nip, &funct_desc_ptr->entry); err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); @@ -794,9 +788,9 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, regs->msr &= ~MSR_LE; regs->msr |= (MSR_KERNEL & MSR_LE); regs->gpr[1] = newsp; - regs->gpr[3] = signr; + regs->gpr[3] = ksig->sig; regs->result = 0; - if (ka->sa.sa_flags & SA_SIGINFO) { + if (ksig->ka.sa.sa_flags & SA_SIGINFO) { err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo); err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc); regs->gpr[6] = (unsigned long) frame; @@ -806,18 +800,13 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, if (err) goto badframe; - return 1; + return 0; badframe: -#if DEBUG_SIG - printk("badframe in setup_rt_frame, regs=%p frame=%p newsp=%lx\n", - regs, frame, newsp); -#endif if (show_unhandled_signals) printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, "setup_rt_frame", (long)frame, regs->nip, regs->link); - force_sigsegv(signr, current); - return 0; + return 1; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 51a3ff78838a..71e186d5f331 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -52,6 +52,7 @@ #endif #include <asm/vdso.h> #include <asm/debug.h> +#include <asm/kexec.h> #ifdef DEBUG #include <asm/udbg.h> @@ -376,6 +377,14 @@ void __init smp_prepare_cpus(unsigned int max_cpus) GFP_KERNEL, cpu_to_node(cpu)); zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu), GFP_KERNEL, cpu_to_node(cpu)); + /* + * numa_node_id() works after this. + */ + if (cpu_present(cpu)) { + set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]); + set_cpu_numa_mem(cpu, + local_memory_node(numa_cpu_lookup_table[cpu])); + } } cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); @@ -723,9 +732,6 @@ void start_secondary(void *unused) } traverse_core_siblings(cpu, true); - /* - * numa_node_id() works after this. - */ set_numa_node(numa_cpu_lookup_table[cpu]); set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); @@ -747,7 +753,7 @@ int setup_profiling_timer(unsigned int multiplier) #ifdef CONFIG_SCHED_SMT /* cpumask of CPUs with asymetric SMT dependancy */ -static const int powerpc_smt_flags(void) +static int powerpc_smt_flags(void) { int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 895c50ca943c..7ab5d434e2ee 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -39,9 +39,6 @@ .section .rodata,"a" #ifdef CONFIG_PPC64 -#define sys_sigpending sys_ni_syscall -#define sys_old_getrlimit sys_ni_syscall - .p2align 3 #endif diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 9fff9cdcc519..7505599c2593 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -479,7 +479,7 @@ void arch_irq_work_raise(void) #endif /* CONFIG_IRQ_WORK */ -void __timer_interrupt(void) +static void __timer_interrupt(void) { struct pt_regs *regs = get_irq_regs(); u64 *next_tb = &__get_cpu_var(decrementers_next_tb); @@ -643,7 +643,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val) return found; } -void start_cpu_decrementer(void) +static void start_cpu_decrementer(void) { #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) /* Clear any pending timer interrupts */ @@ -741,7 +741,7 @@ static cycle_t timebase_read(struct clocksource *cs) } void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm, - struct clocksource *clock, u32 mult) + struct clocksource *clock, u32 mult, cycle_t cycle_last) { u64 new_tb_to_xs, new_stamp_xsec; u32 frac_sec; @@ -774,7 +774,7 @@ void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm, * We expect the caller to have done the first increment of * vdso_data->tb_update_count already. */ - vdso_data->tb_orig_stamp = clock->cycle_last; + vdso_data->tb_orig_stamp = cycle_last; vdso_data->stamp_xsec = new_stamp_xsec; vdso_data->tb_to_xs = new_tb_to_xs; vdso_data->wtom_clock_sec = wtm->tv_sec; @@ -1024,6 +1024,7 @@ void to_tm(int tim, struct rtc_time * tm) */ GregorianDay(tm); } +EXPORT_SYMBOL(to_tm); /* * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index cb9cfe448ee8..0dc43f9932cf 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -302,6 +302,16 @@ long machine_check_early(struct pt_regs *regs) return handled; } +long hmi_exception_realmode(struct pt_regs *regs) +{ + __get_cpu_var(irq_stat).hmi_exceptions++; + + if (ppc_md.hmi_exception_early) + ppc_md.hmi_exception_early(regs); + + return 0; +} + #endif /* @@ -738,6 +748,20 @@ void SMIException(struct pt_regs *regs) die("System Management Interrupt", regs, SIGABRT); } +void handle_hmi_exception(struct pt_regs *regs) +{ + struct pt_regs *old_regs; + + old_regs = set_irq_regs(regs); + irq_enter(); + + if (ppc_md.handle_hmi_exception) + ppc_md.handle_hmi_exception(regs); + + irq_exit(); + set_irq_regs(old_regs); +} + void unknown_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index ce74c335a6a4..f174351842cf 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -840,19 +840,3 @@ static int __init vdso_init(void) return 0; } arch_initcall(vdso_init); - -int in_gate_area_no_mm(unsigned long addr) -{ - return 0; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - return 0; -} - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return NULL; -} - diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 904c66128fae..5bfdab9047be 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -977,7 +977,7 @@ static ssize_t viodev_cmo_desired_set(struct device *dev, size_t new_desired; int ret; - ret = strict_strtoul(buf, 10, &new_desired); + ret = kstrtoul(buf, 10, &new_desired); if (ret) return ret; diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c deleted file mode 100644 index 9cb4b0a36031..000000000000 --- a/arch/powerpc/kvm/44x.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - */ - -#include <linux/kvm_host.h> -#include <linux/slab.h> -#include <linux/err.h> -#include <linux/export.h> -#include <linux/module.h> -#include <linux/miscdevice.h> - -#include <asm/reg.h> -#include <asm/cputable.h> -#include <asm/tlbflush.h> -#include <asm/kvm_44x.h> -#include <asm/kvm_ppc.h> - -#include "44x_tlb.h" -#include "booke.h" - -static void kvmppc_core_vcpu_load_44x(struct kvm_vcpu *vcpu, int cpu) -{ - kvmppc_booke_vcpu_load(vcpu, cpu); - kvmppc_44x_tlb_load(vcpu); -} - -static void kvmppc_core_vcpu_put_44x(struct kvm_vcpu *vcpu) -{ - kvmppc_44x_tlb_put(vcpu); - kvmppc_booke_vcpu_put(vcpu); -} - -int kvmppc_core_check_processor_compat(void) -{ - int r; - - if (strncmp(cur_cpu_spec->platform, "ppc440", 6) == 0) - r = 0; - else - r = -ENOTSUPP; - - return r; -} - -int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0]; - int i; - - tlbe->tid = 0; - tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; - tlbe->word1 = 0; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; - - tlbe++; - tlbe->tid = 0; - tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; - tlbe->word1 = 0xef600000; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR - | PPC44x_TLB_I | PPC44x_TLB_G; - - /* Since the guest can directly access the timebase, it must know the - * real timebase frequency. Accordingly, it must see the state of - * CCR1[TCS]. */ - /* XXX CCR1 doesn't exist on all 440 SoCs. */ - vcpu->arch.ccr1 = mfspr(SPRN_CCR1); - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) - vcpu_44x->shadow_refs[i].gtlb_index = -1; - - vcpu->arch.cpu_type = KVM_CPU_440; - vcpu->arch.pvr = mfspr(SPRN_PVR); - - return 0; -} - -/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ -int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, - struct kvm_translation *tr) -{ - int index; - gva_t eaddr; - u8 pid; - u8 as; - - eaddr = tr->linear_address; - pid = (tr->linear_address >> 32) & 0xff; - as = (tr->linear_address >> 40) & 0x1; - - index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); - if (index == -1) { - tr->valid = 0; - return 0; - } - - tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); - /* XXX what does "writeable" and "usermode" even mean? */ - tr->valid = 1; - - return 0; -} - -static int kvmppc_core_get_sregs_44x(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return kvmppc_get_sregs_ivor(vcpu, sregs); -} - -static int kvmppc_core_set_sregs_44x(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return kvmppc_set_sregs_ivor(vcpu, sregs); -} - -static int kvmppc_get_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) -{ - return -EINVAL; -} - -static int kvmppc_set_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) -{ - return -EINVAL; -} - -static struct kvm_vcpu *kvmppc_core_vcpu_create_44x(struct kvm *kvm, - unsigned int id) -{ - struct kvmppc_vcpu_44x *vcpu_44x; - struct kvm_vcpu *vcpu; - int err; - - vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu_44x) { - err = -ENOMEM; - goto out; - } - - vcpu = &vcpu_44x->vcpu; - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; - - vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); - if (!vcpu->arch.shared) - goto uninit_vcpu; - - return vcpu; - -uninit_vcpu: - kvm_vcpu_uninit(vcpu); -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu_44x); -out: - return ERR_PTR(err); -} - -static void kvmppc_core_vcpu_free_44x(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - - free_page((unsigned long)vcpu->arch.shared); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu_44x); -} - -static int kvmppc_core_init_vm_44x(struct kvm *kvm) -{ - return 0; -} - -static void kvmppc_core_destroy_vm_44x(struct kvm *kvm) -{ -} - -static struct kvmppc_ops kvm_ops_44x = { - .get_sregs = kvmppc_core_get_sregs_44x, - .set_sregs = kvmppc_core_set_sregs_44x, - .get_one_reg = kvmppc_get_one_reg_44x, - .set_one_reg = kvmppc_set_one_reg_44x, - .vcpu_load = kvmppc_core_vcpu_load_44x, - .vcpu_put = kvmppc_core_vcpu_put_44x, - .vcpu_create = kvmppc_core_vcpu_create_44x, - .vcpu_free = kvmppc_core_vcpu_free_44x, - .mmu_destroy = kvmppc_mmu_destroy_44x, - .init_vm = kvmppc_core_init_vm_44x, - .destroy_vm = kvmppc_core_destroy_vm_44x, - .emulate_op = kvmppc_core_emulate_op_44x, - .emulate_mtspr = kvmppc_core_emulate_mtspr_44x, - .emulate_mfspr = kvmppc_core_emulate_mfspr_44x, -}; - -static int __init kvmppc_44x_init(void) -{ - int r; - - r = kvmppc_booke_init(); - if (r) - goto err_out; - - r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); - if (r) - goto err_out; - kvm_ops_44x.owner = THIS_MODULE; - kvmppc_pr_ops = &kvm_ops_44x; - -err_out: - return r; -} - -static void __exit kvmppc_44x_exit(void) -{ - kvmppc_pr_ops = NULL; - kvmppc_booke_exit(); -} - -module_init(kvmppc_44x_init); -module_exit(kvmppc_44x_exit); -MODULE_ALIAS_MISCDEV(KVM_MINOR); -MODULE_ALIAS("devname:kvm"); diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c deleted file mode 100644 index 92c9ab4bcfec..000000000000 --- a/arch/powerpc/kvm/44x_emulate.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - */ - -#include <asm/kvm_ppc.h> -#include <asm/dcr.h> -#include <asm/dcr-regs.h> -#include <asm/disassemble.h> -#include <asm/kvm_44x.h> -#include "timing.h" - -#include "booke.h" -#include "44x_tlb.h" - -#define XOP_MFDCRX 259 -#define XOP_MFDCR 323 -#define XOP_MTDCRX 387 -#define XOP_MTDCR 451 -#define XOP_TLBSX 914 -#define XOP_ICCCI 966 -#define XOP_TLBWE 978 - -static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn) -{ - /* emulate some access in kernel */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs); - return EMULATE_DONE; - default: - vcpu->run->dcr.dcrn = dcrn; - vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs); - vcpu->run->dcr.is_write = 1; - vcpu->arch.dcr_is_write = 1; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - return EMULATE_DO_DCR; - } -} - -static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn) -{ - /* The guest may access CPR0 registers to determine the timebase - * frequency, and it must know the real host frequency because it - * can directly access the timebase registers. - * - * It would be possible to emulate those accesses in userspace, - * but userspace can really only figure out the end frequency. - * We could decompose that into the factors that compute it, but - * that's tricky math, and it's easier to just report the real - * CPR0 values. - */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr); - break; - case DCRN_CPR0_CONFIG_DATA: - local_irq_disable(); - mtdcr(DCRN_CPR0_CONFIG_ADDR, - vcpu->arch.cpr0_cfgaddr); - kvmppc_set_gpr(vcpu, rt, - mfdcr(DCRN_CPR0_CONFIG_DATA)); - local_irq_enable(); - break; - default: - vcpu->run->dcr.dcrn = dcrn; - vcpu->run->dcr.data = 0; - vcpu->run->dcr.is_write = 0; - vcpu->arch.dcr_is_write = 0; - vcpu->arch.io_gpr = rt; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - return EMULATE_DO_DCR; - } - - return EMULATE_DONE; -} - -int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance) -{ - int emulated = EMULATE_DONE; - int dcrn = get_dcrn(inst); - int ra = get_ra(inst); - int rb = get_rb(inst); - int rc = get_rc(inst); - int rs = get_rs(inst); - int rt = get_rt(inst); - int ws = get_ws(inst); - - switch (get_op(inst)) { - case 31: - switch (get_xop(inst)) { - - case XOP_MFDCR: - emulated = emulate_mfdcr(vcpu, rt, dcrn); - break; - - case XOP_MFDCRX: - emulated = emulate_mfdcr(vcpu, rt, - kvmppc_get_gpr(vcpu, ra)); - break; - - case XOP_MTDCR: - emulated = emulate_mtdcr(vcpu, rs, dcrn); - break; - - case XOP_MTDCRX: - emulated = emulate_mtdcr(vcpu, rs, - kvmppc_get_gpr(vcpu, ra)); - break; - - case XOP_TLBWE: - emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); - break; - - case XOP_TLBSX: - emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); - break; - - case XOP_ICCCI: - break; - - default: - emulated = EMULATE_FAIL; - } - - break; - - default: - emulated = EMULATE_FAIL; - } - - if (emulated == EMULATE_FAIL) - emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance); - - return emulated; -} - -int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) -{ - int emulated = EMULATE_DONE; - - switch (sprn) { - case SPRN_PID: - kvmppc_set_pid(vcpu, spr_val); break; - case SPRN_MMUCR: - vcpu->arch.mmucr = spr_val; break; - case SPRN_CCR0: - vcpu->arch.ccr0 = spr_val; break; - case SPRN_CCR1: - vcpu->arch.ccr1 = spr_val; break; - default: - emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val); - } - - return emulated; -} - -int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) -{ - int emulated = EMULATE_DONE; - - switch (sprn) { - case SPRN_PID: - *spr_val = vcpu->arch.pid; break; - case SPRN_MMUCR: - *spr_val = vcpu->arch.mmucr; break; - case SPRN_CCR0: - *spr_val = vcpu->arch.ccr0; break; - case SPRN_CCR1: - *spr_val = vcpu->arch.ccr1; break; - default: - emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val); - } - - return emulated; -} - diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c deleted file mode 100644 index 0deef1082e02..000000000000 --- a/arch/powerpc/kvm/44x_tlb.c +++ /dev/null @@ -1,528 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2007 - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - */ - -#include <linux/types.h> -#include <linux/string.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/highmem.h> - -#include <asm/tlbflush.h> -#include <asm/mmu-44x.h> -#include <asm/kvm_ppc.h> -#include <asm/kvm_44x.h> -#include "timing.h" - -#include "44x_tlb.h" -#include "trace.h" - -#ifndef PPC44x_TLBE_SIZE -#define PPC44x_TLBE_SIZE PPC44x_TLB_4K -#endif - -#define PAGE_SIZE_4K (1<<12) -#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1)) - -#define PPC44x_TLB_UATTR_MASK \ - (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3) -#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) -#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) - -#ifdef DEBUG -void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe; - int i; - - printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); - printk("| %2s | %3s | %8s | %8s | %8s |\n", - "nr", "tid", "word0", "word1", "word2"); - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { - tlbe = &vcpu_44x->guest_tlb[i]; - if (tlbe->word0 & PPC44x_TLB_VALID) - printk(" G%2d | %02X | %08X | %08X | %08X |\n", - i, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - } -} -#endif - -static inline void kvmppc_44x_tlbie(unsigned int index) -{ - /* 0 <= index < 64, so the V bit is clear and we can use the index as - * word0. */ - asm volatile( - "tlbwe %[index], %[index], 0\n" - : - : [index] "r"(index) - ); -} - -static inline void kvmppc_44x_tlbre(unsigned int index, - struct kvmppc_44x_tlbe *tlbe) -{ - asm volatile( - "tlbre %[word0], %[index], 0\n" - "mfspr %[tid], %[sprn_mmucr]\n" - "andi. %[tid], %[tid], 0xff\n" - "tlbre %[word1], %[index], 1\n" - "tlbre %[word2], %[index], 2\n" - : [word0] "=r"(tlbe->word0), - [word1] "=r"(tlbe->word1), - [word2] "=r"(tlbe->word2), - [tid] "=r"(tlbe->tid) - : [index] "r"(index), - [sprn_mmucr] "i"(SPRN_MMUCR) - : "cc" - ); -} - -static inline void kvmppc_44x_tlbwe(unsigned int index, - struct kvmppc_44x_tlbe *stlbe) -{ - unsigned long tmp; - - asm volatile( - "mfspr %[tmp], %[sprn_mmucr]\n" - "rlwimi %[tmp], %[tid], 0, 0xff\n" - "mtspr %[sprn_mmucr], %[tmp]\n" - "tlbwe %[word0], %[index], 0\n" - "tlbwe %[word1], %[index], 1\n" - "tlbwe %[word2], %[index], 2\n" - : [tmp] "=&r"(tmp) - : [word0] "r"(stlbe->word0), - [word1] "r"(stlbe->word1), - [word2] "r"(stlbe->word2), - [tid] "r"(stlbe->tid), - [index] "r"(index), - [sprn_mmucr] "i"(SPRN_MMUCR) - ); -} - -static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) -{ - /* We only care about the guest's permission and user bits. */ - attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK; - - if (!usermode) { - /* Guest is in supervisor mode, so we need to translate guest - * supervisor permissions into user permissions. */ - attrib &= ~PPC44x_TLB_USER_PERM_MASK; - attrib |= (attrib & PPC44x_TLB_SUPER_PERM_MASK) << 3; - } - - /* Make sure host can always access this memory. */ - attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; - - /* WIMGE = 0b00100 */ - attrib |= PPC44x_TLB_M; - - return attrib; -} - -/* Load shadow TLB back into hardware. */ -void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) { - struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; - - if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) - kvmppc_44x_tlbwe(i, stlbe); - } -} - -static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x, - unsigned int i) -{ - vcpu_44x->shadow_tlb_mod[i] = 1; -} - -/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */ -void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) { - struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; - - if (vcpu_44x->shadow_tlb_mod[i]) - kvmppc_44x_tlbre(i, stlbe); - - if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) - kvmppc_44x_tlbie(i); - } -} - - -/* Search the guest TLB for a matching entry. */ -int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, - unsigned int as) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - /* XXX Replace loop with fancy data structures. */ - for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { - struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i]; - unsigned int tid; - - if (eaddr < get_tlb_eaddr(tlbe)) - continue; - - if (eaddr > get_tlb_end(tlbe)) - continue; - - tid = get_tlb_tid(tlbe); - if (tid && (tid != pid)) - continue; - - if (!get_tlb_v(tlbe)) - continue; - - if (get_tlb_ts(tlbe) != as) - continue; - - return i; - } - - return -1; -} - -gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, - gva_t eaddr) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - unsigned int pgmask = get_tlb_bytes(gtlbe) - 1; - - return get_tlb_raddr(gtlbe) | (eaddr & pgmask); -} - -int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) -{ - unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); - - return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); -} - -int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) -{ - unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); - - return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); -} - -void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) -{ -} - -void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) -{ -} - -static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, - unsigned int stlb_index) -{ - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index]; - - if (!ref->page) - return; - - /* Discard from the TLB. */ - /* Note: we could actually invalidate a host mapping, if the host overwrote - * this TLB entry since we inserted a guest mapping. */ - kvmppc_44x_tlbie(stlb_index); - - /* Now release the page. */ - if (ref->writeable) - kvm_release_page_dirty(ref->page); - else - kvm_release_page_clean(ref->page); - - ref->page = NULL; - - /* XXX set tlb_44x_index to stlb_index? */ - - trace_kvm_stlb_inval(stlb_index); -} - -void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) - kvmppc_44x_shadow_release(vcpu_44x, i); -} - -/** - * kvmppc_mmu_map -- create a host mapping for guest memory - * - * If the guest wanted a larger page than the host supports, only the first - * host page is mapped here and the rest are demand faulted. - * - * If the guest wanted a smaller page than the host page size, we map only the - * guest-size page (i.e. not a full host page mapping). - * - * Caller must ensure that the specified guest TLB entry is safe to insert into - * the shadow TLB. - */ -void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, - unsigned int gtlb_index) -{ - struct kvmppc_44x_tlbe stlbe; - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - struct kvmppc_44x_shadow_ref *ref; - struct page *new_page; - hpa_t hpaddr; - gfn_t gfn; - u32 asid = gtlbe->tid; - u32 flags = gtlbe->word2; - u32 max_bytes = get_tlb_bytes(gtlbe); - unsigned int victim; - - /* Select TLB entry to clobber. Indirectly guard against races with the TLB - * miss handler by disabling interrupts. */ - local_irq_disable(); - victim = ++tlb_44x_index; - if (victim > tlb_44x_hwater) - victim = 0; - tlb_44x_index = victim; - local_irq_enable(); - - /* Get reference to new page. */ - gfn = gpaddr >> PAGE_SHIFT; - new_page = gfn_to_page(vcpu->kvm, gfn); - if (is_error_page(new_page)) { - printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n", - (unsigned long long)gfn); - return; - } - hpaddr = page_to_phys(new_page); - - /* Invalidate any previous shadow mappings. */ - kvmppc_44x_shadow_release(vcpu_44x, victim); - - /* XXX Make sure (va, size) doesn't overlap any other - * entries. 440x6 user manual says the result would be - * "undefined." */ - - /* XXX what about AS? */ - - /* Force TS=1 for all guest mappings. */ - stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS; - - if (max_bytes >= PAGE_SIZE) { - /* Guest mapping is larger than or equal to host page size. We can use - * a "native" host mapping. */ - stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE; - } else { - /* Guest mapping is smaller than host page size. We must restrict the - * size of the mapping to be at most the smaller of the two, but for - * simplicity we fall back to a 4K mapping (this is probably what the - * guest is using anyways). */ - stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K; - - /* 'hpaddr' is a host page, which is larger than the mapping we're - * inserting here. To compensate, we must add the in-page offset to the - * sub-page. */ - hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K); - } - - stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); - stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags, - vcpu->arch.shared->msr & MSR_PR); - stlbe.tid = !(asid & 0xff); - - /* Keep track of the reference so we can properly release it later. */ - ref = &vcpu_44x->shadow_refs[victim]; - ref->page = new_page; - ref->gtlb_index = gtlb_index; - ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW); - ref->tid = stlbe.tid; - - /* Insert shadow mapping into hardware TLB. */ - kvmppc_44x_tlbe_set_modified(vcpu_44x, victim); - kvmppc_44x_tlbwe(victim, &stlbe); - trace_kvm_stlb_write(victim, stlbe.tid, stlbe.word0, stlbe.word1, - stlbe.word2); -} - -/* For a particular guest TLB entry, invalidate the corresponding host TLB - * mappings and release the host pages. */ -static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu, - unsigned int gtlb_index) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; - if (ref->gtlb_index == gtlb_index) - kvmppc_44x_shadow_release(vcpu_44x, i); - } -} - -void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) -{ - int usermode = vcpu->arch.shared->msr & MSR_PR; - - vcpu->arch.shadow_pid = !usermode; -} - -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - if (unlikely(vcpu->arch.pid == new_pid)) - return; - - vcpu->arch.pid = new_pid; - - /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it - * can't access guest kernel mappings (TID=1). When we switch to a new - * guest PID, which will also use host PID=0, we must discard the old guest - * userspace mappings. */ - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; - - if (ref->tid == 0) - kvmppc_44x_shadow_release(vcpu_44x, i); - } -} - -static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, - const struct kvmppc_44x_tlbe *tlbe) -{ - gpa_t gpa; - - if (!get_tlb_v(tlbe)) - return 0; - - /* Does it match current guest AS? */ - /* XXX what about IS != DS? */ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) - return 0; - - gpa = get_tlb_raddr(tlbe); - if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) - /* Mapping is not for RAM. */ - return 0; - - return 1; -} - -int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe; - unsigned int gtlb_index; - int idx; - - gtlb_index = kvmppc_get_gpr(vcpu, ra); - if (gtlb_index >= KVM44x_GUEST_TLB_SIZE) { - printk("%s: index %d\n", __func__, gtlb_index); - kvmppc_dump_vcpu(vcpu); - return EMULATE_FAIL; - } - - tlbe = &vcpu_44x->guest_tlb[gtlb_index]; - - /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */ - if (tlbe->word0 & PPC44x_TLB_VALID) - kvmppc_44x_invalidate(vcpu, gtlb_index); - - switch (ws) { - case PPC44x_TLB_PAGEID: - tlbe->tid = get_mmucr_stid(vcpu); - tlbe->word0 = kvmppc_get_gpr(vcpu, rs); - break; - - case PPC44x_TLB_XLAT: - tlbe->word1 = kvmppc_get_gpr(vcpu, rs); - break; - - case PPC44x_TLB_ATTRIB: - tlbe->word2 = kvmppc_get_gpr(vcpu, rs); - break; - - default: - return EMULATE_FAIL; - } - - idx = srcu_read_lock(&vcpu->kvm->srcu); - - if (tlbe_is_host_safe(vcpu, tlbe)) { - gva_t eaddr; - gpa_t gpaddr; - u32 bytes; - - eaddr = get_tlb_eaddr(tlbe); - gpaddr = get_tlb_raddr(tlbe); - - /* Use the advertised page size to mask effective and real addrs. */ - bytes = get_tlb_bytes(tlbe); - eaddr &= ~(bytes - 1); - gpaddr &= ~(bytes - 1); - - kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); - } - - srcu_read_unlock(&vcpu->kvm->srcu, idx); - - trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - - kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); - return EMULATE_DONE; -} - -int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc) -{ - u32 ea; - int gtlb_index; - unsigned int as = get_mmucr_sts(vcpu); - unsigned int pid = get_mmucr_stid(vcpu); - - ea = kvmppc_get_gpr(vcpu, rb); - if (ra) - ea += kvmppc_get_gpr(vcpu, ra); - - gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); - if (rc) { - u32 cr = kvmppc_get_cr(vcpu); - - if (gtlb_index < 0) - kvmppc_set_cr(vcpu, cr & ~0x20000000); - else - kvmppc_set_cr(vcpu, cr | 0x20000000); - } - kvmppc_set_gpr(vcpu, rt, gtlb_index); - - kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); - return EMULATE_DONE; -} diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h deleted file mode 100644 index a9ff80e51526..000000000000 --- a/arch/powerpc/kvm/44x_tlb.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2007 - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - */ - -#ifndef __KVM_POWERPC_TLB_H__ -#define __KVM_POWERPC_TLB_H__ - -#include <linux/kvm_host.h> -#include <asm/mmu-44x.h> - -extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, - unsigned int pid, unsigned int as); - -extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, - u8 rc); -extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws); - -/* TLB helper functions */ -static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe) -{ - return (tlbe->word0 >> 4) & 0xf; -} - -static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe) -{ - return tlbe->word0 & 0xfffffc00; -} - -static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe) -{ - unsigned int pgsize = get_tlb_size(tlbe); - return 1 << 10 << (pgsize << 1); -} - -static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe) -{ - return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; -} - -static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe) -{ - u64 word1 = tlbe->word1; - return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); -} - -static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe) -{ - return tlbe->tid & 0xff; -} - -static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe) -{ - return (tlbe->word0 >> 8) & 0x1; -} - -static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe) -{ - return (tlbe->word0 >> 9) & 0x1; -} - -static inline unsigned int get_mmucr_stid(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.mmucr & 0xff; -} - -static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.mmucr >> 16) & 0x1; -} - -#endif /* __KVM_POWERPC_TLB_H__ */ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index d6a53b95de94..602eb51d20bc 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -75,7 +75,6 @@ config KVM_BOOK3S_64 config KVM_BOOK3S_64_HV tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host" depends on KVM_BOOK3S_64 - depends on !CPU_LITTLE_ENDIAN select KVM_BOOK3S_HV_POSSIBLE select MMU_NOTIFIER select CMA @@ -113,23 +112,9 @@ config KVM_BOOK3S_64_PR config KVM_BOOKE_HV bool -config KVM_440 - bool "KVM support for PowerPC 440 processors" - depends on 44x - select KVM - select KVM_MMIO - ---help--- - Support running unmodified 440 guest kernels in virtual machines on - 440 host processors. - - This module provides access to the hardware capabilities through - a character device node named /dev/kvm. - - If unsure, say N. - config KVM_EXIT_TIMING bool "Detailed exit timing" - depends on KVM_440 || KVM_E500V2 || KVM_E500MC + depends on KVM_E500V2 || KVM_E500MC ---help--- Calculate elapsed time for every exit/enter cycle. A per-vcpu report is available in debugfs kvm/vm#_vcpu#_timing. @@ -173,6 +158,7 @@ config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_MSI help @@ -184,6 +170,8 @@ config KVM_MPIC config KVM_XICS bool "KVM in-kernel XICS emulation" depends on KVM_BOOK3S_64 && !KVM_MPIC + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD ---help--- Include support for the XICS (eXternal Interrupt Controller Specification) interrupt controller architecture used on diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index ce569b6bf4d8..0570eef83fba 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -10,27 +10,17 @@ KVM := ../../../virt/kvm common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ $(KVM)/eventfd.o -CFLAGS_44x_tlb.o := -I. CFLAGS_e500_mmu.o := -I. CFLAGS_e500_mmu_host.o := -I. CFLAGS_emulate.o := -I. +CFLAGS_emulate_loadstore.o := -I. -common-objs-y += powerpc.o emulate.o +common-objs-y += powerpc.o emulate.o emulate_loadstore.o obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o AFLAGS_booke_interrupts.o := -I$(obj) -kvm-440-objs := \ - $(common-objs-y) \ - booke.o \ - booke_emulate.o \ - booke_interrupts.o \ - 44x.o \ - 44x_tlb.o \ - 44x_emulate.o -kvm-objs-$(CONFIG_KVM_440) := $(kvm-440-objs) - kvm-e500-objs := \ $(common-objs-y) \ booke.o \ @@ -58,6 +48,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \ kvm-pr-y := \ fpu.o \ + emulate.o \ book3s_paired_singles.o \ book3s_pr.o \ book3s_pr_papr.o \ @@ -90,7 +81,6 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_hv_rm_mmu.o \ book3s_hv_ras.o \ book3s_hv_builtin.o \ - book3s_hv_cma.o \ $(kvm-book3s_64-builtin-xics-objs-y) endif @@ -101,7 +91,7 @@ kvm-book3s_64-module-objs += \ $(KVM)/kvm_main.o \ $(KVM)/eventfd.o \ powerpc.o \ - emulate.o \ + emulate_loadstore.o \ book3s.o \ book3s_64_vio.o \ book3s_rtas.o \ @@ -127,7 +117,6 @@ kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o kvm-objs := $(kvm-objs-m) $(kvm-objs-y) -obj-$(CONFIG_KVM_440) += kvm.o obj-$(CONFIG_KVM_E500V2) += kvm.o obj-$(CONFIG_KVM_E500MC) += kvm.o obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index c254c27f240e..dd03f6b299ba 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -72,6 +72,17 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) { } +void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { + ulong pc = kvmppc_get_pc(vcpu); + if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); + vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; + } +} +EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real); + static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) { if (!is_kvmppc_hv_enabled(vcpu->kvm)) @@ -118,6 +129,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { + kvmppc_unfixup_split_real(vcpu); kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags); kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); @@ -218,6 +230,23 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); } +void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar, + ulong flags) +{ + kvmppc_set_dar(vcpu, dar); + kvmppc_set_dsisr(vcpu, flags); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); +} + +void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags) +{ + u64 msr = kvmppc_get_msr(vcpu); + msr &= ~(SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT); + msr |= flags & (SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT); + kvmppc_set_msr_fast(vcpu, msr); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); +} + int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { int deliver = 1; @@ -342,18 +371,18 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter); -pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, +pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable) { - ulong mp_pa = vcpu->arch.magic_page_pa; + ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM; + gfn_t gfn = gpa >> PAGE_SHIFT; if (!(kvmppc_get_msr(vcpu) & MSR_SF)) mp_pa = (uint32_t)mp_pa; /* Magic page override */ - if (unlikely(mp_pa) && - unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) == - ((mp_pa & PAGE_MASK) & KVM_PAM))) { + gpa &= ~0xFFFULL; + if (unlikely(mp_pa) && unlikely((gpa & KVM_PAM) == mp_pa)) { ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; pfn_t pfn; @@ -366,11 +395,13 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable); } -EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn); +EXPORT_SYMBOL_GPL(kvmppc_gpa_to_pfn); -static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, - bool iswrite, struct kvmppc_pte *pte) +int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, + enum xlate_readwrite xlrw, struct kvmppc_pte *pte) { + bool data = (xlid == XLATE_DATA); + bool iswrite = (xlrw == XLATE_WRITE); int relocated = (kvmppc_get_msr(vcpu) & (data ? MSR_DR : MSR_IR)); int r; @@ -384,88 +415,34 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, pte->may_write = true; pte->may_execute = true; r = 0; + + if ((kvmppc_get_msr(vcpu) & (MSR_IR | MSR_DR)) == MSR_DR && + !data) { + if ((vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) && + ((eaddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)) + pte->raddr &= ~SPLIT_HACK_MASK; + } } return r; } -static hva_t kvmppc_bad_hva(void) -{ - return PAGE_OFFSET; -} - -static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte, - bool read) -{ - hva_t hpage; - - if (read && !pte->may_read) - goto err; - - if (!read && !pte->may_write) - goto err; - - hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpage)) - goto err; - - return hpage | (pte->raddr & ~PAGE_MASK); -err: - return kvmppc_bad_hva(); -} - -int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, - bool data) -{ - struct kvmppc_pte pte; - - vcpu->stat.st++; - - if (kvmppc_xlate(vcpu, *eaddr, data, true, &pte)) - return -ENOENT; - - *eaddr = pte.raddr; - - if (!pte.may_write) - return -EPERM; - - if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) - return EMULATE_DO_MMIO; - - return EMULATE_DONE; -} -EXPORT_SYMBOL_GPL(kvmppc_st); - -int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, - bool data) +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, + u32 *inst) { - struct kvmppc_pte pte; - hva_t hva = *eaddr; - - vcpu->stat.ld++; - - if (kvmppc_xlate(vcpu, *eaddr, data, false, &pte)) - goto nopte; - - *eaddr = pte.raddr; - - hva = kvmppc_pte_to_hva(vcpu, &pte, true); - if (kvm_is_error_hva(hva)) - goto mmio; - - if (copy_from_user(ptr, (void __user *)hva, size)) { - printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); - goto mmio; - } + ulong pc = kvmppc_get_pc(vcpu); + int r; - return EMULATE_DONE; + if (type == INST_SC) + pc -= 4; -nopte: - return -ENOENT; -mmio: - return EMULATE_DO_MMIO; + r = kvmppc_ld(vcpu, &pc, sizeof(u32), inst, false); + if (r == EMULATE_DONE) + return r; + else + return EMULATE_AGAIN; } -EXPORT_SYMBOL_GPL(kvmppc_ld); +EXPORT_SYMBOL_GPL(kvmppc_load_last_inst); int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { @@ -646,6 +623,12 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_BESCR: val = get_reg_val(reg->id, vcpu->arch.bescr); break; + case KVM_REG_PPC_VTB: + val = get_reg_val(reg->id, vcpu->arch.vtb); + break; + case KVM_REG_PPC_IC: + val = get_reg_val(reg->id, vcpu->arch.ic); + break; default: r = -EINVAL; break; @@ -750,6 +733,12 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_BESCR: vcpu->arch.bescr = set_reg_val(reg->id, val); break; + case KVM_REG_PPC_VTB: + vcpu->arch.vtb = set_reg_val(reg->id, val); + break; + case KVM_REG_PPC_IC: + vcpu->arch.ic = set_reg_val(reg->id, val); + break; default: r = -EINVAL; break; @@ -913,6 +902,11 @@ int kvmppc_core_check_processor_compat(void) return 0; } +int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall) +{ + return kvm->arch.kvm_ops->hcall_implemented(hcall); +} + static int kvmppc_book3s_init(void) { int r; diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 93503bbdae43..cd0b0730e29e 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -335,7 +335,7 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, if (r < 0) r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, iswrite, true); - if (r < 0) + if (r == -ENOENT) r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, iswrite, false); diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 678e75370495..2035d16a9262 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -156,11 +156,10 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, bool writable; /* Get host physical address for gpa */ - hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT, - iswrite, &writable); + hpaddr = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable); if (is_error_noslot_pfn(hpaddr)) { - printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", - orig_pte->eaddr); + printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n", + orig_pte->raddr); r = -EINVAL; goto out; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 0ac98392f363..b982d925c710 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -104,9 +104,10 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, smp_rmb(); /* Get host physical address for gpa */ - pfn = kvmppc_gfn_to_pfn(vcpu, gfn, iswrite, &writable); + pfn = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable); if (is_error_noslot_pfn(pfn)) { - printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", gfn); + printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n", + orig_pte->raddr); r = -EINVAL; goto out; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 80561074078d..79294c4c5015 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -37,8 +37,6 @@ #include <asm/ppc-opcode.h> #include <asm/cputable.h> -#include "book3s_hv_cma.h" - /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ #define MAX_LPID_970 63 @@ -64,10 +62,10 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) } kvm->arch.hpt_cma_alloc = 0; - VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER); - page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT)); + page = kvm_alloc_hpt(1ul << (order - PAGE_SHIFT)); if (page) { hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); + memset((void *)hpt, 0, (1ul << order)); kvm->arch.hpt_cma_alloc = 1; } @@ -450,7 +448,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned long slb_v; unsigned long pp, key; unsigned long v, gr; - unsigned long *hptep; + __be64 *hptep; int index; int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); @@ -473,13 +471,13 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, preempt_enable(); return -ENOENT; } - hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); - v = hptep[0] & ~HPTE_V_HVLOCK; + hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; gr = kvm->arch.revmap[index].guest_rpte; /* Unlock the HPTE */ asm volatile("lwsync" : : : "memory"); - hptep[0] = v; + hptep[0] = cpu_to_be64(v); preempt_enable(); gpte->eaddr = eaddr; @@ -530,21 +528,14 @@ static int instruction_is_store(unsigned int instr) static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long gpa, gva_t ea, int is_store) { - int ret; u32 last_inst; - unsigned long srr0 = kvmppc_get_pc(vcpu); - /* We try to load the last instruction. We don't let - * emulate_instruction do it as it doesn't check what - * kvmppc_ld returns. + /* * If we fail, we just return to the guest and try executing it again. */ - if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) { - ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); - if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED) - return RESUME_GUEST; - vcpu->arch.last_inst = last_inst; - } + if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != + EMULATE_DONE) + return RESUME_GUEST; /* * WARNING: We do not know for sure whether the instruction we just @@ -558,7 +549,7 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, * we just return and retry the instruction. */ - if (instruction_is_store(kvmppc_get_last_inst(vcpu)) != !!is_store) + if (instruction_is_store(last_inst) != !!is_store) return RESUME_GUEST; /* @@ -583,7 +574,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr) { struct kvm *kvm = vcpu->kvm; - unsigned long *hptep, hpte[3], r; + unsigned long hpte[3], r; + __be64 *hptep; unsigned long mmu_seq, psize, pte_size; unsigned long gpa_base, gfn_base; unsigned long gpa, gfn, hva, pfn; @@ -606,16 +598,16 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, if (ea != vcpu->arch.pgfault_addr) return RESUME_GUEST; index = vcpu->arch.pgfault_index; - hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); + hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); rev = &kvm->arch.revmap[index]; preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); - hpte[0] = hptep[0] & ~HPTE_V_HVLOCK; - hpte[1] = hptep[1]; + hpte[0] = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; + hpte[1] = be64_to_cpu(hptep[1]); hpte[2] = r = rev->guest_rpte; asm volatile("lwsync" : : : "memory"); - hptep[0] = hpte[0]; + hptep[0] = cpu_to_be64(hpte[0]); preempt_enable(); if (hpte[0] != vcpu->arch.pgfault_hpte[0] || @@ -731,8 +723,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); - if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] || - rev->guest_rpte != hpte[2]) + if ((be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK) != hpte[0] || + be64_to_cpu(hptep[1]) != hpte[1] || + rev->guest_rpte != hpte[2]) /* HPTE has been changed under us; let the guest retry */ goto out_unlock; hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; @@ -752,20 +745,20 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; r &= rcbits | ~(HPTE_R_R | HPTE_R_C); - if (hptep[0] & HPTE_V_VALID) { + if (be64_to_cpu(hptep[0]) & HPTE_V_VALID) { /* HPTE was previously valid, so we need to invalidate it */ unlock_rmap(rmap); - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, index); /* don't lose previous R and C bits */ - r |= hptep[1] & (HPTE_R_R | HPTE_R_C); + r |= be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); } else { kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); } - hptep[1] = r; + hptep[1] = cpu_to_be64(r); eieio(); - hptep[0] = hpte[0]; + hptep[0] = cpu_to_be64(hpte[0]); asm volatile("ptesync" : : : "memory"); preempt_enable(); if (page && hpte_is_writable(r)) @@ -784,7 +777,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, return ret; out_unlock: - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); preempt_enable(); goto out_put; } @@ -860,7 +853,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, { struct revmap_entry *rev = kvm->arch.revmap; unsigned long h, i, j; - unsigned long *hptep; + __be64 *hptep; unsigned long ptel, psize, rcbits; for (;;) { @@ -876,11 +869,11 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, * rmap chain lock. */ i = *rmapp & KVMPPC_RMAP_INDEX; - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK) cpu_relax(); continue; } @@ -899,14 +892,14 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, /* Now check and modify the HPTE */ ptel = rev[i].guest_rpte; - psize = hpte_page_size(hptep[0], ptel); - if ((hptep[0] & HPTE_V_VALID) && + psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); + if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && hpte_rpn(ptel, psize) == gfn) { if (kvm->arch.using_mmu_notifiers) - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); /* Harvest R and C */ - rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); + rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; if (rcbits & ~rev[i].guest_rpte) { rev[i].guest_rpte = ptel | rcbits; @@ -914,7 +907,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, } } unlock_rmap(rmapp); - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); } return 0; } @@ -961,7 +954,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, { struct revmap_entry *rev = kvm->arch.revmap; unsigned long head, i, j; - unsigned long *hptep; + __be64 *hptep; int ret = 0; retry: @@ -977,23 +970,24 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, i = head = *rmapp & KVMPPC_RMAP_INDEX; do { - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; /* If this HPTE isn't referenced, ignore it */ - if (!(hptep[1] & HPTE_R_R)) + if (!(be64_to_cpu(hptep[1]) & HPTE_R_R)) continue; if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK) cpu_relax(); goto retry; } /* Now check and modify the HPTE */ - if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { + if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && + (be64_to_cpu(hptep[1]) & HPTE_R_R)) { kvmppc_clear_ref_hpte(kvm, hptep, i); if (!(rev[i].guest_rpte & HPTE_R_R)) { rev[i].guest_rpte |= HPTE_R_R; @@ -1001,7 +995,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, } ret = 1; } - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); } while ((i = j) != head); unlock_rmap(rmapp); @@ -1035,7 +1029,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, do { hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; - if (hp[1] & HPTE_R_R) + if (be64_to_cpu(hp[1]) & HPTE_R_R) goto out; } while ((i = j) != head); } @@ -1075,7 +1069,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) unsigned long head, i, j; unsigned long n; unsigned long v, r; - unsigned long *hptep; + __be64 *hptep; int npages_dirty = 0; retry: @@ -1091,7 +1085,8 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) i = head = *rmapp & KVMPPC_RMAP_INDEX; do { - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + unsigned long hptep1; + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; /* @@ -1108,29 +1103,30 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) * Otherwise we need to do the tlbie even if C==0 in * order to pick up any delayed writeback of C. */ - if (!(hptep[1] & HPTE_R_C) && - (!hpte_is_writable(hptep[1]) || vcpus_running(kvm))) + hptep1 = be64_to_cpu(hptep[1]); + if (!(hptep1 & HPTE_R_C) && + (!hpte_is_writable(hptep1) || vcpus_running(kvm))) continue; if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (hptep[0] & cpu_to_be64(HPTE_V_HVLOCK)) cpu_relax(); goto retry; } /* Now check and modify the HPTE */ - if (!(hptep[0] & HPTE_V_VALID)) + if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) continue; /* need to make it temporarily absent so C is stable */ - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); - v = hptep[0]; - r = hptep[1]; + v = be64_to_cpu(hptep[0]); + r = be64_to_cpu(hptep[1]); if (r & HPTE_R_C) { - hptep[1] = r & ~HPTE_R_C; + hptep[1] = cpu_to_be64(r & ~HPTE_R_C); if (!(rev[i].guest_rpte & HPTE_R_C)) { rev[i].guest_rpte |= HPTE_R_C; note_hpte_modification(kvm, &rev[i]); @@ -1143,7 +1139,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) } v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK); v |= HPTE_V_VALID; - hptep[0] = v; + hptep[0] = cpu_to_be64(v); } while ((i = j) != head); unlock_rmap(rmapp); @@ -1307,7 +1303,7 @@ struct kvm_htab_ctx { * Returns 1 if this HPT entry has been modified or has pending * R/C bit changes. */ -static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) +static int hpte_dirty(struct revmap_entry *revp, __be64 *hptp) { unsigned long rcbits_unset; @@ -1316,13 +1312,14 @@ static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) /* Also need to consider changes in reference and changed bits */ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); - if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset)) + if ((be64_to_cpu(hptp[0]) & HPTE_V_VALID) && + (be64_to_cpu(hptp[1]) & rcbits_unset)) return 1; return 0; } -static long record_hpte(unsigned long flags, unsigned long *hptp, +static long record_hpte(unsigned long flags, __be64 *hptp, unsigned long *hpte, struct revmap_entry *revp, int want_valid, int first_pass) { @@ -1337,10 +1334,10 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, return 0; valid = 0; - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) { + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) { valid = 1; if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && - !(hptp[0] & HPTE_V_BOLTED)) + !(be64_to_cpu(hptp[0]) & HPTE_V_BOLTED)) valid = 0; } if (valid != want_valid) @@ -1352,7 +1349,7 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, preempt_disable(); while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) cpu_relax(); - v = hptp[0]; + v = be64_to_cpu(hptp[0]); /* re-evaluate valid and dirty from synchronized HPTE value */ valid = !!(v & HPTE_V_VALID); @@ -1360,9 +1357,9 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, /* Harvest R and C into guest view if necessary */ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); - if (valid && (rcbits_unset & hptp[1])) { - revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) | - HPTE_GR_MODIFIED; + if (valid && (rcbits_unset & be64_to_cpu(hptp[1]))) { + revp->guest_rpte |= (be64_to_cpu(hptp[1]) & + (HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED; dirty = 1; } @@ -1381,13 +1378,13 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, revp->guest_rpte = r; } asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); - hptp[0] &= ~HPTE_V_HVLOCK; + hptp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); preempt_enable(); if (!(valid == want_valid && (first_pass || dirty))) ok = 0; } - hpte[0] = v; - hpte[1] = r; + hpte[0] = cpu_to_be64(v); + hpte[1] = cpu_to_be64(r); return ok; } @@ -1397,7 +1394,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, struct kvm_htab_ctx *ctx = file->private_data; struct kvm *kvm = ctx->kvm; struct kvm_get_htab_header hdr; - unsigned long *hptp; + __be64 *hptp; struct revmap_entry *revp; unsigned long i, nb, nw; unsigned long __user *lbuf; @@ -1413,7 +1410,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, flags = ctx->flags; i = ctx->index; - hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); revp = kvm->arch.revmap + i; lbuf = (unsigned long __user *)buf; @@ -1497,7 +1494,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, unsigned long i, j; unsigned long v, r; unsigned long __user *lbuf; - unsigned long *hptp; + __be64 *hptp; unsigned long tmp[2]; ssize_t nb; long int err, ret; @@ -1539,7 +1536,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte) break; - hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); lbuf = (unsigned long __user *)buf; for (j = 0; j < hdr.n_valid; ++j) { err = -EFAULT; @@ -1551,7 +1548,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, lbuf += 2; nb += HPTE_SIZE; - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) kvmppc_do_h_remove(kvm, 0, i, 0, tmp); err = -EIO; ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r, @@ -1562,7 +1559,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, goto out; } if (!rma_setup && is_vrma_hpte(v)) { - unsigned long psize = hpte_page_size(v, r); + unsigned long psize = hpte_base_page_size(v, r); unsigned long senc = slb_pgsize_encoding(psize); unsigned long lpcr; @@ -1577,7 +1574,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, } for (j = 0; j < hdr.n_invalid; ++j) { - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) kvmppc_do_h_remove(kvm, 0, i, 0, tmp); ++i; hptp += 2; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 3f295269af37..5a2bc4b0dfe5 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -439,12 +439,6 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) (mfmsr() & MSR_HV)) vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; break; - case SPRN_PURR: - to_book3s(vcpu)->purr_offset = spr_val - get_tb(); - break; - case SPRN_SPURR: - to_book3s(vcpu)->spurr_offset = spr_val - get_tb(); - break; case SPRN_GQR0: case SPRN_GQR1: case SPRN_GQR2: @@ -455,10 +449,10 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_GQR7: to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val; break; +#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_FSCR: - vcpu->arch.fscr = spr_val; + kvmppc_set_fscr(vcpu, spr_val); break; -#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_BESCR: vcpu->arch.bescr = spr_val; break; @@ -572,10 +566,22 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val *spr_val = 0; break; case SPRN_PURR: - *spr_val = get_tb() + to_book3s(vcpu)->purr_offset; + /* + * On exit we would have updated purr + */ + *spr_val = vcpu->arch.purr; break; case SPRN_SPURR: - *spr_val = get_tb() + to_book3s(vcpu)->purr_offset; + /* + * On exit we would have updated spurr + */ + *spr_val = vcpu->arch.spurr; + break; + case SPRN_VTB: + *spr_val = vcpu->arch.vtb; + break; + case SPRN_IC: + *spr_val = vcpu->arch.ic; break; case SPRN_GQR0: case SPRN_GQR1: @@ -587,10 +593,10 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_GQR7: *spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]; break; +#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_FSCR: *spr_val = vcpu->arch.fscr; break; -#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_BESCR: *spr_val = vcpu->arch.bescr; break; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7a12edbb61e7..27cced9c7249 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -35,6 +35,7 @@ #include <asm/reg.h> #include <asm/cputable.h> +#include <asm/cache.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/uaccess.h> @@ -67,6 +68,15 @@ /* Used as a "null" value for timebase values */ #define TB_NIL (~(u64)0) +static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); + +#if defined(CONFIG_PPC_64K_PAGES) +#define MPP_BUFFER_ORDER 0 +#elif defined(CONFIG_PPC_4K_PAGES) +#define MPP_BUFFER_ORDER 3 +#endif + + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -270,7 +280,7 @@ struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) { vpa->__old_status |= LPPACA_OLD_SHARED_PROC; - vpa->yield_count = 1; + vpa->yield_count = cpu_to_be32(1); } static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v, @@ -293,8 +303,8 @@ static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v, struct reg_vpa { u32 dummy; union { - u16 hword; - u32 word; + __be16 hword; + __be32 word; } length; }; @@ -333,9 +343,9 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, if (va == NULL) return H_PARAMETER; if (subfunc == H_VPA_REG_VPA) - len = ((struct reg_vpa *)va)->length.hword; + len = be16_to_cpu(((struct reg_vpa *)va)->length.hword); else - len = ((struct reg_vpa *)va)->length.word; + len = be32_to_cpu(((struct reg_vpa *)va)->length.word); kvmppc_unpin_guest_page(kvm, va, vpa, false); /* Check length */ @@ -540,21 +550,63 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, return; memset(dt, 0, sizeof(struct dtl_entry)); dt->dispatch_reason = 7; - dt->processor_id = vc->pcpu + vcpu->arch.ptid; - dt->timebase = now + vc->tb_offset; - dt->enqueue_to_dispatch_time = stolen; - dt->srr0 = kvmppc_get_pc(vcpu); - dt->srr1 = vcpu->arch.shregs.msr; + dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid); + dt->timebase = cpu_to_be64(now + vc->tb_offset); + dt->enqueue_to_dispatch_time = cpu_to_be32(stolen); + dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu)); + dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr); ++dt; if (dt == vcpu->arch.dtl.pinned_end) dt = vcpu->arch.dtl.pinned_addr; vcpu->arch.dtl_ptr = dt; /* order writing *dt vs. writing vpa->dtl_idx */ smp_wmb(); - vpa->dtl_idx = ++vcpu->arch.dtl_index; + vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index); vcpu->arch.dtl.dirty = true; } +static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207) + return true; + if ((!vcpu->arch.vcore->arch_compat) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + return true; + return false; +} + +static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, + unsigned long resource, unsigned long value1, + unsigned long value2) +{ + switch (resource) { + case H_SET_MODE_RESOURCE_SET_CIABR: + if (!kvmppc_power8_compatible(vcpu)) + return H_P2; + if (value2) + return H_P4; + if (mflags) + return H_UNSUPPORTED_FLAG_START; + /* Guests can't breakpoint the hypervisor */ + if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER) + return H_P3; + vcpu->arch.ciabr = value1; + return H_SUCCESS; + case H_SET_MODE_RESOURCE_SET_DAWR: + if (!kvmppc_power8_compatible(vcpu)) + return H_P2; + if (mflags) + return H_UNSUPPORTED_FLAG_START; + if (value2 & DABRX_HYP) + return H_P4; + vcpu->arch.dawr = value1; + vcpu->arch.dawrx = value2; + return H_SUCCESS; + default: + return H_TOO_HARD; + } +} + int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) { unsigned long req = kvmppc_get_gpr(vcpu, 3); @@ -562,6 +614,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) struct kvm_vcpu *tvcpu; int idx, rc; + if (req <= MAX_HCALL_OPCODE && + !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls)) + return RESUME_HOST; + switch (req) { case H_ENTER: idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -620,7 +676,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) /* Send the error out to userspace via KVM_RUN */ return rc; - + case H_SET_MODE: + ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6), + kvmppc_get_gpr(vcpu, 7)); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; case H_XIRR: case H_CPPR: case H_EOI: @@ -639,6 +702,29 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +static int kvmppc_hcall_impl_hv(unsigned long cmd) +{ + switch (cmd) { + case H_CEDE: + case H_PROD: + case H_CONFER: + case H_REGISTER_VPA: + case H_SET_MODE: +#ifdef CONFIG_KVM_XICS + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + case H_IPOLL: + case H_XIRR_X: +#endif + return 1; + } + + /* See if it's in the real-mode table */ + return kvmppc_hcall_impl_hv_realmode(cmd); +} + static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -785,7 +871,8 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, return 0; } -static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) +static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, + bool preserve_top32) { struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 mask; @@ -820,6 +907,10 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; if (cpu_has_feature(CPU_FTR_ARCH_207S)) mask |= LPCR_AIL; + + /* Broken 32-bit version of LPCR must not clear top bits */ + if (preserve_top32) + mask &= 0xFFFFFFFF; vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); spin_unlock(&vc->lock); } @@ -894,12 +985,6 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_CIABR: *val = get_reg_val(id, vcpu->arch.ciabr); break; - case KVM_REG_PPC_IC: - *val = get_reg_val(id, vcpu->arch.ic); - break; - case KVM_REG_PPC_VTB: - *val = get_reg_val(id, vcpu->arch.vtb); - break; case KVM_REG_PPC_CSIGR: *val = get_reg_val(id, vcpu->arch.csigr); break; @@ -939,6 +1024,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.vcore->tb_offset); break; case KVM_REG_PPC_LPCR: + case KVM_REG_PPC_LPCR_64: *val = get_reg_val(id, vcpu->arch.vcore->lpcr); break; case KVM_REG_PPC_PPR: @@ -1094,12 +1180,6 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) vcpu->arch.ciabr &= ~CIABR_PRIV; /* disable */ break; - case KVM_REG_PPC_IC: - vcpu->arch.ic = set_reg_val(id, *val); - break; - case KVM_REG_PPC_VTB: - vcpu->arch.vtb = set_reg_val(id, *val); - break; case KVM_REG_PPC_CSIGR: vcpu->arch.csigr = set_reg_val(id, *val); break; @@ -1150,7 +1230,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, ALIGN(set_reg_val(id, *val), 1UL << 24); break; case KVM_REG_PPC_LPCR: - kvmppc_set_lpcr(vcpu, set_reg_val(id, *val)); + kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true); + break; + case KVM_REG_PPC_LPCR_64: + kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false); break; case KVM_REG_PPC_PPR: vcpu->arch.ppr = set_reg_val(id, *val); @@ -1228,6 +1311,33 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, return r; } +static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) +{ + struct kvmppc_vcore *vcore; + + vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL); + + if (vcore == NULL) + return NULL; + + INIT_LIST_HEAD(&vcore->runnable_threads); + spin_lock_init(&vcore->lock); + init_waitqueue_head(&vcore->wq); + vcore->preempt_tb = TB_NIL; + vcore->lpcr = kvm->arch.lpcr; + vcore->first_vcpuid = core * threads_per_subcore; + vcore->kvm = kvm; + + vcore->mpp_buffer_is_valid = false; + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + vcore->mpp_buffer = (void *)__get_free_pages( + GFP_KERNEL|__GFP_ZERO, + MPP_BUFFER_ORDER); + + return vcore; +} + static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, unsigned int id) { @@ -1279,16 +1389,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, mutex_lock(&kvm->lock); vcore = kvm->arch.vcores[core]; if (!vcore) { - vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL); - if (vcore) { - INIT_LIST_HEAD(&vcore->runnable_threads); - spin_lock_init(&vcore->lock); - init_waitqueue_head(&vcore->wq); - vcore->preempt_tb = TB_NIL; - vcore->lpcr = kvm->arch.lpcr; - vcore->first_vcpuid = core * threads_per_subcore; - vcore->kvm = kvm; - } + vcore = kvmppc_vcore_create(kvm, core); kvm->arch.vcores[core] = vcore; kvm->arch.online_vcores++; } @@ -1500,6 +1601,33 @@ static int on_primary_thread(void) return 1; } +static void kvmppc_start_saving_l2_cache(struct kvmppc_vcore *vc) +{ + phys_addr_t phy_addr, mpp_addr; + + phy_addr = (phys_addr_t)virt_to_phys(vc->mpp_buffer); + mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK; + + mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_ABORT); + logmpp(mpp_addr | PPC_LOGMPP_LOG_L2); + + vc->mpp_buffer_is_valid = true; +} + +static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc) +{ + phys_addr_t phy_addr, mpp_addr; + + phy_addr = virt_to_phys(vc->mpp_buffer); + mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK; + + /* We must abort any in-progress save operations to ensure + * the table is valid so that prefetch engine knows when to + * stop prefetching. */ + logmpp(mpp_addr | PPC_LOGMPP_LOG_ABORT); + mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE); +} + /* * Run a set of guest threads on a physical core. * Called with vc->lock held. @@ -1577,9 +1705,16 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) srcu_idx = srcu_read_lock(&vc->kvm->srcu); + if (vc->mpp_buffer_is_valid) + kvmppc_start_restoring_l2_cache(vc); + __kvmppc_vcore_entry(); spin_lock(&vc->lock); + + if (vc->mpp_buffer) + kvmppc_start_saving_l2_cache(vc); + /* disable sending of IPIs on virtual external irqs */ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) vcpu->cpu = -1; @@ -1929,12 +2064,6 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, (*sps)->page_shift = def->shift; (*sps)->slb_enc = def->sllp; (*sps)->enc[0].page_shift = def->shift; - /* - * Only return base page encoding. We don't want to return - * all the supporting pte_enc, because our H_ENTER doesn't - * support MPSS yet. Once they do, we can start passing all - * support pte_enc here - */ (*sps)->enc[0].pte_enc = def->penc[linux_psize]; /* * Add 16MB MPSS support if host supports it @@ -2281,6 +2410,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) */ cpumask_setall(&kvm->arch.need_tlb_flush); + /* Start out with the default set of hcalls enabled */ + memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, + sizeof(kvm->arch.enabled_hcalls)); + kvm->arch.rma = NULL; kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); @@ -2323,8 +2456,14 @@ static void kvmppc_free_vcores(struct kvm *kvm) { long int i; - for (i = 0; i < KVM_MAX_VCORES; ++i) + for (i = 0; i < KVM_MAX_VCORES; ++i) { + if (kvm->arch.vcores[i] && kvm->arch.vcores[i]->mpp_buffer) { + struct kvmppc_vcore *vc = kvm->arch.vcores[i]; + free_pages((unsigned long)vc->mpp_buffer, + MPP_BUFFER_ORDER); + } kfree(kvm->arch.vcores[i]); + } kvm->arch.online_vcores = 0; } @@ -2419,6 +2558,49 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, return r; } +/* + * List of hcall numbers to enable by default. + * For compatibility with old userspace, we enable by default + * all hcalls that were implemented before the hcall-enabling + * facility was added. Note this list should not include H_RTAS. + */ +static unsigned int default_hcall_list[] = { + H_REMOVE, + H_ENTER, + H_READ, + H_PROTECT, + H_BULK_REMOVE, + H_GET_TCE, + H_PUT_TCE, + H_SET_DABR, + H_SET_XDABR, + H_CEDE, + H_PROD, + H_CONFER, + H_REGISTER_VPA, +#ifdef CONFIG_KVM_XICS + H_EOI, + H_CPPR, + H_IPI, + H_IPOLL, + H_XIRR, + H_XIRR_X, +#endif + 0 +}; + +static void init_default_hcalls(void) +{ + int i; + unsigned int hcall; + + for (i = 0; default_hcall_list[i]; ++i) { + hcall = default_hcall_list[i]; + WARN_ON(!kvmppc_hcall_impl_hv(hcall)); + __set_bit(hcall / 4, default_enabled_hcalls); + } +} + static struct kvmppc_ops kvm_ops_hv = { .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -2451,6 +2633,7 @@ static struct kvmppc_ops kvm_ops_hv = { .emulate_mfspr = kvmppc_core_emulate_mfspr_hv, .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv, .arch_vm_ioctl = kvm_arch_vm_ioctl_hv, + .hcall_implemented = kvmppc_hcall_impl_hv, }; static int kvmppc_book3s_init_hv(void) @@ -2466,6 +2649,8 @@ static int kvmppc_book3s_init_hv(void) kvm_ops_hv.owner = THIS_MODULE; kvmppc_hv_ops = &kvm_ops_hv; + init_default_hcalls(); + r = kvmppc_mmu_hv_init(); return r; } diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7cde8a665205..b9615ba5b083 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -16,12 +16,14 @@ #include <linux/init.h> #include <linux/memblock.h> #include <linux/sizes.h> +#include <linux/cma.h> #include <asm/cputable.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> -#include "book3s_hv_cma.h" +#define KVM_CMA_CHUNK_ORDER 18 + /* * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) * should be power of 2. @@ -43,6 +45,8 @@ static unsigned long kvm_cma_resv_ratio = 5; unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */ EXPORT_SYMBOL_GPL(kvm_rma_pages); +static struct cma *kvm_cma; + /* Work out RMLS (real mode limit selector) field value for a given RMA size. Assumes POWER7 or PPC970. */ static inline int lpcr_rmls(unsigned long rma_size) @@ -97,7 +101,7 @@ struct kvm_rma_info *kvm_alloc_rma() ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL); if (!ri) return NULL; - page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages); + page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages)); if (!page) goto err_out; atomic_set(&ri->use_count, 1); @@ -112,7 +116,7 @@ EXPORT_SYMBOL_GPL(kvm_alloc_rma); void kvm_release_rma(struct kvm_rma_info *ri) { if (atomic_dec_and_test(&ri->use_count)) { - kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages); + cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages); kfree(ri); } } @@ -131,16 +135,18 @@ struct page *kvm_alloc_hpt(unsigned long nr_pages) { unsigned long align_pages = HPT_ALIGN_PAGES; + VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); + /* Old CPUs require HPT aligned on a multiple of its size */ if (!cpu_has_feature(CPU_FTR_ARCH_206)) align_pages = nr_pages; - return kvm_alloc_cma(nr_pages, align_pages); + return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages)); } EXPORT_SYMBOL_GPL(kvm_alloc_hpt); void kvm_release_hpt(struct page *page, unsigned long nr_pages) { - kvm_release_cma(page, nr_pages); + cma_release(kvm_cma, page, nr_pages); } EXPORT_SYMBOL_GPL(kvm_release_hpt); @@ -179,7 +185,8 @@ void __init kvm_cma_reserve(void) align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); - kvm_cma_declare_contiguous(selected_size, align_size); + cma_declare_contiguous(0, selected_size, 0, align_size, + KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma); } } @@ -212,3 +219,16 @@ bool kvm_hv_mode_active(void) { return atomic_read(&hv_vm_count) != 0; } + +extern int hcall_real_table[], hcall_real_table_end[]; + +int kvmppc_hcall_impl_hv_realmode(unsigned long cmd) +{ + cmd /= 4; + if (cmd < hcall_real_table_end - hcall_real_table && + hcall_real_table[cmd]) + return 1; + + return 0; +} +EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode); diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c deleted file mode 100644 index d9d3d8553d51..000000000000 --- a/arch/powerpc/kvm/book3s_hv_cma.c +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA - * for DMA mapping framework - * - * Copyright IBM Corporation, 2013 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License or (at your optional) any later version of the license. - * - */ -#define pr_fmt(fmt) "kvm_cma: " fmt - -#ifdef CONFIG_CMA_DEBUG -#ifndef DEBUG -# define DEBUG -#endif -#endif - -#include <linux/memblock.h> -#include <linux/mutex.h> -#include <linux/sizes.h> -#include <linux/slab.h> - -#include "book3s_hv_cma.h" - -struct kvm_cma { - unsigned long base_pfn; - unsigned long count; - unsigned long *bitmap; -}; - -static DEFINE_MUTEX(kvm_cma_mutex); -static struct kvm_cma kvm_cma_area; - -/** - * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling - * for kvm hash pagetable - * @size: Size of the reserved memory. - * @alignment: Alignment for the contiguous memory area - * - * This function reserves memory for kvm cma area. It should be - * called by arch code when early allocator (memblock or bootmem) - * is still activate. - */ -long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment) -{ - long base_pfn; - phys_addr_t addr; - struct kvm_cma *cma = &kvm_cma_area; - - pr_debug("%s(size %lx)\n", __func__, (unsigned long)size); - - if (!size) - return -EINVAL; - /* - * Sanitise input arguments. - * We should be pageblock aligned for CMA. - */ - alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order)); - size = ALIGN(size, alignment); - /* - * Reserve memory - * Use __memblock_alloc_base() since - * memblock_alloc_base() panic()s. - */ - addr = __memblock_alloc_base(size, alignment, 0); - if (!addr) { - base_pfn = -ENOMEM; - goto err; - } else - base_pfn = PFN_DOWN(addr); - - /* - * Each reserved area must be initialised later, when more kernel - * subsystems (like slab allocator) are available. - */ - cma->base_pfn = base_pfn; - cma->count = size >> PAGE_SHIFT; - pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M); - return 0; -err: - pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); - return base_pfn; -} - -/** - * kvm_alloc_cma() - allocate pages from contiguous area - * @nr_pages: Requested number of pages. - * @align_pages: Requested alignment in number of pages - * - * This function allocates memory buffer for hash pagetable. - */ -struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages) -{ - int ret; - struct page *page = NULL; - struct kvm_cma *cma = &kvm_cma_area; - unsigned long chunk_count, nr_chunk; - unsigned long mask, pfn, pageno, start = 0; - - - if (!cma || !cma->count) - return NULL; - - pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__, - (void *)cma, nr_pages, align_pages); - - if (!nr_pages) - return NULL; - /* - * align mask with chunk size. The bit tracks pages in chunk size - */ - VM_BUG_ON(!is_power_of_2(align_pages)); - mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1; - BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER); - - chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); - nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); - - mutex_lock(&kvm_cma_mutex); - for (;;) { - pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count, - start, nr_chunk, mask); - if (pageno >= chunk_count) - break; - - pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)); - ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA); - if (ret == 0) { - bitmap_set(cma->bitmap, pageno, nr_chunk); - page = pfn_to_page(pfn); - memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT); - break; - } else if (ret != -EBUSY) { - break; - } - pr_debug("%s(): memory range at %p is busy, retrying\n", - __func__, pfn_to_page(pfn)); - /* try again with a bit different memory target */ - start = pageno + mask + 1; - } - mutex_unlock(&kvm_cma_mutex); - pr_debug("%s(): returned %p\n", __func__, page); - return page; -} - -/** - * kvm_release_cma() - release allocated pages for hash pagetable - * @pages: Allocated pages. - * @nr_pages: Number of allocated pages. - * - * This function releases memory allocated by kvm_alloc_cma(). - * It returns false when provided pages do not belong to contiguous area and - * true otherwise. - */ -bool kvm_release_cma(struct page *pages, unsigned long nr_pages) -{ - unsigned long pfn; - unsigned long nr_chunk; - struct kvm_cma *cma = &kvm_cma_area; - - if (!cma || !pages) - return false; - - pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages); - - pfn = page_to_pfn(pages); - - if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) - return false; - - VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count); - nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); - - mutex_lock(&kvm_cma_mutex); - bitmap_clear(cma->bitmap, - (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT), - nr_chunk); - free_contig_range(pfn, nr_pages); - mutex_unlock(&kvm_cma_mutex); - - return true; -} - -static int __init kvm_cma_activate_area(unsigned long base_pfn, - unsigned long count) -{ - unsigned long pfn = base_pfn; - unsigned i = count >> pageblock_order; - struct zone *zone; - - WARN_ON_ONCE(!pfn_valid(pfn)); - zone = page_zone(pfn_to_page(pfn)); - do { - unsigned j; - base_pfn = pfn; - for (j = pageblock_nr_pages; j; --j, pfn++) { - WARN_ON_ONCE(!pfn_valid(pfn)); - /* - * alloc_contig_range requires the pfn range - * specified to be in the same zone. Make this - * simple by forcing the entire CMA resv range - * to be in the same zone. - */ - if (page_zone(pfn_to_page(pfn)) != zone) - return -EINVAL; - } - init_cma_reserved_pageblock(pfn_to_page(base_pfn)); - } while (--i); - return 0; -} - -static int __init kvm_cma_init_reserved_areas(void) -{ - int bitmap_size, ret; - unsigned long chunk_count; - struct kvm_cma *cma = &kvm_cma_area; - - pr_debug("%s()\n", __func__); - if (!cma->count) - return 0; - chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); - bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long); - cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!cma->bitmap) - return -ENOMEM; - - ret = kvm_cma_activate_area(cma->base_pfn, cma->count); - if (ret) - goto error; - return 0; - -error: - kfree(cma->bitmap); - return ret; -} -core_initcall(kvm_cma_init_reserved_areas); diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h deleted file mode 100644 index 655144f75fa5..000000000000 --- a/arch/powerpc/kvm/book3s_hv_cma.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Contiguous Memory Allocator for ppc KVM hash pagetable based on CMA - * for DMA mapping framework - * - * Copyright IBM Corporation, 2013 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License or (at your optional) any later version of the license. - * - */ - -#ifndef __POWERPC_KVM_CMA_ALLOC_H__ -#define __POWERPC_KVM_CMA_ALLOC_H__ -/* - * Both RMA and Hash page allocation will be multiple of 256K. - */ -#define KVM_CMA_CHUNK_ORDER 18 - -extern struct page *kvm_alloc_cma(unsigned long nr_pages, - unsigned long align_pages); -extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages); -extern long kvm_cma_declare_contiguous(phys_addr_t size, - phys_addr_t alignment) __init; -#endif diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 8c86422a1e37..731be7478b27 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -127,11 +127,6 @@ BEGIN_FTR_SECTION stw r10, HSTATE_PMC + 24(r13) stw r11, HSTATE_PMC + 28(r13) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -BEGIN_FTR_SECTION - mfspr r9, SPRN_SIER - std r8, HSTATE_MMCR + 40(r13) - std r9, HSTATE_MMCR + 48(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 31: /* diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 3a5c568b1e89..d562c8e2bc30 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -45,14 +45,14 @@ static void reload_slb(struct kvm_vcpu *vcpu) return; /* Sanity check */ - n = min_t(u32, slb->persistent, SLB_MIN_SIZE); + n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE); if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end) return; /* Load up the SLB from that */ for (i = 0; i < n; ++i) { - unsigned long rb = slb->save_area[i].esid; - unsigned long rs = slb->save_area[i].vsid; + unsigned long rb = be64_to_cpu(slb->save_area[i].esid); + unsigned long rs = be64_to_cpu(slb->save_area[i].vsid); rb = (rb & ~0xFFFul) | i; /* insert entry number */ asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb)); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6e6224318c36..084ad54c73cd 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -154,10 +154,10 @@ static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva, return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift); } -static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v) +static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v) { asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); - hpte[0] = hpte_v; + hpte[0] = cpu_to_be64(hpte_v); } long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, @@ -166,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, { unsigned long i, pa, gpa, gfn, psize; unsigned long slot_fn, hva; - unsigned long *hpte; + __be64 *hpte; struct revmap_entry *rev; unsigned long g_ptel; struct kvm_memory_slot *memslot; @@ -275,9 +275,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, return H_PARAMETER; if (likely((flags & H_EXACT) == 0)) { pte_index &= ~7UL; - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); for (i = 0; i < 8; ++i) { - if ((*hpte & HPTE_V_VALID) == 0 && + if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 && try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | HPTE_V_ABSENT)) break; @@ -292,11 +292,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, */ hpte -= 16; for (i = 0; i < 8; ++i) { + u64 pte; while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT))) + pte = be64_to_cpu(*hpte); + if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT))) break; - *hpte &= ~HPTE_V_HVLOCK; + *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK); hpte += 2; } if (i == 8) @@ -304,14 +306,17 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } pte_index += i; } else { - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | HPTE_V_ABSENT)) { /* Lock the slot and check again */ + u64 pte; + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) { - *hpte &= ~HPTE_V_HVLOCK; + pte = be64_to_cpu(*hpte); + if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) { + *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK); return H_PTEG_FULL; } } @@ -347,11 +352,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } } - hpte[1] = ptel; + hpte[1] = cpu_to_be64(ptel); /* Write the first HPTE dword, unlocking the HPTE and making it valid */ eieio(); - hpte[0] = pteh; + hpte[0] = cpu_to_be64(pteh); asm volatile("ptesync" : : : "memory"); *pte_idx_ret = pte_index; @@ -468,30 +473,35 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, unsigned long pte_index, unsigned long avpn, unsigned long *hpret) { - unsigned long *hpte; + __be64 *hpte; unsigned long v, r, rb; struct revmap_entry *rev; + u64 pte; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || - ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) || - ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) { - hpte[0] &= ~HPTE_V_HVLOCK; + pte = be64_to_cpu(hpte[0]); + if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || + ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) || + ((flags & H_ANDCOND) && (pte & avpn) != 0)) { + hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); return H_NOT_FOUND; } rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); - v = hpte[0] & ~HPTE_V_HVLOCK; + v = pte & ~HPTE_V_HVLOCK; if (v & HPTE_V_VALID) { - hpte[0] &= ~HPTE_V_VALID; - rb = compute_tlbie_rb(v, hpte[1], pte_index); + u64 pte1; + + pte1 = be64_to_cpu(hpte[1]); + hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); + rb = compute_tlbie_rb(v, pte1, pte_index); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* Read PTE low word after tlbie to get final R/C values */ - remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); + remove_revmap_chain(kvm, pte_index, rev, v, pte1); } r = rev->guest_rpte & ~HPTE_GR_RESERVED; note_hpte_modification(kvm, rev); @@ -514,12 +524,14 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; unsigned long *args = &vcpu->arch.gpr[4]; - unsigned long *hp, *hptes[4], tlbrb[4]; + __be64 *hp, *hptes[4]; + unsigned long tlbrb[4]; long int i, j, k, n, found, indexes[4]; unsigned long flags, req, pte_index, rcbits; int global; long int ret = H_SUCCESS; struct revmap_entry *rev, *revs[4]; + u64 hp0; global = global_invalidates(kvm, 0); for (i = 0; i < 4 && ret == H_SUCCESS; ) { @@ -542,8 +554,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) ret = H_PARAMETER; break; } - hp = (unsigned long *) - (kvm->arch.hpt_virt + (pte_index << 4)); + hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4)); /* to avoid deadlock, don't spin except for first */ if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) { if (n) @@ -552,23 +563,24 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) cpu_relax(); } found = 0; - if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) { + hp0 = be64_to_cpu(hp[0]); + if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) { switch (flags & 3) { case 0: /* absolute */ found = 1; break; case 1: /* andcond */ - if (!(hp[0] & args[j + 1])) + if (!(hp0 & args[j + 1])) found = 1; break; case 2: /* AVPN */ - if ((hp[0] & ~0x7fUL) == args[j + 1]) + if ((hp0 & ~0x7fUL) == args[j + 1]) found = 1; break; } } if (!found) { - hp[0] &= ~HPTE_V_HVLOCK; + hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); args[j] = ((0x90 | flags) << 56) + pte_index; continue; } @@ -577,7 +589,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); note_hpte_modification(kvm, rev); - if (!(hp[0] & HPTE_V_VALID)) { + if (!(hp0 & HPTE_V_VALID)) { /* insert R and C bits from PTE */ rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); args[j] |= rcbits << (56 - 5); @@ -585,8 +597,10 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) continue; } - hp[0] &= ~HPTE_V_VALID; /* leave it locked */ - tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index); + /* leave it locked */ + hp[0] &= ~cpu_to_be64(HPTE_V_VALID); + tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]), + be64_to_cpu(hp[1]), pte_index); indexes[n] = j; hptes[n] = hp; revs[n] = rev; @@ -605,7 +619,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) pte_index = args[j] & ((1ul << 56) - 1); hp = hptes[k]; rev = revs[k]; - remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]); + remove_revmap_chain(kvm, pte_index, rev, + be64_to_cpu(hp[0]), be64_to_cpu(hp[1])); rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); args[j] |= rcbits << (56 - 5); hp[0] = 0; @@ -620,23 +635,25 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long va) { struct kvm *kvm = vcpu->kvm; - unsigned long *hpte; + __be64 *hpte; struct revmap_entry *rev; unsigned long v, r, rb, mask, bits; + u64 pte; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || - ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) { - hpte[0] &= ~HPTE_V_HVLOCK; + pte = be64_to_cpu(hpte[0]); + if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || + ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) { + hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); return H_NOT_FOUND; } - v = hpte[0]; + v = pte; bits = (flags << 55) & HPTE_R_PP0; bits |= (flags << 48) & HPTE_R_KEY_HI; bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); @@ -650,12 +667,12 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, rev->guest_rpte = r; note_hpte_modification(kvm, rev); } - r = (hpte[1] & ~mask) | bits; + r = (be64_to_cpu(hpte[1]) & ~mask) | bits; /* Update HPTE */ if (v & HPTE_V_VALID) { rb = compute_tlbie_rb(v, r, pte_index); - hpte[0] = v & ~HPTE_V_VALID; + hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* * If the host has this page as readonly but the guest @@ -681,9 +698,9 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, } } } - hpte[1] = r; + hpte[1] = cpu_to_be64(r); eieio(); - hpte[0] = v & ~HPTE_V_HVLOCK; + hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK); asm volatile("ptesync" : : : "memory"); return H_SUCCESS; } @@ -692,7 +709,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index) { struct kvm *kvm = vcpu->kvm; - unsigned long *hpte, v, r; + __be64 *hpte; + unsigned long v, r; int i, n = 1; struct revmap_entry *rev = NULL; @@ -704,9 +722,9 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, } rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); for (i = 0; i < n; ++i, ++pte_index) { - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); - v = hpte[0] & ~HPTE_V_HVLOCK; - r = hpte[1]; + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); + v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[1]); if (v & HPTE_V_ABSENT) { v &= ~HPTE_V_ABSENT; v |= HPTE_V_VALID; @@ -721,25 +739,27 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, return H_SUCCESS; } -void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, +void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index) { unsigned long rb; - hptep[0] &= ~HPTE_V_VALID; - rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); + hptep[0] &= ~cpu_to_be64(HPTE_V_VALID); + rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), + pte_index); do_tlbies(kvm, &rb, 1, 1, true); } EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); -void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, +void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index) { unsigned long rb; unsigned char rbyte; - rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); - rbyte = (hptep[1] & ~HPTE_R_R) >> 8; + rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), + pte_index); + rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8; /* modify only the second-last byte, which contains the ref bit */ *((char *)hptep + 14) = rbyte; do_tlbies(kvm, &rb, 1, 1, false); @@ -765,7 +785,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, unsigned long somask; unsigned long vsid, hash; unsigned long avpn; - unsigned long *hpte; + __be64 *hpte; unsigned long mask, val; unsigned long v, r; @@ -797,11 +817,11 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, val |= avpn; for (;;) { - hpte = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7)); for (i = 0; i < 16; i += 2) { /* Read the PTE racily */ - v = hpte[i] & ~HPTE_V_HVLOCK; + v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; /* Check valid/absent, hash, segment size and AVPN */ if (!(v & valid) || (v & mask) != val) @@ -810,22 +830,19 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, /* Lock the PTE and read it under the lock */ while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK)) cpu_relax(); - v = hpte[i] & ~HPTE_V_HVLOCK; - r = hpte[i+1]; + v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[i+1]); /* - * Check the HPTE again, including large page size - * Since we don't currently allow any MPSS (mixed - * page-size segment) page sizes, it is sufficient - * to check against the actual page size. + * Check the HPTE again, including base page size */ if ((v & valid) && (v & mask) == val && - hpte_page_size(v, r) == (1ul << pshift)) + hpte_base_page_size(v, r) == (1ul << pshift)) /* Return with the HPTE still locked */ return (hash << 3) + (i >> 1); /* Unlock and move on */ - hpte[i] = v; + hpte[i] = cpu_to_be64(v); } if (val & HPTE_V_SECONDARY) @@ -854,7 +871,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, struct kvm *kvm = vcpu->kvm; long int index; unsigned long v, r, gr; - unsigned long *hpte; + __be64 *hpte; unsigned long valid; struct revmap_entry *rev; unsigned long pp, key; @@ -870,9 +887,9 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, return status; /* there really was no HPTE */ return 0; /* for prot fault, HPTE disappeared */ } - hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); - v = hpte[0] & ~HPTE_V_HVLOCK; - r = hpte[1]; + hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[1]); rev = real_vmalloc_addr(&kvm->arch.revmap[index]); gr = rev->guest_rpte; diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index b4b0082f761c..3ee38e6e884f 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -401,6 +401,11 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) icp->rm_action |= XICS_RM_REJECT; icp->rm_reject = irq; } + + if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) { + icp->rm_action |= XICS_RM_NOTIFY_EOI; + icp->rm_eoied_irq = irq; + } bail: return check_too_hard(xics, icp); } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 868347ef09fd..f0c4db7704c3 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -32,10 +32,6 @@ #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) -#ifdef __LITTLE_ENDIAN__ -#error Need to fix lppaca and SLB shadow accesses in little endian mode -#endif - /* Values in HSTATE_NAPPING(r13) */ #define NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 @@ -48,7 +44,7 @@ * * LR = return address to continue at after eventually re-enabling MMU */ -_GLOBAL(kvmppc_hv_entry_trampoline) +_GLOBAL_TOC(kvmppc_hv_entry_trampoline) mflr r0 std r0, PPC_LR_STKOFF(r1) stdu r1, -112(r1) @@ -159,6 +155,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL BEGIN_FTR_SECTION beq 11f + cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI + beq cr2, 14f /* HMI check */ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* RFI into the highmem handler, or branch to interrupt handler */ @@ -179,6 +177,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 13: b machine_check_fwnmi +14: mtspr SPRN_HSRR0, r8 + mtspr SPRN_HSRR1, r7 + b hmi_exception_after_realmode + kvmppc_primary_no_guest: /* We handle this much like a ceded vcpu */ /* set our bit in napping_threads */ @@ -595,9 +597,10 @@ kvmppc_got_guest: ld r3, VCPU_VPA(r4) cmpdi r3, 0 beq 25f - lwz r5, LPPACA_YIELDCOUNT(r3) + li r6, LPPACA_YIELDCOUNT + LWZX_BE r5, r3, r6 addi r5, r5, 1 - stw r5, LPPACA_YIELDCOUNT(r3) + STWX_BE r5, r3, r6 li r6, 1 stb r6, VCPU_VPA_DIRTY(r4) 25: @@ -671,9 +674,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) mr r31, r4 addi r3, r31, VCPU_FPRS_TM - bl .load_fp_state + bl load_fp_state addi r3, r31, VCPU_VRS_TM - bl .load_vr_state + bl load_vr_state mr r4, r31 lwz r7, VCPU_VRSAVE_TM(r4) mtspr SPRN_VRSAVE, r7 @@ -1417,9 +1420,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) /* Save FP/VSX. */ addi r3, r9, VCPU_FPRS_TM - bl .store_fp_state + bl store_fp_state addi r3, r9, VCPU_VRS_TM - bl .store_vr_state + bl store_vr_state mfspr r6, SPRN_VRSAVE stw r6, VCPU_VRSAVE_TM(r9) 1: @@ -1442,9 +1445,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) ld r8, VCPU_VPA(r9) /* do they have a VPA? */ cmpdi r8, 0 beq 25f - lwz r3, LPPACA_YIELDCOUNT(r8) + li r4, LPPACA_YIELDCOUNT + LWZX_BE r3, r8, r4 addi r3, r3, 1 - stw r3, LPPACA_YIELDCOUNT(r8) + STWX_BE r3, r8, r4 li r3, 1 stb r3, VCPU_VPA_DIRTY(r9) 25: @@ -1757,8 +1761,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 33: ld r8,PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED - ld r5,SLBSHADOW_SAVEAREA(r8) - ld r6,SLBSHADOW_SAVEAREA+8(r8) + li r3, SLBSHADOW_SAVEAREA + LDX_BE r5, r8, r3 + addi r3, r3, 8 + LDX_BE r6, r8, r3 andis. r7,r5,SLB_ESID_V@h beq 1f slbmte r6,r5 @@ -1909,12 +1915,23 @@ hcall_try_real_mode: clrrdi r3,r3,2 cmpldi r3,hcall_real_table_end - hcall_real_table bge guest_exit_cont + /* See if this hcall is enabled for in-kernel handling */ + ld r4, VCPU_KVM(r9) + srdi r0, r3, 8 /* r0 = (r3 / 4) >> 6 */ + sldi r0, r0, 3 /* index into kvm->arch.enabled_hcalls[] */ + add r4, r4, r0 + ld r0, KVM_ENABLED_HCALLS(r4) + rlwinm r4, r3, 32-2, 0x3f /* r4 = (r3 / 4) & 0x3f */ + srd r0, r0, r4 + andi. r0, r0, 1 + beq guest_exit_cont + /* Get pointer to handler, if any, and call it */ LOAD_REG_ADDR(r4, hcall_real_table) lwax r3,r3,r4 cmpwi r3,0 beq guest_exit_cont - add r3,r3,r4 - mtctr r3 + add r12,r3,r4 + mtctr r12 mr r3,r9 /* get vcpu pointer */ ld r4,VCPU_GPR(R4)(r9) bctrl @@ -2031,6 +2048,7 @@ hcall_real_table: .long 0 /* 0x12c */ .long 0 /* 0x130 */ .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table + .globl hcall_real_table_end hcall_real_table_end: ignore_hdec: @@ -2338,7 +2356,18 @@ kvmppc_read_intr: cmpdi r6, 0 beq- 1f lwzcix r0, r6, r7 - rlwinm. r3, r0, 0, 0xffffff + /* + * Save XIRR for later. Since we get in in reverse endian on LE + * systems, save it byte reversed and fetch it back in host endian. + */ + li r3, HSTATE_SAVED_XIRR + STWX_BE r0, r3, r13 +#ifdef __LITTLE_ENDIAN__ + lwz r3, HSTATE_SAVED_XIRR(r13) +#else + mr r3, r0 +#endif + rlwinm. r3, r3, 0, 0xffffff sync beq 1f /* if nothing pending in the ICP */ @@ -2370,10 +2399,9 @@ kvmppc_read_intr: li r3, -1 1: blr -42: /* It's not an IPI and it's for the host, stash it in the PACA - * before exit, it will be picked up by the host ICP driver +42: /* It's not an IPI and it's for the host. We saved a copy of XIRR in + * the PACA earlier, it will be picked up by the host ICP driver */ - stw r0, HSTATE_SAVED_XIRR(r13) li r3, 1 b 1b @@ -2408,11 +2436,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) mtmsrd r8 isync addi r3,r3,VCPU_FPRS - bl .store_fp_state + bl store_fp_state #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION addi r3,r31,VCPU_VRS - bl .store_vr_state + bl store_vr_state END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif mfspr r6,SPRN_VRSAVE @@ -2444,11 +2472,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) mtmsrd r8 isync addi r3,r4,VCPU_FPRS - bl .load_fp_state + bl load_fp_state #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION addi r3,r31,VCPU_VRS - bl .load_vr_state + bl load_vr_state END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif lwz r7,VCPU_VRSAVE(r31) diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index e2c29e381dc7..d044b8b7c69d 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -25,7 +25,11 @@ #include <asm/exception-64s.h> #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU #elif defined(CONFIG_PPC_BOOK3S_32) diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index 6c8011fd57e6..bfb8035314e3 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -639,26 +639,36 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { - u32 inst = kvmppc_get_last_inst(vcpu); + u32 inst; enum emulation_result emulated = EMULATE_DONE; + int ax_rd, ax_ra, ax_rb, ax_rc; + short full_d; + u64 *fpr_d, *fpr_a, *fpr_b, *fpr_c; - int ax_rd = inst_get_field(inst, 6, 10); - int ax_ra = inst_get_field(inst, 11, 15); - int ax_rb = inst_get_field(inst, 16, 20); - int ax_rc = inst_get_field(inst, 21, 25); - short full_d = inst_get_field(inst, 16, 31); - - u64 *fpr_d = &VCPU_FPR(vcpu, ax_rd); - u64 *fpr_a = &VCPU_FPR(vcpu, ax_ra); - u64 *fpr_b = &VCPU_FPR(vcpu, ax_rb); - u64 *fpr_c = &VCPU_FPR(vcpu, ax_rc); - - bool rcomp = (inst & 1) ? true : false; - u32 cr = kvmppc_get_cr(vcpu); + bool rcomp; + u32 cr; #ifdef DEBUG int i; #endif + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst); + if (emulated != EMULATE_DONE) + return emulated; + + ax_rd = inst_get_field(inst, 6, 10); + ax_ra = inst_get_field(inst, 11, 15); + ax_rb = inst_get_field(inst, 16, 20); + ax_rc = inst_get_field(inst, 21, 25); + full_d = inst_get_field(inst, 16, 31); + + fpr_d = &VCPU_FPR(vcpu, ax_rd); + fpr_a = &VCPU_FPR(vcpu, ax_ra); + fpr_b = &VCPU_FPR(vcpu, ax_rb); + fpr_c = &VCPU_FPR(vcpu, ax_rc); + + rcomp = (inst & 1) ? true : false; + cr = kvmppc_get_cr(vcpu); + if (!kvmppc_inst_is_paired_single(vcpu, inst)) return EMULATE_FAIL; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 8eef1e519077..faffb27badd9 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -62,6 +62,35 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); #define HW_PAGE_SIZE PAGE_SIZE #endif +static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu) +{ + ulong msr = kvmppc_get_msr(vcpu); + return (msr & (MSR_IR|MSR_DR)) == MSR_DR; +} + +static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu) +{ + ulong msr = kvmppc_get_msr(vcpu); + ulong pc = kvmppc_get_pc(vcpu); + + /* We are in DR only split real mode */ + if ((msr & (MSR_IR|MSR_DR)) != MSR_DR) + return; + + /* We have not fixed up the guest already */ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) + return; + + /* The code is in fixupable address space */ + if (pc & SPLIT_HACK_MASK) + return; + + vcpu->arch.hflags |= BOOK3S_HFLAG_SPLIT_HACK; + kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS); +} + +void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu); + static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) { #ifdef CONFIG_PPC_BOOK3S_64 @@ -71,10 +100,19 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) svcpu->in_use = 0; svcpu_put(svcpu); #endif + + /* Disable AIL if supported */ + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL); + vcpu->cpu = smp_processor_id(); #ifdef CONFIG_PPC_BOOK3S_32 current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu; #endif + + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); } static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) @@ -89,8 +127,17 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) svcpu_put(svcpu); #endif + if (kvmppc_is_split_real(vcpu)) + kvmppc_unfixup_split_real(vcpu); + kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + + /* Enable AIL if supported */ + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3); + vcpu->cpu = -1; } @@ -120,6 +167,14 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, #ifdef CONFIG_PPC_BOOK3S_64 svcpu->shadow_fscr = vcpu->arch.shadow_fscr; #endif + /* + * Now also save the current time base value. We use this + * to find the guest purr and spurr value. + */ + vcpu->arch.entry_tb = get_tb(); + vcpu->arch.entry_vtb = get_vtb(); + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + vcpu->arch.entry_ic = mfspr(SPRN_IC); svcpu->in_use = true; } @@ -166,6 +221,14 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, #ifdef CONFIG_PPC_BOOK3S_64 vcpu->arch.shadow_fscr = svcpu->shadow_fscr; #endif + /* + * Update purr and spurr using time base on exit. + */ + vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; + vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; + vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb; + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic; svcpu->in_use = false; out: @@ -294,6 +357,11 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) } } + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); + else + kvmppc_unfixup_split_real(vcpu); + if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) != (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { kvmppc_mmu_flush_segments(vcpu); @@ -443,19 +511,19 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) put_page(hpage); } -static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) +static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { ulong mp_pa = vcpu->arch.magic_page_pa; if (!(kvmppc_get_msr(vcpu) & MSR_SF)) mp_pa = (uint32_t)mp_pa; - if (unlikely(mp_pa) && - unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { + gpa &= ~0xFFFULL; + if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) { return 1; } - return kvm_is_visible_gfn(vcpu->kvm, gfn); + return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT); } int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -494,6 +562,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); break; case MSR_DR: + if (!data && + (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) && + ((pte.raddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)) + pte.raddr &= ~SPLIT_HACK_MASK; + /* fall through */ case MSR_IR: vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); @@ -541,7 +614,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); } else if (!is_mmio && - kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { + kvmppc_visible_gpa(vcpu, pte.raddr)) { if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) { /* * There is already a host HPTE there, presumably @@ -637,42 +710,6 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac) #endif } -static int kvmppc_read_inst(struct kvm_vcpu *vcpu) -{ - ulong srr0 = kvmppc_get_pc(vcpu); - u32 last_inst = kvmppc_get_last_inst(vcpu); - int ret; - - ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); - if (ret == -ENOENT) { - ulong msr = kvmppc_get_msr(vcpu); - - msr = kvmppc_set_field(msr, 33, 33, 1); - msr = kvmppc_set_field(msr, 34, 36, 0); - msr = kvmppc_set_field(msr, 42, 47, 0); - kvmppc_set_msr_fast(vcpu, msr); - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); - return EMULATE_AGAIN; - } - - return EMULATE_DONE; -} - -static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) -{ - - /* Need to do paired single emulation? */ - if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) - return EMULATE_DONE; - - /* Read out the instruction */ - if (kvmppc_read_inst(vcpu) == EMULATE_DONE) - /* Need to emulate */ - return EMULATE_FAIL; - - return EMULATE_AGAIN; -} - /* Handle external providers (FPU, Altivec, VSX) */ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, ulong msr) @@ -834,6 +871,15 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac) return RESUME_GUEST; } + +void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr) +{ + if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) { + /* TAR got dropped, drop it in shadow too */ + kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + } + vcpu->arch.fscr = fscr; +} #endif int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -858,6 +904,9 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, ulong shadow_srr1 = vcpu->arch.shadow_srr1; vcpu->stat.pf_instruc++; + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); + #ifdef CONFIG_PPC_BOOK3S_32 /* We set segments as unused segments when invalidating them. So * treat the respective fault as segment fault. */ @@ -960,6 +1009,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOK3S_INTERRUPT_DECREMENTER: case BOOK3S_INTERRUPT_HV_DECREMENTER: case BOOK3S_INTERRUPT_DOORBELL: + case BOOK3S_INTERRUPT_H_DOORBELL: vcpu->stat.dec_exits++; r = RESUME_GUEST; break; @@ -977,15 +1027,24 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, { enum emulation_result er; ulong flags; + u32 last_inst; + int emul; program_interrupt: flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; + emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); + if (emul != EMULATE_DONE) { + r = RESUME_GUEST; + break; + } + if (kvmppc_get_msr(vcpu) & MSR_PR) { #ifdef EXIT_DEBUG - printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); + pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n", + kvmppc_get_pc(vcpu), last_inst); #endif - if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) != + if ((last_inst & 0xff0007ff) != (INS_DCBZ & 0xfffffff7)) { kvmppc_core_queue_program(vcpu, flags); r = RESUME_GUEST; @@ -1004,7 +1063,7 @@ program_interrupt: break; case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", - __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); + __func__, kvmppc_get_pc(vcpu), last_inst); kvmppc_core_queue_program(vcpu, flags); r = RESUME_GUEST; break; @@ -1021,8 +1080,23 @@ program_interrupt: break; } case BOOK3S_INTERRUPT_SYSCALL: + { + u32 last_sc; + int emul; + + /* Get last sc for papr */ + if (vcpu->arch.papr_enabled) { + /* The sc instuction points SRR0 to the next inst */ + emul = kvmppc_get_last_inst(vcpu, INST_SC, &last_sc); + if (emul != EMULATE_DONE) { + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) - 4); + r = RESUME_GUEST; + break; + } + } + if (vcpu->arch.papr_enabled && - (kvmppc_get_last_sc(vcpu) == 0x44000022) && + (last_sc == 0x44000022) && !(kvmppc_get_msr(vcpu) & MSR_PR)) { /* SC 1 papr hypercalls */ ulong cmd = kvmppc_get_gpr(vcpu, 3); @@ -1067,36 +1141,51 @@ program_interrupt: r = RESUME_GUEST; } break; + } case BOOK3S_INTERRUPT_FP_UNAVAIL: case BOOK3S_INTERRUPT_ALTIVEC: case BOOK3S_INTERRUPT_VSX: { int ext_msr = 0; + int emul; + u32 last_inst; + + if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) { + /* Do paired single instruction emulation */ + emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, + &last_inst); + if (emul == EMULATE_DONE) + goto program_interrupt; + else + r = RESUME_GUEST; - switch (exit_nr) { - case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; - case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; - case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; + break; } - switch (kvmppc_check_ext(vcpu, exit_nr)) { - case EMULATE_DONE: - /* everything ok - let's enable the ext */ - r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); + /* Enable external provider */ + switch (exit_nr) { + case BOOK3S_INTERRUPT_FP_UNAVAIL: + ext_msr = MSR_FP; break; - case EMULATE_FAIL: - /* we need to emulate this instruction */ - goto program_interrupt; + + case BOOK3S_INTERRUPT_ALTIVEC: + ext_msr = MSR_VEC; break; - default: - /* nothing to worry about - go again */ + + case BOOK3S_INTERRUPT_VSX: + ext_msr = MSR_VSX; break; } + + r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); break; } case BOOK3S_INTERRUPT_ALIGNMENT: - if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { - u32 last_inst = kvmppc_get_last_inst(vcpu); + { + u32 last_inst; + int emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); + + if (emul == EMULATE_DONE) { u32 dsisr; u64 dar; @@ -1110,6 +1199,7 @@ program_interrupt: } r = RESUME_GUEST; break; + } #ifdef CONFIG_PPC_BOOK3S_64 case BOOK3S_INTERRUPT_FAC_UNAVAIL: kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56); @@ -1233,6 +1323,7 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, to_book3s(vcpu)->hior); break; case KVM_REG_PPC_LPCR: + case KVM_REG_PPC_LPCR_64: /* * We are only interested in the LPCR_ILE bit */ @@ -1268,6 +1359,7 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, to_book3s(vcpu)->hior_explicit = true; break; case KVM_REG_PPC_LPCR: + case KVM_REG_PPC_LPCR_64: kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val)); break; default: @@ -1310,8 +1402,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, p = __get_free_page(GFP_KERNEL|__GFP_ZERO); if (!p) goto uninit_vcpu; - /* the real shared page fills the last 4k of our page */ - vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096); + vcpu->arch.shared = (void *)p; #ifdef CONFIG_PPC_BOOK3S_64 /* Always start the shared struct in native endian mode */ #ifdef __BIG_ENDIAN__ @@ -1568,6 +1659,11 @@ static int kvmppc_core_init_vm_pr(struct kvm *kvm) { mutex_init(&kvm->arch.hpt_mutex); +#ifdef CONFIG_PPC_BOOK3S_64 + /* Start out with the default set of hcalls enabled */ + kvmppc_pr_init_default_hcalls(kvm); +#endif + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { spin_lock(&kvm_global_user_count_lock); if (++kvm_global_user_count == 1) @@ -1636,6 +1732,9 @@ static struct kvmppc_ops kvm_ops_pr = { .emulate_mfspr = kvmppc_core_emulate_mfspr_pr, .fast_vcpu_kick = kvm_vcpu_kick, .arch_vm_ioctl = kvm_arch_vm_ioctl_pr, +#ifdef CONFIG_PPC_BOOK3S_64 + .hcall_implemented = kvmppc_hcall_impl_pr, +#endif }; diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 52a63bfe3f07..ce3c893d509b 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -40,8 +40,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu) { long flags = kvmppc_get_gpr(vcpu, 4); long pte_index = kvmppc_get_gpr(vcpu, 5); - unsigned long pteg[2 * 8]; - unsigned long pteg_addr, i, *hpte; + __be64 pteg[2 * 8]; + __be64 *hpte; + unsigned long pteg_addr, i; long int ret; i = pte_index & 7; @@ -93,8 +94,8 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); - pte[0] = be64_to_cpu(pte[0]); - pte[1] = be64_to_cpu(pte[1]); + pte[0] = be64_to_cpu((__force __be64)pte[0]); + pte[1] = be64_to_cpu((__force __be64)pte[1]); ret = H_NOT_FOUND; if ((pte[0] & HPTE_V_VALID) == 0 || @@ -171,8 +172,8 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); - pte[0] = be64_to_cpu(pte[0]); - pte[1] = be64_to_cpu(pte[1]); + pte[0] = be64_to_cpu((__force __be64)pte[0]); + pte[1] = be64_to_cpu((__force __be64)pte[1]); /* tsl = AVPN */ flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26; @@ -211,8 +212,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); - pte[0] = be64_to_cpu(pte[0]); - pte[1] = be64_to_cpu(pte[1]); + pte[0] = be64_to_cpu((__force __be64)pte[0]); + pte[1] = be64_to_cpu((__force __be64)pte[1]); ret = H_NOT_FOUND; if ((pte[0] & HPTE_V_VALID) == 0 || @@ -231,8 +232,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) rb = compute_tlbie_rb(v, r, pte_index); vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); - pte[0] = cpu_to_be64(pte[0]); - pte[1] = cpu_to_be64(pte[1]); + pte[0] = (__force u64)cpu_to_be64(pte[0]); + pte[1] = (__force u64)cpu_to_be64(pte[1]); copy_to_user((void __user *)pteg, pte, sizeof(pte)); ret = H_SUCCESS; @@ -266,6 +267,12 @@ static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { + int rc, idx; + + if (cmd <= MAX_HCALL_OPCODE && + !test_bit(cmd/4, vcpu->kvm->arch.enabled_hcalls)) + return EMULATE_FAIL; + switch (cmd) { case H_ENTER: return kvmppc_h_pr_enter(vcpu); @@ -294,8 +301,11 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) break; case H_RTAS: if (list_empty(&vcpu->kvm->arch.rtas_tokens)) - return RESUME_HOST; - if (kvmppc_rtas_hcall(vcpu)) + break; + idx = srcu_read_lock(&vcpu->kvm->srcu); + rc = kvmppc_rtas_hcall(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (rc) break; kvmppc_set_gpr(vcpu, 3, 0); return EMULATE_DONE; @@ -303,3 +313,61 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) return EMULATE_FAIL; } + +int kvmppc_hcall_impl_pr(unsigned long cmd) +{ + switch (cmd) { + case H_ENTER: + case H_REMOVE: + case H_PROTECT: + case H_BULK_REMOVE: + case H_PUT_TCE: + case H_CEDE: +#ifdef CONFIG_KVM_XICS + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + case H_IPOLL: + case H_XIRR_X: +#endif + return 1; + } + return 0; +} + +/* + * List of hcall numbers to enable by default. + * For compatibility with old userspace, we enable by default + * all hcalls that were implemented before the hcall-enabling + * facility was added. Note this list should not include H_RTAS. + */ +static unsigned int default_hcall_list[] = { + H_ENTER, + H_REMOVE, + H_PROTECT, + H_BULK_REMOVE, + H_PUT_TCE, + H_CEDE, +#ifdef CONFIG_KVM_XICS + H_XIRR, + H_CPPR, + H_EOI, + H_IPI, + H_IPOLL, + H_XIRR_X, +#endif + 0 +}; + +void kvmppc_pr_init_default_hcalls(struct kvm *kvm) +{ + int i; + unsigned int hcall; + + for (i = 0; default_hcall_list[i]; ++i) { + hcall = default_hcall_list[i]; + WARN_ON(!kvmppc_hcall_impl_pr(hcall)); + __set_bit(hcall / 4, kvm->arch.enabled_hcalls); + } +} diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 9eec675220e6..16c4d88ba27d 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -36,7 +36,11 @@ #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #elif defined(CONFIG_PPC_BOOK3S_32) @@ -146,7 +150,7 @@ kvmppc_handler_skip_ins: * On entry, r4 contains the guest shadow MSR * MSR.EE has to be 0 when calling this function */ -_GLOBAL(kvmppc_entry_trampoline) +_GLOBAL_TOC(kvmppc_entry_trampoline) mfmsr r5 LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) toreal(r7) diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index edb14ba992b3..ef27fbd5d9c5 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -23,20 +23,20 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq, server, priority; int rc; - if (args->nargs != 3 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 3 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; - server = args->args[1]; - priority = args->args[2]; + irq = be32_to_cpu(args->args[0]); + server = be32_to_cpu(args->args[1]); + priority = be32_to_cpu(args->args[2]); rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -44,12 +44,12 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq, server, priority; int rc; - if (args->nargs != 1 || args->nret != 3) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 3) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); server = priority = 0; rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); @@ -58,10 +58,10 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) goto out; } - args->rets[1] = server; - args->rets[2] = priority; + args->rets[1] = cpu_to_be32(server); + args->rets[2] = cpu_to_be32(priority); out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -69,18 +69,18 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq; int rc; - if (args->nargs != 1 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); rc = kvmppc_xics_int_off(vcpu->kvm, irq); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -88,18 +88,18 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq; int rc; - if (args->nargs != 1 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); rc = kvmppc_xics_int_on(vcpu->kvm, irq); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } #endif /* CONFIG_KVM_XICS */ @@ -205,32 +205,6 @@ int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp) return rc; } -static void kvmppc_rtas_swap_endian_in(struct rtas_args *args) -{ -#ifdef __LITTLE_ENDIAN__ - int i; - - args->token = be32_to_cpu(args->token); - args->nargs = be32_to_cpu(args->nargs); - args->nret = be32_to_cpu(args->nret); - for (i = 0; i < args->nargs; i++) - args->args[i] = be32_to_cpu(args->args[i]); -#endif -} - -static void kvmppc_rtas_swap_endian_out(struct rtas_args *args) -{ -#ifdef __LITTLE_ENDIAN__ - int i; - - for (i = 0; i < args->nret; i++) - args->args[i] = cpu_to_be32(args->args[i]); - args->token = cpu_to_be32(args->token); - args->nargs = cpu_to_be32(args->nargs); - args->nret = cpu_to_be32(args->nret); -#endif -} - int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) { struct rtas_token_definition *d; @@ -249,8 +223,6 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) if (rc) goto fail; - kvmppc_rtas_swap_endian_in(&args); - /* * args->rets is a pointer into args->args. Now that we've * copied args we need to fix it up to point into our copy, @@ -258,13 +230,13 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) * value so we can restore it on the way out. */ orig_rets = args.rets; - args.rets = &args.args[args.nargs]; + args.rets = &args.args[be32_to_cpu(args.nargs)]; mutex_lock(&vcpu->kvm->lock); rc = -ENOENT; list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) { - if (d->token == args.token) { + if (d->token == be32_to_cpu(args.token)) { d->handler->handler(vcpu, &args); rc = 0; break; @@ -275,7 +247,6 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) if (rc == 0) { args.rets = orig_rets; - kvmppc_rtas_swap_endian_out(&args); rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args)); if (rc) goto fail; diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index d1acd32a64c0..eaeb78047fb8 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -64,8 +64,12 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, u32 new_irq); -static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, - bool report_status) +/* + * Return value ideally indicates how the interrupt was handled, but no + * callers look at it (given that we don't implement KVM_IRQ_LINE_STATUS), + * so just return 0. + */ +static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) { struct ics_irq_state *state; struct kvmppc_ics *ics; @@ -82,17 +86,14 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, if (!state->exists) return -EINVAL; - if (report_status) - return state->asserted; - /* * We set state->asserted locklessly. This should be fine as * we are the only setter, thus concurrent access is undefined * to begin with. */ - if (level == KVM_INTERRUPT_SET_LEVEL) + if (level == 1 || level == KVM_INTERRUPT_SET_LEVEL) state->asserted = 1; - else if (level == KVM_INTERRUPT_UNSET) { + else if (level == 0 || level == KVM_INTERRUPT_UNSET) { state->asserted = 0; return 0; } @@ -100,7 +101,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, /* Attempt delivery */ icp_deliver_irq(xics, NULL, irq); - return state->asserted; + return 0; } static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, @@ -772,6 +773,8 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) if (state->asserted) icp_deliver_irq(xics, icp, irq); + kvm_notify_acked_irq(vcpu->kvm, 0, irq); + return H_SUCCESS; } @@ -789,6 +792,8 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) icp_check_resend(xics, icp); if (icp->rm_action & XICS_RM_REJECT) icp_deliver_irq(xics, icp, icp->rm_reject); + if (icp->rm_action & XICS_RM_NOTIFY_EOI) + kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq); icp->rm_action = 0; @@ -1170,7 +1175,16 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, { struct kvmppc_xics *xics = kvm->arch.xics; - return ics_deliver_irq(xics, irq, level, line_status); + return ics_deliver_irq(xics, irq, level); +} + +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, + int irq_source_id, int level, bool line_status) +{ + if (!level) + return -1; + return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi, + level, line_status); } static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) @@ -1301,3 +1315,26 @@ void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) vcpu->arch.icp = NULL; vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; } + +static int xics_set_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status); +} + +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, int gsi) +{ + entries->gsi = gsi; + entries->type = KVM_IRQ_ROUTING_IRQCHIP; + entries->set = xics_set_irq; + entries->irqchip.irqchip = 0; + entries->irqchip.pin = gsi; + return 1; +} + +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + return pin; +} diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index dd9326c5c19b..e8aaa7a3f209 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -71,9 +71,11 @@ struct kvmppc_icp { #define XICS_RM_KICK_VCPU 0x1 #define XICS_RM_CHECK_RESEND 0x2 #define XICS_RM_REJECT 0x4 +#define XICS_RM_NOTIFY_EOI 0x8 u32 rm_action; struct kvm_vcpu *rm_kick_target; u32 rm_reject; + u32 rm_eoied_irq; /* Debug stuff for real mode */ union kvmppc_icp_state rm_dbgstate; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ab62109fdfa3..b4c89fa6f109 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -51,7 +51,6 @@ unsigned long kvmppc_booke_handlers; struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmio", VCPU_STAT(mmio_exits) }, - { "dcr", VCPU_STAT(dcr_exits) }, { "sig", VCPU_STAT(signal_exits) }, { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, @@ -185,24 +184,28 @@ static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, set_bit(priority, &vcpu->arch.pending_exceptions); } -static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, - ulong dear_flags, ulong esr_flags) +void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, + ulong dear_flags, ulong esr_flags) { vcpu->arch.queued_dear = dear_flags; vcpu->arch.queued_esr = esr_flags; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); } -static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, - ulong dear_flags, ulong esr_flags) +void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, + ulong dear_flags, ulong esr_flags) { vcpu->arch.queued_dear = dear_flags; vcpu->arch.queued_esr = esr_flags; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); } -static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, - ulong esr_flags) +void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); +} + +void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong esr_flags) { vcpu->arch.queued_esr = esr_flags; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); @@ -266,13 +269,8 @@ static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu) static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) { -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_GSRR0, srr0); - mtspr(SPRN_GSRR1, srr1); -#else - vcpu->arch.shared->srr0 = srr0; - vcpu->arch.shared->srr1 = srr1; -#endif + kvmppc_set_srr0(vcpu, srr0); + kvmppc_set_srr1(vcpu, srr1); } static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) @@ -297,51 +295,6 @@ static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) vcpu->arch.mcsrr1 = srr1; } -static unsigned long get_guest_dear(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_KVM_BOOKE_HV - return mfspr(SPRN_GDEAR); -#else - return vcpu->arch.shared->dar; -#endif -} - -static void set_guest_dear(struct kvm_vcpu *vcpu, unsigned long dear) -{ -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_GDEAR, dear); -#else - vcpu->arch.shared->dar = dear; -#endif -} - -static unsigned long get_guest_esr(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_KVM_BOOKE_HV - return mfspr(SPRN_GESR); -#else - return vcpu->arch.shared->esr; -#endif -} - -static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr) -{ -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_GESR, esr); -#else - vcpu->arch.shared->esr = esr; -#endif -} - -static unsigned long get_guest_epr(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_KVM_BOOKE_HV - return mfspr(SPRN_GEPR); -#else - return vcpu->arch.epr; -#endif -} - /* Deliver the interrupt of the corresponding priority, if possible. */ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) @@ -450,9 +403,9 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; if (update_esr == true) - set_guest_esr(vcpu, vcpu->arch.queued_esr); + kvmppc_set_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) - set_guest_dear(vcpu, vcpu->arch.queued_dear); + kvmppc_set_dar(vcpu, vcpu->arch.queued_dear); if (update_epr == true) { if (vcpu->arch.epr_flags & KVMPPC_EPR_USER) kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); @@ -752,9 +705,8 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) * they were actually modified by emulation. */ return RESUME_GUEST_NV; - case EMULATE_DO_DCR: - run->exit_reason = KVM_EXIT_DCR; - return RESUME_HOST; + case EMULATE_AGAIN: + return RESUME_GUEST; case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", @@ -866,6 +818,28 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, } } +static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + enum emulation_result emulated, u32 last_inst) +{ + switch (emulated) { + case EMULATE_AGAIN: + return RESUME_GUEST; + + case EMULATE_FAIL: + pr_debug("%s: load instruction from guest address %lx failed\n", + __func__, vcpu->arch.pc); + /* For debugging, encode the failing instruction and + * report it to userspace. */ + run->hw.hardware_exit_reason = ~0ULL << 32; + run->hw.hardware_exit_reason |= last_inst; + kvmppc_core_queue_program(vcpu, ESR_PIL); + return RESUME_HOST; + + default: + BUG(); + } +} + /** * kvmppc_handle_exit * @@ -877,6 +851,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, int r = RESUME_HOST; int s; int idx; + u32 last_inst = KVM_INST_FETCH_FAILED; + enum emulation_result emulated = EMULATE_DONE; /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); @@ -884,6 +860,20 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* restart interrupts if they were meant for the host */ kvmppc_restart_interrupt(vcpu, exit_nr); + /* + * get last instruction before beeing preempted + * TODO: for e6500 check also BOOKE_INTERRUPT_LRAT_ERROR & ESR_DATA + */ + switch (exit_nr) { + case BOOKE_INTERRUPT_DATA_STORAGE: + case BOOKE_INTERRUPT_DTLB_MISS: + case BOOKE_INTERRUPT_HV_PRIV: + emulated = kvmppc_get_last_inst(vcpu, false, &last_inst); + break; + default: + break; + } + local_irq_enable(); trace_kvm_exit(exit_nr, vcpu); @@ -892,6 +882,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; + if (emulated != EMULATE_DONE) { + r = kvmppc_resume_inst_load(run, vcpu, emulated, last_inst); + goto out; + } + switch (exit_nr) { case BOOKE_INTERRUPT_MACHINE_CHECK: printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); @@ -1181,6 +1176,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, BUG(); } +out: /* * To avoid clobbering exit_reason, only check for signals if we * aren't already exiting to userspace for some other reason. @@ -1265,17 +1261,17 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->lr = vcpu->arch.lr; regs->xer = kvmppc_get_xer(vcpu); regs->msr = vcpu->arch.shared->msr; - regs->srr0 = vcpu->arch.shared->srr0; - regs->srr1 = vcpu->arch.shared->srr1; + regs->srr0 = kvmppc_get_srr0(vcpu); + regs->srr1 = kvmppc_get_srr1(vcpu); regs->pid = vcpu->arch.pid; - regs->sprg0 = vcpu->arch.shared->sprg0; - regs->sprg1 = vcpu->arch.shared->sprg1; - regs->sprg2 = vcpu->arch.shared->sprg2; - regs->sprg3 = vcpu->arch.shared->sprg3; - regs->sprg4 = vcpu->arch.shared->sprg4; - regs->sprg5 = vcpu->arch.shared->sprg5; - regs->sprg6 = vcpu->arch.shared->sprg6; - regs->sprg7 = vcpu->arch.shared->sprg7; + regs->sprg0 = kvmppc_get_sprg0(vcpu); + regs->sprg1 = kvmppc_get_sprg1(vcpu); + regs->sprg2 = kvmppc_get_sprg2(vcpu); + regs->sprg3 = kvmppc_get_sprg3(vcpu); + regs->sprg4 = kvmppc_get_sprg4(vcpu); + regs->sprg5 = kvmppc_get_sprg5(vcpu); + regs->sprg6 = kvmppc_get_sprg6(vcpu); + regs->sprg7 = kvmppc_get_sprg7(vcpu); for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); @@ -1293,17 +1289,17 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.lr = regs->lr; kvmppc_set_xer(vcpu, regs->xer); kvmppc_set_msr(vcpu, regs->msr); - vcpu->arch.shared->srr0 = regs->srr0; - vcpu->arch.shared->srr1 = regs->srr1; + kvmppc_set_srr0(vcpu, regs->srr0); + kvmppc_set_srr1(vcpu, regs->srr1); kvmppc_set_pid(vcpu, regs->pid); - vcpu->arch.shared->sprg0 = regs->sprg0; - vcpu->arch.shared->sprg1 = regs->sprg1; - vcpu->arch.shared->sprg2 = regs->sprg2; - vcpu->arch.shared->sprg3 = regs->sprg3; - vcpu->arch.shared->sprg4 = regs->sprg4; - vcpu->arch.shared->sprg5 = regs->sprg5; - vcpu->arch.shared->sprg6 = regs->sprg6; - vcpu->arch.shared->sprg7 = regs->sprg7; + kvmppc_set_sprg0(vcpu, regs->sprg0); + kvmppc_set_sprg1(vcpu, regs->sprg1); + kvmppc_set_sprg2(vcpu, regs->sprg2); + kvmppc_set_sprg3(vcpu, regs->sprg3); + kvmppc_set_sprg4(vcpu, regs->sprg4); + kvmppc_set_sprg5(vcpu, regs->sprg5); + kvmppc_set_sprg6(vcpu, regs->sprg6); + kvmppc_set_sprg7(vcpu, regs->sprg7); for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); @@ -1321,8 +1317,8 @@ static void get_sregs_base(struct kvm_vcpu *vcpu, sregs->u.e.csrr0 = vcpu->arch.csrr0; sregs->u.e.csrr1 = vcpu->arch.csrr1; sregs->u.e.mcsr = vcpu->arch.mcsr; - sregs->u.e.esr = get_guest_esr(vcpu); - sregs->u.e.dear = get_guest_dear(vcpu); + sregs->u.e.esr = kvmppc_get_esr(vcpu); + sregs->u.e.dear = kvmppc_get_dar(vcpu); sregs->u.e.tsr = vcpu->arch.tsr; sregs->u.e.tcr = vcpu->arch.tcr; sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); @@ -1339,8 +1335,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, vcpu->arch.csrr0 = sregs->u.e.csrr0; vcpu->arch.csrr1 = sregs->u.e.csrr1; vcpu->arch.mcsr = sregs->u.e.mcsr; - set_guest_esr(vcpu, sregs->u.e.esr); - set_guest_dear(vcpu, sregs->u.e.dear); + kvmppc_set_esr(vcpu, sregs->u.e.esr); + kvmppc_set_dar(vcpu, sregs->u.e.dear); vcpu->arch.vrsave = sregs->u.e.vrsave; kvmppc_set_tcr(vcpu, sregs->u.e.tcr); @@ -1493,7 +1489,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2); break; case KVM_REG_PPC_EPR: { - u32 epr = get_guest_epr(vcpu); + u32 epr = kvmppc_get_epr(vcpu); val = get_reg_val(reg->id, epr); break; } @@ -1788,6 +1784,57 @@ void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set) #endif } +int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, + enum xlate_readwrite xlrw, struct kvmppc_pte *pte) +{ + int gtlb_index; + gpa_t gpaddr; + +#ifdef CONFIG_KVM_E500V2 + if (!(vcpu->arch.shared->msr & MSR_PR) && + (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) { + pte->eaddr = eaddr; + pte->raddr = (vcpu->arch.magic_page_pa & PAGE_MASK) | + (eaddr & ~PAGE_MASK); + pte->vpage = eaddr >> PAGE_SHIFT; + pte->may_read = true; + pte->may_write = true; + pte->may_execute = true; + + return 0; + } +#endif + + /* Check the guest TLB. */ + switch (xlid) { + case XLATE_INST: + gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr); + break; + case XLATE_DATA: + gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); + break; + default: + BUG(); + } + + /* Do we have a TLB entry at all? */ + if (gtlb_index < 0) + return -ENOENT; + + gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr); + + pte->eaddr = eaddr; + pte->raddr = (gpaddr & PAGE_MASK) | (eaddr & ~PAGE_MASK); + pte->vpage = eaddr >> PAGE_SHIFT; + + /* XXX read permissions from the guest TLB */ + pte->may_read = true; + pte->may_write = true; + pte->may_execute = true; + + return 0; +} + int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index b632cd35919b..f753543c56fa 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -99,13 +99,6 @@ enum int_class { void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); -extern void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu); -extern int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance); -extern int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, - ulong spr_val); -extern int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, - ulong *spr_val); extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 27a4b2877c10..28c158881d23 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -165,16 +165,16 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) * guest (PR-mode only). */ case SPRN_SPRG4: - vcpu->arch.shared->sprg4 = spr_val; + kvmppc_set_sprg4(vcpu, spr_val); break; case SPRN_SPRG5: - vcpu->arch.shared->sprg5 = spr_val; + kvmppc_set_sprg5(vcpu, spr_val); break; case SPRN_SPRG6: - vcpu->arch.shared->sprg6 = spr_val; + kvmppc_set_sprg6(vcpu, spr_val); break; case SPRN_SPRG7: - vcpu->arch.shared->sprg7 = spr_val; + kvmppc_set_sprg7(vcpu, spr_val); break; case SPRN_IVPR: diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 2c6deb5ef2fe..84c308a9a371 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -21,7 +21,6 @@ #include <asm/ppc_asm.h> #include <asm/kvm_asm.h> #include <asm/reg.h> -#include <asm/mmu-44x.h> #include <asm/page.h> #include <asm/asm-offsets.h> @@ -424,10 +423,6 @@ lightweight_exit: mtspr SPRN_PID1, r3 #endif -#ifdef CONFIG_44x - iccci 0, 0 /* XXX hack */ -#endif - /* Load some guest volatiles. */ lwz r0, VCPU_GPR(R0)(r4) lwz r2, VCPU_GPR(R2)(r4) diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index a1712b818a5f..e9fa56a911fd 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -24,12 +24,10 @@ #include <asm/ppc_asm.h> #include <asm/kvm_asm.h> #include <asm/reg.h> -#include <asm/mmu-44x.h> #include <asm/page.h> #include <asm/asm-compat.h> #include <asm/asm-offsets.h> #include <asm/bitsperlong.h> -#include <asm/thread_info.h> #ifdef CONFIG_64BIT #include <asm/exception-64e.h> @@ -122,38 +120,14 @@ 1: .if \flags & NEED_EMU - /* - * This assumes you have external PID support. - * To support a bookehv CPU without external PID, you'll - * need to look up the TLB entry and create a temporary mapping. - * - * FIXME: we don't currently handle if the lwepx faults. PR-mode - * booke doesn't handle it either. Since Linux doesn't use - * broadcast tlbivax anymore, the only way this should happen is - * if the guest maps its memory execute-but-not-read, or if we - * somehow take a TLB miss in the middle of this entry code and - * evict the relevant entry. On e500mc, all kernel lowmem is - * bolted into TLB1 large page mappings, and we don't use - * broadcast invalidates, so we should not take a TLB miss here. - * - * Later we'll need to deal with faults here. Disallowing guest - * mappings that are execute-but-not-read could be an option on - * e500mc, but not on chips with an LRAT if it is used. - */ - - mfspr r3, SPRN_EPLC /* will already have correct ELPID and EGS */ PPC_STL r15, VCPU_GPR(R15)(r4) PPC_STL r16, VCPU_GPR(R16)(r4) PPC_STL r17, VCPU_GPR(R17)(r4) PPC_STL r18, VCPU_GPR(R18)(r4) PPC_STL r19, VCPU_GPR(R19)(r4) - mr r8, r3 PPC_STL r20, VCPU_GPR(R20)(r4) - rlwimi r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS PPC_STL r21, VCPU_GPR(R21)(r4) - rlwimi r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR PPC_STL r22, VCPU_GPR(R22)(r4) - rlwimi r8, r10, EPC_EPID_SHIFT, EPC_EPID PPC_STL r23, VCPU_GPR(R23)(r4) PPC_STL r24, VCPU_GPR(R24)(r4) PPC_STL r25, VCPU_GPR(R25)(r4) @@ -163,33 +137,15 @@ PPC_STL r29, VCPU_GPR(R29)(r4) PPC_STL r30, VCPU_GPR(R30)(r4) PPC_STL r31, VCPU_GPR(R31)(r4) - mtspr SPRN_EPLC, r8 - - /* disable preemption, so we are sure we hit the fixup handler */ - CURRENT_THREAD_INFO(r8, r1) - li r7, 1 - stw r7, TI_PREEMPT(r8) - - isync /* - * In case the read goes wrong, we catch it and write an invalid value - * in LAST_INST instead. + * We don't use external PID support. lwepx faults would need to be + * handled by KVM and this implies aditional code in DO_KVM (for + * DTB_MISS, DSI and LRAT) to check ESR[EPID] and EPLC[EGS] which + * is too intrusive for the host. Get last instuction in + * kvmppc_get_last_inst(). */ -1: lwepx r9, 0, r5 -2: -.section .fixup, "ax" -3: li r9, KVM_INST_FETCH_FAILED - b 2b -.previous -.section __ex_table,"a" - PPC_LONG_ALIGN - PPC_LONG 1b,3b -.previous - - mtspr SPRN_EPLC, r3 - li r7, 0 - stw r7, TI_PREEMPT(r8) + li r9, KVM_INST_FETCH_FAILED stw r9, VCPU_LAST_INST(r4) .endif @@ -441,6 +397,7 @@ _GLOBAL(kvmppc_resume_host) #ifdef CONFIG_64BIT PPC_LL r3, PACA_SPRG_VDSO(r13) #endif + mfspr r5, SPRN_SPRG9 PPC_STD(r6, VCPU_SHARED_SPRG4, r11) mfspr r8, SPRN_SPRG6 PPC_STD(r7, VCPU_SHARED_SPRG5, r11) @@ -448,6 +405,7 @@ _GLOBAL(kvmppc_resume_host) #ifdef CONFIG_64BIT mtspr SPRN_SPRG_VDSO_WRITE, r3 #endif + PPC_STD(r5, VCPU_SPRG9, r4) PPC_STD(r8, VCPU_SHARED_SPRG6, r11) mfxer r3 PPC_STD(r9, VCPU_SHARED_SPRG7, r11) @@ -682,7 +640,9 @@ lightweight_exit: mtspr SPRN_SPRG5W, r6 PPC_LD(r8, VCPU_SHARED_SPRG7, r11) mtspr SPRN_SPRG6W, r7 + PPC_LD(r5, VCPU_SPRG9, r4) mtspr SPRN_SPRG7W, r8 + mtspr SPRN_SPRG9, r5 /* Load some guest volatiles. */ PPC_LL r3, VCPU_LR(r4) diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 002d51764143..c99c40e9182a 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -250,6 +250,14 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va spr_val); break; + case SPRN_PWRMGTCR0: + /* + * Guest relies on host power management configurations + * Treat the request as a general store + */ + vcpu->arch.pwrmgtcr0 = spr_val; + break; + /* extra exceptions */ case SPRN_IVOR32: vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val; @@ -368,6 +376,10 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v *spr_val = vcpu->arch.eptcfg; break; + case SPRN_PWRMGTCR0: + *spr_val = vcpu->arch.pwrmgtcr0; + break; + /* extra exceptions */ case SPRN_IVOR32: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index dd2cc03f406f..08f14bb57897 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -107,11 +107,15 @@ static u32 get_host_mas0(unsigned long eaddr) { unsigned long flags; u32 mas0; + u32 mas4; local_irq_save(flags); mtspr(SPRN_MAS6, 0); + mas4 = mfspr(SPRN_MAS4); + mtspr(SPRN_MAS4, mas4 & ~MAS4_TLBSEL_MASK); asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET)); mas0 = mfspr(SPRN_MAS0); + mtspr(SPRN_MAS4, mas4); local_irq_restore(flags); return mas0; @@ -473,7 +477,8 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (printk_ratelimit()) pr_err("%s: pte not present: gfn %lx, pfn %lx\n", __func__, (long)gfn, pfn); - return -EINVAL; + ret = -EINVAL; + goto out; } kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg); @@ -606,6 +611,104 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, } } +#ifdef CONFIG_KVM_BOOKE_HV +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, + u32 *instr) +{ + gva_t geaddr; + hpa_t addr; + hfn_t pfn; + hva_t eaddr; + u32 mas1, mas2, mas3; + u64 mas7_mas3; + struct page *page; + unsigned int addr_space, psize_shift; + bool pr; + unsigned long flags; + + /* Search TLB for guest pc to get the real address */ + geaddr = kvmppc_get_pc(vcpu); + + addr_space = (vcpu->arch.shared->msr & MSR_IS) >> MSR_IR_LG; + + local_irq_save(flags); + mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space); + mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid); + asm volatile("tlbsx 0, %[geaddr]\n" : : + [geaddr] "r" (geaddr)); + mtspr(SPRN_MAS5, 0); + mtspr(SPRN_MAS8, 0); + mas1 = mfspr(SPRN_MAS1); + mas2 = mfspr(SPRN_MAS2); + mas3 = mfspr(SPRN_MAS3); +#ifdef CONFIG_64BIT + mas7_mas3 = mfspr(SPRN_MAS7_MAS3); +#else + mas7_mas3 = ((u64)mfspr(SPRN_MAS7) << 32) | mas3; +#endif + local_irq_restore(flags); + + /* + * If the TLB entry for guest pc was evicted, return to the guest. + * There are high chances to find a valid TLB entry next time. + */ + if (!(mas1 & MAS1_VALID)) + return EMULATE_AGAIN; + + /* + * Another thread may rewrite the TLB entry in parallel, don't + * execute from the address if the execute permission is not set + */ + pr = vcpu->arch.shared->msr & MSR_PR; + if (unlikely((pr && !(mas3 & MAS3_UX)) || + (!pr && !(mas3 & MAS3_SX)))) { + pr_err_ratelimited( + "%s: Instuction emulation from guest addres %08lx without execute permission\n", + __func__, geaddr); + return EMULATE_AGAIN; + } + + /* + * The real address will be mapped by a cacheable, memory coherent, + * write-back page. Check for mismatches when LRAT is used. + */ + if (has_feature(vcpu, VCPU_FTR_MMU_V2) && + unlikely((mas2 & MAS2_I) || (mas2 & MAS2_W) || !(mas2 & MAS2_M))) { + pr_err_ratelimited( + "%s: Instuction emulation from guest addres %08lx mismatches storage attributes\n", + __func__, geaddr); + return EMULATE_AGAIN; + } + + /* Get pfn */ + psize_shift = MAS1_GET_TSIZE(mas1) + 10; + addr = (mas7_mas3 & (~0ULL << psize_shift)) | + (geaddr & ((1ULL << psize_shift) - 1ULL)); + pfn = addr >> PAGE_SHIFT; + + /* Guard against emulation from devices area */ + if (unlikely(!page_is_ram(pfn))) { + pr_err_ratelimited("%s: Instruction emulation from non-RAM host addres %08llx is not supported\n", + __func__, addr); + return EMULATE_AGAIN; + } + + /* Map a page and get guest's instruction */ + page = pfn_to_page(pfn); + eaddr = (unsigned long)kmap_atomic(page); + *instr = *(u32 *)(eaddr | (unsigned long)(addr & ~PAGE_MASK)); + kunmap_atomic((u32 *)eaddr); + + return EMULATE_DONE; +} +#else +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, + u32 *instr) +{ + return EMULATE_AGAIN; +} +#endif + /************* MMU Notifiers *************/ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 17e456279224..164bad2a19bf 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -110,7 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) { } -static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu); +static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid); static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) { @@ -141,9 +141,9 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GESR, vcpu->arch.shared->esr); if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || - __get_cpu_var(last_vcpu_on_cpu) != vcpu) { + __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); - __get_cpu_var(last_vcpu_on_cpu) = vcpu; + __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu; } kvmppc_load_guest_fp(vcpu); @@ -267,14 +267,32 @@ static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu, static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); + int r = 0; + + switch (id) { + case KVM_REG_PPC_SPRG9: + *val = get_reg_val(id, vcpu->arch.sprg9); + break; + default: + r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); + } + return r; } static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); + int r = 0; + + switch (id) { + case KVM_REG_PPC_SPRG9: + vcpu->arch.sprg9 = set_reg_val(id, *val); + break; + default: + r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); + } + return r; } diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index da86d9ba3476..e96b50d0bdab 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -207,36 +207,28 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) return emulated; } -/* XXX to do: - * lhax - * lhaux - * lswx - * lswi - * stswx - * stswi - * lha - * lhau - * lmw - * stmw - * - */ /* XXX Should probably auto-generate instruction decoding for a particular core * from opcode tables in the future. */ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { - u32 inst = kvmppc_get_last_inst(vcpu); - int ra = get_ra(inst); - int rs = get_rs(inst); - int rt = get_rt(inst); - int sprn = get_sprn(inst); - enum emulation_result emulated = EMULATE_DONE; + u32 inst; + int rs, rt, sprn; + enum emulation_result emulated; int advance = 1; /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); + emulated = kvmppc_get_last_inst(vcpu, false, &inst); + if (emulated != EMULATE_DONE) + return emulated; + pr_debug("Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); + rs = get_rs(inst); + rt = get_rt(inst); + sprn = get_sprn(inst); + switch (get_op(inst)) { case OP_TRAP: #ifdef CONFIG_PPC_BOOK3S @@ -264,200 +256,24 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) #endif advance = 0; break; - case OP_31_XOP_LWZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - break; - - case OP_31_XOP_LBZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - break; - - case OP_31_XOP_LBZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_STWX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - break; - - case OP_31_XOP_STBX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - break; - - case OP_31_XOP_STBUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_LHAX: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - - case OP_31_XOP_LHZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - break; - - case OP_31_XOP_LHZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; case OP_31_XOP_MFSPR: emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt); break; - case OP_31_XOP_STHX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - break; - - case OP_31_XOP_STHUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - case OP_31_XOP_MTSPR: emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); break; - case OP_31_XOP_DCBST: - case OP_31_XOP_DCBF: - case OP_31_XOP_DCBI: - /* Do nothing. The guest is performing dcbi because - * hardware DMA is not snooped by the dcache, but - * emulated DMA either goes through the dcache as - * normal writes, or the host kernel has handled dcache - * coherence. */ - break; - - case OP_31_XOP_LWBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); - break; - case OP_31_XOP_TLBSYNC: break; - case OP_31_XOP_STWBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 0); - break; - - case OP_31_XOP_LHBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); - break; - - case OP_31_XOP_STHBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 0); - break; - default: /* Attempt core-specific emulation below. */ emulated = EMULATE_FAIL; } break; - case OP_LWZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - break; - - /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. */ - case OP_LD: - rt = get_rt(inst); - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); - break; - - case OP_LWZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LBZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - break; - - case OP_LBZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STW: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - break; - - /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */ - case OP_STD: - rs = get_rs(inst); - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 8, 1); - break; - - case OP_STWU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STB: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - break; - - case OP_STBU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LHZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - break; - - case OP_LHZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LHA: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - - case OP_LHAU: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STH: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - break; - - case OP_STHU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - default: emulated = EMULATE_FAIL; } diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c new file mode 100644 index 000000000000..0de4ffa175a9 --- /dev/null +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -0,0 +1,272 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * Copyright 2011 Freescale Semiconductor, Inc. + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <linux/jiffies.h> +#include <linux/hrtimer.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/kvm_host.h> +#include <linux/clockchips.h> + +#include <asm/reg.h> +#include <asm/time.h> +#include <asm/byteorder.h> +#include <asm/kvm_ppc.h> +#include <asm/disassemble.h> +#include <asm/ppc-opcode.h> +#include "timing.h" +#include "trace.h" + +/* XXX to do: + * lhax + * lhaux + * lswx + * lswi + * stswx + * stswi + * lha + * lhau + * lmw + * stmw + * + */ +int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 inst; + int ra, rs, rt; + enum emulation_result emulated; + int advance = 1; + + /* this default type might be overwritten by subcategories */ + kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); + + emulated = kvmppc_get_last_inst(vcpu, false, &inst); + if (emulated != EMULATE_DONE) + return emulated; + + ra = get_ra(inst); + rs = get_rs(inst); + rt = get_rt(inst); + + switch (get_op(inst)) { + case 31: + switch (get_xop(inst)) { + case OP_31_XOP_LWZX: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + break; + + case OP_31_XOP_LBZX: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case OP_31_XOP_LBZUX: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_STWX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + break; + + case OP_31_XOP_STBX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + break; + + case OP_31_XOP_STBUX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_LHAX: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + break; + + case OP_31_XOP_LHZX: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case OP_31_XOP_LHZUX: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_STHX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + break; + + case OP_31_XOP_STHUX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_DCBST: + case OP_31_XOP_DCBF: + case OP_31_XOP_DCBI: + /* Do nothing. The guest is performing dcbi because + * hardware DMA is not snooped by the dcache, but + * emulated DMA either goes through the dcache as + * normal writes, or the host kernel has handled dcache + * coherence. */ + break; + + case OP_31_XOP_LWBRX: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); + break; + + case OP_31_XOP_STWBRX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 0); + break; + + case OP_31_XOP_LHBRX: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); + break; + + case OP_31_XOP_STHBRX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 0); + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + + case OP_LWZ: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + break; + + /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. */ + case OP_LD: + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); + break; + + case OP_LWZU: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LBZ: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case OP_LBZU: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STW: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + break; + + /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */ + case OP_STD: + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 8, 1); + break; + + case OP_STWU: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STB: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + break; + + case OP_STBU: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LHZ: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case OP_LHZU: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LHA: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + break; + + case OP_LHAU: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STH: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + break; + + case OP_STHU: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + default: + emulated = EMULATE_FAIL; + break; + } + + if (emulated == EMULATE_FAIL) { + advance = 0; + kvmppc_core_queue_program(vcpu, 0); + } + + trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated); + + /* Advance past emulated instruction. */ + if (advance) + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); + + return emulated; +} diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index b68d0dc9479a..39b3a8f816f2 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1826,8 +1826,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return 0; } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; @@ -1839,7 +1838,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, e->irqchip.pin = ue->u.irqchip.pin; if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) goto out; - rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 61c738ab1283..4c79284b58be 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -190,6 +190,25 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) vcpu->arch.magic_page_pa = param1 & ~0xfffULL; vcpu->arch.magic_page_ea = param2 & ~0xfffULL; +#ifdef CONFIG_PPC_64K_PAGES + /* + * Make sure our 4k magic page is in the same window of a 64k + * page within the guest and within the host's page. + */ + if ((vcpu->arch.magic_page_pa & 0xf000) != + ((ulong)vcpu->arch.shared & 0xf000)) { + void *old_shared = vcpu->arch.shared; + ulong shared = (ulong)vcpu->arch.shared; + void *new_shared; + + shared &= PAGE_MASK; + shared |= vcpu->arch.magic_page_pa & 0xf000; + new_shared = (void*)shared; + memcpy(new_shared, old_shared, 0x1000); + vcpu->arch.shared = new_shared; + } +#endif + r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7; r = EV_SUCCESS; @@ -198,7 +217,6 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) case KVM_HCALL_TOKEN(KVM_HC_FEATURES): r = EV_SUCCESS; #if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2) - /* XXX Missing magic page on 44x */ r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); #endif @@ -254,13 +272,16 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) enum emulation_result er; int r; - er = kvmppc_emulate_instruction(run, vcpu); + er = kvmppc_emulate_loadstore(vcpu); switch (er) { case EMULATE_DONE: /* Future optimization: only reload non-volatiles if they were * actually modified. */ r = RESUME_GUEST_NV; break; + case EMULATE_AGAIN: + r = RESUME_GUEST; + break; case EMULATE_DO_MMIO: run->exit_reason = KVM_EXIT_MMIO; /* We must reload nonvolatiles because "update" load/store @@ -270,11 +291,15 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) r = RESUME_HOST_NV; break; case EMULATE_FAIL: + { + u32 last_inst; + + kvmppc_get_last_inst(vcpu, false, &last_inst); /* XXX Deliver Program interrupt to guest. */ - printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, - kvmppc_get_last_inst(vcpu)); + pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst); r = RESUME_HOST; break; + } default: WARN_ON(1); r = RESUME_GUEST; @@ -284,6 +309,81 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); +int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data) +{ + ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; + struct kvmppc_pte pte; + int r; + + vcpu->stat.st++; + + r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_WRITE, &pte); + if (r < 0) + return r; + + *eaddr = pte.raddr; + + if (!pte.may_write) + return -EPERM; + + /* Magic page override */ + if (kvmppc_supports_magic_page(vcpu) && mp_pa && + ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) && + !(kvmppc_get_msr(vcpu) & MSR_PR)) { + void *magic = vcpu->arch.shared; + magic += pte.eaddr & 0xfff; + memcpy(magic, ptr, size); + return EMULATE_DONE; + } + + if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) + return EMULATE_DO_MMIO; + + return EMULATE_DONE; +} +EXPORT_SYMBOL_GPL(kvmppc_st); + +int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data) +{ + ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; + struct kvmppc_pte pte; + int rc; + + vcpu->stat.ld++; + + rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_READ, &pte); + if (rc) + return rc; + + *eaddr = pte.raddr; + + if (!pte.may_read) + return -EPERM; + + if (!data && !pte.may_execute) + return -ENOEXEC; + + /* Magic page override */ + if (kvmppc_supports_magic_page(vcpu) && mp_pa && + ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) && + !(kvmppc_get_msr(vcpu) & MSR_PR)) { + void *magic = vcpu->arch.shared; + magic += pte.eaddr & 0xfff; + memcpy(ptr, magic, size); + return EMULATE_DONE; + } + + if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size)) + return EMULATE_DO_MMIO; + + return EMULATE_DONE; +} +EXPORT_SYMBOL_GPL(kvmppc_ld); + int kvm_arch_hardware_enable(void *garbage) { return 0; @@ -366,14 +466,20 @@ void kvm_arch_sync_events(struct kvm *kvm) { } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; - /* FIXME!! - * Should some of this be vm ioctl ? is it possible now ? - */ + /* Assume we're using HV mode when the HV module is loaded */ int hv_enabled = kvmppc_hv_ops ? 1 : 0; + if (kvm) { + /* + * Hooray - we know which VM type we're running on. Depend on + * that rather than the guess above. + */ + hv_enabled = is_kvmppc_hv_enabled(kvm); + } + switch (ext) { #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_SREGS: @@ -387,6 +493,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: case KVM_CAP_ENABLE_CAP: + case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_ONE_REG: case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: @@ -417,6 +524,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_ALLOC_HTAB: case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: + case KVM_CAP_PPC_ENABLE_HCALL: #ifdef CONFIG_KVM_XICS case KVM_CAP_IRQ_XICS: #endif @@ -635,12 +743,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #endif } -static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, - struct kvm_run *run) -{ - kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data); -} - static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { @@ -837,10 +939,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (!vcpu->mmio_is_write) kvmppc_complete_mmio_load(vcpu, run); vcpu->mmio_needed = 0; - } else if (vcpu->arch.dcr_needed) { - if (!vcpu->arch.dcr_is_write) - kvmppc_complete_dcr_load(vcpu, run); - vcpu->arch.dcr_needed = 0; } else if (vcpu->arch.osi_needed) { u64 *gprs = run->osi.gprs; int i; @@ -1099,6 +1197,42 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, return 0; } + +static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + case KVM_CAP_PPC_ENABLE_HCALL: { + unsigned long hcall = cap->args[0]; + + r = -EINVAL; + if (hcall > MAX_HCALL_OPCODE || (hcall & 3) || + cap->args[1] > 1) + break; + if (!kvmppc_book3s_hcall_implemented(kvm, hcall)) + break; + if (cap->args[1]) + set_bit(hcall / 4, kvm->arch.enabled_hcalls); + else + clear_bit(hcall / 4, kvm->arch.enabled_hcalls); + r = 0; + break; + } +#endif + default: + r = -EINVAL; + break; + } + + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1118,6 +1252,15 @@ long kvm_arch_vm_ioctl(struct file *filp, break; } + case KVM_ENABLE_CAP: + { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vm_ioctl_enable_cap(kvm, &cap); + break; + } #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CREATE_SPAPR_TCE: { struct kvm_create_spapr_tce create_tce; @@ -1204,3 +1347,5 @@ void kvm_arch_exit(void) { } + +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr); diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index 07b6110a4bb7..e44d2b2ea97e 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -110,7 +110,6 @@ void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = { [MMIO_EXITS] = "MMIO", - [DCR_EXITS] = "DCR", [SIGNAL_EXITS] = "SIGNAL", [ITLB_REAL_MISS_EXITS] = "ITLBREAL", [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT", diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index bf191e72b2d8..3123690c82dc 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -63,9 +63,6 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) case EMULATED_INST_EXITS: vcpu->stat.emulated_inst_exits++; break; - case DCR_EXITS: - vcpu->stat.dcr_exits++; - break; case DSI_EXITS: vcpu->stat.dsi_exits++; break; diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 59fa2de9546d..9f342f134ae4 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -10,7 +10,7 @@ CFLAGS_REMOVE_code-patching.o = -pg CFLAGS_REMOVE_feature-fixups.o = -pg obj-y := string.o alloc.o \ - crtsavres.o + crtsavres.o ppc_ksyms.o obj-$(CONFIG_PPC32) += div64.o copy_32.o obj-$(CONFIG_HAS_IOMEM) += devres.o diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 0860ee46013c..f09899e35991 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -461,8 +461,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) /* * Routine to copy a whole page of data, optimized for POWER4. * On POWER4 it is more than 50% faster than the simple loop - * above (following the .Ldst_aligned label) but it runs slightly - * slower on POWER3. + * above (following the .Ldst_aligned label). */ .Lcopy_page_4K: std r31,-32(1) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 7a8a7487cee8..7ce3870d7ddd 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -164,7 +164,7 @@ static long calc_offset(struct fixup_entry *entry, unsigned int *p) return (unsigned long)p - (unsigned long)entry; } -void test_basic_patching(void) +static void test_basic_patching(void) { extern unsigned int ftr_fixup_test1; extern unsigned int end_ftr_fixup_test1; diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 0c9c8d7d0734..170a0346f756 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -70,12 +70,16 @@ void __rw_yield(arch_rwlock_t *rw) void arch_spin_unlock_wait(arch_spinlock_t *lock) { + smp_mb(); + while (lock->slock) { HMT_low(); if (SHARED_PROCESSOR) __spin_yield(lock); } HMT_medium(); + + smp_mb(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index 0738f96befbf..43435c6892fb 100644 --- a/arch/powerpc/lib/mem_64.S +++ b/arch/powerpc/lib/mem_64.S @@ -77,7 +77,7 @@ _GLOBAL(memset) stb r4,0(r6) blr -_GLOBAL(memmove) +_GLOBAL_TOC(memmove) cmplw 0,r3,r4 bgt backwards_memcpy b memcpy diff --git a/arch/powerpc/lib/ppc_ksyms.c b/arch/powerpc/lib/ppc_ksyms.c new file mode 100644 index 000000000000..f993959647b5 --- /dev/null +++ b/arch/powerpc/lib/ppc_ksyms.c @@ -0,0 +1,39 @@ +#include <linux/string.h> +#include <linux/uaccess.h> +#include <linux/bitops.h> +#include <net/checksum.h> + +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(memchr); +#ifdef CONFIG_PPC32 +EXPORT_SYMBOL(cacheable_memcpy); +EXPORT_SYMBOL(cacheable_memzero); +#endif + +EXPORT_SYMBOL(strcpy); +EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strcat); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); + +#ifndef CONFIG_GENERIC_CSUM +EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(ip_fast_csum); +EXPORT_SYMBOL(csum_tcpudp_magic); +#endif + +EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(copy_page); + +#ifdef CONFIG_PPC64 +EXPORT_SYMBOL(__arch_hweight8); +EXPORT_SYMBOL(__arch_hweight16); +EXPORT_SYMBOL(__arch_hweight32); +EXPORT_SYMBOL(__arch_hweight64); +#endif diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 412dd46dd0b7..54651fc2d412 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -98,13 +98,8 @@ static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs ra = (instr >> 16) & 0x1f; ea = (signed short) instr; /* sign-extend */ - if (ra) { + if (ra) ea += regs->gpr[ra]; - if (instr & 0x04000000) { /* update forms */ - if ((instr>>26) != 47) /* stmw is not an update form */ - regs->gpr[ra] = ea; - } - } return truncate_if_32bit(regs->msr, ea); } @@ -120,11 +115,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg ra = (instr >> 16) & 0x1f; ea = (signed short) (instr & ~3); /* sign-extend */ - if (ra) { + if (ra) ea += regs->gpr[ra]; - if ((instr & 3) == 1) /* update forms */ - regs->gpr[ra] = ea; - } return truncate_if_32bit(regs->msr, ea); } @@ -133,8 +125,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg /* * Calculate effective address for an X-form instruction */ -static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs, - int do_update) +static unsigned long __kprobes xform_ea(unsigned int instr, + struct pt_regs *regs) { int ra, rb; unsigned long ea; @@ -142,11 +134,8 @@ static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs ra = (instr >> 16) & 0x1f; rb = (instr >> 11) & 0x1f; ea = regs->gpr[rb]; - if (ra) { + if (ra) ea += regs->gpr[ra]; - if (do_update) /* update forms */ - regs->gpr[ra] = ea; - } return truncate_if_32bit(regs->msr, ea); } @@ -611,6 +600,23 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1, regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift); } +static int __kprobes trap_compare(long v1, long v2) +{ + int ret = 0; + + if (v1 < v2) + ret |= 0x10; + else if (v1 > v2) + ret |= 0x08; + else + ret |= 0x04; + if ((unsigned long)v1 < (unsigned long)v2) + ret |= 0x02; + else if ((unsigned long)v1 > (unsigned long)v2) + ret |= 0x01; + return ret; +} + /* * Elements of 32-bit rotate and mask instructions. */ @@ -627,26 +633,27 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1, #define ROTATE(x, n) ((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x)) /* - * Emulate instructions that cause a transfer of control, - * loads and stores, and a few other instructions. - * Returns 1 if the step was emulated, 0 if not, - * or -1 if the instruction is one that should not be stepped, - * such as an rfid, or a mtmsrd that would clear MSR_RI. + * Decode an instruction, and execute it if that can be done just by + * modifying *regs (i.e. integer arithmetic and logical instructions, + * branches, and barrier instructions). + * Returns 1 if the instruction has been executed, or 0 if not. + * Sets *op to indicate what the instruction does. */ -int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) +int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, + unsigned int instr) { unsigned int opcode, ra, rb, rd, spr, u; unsigned long int imm; unsigned long int val, val2; - unsigned long int ea; - unsigned int cr, mb, me, sh; - int err; - unsigned long old_ra, val3; + unsigned int mb, me, sh; long ival; + op->type = COMPUTE; + opcode = instr >> 26; switch (opcode) { case 16: /* bc */ + op->type = BRANCH; imm = (signed short)(instr & 0xfffc); if ((instr & 2) == 0) imm += regs->nip; @@ -659,26 +666,14 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 1; #ifdef CONFIG_PPC64 case 17: /* sc */ - /* - * N.B. this uses knowledge about how the syscall - * entry code works. If that is changed, this will - * need to be changed also. - */ - if (regs->gpr[0] == 0x1ebe && - cpu_has_feature(CPU_FTR_REAL_LE)) { - regs->msr ^= MSR_LE; - goto instr_done; - } - regs->gpr[9] = regs->gpr[13]; - regs->gpr[10] = MSR_KERNEL; - regs->gpr[11] = regs->nip + 4; - regs->gpr[12] = regs->msr & MSR_MASK; - regs->gpr[13] = (unsigned long) get_paca(); - regs->nip = (unsigned long) &system_call_common; - regs->msr = MSR_KERNEL; - return 1; + if ((instr & 0xfe2) == 2) + op->type = SYSCALL; + else + op->type = UNKNOWN; + return 0; #endif case 18: /* b */ + op->type = BRANCH; imm = instr & 0x03fffffc; if (imm & 0x02000000) imm -= 0x04000000; @@ -691,8 +686,16 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 1; case 19: switch ((instr >> 1) & 0x3ff) { + case 0: /* mcrf */ + rd = (instr >> 21) & 0x1c; + ra = (instr >> 16) & 0x1c; + val = (regs->ccr >> ra) & 0xf; + regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd); + goto instr_done; + case 16: /* bclr */ case 528: /* bcctr */ + op->type = BRANCH; imm = (instr & 0x400)? regs->ctr: regs->link; regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); imm = truncate_if_32bit(regs->msr, imm); @@ -703,9 +706,13 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 1; case 18: /* rfid, scary */ - return -1; + if (regs->msr & MSR_PR) + goto priv; + op->type = RFI; + return 0; case 150: /* isync */ + op->type = BARRIER; isync(); goto instr_done; @@ -731,6 +738,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 31: switch ((instr >> 1) & 0x3ff) { case 598: /* sync */ + op->type = BARRIER; #ifdef __powerpc64__ switch ((instr >> 21) & 3) { case 1: /* lwsync */ @@ -745,6 +753,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case 854: /* eieio */ + op->type = BARRIER; eieio(); goto instr_done; } @@ -760,6 +769,17 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) rb = (instr >> 11) & 0x1f; switch (opcode) { +#ifdef __powerpc64__ + case 2: /* tdi */ + if (rd & trap_compare(regs->gpr[ra], (short) instr)) + goto trap; + goto instr_done; +#endif + case 3: /* twi */ + if (rd & trap_compare((int)regs->gpr[ra], (short) instr)) + goto trap; + goto instr_done; + case 7: /* mulli */ regs->gpr[rd] = regs->gpr[ra] * (short) instr; goto instr_done; @@ -908,35 +928,44 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 31: switch ((instr >> 1) & 0x3ff) { + case 4: /* tw */ + if (rd == 0x1f || + (rd & trap_compare((int)regs->gpr[ra], + (int)regs->gpr[rb]))) + goto trap; + goto instr_done; +#ifdef __powerpc64__ + case 68: /* td */ + if (rd & trap_compare(regs->gpr[ra], regs->gpr[rb])) + goto trap; + goto instr_done; +#endif case 83: /* mfmsr */ if (regs->msr & MSR_PR) - break; - regs->gpr[rd] = regs->msr & MSR_MASK; - goto instr_done; + goto priv; + op->type = MFMSR; + op->reg = rd; + return 0; case 146: /* mtmsr */ if (regs->msr & MSR_PR) - break; - imm = regs->gpr[rd]; - if ((imm & MSR_RI) == 0) - /* can't step mtmsr that would clear MSR_RI */ - return -1; - regs->msr = imm; - goto instr_done; + goto priv; + op->type = MTMSR; + op->reg = rd; + op->val = 0xffffffff & ~(MSR_ME | MSR_LE); + return 0; #ifdef CONFIG_PPC64 case 178: /* mtmsrd */ - /* only MSR_EE and MSR_RI get changed if bit 15 set */ - /* mtmsrd doesn't change MSR_HV and MSR_ME */ if (regs->msr & MSR_PR) - break; - imm = (instr & 0x10000)? 0x8002: 0xefffffffffffefffUL; - imm = (regs->msr & MSR_MASK & ~imm) - | (regs->gpr[rd] & imm); - if ((imm & MSR_RI) == 0) - /* can't step mtmsrd that would clear MSR_RI */ - return -1; - regs->msr = imm; - goto instr_done; + goto priv; + op->type = MTMSR; + op->reg = rd; + /* only MSR_EE and MSR_RI get changed if bit 15 set */ + /* mtmsrd doesn't change MSR_HV, MSR_ME or MSR_LE */ + imm = (instr & 0x10000)? 0x8002: 0xefffffffffffeffeUL; + op->val = imm; + return 0; #endif + case 19: /* mfcr */ regs->gpr[rd] = regs->ccr; regs->gpr[rd] &= 0xffffffffUL; @@ -954,33 +983,43 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case 339: /* mfspr */ - spr = (instr >> 11) & 0x3ff; + spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); switch (spr) { - case 0x20: /* mfxer */ + case SPRN_XER: /* mfxer */ regs->gpr[rd] = regs->xer; regs->gpr[rd] &= 0xffffffffUL; goto instr_done; - case 0x100: /* mflr */ + case SPRN_LR: /* mflr */ regs->gpr[rd] = regs->link; goto instr_done; - case 0x120: /* mfctr */ + case SPRN_CTR: /* mfctr */ regs->gpr[rd] = regs->ctr; goto instr_done; + default: + op->type = MFSPR; + op->reg = rd; + op->spr = spr; + return 0; } break; case 467: /* mtspr */ - spr = (instr >> 11) & 0x3ff; + spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); switch (spr) { - case 0x20: /* mtxer */ + case SPRN_XER: /* mtxer */ regs->xer = (regs->gpr[rd] & 0xffffffffUL); goto instr_done; - case 0x100: /* mtlr */ + case SPRN_LR: /* mtlr */ regs->link = regs->gpr[rd]; goto instr_done; - case 0x120: /* mtctr */ + case SPRN_CTR: /* mtctr */ regs->ctr = regs->gpr[rd]; goto instr_done; + default: + op->type = MTSPR; + op->val = regs->gpr[rd]; + op->spr = spr; + return 0; } break; @@ -1198,7 +1237,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = regs->gpr[rb] & 0x3f; ival = (signed int) regs->gpr[rd]; regs->gpr[ra] = ival >> (sh < 32 ? sh : 31); - if (ival < 0 && (sh >= 32 || (ival & ((1 << sh) - 1)) != 0)) + if (ival < 0 && (sh >= 32 || (ival & ((1ul << sh) - 1)) != 0)) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1208,7 +1247,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = rb; ival = (signed int) regs->gpr[rd]; regs->gpr[ra] = ival >> sh; - if (ival < 0 && (ival & ((1 << sh) - 1)) != 0) + if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1216,7 +1255,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef __powerpc64__ case 27: /* sld */ - sh = regs->gpr[rd] & 0x7f; + sh = regs->gpr[rb] & 0x7f; if (sh < 64) regs->gpr[ra] = regs->gpr[rd] << sh; else @@ -1235,7 +1274,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = regs->gpr[rb] & 0x7f; ival = (signed long int) regs->gpr[rd]; regs->gpr[ra] = ival >> (sh < 64 ? sh : 63); - if (ival < 0 && (sh >= 64 || (ival & ((1 << sh) - 1)) != 0)) + if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0)) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1246,7 +1285,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = rb | ((instr & 2) << 4); ival = (signed long int) regs->gpr[rd]; regs->gpr[ra] = ival >> sh; - if (ival < 0 && (ival & ((1 << sh) - 1)) != 0) + if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1257,294 +1296,242 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) * Cache instructions */ case 54: /* dcbst */ - ea = xform_ea(instr, regs, 0); - if (!address_ok(regs, ea, 8)) - return 0; - err = 0; - __cacheop_user_asmx(ea, err, "dcbst"); - if (err) - return 0; - goto instr_done; + op->type = MKOP(CACHEOP, DCBST, 0); + op->ea = xform_ea(instr, regs); + return 0; case 86: /* dcbf */ - ea = xform_ea(instr, regs, 0); - if (!address_ok(regs, ea, 8)) - return 0; - err = 0; - __cacheop_user_asmx(ea, err, "dcbf"); - if (err) - return 0; - goto instr_done; + op->type = MKOP(CACHEOP, DCBF, 0); + op->ea = xform_ea(instr, regs); + return 0; case 246: /* dcbtst */ - if (rd == 0) { - ea = xform_ea(instr, regs, 0); - prefetchw((void *) ea); - } - goto instr_done; + op->type = MKOP(CACHEOP, DCBTST, 0); + op->ea = xform_ea(instr, regs); + op->reg = rd; + return 0; case 278: /* dcbt */ - if (rd == 0) { - ea = xform_ea(instr, regs, 0); - prefetch((void *) ea); - } - goto instr_done; + op->type = MKOP(CACHEOP, DCBTST, 0); + op->ea = xform_ea(instr, regs); + op->reg = rd; + return 0; + case 982: /* icbi */ + op->type = MKOP(CACHEOP, ICBI, 0); + op->ea = xform_ea(instr, regs); + return 0; } break; } /* - * Following cases are for loads and stores, so bail out - * if we're in little-endian mode. - */ - if (regs->msr & MSR_LE) - return 0; - - /* - * Save register RA in case it's an update form load or store - * and the access faults. + * Loads and stores. */ - old_ra = regs->gpr[ra]; + op->type = UNKNOWN; + op->update_reg = ra; + op->reg = rd; + op->val = regs->gpr[rd]; + u = (instr >> 20) & UPDATE; switch (opcode) { case 31: - u = instr & 0x40; + u = instr & UPDATE; + op->ea = xform_ea(instr, regs); switch ((instr >> 1) & 0x3ff) { case 20: /* lwarx */ - ea = xform_ea(instr, regs, 0); - if (ea & 3) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 4)) - goto ldst_done; - err = 0; - __get_user_asmx(val, ea, err, "lwarx"); - if (!err) - regs->gpr[rd] = val; - goto ldst_done; + op->type = MKOP(LARX, 0, 4); + break; case 150: /* stwcx. */ - ea = xform_ea(instr, regs, 0); - if (ea & 3) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 4)) - goto ldst_done; - err = 0; - __put_user_asmx(regs->gpr[rd], ea, err, "stwcx.", cr); - if (!err) - regs->ccr = (regs->ccr & 0x0fffffff) | - (cr & 0xe0000000) | - ((regs->xer >> 3) & 0x10000000); - goto ldst_done; + op->type = MKOP(STCX, 0, 4); + break; #ifdef __powerpc64__ case 84: /* ldarx */ - ea = xform_ea(instr, regs, 0); - if (ea & 7) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 8)) - goto ldst_done; - err = 0; - __get_user_asmx(val, ea, err, "ldarx"); - if (!err) - regs->gpr[rd] = val; - goto ldst_done; + op->type = MKOP(LARX, 0, 8); + break; case 214: /* stdcx. */ - ea = xform_ea(instr, regs, 0); - if (ea & 7) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 8)) - goto ldst_done; - err = 0; - __put_user_asmx(regs->gpr[rd], ea, err, "stdcx.", cr); - if (!err) - regs->ccr = (regs->ccr & 0x0fffffff) | - (cr & 0xe0000000) | - ((regs->xer >> 3) & 0x10000000); - goto ldst_done; + op->type = MKOP(STCX, 0, 8); + break; case 21: /* ldx */ case 53: /* ldux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 8); + break; #endif case 23: /* lwzx */ case 55: /* lwzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 4, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 4); + break; case 87: /* lbzx */ case 119: /* lbzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 1, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 1); + break; #ifdef CONFIG_ALTIVEC case 103: /* lvx */ case 359: /* lvxl */ if (!(regs->msr & MSR_VEC)) - break; - ea = xform_ea(instr, regs, 0); - err = do_vec_load(rd, do_lvx, ea, regs); - goto ldst_done; + goto vecunavail; + op->type = MKOP(LOAD_VMX, 0, 16); + break; case 231: /* stvx */ case 487: /* stvxl */ if (!(regs->msr & MSR_VEC)) - break; - ea = xform_ea(instr, regs, 0); - err = do_vec_store(rd, do_stvx, ea, regs); - goto ldst_done; + goto vecunavail; + op->type = MKOP(STORE_VMX, 0, 16); + break; #endif /* CONFIG_ALTIVEC */ #ifdef __powerpc64__ case 149: /* stdx */ case 181: /* stdux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 8); + break; #endif case 151: /* stwx */ case 183: /* stwux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 4, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 4); + break; case 215: /* stbx */ case 247: /* stbux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 1, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 1); + break; case 279: /* lhzx */ case 311: /* lhzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 2, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 2); + break; #ifdef __powerpc64__ case 341: /* lwax */ case 373: /* lwaux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 4, regs); - if (!err) - regs->gpr[rd] = (signed int) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 4); + break; #endif case 343: /* lhax */ case 375: /* lhaux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 2, regs); - if (!err) - regs->gpr[rd] = (signed short) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 2); + break; case 407: /* sthx */ case 439: /* sthux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 2); + break; #ifdef __powerpc64__ case 532: /* ldbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 8, regs); - if (!err) - regs->gpr[rd] = byterev_8(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 8); + break; #endif + case 533: /* lswx */ + op->type = MKOP(LOAD_MULTI, 0, regs->xer & 0x7f); + break; case 534: /* lwbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 4, regs); - if (!err) - regs->gpr[rd] = byterev_4(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 4); + break; + + case 597: /* lswi */ + if (rb == 0) + rb = 32; /* # bytes to load */ + op->type = MKOP(LOAD_MULTI, 0, rb); + op->ea = 0; + if (ra) + op->ea = truncate_if_32bit(regs->msr, + regs->gpr[ra]); + break; #ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_load(rd, do_lfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 4); + break; case 599: /* lfdx */ case 631: /* lfdux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_load(rd, do_lfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 8); + break; case 663: /* stfsx */ case 695: /* stfsux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_store(rd, do_stfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 4); + break; case 727: /* stfdx */ case 759: /* stfdux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_store(rd, do_stfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 8); + break; #endif #ifdef __powerpc64__ case 660: /* stdbrx */ - val = byterev_8(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 8); + op->val = byterev_8(regs->gpr[rd]); + break; #endif + case 661: /* stswx */ + op->type = MKOP(STORE_MULTI, 0, regs->xer & 0x7f); + break; + case 662: /* stwbrx */ - val = byterev_4(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 4, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 4); + op->val = byterev_4(regs->gpr[rd]); + break; + + case 725: + if (rb == 0) + rb = 32; /* # bytes to store */ + op->type = MKOP(STORE_MULTI, 0, rb); + op->ea = 0; + if (ra) + op->ea = truncate_if_32bit(regs->msr, + regs->gpr[ra]); + break; case 790: /* lhbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 2, regs); - if (!err) - regs->gpr[rd] = byterev_2(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 2); + break; case 918: /* sthbrx */ - val = byterev_2(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 2); + op->val = byterev_2(regs->gpr[rd]); + break; #ifdef CONFIG_VSX case 844: /* lxvd2x */ case 876: /* lxvd2ux */ if (!(regs->msr & MSR_VSX)) - break; - rd |= (instr & 1) << 5; - ea = xform_ea(instr, regs, u); - err = do_vsx_load(rd, do_lxvd2x, ea, regs); - goto ldst_done; + goto vsxunavail; + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, u, 16); + break; case 972: /* stxvd2x */ case 1004: /* stxvd2ux */ if (!(regs->msr & MSR_VSX)) - break; - rd |= (instr & 1) << 5; - ea = xform_ea(instr, regs, u); - err = do_vsx_store(rd, do_stxvd2x, ea, regs); - goto ldst_done; + goto vsxunavail; + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(STORE_VSX, u, 16); + break; #endif /* CONFIG_VSX */ } @@ -1552,178 +1539,123 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 32: /* lwz */ case 33: /* lwzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 4, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 4); + op->ea = dform_ea(instr, regs); + break; case 34: /* lbz */ case 35: /* lbzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 1, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 1); + op->ea = dform_ea(instr, regs); + break; case 36: /* stw */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 4, regs); - goto ldst_done; - case 37: /* stwu */ - val = regs->gpr[rd]; - val3 = dform_ea(instr, regs); - /* - * For PPC32 we always use stwu to change stack point with r1. So - * this emulated store may corrupt the exception frame, now we - * have to provide the exception frame trampoline, which is pushed - * below the kprobed function stack. So we only update gpr[1] but - * don't emulate the real store operation. We will do real store - * operation safely in exception return code by checking this flag. - */ - if ((ra == 1) && !(regs->msr & MSR_PR) \ - && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) { -#ifdef CONFIG_PPC32 - /* - * Check if we will touch kernel sack overflow - */ - if (val3 - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { - printk(KERN_CRIT "Can't kprobe this since Kernel stack overflow.\n"); - err = -EINVAL; - break; - } -#endif /* CONFIG_PPC32 */ - /* - * Check if we already set since that means we'll - * lose the previous value. - */ - WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); - set_thread_flag(TIF_EMULATE_STACK_STORE); - err = 0; - } else - err = write_mem(val, val3, 4, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 4); + op->ea = dform_ea(instr, regs); + break; case 38: /* stb */ case 39: /* stbu */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 1, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 1); + op->ea = dform_ea(instr, regs); + break; case 40: /* lhz */ case 41: /* lhzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 2, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 2); + op->ea = dform_ea(instr, regs); + break; case 42: /* lha */ case 43: /* lhau */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 2, regs); - if (!err) - regs->gpr[rd] = (signed short) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 2); + op->ea = dform_ea(instr, regs); + break; case 44: /* sth */ case 45: /* sthu */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 2); + op->ea = dform_ea(instr, regs); + break; case 46: /* lmw */ - ra = (instr >> 16) & 0x1f; if (ra >= rd) break; /* invalid form, ra in range to load */ - ea = dform_ea(instr, regs); - do { - err = read_mem(®s->gpr[rd], ea, 4, regs); - if (err) - return 0; - ea += 4; - } while (++rd < 32); - goto instr_done; + op->type = MKOP(LOAD_MULTI, 0, 4 * (32 - rd)); + op->ea = dform_ea(instr, regs); + break; case 47: /* stmw */ - ea = dform_ea(instr, regs); - do { - err = write_mem(regs->gpr[rd], ea, 4, regs); - if (err) - return 0; - ea += 4; - } while (++rd < 32); - goto instr_done; + op->type = MKOP(STORE_MULTI, 0, 4 * (32 - rd)); + op->ea = dform_ea(instr, regs); + break; #ifdef CONFIG_PPC_FPU case 48: /* lfs */ case 49: /* lfsu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_load(rd, do_lfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 4); + op->ea = dform_ea(instr, regs); + break; case 50: /* lfd */ case 51: /* lfdu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_load(rd, do_lfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 8); + op->ea = dform_ea(instr, regs); + break; case 52: /* stfs */ case 53: /* stfsu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_store(rd, do_stfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 4); + op->ea = dform_ea(instr, regs); + break; case 54: /* stfd */ case 55: /* stfdu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_store(rd, do_stfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 8); + op->ea = dform_ea(instr, regs); + break; #endif #ifdef __powerpc64__ case 58: /* ld[u], lwa */ + op->ea = dsform_ea(instr, regs); switch (instr & 3) { case 0: /* ld */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, 0, 8); + break; case 1: /* ldu */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, UPDATE, 8); + break; case 2: /* lwa */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 4, regs); - if (!err) - regs->gpr[rd] = (signed int) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT, 4); + break; } break; case 62: /* std[u] */ - val = regs->gpr[rd]; + op->ea = dsform_ea(instr, regs); switch (instr & 3) { case 0: /* std */ - err = write_mem(val, dsform_ea(instr, regs), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, 0, 8); + break; case 1: /* stdu */ - err = write_mem(val, dsform_ea(instr, regs), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, UPDATE, 8); + break; } break; #endif /* __powerpc64__ */ } - err = -EINVAL; - - ldst_done: - if (err) { - regs->gpr[ra] = old_ra; - return 0; /* invoke DSI if -EFAULT? */ - } - instr_done: - regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); - return 1; + return 0; logical_done: if (instr & 1) @@ -1733,5 +1665,349 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) arith_done: if (instr & 1) set_cr0(regs, rd); - goto instr_done; + + instr_done: + regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + return 1; + + priv: + op->type = INTERRUPT | 0x700; + op->val = SRR1_PROGPRIV; + return 0; + + trap: + op->type = INTERRUPT | 0x700; + op->val = SRR1_PROGTRAP; + return 0; + +#ifdef CONFIG_PPC_FPU + fpunavail: + op->type = INTERRUPT | 0x800; + return 0; +#endif + +#ifdef CONFIG_ALTIVEC + vecunavail: + op->type = INTERRUPT | 0xf20; + return 0; +#endif + +#ifdef CONFIG_VSX + vsxunavail: + op->type = INTERRUPT | 0xf40; + return 0; +#endif +} +EXPORT_SYMBOL_GPL(analyse_instr); + +/* + * For PPC32 we always use stwu with r1 to change the stack pointer. + * So this emulated store may corrupt the exception frame, now we + * have to provide the exception frame trampoline, which is pushed + * below the kprobed function stack. So we only update gpr[1] but + * don't emulate the real store operation. We will do real store + * operation safely in exception return code by checking this flag. + */ +static __kprobes int handle_stack_update(unsigned long ea, struct pt_regs *regs) +{ +#ifdef CONFIG_PPC32 + /* + * Check if we will touch kernel stack overflow + */ + if (ea - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { + printk(KERN_CRIT "Can't kprobe this since kernel stack would overflow.\n"); + return -EINVAL; + } +#endif /* CONFIG_PPC32 */ + /* + * Check if we already set since that means we'll + * lose the previous value. + */ + WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); + set_thread_flag(TIF_EMULATE_STACK_STORE); + return 0; +} + +static __kprobes void do_signext(unsigned long *valp, int size) +{ + switch (size) { + case 2: + *valp = (signed short) *valp; + break; + case 4: + *valp = (signed int) *valp; + break; + } +} + +static __kprobes void do_byterev(unsigned long *valp, int size) +{ + switch (size) { + case 2: + *valp = byterev_2(*valp); + break; + case 4: + *valp = byterev_4(*valp); + break; +#ifdef __powerpc64__ + case 8: + *valp = byterev_8(*valp); + break; +#endif + } +} + +/* + * Emulate instructions that cause a transfer of control, + * loads and stores, and a few other instructions. + * Returns 1 if the step was emulated, 0 if not, + * or -1 if the instruction is one that should not be stepped, + * such as an rfid, or a mtmsrd that would clear MSR_RI. + */ +int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) +{ + struct instruction_op op; + int r, err, size; + unsigned long val; + unsigned int cr; + int i, rd, nb; + + r = analyse_instr(&op, regs, instr); + if (r != 0) + return r; + + err = 0; + size = GETSIZE(op.type); + switch (op.type & INSTR_TYPE_MASK) { + case CACHEOP: + if (!address_ok(regs, op.ea, 8)) + return 0; + switch (op.type & CACHEOP_MASK) { + case DCBST: + __cacheop_user_asmx(op.ea, err, "dcbst"); + break; + case DCBF: + __cacheop_user_asmx(op.ea, err, "dcbf"); + break; + case DCBTST: + if (op.reg == 0) + prefetchw((void *) op.ea); + break; + case DCBT: + if (op.reg == 0) + prefetch((void *) op.ea); + break; + case ICBI: + __cacheop_user_asmx(op.ea, err, "icbi"); + break; + } + if (err) + return 0; + goto instr_done; + + case LARX: + if (regs->msr & MSR_LE) + return 0; + if (op.ea & (size - 1)) + break; /* can't handle misaligned */ + err = -EFAULT; + if (!address_ok(regs, op.ea, size)) + goto ldst_done; + err = 0; + switch (size) { + case 4: + __get_user_asmx(val, op.ea, err, "lwarx"); + break; + case 8: + __get_user_asmx(val, op.ea, err, "ldarx"); + break; + default: + return 0; + } + if (!err) + regs->gpr[op.reg] = val; + goto ldst_done; + + case STCX: + if (regs->msr & MSR_LE) + return 0; + if (op.ea & (size - 1)) + break; /* can't handle misaligned */ + err = -EFAULT; + if (!address_ok(regs, op.ea, size)) + goto ldst_done; + err = 0; + switch (size) { + case 4: + __put_user_asmx(op.val, op.ea, err, "stwcx.", cr); + break; + case 8: + __put_user_asmx(op.val, op.ea, err, "stdcx.", cr); + break; + default: + return 0; + } + if (!err) + regs->ccr = (regs->ccr & 0x0fffffff) | + (cr & 0xe0000000) | + ((regs->xer >> 3) & 0x10000000); + goto ldst_done; + + case LOAD: + if (regs->msr & MSR_LE) + return 0; + err = read_mem(®s->gpr[op.reg], op.ea, size, regs); + if (!err) { + if (op.type & SIGNEXT) + do_signext(®s->gpr[op.reg], size); + if (op.type & BYTEREV) + do_byterev(®s->gpr[op.reg], size); + } + goto ldst_done; + + case LOAD_FP: + if (regs->msr & MSR_LE) + return 0; + if (size == 4) + err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); + else + err = do_fp_load(op.reg, do_lfd, op.ea, size, regs); + goto ldst_done; + +#ifdef CONFIG_ALTIVEC + case LOAD_VMX: + if (regs->msr & MSR_LE) + return 0; + err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); + goto ldst_done; +#endif +#ifdef CONFIG_VSX + case LOAD_VSX: + if (regs->msr & MSR_LE) + return 0; + err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); + goto ldst_done; +#endif + case LOAD_MULTI: + if (regs->msr & MSR_LE) + return 0; + rd = op.reg; + for (i = 0; i < size; i += 4) { + nb = size - i; + if (nb > 4) + nb = 4; + err = read_mem(®s->gpr[rd], op.ea, nb, regs); + if (err) + return 0; + if (nb < 4) /* left-justify last bytes */ + regs->gpr[rd] <<= 32 - 8 * nb; + op.ea += 4; + ++rd; + } + goto instr_done; + + case STORE: + if (regs->msr & MSR_LE) + return 0; + if ((op.type & UPDATE) && size == sizeof(long) && + op.reg == 1 && op.update_reg == 1 && + !(regs->msr & MSR_PR) && + op.ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) { + err = handle_stack_update(op.ea, regs); + goto ldst_done; + } + err = write_mem(op.val, op.ea, size, regs); + goto ldst_done; + + case STORE_FP: + if (regs->msr & MSR_LE) + return 0; + if (size == 4) + err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); + else + err = do_fp_store(op.reg, do_stfd, op.ea, size, regs); + goto ldst_done; + +#ifdef CONFIG_ALTIVEC + case STORE_VMX: + if (regs->msr & MSR_LE) + return 0; + err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); + goto ldst_done; +#endif +#ifdef CONFIG_VSX + case STORE_VSX: + if (regs->msr & MSR_LE) + return 0; + err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); + goto ldst_done; +#endif + case STORE_MULTI: + if (regs->msr & MSR_LE) + return 0; + rd = op.reg; + for (i = 0; i < size; i += 4) { + val = regs->gpr[rd]; + nb = size - i; + if (nb > 4) + nb = 4; + else + val >>= 32 - 8 * nb; + err = write_mem(val, op.ea, nb, regs); + if (err) + return 0; + op.ea += 4; + ++rd; + } + goto instr_done; + + case MFMSR: + regs->gpr[op.reg] = regs->msr & MSR_MASK; + goto instr_done; + + case MTMSR: + val = regs->gpr[op.reg]; + if ((val & MSR_RI) == 0) + /* can't step mtmsr[d] that would clear MSR_RI */ + return -1; + /* here op.val is the mask of bits to change */ + regs->msr = (regs->msr & ~op.val) | (val & op.val); + goto instr_done; + +#ifdef CONFIG_PPC64 + case SYSCALL: /* sc */ + /* + * N.B. this uses knowledge about how the syscall + * entry code works. If that is changed, this will + * need to be changed also. + */ + if (regs->gpr[0] == 0x1ebe && + cpu_has_feature(CPU_FTR_REAL_LE)) { + regs->msr ^= MSR_LE; + goto instr_done; + } + regs->gpr[9] = regs->gpr[13]; + regs->gpr[10] = MSR_KERNEL; + regs->gpr[11] = regs->nip + 4; + regs->gpr[12] = regs->msr & MSR_MASK; + regs->gpr[13] = (unsigned long) get_paca(); + regs->nip = (unsigned long) &system_call_common; + regs->msr = MSR_KERNEL; + return 1; + + case RFI: + return -1; +#endif + } + return 0; + + ldst_done: + if (err) + return 0; + if (op.type & UPDATE) + regs->gpr[op.update_reg] = op.ea; + + instr_done: + regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + return 1; } diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 51230ee6a407..d0130fff20e5 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -13,9 +13,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ tlb_nohash_low.o obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o -obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \ - slb_low.o slb.o stab.o \ - $(hash64-y) +obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o slb_low.o slb.o $(hash64-y) obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ tlb_hash$(CONFIG_WORD_SIZE).o \ diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index 7b6c10750179..d85e86aac7fb 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -33,6 +33,7 @@ #include <linux/export.h> #include <asm/tlbflush.h> +#include <asm/dma.h> #include "mmu_decl.h" diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 51ab9e7e6c39..24b3f4949df4 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -33,6 +33,7 @@ #include <linux/magic.h> #include <linux/ratelimit.h> #include <linux/context_tracking.h> +#include <linux/hugetlb.h> #include <asm/firmware.h> #include <asm/page.h> @@ -114,22 +115,37 @@ static int store_updates_sp(struct pt_regs *regs) #define MM_FAULT_CONTINUE -1 #define MM_FAULT_ERR(sig) (sig) -static int do_sigbus(struct pt_regs *regs, unsigned long address) +static int do_sigbus(struct pt_regs *regs, unsigned long address, + unsigned int fault) { siginfo_t info; + unsigned int lsb = 0; up_read(¤t->mm->mmap_sem); - if (user_mode(regs)) { - current->thread.trap_nr = BUS_ADRERR; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, current); - return MM_FAULT_RETURN; + if (!user_mode(regs)) + return MM_FAULT_ERR(SIGBUS); + + current->thread.trap_nr = BUS_ADRERR; + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; +#ifdef CONFIG_MEMORY_FAILURE + if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { + pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", + current->comm, current->pid, address); + info.si_code = BUS_MCEERR_AR; } - return MM_FAULT_ERR(SIGBUS); + + if (fault & VM_FAULT_HWPOISON_LARGE) + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); + if (fault & VM_FAULT_HWPOISON) + lsb = PAGE_SHIFT; +#endif + info.si_addr_lsb = lsb; + force_sig_info(SIGBUS, &info, current); + return MM_FAULT_RETURN; } static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) @@ -170,11 +186,8 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) return MM_FAULT_RETURN; } - /* Bus error. x86 handles HWPOISON here, we'll add this if/when - * we support the feature in HW - */ - if (fault & VM_FAULT_SIGBUS) - return do_sigbus(regs, addr); + if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) + return do_sigbus(regs, addr, fault); /* We don't understand the fault code, this is fatal */ BUG(); diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index cf1d325eae8b..afc0a8295f84 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -412,18 +412,18 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, local_irq_restore(flags); } -static void native_hugepage_invalidate(struct mm_struct *mm, +static void native_hugepage_invalidate(unsigned long vsid, + unsigned long addr, unsigned char *hpte_slot_array, - unsigned long addr, int psize) + int psize, int ssize) { - int ssize = 0, i; - int lock_tlbie; + int i; struct hash_pte *hptep; int actual_psize = MMU_PAGE_16M; unsigned int max_hpte_count, valid; unsigned long flags, s_addr = addr; unsigned long hpte_v, want_v, shift; - unsigned long hidx, vpn = 0, vsid, hash, slot; + unsigned long hidx, vpn = 0, hash, slot; shift = mmu_psize_defs[psize].shift; max_hpte_count = 1U << (PMD_SHIFT - shift); @@ -437,15 +437,6 @@ static void native_hugepage_invalidate(struct mm_struct *mm, /* get the vpn */ addr = s_addr + (i * (1ul << shift)); - if (!is_kernel_addr(addr)) { - ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); - } else { - vsid = get_kernel_vsid(addr, mmu_kernel_ssize); - ssize = mmu_kernel_ssize; - } - vpn = hpt_vpn(addr, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); if (hidx & _PTEIDX_SECONDARY) @@ -465,22 +456,13 @@ static void native_hugepage_invalidate(struct mm_struct *mm, else /* Invalidate the hpte. NOTE: this also unlocks it */ hptep->v = 0; + /* + * We need to do tlb invalidate for all the address, tlbie + * instruction compares entry_VA in tlb with the VA specified + * here + */ + tlbie(vpn, psize, actual_psize, ssize, 0); } - /* - * Since this is a hugepage, we just need a single tlbie. - * use the last vpn. - */ - lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); - if (lock_tlbie) - raw_spin_lock(&native_tlbie_lock); - - asm volatile("ptesync":::"memory"); - __tlbie(vpn, psize, actual_psize, ssize); - asm volatile("eieio; tlbsync; ptesync":::"memory"); - - if (lock_tlbie) - raw_spin_unlock(&native_tlbie_lock); - local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 88fdd9d25077..dd73b63f9479 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -92,6 +92,7 @@ extern unsigned long dart_tablebase; static unsigned long _SDR1; struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; +EXPORT_SYMBOL_GPL(mmu_psize_defs); struct hash_pte *htab_address; unsigned long htab_size_bytes; @@ -243,7 +244,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, } #ifdef CONFIG_MEMORY_HOTPLUG -static int htab_remove_mapping(unsigned long vstart, unsigned long vend, +int htab_remove_mapping(unsigned long vstart, unsigned long vend, int psize, int ssize) { unsigned long vaddr; @@ -333,70 +334,69 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, return 0; prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size); - if (prop != NULL) { - pr_info("Page sizes from device-tree:\n"); - size /= 4; - cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); - while(size > 0) { - unsigned int base_shift = be32_to_cpu(prop[0]); - unsigned int slbenc = be32_to_cpu(prop[1]); - unsigned int lpnum = be32_to_cpu(prop[2]); - struct mmu_psize_def *def; - int idx, base_idx; - - size -= 3; prop += 3; - base_idx = get_idx_from_shift(base_shift); - if (base_idx < 0) { - /* - * skip the pte encoding also - */ - prop += lpnum * 2; size -= lpnum * 2; + if (!prop) + return 0; + + pr_info("Page sizes from device-tree:\n"); + size /= 4; + cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); + while(size > 0) { + unsigned int base_shift = be32_to_cpu(prop[0]); + unsigned int slbenc = be32_to_cpu(prop[1]); + unsigned int lpnum = be32_to_cpu(prop[2]); + struct mmu_psize_def *def; + int idx, base_idx; + + size -= 3; prop += 3; + base_idx = get_idx_from_shift(base_shift); + if (base_idx < 0) { + /* skip the pte encoding also */ + prop += lpnum * 2; size -= lpnum * 2; + continue; + } + def = &mmu_psize_defs[base_idx]; + if (base_idx == MMU_PAGE_16M) + cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; + + def->shift = base_shift; + if (base_shift <= 23) + def->avpnm = 0; + else + def->avpnm = (1 << (base_shift - 23)) - 1; + def->sllp = slbenc; + /* + * We don't know for sure what's up with tlbiel, so + * for now we only set it for 4K and 64K pages + */ + if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K) + def->tlbiel = 1; + else + def->tlbiel = 0; + + while (size > 0 && lpnum) { + unsigned int shift = be32_to_cpu(prop[0]); + int penc = be32_to_cpu(prop[1]); + + prop += 2; size -= 2; + lpnum--; + + idx = get_idx_from_shift(shift); + if (idx < 0) continue; - } - def = &mmu_psize_defs[base_idx]; - if (base_idx == MMU_PAGE_16M) - cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; - - def->shift = base_shift; - if (base_shift <= 23) - def->avpnm = 0; - else - def->avpnm = (1 << (base_shift - 23)) - 1; - def->sllp = slbenc; - /* - * We don't know for sure what's up with tlbiel, so - * for now we only set it for 4K and 64K pages - */ - if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K) - def->tlbiel = 1; - else - def->tlbiel = 0; - - while (size > 0 && lpnum) { - unsigned int shift = be32_to_cpu(prop[0]); - int penc = be32_to_cpu(prop[1]); - - prop += 2; size -= 2; - lpnum--; - - idx = get_idx_from_shift(shift); - if (idx < 0) - continue; - - if (penc == -1) - pr_err("Invalid penc for base_shift=%d " - "shift=%d\n", base_shift, shift); - - def->penc[idx] = penc; - pr_info("base_shift=%d: shift=%d, sllp=0x%04lx," - " avpnm=0x%08lx, tlbiel=%d, penc=%d\n", - base_shift, shift, def->sllp, - def->avpnm, def->tlbiel, def->penc[idx]); - } + + if (penc == -1) + pr_err("Invalid penc for base_shift=%d " + "shift=%d\n", base_shift, shift); + + def->penc[idx] = penc; + pr_info("base_shift=%d: shift=%d, sllp=0x%04lx," + " avpnm=0x%08lx, tlbiel=%d, penc=%d\n", + base_shift, shift, def->sllp, + def->avpnm, def->tlbiel, def->penc[idx]); } - return 1; } - return 0; + + return 1; } #ifdef CONFIG_HUGETLB_PAGE @@ -821,21 +821,14 @@ static void __init htab_initialize(void) void __init early_init_mmu(void) { - /* Setup initial STAB address in the PACA */ - get_paca()->stab_real = __pa((u64)&initial_stab); - get_paca()->stab_addr = (u64)&initial_stab; - /* Initialize the MMU Hash table and create the linear mapping - * of memory. Has to be done before stab/slb initialization as - * this is currently where the page size encoding is obtained + * of memory. Has to be done before SLB initialization as this is + * currently where the page size encoding is obtained. */ htab_initialize(); - /* Initialize stab / SLB management */ - if (mmu_has_feature(MMU_FTR_SLB)) - slb_initialize(); - else - stab_initialize(get_paca()->stab_real); + /* Initialize SLB management */ + slb_initialize(); } #ifdef CONFIG_SMP @@ -845,13 +838,8 @@ void early_init_mmu_secondary(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) mtspr(SPRN_SDR1, _SDR1); - /* Initialize STAB/SLB. We use a virtual address as it works - * in real mode on pSeries. - */ - if (mmu_has_feature(MMU_FTR_SLB)) - slb_initialize(); - else - stab_initialize(get_paca()->stab_addr); + /* Initialize SLB */ + slb_initialize(); } #endif /* CONFIG_SMP */ @@ -879,7 +867,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) } #ifdef CONFIG_PPC_MM_SLICES -unsigned int get_paca_psize(unsigned long addr) +static unsigned int get_paca_psize(unsigned long addr) { u64 lpsizes; unsigned char *hpsizes; diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 826893fcb3a7..5f5e6328c21c 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -18,6 +18,57 @@ #include <linux/mm.h> #include <asm/machdep.h> +static void invalidate_old_hpte(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize) +{ + int i, max_hpte_count, valid; + unsigned long s_addr; + unsigned char *hpte_slot_array; + unsigned long hidx, shift, vpn, hash, slot; + + s_addr = addr & HPAGE_PMD_MASK; + hpte_slot_array = get_hpte_slot_array(pmdp); + /* + * IF we try to do a HUGE PTE update after a withdraw is done. + * we will find the below NULL. This happens when we do + * split_huge_page_pmd + */ + if (!hpte_slot_array) + return; + + if (ppc_md.hugepage_invalidate) + return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize); + /* + * No bluk hpte removal support, invalidate each entry + */ + shift = mmu_psize_defs[psize].shift; + max_hpte_count = HPAGE_PMD_SIZE >> shift; + for (i = 0; i < max_hpte_count; i++) { + /* + * 8 bits per each hpte entries + * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] + */ + valid = hpte_valid(hpte_slot_array, i); + if (!valid) + continue; + hidx = hpte_hash_index(hpte_slot_array, i); + + /* get the vpn */ + addr = s_addr + (i * (1ul << shift)); + vpn = hpt_vpn(addr, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + ppc_md.hpte_invalidate(slot, vpn, psize, + MMU_PAGE_16M, ssize, 0); + } +} + + int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, unsigned long trap, int local, int ssize, unsigned int psize) @@ -33,7 +84,9 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * atomically mark the linux large page PMD busy and dirty */ do { - old_pmd = pmd_val(*pmdp); + pmd_t pmd = ACCESS_ONCE(*pmdp); + + old_pmd = pmd_val(pmd); /* If PMD busy, retry the access */ if (unlikely(old_pmd & _PAGE_BUSY)) return 0; @@ -85,6 +138,15 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, vpn = hpt_vpn(ea, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); hpte_slot_array = get_hpte_slot_array(pmdp); + if (psize == MMU_PAGE_4K) { + /* + * invalidate the old hpte entry if we have that mapped via 64K + * base page size. This is because demote_segment won't flush + * hash page table entries. + */ + if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) + invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize); + } valid = hpte_valid(hpte_slot_array, index); if (valid) { @@ -107,11 +169,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * safely update this here. */ valid = 0; - new_pmd &= ~_PAGE_HPTEFLAGS; hpte_slot_array[index] = 0; - } else - /* clear the busy bits and set the hash pte bits */ - new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + } } if (!valid) { @@ -119,11 +178,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, /* insert new entry */ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; -repeat: - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - - /* clear the busy bits and set the hash pte bits */ - new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + new_pmd |= _PAGE_HASHPTE; /* Add in WIMG bits */ rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | @@ -132,6 +187,8 @@ repeat: * enable the memory coherence always */ rflags |= HPTE_R_M; +repeat: + hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, @@ -172,8 +229,17 @@ repeat: mark_hpte_slot_valid(hpte_slot_array, index, slot); } /* - * No need to use ldarx/stdcx here + * Mark the pte with _PAGE_COMBO, if we are trying to hash it with + * base page size 4k. + */ + if (psize == MMU_PAGE_4K) + new_pmd |= _PAGE_COMBO; + /* + * The hpte valid is stored in the pgtable whose address is in the + * second half of the PMD. Order this against clearing of the busy bit in + * huge pmd. */ + smp_wmb(); *pmdp = __pmd(new_pmd & ~_PAGE_BUSY); return 0; } diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index cff59f1bec23..cad68ff8eca5 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -106,11 +106,11 @@ unsigned long __max_low_memory = MAX_LOW_MEM; void MMU_setup(void) { /* Check for nobats option (used in mapin_ram). */ - if (strstr(cmd_line, "nobats")) { + if (strstr(boot_command_line, "nobats")) { __map_without_bats = 1; } - if (strstr(cmd_line, "noltlbs")) { + if (strstr(boot_command_line, "noltlbs")) { __map_without_ltlbs = 1; } #ifdef CONFIG_DEBUG_PAGEALLOC diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index e3734edffa69..3481556a1880 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -175,9 +175,10 @@ static unsigned long __meminit vmemmap_section_start(unsigned long page) static int __meminit vmemmap_populated(unsigned long start, int page_size) { unsigned long end = start + page_size; + start = (unsigned long)(pfn_to_page(vmemmap_section_start(start))); for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page))) - if (pfn_valid(vmemmap_section_start(start))) + if (pfn_valid(page_to_pfn((struct page *)start))) return 1; return 0; @@ -212,6 +213,13 @@ static void __meminit vmemmap_create_mapping(unsigned long start, for (i = 0; i < page_size; i += PAGE_SIZE) BUG_ON(map_kernel_page(start + i, phys, flags)); } + +#ifdef CONFIG_MEMORY_HOTPLUG +static void vmemmap_remove_mapping(unsigned long start, + unsigned long page_size) +{ +} +#endif #else /* CONFIG_PPC_BOOK3E */ static void __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, @@ -223,17 +231,39 @@ static void __meminit vmemmap_create_mapping(unsigned long start, mmu_kernel_ssize); BUG_ON(mapped < 0); } + +#ifdef CONFIG_MEMORY_HOTPLUG +static void vmemmap_remove_mapping(unsigned long start, + unsigned long page_size) +{ + int mapped = htab_remove_mapping(start, start + page_size, + mmu_vmemmap_psize, + mmu_kernel_ssize); + BUG_ON(mapped < 0); +} +#endif + #endif /* CONFIG_PPC_BOOK3E */ struct vmemmap_backing *vmemmap_list; +static struct vmemmap_backing *next; +static int num_left; +static int num_freed; static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) { - static struct vmemmap_backing *next; - static int num_left; + struct vmemmap_backing *vmem_back; + /* get from freed entries first */ + if (num_freed) { + num_freed--; + vmem_back = next; + next = next->list; + + return vmem_back; + } /* allocate a page when required and hand out chunks */ - if (!next || !num_left) { + if (!num_left) { next = vmemmap_alloc_block(PAGE_SIZE, node); if (unlikely(!next)) { WARN_ON(1); @@ -296,10 +326,85 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) return 0; } -void vmemmap_free(unsigned long start, unsigned long end) +#ifdef CONFIG_MEMORY_HOTPLUG +static unsigned long vmemmap_list_free(unsigned long start) { + struct vmemmap_backing *vmem_back, *vmem_back_prev; + + vmem_back_prev = vmem_back = vmemmap_list; + + /* look for it with prev pointer recorded */ + for (; vmem_back; vmem_back = vmem_back->list) { + if (vmem_back->virt_addr == start) + break; + vmem_back_prev = vmem_back; + } + + if (unlikely(!vmem_back)) { + WARN_ON(1); + return 0; + } + + /* remove it from vmemmap_list */ + if (vmem_back == vmemmap_list) /* remove head */ + vmemmap_list = vmem_back->list; + else + vmem_back_prev->list = vmem_back->list; + + /* next point to this freed entry */ + vmem_back->list = next; + next = vmem_back; + num_freed++; + + return vmem_back->phys; } +void __ref vmemmap_free(unsigned long start, unsigned long end) +{ + unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; + + start = _ALIGN_DOWN(start, page_size); + + pr_debug("vmemmap_free %lx...%lx\n", start, end); + + for (; start < end; start += page_size) { + unsigned long addr; + + /* + * the section has already be marked as invalid, so + * vmemmap_populated() true means some other sections still + * in this page, so skip it. + */ + if (vmemmap_populated(start, page_size)) + continue; + + addr = vmemmap_list_free(start); + if (addr) { + struct page *page = pfn_to_page(addr >> PAGE_SHIFT); + + if (PageReserved(page)) { + /* allocated from bootmem */ + if (page_size < PAGE_SIZE) { + /* + * this shouldn't happen, but if it is + * the case, leave the memory there + */ + WARN_ON_ONCE(1); + } else { + unsigned int nr_pages = + 1 << get_order(page_size); + while (nr_pages--) + free_reserved_page(page++); + } + } else + free_pages((unsigned long)(__va(addr)), + get_order(page_size)); + + vmemmap_remove_mapping(start, page_size); + } + } +} +#endif void register_page_bootmem_memmap(unsigned long section_nr, struct page *start_page, unsigned long size) { @@ -331,16 +436,16 @@ struct page *realmode_pfn_to_page(unsigned long pfn) if (pg_va < vmem_back->virt_addr) continue; - /* Check that page struct is not split between real pages */ - if ((pg_va + sizeof(struct page)) > - (vmem_back->virt_addr + page_size)) - return NULL; - - page = (struct page *) (vmem_back->phys + pg_va - + /* After vmemmap_list entry free is possible, need check all */ + if ((pg_va + sizeof(struct page)) <= + (vmem_back->virt_addr + page_size)) { + page = (struct page *) (vmem_back->phys + pg_va - vmem_back->virt_addr); - return page; + return page; + } } + /* Probably that page struct is split between real pages */ return NULL; } EXPORT_SYMBOL_GPL(realmode_pfn_to_page); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 420dfff22ba5..8ebaac75c940 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -128,7 +128,8 @@ int arch_add_memory(int nid, u64 start, u64 size) return -EINVAL; /* this should work for most non-highmem platforms */ - zone = pgdata->node_zones; + zone = pgdata->node_zones + + zone_for_memory(nid, start, size, 0); return __add_pages(nid, zone, start_pfn, nr_pages); } diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c index 78fef6726e10..aa5a7fd89461 100644 --- a/arch/powerpc/mm/mmu_context_hash32.c +++ b/arch/powerpc/mm/mmu_context_hash32.c @@ -2,7 +2,7 @@ * This file contains the routines for handling the MMU on those * PowerPC implementations where the MMU substantially follows the * architecture specification. This includes the 6xx, 7xx, 7xxx, - * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * and 8260 implementations but excludes the 8xx and 4xx. * -- paulus * * Derived from arch/ppc/mm/init.c: diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index af3d78e19302..928ebe79668b 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -410,17 +410,7 @@ void __init mmu_context_init(void) } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) { first_context = 1; last_context = 65535; - } else -#ifdef CONFIG_PPC_BOOK3E_MMU - if (mmu_has_feature(MMU_FTR_TYPE_3E)) { - u32 mmucfg = mfspr(SPRN_MMUCFG); - u32 pid_bits = (mmucfg & MMUCFG_PIDSIZE_MASK) - >> MMUCFG_PIDSIZE_SHIFT; - first_context = 1; - last_context = (1UL << (pid_bits + 1)) - 1; - } else -#endif - { + } else { first_context = 1; last_context = 255; } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 5eb07f332de1..649666d5d1c2 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -538,7 +538,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, */ static int numa_setup_cpu(unsigned long lcpu) { - int nid; + int nid = -1; struct device_node *cpu; /* @@ -555,19 +555,21 @@ static int numa_setup_cpu(unsigned long lcpu) if (!cpu) { WARN_ON(1); - nid = 0; - goto out; + if (cpu_present(lcpu)) + goto out_present; + else + goto out; } nid = of_node_to_nid_single(cpu); +out_present: if (nid < 0 || !node_online(nid)) nid = first_online_node; -out: - map_cpu_to_node(lcpu, nid); + map_cpu_to_node(lcpu, nid); of_node_put(cpu); - +out: return nid; } @@ -611,8 +613,8 @@ static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action, case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: unmap_cpu_from_node(lcpu); - break; ret = NOTIFY_OK; + break; #endif } return ret; @@ -1049,7 +1051,7 @@ static void __init mark_reserved_regions_for_nid(int nid) void __init do_init_bootmem(void) { - int nid; + int nid, cpu; min_low_pfn = 0; max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; @@ -1122,8 +1124,14 @@ void __init do_init_bootmem(void) reset_numa_cpu_lookup_table(); register_cpu_notifier(&ppc64_numa_nb); - cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, - (void *)(unsigned long)boot_cpuid); + /* + * We need the numa_cpu_lookup_table to be accurate for all CPUs, + * even before we online them, so that we can use cpu_to_{node,mem} + * early in boot, cf. smp_prepare_cpus(). + */ + for_each_present_cpu(cpu) { + numa_setup_cpu((unsigned long)cpu); + } } static int __init early_numa(char *p) diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index c695943a513c..c90e602677c9 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -48,7 +48,7 @@ static inline int pte_looks_normal(pte_t pte) (_PAGE_PRESENT | _PAGE_USER); } -struct page * maybe_pte_to_page(pte_t pte) +static struct page *maybe_pte_to_page(pte_t pte) { unsigned long pfn = pte_pfn(pte); struct page *page; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 343a87fa78b5..cf11342bf519 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -41,7 +41,7 @@ unsigned long ioremap_base; unsigned long ioremap_bot; EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ -#if defined(CONFIG_6xx) || defined(CONFIG_POWER3) +#ifdef CONFIG_6xx #define HAVE_BATS 1 #endif diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index f6ce1f111f5b..c8d709ab489d 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -54,6 +54,9 @@ #include "mmu_decl.h" +#define CREATE_TRACE_POINTS +#include <trace/events/thp.h> + /* Some sanity checking */ #if TASK_SIZE_USER64 > PGTABLE_RANGE #error TASK_SIZE_USER64 exceeds pagetable range @@ -68,7 +71,7 @@ unsigned long ioremap_bot = IOREMAP_BASE; #ifdef CONFIG_PPC_MMU_NOHASH -static void *early_alloc_pgtable(unsigned long size) +static __ref void *early_alloc_pgtable(unsigned long size) { void *pt; @@ -537,8 +540,9 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, old = pmd_val(*pmdp); *pmdp = __pmd((old & ~clr) | set); #endif + trace_hugepage_update(addr, old, clr, set); if (old & _PAGE_HASHPTE) - hpte_do_hugepage_flush(mm, addr, pmdp); + hpte_do_hugepage_flush(mm, addr, pmdp, old); return old; } @@ -642,10 +646,11 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, * If we didn't had the splitting flag set, go and flush the * HPTE entries. */ + trace_hugepage_splitting(address, old); if (!(old & _PAGE_SPLITTING)) { /* We need to flush the hpte */ if (old & _PAGE_HASHPTE) - hpte_do_hugepage_flush(vma->vm_mm, address, pmdp); + hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old); } /* * This ensures that generic code that rely on IRQ disabling @@ -709,6 +714,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, assert_spin_locked(&mm->page_table_lock); WARN_ON(!pmd_trans_huge(pmd)); #endif + trace_hugepage_set_pmd(addr, pmd); return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); } @@ -723,7 +729,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, * neesd to be flushed. */ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp) + pmd_t *pmdp, unsigned long old_pmd) { int ssize, i; unsigned long s_addr; @@ -745,12 +751,29 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, if (!hpte_slot_array) return; - /* get the base page size */ + /* get the base page size,vsid and segment size */ +#ifdef CONFIG_DEBUG_VM psize = get_slice_psize(mm, s_addr); + BUG_ON(psize == MMU_PAGE_16M); +#endif + if (old_pmd & _PAGE_COMBO) + psize = MMU_PAGE_4K; + else + psize = MMU_PAGE_64K; + + if (!is_kernel_addr(s_addr)) { + ssize = user_segment_size(s_addr); + vsid = get_vsid(mm->context.id, s_addr, ssize); + WARN_ON(vsid == 0); + } else { + vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize); + ssize = mmu_kernel_ssize; + } if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(mm, hpte_slot_array, - s_addr, psize); + return ppc_md.hugepage_invalidate(vsid, s_addr, + hpte_slot_array, + psize, ssize); /* * No bluk hpte removal support, invalidate each entry */ @@ -768,15 +791,6 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, /* get the vpn */ addr = s_addr + (i * (1ul << shift)); - if (!is_kernel_addr(addr)) { - ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); - } else { - vsid = get_kernel_vsid(addr, mmu_kernel_ssize); - ssize = mmu_kernel_ssize; - } - vpn = hpt_vpn(addr, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); if (hidx & _PTEIDX_SECONDARY) diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 11571e118831..5029dc19b517 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -2,7 +2,7 @@ * This file contains the routines for handling the MMU on those * PowerPC implementations where the MMU substantially follows the * architecture specification. This includes the 6xx, 7xx, 7xxx, - * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * and 8260 implementations but excludes the 8xx and 4xx. * -- paulus * * Derived from arch/ppc/mm/init.c: diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index b0c75cc15efc..86f6a755af0b 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -30,9 +30,11 @@ #include <linux/err.h> #include <linux/spinlock.h> #include <linux/export.h> +#include <linux/hugetlb.h> #include <asm/mman.h> #include <asm/mmu.h> #include <asm/spu.h> +#include <asm/hugetlb.h> /* some sanity checks */ #if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c deleted file mode 100644 index 3f8efa6f2997..000000000000 --- a/arch/powerpc/mm/stab.c +++ /dev/null @@ -1,286 +0,0 @@ -/* - * PowerPC64 Segment Translation Support. - * - * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com - * Copyright (c) 2001 Dave Engebretsen - * - * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/memblock.h> - -#include <asm/pgtable.h> -#include <asm/mmu.h> -#include <asm/mmu_context.h> -#include <asm/paca.h> -#include <asm/cputable.h> -#include <asm/prom.h> - -struct stab_entry { - unsigned long esid_data; - unsigned long vsid_data; -}; - -#define NR_STAB_CACHE_ENTRIES 8 -static DEFINE_PER_CPU(long, stab_cache_ptr); -static DEFINE_PER_CPU(long [NR_STAB_CACHE_ENTRIES], stab_cache); - -/* - * Create a segment table entry for the given esid/vsid pair. - */ -static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) -{ - unsigned long esid_data, vsid_data; - unsigned long entry, group, old_esid, castout_entry, i; - unsigned int global_entry; - struct stab_entry *ste, *castout_ste; - unsigned long kernel_segment = (esid << SID_SHIFT) >= PAGE_OFFSET; - - vsid_data = vsid << STE_VSID_SHIFT; - esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V; - if (! kernel_segment) - esid_data |= STE_ESID_KS; - - /* Search the primary group first. */ - global_entry = (esid & 0x1f) << 3; - ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); - - /* Find an empty entry, if one exists. */ - for (group = 0; group < 2; group++) { - for (entry = 0; entry < 8; entry++, ste++) { - if (!(ste->esid_data & STE_ESID_V)) { - ste->vsid_data = vsid_data; - eieio(); - ste->esid_data = esid_data; - return (global_entry | entry); - } - } - /* Now search the secondary group. */ - global_entry = ((~esid) & 0x1f) << 3; - ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); - } - - /* - * Could not find empty entry, pick one with a round robin selection. - * Search all entries in the two groups. - */ - castout_entry = get_paca()->stab_rr; - for (i = 0; i < 16; i++) { - if (castout_entry < 8) { - global_entry = (esid & 0x1f) << 3; - ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); - castout_ste = ste + castout_entry; - } else { - global_entry = ((~esid) & 0x1f) << 3; - ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); - castout_ste = ste + (castout_entry - 8); - } - - /* Dont cast out the first kernel segment */ - if ((castout_ste->esid_data & ESID_MASK) != PAGE_OFFSET) - break; - - castout_entry = (castout_entry + 1) & 0xf; - } - - get_paca()->stab_rr = (castout_entry + 1) & 0xf; - - /* Modify the old entry to the new value. */ - - /* Force previous translations to complete. DRENG */ - asm volatile("isync" : : : "memory"); - - old_esid = castout_ste->esid_data >> SID_SHIFT; - castout_ste->esid_data = 0; /* Invalidate old entry */ - - asm volatile("sync" : : : "memory"); /* Order update */ - - castout_ste->vsid_data = vsid_data; - eieio(); /* Order update */ - castout_ste->esid_data = esid_data; - - asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); - /* Ensure completion of slbie */ - asm volatile("sync" : : : "memory"); - - return (global_entry | (castout_entry & 0x7)); -} - -/* - * Allocate a segment table entry for the given ea and mm - */ -static int __ste_allocate(unsigned long ea, struct mm_struct *mm) -{ - unsigned long vsid; - unsigned char stab_entry; - unsigned long offset; - - /* Kernel or user address? */ - if (is_kernel_addr(ea)) { - vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M); - } else { - if ((ea >= TASK_SIZE_USER64) || (! mm)) - return 1; - - vsid = get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M); - } - - stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid); - - if (!is_kernel_addr(ea)) { - offset = __get_cpu_var(stab_cache_ptr); - if (offset < NR_STAB_CACHE_ENTRIES) - __get_cpu_var(stab_cache[offset++]) = stab_entry; - else - offset = NR_STAB_CACHE_ENTRIES+1; - __get_cpu_var(stab_cache_ptr) = offset; - - /* Order update */ - asm volatile("sync":::"memory"); - } - - return 0; -} - -int ste_allocate(unsigned long ea) -{ - return __ste_allocate(ea, current->mm); -} - -/* - * Do the segment table work for a context switch: flush all user - * entries from the table, then preload some probably useful entries - * for the new task - */ -void switch_stab(struct task_struct *tsk, struct mm_struct *mm) -{ - struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; - struct stab_entry *ste; - unsigned long offset; - unsigned long pc = KSTK_EIP(tsk); - unsigned long stack = KSTK_ESP(tsk); - unsigned long unmapped_base; - - /* Force previous translations to complete. DRENG */ - asm volatile("isync" : : : "memory"); - - /* - * We need interrupts hard-disabled here, not just soft-disabled, - * so that a PMU interrupt can't occur, which might try to access - * user memory (to get a stack trace) and possible cause an STAB miss - * which would update the stab_cache/stab_cache_ptr per-cpu variables. - */ - hard_irq_disable(); - - offset = __get_cpu_var(stab_cache_ptr); - if (offset <= NR_STAB_CACHE_ENTRIES) { - int i; - - for (i = 0; i < offset; i++) { - ste = stab + __get_cpu_var(stab_cache[i]); - ste->esid_data = 0; /* invalidate entry */ - } - } else { - unsigned long entry; - - /* Invalidate all entries. */ - ste = stab; - - /* Never flush the first entry. */ - ste += 1; - for (entry = 1; - entry < (HW_PAGE_SIZE / sizeof(struct stab_entry)); - entry++, ste++) { - unsigned long ea; - ea = ste->esid_data & ESID_MASK; - if (!is_kernel_addr(ea)) { - ste->esid_data = 0; - } - } - } - - asm volatile("sync; slbia; sync":::"memory"); - - __get_cpu_var(stab_cache_ptr) = 0; - - /* Now preload some entries for the new task */ - if (test_tsk_thread_flag(tsk, TIF_32BIT)) - unmapped_base = TASK_UNMAPPED_BASE_USER32; - else - unmapped_base = TASK_UNMAPPED_BASE_USER64; - - __ste_allocate(pc, mm); - - if (GET_ESID(pc) == GET_ESID(stack)) - return; - - __ste_allocate(stack, mm); - - if ((GET_ESID(pc) == GET_ESID(unmapped_base)) - || (GET_ESID(stack) == GET_ESID(unmapped_base))) - return; - - __ste_allocate(unmapped_base, mm); - - /* Order update */ - asm volatile("sync" : : : "memory"); -} - -/* - * Allocate segment tables for secondary CPUs. These must all go in - * the first (bolted) segment, so that do_stab_bolted won't get a - * recursive segment miss on the segment table itself. - */ -void __init stabs_alloc(void) -{ - int cpu; - - if (mmu_has_feature(MMU_FTR_SLB)) - return; - - for_each_possible_cpu(cpu) { - unsigned long newstab; - - if (cpu == 0) - continue; /* stab for CPU 0 is statically allocated */ - - newstab = memblock_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE, - 1<<SID_SHIFT); - newstab = (unsigned long)__va(newstab); - - memset((void *)newstab, 0, HW_PAGE_SIZE); - - paca[cpu].stab_addr = newstab; - paca[cpu].stab_real = __pa(newstab); - printk(KERN_INFO "Segment table for CPU %d at 0x%llx " - "virtual, 0x%llx absolute\n", - cpu, paca[cpu].stab_addr, paca[cpu].stab_real); - } -} - -/* - * Build an entry for the base kernel segment and put it into - * the segment table or SLB. All other segment table or SLB - * entries are faulted in. - */ -void stab_initialize(unsigned long stab) -{ - unsigned long vsid = get_kernel_vsid(PAGE_OFFSET, MMU_SEGSIZE_256M); - unsigned long stabreal; - - asm volatile("isync; slbia; isync":::"memory"); - make_ste(stab, GET_ESID(PAGE_OFFSET), vsid); - - /* Order update */ - asm volatile("sync":::"memory"); - - /* Set ASR */ - stabreal = get_paca()->stab_real | 0x1ul; - - mtspr(SPRN_ASR, stabreal); -} diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index c99f6510a0b2..d2a94b85dbc2 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -30,6 +30,8 @@ #include <asm/tlb.h> #include <asm/bug.h> +#include <trace/events/thp.h> + DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); /* @@ -213,10 +215,12 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, if (ptep == NULL) continue; pte = pte_val(*ptep); + if (hugepage_shift) + trace_hugepage_invalidate(start, pte_val(pte)); if (!(pte & _PAGE_HASHPTE)) continue; if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte))) - hpte_do_hugepage_flush(mm, start, (pmd_t *)pte); + hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte); else hpte_need_flush(mm, start, ptep, pte, 0); } diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 92cb18d52ea8..f38ea4df6a85 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -581,42 +581,10 @@ static void setup_mmu_htw(void) /* * Early initialization of the MMU TLB code */ -static void __early_init_mmu(int boot_cpu) +static void early_init_this_mmu(void) { unsigned int mas4; - /* XXX This will have to be decided at runtime, but right - * now our boot and TLB miss code hard wires it. Ideally - * we should find out a suitable page size and patch the - * TLB miss code (either that or use the PACA to store - * the value we want) - */ - mmu_linear_psize = MMU_PAGE_1G; - - /* XXX This should be decided at runtime based on supported - * page sizes in the TLB, but for now let's assume 16M is - * always there and a good fit (which it probably is) - * - * Freescale booke only supports 4K pages in TLB0, so use that. - */ - if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) - mmu_vmemmap_psize = MMU_PAGE_4K; - else - mmu_vmemmap_psize = MMU_PAGE_16M; - - /* XXX This code only checks for TLB 0 capabilities and doesn't - * check what page size combos are supported by the HW. It - * also doesn't handle the case where a separate array holds - * the IND entries from the array loaded by the PT. - */ - if (boot_cpu) { - /* Look for supported page sizes */ - setup_page_sizes(); - - /* Look for HW tablewalk support */ - setup_mmu_htw(); - } - /* Set MAS4 based on page table setting */ mas4 = 0x4 << MAS4_WIMGED_SHIFT; @@ -650,11 +618,6 @@ static void __early_init_mmu(int boot_cpu) } mtspr(SPRN_MAS4, mas4); - /* Set the global containing the top of the linear mapping - * for use by the TLB miss code - */ - linear_map_top = memblock_end_of_DRAM(); - #ifdef CONFIG_PPC_FSL_BOOK3E if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { unsigned int num_cams; @@ -662,10 +625,49 @@ static void __early_init_mmu(int boot_cpu) /* use a quarter of the TLBCAM for bolted linear map */ num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; linear_map_top = map_mem_in_cams(linear_map_top, num_cams); + } +#endif - /* limit memory so we dont have linear faults */ - memblock_enforce_memory_limit(linear_map_top); + /* A sync won't hurt us after mucking around with + * the MMU configuration + */ + mb(); +} +static void __init early_init_mmu_global(void) +{ + /* XXX This will have to be decided at runtime, but right + * now our boot and TLB miss code hard wires it. Ideally + * we should find out a suitable page size and patch the + * TLB miss code (either that or use the PACA to store + * the value we want) + */ + mmu_linear_psize = MMU_PAGE_1G; + + /* XXX This should be decided at runtime based on supported + * page sizes in the TLB, but for now let's assume 16M is + * always there and a good fit (which it probably is) + * + * Freescale booke only supports 4K pages in TLB0, so use that. + */ + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) + mmu_vmemmap_psize = MMU_PAGE_4K; + else + mmu_vmemmap_psize = MMU_PAGE_16M; + + /* XXX This code only checks for TLB 0 capabilities and doesn't + * check what page size combos are supported by the HW. It + * also doesn't handle the case where a separate array holds + * the IND entries from the array loaded by the PT. + */ + /* Look for supported page sizes */ + setup_page_sizes(); + + /* Look for HW tablewalk support */ + setup_mmu_htw(); + +#ifdef CONFIG_PPC_FSL_BOOK3E + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { if (book3e_htw_mode == PPC_HTW_NONE) { extlb_level_exc = EX_TLB_SIZE; patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); @@ -675,22 +677,41 @@ static void __early_init_mmu(int boot_cpu) } #endif - /* A sync won't hurt us after mucking around with - * the MMU configuration + /* Set the global containing the top of the linear mapping + * for use by the TLB miss code */ - mb(); + linear_map_top = memblock_end_of_DRAM(); +} + +static void __init early_mmu_set_memory_limit(void) +{ +#ifdef CONFIG_PPC_FSL_BOOK3E + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + /* + * Limit memory so we dont have linear faults. + * Unlike memblock_set_current_limit, which limits + * memory available during early boot, this permanently + * reduces the memory available to Linux. We need to + * do this because highmem is not supported on 64-bit. + */ + memblock_enforce_memory_limit(linear_map_top); + } +#endif memblock_set_current_limit(linear_map_top); } +/* boot cpu only */ void __init early_init_mmu(void) { - __early_init_mmu(1); + early_init_mmu_global(); + early_init_this_mmu(); + early_mmu_set_memory_limit(); } void early_init_mmu_secondary(void) { - __early_init_mmu(0); + early_init_this_mmu(); } void setup_initial_memory_limit(phys_addr_t first_memblock_base, diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 6dcdadefd8d0..3afa6f4c1957 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -25,7 +25,7 @@ static inline void bpf_flush_icache(void *start, void *end) flush_icache_range((unsigned long)start, (unsigned long)end); } -static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image, +static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx) { int i; @@ -121,7 +121,7 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) /* Assemble the body code between the prologue & epilogue. */ -static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, +static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, unsigned int *addrs) { @@ -390,12 +390,16 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, case BPF_ANC | SKF_AD_VLAN_TAG: case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, vlan_tci)); - if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) - PPC_ANDI(r_A, r_A, VLAN_VID_MASK); - else + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { + PPC_ANDI(r_A, r_A, ~VLAN_TAG_PRESENT); + } else { PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT); + PPC_SRWI(r_A, r_A, 12); + } break; case BPF_ANC | SKF_AD_QUEUE: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, @@ -565,7 +569,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, return 0; } -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { unsigned int proglen; unsigned int alloclen; @@ -689,7 +693,7 @@ out: return; } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile index 751ec7bd5018..cedbbeced632 100644 --- a/arch/powerpc/oprofile/Makefile +++ b/arch/powerpc/oprofile/Makefile @@ -14,6 +14,6 @@ oprofile-y := $(DRIVER_OBJS) common.o backtrace.o oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \ cell/spu_profiler.o cell/vma_map.o \ cell/spu_task_sync.o -oprofile-$(CONFIG_PPC_BOOK3S_64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o +oprofile-$(CONFIG_PPC_BOOK3S_64) += op_model_power4.o op_model_pa6t.o oprofile-$(CONFIG_FSL_EMB_PERFMON) += op_model_fsl_emb.o oprofile-$(CONFIG_6xx) += op_model_7450.o diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index f75301f2c85f..6adf55fa5d88 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -12,6 +12,7 @@ #include <asm/processor.h> #include <asm/uaccess.h> #include <asm/compat.h> +#include <asm/oprofile_impl.h> #define STACK_SP(STACK) *(STACK) diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c index c77348c5d463..bf094c5a4bd9 100644 --- a/arch/powerpc/oprofile/common.c +++ b/arch/powerpc/oprofile/common.c @@ -205,9 +205,6 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) ops->sync_stop = model->sync_stop; break; #endif - case PPC_OPROFILE_RS64: - model = &op_model_rs64; - break; case PPC_OPROFILE_POWER4: model = &op_model_power4; break; diff --git a/arch/powerpc/oprofile/op_model_rs64.c b/arch/powerpc/oprofile/op_model_rs64.c deleted file mode 100644 index 7e5b8ed3a1b7..000000000000 --- a/arch/powerpc/oprofile/op_model_rs64.c +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/oprofile.h> -#include <linux/smp.h> -#include <asm/ptrace.h> -#include <asm/processor.h> -#include <asm/cputable.h> -#include <asm/oprofile_impl.h> - -#define dbg(args...) - -static void ctrl_write(unsigned int i, unsigned int val) -{ - unsigned int tmp = 0; - unsigned long shift = 0, mask = 0; - - dbg("ctrl_write %d %x\n", i, val); - - switch(i) { - case 0: - tmp = mfspr(SPRN_MMCR0); - shift = 6; - mask = 0x7F; - break; - case 1: - tmp = mfspr(SPRN_MMCR0); - shift = 0; - mask = 0x3F; - break; - case 2: - tmp = mfspr(SPRN_MMCR1); - shift = 31 - 4; - mask = 0x1F; - break; - case 3: - tmp = mfspr(SPRN_MMCR1); - shift = 31 - 9; - mask = 0x1F; - break; - case 4: - tmp = mfspr(SPRN_MMCR1); - shift = 31 - 14; - mask = 0x1F; - break; - case 5: - tmp = mfspr(SPRN_MMCR1); - shift = 31 - 19; - mask = 0x1F; - break; - case 6: - tmp = mfspr(SPRN_MMCR1); - shift = 31 - 24; - mask = 0x1F; - break; - case 7: - tmp = mfspr(SPRN_MMCR1); - shift = 31 - 28; - mask = 0xF; - break; - } - - tmp = tmp & ~(mask << shift); - tmp |= val << shift; - - switch(i) { - case 0: - case 1: - mtspr(SPRN_MMCR0, tmp); - break; - default: - mtspr(SPRN_MMCR1, tmp); - } - - dbg("ctrl_write mmcr0 %lx mmcr1 %lx\n", mfspr(SPRN_MMCR0), - mfspr(SPRN_MMCR1)); -} - -static unsigned long reset_value[OP_MAX_COUNTER]; - -static int num_counters; - -static int rs64_reg_setup(struct op_counter_config *ctr, - struct op_system_config *sys, - int num_ctrs) -{ - int i; - - num_counters = num_ctrs; - - for (i = 0; i < num_counters; ++i) - reset_value[i] = 0x80000000UL - ctr[i].count; - - /* XXX setup user and kernel profiling */ - return 0; -} - -static int rs64_cpu_setup(struct op_counter_config *ctr) -{ - unsigned int mmcr0; - - /* reset MMCR0 and set the freeze bit */ - mmcr0 = MMCR0_FC; - mtspr(SPRN_MMCR0, mmcr0); - - /* reset MMCR1, MMCRA */ - mtspr(SPRN_MMCR1, 0); - - if (cpu_has_feature(CPU_FTR_MMCRA)) - mtspr(SPRN_MMCRA, 0); - - mmcr0 |= MMCR0_FCM1|MMCR0_PMXE|MMCR0_FCECE; - /* Only applies to POWER3, but should be safe on RS64 */ - mmcr0 |= MMCR0_PMC1CE|MMCR0_PMCjCE; - mtspr(SPRN_MMCR0, mmcr0); - - dbg("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(), - mfspr(SPRN_MMCR0)); - dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(), - mfspr(SPRN_MMCR1)); - - return 0; -} - -static int rs64_start(struct op_counter_config *ctr) -{ - int i; - unsigned int mmcr0; - - /* set the PMM bit (see comment below) */ - mtmsrd(mfmsr() | MSR_PMM); - - for (i = 0; i < num_counters; ++i) { - if (ctr[i].enabled) { - classic_ctr_write(i, reset_value[i]); - ctrl_write(i, ctr[i].event); - } else { - classic_ctr_write(i, 0); - } - } - - mmcr0 = mfspr(SPRN_MMCR0); - - /* - * now clear the freeze bit, counting will not start until we - * rfid from this excetion, because only at that point will - * the PMM bit be cleared - */ - mmcr0 &= ~MMCR0_FC; - mtspr(SPRN_MMCR0, mmcr0); - - dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); - return 0; -} - -static void rs64_stop(void) -{ - unsigned int mmcr0; - - /* freeze counters */ - mmcr0 = mfspr(SPRN_MMCR0); - mmcr0 |= MMCR0_FC; - mtspr(SPRN_MMCR0, mmcr0); - - dbg("stop on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); - - mb(); -} - -static void rs64_handle_interrupt(struct pt_regs *regs, - struct op_counter_config *ctr) -{ - unsigned int mmcr0; - int is_kernel; - int val; - int i; - unsigned long pc = mfspr(SPRN_SIAR); - - is_kernel = is_kernel_addr(pc); - - /* set the PMM bit (see comment below) */ - mtmsrd(mfmsr() | MSR_PMM); - - for (i = 0; i < num_counters; ++i) { - val = classic_ctr_read(i); - if (val < 0) { - if (ctr[i].enabled) { - oprofile_add_ext_sample(pc, regs, i, is_kernel); - classic_ctr_write(i, reset_value[i]); - } else { - classic_ctr_write(i, 0); - } - } - } - - mmcr0 = mfspr(SPRN_MMCR0); - - /* reset the perfmon trigger */ - mmcr0 |= MMCR0_PMXE; - - /* - * now clear the freeze bit, counting will not start until we - * rfid from this exception, because only at that point will - * the PMM bit be cleared - */ - mmcr0 &= ~MMCR0_FC; - mtspr(SPRN_MMCR0, mmcr0); -} - -struct op_powerpc_model op_model_rs64 = { - .reg_setup = rs64_reg_setup, - .cpu_setup = rs64_cpu_setup, - .start = rs64_start, - .stop = rs64_stop, - .handle_interrupt = rs64_handle_interrupt, -}; diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 74d1e780748b..2396dda282cd 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -35,7 +35,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; /* must be 16-byte aligned */ if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) return 0; - if (sp >= prev_sp + STACK_FRAME_OVERHEAD) + if (sp >= prev_sp + STACK_FRAME_MIN_SIZE) return 1; /* * sp could decrease when we jump off an interrupt stack diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 4520c9356b54..a6995d4e93d4 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -36,7 +36,12 @@ struct cpu_hw_events { struct perf_event *event[MAX_HWEVENTS]; u64 events[MAX_HWEVENTS]; unsigned int flags[MAX_HWEVENTS]; - unsigned long mmcr[3]; + /* + * The order of the MMCR array is: + * - 64-bit, MMCR0, MMCR1, MMCRA, MMCR2 + * - 32-bit, MMCR0, MMCR1, MMCR2 + */ + unsigned long mmcr[4]; struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS]; u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; @@ -54,9 +59,9 @@ struct cpu_hw_events { struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES]; }; -DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); -struct power_pmu *ppmu; +static struct power_pmu *ppmu; /* * Normally, to ignore kernel events we set the FCS (freeze counters @@ -112,14 +117,14 @@ static bool is_ebb_event(struct perf_event *event) { return false; } static int ebb_event_check(struct perf_event *event) { return 0; } static void ebb_event_add(struct perf_event *event) { } static void ebb_switch_out(unsigned long mmcr0) { } -static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0) +static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) { - return mmcr0; + return cpuhw->mmcr[0]; } static inline void power_pmu_bhrb_enable(struct perf_event *event) {} static inline void power_pmu_bhrb_disable(struct perf_event *event) {} -void power_pmu_flush_branch_stack(void) {} +static void power_pmu_flush_branch_stack(void) {} static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} static void pmao_restore_workaround(bool ebb) { } #endif /* CONFIG_PPC32 */ @@ -370,7 +375,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event) /* Called from ctxsw to prevent one process's branch entries to * mingle with the other process's entries during context switch. */ -void power_pmu_flush_branch_stack(void) +static void power_pmu_flush_branch_stack(void) { if (ppmu->bhrb_nr) power_pmu_bhrb_reset(); @@ -403,7 +408,7 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Processing BHRB entries */ -void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) +static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) { u64 val; u64 addr; @@ -485,7 +490,7 @@ static bool is_ebb_event(struct perf_event *event) * check that the PMU supports EBB, meaning those that don't can still * use bit 63 of the event code for something else if they wish. */ - return (ppmu->flags & PPMU_EBB) && + return (ppmu->flags & PPMU_ARCH_207S) && ((event->attr.config >> PERF_EVENT_CONFIG_EBB_SHIFT) & 1); } @@ -542,8 +547,10 @@ static void ebb_switch_out(unsigned long mmcr0) current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK; } -static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0) +static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) { + unsigned long mmcr0 = cpuhw->mmcr[0]; + if (!ebb) goto out; @@ -568,7 +575,15 @@ static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0) mtspr(SPRN_SIAR, current->thread.siar); mtspr(SPRN_SIER, current->thread.sier); mtspr(SPRN_SDAR, current->thread.sdar); - mtspr(SPRN_MMCR2, current->thread.mmcr2); + + /* + * Merge the kernel & user values of MMCR2. The semantics we implement + * are that the user MMCR2 can set bits, ie. cause counters to freeze, + * but not clear bits. If a task wants to be able to clear bits, ie. + * unfreeze counters, it should not set exclude_xxx in its events and + * instead manage the MMCR2 entirely by itself. + */ + mtspr(SPRN_MMCR2, cpuhw->mmcr[3] | current->thread.mmcr2); out: return mmcr0; } @@ -777,7 +792,7 @@ void perf_event_print_debug(void) if (ppmu->flags & PPMU_HAS_SIER) sier = mfspr(SPRN_SIER); - if (ppmu->flags & PPMU_EBB) { + if (ppmu->flags & PPMU_ARCH_207S) { pr_info("MMCR2: %016lx EBBHR: %016lx\n", mfspr(SPRN_MMCR2), mfspr(SPRN_EBBHR)); pr_info("EBBRR: %016lx BESCR: %016lx\n", @@ -915,6 +930,14 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[], int i, n, first; struct perf_event *event; + /* + * If the PMU we're on supports per event exclude settings then we + * don't need to do any of this logic. NB. This assumes no PMU has both + * per event exclude and limited PMCs. + */ + if (ppmu->flags & PPMU_ARCH_207S) + return 0; + n = n_prev + n_new; if (n <= 1) return 0; @@ -996,7 +1019,22 @@ static void power_pmu_read(struct perf_event *event) } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); local64_add(delta, &event->count); - local64_sub(delta, &event->hw.period_left); + + /* + * A number of places program the PMC with (0x80000000 - period_left). + * We never want period_left to be less than 1 because we will program + * the PMC with a value >= 0x800000000 and an edge detected PMC will + * roll around to 0 before taking an exception. We have seen this + * on POWER8. + * + * To fix this, clamp the minimum value of period_left to 1. + */ + do { + prev = local64_read(&event->hw.period_left); + val = prev - delta; + if (val < 1) + val = 1; + } while (local64_cmpxchg(&event->hw.period_left, prev, val) != prev); } /* @@ -1204,28 +1242,31 @@ static void power_pmu_enable(struct pmu *pmu) } /* - * Compute MMCR* values for the new set of events + * Clear all MMCR settings and recompute them for the new set of events. */ + memset(cpuhw->mmcr, 0, sizeof(cpuhw->mmcr)); + if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, - cpuhw->mmcr)) { + cpuhw->mmcr, cpuhw->event)) { /* shouldn't ever get here */ printk(KERN_ERR "oops compute_mmcr failed\n"); goto out; } - /* - * Add in MMCR0 freeze bits corresponding to the - * attr.exclude_* bits for the first event. - * We have already checked that all events have the - * same values for these bits as the first event. - */ - event = cpuhw->event[0]; - if (event->attr.exclude_user) - cpuhw->mmcr[0] |= MMCR0_FCP; - if (event->attr.exclude_kernel) - cpuhw->mmcr[0] |= freeze_events_kernel; - if (event->attr.exclude_hv) - cpuhw->mmcr[0] |= MMCR0_FCHV; + if (!(ppmu->flags & PPMU_ARCH_207S)) { + /* + * Add in MMCR0 freeze bits corresponding to the attr.exclude_* + * bits for the first event. We have already checked that all + * events have the same value for these bits as the first event. + */ + event = cpuhw->event[0]; + if (event->attr.exclude_user) + cpuhw->mmcr[0] |= MMCR0_FCP; + if (event->attr.exclude_kernel) + cpuhw->mmcr[0] |= freeze_events_kernel; + if (event->attr.exclude_hv) + cpuhw->mmcr[0] |= MMCR0_FCHV; + } /* * Write the new configuration to MMCR* with the freeze @@ -1237,6 +1278,8 @@ static void power_pmu_enable(struct pmu *pmu) mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) | MMCR0_FC); + if (ppmu->flags & PPMU_ARCH_207S) + mtspr(SPRN_MMCR2, cpuhw->mmcr[3]); /* * Read off any pre-existing events that need to move @@ -1292,7 +1335,7 @@ static void power_pmu_enable(struct pmu *pmu) out_enable: pmao_restore_workaround(ebb); - mmcr0 = ebb_switch_in(ebb, cpuhw->mmcr[0]); + mmcr0 = ebb_switch_in(ebb, cpuhw); mb(); if (cpuhw->bhrb_users) @@ -1530,7 +1573,7 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -void power_pmu_start_txn(struct pmu *pmu) +static void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -1544,7 +1587,7 @@ void power_pmu_start_txn(struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -void power_pmu_cancel_txn(struct pmu *pmu) +static void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -1557,7 +1600,7 @@ void power_pmu_cancel_txn(struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -int power_pmu_commit_txn(struct pmu *pmu) +static int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; @@ -1696,7 +1739,7 @@ static int power_pmu_event_init(struct perf_event *event) if (has_branch_stack(event)) { /* PMU has BHRB enabled */ - if (!(ppmu->flags & PPMU_BHRB)) + if (!(ppmu->flags & PPMU_ARCH_207S)) return -EOPNOTSUPP; } @@ -1845,7 +1888,7 @@ ssize_t power_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%02llx\n", pmu_attr->id); } -struct pmu power_pmu = { +static struct pmu power_pmu = { .pmu_enable = power_pmu_enable, .pmu_disable = power_pmu_disable, .event_init = power_pmu_event_init, diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index e0766b82e165..70d4f748b54b 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -223,7 +223,7 @@ e_free: pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:" " rc=%ld\n", catalog_version_num, page_offset, hret); - kfree(page); + kmem_cache_free(hv_page_cache, page); pr_devel("catalog_read: offset=%lld(%lld) count=%zu(%zu) catalog_len=%zu(%zu) => %zd\n", offset, page_offset, count, page_count, catalog_len, @@ -387,8 +387,7 @@ static int h_24x7_event_init(struct perf_event *event) event->attr.exclude_hv || event->attr.exclude_idle || event->attr.exclude_host || - event->attr.exclude_guest || - is_sampling_event(event)) /* no sampling */ + event->attr.exclude_guest) return -EINVAL; /* no branch sampling */ @@ -513,6 +512,9 @@ static int hv_24x7_init(void) if (!hv_page_cache) return -ENOMEM; + /* sampling not supported */ + h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1); if (r) return r; diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index c9d399a2df82..15fc76c93022 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -210,8 +210,7 @@ static int h_gpci_event_init(struct perf_event *event) event->attr.exclude_hv || event->attr.exclude_idle || event->attr.exclude_host || - event->attr.exclude_guest || - is_sampling_event(event)) /* no sampling */ + event->attr.exclude_guest) return -EINVAL; /* no branch sampling */ @@ -284,6 +283,9 @@ static int hv_gpci_init(void) return -ENODEV; } + /* sampling not supported */ + h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1); if (r) return r; diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c index fe21b515ca44..d115c5635bf3 100644 --- a/arch/powerpc/perf/mpc7450-pmu.c +++ b/arch/powerpc/perf/mpc7450-pmu.c @@ -260,8 +260,9 @@ static const u32 pmcsel_mask[N_COUNTER] = { /* * Compute MMCR0/1/2 values for a set of events. */ -static int mpc7450_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) +static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], + unsigned long mmcr[], + struct perf_event *pevents[]) { u8 event_index[N_CLASSES][N_COUNTER]; int n_classevent[N_CLASSES]; diff --git a/arch/powerpc/perf/power4-pmu.c b/arch/powerpc/perf/power4-pmu.c index 9103a1de864d..ce6072fa481b 100644 --- a/arch/powerpc/perf/power4-pmu.c +++ b/arch/powerpc/perf/power4-pmu.c @@ -356,7 +356,7 @@ static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[]) } static int p4_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) + unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) { unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned int pmc, unit, byte, psel, lower; diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index b03b6dc0172d..0526dac66007 100644 --- a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -452,7 +452,7 @@ static int power5p_marked_instr_event(u64 event) } static int power5p_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) + unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = 0; diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index 1e8ce423c3af..4dc99f9f7962 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -383,7 +383,7 @@ static int power5_marked_instr_event(u64 event) } static int power5_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) + unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 31128e086fed..9c9d646b68a1 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -175,7 +175,7 @@ static int power6_marked_instr_event(u64 event) * Assign PMC numbers and compute MMCR1 value for a set of events */ static int p6_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) + unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 56c67bca2f75..5b62f2389290 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -245,7 +245,7 @@ static int power7_marked_instr_event(u64 event) } static int power7_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) + unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index fe2763b6e039..396351db601b 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/perf_event.h> #include <asm/firmware.h> +#include <asm/cputable.h> /* @@ -266,6 +267,11 @@ #define MMCRA_SDAR_MODE_TLB (1ull << 42) #define MMCRA_IFM_SHIFT 30 +/* Bits in MMCR2 for POWER8 */ +#define MMCR2_FCS(pmc) (1ull << (63 - (((pmc) - 1) * 9))) +#define MMCR2_FCP(pmc) (1ull << (62 - (((pmc) - 1) * 9))) +#define MMCR2_FCH(pmc) (1ull << (57 - (((pmc) - 1) * 9))) + static inline bool event_is_fab_match(u64 event) { @@ -393,9 +399,10 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long } static int power8_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) + unsigned int hwc[], unsigned long mmcr[], + struct perf_event *pevents[]) { - unsigned long mmcra, mmcr1, unit, combine, psel, cache, val; + unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val; unsigned int pmc, pmc_inuse; int i; @@ -410,7 +417,7 @@ static int power8_compute_mmcr(u64 event[], int n_ev, /* In continous sampling mode, update SDAR on TLB miss */ mmcra = MMCRA_SDAR_MODE_TLB; - mmcr1 = 0; + mmcr1 = mmcr2 = 0; /* Second pass: assign PMCs, set all MMCR1 fields */ for (i = 0; i < n_ev; ++i) { @@ -472,6 +479,19 @@ static int power8_compute_mmcr(u64 event[], int n_ev, mmcra |= val << MMCRA_IFM_SHIFT; } + if (pevents[i]->attr.exclude_user) + mmcr2 |= MMCR2_FCP(pmc); + + if (pevents[i]->attr.exclude_hv) + mmcr2 |= MMCR2_FCH(pmc); + + if (pevents[i]->attr.exclude_kernel) { + if (cpu_has_feature(CPU_FTR_HVMODE)) + mmcr2 |= MMCR2_FCH(pmc); + else + mmcr2 |= MMCR2_FCS(pmc); + } + hwc[i] = pmc - 1; } @@ -491,6 +511,7 @@ static int power8_compute_mmcr(u64 event[], int n_ev, mmcr[1] = mmcr1; mmcr[2] = mmcra; + mmcr[3] = mmcr2; return 0; } @@ -792,7 +813,7 @@ static struct power_pmu power8_pmu = { .get_constraint = power8_get_constraint, .get_alternatives = power8_get_alternatives, .disable_pmc = power8_disable_pmc, - .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB | PPMU_EBB, + .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_ARCH_207S, .n_generic = ARRAY_SIZE(power8_generic_events), .generic_events = power8_generic_events, .cache_events = &power8_cache_events, diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index 20139ceeacf6..8b6a8a36fa38 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -257,7 +257,7 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) } static int p970_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]) + unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) { unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned int pmc, unit, byte, psel; diff --git a/arch/powerpc/platforms/40x/ep405.c b/arch/powerpc/platforms/40x/ep405.c index b0389bbe4f94..ddc12a1926ef 100644 --- a/arch/powerpc/platforms/40x/ep405.c +++ b/arch/powerpc/platforms/40x/ep405.c @@ -49,7 +49,7 @@ static void __iomem *bcsr_regs; /* there's more, can't be bothered typing them tho */ -static __initdata struct of_device_id ep405_of_bus[] = { +static const struct of_device_id ep405_of_bus[] __initconst = { { .compatible = "ibm,plb3", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/40x/ppc40x_simple.c b/arch/powerpc/platforms/40x/ppc40x_simple.c index 8f3920e5a046..b0c46375dd95 100644 --- a/arch/powerpc/platforms/40x/ppc40x_simple.c +++ b/arch/powerpc/platforms/40x/ppc40x_simple.c @@ -24,7 +24,7 @@ #include <linux/init.h> #include <linux/of_platform.h> -static __initdata struct of_device_id ppc40x_of_bus[] = { +static const struct of_device_id ppc40x_of_bus[] __initconst = { { .compatible = "ibm,plb3", }, { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, diff --git a/arch/powerpc/platforms/40x/virtex.c b/arch/powerpc/platforms/40x/virtex.c index d0fc6866b00c..9aa7ae2f4164 100644 --- a/arch/powerpc/platforms/40x/virtex.c +++ b/arch/powerpc/platforms/40x/virtex.c @@ -17,7 +17,7 @@ #include <asm/xilinx_pci.h> #include <asm/ppc4xx.h> -static struct of_device_id xilinx_of_bus_ids[] __initdata = { +static const struct of_device_id xilinx_of_bus_ids[] __initconst = { { .compatible = "xlnx,plb-v46-1.00.a", }, { .compatible = "xlnx,plb-v34-1.01.a", }, { .compatible = "xlnx,plb-v34-1.02.a", }, diff --git a/arch/powerpc/platforms/40x/walnut.c b/arch/powerpc/platforms/40x/walnut.c index 8b691df72f74..f7ac2d0fcb44 100644 --- a/arch/powerpc/platforms/40x/walnut.c +++ b/arch/powerpc/platforms/40x/walnut.c @@ -28,7 +28,7 @@ #include <asm/pci-bridge.h> #include <asm/ppc4xx.h> -static __initdata struct of_device_id walnut_of_bus[] = { +static const struct of_device_id walnut_of_bus[] __initconst = { { .compatible = "ibm,plb3", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 4d88f6a19058..82f2da28cd27 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -215,9 +215,9 @@ config AKEBONO select NET_VENDOR_IBM select IBM_EMAC_EMAC4 select IBM_EMAC_RGMII_WOL - select USB - select USB_OHCI_HCD_PLATFORM - select USB_EHCI_HCD_PLATFORM + select USB if USB_SUPPORT + select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD + select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD select MMC_SDHCI select MMC_SDHCI_PLTFM select MMC_SDHCI_OF_476GTR diff --git a/arch/powerpc/platforms/44x/canyonlands.c b/arch/powerpc/platforms/44x/canyonlands.c index e300dd4c89bf..22ca5430c9cb 100644 --- a/arch/powerpc/platforms/44x/canyonlands.c +++ b/arch/powerpc/platforms/44x/canyonlands.c @@ -33,7 +33,7 @@ #define BCSR_USB_EN 0x11 -static __initdata struct of_device_id ppc460ex_of_bus[] = { +static const struct of_device_id ppc460ex_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/ebony.c b/arch/powerpc/platforms/44x/ebony.c index 6a4232bbdf88..ae893226392d 100644 --- a/arch/powerpc/platforms/44x/ebony.c +++ b/arch/powerpc/platforms/44x/ebony.c @@ -28,7 +28,7 @@ #include <asm/pci-bridge.h> #include <asm/ppc4xx.h> -static __initdata struct of_device_id ebony_of_bus[] = { +static const struct of_device_id ebony_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c index 4241bc825800..c7c6758b3cfe 100644 --- a/arch/powerpc/platforms/44x/iss4xx.c +++ b/arch/powerpc/platforms/44x/iss4xx.c @@ -32,7 +32,7 @@ #include <asm/mpic.h> #include <asm/mmu.h> -static __initdata struct of_device_id iss4xx_of_bus[] = { +static const struct of_device_id iss4xx_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,plb6", }, { .compatible = "ibm,opb", }, diff --git a/arch/powerpc/platforms/44x/ppc44x_simple.c b/arch/powerpc/platforms/44x/ppc44x_simple.c index 3ffb915446e3..573c3d2689c6 100644 --- a/arch/powerpc/platforms/44x/ppc44x_simple.c +++ b/arch/powerpc/platforms/44x/ppc44x_simple.c @@ -24,7 +24,7 @@ #include <linux/init.h> #include <linux/of_platform.h> -static __initdata struct of_device_id ppc44x_of_bus[] = { +static const struct of_device_id ppc44x_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index 33986c1a05da..58db9d083969 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -38,7 +38,7 @@ #include <linux/pci.h> #include <linux/i2c.h> -static struct of_device_id ppc47x_of_bus[] __initdata = { +static const struct of_device_id ppc47x_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,plb6", }, { .compatible = "ibm,opb", }, diff --git a/arch/powerpc/platforms/44x/sam440ep.c b/arch/powerpc/platforms/44x/sam440ep.c index 9e09b835758b..3ee4a03c1496 100644 --- a/arch/powerpc/platforms/44x/sam440ep.c +++ b/arch/powerpc/platforms/44x/sam440ep.c @@ -29,7 +29,7 @@ #include <asm/ppc4xx.h> #include <linux/i2c.h> -static __initdata struct of_device_id sam440ep_of_bus[] = { +static const struct of_device_id sam440ep_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/44x/virtex.c b/arch/powerpc/platforms/44x/virtex.c index cf96ccaa760c..ad272c17c640 100644 --- a/arch/powerpc/platforms/44x/virtex.c +++ b/arch/powerpc/platforms/44x/virtex.c @@ -21,7 +21,7 @@ #include <asm/ppc4xx.h> #include "44x.h" -static struct of_device_id xilinx_of_bus_ids[] __initdata = { +static const struct of_device_id xilinx_of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "xlnx,plb-v46-1.00.a", }, { .compatible = "xlnx,plb-v46-1.02.a", }, diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index 534574a97ec9..501333cf42cf 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -25,9 +25,10 @@ #include <asm/time.h> #include <asm/uic.h> #include <asm/ppc4xx.h> +#include <asm/dma.h> -static __initdata struct of_device_id warp_of_bus[] = { +static const struct of_device_id warp_of_bus[] __initconst = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,opb", }, { .compatible = "ibm,ebc", }, diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index adb95f03d4d4..e996e007bc44 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -337,7 +337,7 @@ void __init mpc512x_init_IRQ(void) /* * Nodes to do bus probe on, soc and localbus */ -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "fsl,mpc5121-immr", }, { .compatible = "fsl,mpc5121-localbus", }, { .compatible = "fsl,mpc5121-mbx", }, diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 6e19b0ad5d26..3feffde9128d 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -13,6 +13,7 @@ #include <generated/utsrelease.h> #include <linux/pci.h> #include <linux/of.h> +#include <asm/dma.h> #include <asm/prom.h> #include <asm/time.h> #include <asm/machdep.h> diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c index 1843bc932011..7492de3cf6d0 100644 --- a/arch/powerpc/platforms/52xx/lite5200.c +++ b/arch/powerpc/platforms/52xx/lite5200.c @@ -34,13 +34,13 @@ */ /* mpc5200 device tree match tables */ -static struct of_device_id mpc5200_cdm_ids[] __initdata = { +static const struct of_device_id mpc5200_cdm_ids[] __initconst = { { .compatible = "fsl,mpc5200-cdm", }, { .compatible = "mpc5200-cdm", }, {} }; -static struct of_device_id mpc5200_gpio_ids[] __initdata = { +static const struct of_device_id mpc5200_gpio_ids[] __initconst = { { .compatible = "fsl,mpc5200-gpio", }, { .compatible = "mpc5200-gpio", }, {} diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index 070d315dd6cd..32cae33c4266 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -30,7 +30,7 @@ #include <asm/machdep.h> #include <asm/mpc52xx.h> -static struct of_device_id mpc5200_gpio_ids[] __initdata = { +static const struct of_device_id mpc5200_gpio_ids[] __initconst = { { .compatible = "fsl,mpc5200-gpio", }, { .compatible = "mpc5200-gpio", }, {} diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c index d7e94f49532a..26993826a797 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_common.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c @@ -23,12 +23,12 @@ #include <asm/mpc52xx.h> /* MPC5200 device tree match tables */ -static struct of_device_id mpc52xx_xlb_ids[] __initdata = { +static const struct of_device_id mpc52xx_xlb_ids[] __initconst = { { .compatible = "fsl,mpc5200-xlb", }, { .compatible = "mpc5200-xlb", }, {} }; -static struct of_device_id mpc52xx_bus_ids[] __initdata = { +static const struct of_device_id mpc52xx_bus_ids[] __initconst = { { .compatible = "fsl,mpc5200-immr", }, { .compatible = "fsl,mpc5200b-immr", }, { .compatible = "simple-bus", }, @@ -108,21 +108,21 @@ void __init mpc52xx_declare_of_platform_devices(void) /* * match tables used by mpc52xx_map_common_devices() */ -static struct of_device_id mpc52xx_gpt_ids[] __initdata = { +static const struct of_device_id mpc52xx_gpt_ids[] __initconst = { { .compatible = "fsl,mpc5200-gpt", }, { .compatible = "mpc5200-gpt", }, /* old */ {} }; -static struct of_device_id mpc52xx_cdm_ids[] __initdata = { +static const struct of_device_id mpc52xx_cdm_ids[] __initconst = { { .compatible = "fsl,mpc5200-cdm", }, { .compatible = "mpc5200-cdm", }, /* old */ {} }; -static const struct of_device_id mpc52xx_gpio_simple[] = { +static const struct of_device_id mpc52xx_gpio_simple[] __initconst = { { .compatible = "fsl,mpc5200-gpio", }, {} }; -static const struct of_device_id mpc52xx_gpio_wkup[] = { +static const struct of_device_id mpc52xx_gpio_wkup[] __initconst = { { .compatible = "fsl,mpc5200-gpio-wkup", }, {} }; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c index 37f7a89c10f2..f8f0081759fb 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c @@ -564,7 +564,7 @@ static int mpc52xx_lpbfifo_remove(struct platform_device *op) return 0; } -static struct of_device_id mpc52xx_lpbfifo_match[] = { +static const struct of_device_id mpc52xx_lpbfifo_match[] = { { .compatible = "fsl,mpc5200-lpbfifo", }, {}, }; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c index 2898b737deb7..2944bc84b9d6 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -119,12 +119,12 @@ /* MPC5200 device tree match tables */ -static struct of_device_id mpc52xx_pic_ids[] __initdata = { +static const struct of_device_id mpc52xx_pic_ids[] __initconst = { { .compatible = "fsl,mpc5200-pic", }, { .compatible = "mpc5200-pic", }, {} }; -static struct of_device_id mpc52xx_sdma_ids[] __initdata = { +static const struct of_device_id mpc52xx_sdma_ids[] __initconst = { { .compatible = "fsl,mpc5200-bestcomm", }, { .compatible = "mpc5200-bestcomm", }, {} diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index 79799b29ffe2..3d0c3a01143d 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -298,7 +298,7 @@ static void __init ep8248e_setup_arch(void) ppc_md.progress("ep8248e_setup_arch(), finish", 0); } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "fsl,ep8248e-bcsr", }, {}, diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c index 058cc1895c88..387b446f4161 100644 --- a/arch/powerpc/platforms/82xx/km82xx.c +++ b/arch/powerpc/platforms/82xx/km82xx.c @@ -180,7 +180,7 @@ static void __init km82xx_setup_arch(void) ppc_md.progress("km82xx_setup_arch(), finish", 0); } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, {}, }; diff --git a/arch/powerpc/platforms/82xx/mpc8272_ads.c b/arch/powerpc/platforms/82xx/mpc8272_ads.c index 6a14cf50f4a2..d24deacf07d0 100644 --- a/arch/powerpc/platforms/82xx/mpc8272_ads.c +++ b/arch/powerpc/platforms/82xx/mpc8272_ads.c @@ -181,7 +181,7 @@ static void __init mpc8272_ads_setup_arch(void) ppc_md.progress("mpc8272_ads_setup_arch(), finish", 0); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/82xx/pq2fads.c b/arch/powerpc/platforms/82xx/pq2fads.c index e5f82ec8df17..3a5164ad10ad 100644 --- a/arch/powerpc/platforms/82xx/pq2fads.c +++ b/arch/powerpc/platforms/82xx/pq2fads.c @@ -168,7 +168,7 @@ static int __init pq2fads_probe(void) return of_flat_dt_is_compatible(root, "fsl,pq2fads"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index e238b6a55b15..31053eec5cd0 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -213,7 +213,7 @@ static const struct i2c_device_id mcu_ids[] = { }; MODULE_DEVICE_TABLE(i2c, mcu_ids); -static struct of_device_id mcu_of_match_table[] = { +static const struct of_device_id mcu_of_match_table[] = { { .compatible = "fsl,mcu-mpc8349emitx", }, { }, }; diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c index 125336f750c6..ef9d01a049c1 100644 --- a/arch/powerpc/platforms/83xx/misc.c +++ b/arch/powerpc/platforms/83xx/misc.c @@ -114,7 +114,7 @@ void __init mpc83xx_ipic_and_qe_init_IRQ(void) } #endif /* CONFIG_QUICC_ENGINE */ -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .type = "soc", }, { .compatible = "soc", }, { .compatible = "simple-bus" }, diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c index a494fa57bdf9..80aea8c4b5a3 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_itx.c +++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c @@ -38,7 +38,7 @@ #include "mpc83xx.h" -static struct of_device_id __initdata mpc834x_itx_ids[] = { +static const struct of_device_id mpc834x_itx_ids[] __initconst = { { .compatible = "fsl,pq2pro-localbus", }, {}, }; diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 4b4c081df94d..eeb80e25214d 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -321,7 +321,7 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = { .end = mpc83xx_suspend_end, }; -static struct of_device_id pmc_match[]; +static const struct of_device_id pmc_match[]; static int pmc_probe(struct platform_device *ofdev) { const struct of_device_id *match; @@ -420,7 +420,7 @@ static struct pmc_type pmc_types[] = { } }; -static struct of_device_id pmc_match[] = { +static const struct of_device_id pmc_match[] = { { .compatible = "fsl,mpc8313-pmc", .data = &pmc_types[0], diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index b564b5e23f7c..4a9ad871a168 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -14,7 +14,7 @@ #include "mpc85xx.h" -static struct of_device_id __initdata mpc85xx_common_ids[] = { +static const struct of_device_id mpc85xx_common_ids[] __initconst = { { .type = "soc", }, { .compatible = "soc", }, { .compatible = "simple-bus", }, diff --git a/arch/powerpc/platforms/85xx/ppa8548.c b/arch/powerpc/platforms/85xx/ppa8548.c index 3daff7c63569..12019f17f297 100644 --- a/arch/powerpc/platforms/85xx/ppa8548.c +++ b/arch/powerpc/platforms/85xx/ppa8548.c @@ -59,7 +59,7 @@ static void ppa8548_show_cpuinfo(struct seq_file *m) seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .type = "soc", }, { .compatible = "simple-bus", }, diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index bb75add67084..8162b0412117 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -24,7 +24,7 @@ static struct device_node *halt_node; -static struct of_device_id child_match[] = { +static const struct of_device_id child_match[] = { { .compatible = "sgy,gpio-halt", }, @@ -147,7 +147,7 @@ static int gpio_halt_remove(struct platform_device *pdev) return 0; } -static struct of_device_id gpio_halt_match[] = { +static const struct of_device_id gpio_halt_match[] = { /* We match on the gpio bus itself and scan the children since they * wont be matched against us. We know the bus wont match until it * has been registered too. */ diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c index c23f3443880a..bf17933b20f3 100644 --- a/arch/powerpc/platforms/86xx/gef_ppc9a.c +++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c @@ -213,7 +213,7 @@ static long __init mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c index 8a6ac20686ea..8facf5873866 100644 --- a/arch/powerpc/platforms/86xx/gef_sbc310.c +++ b/arch/powerpc/platforms/86xx/gef_sbc310.c @@ -200,7 +200,7 @@ static long __init mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c index 06c72636f299..8c9058df5642 100644 --- a/arch/powerpc/platforms/86xx/gef_sbc610.c +++ b/arch/powerpc/platforms/86xx/gef_sbc610.c @@ -190,7 +190,7 @@ static long __init mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c index d479d68fbb2b..55413a547ea8 100644 --- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c +++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c @@ -85,7 +85,7 @@ static void __init mpc8610_suspend_init(void) static inline void mpc8610_suspend_init(void) { } #endif /* CONFIG_SUSPEND */ -static struct of_device_id __initdata mpc8610_ids[] = { +static const struct of_device_id mpc8610_ids[] __initconst = { { .compatible = "fsl,mpc8610-immr", }, { .compatible = "fsl,mpc8610-guts", }, { .compatible = "simple-bus", }, diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index e8bf3fae5606..07ccb1b0cc7d 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -127,7 +127,7 @@ mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "fsl,srio", }, { .compatible = "gianfar", }, diff --git a/arch/powerpc/platforms/86xx/sbc8641d.c b/arch/powerpc/platforms/86xx/sbc8641d.c index b47a8fd0f3d3..6810b71d54a7 100644 --- a/arch/powerpc/platforms/86xx/sbc8641d.c +++ b/arch/powerpc/platforms/86xx/sbc8641d.c @@ -92,7 +92,7 @@ mpc86xx_time_init(void) return 0; } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, { .compatible = "fsl,mpc8641-pcie", }, diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c index 82363e98f50e..61cae4c1edb8 100644 --- a/arch/powerpc/platforms/8xx/adder875.c +++ b/arch/powerpc/platforms/8xx/adder875.c @@ -92,7 +92,7 @@ static int __init adder875_probe(void) return of_flat_dt_is_compatible(root, "analogue-and-micro,adder875"); } -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .compatible = "simple-bus", }, {}, }; diff --git a/arch/powerpc/platforms/8xx/ep88xc.c b/arch/powerpc/platforms/8xx/ep88xc.c index e62166681d08..2bedeb7d5f8f 100644 --- a/arch/powerpc/platforms/8xx/ep88xc.c +++ b/arch/powerpc/platforms/8xx/ep88xc.c @@ -147,7 +147,7 @@ static int __init ep88xc_probe(void) return of_flat_dt_is_compatible(root, "fsl,ep88xc"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c index 63084640c5c5..78180c5e73ff 100644 --- a/arch/powerpc/platforms/8xx/mpc86xads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c @@ -122,7 +122,7 @@ static int __init mpc86xads_probe(void) return of_flat_dt_is_compatible(root, "fsl,mpc866ads"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c index 5921dcb498fd..4d62bf9dc789 100644 --- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c @@ -197,7 +197,7 @@ static int __init mpc885ads_probe(void) return of_flat_dt_is_compatible(root, "fsl,mpc885ads"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c index dda607807def..bee47a2b23e6 100644 --- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c +++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c @@ -124,7 +124,7 @@ static int __init tqm8xx_probe(void) return of_flat_dt_is_compatible(node, "tqc,tqm8xx"); } -static struct of_device_id __initdata of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .name = "soc", }, { .name = "cpm", }, { .name = "localbus", }, diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index a41bd023647a..35ae7ec392ec 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -61,7 +61,7 @@ choice help There are two families of 64 bit PowerPC chips supported. The most common ones are the desktop and server CPUs - (POWER3, RS64, POWER4, POWER5, POWER5+, POWER6, ...) + (POWER4, POWER5, 970, POWER5+, POWER6, POWER7, POWER8 ...) The other are the "embedded" processors compliant with the "Book 3E" variant of the architecture @@ -116,6 +116,12 @@ config POWER6_CPU config POWER7_CPU bool "POWER7" depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER + +config POWER8_CPU + bool "POWER8" + depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER config E5500_CPU bool "Freescale e5500" @@ -140,14 +146,6 @@ config 6xx depends on PPC32 && PPC_BOOK3S select PPC_HAVE_PMU_SUPPORT -config POWER3 - depends on PPC64 && PPC_BOOK3S - def_bool y - -config POWER4 - depends on PPC64 && PPC_BOOK3S - def_bool y - config TUNE_CELL bool "Optimize for Cell Broadband Engine" depends on PPC64 && PPC_BOOK3S @@ -244,7 +242,7 @@ config PHYS_64BIT config ALTIVEC bool "AltiVec Support" - depends on 6xx || POWER4 || (PPC_E500MC && PPC64) + depends on 6xx || PPC_BOOK3S_64 || (PPC_E500MC && PPC64) ---help--- This option enables kernel support for the Altivec extensions to the PowerPC processor. The kernel currently supports saving and restoring @@ -260,7 +258,7 @@ config ALTIVEC config VSX bool "VSX Support" - depends on POWER4 && ALTIVEC && PPC_FPU + depends on PPC_BOOK3S_64 && ALTIVEC && PPC_FPU ---help--- This option enables kernel support for the Vector Scaler extensions @@ -276,7 +274,7 @@ config VSX config PPC_ICSWX bool "Support for PowerPC icswx coprocessor instruction" - depends on POWER4 + depends on PPC_BOOK3S_64 default n ---help--- @@ -294,7 +292,7 @@ config PPC_ICSWX config PPC_ICSWX_PID bool "icswx requires direct PID management" - depends on PPC_ICSWX && POWER4 + depends on PPC_ICSWX default y ---help--- The PID register in server is used explicitly for ICSWX. In diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index 03aabc0e16ac..2fe12046279e 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -24,6 +24,7 @@ #include <asm/i8259.h> #include <asm/time.h> #include <asm/udbg.h> +#include <asm/dma.h> extern void __flush_disable_L1(void); diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c index 94560db788bf..2c15ff094483 100644 --- a/arch/powerpc/platforms/cell/cbe_thermal.c +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -125,7 +125,7 @@ static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, i static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos) { u64 reg_value; - int temp; + unsigned int temp; u64 new_value; int ret; diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c index 173568140a32..2b98a36ef8fb 100644 --- a/arch/powerpc/platforms/cell/celleb_pci.c +++ b/arch/powerpc/platforms/cell/celleb_pci.c @@ -454,7 +454,7 @@ static struct celleb_phb_spec celleb_fake_pci_spec __initdata = { .setup = celleb_setup_fake_pci, }; -static struct of_device_id celleb_phb_match[] __initdata = { +static const struct of_device_id celleb_phb_match[] __initconst = { { .name = "pci-pseudo", .data = &celleb_fake_pci_spec, diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c index 1d5a4d8ddad9..34e8ce2976aa 100644 --- a/arch/powerpc/platforms/cell/celleb_setup.c +++ b/arch/powerpc/platforms/cell/celleb_setup.c @@ -102,7 +102,7 @@ static void __init celleb_setup_arch_common(void) #endif } -static struct of_device_id celleb_bus_ids[] __initdata = { +static const struct of_device_id celleb_bus_ids[] __initconst = { { .type = "scc", }, { .type = "ioif", }, /* old style */ {}, diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index f85db3a69b4a..2930d1e81a05 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -611,7 +611,6 @@ static int __init create_spu(void *data) int ret; static int number; unsigned long flags; - struct timespec ts; ret = -ENOMEM; spu = kzalloc(sizeof (*spu), GFP_KERNEL); @@ -652,8 +651,7 @@ static int __init create_spu(void *data) mutex_unlock(&spu_full_list_mutex); spu->stats.util_state = SPU_UTIL_IDLE_LOADED; - ktime_get_ts(&ts); - spu->stats.tstamp = timespec_to_ns(&ts); + spu->stats.tstamp = ktime_get_ns(); INIT_LIST_HEAD(&spu->aff_list); @@ -676,7 +674,6 @@ static const char *spu_state_names[] = { static unsigned long long spu_acct_time(struct spu *spu, enum spu_utilization_state state) { - struct timespec ts; unsigned long long time = spu->stats.times[state]; /* @@ -684,10 +681,8 @@ static unsigned long long spu_acct_time(struct spu *spu, * statistics are not updated. Apply the time delta from the * last recorded state of the spu. */ - if (spu->stats.util_state == state) { - ktime_get_ts(&ts); - time += timespec_to_ns(&ts) - spu->stats.tstamp; - } + if (spu->stats.util_state == state) + time += ktime_get_ns() - spu->stats.tstamp; return time / NSEC_PER_MSEC; } diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c index 38e0a1a5cec3..5e6e0bad6db6 100644 --- a/arch/powerpc/platforms/cell/spu_syscalls.c +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -111,6 +111,7 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) return ret; } +#ifdef CONFIG_COREDUMP int elf_coredump_extra_notes_size(void) { struct spufs_calls *calls; @@ -142,6 +143,7 @@ int elf_coredump_extra_notes_write(struct coredump_params *cprm) return ret; } +#endif void notify_spus_active(void) { diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile index b9d5d678aa44..52a7d2596d30 100644 --- a/arch/powerpc/platforms/cell/spufs/Makefile +++ b/arch/powerpc/platforms/cell/spufs/Makefile @@ -1,8 +1,9 @@ obj-$(CONFIG_SPU_FS) += spufs.o -spufs-y += inode.o file.o context.o syscalls.o coredump.o +spufs-y += inode.o file.o context.o syscalls.o spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o spufs-y += switch.o fault.o lscsa_alloc.o +spufs-$(CONFIG_COREDUMP) += coredump.o # magic for the trace events CFLAGS_sched.o := -I$(src) diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index 9c6790d17eda..3b4152faeb1f 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -36,7 +36,6 @@ atomic_t nr_spu_contexts = ATOMIC_INIT(0); struct spu_context *alloc_spu_context(struct spu_gang *gang) { struct spu_context *ctx; - struct timespec ts; ctx = kzalloc(sizeof *ctx, GFP_KERNEL); if (!ctx) @@ -67,8 +66,7 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) __spu_update_sched_info(ctx); spu_set_timeslice(ctx); ctx->stats.util_state = SPU_UTIL_IDLE_LOADED; - ktime_get_ts(&ts); - ctx->stats.tstamp = timespec_to_ns(&ts); + ctx->stats.tstamp = ktime_get_ns(); atomic_inc(&nr_spu_contexts); goto out; diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 90986923a53a..d966bbe58b8f 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -2338,7 +2338,6 @@ static const char *ctx_state_names[] = { static unsigned long long spufs_acct_time(struct spu_context *ctx, enum spu_utilization_state state) { - struct timespec ts; unsigned long long time = ctx->stats.times[state]; /* @@ -2351,8 +2350,7 @@ static unsigned long long spufs_acct_time(struct spu_context *ctx, * of the spu context. */ if (ctx->spu && ctx->stats.util_state == state) { - ktime_get_ts(&ts); - time += timespec_to_ns(&ts) - ctx->stats.tstamp; + time += ktime_get_ns() - ctx->stats.tstamp; } return time / NSEC_PER_MSEC; diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 4a0a64fe25df..998f632e7cce 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -1039,13 +1039,11 @@ void spuctx_switch_state(struct spu_context *ctx, { unsigned long long curtime; signed long long delta; - struct timespec ts; struct spu *spu; enum spu_utilization_state old_state; int node; - ktime_get_ts(&ts); - curtime = timespec_to_ns(&ts); + curtime = ktime_get_ns(); delta = curtime - ctx->stats.tstamp; WARN_ON(!mutex_is_locked(&ctx->state_mutex)); diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index b045fdda4845..a87200a535fa 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -79,8 +79,10 @@ static long do_spu_create(const char __user *pathname, unsigned int flags, struct spufs_calls spufs_calls = { .create_thread = do_spu_create, .spu_run = do_spu_run, - .coredump_extra_notes_size = spufs_coredump_extra_notes_size, - .coredump_extra_notes_write = spufs_coredump_extra_notes_write, .notify_spus_active = do_notify_spus_active, .owner = THIS_MODULE, +#ifdef CONFIG_COREDUMP + .coredump_extra_notes_size = spufs_coredump_extra_notes_size, + .coredump_extra_notes_write = spufs_coredump_extra_notes_write, +#endif }; diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 7044fd36197b..5b77b1919fd2 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -258,7 +258,7 @@ static void chrp_init_early(void) struct device_node *node; const char *property; - if (strstr(cmd_line, "console=")) + if (strstr(boot_command_line, "console=")) return; /* find the boot console from /chosen/stdout */ if (!of_chosen) diff --git a/arch/powerpc/platforms/embedded6xx/gamecube.c b/arch/powerpc/platforms/embedded6xx/gamecube.c index a138e14bad2e..bd4ba5d7d568 100644 --- a/arch/powerpc/platforms/embedded6xx/gamecube.c +++ b/arch/powerpc/platforms/embedded6xx/gamecube.c @@ -90,7 +90,7 @@ define_machine(gamecube) { }; -static struct of_device_id gamecube_of_bus[] = { +static const struct of_device_id gamecube_of_bus[] = { { .compatible = "nintendo,flipper", }, { }, }; diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index 455e7c087422..168e1d80b2e5 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -21,7 +21,7 @@ #include "mpc10x.h" -static __initdata struct of_device_id of_bus_ids[] = { +static const struct of_device_id of_bus_ids[] __initconst = { { .type = "soc", }, { .compatible = "simple-bus", }, {}, diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c index 25e3bfb64efb..1613303177e6 100644 --- a/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -149,7 +149,7 @@ static int __init mvme5100_add_bridge(struct device_node *dev) return 0; } -static struct of_device_id mvme5100_of_bus_ids[] __initdata = { +static const struct of_device_id mvme5100_of_bus_ids[] __initconst = { { .compatible = "hawk-bridge", }, {}, }; diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c index c458b60d14c4..d572833ebd00 100644 --- a/arch/powerpc/platforms/embedded6xx/storcenter.c +++ b/arch/powerpc/platforms/embedded6xx/storcenter.c @@ -24,7 +24,7 @@ #include "mpc10x.h" -static __initdata struct of_device_id storcenter_of_bus[] = { +static const struct of_device_id storcenter_of_bus[] __initconst = { { .name = "soc", }, {}, }; diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 6d8dadf19f0b..388e29bab8f6 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -235,7 +235,7 @@ define_machine(wii) { .machine_shutdown = wii_shutdown, }; -static struct of_device_id wii_of_bus[] = { +static const struct of_device_id wii_of_bus[] = { { .compatible = "nintendo,hollywood", }, { }, }; diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index 15adee544638..ada33358950d 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -290,7 +290,7 @@ static int gpio_mdio_remove(struct platform_device *dev) return 0; } -static struct of_device_id gpio_mdio_match[] = +static const struct of_device_id gpio_mdio_match[] = { { .compatible = "gpio-mdio", diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index 8c54de6d8ec4..d71b2c7e8403 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -393,7 +393,7 @@ static inline void pasemi_pcmcia_init(void) #endif -static struct of_device_id pasemi_bus_ids[] = { +static const struct of_device_id pasemi_bus_ids[] = { /* Unfortunately needed for legacy firmwares */ { .type = "localbus", }, { .type = "sdc", }, diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig index 1afd10f67858..607124bae2e7 100644 --- a/arch/powerpc/platforms/powermac/Kconfig +++ b/arch/powerpc/platforms/powermac/Kconfig @@ -10,7 +10,7 @@ config PPC_PMAC config PPC_PMAC64 bool - depends on PPC_PMAC && POWER4 + depends on PPC_PMAC && PPC64 select MPIC select U3_DART select MPIC_U3_HT_IRQS diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 63d82bbc05e9..4882bfd90e27 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -158,7 +158,7 @@ static inline int simple_feature_tweak(struct device_node *node, int type, return 0; } -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 static long ohare_htw_scc_enable(struct device_node *node, long param, long value) @@ -1318,7 +1318,7 @@ intrepid_aack_delay_enable(struct device_node *node, long param, long value) } -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ static long core99_read_gpio(struct device_node *node, long param, long value) @@ -1338,7 +1338,7 @@ core99_write_gpio(struct device_node *node, long param, long value) return 0; } -#ifdef CONFIG_POWER4 +#ifdef CONFIG_PPC64 static long g5_gmac_enable(struct device_node *node, long param, long value) { struct macio_chip *macio = &macio_chips[0]; @@ -1550,9 +1550,9 @@ void g5_phy_disable_cpu1(void) if (uninorth_maj == 3) UN_OUT(U3_API_PHY_CONFIG_1, 0); } -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 #ifdef CONFIG_PM @@ -1864,7 +1864,7 @@ core99_sleep_state(struct device_node *node, long param, long value) return 0; } -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ static long generic_dev_can_wake(struct device_node *node, long param, long value) @@ -1906,7 +1906,7 @@ static struct feature_table_entry any_features[] = { { 0, NULL } }; -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 /* OHare based motherboards. Currently, we only use these on the * 2400,3400 and 3500 series powerbooks. Some older desktops seem @@ -2056,7 +2056,7 @@ static struct feature_table_entry intrepid_features[] = { { 0, NULL } }; -#else /* CONFIG_POWER4 */ +#else /* CONFIG_PPC64 */ /* G5 features */ @@ -2074,10 +2074,10 @@ static struct feature_table_entry g5_features[] = { { 0, NULL } }; -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ static struct pmac_mb_def pmac_mb_defs[] = { -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 /* * Desktops */ @@ -2342,7 +2342,7 @@ static struct pmac_mb_def pmac_mb_defs[] = { PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, }, -#else /* CONFIG_POWER4 */ +#else /* CONFIG_PPC64 */ { "PowerMac7,2", "PowerMac G5", PMAC_TYPE_POWERMAC_G5, g5_features, 0, @@ -2373,7 +2373,7 @@ static struct pmac_mb_def pmac_mb_defs[] = { 0, }, #endif /* CONFIG_PPC64 */ -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ }; /* @@ -2441,7 +2441,7 @@ static int __init probe_motherboard(void) /* Fallback to selection depending on mac-io chip type */ switch(macio->type) { -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 case macio_grand_central: pmac_mb.model_id = PMAC_TYPE_PSURGE; pmac_mb.model_name = "Unknown PowerSurge"; @@ -2475,7 +2475,7 @@ static int __init probe_motherboard(void) pmac_mb.model_name = "Unknown Intrepid-based"; pmac_mb.features = intrepid_features; break; -#else /* CONFIG_POWER4 */ +#else /* CONFIG_PPC64 */ case macio_keylargo2: pmac_mb.model_id = PMAC_TYPE_UNKNOWN_K2; pmac_mb.model_name = "Unknown K2-based"; @@ -2486,13 +2486,13 @@ static int __init probe_motherboard(void) pmac_mb.model_name = "Unknown Shasta-based"; pmac_mb.features = g5_features; break; -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ default: ret = -ENODEV; goto done; } found: -#ifndef CONFIG_POWER4 +#ifndef CONFIG_PPC64 /* Fixup Hooper vs. Comet */ if (pmac_mb.model_id == PMAC_TYPE_HOOPER) { u32 __iomem * mach_id_ptr = ioremap(0xf3000034, 4); @@ -2546,9 +2546,9 @@ found: */ powersave_lowspeed = 1; -#else /* CONFIG_POWER4 */ +#else /* CONFIG_PPC64 */ powersave_nap = 1; -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ /* Check for "mobile" machine */ if (model && (strncmp(model, "PowerBook", 9) == 0 @@ -2786,7 +2786,7 @@ set_initial_features(void) MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE); } -#ifdef CONFIG_POWER4 +#ifdef CONFIG_PPC64 if (macio_chips[0].type == macio_keylargo2 || macio_chips[0].type == macio_shasta) { #ifndef CONFIG_SMP @@ -2805,28 +2805,23 @@ set_initial_features(void) /* Enable GMAC for now for PCI probing. It will be disabled * later on after PCI probe */ - np = of_find_node_by_name(NULL, "ethernet"); - while(np) { + for_each_node_by_name(np, "ethernet") if (of_device_is_compatible(np, "K2-GMAC")) g5_gmac_enable(np, 0, 1); - np = of_find_node_by_name(np, "ethernet"); - } /* Enable FW before PCI probe. Will be disabled later on * Note: We should have a batter way to check that we are * dealing with uninorth internal cell and not a PCI cell * on the external PCI. The code below works though. */ - np = of_find_node_by_name(NULL, "firewire"); - while(np) { + for_each_node_by_name(np, "firewire") { if (of_device_is_compatible(np, "pci106b,5811")) { macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; g5_fw_enable(np, 0, 1); } - np = of_find_node_by_name(np, "firewire"); } } -#else /* CONFIG_POWER4 */ +#else /* CONFIG_PPC64 */ if (macio_chips[0].type == macio_keylargo || macio_chips[0].type == macio_pangea || @@ -2834,13 +2829,11 @@ set_initial_features(void) /* Enable GMAC for now for PCI probing. It will be disabled * later on after PCI probe */ - np = of_find_node_by_name(NULL, "ethernet"); - while(np) { + for_each_node_by_name(np, "ethernet") { if (np->parent && of_device_is_compatible(np->parent, "uni-north") && of_device_is_compatible(np, "gmac")) core99_gmac_enable(np, 0, 1); - np = of_find_node_by_name(np, "ethernet"); } /* Enable FW before PCI probe. Will be disabled later on @@ -2848,8 +2841,7 @@ set_initial_features(void) * dealing with uninorth internal cell and not a PCI cell * on the external PCI. The code below works though. */ - np = of_find_node_by_name(NULL, "firewire"); - while(np) { + for_each_node_by_name(np, "firewire") { if (np->parent && of_device_is_compatible(np->parent, "uni-north") && (of_device_is_compatible(np, "pci106b,18") || @@ -2858,18 +2850,16 @@ set_initial_features(void) macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; core99_firewire_enable(np, 0, 1); } - np = of_find_node_by_name(np, "firewire"); } /* Enable ATA-100 before PCI probe. */ np = of_find_node_by_name(NULL, "ata-6"); - while(np) { + for_each_node_by_name(np, "ata-6") { if (np->parent && of_device_is_compatible(np->parent, "uni-north") && of_device_is_compatible(np, "kauai-ata")) { core99_ata100_enable(np, 1); } - np = of_find_node_by_name(np, "ata-6"); } /* Switch airport off */ @@ -2895,7 +2885,7 @@ set_initial_features(void) MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N); } -#endif /* CONFIG_POWER4 */ +#endif /* CONFIG_PPC64 */ /* On all machines, switch modem & serial ports off */ for_each_node_by_name(np, "ch-a") diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index cf7009b8c7b6..7e868ccf3b0d 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -698,7 +698,7 @@ static void __init fixup_nec_usb2(void) { struct device_node *nec; - for (nec = NULL; (nec = of_find_node_by_name(nec, "usb")) != NULL;) { + for_each_node_by_name(nec, "usb") { struct pci_controller *hose; u32 data; const u32 *prop; diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 141f8899a633..b127a29ac526 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -336,7 +336,7 @@ static void __init pmac_setup_arch(void) #endif #ifdef CONFIG_ADB - if (strstr(cmd_line, "adb_sync")) { + if (strstr(boot_command_line, "adb_sync")) { extern int __adb_probe_sync; __adb_probe_sync = 1; } @@ -460,7 +460,7 @@ pmac_halt(void) static void __init pmac_init_early(void) { /* Enable early btext debug if requested */ - if (strstr(cmd_line, "btextdbg")) { + if (strstr(boot_command_line, "btextdbg")) { udbg_adb_init_early(); register_early_udbg_console(); } @@ -469,8 +469,8 @@ static void __init pmac_init_early(void) pmac_feature_init(); /* Initialize debug stuff */ - udbg_scc_init(!!strstr(cmd_line, "sccdbg")); - udbg_adb_init(!!strstr(cmd_line, "btextdbg")); + udbg_scc_init(!!strstr(boot_command_line, "sccdbg")); + udbg_adb_init(!!strstr(boot_command_line, "btextdbg")); #ifdef CONFIG_PPC64 iommu_init_early_dart(); diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 5cbd4d67d5c4..af094ae03dbb 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -577,7 +577,7 @@ static void __init smp_core99_setup_i2c_hwsync(int ncpus) int ok; /* Look for the clock chip */ - while ((cc = of_find_node_by_name(cc, "i2c-hwclock")) != NULL) { + for_each_node_by_name(cc, "i2c-hwclock") { p = of_get_parent(cc); ok = p && of_device_is_compatible(p, "uni-n-i2c"); of_node_put(p); diff --git a/arch/powerpc/platforms/powermac/udbg_adb.c b/arch/powerpc/platforms/powermac/udbg_adb.c index 44e0b55a2a02..366bd221edec 100644 --- a/arch/powerpc/platforms/powermac/udbg_adb.c +++ b/arch/powerpc/platforms/powermac/udbg_adb.c @@ -191,7 +191,7 @@ int __init udbg_adb_init(int force_btext) * of type "adb". If not, we return a failure, but we keep the * bext output set for now */ - for (np = NULL; (np = of_find_node_by_name(np, "keyboard")) != NULL;) { + for_each_node_by_name(np, "keyboard") { struct device_node *parent = of_get_parent(np); int found = (parent && strcmp(parent->type, "adb") == 0); of_node_put(parent); diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index d55891f89a2c..f241accc053d 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,10 +1,11 @@ -obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o +obj-y += setup.o opal-wrappers.o opal.o opal-async.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o -obj-y += opal-msglog.o +obj-y += opal-msglog.o opal-hmi.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o +obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 8ad0c5b891f4..426814a2ede3 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -66,6 +66,54 @@ static struct notifier_block ioda_eeh_nb = { }; #ifdef CONFIG_DEBUG_FS +static ssize_t ioda_eeh_ei_write(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct pci_controller *hose = filp->private_data; + struct pnv_phb *phb = hose->private_data; + struct eeh_dev *edev; + struct eeh_pe *pe; + int pe_no, type, func; + unsigned long addr, mask; + char buf[50]; + int ret; + + if (!phb->eeh_ops || !phb->eeh_ops->err_inject) + return -ENXIO; + + ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); + if (!ret) + return -EFAULT; + + /* Retrieve parameters */ + ret = sscanf(buf, "%x:%x:%x:%lx:%lx", + &pe_no, &type, &func, &addr, &mask); + if (ret != 5) + return -EINVAL; + + /* Retrieve PE */ + edev = kzalloc(sizeof(*edev), GFP_KERNEL); + if (!edev) + return -ENOMEM; + edev->phb = hose; + edev->pe_config_addr = pe_no; + pe = eeh_pe_get(edev); + kfree(edev); + if (!pe) + return -ENODEV; + + /* Do error injection */ + ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask); + return ret < 0 ? ret : count; +} + +static const struct file_operations ioda_eeh_ei_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = ioda_eeh_ei_write, +}; + static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val) { struct pci_controller *hose = data; @@ -152,6 +200,10 @@ static int ioda_eeh_post_init(struct pci_controller *hose) if (!phb->has_dbgfs && phb->dbgfs) { phb->has_dbgfs = 1; + debugfs_create_file("err_injct", 0200, + phb->dbgfs, hose, + &ioda_eeh_ei_fops); + debugfs_create_file("err_injct_outbound", 0600, phb->dbgfs, hose, &ioda_eeh_outb_dbgfs_ops); @@ -187,10 +239,11 @@ static int ioda_eeh_post_init(struct pci_controller *hose) */ static int ioda_eeh_set_option(struct eeh_pe *pe, int option) { - s64 ret; - u32 pe_no; struct pci_controller *hose = pe->phb; struct pnv_phb *phb = hose->private_data; + bool freeze_pe = false; + int enable, ret = 0; + s64 rc; /* Check on PE number */ if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { @@ -201,184 +254,236 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option) return -EINVAL; } - pe_no = pe->addr; switch (option) { case EEH_OPT_DISABLE: - ret = -EEXIST; - break; + return -EPERM; case EEH_OPT_ENABLE: - ret = 0; - break; + return 0; case EEH_OPT_THAW_MMIO: - ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, - OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO); - if (ret) { - pr_warning("%s: Failed to enable MMIO for " - "PHB#%x-PE#%x, err=%lld\n", - __func__, hose->global_number, pe_no, ret); - return -EIO; - } - + enable = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO; break; case EEH_OPT_THAW_DMA: - ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, - OPAL_EEH_ACTION_CLEAR_FREEZE_DMA); - if (ret) { - pr_warning("%s: Failed to enable DMA for " - "PHB#%x-PE#%x, err=%lld\n", - __func__, hose->global_number, pe_no, ret); - return -EIO; - } - + enable = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA; + break; + case EEH_OPT_FREEZE_PE: + freeze_pe = true; + enable = OPAL_EEH_ACTION_SET_FREEZE_ALL; break; default: - pr_warning("%s: Invalid option %d\n", __func__, option); + pr_warn("%s: Invalid option %d\n", + __func__, option); return -EINVAL; } + /* If PHB supports compound PE, to handle it */ + if (freeze_pe) { + if (phb->freeze_pe) { + phb->freeze_pe(phb, pe->addr); + } else { + rc = opal_pci_eeh_freeze_set(phb->opal_id, + pe->addr, + enable); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld freezing " + "PHB#%x-PE#%x\n", + __func__, rc, + phb->hose->global_number, pe->addr); + ret = -EIO; + } + } + } else { + if (phb->unfreeze_pe) { + ret = phb->unfreeze_pe(phb, pe->addr, enable); + } else { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + pe->addr, + enable); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld enable %d " + "for PHB#%x-PE#%x\n", + __func__, rc, option, + phb->hose->global_number, pe->addr); + ret = -EIO; + } + } + } + return ret; } -static void ioda_eeh_phb_diag(struct pci_controller *hose) +static void ioda_eeh_phb_diag(struct eeh_pe *pe) { - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pe->phb->private_data; long rc; - rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, + rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data, PNV_PCI_DIAG_BUF_SIZE); - if (rc != OPAL_SUCCESS) { - pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n", - __func__, hose->global_number, rc); - return; - } - - pnv_pci_dump_phb_diag_data(hose, phb->diag.blob); + if (rc != OPAL_SUCCESS) + pr_warn("%s: Failed to get diag-data for PHB#%x (%ld)\n", + __func__, pe->phb->global_number, rc); } -/** - * ioda_eeh_get_state - Retrieve the state of PE - * @pe: EEH PE - * - * The PE's state should be retrieved from the PEEV, PEST - * IODA tables. Since the OPAL has exported the function - * to do it, it'd better to use that. - */ -static int ioda_eeh_get_state(struct eeh_pe *pe) +static int ioda_eeh_get_phb_state(struct eeh_pe *pe) { - s64 ret = 0; + struct pnv_phb *phb = pe->phb->private_data; u8 fstate; __be16 pcierr; - u32 pe_no; - int result; - struct pci_controller *hose = pe->phb; - struct pnv_phb *phb = hose->private_data; + s64 rc; + int result = 0; + + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe->addr, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting PHB#%x state\n", + __func__, rc, phb->hose->global_number); + return EEH_STATE_NOT_SUPPORT; + } /* - * Sanity check on PE address. The PHB PE address should - * be zero. + * Check PHB state. If the PHB is frozen for the + * first time, to dump the PHB diag-data. */ - if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { - pr_err("%s: PE address %x out of range [0, %x] " - "on PHB#%x\n", - __func__, pe->addr, phb->ioda.total_pe, - hose->global_number); - return EEH_STATE_NOT_SUPPORT; + if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + } else if (!(pe->state & EEH_PE_ISOLATED)) { + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + ioda_eeh_phb_diag(pe); } + return result; +} + +static int ioda_eeh_get_pe_state(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + u8 fstate; + __be16 pcierr; + s64 rc; + int result; + /* - * If we're in middle of PE reset, return normal - * state to keep EEH core going. For PHB reset, we - * still expect to have fenced PHB cleared with - * PHB reset. + * We don't clobber hardware frozen state until PE + * reset is completed. In order to keep EEH core + * moving forward, we have to return operational + * state during PE reset. */ - if (!(pe->type & EEH_PE_PHB) && - (pe->state & EEH_PE_RESET)) { - result = (EEH_STATE_MMIO_ACTIVE | - EEH_STATE_DMA_ACTIVE | + if (pe->state & EEH_PE_RESET) { + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | EEH_STATE_MMIO_ENABLED | EEH_STATE_DMA_ENABLED); return result; } - /* Retrieve PE status through OPAL */ - pe_no = pe->addr; - ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, - &fstate, &pcierr, NULL); - if (ret) { - pr_err("%s: Failed to get EEH status on " - "PHB#%x-PE#%x\n, err=%lld\n", - __func__, hose->global_number, pe_no, ret); - return EEH_STATE_NOT_SUPPORT; - } - - /* Check PHB status */ - if (pe->type & EEH_PE_PHB) { - result = 0; - result &= ~EEH_STATE_RESET_ACTIVE; - - if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { - result |= EEH_STATE_MMIO_ACTIVE; - result |= EEH_STATE_DMA_ACTIVE; - result |= EEH_STATE_MMIO_ENABLED; - result |= EEH_STATE_DMA_ENABLED; - } else if (!(pe->state & EEH_PE_ISOLATED)) { - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); - ioda_eeh_phb_diag(hose); + /* + * Fetch PE state from hardware. If the PHB + * supports compound PE, let it handle that. + */ + if (phb->get_pe_state) { + fstate = phb->get_pe_state(phb, pe->addr); + } else { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe->addr, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n", + __func__, rc, phb->hose->global_number, pe->addr); + return EEH_STATE_NOT_SUPPORT; } - - return result; } - /* Parse result out */ - result = 0; + /* Figure out state */ switch (fstate) { case OPAL_EEH_STOPPED_NOT_FROZEN: - result &= ~EEH_STATE_RESET_ACTIVE; - result |= EEH_STATE_MMIO_ACTIVE; - result |= EEH_STATE_DMA_ACTIVE; - result |= EEH_STATE_MMIO_ENABLED; - result |= EEH_STATE_DMA_ENABLED; + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); break; case OPAL_EEH_STOPPED_MMIO_FREEZE: - result &= ~EEH_STATE_RESET_ACTIVE; - result |= EEH_STATE_DMA_ACTIVE; - result |= EEH_STATE_DMA_ENABLED; + result = (EEH_STATE_DMA_ACTIVE | + EEH_STATE_DMA_ENABLED); break; case OPAL_EEH_STOPPED_DMA_FREEZE: - result &= ~EEH_STATE_RESET_ACTIVE; - result |= EEH_STATE_MMIO_ACTIVE; - result |= EEH_STATE_MMIO_ENABLED; + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_MMIO_ENABLED); break; case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: - result &= ~EEH_STATE_RESET_ACTIVE; + result = 0; break; case OPAL_EEH_STOPPED_RESET: - result |= EEH_STATE_RESET_ACTIVE; + result = EEH_STATE_RESET_ACTIVE; break; case OPAL_EEH_STOPPED_TEMP_UNAVAIL: - result |= EEH_STATE_UNAVAILABLE; + result = EEH_STATE_UNAVAILABLE; break; case OPAL_EEH_STOPPED_PERM_UNAVAIL: - result |= EEH_STATE_NOT_SUPPORT; + result = EEH_STATE_NOT_SUPPORT; break; default: - pr_warning("%s: Unexpected EEH status 0x%x " - "on PHB#%x-PE#%x\n", - __func__, fstate, hose->global_number, pe_no); + result = EEH_STATE_NOT_SUPPORT; + pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n", + __func__, phb->hose->global_number, + pe->addr, fstate); } - /* Dump PHB diag-data for frozen PE */ - if (result != EEH_STATE_NOT_SUPPORT && - (result & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) != - (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE) && + /* + * If PHB supports compound PE, to freeze all + * slave PEs for consistency. + * + * If the PE is switching to frozen state for the + * first time, to dump the PHB diag-data. + */ + if (!(result & EEH_STATE_NOT_SUPPORT) && + !(result & EEH_STATE_UNAVAILABLE) && + !(result & EEH_STATE_MMIO_ACTIVE) && + !(result & EEH_STATE_DMA_ACTIVE) && !(pe->state & EEH_PE_ISOLATED)) { + if (phb->freeze_pe) + phb->freeze_pe(phb, pe->addr); + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); - ioda_eeh_phb_diag(hose); + ioda_eeh_phb_diag(pe); } return result; } +/** + * ioda_eeh_get_state - Retrieve the state of PE + * @pe: EEH PE + * + * The PE's state should be retrieved from the PEEV, PEST + * IODA tables. Since the OPAL has exported the function + * to do it, it'd better to use that. + */ +static int ioda_eeh_get_state(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + + /* Sanity check on PE number. PHB PE should have 0 */ + if (pe->addr < 0 || + pe->addr >= phb->ioda.total_pe) { + pr_warn("%s: PHB#%x-PE#%x out of range [0, %x]\n", + __func__, phb->hose->global_number, + pe->addr, phb->ioda.total_pe); + return EEH_STATE_NOT_SUPPORT; + } + + if (pe->type & EEH_PE_PHB) + return ioda_eeh_get_phb_state(pe); + + return ioda_eeh_get_pe_state(pe); +} + static s64 ioda_eeh_phb_poll(struct pnv_phb *phb) { s64 rc = OPAL_HARDWARE; @@ -409,11 +514,11 @@ int ioda_eeh_phb_reset(struct pci_controller *hose, int option) if (option == EEH_RESET_FUNDAMENTAL || option == EEH_RESET_HOT) rc = opal_pci_reset(phb->opal_id, - OPAL_PHB_COMPLETE, + OPAL_RESET_PHB_COMPLETE, OPAL_ASSERT_RESET); else if (option == EEH_RESET_DEACTIVATE) rc = opal_pci_reset(phb->opal_id, - OPAL_PHB_COMPLETE, + OPAL_RESET_PHB_COMPLETE, OPAL_DEASSERT_RESET); if (rc < 0) goto out; @@ -453,15 +558,15 @@ static int ioda_eeh_root_reset(struct pci_controller *hose, int option) */ if (option == EEH_RESET_FUNDAMENTAL) rc = opal_pci_reset(phb->opal_id, - OPAL_PCI_FUNDAMENTAL_RESET, + OPAL_RESET_PCI_FUNDAMENTAL, OPAL_ASSERT_RESET); else if (option == EEH_RESET_HOT) rc = opal_pci_reset(phb->opal_id, - OPAL_PCI_HOT_RESET, + OPAL_RESET_PCI_HOT, OPAL_ASSERT_RESET); else if (option == EEH_RESET_DEACTIVATE) rc = opal_pci_reset(phb->opal_id, - OPAL_PCI_HOT_RESET, + OPAL_RESET_PCI_HOT, OPAL_DEASSERT_RESET); if (rc < 0) goto out; @@ -577,6 +682,31 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) if (pe->type & EEH_PE_PHB) { ret = ioda_eeh_phb_reset(hose, option); } else { + struct pnv_phb *phb; + s64 rc; + + /* + * The frozen PE might be caused by PAPR error injection + * registers, which are expected to be cleared after hitting + * frozen PE as stated in the hardware spec. Unfortunately, + * that's not true on P7IOC. So we have to clear it manually + * to avoid recursive EEH errors during recovery. + */ + phb = hose->private_data; + if (phb->model == PNV_PHB_MODEL_P7IOC && + (option == EEH_RESET_HOT || + option == EEH_RESET_FUNDAMENTAL)) { + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_ERROR, + OPAL_ASSERT_RESET); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clearing " + "error injection registers\n", + __func__, rc); + return -EIO; + } + } + bus = eeh_pe_bus_get(pe); if (pci_is_root_bus(bus) || pci_is_root_bus(bus->parent)) @@ -589,6 +719,24 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) } /** + * ioda_eeh_get_log - Retrieve error log + * @pe: frozen PE + * @severity: permanent or temporary error + * @drv_log: device driver log + * @len: length of device driver log + * + * Retrieve error log, which contains log from device driver + * and firmware. + */ +static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len) +{ + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + + return 0; +} + +/** * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE * @pe: EEH PE * @@ -602,21 +750,70 @@ static int ioda_eeh_configure_bridge(struct eeh_pe *pe) return 0; } +static int ioda_eeh_err_inject(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + s64 ret; + + /* Sanity check on error type */ + if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR && + type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) { + pr_warn("%s: Invalid error type %d\n", + __func__, type); + return -ERANGE; + } + + if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR || + func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) { + pr_warn("%s: Invalid error function %d\n", + __func__, func); + return -ERANGE; + } + + /* Firmware supports error injection ? */ + if (!opal_check_token(OPAL_PCI_ERR_INJECT)) { + pr_warn("%s: Firmware doesn't support error injection\n", + __func__); + return -ENXIO; + } + + /* Do error injection */ + ret = opal_pci_err_inject(phb->opal_id, pe->addr, + type, func, addr, mask); + if (ret != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld injecting error " + "%d-%d to PHB#%x-PE#%x\n", + __func__, ret, type, func, + hose->global_number, pe->addr); + return -EIO; + } + + return 0; +} + static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data) { /* GEM */ - pr_info(" GEM XFIR: %016llx\n", data->gemXfir); - pr_info(" GEM RFIR: %016llx\n", data->gemRfir); - pr_info(" GEM RIRQFIR: %016llx\n", data->gemRirqfir); - pr_info(" GEM Mask: %016llx\n", data->gemMask); - pr_info(" GEM RWOF: %016llx\n", data->gemRwof); + if (data->gemXfir || data->gemRfir || + data->gemRirqfir || data->gemMask || data->gemRwof) + pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->gemXfir), + be64_to_cpu(data->gemRfir), + be64_to_cpu(data->gemRirqfir), + be64_to_cpu(data->gemMask), + be64_to_cpu(data->gemRwof)); /* LEM */ - pr_info(" LEM FIR: %016llx\n", data->lemFir); - pr_info(" LEM Error Mask: %016llx\n", data->lemErrMask); - pr_info(" LEM Action 0: %016llx\n", data->lemAction0); - pr_info(" LEM Action 1: %016llx\n", data->lemAction1); - pr_info(" LEM WOF: %016llx\n", data->lemWof); + if (data->lemFir || data->lemErrMask || + data->lemAction0 || data->lemAction1 || data->lemWof) + pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrMask), + be64_to_cpu(data->lemAction0), + be64_to_cpu(data->lemAction1), + be64_to_cpu(data->lemWof)); } static void ioda_eeh_hub_diag(struct pci_controller *hose) @@ -627,8 +824,8 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose) rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data)); if (rc != OPAL_SUCCESS) { - pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n", - __func__, phb->hub_id, rc); + pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n", + __func__, phb->hub_id, rc); return; } @@ -636,24 +833,31 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose) case OPAL_P7IOC_DIAG_TYPE_RGC: pr_info("P7IOC diag-data for RGC\n\n"); ioda_eeh_hub_diag_common(data); - pr_info(" RGC Status: %016llx\n", data->rgc.rgcStatus); - pr_info(" RGC LDCP: %016llx\n", data->rgc.rgcLdcp); + if (data->rgc.rgcStatus || data->rgc.rgcLdcp) + pr_info(" RGC: %016llx %016llx\n", + be64_to_cpu(data->rgc.rgcStatus), + be64_to_cpu(data->rgc.rgcLdcp)); break; case OPAL_P7IOC_DIAG_TYPE_BI: pr_info("P7IOC diag-data for BI %s\n\n", data->bi.biDownbound ? "Downbound" : "Upbound"); ioda_eeh_hub_diag_common(data); - pr_info(" BI LDCP 0: %016llx\n", data->bi.biLdcp0); - pr_info(" BI LDCP 1: %016llx\n", data->bi.biLdcp1); - pr_info(" BI LDCP 2: %016llx\n", data->bi.biLdcp2); - pr_info(" BI Fence Status: %016llx\n", data->bi.biFenceStatus); + if (data->bi.biLdcp0 || data->bi.biLdcp1 || + data->bi.biLdcp2 || data->bi.biFenceStatus) + pr_info(" BI: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->bi.biLdcp0), + be64_to_cpu(data->bi.biLdcp1), + be64_to_cpu(data->bi.biLdcp2), + be64_to_cpu(data->bi.biFenceStatus)); break; case OPAL_P7IOC_DIAG_TYPE_CI: - pr_info("P7IOC diag-data for CI Port %d\\nn", + pr_info("P7IOC diag-data for CI Port %d\n\n", data->ci.ciPort); ioda_eeh_hub_diag_common(data); - pr_info(" CI Port Status: %016llx\n", data->ci.ciPortStatus); - pr_info(" CI Port LDCP: %016llx\n", data->ci.ciPortLdcp); + if (data->ci.ciPortStatus || data->ci.ciPortLdcp) + pr_info(" CI: %016llx %016llx\n", + be64_to_cpu(data->ci.ciPortStatus), + be64_to_cpu(data->ci.ciPortLdcp)); break; case OPAL_P7IOC_DIAG_TYPE_MISC: pr_info("P7IOC diag-data for MISC\n\n"); @@ -664,30 +868,71 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose) ioda_eeh_hub_diag_common(data); break; default: - pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n", - __func__, phb->hub_id, data->type); + pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n", + __func__, phb->hub_id, data->type); } } static int ioda_eeh_get_pe(struct pci_controller *hose, u16 pe_no, struct eeh_pe **pe) { - struct eeh_pe *phb_pe, *dev_pe; - struct eeh_dev dev; + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pnv_pe; + struct eeh_pe *dev_pe; + struct eeh_dev edev; - /* Find the PHB PE */ - phb_pe = eeh_phb_pe_get(hose); - if (!phb_pe) - return -EEXIST; + /* + * If PHB supports compound PE, to fetch + * the master PE because slave PE is invisible + * to EEH core. + */ + pnv_pe = &phb->ioda.pe_array[pe_no]; + if (pnv_pe->flags & PNV_IODA_PE_SLAVE) { + pnv_pe = pnv_pe->master; + WARN_ON(!pnv_pe || + !(pnv_pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pnv_pe->pe_number; + } /* Find the PE according to PE# */ - memset(&dev, 0, sizeof(struct eeh_dev)); - dev.phb = hose; - dev.pe_config_addr = pe_no; - dev_pe = eeh_pe_get(&dev); - if (!dev_pe) return -EEXIST; + memset(&edev, 0, sizeof(struct eeh_dev)); + edev.phb = hose; + edev.pe_config_addr = pe_no; + dev_pe = eeh_pe_get(&edev); + if (!dev_pe) + return -EEXIST; + /* Freeze the (compound) PE */ *pe = dev_pe; + if (!(dev_pe->state & EEH_PE_ISOLATED)) + phb->freeze_pe(phb, pe_no); + + /* + * At this point, we're sure the (compound) PE should + * have been frozen. However, we still need poke until + * hitting the frozen PE on top level. + */ + dev_pe = dev_pe->parent; + while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) { + int ret; + int active_flags = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE); + + ret = eeh_ops->get_state(dev_pe, NULL); + if (ret <= 0 || (ret & active_flags) == active_flags) { + dev_pe = dev_pe->parent; + continue; + } + + /* Frozen parent PE */ + *pe = dev_pe; + if (!(dev_pe->state & EEH_PE_ISOLATED)) + phb->freeze_pe(phb, dev_pe->addr); + + /* Next one */ + dev_pe = dev_pe->parent; + } + return 0; } @@ -792,7 +1037,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) "detected, location: %s\n", hose->global_number, eeh_pe_loc_get(phb_pe)); - ioda_eeh_phb_diag(hose); + ioda_eeh_phb_diag(phb_pe); + pnv_pci_dump_phb_diag_data(hose, phb_pe->data); ret = EEH_NEXT_ERR_NONE; } @@ -812,7 +1058,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); ret = EEH_NEXT_ERR_NONE; - } else if ((*pe)->state & EEH_PE_ISOLATED) { + } else if ((*pe)->state & EEH_PE_ISOLATED || + eeh_pe_passed(*pe)) { ret = EEH_NEXT_ERR_NONE; } else { pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", @@ -839,7 +1086,7 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) ret == EEH_NEXT_ERR_FENCED_PHB) && !((*pe)->state & EEH_PE_ISOLATED)) { eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); - ioda_eeh_phb_diag(hose); + ioda_eeh_phb_diag(*pe); } /* @@ -885,6 +1132,8 @@ struct pnv_eeh_ops ioda_eeh_ops = { .set_option = ioda_eeh_set_option, .get_state = ioda_eeh_get_state, .reset = ioda_eeh_reset, + .get_log = ioda_eeh_get_log, .configure_bridge = ioda_eeh_configure_bridge, + .err_inject = ioda_eeh_err_inject, .next_error = ioda_eeh_next_error }; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 56a206f32f77..3e89cbf55885 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -45,14 +45,31 @@ */ static int powernv_eeh_init(void) { + struct pci_controller *hose; + struct pnv_phb *phb; + /* We require OPALv3 */ if (!firmware_has_feature(FW_FEATURE_OPALv3)) { - pr_warning("%s: OPALv3 is required !\n", __func__); + pr_warn("%s: OPALv3 is required !\n", + __func__); return -EINVAL; } - /* Set EEH probe mode */ - eeh_probe_mode_set(EEH_PROBE_MODE_DEV); + /* Set probe mode */ + eeh_add_flag(EEH_PROBE_MODE_DEV); + + /* + * P7IOC blocks PCI config access to frozen PE, but PHB3 + * doesn't do that. So we have to selectively enable I/O + * prior to collecting error log. + */ + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + + if (phb->model == PNV_PHB_MODEL_P7IOC) + eeh_add_flag(EEH_ENABLE_IO_FOR_LOG); + break; + } return 0; } @@ -107,6 +124,7 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) struct pnv_phb *phb = hose->private_data; struct device_node *dn = pci_device_to_OF_node(dev); struct eeh_dev *edev = of_node_to_eeh_dev(dn); + int ret; /* * When probing the root bridge, which doesn't have any @@ -143,13 +161,27 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff); /* Create PE */ - eeh_add_to_parent_pe(edev); + ret = eeh_add_to_parent_pe(edev); + if (ret) { + pr_warn("%s: Can't add PCI dev %s to parent PE (%d)\n", + __func__, pci_name(dev), ret); + return ret; + } + + /* + * Cache the PE primary bus, which can't be fetched when + * full hotplug is in progress. In that case, all child + * PCI devices of the PE are expected to be removed prior + * to PE reset. + */ + if (!edev->pe->bus) + edev->pe->bus = dev->bus; /* * Enable EEH explicitly so that we will do EEH check * while accessing I/O stuff */ - eeh_set_enable(true); + eeh_add_flag(EEH_ENABLED); /* Save memory bars */ eeh_save_bars(edev); @@ -273,8 +305,8 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) max_wait -= mwait; if (max_wait <= 0) { - pr_warning("%s: Timeout getting PE#%x's state (%d)\n", - __func__, pe->addr, max_wait); + pr_warn("%s: Timeout getting PE#%x's state (%d)\n", + __func__, pe->addr, max_wait); return EEH_STATE_NOT_SUPPORT; } @@ -294,7 +326,7 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) * Retrieve the temporary or permanent error from the PE. */ static int powernv_eeh_get_log(struct eeh_pe *pe, int severity, - char *drv_log, unsigned long len) + char *drv_log, unsigned long len) { struct pci_controller *hose = pe->phb; struct pnv_phb *phb = hose->private_data; @@ -327,6 +359,31 @@ static int powernv_eeh_configure_bridge(struct eeh_pe *pe) } /** + * powernv_pe_err_inject - Inject specified error to the indicated PE + * @pe: the indicated PE + * @type: error type + * @func: specific error type + * @addr: address + * @mask: address mask + * + * The routine is called to inject specified error, which is + * determined by @type and @func, to the indicated PE for + * testing purpose. + */ +static int powernv_eeh_err_inject(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = -EEXIST; + + if (phb->eeh_ops && phb->eeh_ops->err_inject) + ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask); + + return ret; +} + +/** * powernv_eeh_next_error - Retrieve next EEH error to handle * @pe: Affected PE * @@ -382,6 +439,7 @@ static struct eeh_ops powernv_eeh_ops = { .wait_state = powernv_eeh_wait_state, .get_log = powernv_eeh_get_log, .configure_bridge = powernv_eeh_configure_bridge, + .err_inject = powernv_eeh_err_inject, .read_config = pnv_pci_cfg_read, .write_config = pnv_pci_cfg_write, .next_error = powernv_eeh_next_error, @@ -398,9 +456,7 @@ static int __init eeh_powernv_init(void) { int ret = -EINVAL; - if (!machine_is(powernv)) - return ret; - + eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE); ret = eeh_ops_register(&powernv_eeh_ops); if (!ret) pr_info("EEH: PowerNV platform initialized\n"); @@ -409,5 +465,4 @@ static int __init eeh_powernv_init(void) return ret; } - -early_initcall(eeh_powernv_init); +machine_early_initcall(powernv, eeh_powernv_init); diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index 32e2adfa5320..e462ab947d16 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -20,6 +20,7 @@ #include <linux/wait.h> #include <linux/gfp.h> #include <linux/of.h> +#include <asm/machdep.h> #include <asm/opal.h> #define N_ASYNC_COMPLETIONS 64 @@ -201,4 +202,4 @@ out_opal_node: out: return err; } -subsys_initcall(opal_async_comp_init); +machine_subsys_initcall(powernv, opal_async_comp_init); diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 788a1977b9a5..23260f7dfa7a 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -102,9 +102,9 @@ static ssize_t dump_ack_store(struct dump_obj *dump_obj, * due to the dynamic size of the dump */ static struct dump_attribute id_attribute = - __ATTR(id, 0666, dump_id_show, NULL); + __ATTR(id, S_IRUGO, dump_id_show, NULL); static struct dump_attribute type_attribute = - __ATTR(type, 0666, dump_type_show, NULL); + __ATTR(type, S_IRUGO, dump_type_show, NULL); static struct dump_attribute ack_attribute = __ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store); @@ -112,7 +112,7 @@ static ssize_t init_dump_show(struct dump_obj *dump_obj, struct dump_attribute *attr, char *buf) { - return sprintf(buf, "1 - initiate dump\n"); + return sprintf(buf, "1 - initiate Service Processor(FSP) dump\n"); } static int64_t dump_fips_init(uint8_t type) @@ -121,7 +121,7 @@ static int64_t dump_fips_init(uint8_t type) rc = opal_dump_init(type); if (rc) - pr_warn("%s: Failed to initiate FipS dump (%d)\n", + pr_warn("%s: Failed to initiate FSP dump (%d)\n", __func__, rc); return rc; } @@ -131,8 +131,12 @@ static ssize_t init_dump_store(struct dump_obj *dump_obj, const char *buf, size_t count) { - dump_fips_init(DUMP_TYPE_FSP); - pr_info("%s: Initiated FSP dump\n", __func__); + int rc; + + rc = dump_fips_init(DUMP_TYPE_FSP); + if (rc == OPAL_SUCCESS) + pr_info("%s: Initiated FSP dump\n", __func__); + return count; } @@ -297,7 +301,7 @@ static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj, * and rely on userspace to ask us to try * again. */ - pr_info("%s: Platform dump partially read.ID = 0x%x\n", + pr_info("%s: Platform dump partially read. ID = 0x%x\n", __func__, dump->id); return -EIO; } @@ -423,6 +427,10 @@ void __init opal_platform_dump_init(void) { int rc; + /* ELOG not supported by firmware */ + if (!opal_check_token(OPAL_DUMP_READ)) + return; + dump_kset = kset_create_and_add("dump", NULL, opal_kobj); if (!dump_kset) { pr_warn("%s: Failed to create dump kset\n", __func__); diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 10268c41d830..518fe95dbf24 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -82,9 +82,9 @@ static ssize_t elog_ack_store(struct elog_obj *elog_obj, } static struct elog_attribute id_attribute = - __ATTR(id, 0666, elog_id_show, NULL); + __ATTR(id, S_IRUGO, elog_id_show, NULL); static struct elog_attribute type_attribute = - __ATTR(type, 0666, elog_type_show, NULL); + __ATTR(type, S_IRUGO, elog_type_show, NULL); static struct elog_attribute ack_attribute = __ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store); @@ -249,7 +249,7 @@ static void elog_work_fn(struct work_struct *work) rc = opal_get_elog_size(&id, &size, &type); if (rc != OPAL_SUCCESS) { - pr_err("ELOG: Opal log read failed\n"); + pr_err("ELOG: OPAL log info read failed\n"); return; } @@ -257,7 +257,7 @@ static void elog_work_fn(struct work_struct *work) log_id = be64_to_cpu(id); elog_type = be64_to_cpu(type); - BUG_ON(elog_size > OPAL_MAX_ERRLOG_SIZE); + WARN_ON(elog_size > OPAL_MAX_ERRLOG_SIZE); if (elog_size >= OPAL_MAX_ERRLOG_SIZE) elog_size = OPAL_MAX_ERRLOG_SIZE; @@ -295,6 +295,10 @@ int __init opal_elog_init(void) { int rc = 0; + /* ELOG not supported by firmware */ + if (!opal_check_token(OPAL_ELOG_READ)) + return -1; + elog_kset = kset_create_and_add("elog", NULL, opal_kobj); if (!elog_kset) { pr_warn("%s: failed to create elog kset\n", __func__); diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c new file mode 100644 index 000000000000..5e1ed1575aab --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -0,0 +1,189 @@ +/* + * OPAL hypervisor Maintenance interrupt handling support in PowreNV. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; If not, see <http://www.gnu.org/licenses/>. + * + * Copyright 2014 IBM Corporation + * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/of.h> +#include <linux/mm.h> +#include <linux/slab.h> + +#include <asm/opal.h> +#include <asm/cputable.h> +#include <asm/machdep.h> + +static int opal_hmi_handler_nb_init; +struct OpalHmiEvtNode { + struct list_head list; + struct OpalHMIEvent hmi_evt; +}; +static LIST_HEAD(opal_hmi_evt_list); +static DEFINE_SPINLOCK(opal_hmi_evt_lock); + +static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) +{ + const char *level, *sevstr, *error_info; + static const char *hmi_error_types[] = { + "Malfunction Alert", + "Processor Recovery done", + "Processor recovery occurred again", + "Processor recovery occurred for masked error", + "Timer facility experienced an error", + "TFMR SPR is corrupted", + "UPS (Uniterrupted Power System) Overflow indication", + "An XSCOM operation failure", + "An XSCOM operation completed", + "SCOM has set a reserved FIR bit to cause recovery", + "Debug trigger has set a reserved FIR bit to cause recovery", + "A hypervisor resource error occurred" + }; + + /* Print things out */ + if (hmi_evt->version != OpalHMIEvt_V1) { + pr_err("HMI Interrupt, Unknown event version %d !\n", + hmi_evt->version); + return; + } + switch (hmi_evt->severity) { + case OpalHMI_SEV_NO_ERROR: + level = KERN_INFO; + sevstr = "Harmless"; + break; + case OpalHMI_SEV_WARNING: + level = KERN_WARNING; + sevstr = ""; + break; + case OpalHMI_SEV_ERROR_SYNC: + level = KERN_ERR; + sevstr = "Severe"; + break; + case OpalHMI_SEV_FATAL: + default: + level = KERN_ERR; + sevstr = "Fatal"; + break; + } + + printk("%s%s Hypervisor Maintenance interrupt [%s]\n", + level, sevstr, + hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ? + "Recovered" : "Not recovered"); + error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ? + hmi_error_types[hmi_evt->type] + : "Unknown"; + printk("%s Error detail: %s\n", level, error_info); + printk("%s HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer)); + if ((hmi_evt->type == OpalHMI_ERROR_TFAC) || + (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)) + printk("%s TFMR: %016llx\n", level, + be64_to_cpu(hmi_evt->tfmr)); +} + +static void hmi_event_handler(struct work_struct *work) +{ + unsigned long flags; + struct OpalHMIEvent *hmi_evt; + struct OpalHmiEvtNode *msg_node; + uint8_t disposition; + + spin_lock_irqsave(&opal_hmi_evt_lock, flags); + while (!list_empty(&opal_hmi_evt_list)) { + msg_node = list_entry(opal_hmi_evt_list.next, + struct OpalHmiEvtNode, list); + list_del(&msg_node->list); + spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); + + hmi_evt = (struct OpalHMIEvent *) &msg_node->hmi_evt; + print_hmi_event_info(hmi_evt); + disposition = hmi_evt->disposition; + kfree(msg_node); + + /* + * Check if HMI event has been recovered or not. If not + * then we can't continue, invoke panic. + */ + if (disposition != OpalHMI_DISPOSITION_RECOVERED) + panic("Unrecoverable HMI exception"); + + spin_lock_irqsave(&opal_hmi_evt_lock, flags); + } + spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); +} + +static DECLARE_WORK(hmi_event_work, hmi_event_handler); +/* + * opal_handle_hmi_event - notifier handler that queues up HMI events + * to be preocessed later. + */ +static int opal_handle_hmi_event(struct notifier_block *nb, + unsigned long msg_type, void *msg) +{ + unsigned long flags; + struct OpalHMIEvent *hmi_evt; + struct opal_msg *hmi_msg = msg; + struct OpalHmiEvtNode *msg_node; + + /* Sanity Checks */ + if (msg_type != OPAL_MSG_HMI_EVT) + return 0; + + /* HMI event info starts from param[0] */ + hmi_evt = (struct OpalHMIEvent *)&hmi_msg->params[0]; + + /* Delay the logging of HMI events to workqueue. */ + msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC); + if (!msg_node) { + pr_err("HMI: out of memory, Opal message event not handled\n"); + return -ENOMEM; + } + memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent)); + + spin_lock_irqsave(&opal_hmi_evt_lock, flags); + list_add(&msg_node->list, &opal_hmi_evt_list); + spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); + + schedule_work(&hmi_event_work); + return 0; +} + +static struct notifier_block opal_hmi_handler_nb = { + .notifier_call = opal_handle_hmi_event, + .next = NULL, + .priority = 0, +}; + +static int __init opal_hmi_handler_init(void) +{ + int ret; + + if (!opal_hmi_handler_nb_init) { + ret = opal_message_notifier_register( + OPAL_MSG_HMI_EVT, &opal_hmi_handler_nb); + if (ret) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, ret); + return ret; + } + opal_hmi_handler_nb_init = 1; + } + return 0; +} +machine_subsys_initcall(powernv, opal_hmi_handler_init); diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index f04b4d8aca5a..ad4b31df779a 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -324,7 +324,7 @@ static int opal_lpc_init_debugfs(void) rc |= opal_lpc_debugfs_create_type(root, "fw", OPAL_LPC_FW); return rc; } -device_initcall(opal_lpc_init_debugfs); +machine_device_initcall(powernv, opal_lpc_init_debugfs); #endif /* CONFIG_DEBUG_FS */ void opal_lpc_init(void) diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c index b17a34b695ef..43db2136dbff 100644 --- a/arch/powerpc/platforms/powernv/opal-memory-errors.c +++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c @@ -27,6 +27,7 @@ #include <linux/mm.h> #include <linux/slab.h> +#include <asm/machdep.h> #include <asm/opal.h> #include <asm/cputable.h> @@ -143,4 +144,4 @@ static int __init opal_mem_err_init(void) } return 0; } -subsys_initcall(opal_mem_err_init); +machine_subsys_initcall(powernv, opal_mem_err_init); diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c index acd9f7e96678..f9896fd5d04a 100644 --- a/arch/powerpc/platforms/powernv/opal-nvram.c +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -78,7 +78,7 @@ void __init opal_nvram_init(void) } nvram_size = be32_to_cpup(nbytes_p); - printk(KERN_INFO "OPAL nvram setup, %u bytes\n", nvram_size); + pr_info("OPAL nvram setup, %u bytes\n", nvram_size); of_node_put(np); ppc_md.nvram_read = opal_nvram_read; diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index b1885db8fdf3..499707ddaa9c 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -42,6 +42,9 @@ unsigned long __init opal_get_boot_time(void) __be64 __h_m_s_ms; long rc = OPAL_BUSY; + if (!opal_check_token(OPAL_RTC_READ)) + goto out; + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); if (rc == OPAL_BUSY_EVENT) @@ -49,16 +52,18 @@ unsigned long __init opal_get_boot_time(void) else mdelay(10); } - if (rc != OPAL_SUCCESS) { - ppc_md.get_rtc_time = NULL; - ppc_md.set_rtc_time = NULL; - return 0; - } + if (rc != OPAL_SUCCESS) + goto out; + y_m_d = be32_to_cpu(__y_m_d); h_m_s_ms = be64_to_cpu(__h_m_s_ms); opal_to_tm(y_m_d, h_m_s_ms, &tm); return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); +out: + ppc_md.get_rtc_time = NULL; + ppc_md.set_rtc_time = NULL; + return 0; } void opal_get_rtc_time(struct rtc_time *tm) diff --git a/arch/powerpc/platforms/powernv/opal-takeover.S b/arch/powerpc/platforms/powernv/opal-takeover.S deleted file mode 100644 index 11a3169ee583..000000000000 --- a/arch/powerpc/platforms/powernv/opal-takeover.S +++ /dev/null @@ -1,140 +0,0 @@ -/* - * PowerNV OPAL takeover assembly code, for use by prom_init.c - * - * Copyright 2011 IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/ppc_asm.h> -#include <asm/hvcall.h> -#include <asm/asm-offsets.h> -#include <asm/opal.h> - -#define H_HAL_TAKEOVER 0x5124 -#define H_HAL_TAKEOVER_QUERY_MAGIC -1 - - .text -_GLOBAL(opal_query_takeover) - mfcr r0 - stw r0,8(r1) - stdu r1,-STACKFRAMESIZE(r1) - std r3,STK_PARAM(R3)(r1) - std r4,STK_PARAM(R4)(r1) - li r3,H_HAL_TAKEOVER - li r4,H_HAL_TAKEOVER_QUERY_MAGIC - HVSC - addi r1,r1,STACKFRAMESIZE - ld r10,STK_PARAM(R3)(r1) - std r4,0(r10) - ld r10,STK_PARAM(R4)(r1) - std r5,0(r10) - lwz r0,8(r1) - mtcrf 0xff,r0 - blr - -_GLOBAL(opal_do_takeover) - mfcr r0 - stw r0,8(r1) - mflr r0 - std r0,16(r1) - bl __opal_do_takeover - ld r0,16(r1) - mtlr r0 - lwz r0,8(r1) - mtcrf 0xff,r0 - blr - -__opal_do_takeover: - ld r4,0(r3) - ld r5,0x8(r3) - ld r6,0x10(r3) - ld r7,0x18(r3) - ld r8,0x20(r3) - ld r9,0x28(r3) - ld r10,0x30(r3) - ld r11,0x38(r3) - li r3,H_HAL_TAKEOVER - HVSC - blr - - .globl opal_secondary_entry -opal_secondary_entry: - mr r31,r3 - mfmsr r11 - li r12,(MSR_SF | MSR_ISF)@highest - sldi r12,r12,48 - or r11,r11,r12 - mtmsrd r11 - isync - mfspr r4,SPRN_PIR - std r4,0(r3) -1: HMT_LOW - ld r4,8(r3) - cmpli cr0,r4,0 - beq 1b - HMT_MEDIUM -1: addi r3,r31,16 - bl __opal_do_takeover - b 1b - -_GLOBAL(opal_enter_rtas) - mflr r0 - std r0,16(r1) - stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */ - - /* Because PROM is running in 32b mode, it clobbers the high order half - * of all registers that it saves. We therefore save those registers - * PROM might touch to the stack. (r0, r3-r13 are caller saved) - */ - SAVE_GPR(2, r1) - SAVE_GPR(13, r1) - SAVE_8GPRS(14, r1) - SAVE_10GPRS(22, r1) - mfcr r10 - mfmsr r11 - std r10,_CCR(r1) - std r11,_MSR(r1) - - /* Get the PROM entrypoint */ - mtlr r5 - - /* Switch MSR to 32 bits mode - */ - li r12,1 - rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) - andc r11,r11,r12 - li r12,1 - rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) - andc r11,r11,r12 - mtmsrd r11 - isync - - /* Enter RTAS here... */ - blrl - - /* Just make sure that r1 top 32 bits didn't get - * corrupt by OF - */ - rldicl r1,r1,0,32 - - /* Restore the MSR (back to 64 bits) */ - ld r0,_MSR(r1) - MTMSRD(r0) - isync - - /* Restore other registers */ - REST_GPR(2, r1) - REST_GPR(13, r1) - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) - ld r4,_CCR(r1) - mtcr r4 - - addi r1,r1,PROM_FRAME_SIZE - ld r0,16(r1) - mtlr r0 - blr diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c new file mode 100644 index 000000000000..ae14c40b4b1c --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -0,0 +1,84 @@ +#include <linux/percpu.h> +#include <linux/jump_label.h> +#include <asm/trace.h> + +#ifdef HAVE_JUMP_LABEL +struct static_key opal_tracepoint_key = STATIC_KEY_INIT; + +void opal_tracepoint_regfunc(void) +{ + static_key_slow_inc(&opal_tracepoint_key); +} + +void opal_tracepoint_unregfunc(void) +{ + static_key_slow_dec(&opal_tracepoint_key); +} +#else +/* + * We optimise OPAL calls by placing opal_tracepoint_refcount + * directly in the TOC so we can check if the opal tracepoints are + * enabled via a single load. + */ + +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */ +extern long opal_tracepoint_refcount; + +void opal_tracepoint_regfunc(void) +{ + opal_tracepoint_refcount++; +} + +void opal_tracepoint_unregfunc(void) +{ + opal_tracepoint_refcount--; +} +#endif + +/* + * Since the tracing code might execute OPAL calls we need to guard against + * recursion. + */ +static DEFINE_PER_CPU(unsigned int, opal_trace_depth); + +void __trace_opal_entry(unsigned long opcode, unsigned long *args) +{ + unsigned long flags; + unsigned int *depth; + + local_irq_save(flags); + + depth = &__get_cpu_var(opal_trace_depth); + + if (*depth) + goto out; + + (*depth)++; + preempt_disable(); + trace_opal_entry(opcode, args); + (*depth)--; + +out: + local_irq_restore(flags); +} + +void __trace_opal_exit(long opcode, unsigned long retval) +{ + unsigned long flags; + unsigned int *depth; + + local_irq_save(flags); + + depth = &__get_cpu_var(opal_trace_depth); + + if (*depth) + goto out; + + (*depth)++; + trace_opal_exit(opcode, retval); + preempt_enable(); + (*depth)--; + +out: + local_irq_restore(flags); +} diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 4abbff22a61f..15942b6ffd3a 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -13,30 +13,69 @@ #include <asm/hvcall.h> #include <asm/asm-offsets.h> #include <asm/opal.h> +#include <asm/jump_label.h> + + .section ".text" + +#ifdef CONFIG_TRACEPOINTS +#ifdef CONFIG_JUMP_LABEL +#define OPAL_BRANCH(LABEL) \ + ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key) +#else + + .section ".toc","aw" + + .globl opal_tracepoint_refcount +opal_tracepoint_refcount: + .llong 0 + + .section ".text" + +/* + * We branch around this in early init by using an unconditional cpu + * feature. + */ +#define OPAL_BRANCH(LABEL) \ +BEGIN_FTR_SECTION; \ + b 1f; \ +END_FTR_SECTION(0, 1); \ + ld r12,opal_tracepoint_refcount@toc(r2); \ + std r12,32(r1); \ + cmpdi r12,0; \ + bne- LABEL; \ +1: + +#endif + +#else +#define OPAL_BRANCH(LABEL) +#endif /* TODO: * * - Trace irqs in/off (needs saving/restoring all args, argh...) * - Get r11 feed up by Dave so I can have better register usage */ + #define OPAL_CALL(name, token) \ _GLOBAL(name); \ mflr r0; \ - mfcr r12; \ std r0,16(r1); \ + li r0,token; \ + OPAL_BRANCH(opal_tracepoint_entry) \ + mfcr r12; \ stw r12,8(r1); \ std r1,PACAR1(r13); \ - li r0,0; \ + li r11,0; \ mfmsr r12; \ - ori r0,r0,MSR_EE; \ + ori r11,r11,MSR_EE; \ std r12,PACASAVEDMSR(r13); \ - andc r12,r12,r0; \ + andc r12,r12,r11; \ mtmsrd r12,1; \ - LOAD_REG_ADDR(r0,opal_return); \ - mtlr r0; \ - li r0,MSR_DR|MSR_IR|MSR_LE;\ - andc r12,r12,r0; \ - li r0,token; \ + LOAD_REG_ADDR(r11,opal_return); \ + mtlr r11; \ + li r11,MSR_DR|MSR_IR|MSR_LE;\ + andc r12,r12,r11; \ mtspr SPRN_HSRR1,r12; \ LOAD_REG_ADDR(r11,opal); \ ld r12,8(r11); \ @@ -61,6 +100,64 @@ opal_return: mtcr r4; rfid +#ifdef CONFIG_TRACEPOINTS +opal_tracepoint_entry: + stdu r1,-STACKFRAMESIZE(r1) + std r0,STK_REG(R23)(r1) + std r3,STK_REG(R24)(r1) + std r4,STK_REG(R25)(r1) + std r5,STK_REG(R26)(r1) + std r6,STK_REG(R27)(r1) + std r7,STK_REG(R28)(r1) + std r8,STK_REG(R29)(r1) + std r9,STK_REG(R30)(r1) + std r10,STK_REG(R31)(r1) + mr r3,r0 + addi r4,r1,STK_REG(R24) + bl __trace_opal_entry + ld r0,STK_REG(R23)(r1) + ld r3,STK_REG(R24)(r1) + ld r4,STK_REG(R25)(r1) + ld r5,STK_REG(R26)(r1) + ld r6,STK_REG(R27)(r1) + ld r7,STK_REG(R28)(r1) + ld r8,STK_REG(R29)(r1) + ld r9,STK_REG(R30)(r1) + ld r10,STK_REG(R31)(r1) + LOAD_REG_ADDR(r11,opal_tracepoint_return) + mfcr r12 + std r11,16(r1) + stw r12,8(r1) + std r1,PACAR1(r13) + li r11,0 + mfmsr r12 + ori r11,r11,MSR_EE + std r12,PACASAVEDMSR(r13) + andc r12,r12,r11 + mtmsrd r12,1 + LOAD_REG_ADDR(r11,opal_return) + mtlr r11 + li r11,MSR_DR|MSR_IR|MSR_LE + andc r12,r12,r11 + mtspr SPRN_HSRR1,r12 + LOAD_REG_ADDR(r11,opal) + ld r12,8(r11) + ld r2,0(r11) + mtspr SPRN_HSRR0,r12 + hrfid + +opal_tracepoint_return: + std r3,STK_REG(R31)(r1) + mr r4,r3 + ld r0,STK_REG(R23)(r1) + bl __trace_opal_exit + ld r3,STK_REG(R31)(r1) + addi r1,r1,STACKFRAMESIZE + ld r0,16(r1) + mtlr r0 + blr +#endif + OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ); @@ -86,6 +183,8 @@ OPAL_CALL(opal_get_xive, OPAL_GET_XIVE); OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER); OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS); OPAL_CALL(opal_pci_eeh_freeze_clear, OPAL_PCI_EEH_FREEZE_CLEAR); +OPAL_CALL(opal_pci_eeh_freeze_set, OPAL_PCI_EEH_FREEZE_SET); +OPAL_CALL(opal_pci_err_inject, OPAL_PCI_ERR_INJECT); OPAL_CALL(opal_pci_shpc, OPAL_PCI_SHPC); OPAL_CALL(opal_pci_phb_mmio_enable, OPAL_PCI_PHB_MMIO_ENABLE); OPAL_CALL(opal_pci_set_phb_mem_window, OPAL_PCI_SET_PHB_MEM_WINDOW); @@ -134,6 +233,7 @@ OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE); OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE); +OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN); OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT); OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO); OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2); @@ -146,3 +246,6 @@ OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT); OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); OPAL_CALL(opal_get_param, OPAL_GET_PARAM); OPAL_CALL(opal_set_param, OPAL_SET_PARAM); +OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); +OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); +OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c index 4cd2ea6c0dbe..7634d1c62299 100644 --- a/arch/powerpc/platforms/powernv/opal-xscom.c +++ b/arch/powerpc/platforms/powernv/opal-xscom.c @@ -130,4 +130,4 @@ static int opal_xscom_init(void) scom_init(&opal_scom_controller); return 0; } -arch_initcall(opal_xscom_init); +machine_arch_initcall(powernv, opal_xscom_init); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 199975613fe9..cf85ba86bf5e 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -22,6 +22,8 @@ #include <linux/kobject.h> #include <linux/delay.h> #include <linux/memblock.h> + +#include <asm/machdep.h> #include <asm/opal.h> #include <asm/firmware.h> #include <asm/mce.h> @@ -103,12 +105,12 @@ int __init early_init_dt_scan_opal(unsigned long node, if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { powerpc_firmware_features |= FW_FEATURE_OPALv2; powerpc_firmware_features |= FW_FEATURE_OPALv3; - printk("OPAL V3 detected !\n"); + pr_info("OPAL V3 detected !\n"); } else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) { powerpc_firmware_features |= FW_FEATURE_OPALv2; - printk("OPAL V2 detected !\n"); + pr_info("OPAL V2 detected !\n"); } else { - printk("OPAL V1 detected !\n"); + pr_info("OPAL V1 detected !\n"); } /* Reinit all cores with the right endian */ @@ -192,16 +194,12 @@ static int __init opal_register_exception_handlers(void) * fwnmi area at 0x7000 to provide the glue space to OPAL */ glue = 0x7000; - opal_register_exception_handler(OPAL_HYPERVISOR_MAINTENANCE_HANDLER, - 0, glue); - glue += 128; opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue); #endif return 0; } - -early_initcall(opal_register_exception_handlers); +machine_early_initcall(powernv, opal_register_exception_handlers); int opal_notifier_register(struct notifier_block *nb) { @@ -368,7 +366,7 @@ static int __init opal_message_init(void) } return 0; } -early_initcall(opal_message_init); +machine_early_initcall(powernv, opal_message_init); int opal_get_chars(uint32_t vtermno, char *buf, int count) { @@ -513,6 +511,46 @@ int opal_machine_check(struct pt_regs *regs) return 0; } +/* Early hmi handler called in real mode. */ +int opal_hmi_exception_early(struct pt_regs *regs) +{ + s64 rc; + + /* + * call opal hmi handler. Pass paca address as token. + * The return value OPAL_SUCCESS is an indication that there is + * an HMI event generated waiting to pull by Linux. + */ + rc = opal_handle_hmi(); + if (rc == OPAL_SUCCESS) { + local_paca->hmi_event_available = 1; + return 1; + } + return 0; +} + +/* HMI exception handler called in virtual mode during check_irq_replay. */ +int opal_handle_hmi_exception(struct pt_regs *regs) +{ + s64 rc; + __be64 evt = 0; + + /* + * Check if HMI event is available. + * if Yes, then call opal_poll_events to pull opal messages and + * process them. + */ + if (!local_paca->hmi_event_available) + return 0; + + local_paca->hmi_event_available = 0; + rc = opal_poll_events(&evt); + if (rc == OPAL_SUCCESS && evt) + opal_do_notifier(be64_to_cpu(evt)); + + return 1; +} + static uint64_t find_recovery_address(uint64_t nip) { int i; @@ -567,6 +605,24 @@ static int opal_sysfs_init(void) return 0; } +static void __init opal_dump_region_init(void) +{ + void *addr; + uint64_t size; + int rc; + + /* Register kernel log buffer */ + addr = log_buf_addr_get(); + size = log_buf_len_get(); + rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF, + __pa(addr), size); + /* Don't warn if this is just an older OPAL that doesn't + * know about that call + */ + if (rc && rc != OPAL_UNSUPPORTED) + pr_warn("DUMP: Failed to register kernel log buffer. " + "rc = %d\n", rc); +} static int __init opal_init(void) { struct device_node *np, *consoles; @@ -616,6 +672,8 @@ static int __init opal_init(void) /* Create "opal" kobject under /sys/firmware */ rc = opal_sysfs_init(); if (rc == 0) { + /* Setup dump region interface */ + opal_dump_region_init(); /* Setup error log interface */ rc = opal_elog_init(); /* Setup code update interface */ @@ -630,7 +688,7 @@ static int __init opal_init(void) return 0; } -subsys_initcall(opal_init); +machine_subsys_initcall(powernv, opal_init); void opal_shutdown(void) { @@ -656,6 +714,9 @@ void opal_shutdown(void) else mdelay(10); } + + /* Unregister memory dump region */ + opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF); } /* Export this so that test modules can use it */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index de19edeaa7a7..0fd6bdb493c9 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -36,41 +36,41 @@ #include <asm/tce.h> #include <asm/xics.h> #include <asm/debug.h> +#include <asm/firmware.h> #include "powernv.h" #include "pci.h" -#define define_pe_printk_level(func, kern_level) \ -static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \ -{ \ - struct va_format vaf; \ - va_list args; \ - char pfix[32]; \ - int r; \ - \ - va_start(args, fmt); \ - \ - vaf.fmt = fmt; \ - vaf.va = &args; \ - \ - if (pe->pdev) \ - strlcpy(pfix, dev_name(&pe->pdev->dev), \ - sizeof(pfix)); \ - else \ - sprintf(pfix, "%04x:%02x ", \ - pci_domain_nr(pe->pbus), \ - pe->pbus->number); \ - r = printk(kern_level "pci %s: [PE# %.3d] %pV", \ - pfix, pe->pe_number, &vaf); \ - \ - va_end(args); \ - \ - return r; \ -} \ - -define_pe_printk_level(pe_err, KERN_ERR); -define_pe_printk_level(pe_warn, KERN_WARNING); -define_pe_printk_level(pe_info, KERN_INFO); +static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + char pfix[32]; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (pe->pdev) + strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix)); + else + sprintf(pfix, "%04x:%02x ", + pci_domain_nr(pe->pbus), pe->pbus->number); + + printk("%spci %s: [PE# %.3d] %pV", + level, pfix, pe->pe_number, &vaf); + + va_end(args); +} + +#define pe_err(pe, fmt, ...) \ + pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__) +#define pe_warn(pe, fmt, ...) \ + pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__) +#define pe_info(pe, fmt, ...) \ + pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) /* * stdcix is only supposed to be used in hypervisor real mode as per @@ -82,6 +82,12 @@ static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) : : "r" (val), "r" (paddr) : "memory"); } +static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) +{ + return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) == + (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); +} + static int pnv_ioda_alloc_pe(struct pnv_phb *phb) { unsigned long pe; @@ -106,6 +112,380 @@ static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe) clear_bit(pe, phb->ioda.pe_alloc); } +/* The default M64 BAR is shared by all PEs */ +static int pnv_ioda2_init_m64(struct pnv_phb *phb) +{ + const char *desc; + struct resource *r; + s64 rc; + + /* Configure the default M64 BAR */ + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + phb->ioda.m64_bar_idx, + phb->ioda.m64_base, + 0, /* unused */ + phb->ioda.m64_size); + if (rc != OPAL_SUCCESS) { + desc = "configuring"; + goto fail; + } + + /* Enable the default M64 BAR */ + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + phb->ioda.m64_bar_idx, + OPAL_ENABLE_M64_SPLIT); + if (rc != OPAL_SUCCESS) { + desc = "enabling"; + goto fail; + } + + /* Mark the M64 BAR assigned */ + set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc); + + /* + * Strip off the segment used by the reserved PE, which is + * expected to be 0 or last one of PE capabicity. + */ + r = &phb->hose->mem_resources[1]; + if (phb->ioda.reserved_pe == 0) + r->start += phb->ioda.m64_segsize; + else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1)) + r->end -= phb->ioda.m64_segsize; + else + pr_warn(" Cannot strip M64 segment for reserved PE#%d\n", + phb->ioda.reserved_pe); + + return 0; + +fail: + pr_warn(" Failure %lld %s M64 BAR#%d\n", + rc, desc, phb->ioda.m64_bar_idx); + opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + phb->ioda.m64_bar_idx, + OPAL_DISABLE_M64); + return -EIO; +} + +static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb) +{ + resource_size_t sgsz = phb->ioda.m64_segsize; + struct pci_dev *pdev; + struct resource *r; + int base, step, i; + + /* + * Root bus always has full M64 range and root port has + * M64 range used in reality. So we're checking root port + * instead of root bus. + */ + list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) { + for (i = PCI_BRIDGE_RESOURCES; + i <= PCI_BRIDGE_RESOURCE_END; i++) { + r = &pdev->resource[i]; + if (!r->parent || + !pnv_pci_is_mem_pref_64(r->flags)) + continue; + + base = (r->start - phb->ioda.m64_base) / sgsz; + for (step = 0; step < resource_size(r) / sgsz; step++) + set_bit(base + step, phb->ioda.pe_alloc); + } + } +} + +static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb, + struct pci_bus *bus, int all) +{ + resource_size_t segsz = phb->ioda.m64_segsize; + struct pci_dev *pdev; + struct resource *r; + struct pnv_ioda_pe *master_pe, *pe; + unsigned long size, *pe_alloc; + bool found; + int start, i, j; + + /* Root bus shouldn't use M64 */ + if (pci_is_root_bus(bus)) + return IODA_INVALID_PE; + + /* We support only one M64 window on each bus */ + found = false; + pci_bus_for_each_resource(bus, r, i) { + if (r && r->parent && + pnv_pci_is_mem_pref_64(r->flags)) { + found = true; + break; + } + } + + /* No M64 window found ? */ + if (!found) + return IODA_INVALID_PE; + + /* Allocate bitmap */ + size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); + pe_alloc = kzalloc(size, GFP_KERNEL); + if (!pe_alloc) { + pr_warn("%s: Out of memory !\n", + __func__); + return IODA_INVALID_PE; + } + + /* + * Figure out reserved PE numbers by the PE + * the its child PEs. + */ + start = (r->start - phb->ioda.m64_base) / segsz; + for (i = 0; i < resource_size(r) / segsz; i++) + set_bit(start + i, pe_alloc); + + if (all) + goto done; + + /* + * If the PE doesn't cover all subordinate buses, + * we need subtract from reserved PEs for children. + */ + list_for_each_entry(pdev, &bus->devices, bus_list) { + if (!pdev->subordinate) + continue; + + pci_bus_for_each_resource(pdev->subordinate, r, i) { + if (!r || !r->parent || + !pnv_pci_is_mem_pref_64(r->flags)) + continue; + + start = (r->start - phb->ioda.m64_base) / segsz; + for (j = 0; j < resource_size(r) / segsz ; j++) + clear_bit(start + j, pe_alloc); + } + } + + /* + * the current bus might not own M64 window and that's all + * contributed by its child buses. For the case, we needn't + * pick M64 dependent PE#. + */ + if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) { + kfree(pe_alloc); + return IODA_INVALID_PE; + } + + /* + * Figure out the master PE and put all slave PEs to master + * PE's list to form compound PE. + */ +done: + master_pe = NULL; + i = -1; + while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) < + phb->ioda.total_pe) { + pe = &phb->ioda.pe_array[i]; + pe->phb = phb; + pe->pe_number = i; + + if (!master_pe) { + pe->flags |= PNV_IODA_PE_MASTER; + INIT_LIST_HEAD(&pe->slaves); + master_pe = pe; + } else { + pe->flags |= PNV_IODA_PE_SLAVE; + pe->master = master_pe; + list_add_tail(&pe->list, &master_pe->slaves); + } + } + + kfree(pe_alloc); + return master_pe->pe_number; +} + +static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) +{ + struct pci_controller *hose = phb->hose; + struct device_node *dn = hose->dn; + struct resource *res; + const u32 *r; + u64 pci_addr; + + if (!firmware_has_feature(FW_FEATURE_OPALv3)) { + pr_info(" Firmware too old to support M64 window\n"); + return; + } + + r = of_get_property(dn, "ibm,opal-m64-window", NULL); + if (!r) { + pr_info(" No <ibm,opal-m64-window> on %s\n", + dn->full_name); + return; + } + + /* FIXME: Support M64 for P7IOC */ + if (phb->type != PNV_PHB_IODA2) { + pr_info(" Not support M64 window\n"); + return; + } + + res = &hose->mem_resources[1]; + res->start = of_translate_address(dn, r + 2); + res->end = res->start + of_read_number(r + 4, 2) - 1; + res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); + pci_addr = of_read_number(r, 2); + hose->mem_offset[1] = res->start - pci_addr; + + phb->ioda.m64_size = resource_size(res); + phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe; + phb->ioda.m64_base = pci_addr; + + /* Use last M64 BAR to cover M64 window */ + phb->ioda.m64_bar_idx = 15; + phb->init_m64 = pnv_ioda2_init_m64; + phb->alloc_m64_pe = pnv_ioda2_alloc_m64_pe; + phb->pick_m64_pe = pnv_ioda2_pick_m64_pe; +} + +static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) +{ + struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no]; + struct pnv_ioda_pe *slave; + s64 rc; + + /* Fetch master PE */ + if (pe->flags & PNV_IODA_PE_SLAVE) { + pe = pe->master; + WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pe->pe_number; + } + + /* Freeze master PE */ + rc = opal_pci_eeh_freeze_set(phb->opal_id, + pe_no, + OPAL_EEH_ACTION_SET_FREEZE_ALL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, pe_no); + return; + } + + /* Freeze slave PEs */ + if (!(pe->flags & PNV_IODA_PE_MASTER)) + return; + + list_for_each_entry(slave, &pe->slaves, list) { + rc = opal_pci_eeh_freeze_set(phb->opal_id, + slave->pe_number, + OPAL_EEH_ACTION_SET_FREEZE_ALL); + if (rc != OPAL_SUCCESS) + pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, + slave->pe_number); + } +} + +static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt) +{ + struct pnv_ioda_pe *pe, *slave; + s64 rc; + + /* Find master PE */ + pe = &phb->ioda.pe_array[pe_no]; + if (pe->flags & PNV_IODA_PE_SLAVE) { + pe = pe->master; + WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pe->pe_number; + } + + /* Clear frozen state for master PE */ + rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n", + __func__, rc, opt, phb->hose->global_number, pe_no); + return -EIO; + } + + if (!(pe->flags & PNV_IODA_PE_MASTER)) + return 0; + + /* Clear frozen state for slave PEs */ + list_for_each_entry(slave, &pe->slaves, list) { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + slave->pe_number, + opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n", + __func__, rc, opt, phb->hose->global_number, + slave->pe_number); + return -EIO; + } + } + + return 0; +} + +static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) +{ + struct pnv_ioda_pe *slave, *pe; + u8 fstate, state; + __be16 pcierr; + s64 rc; + + /* Sanity check on PE number */ + if (pe_no < 0 || pe_no >= phb->ioda.total_pe) + return OPAL_EEH_STOPPED_PERM_UNAVAIL; + + /* + * Fetch the master PE and the PE instance might be + * not initialized yet. + */ + pe = &phb->ioda.pe_array[pe_no]; + if (pe->flags & PNV_IODA_PE_SLAVE) { + pe = pe->master; + WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pe->pe_number; + } + + /* Check the master PE */ + rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, + &state, &pcierr, NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting " + "PHB#%x-PE#%x state\n", + __func__, rc, + phb->hose->global_number, pe_no); + return OPAL_EEH_STOPPED_TEMP_UNAVAIL; + } + + /* Check the slave PE */ + if (!(pe->flags & PNV_IODA_PE_MASTER)) + return state; + + list_for_each_entry(slave, &pe->slaves, list) { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + slave->pe_number, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting " + "PHB#%x-PE#%x state\n", + __func__, rc, + phb->hose->global_number, slave->pe_number); + return OPAL_EEH_STOPPED_TEMP_UNAVAIL; + } + + /* + * Override the result based on the ascending + * priority. + */ + if (fstate > state) + state = fstate; + } + + return state; +} + /* Currently those 2 are only used when MSIs are enabled, this will change * but in the meantime, we need to protect them to avoid warnings */ @@ -363,9 +743,16 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; struct pnv_ioda_pe *pe; - int pe_num; + int pe_num = IODA_INVALID_PE; + + /* Check if PE is determined by M64 */ + if (phb->pick_m64_pe) + pe_num = phb->pick_m64_pe(phb, bus, all); + + /* The PE number isn't pinned by M64 */ + if (pe_num == IODA_INVALID_PE) + pe_num = pnv_ioda_alloc_pe(phb); - pe_num = pnv_ioda_alloc_pe(phb); if (pe_num == IODA_INVALID_PE) { pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n", __func__, pci_domain_nr(bus), bus->number); @@ -373,7 +760,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) } pe = &phb->ioda.pe_array[pe_num]; - pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); + pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); pe->pbus = bus; pe->pdev = NULL; pe->tce32_seg = -1; @@ -441,8 +828,15 @@ static void pnv_ioda_setup_PEs(struct pci_bus *bus) static void pnv_pci_ioda_setup_PEs(void) { struct pci_controller *hose, *tmp; + struct pnv_phb *phb; list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + phb = hose->private_data; + + /* M64 layout might affect PE allocation */ + if (phb->alloc_m64_pe) + phb->alloc_m64_pe(phb); + pnv_ioda_setup_PEs(hose->bus); } } @@ -462,7 +856,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev pe = &phb->ioda.pe_array[pdn->pe_number]; WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); - set_iommu_table_base(&pdev->dev, &pe->tce32_table); + set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table); } static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, @@ -491,17 +885,48 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, set_dma_ops(&pdev->dev, &dma_iommu_ops); set_iommu_table_base(&pdev->dev, &pe->tce32_table); } + *pdev->dev.dma_mask = dma_mask; return 0; } -static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) +static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb, + struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *pe; + u64 end, mask; + + if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) + return 0; + + pe = &phb->ioda.pe_array[pdn->pe_number]; + if (!pe->tce_bypass_enabled) + return __dma_get_required_mask(&pdev->dev); + + + end = pe->tce_bypass_base + memblock_end_of_DRAM(); + mask = 1ULL << (fls64(end) - 1); + mask += mask - 1; + + return mask; +} + +static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, + struct pci_bus *bus, + bool add_to_iommu_group) { struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { - set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table); + if (add_to_iommu_group) + set_iommu_table_base_and_group(&dev->dev, + &pe->tce32_table); + else + set_iommu_table_base(&dev->dev, &pe->tce32_table); + if (dev->subordinate) - pnv_ioda_setup_bus_dma(pe, dev->subordinate); + pnv_ioda_setup_bus_dma(pe, dev->subordinate, + add_to_iommu_group); } } @@ -513,15 +938,16 @@ static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, (__be64 __iomem *)pe->tce_inval_reg_phys : (__be64 __iomem *)tbl->it_index; unsigned long start, end, inc; + const unsigned shift = tbl->it_page_shift; start = __pa(startp); end = __pa(endp); /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */ if (tbl->it_busno) { - start <<= 12; - end <<= 12; - inc = 128 << 12; + start <<= shift; + end <<= shift; + inc = 128ull << shift; start |= tbl->it_busno; end |= tbl->it_busno; } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) { @@ -559,18 +985,19 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, __be64 __iomem *invalidate = rm ? (__be64 __iomem *)pe->tce_inval_reg_phys : (__be64 __iomem *)tbl->it_index; + const unsigned shift = tbl->it_page_shift; /* We'll invalidate DMA address in PE scope */ - start = 0x2ul << 60; + start = 0x2ull << 60; start |= (pe->pe_number & 0xFF); end = start; /* Figure out the start, end and step */ inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64)); - start |= (inc << 12); + start |= (inc << shift); inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64)); - end |= (inc << 12); - inc = (0x1ul << 12); + end |= (inc << shift); + inc = (0x1ull << shift); mb(); while (start <= end) { @@ -654,7 +1081,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, /* Setup linux iommu table */ tbl = &pe->tce32_table; pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, - base << 28); + base << 28, IOMMU_PAGE_SHIFT_4K); /* OPAL variant of P7IOC SW invalidated TCEs */ swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); @@ -677,7 +1104,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, if (pe->pdev) set_iommu_table_base_and_group(&pe->pdev->dev, tbl); else - pnv_ioda_setup_bus_dma(pe, pe->pbus); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); return; fail: @@ -713,11 +1140,15 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) 0); /* - * We might want to reset the DMA ops of all devices on - * this PE. However in theory, that shouldn't be necessary - * as this is used for VFIO/KVM pass-through and the device - * hasn't yet been returned to its kernel driver + * EEH needs the mapping between IOMMU table and group + * of those VFIO/KVM pass-through devices. We can postpone + * resetting DMA ops until the DMA mask is configured in + * host side. */ + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + else + pnv_ioda_setup_bus_dma(pe, pe->pbus, false); } if (rc) pe_err(pe, "OPAL error %lld configuring bypass window\n", rc); @@ -784,7 +1215,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, /* Setup linux iommu table */ tbl = &pe->tce32_table; - pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0); + pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, + IOMMU_PAGE_SHIFT_4K); /* OPAL variant of PHB3 invalidated TCEs */ swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); @@ -805,7 +1237,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (pe->pdev) set_iommu_table_base_and_group(&pe->pdev->dev, tbl); else - pnv_ioda_setup_bus_dma(pe, pe->pbus); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); /* Also create a bypass window */ pnv_pci_ioda2_setup_bypass_pe(phb, pe); @@ -1055,9 +1487,6 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, index++; } } else if (res->flags & IORESOURCE_MEM) { - /* WARNING: Assumes M32 is mem region 0 in PHB. We need to - * harden that algorithm when we start supporting M64 - */ region.start = res->start - hose->mem_offset[0] - phb->ioda.m32_pci_base; @@ -1141,9 +1570,8 @@ static void pnv_pci_ioda_fixup(void) pnv_pci_ioda_create_dbgfs(); #ifdef CONFIG_EEH - eeh_probe_mode_set(EEH_PROBE_MODE_DEV); - eeh_addr_cache_build(); eeh_init(); + eeh_addr_cache_build(); #endif } @@ -1178,7 +1606,10 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, bridge = bridge->bus->self; } - /* We need support prefetchable memory window later */ + /* We fail back to M32 if M64 isn't supported */ + if (phb->ioda.m64_segsize && + pnv_pci_is_mem_pref_64(type)) + return phb->ioda.m64_segsize; if (type & IORESOURCE_MEM) return phb->ioda.m32_segsize; @@ -1217,12 +1648,12 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) { - opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET, + opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); } -void __init pnv_pci_init_ioda_phb(struct device_node *np, - u64 hub_id, int ioda_type) +static void __init pnv_pci_init_ioda_phb(struct device_node *np, + u64 hub_id, int ioda_type) { struct pci_controller *hose; struct pnv_phb *phb; @@ -1299,6 +1730,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL); if (prop32) phb->ioda.reserved_pe = be32_to_cpup(prop32); + + /* Parse 64-bit MMIO range */ + pnv_ioda_parse_m64_window(phb); + phb->ioda.m32_size = resource_size(&hose->mem_resources[0]); /* FW Has already off top 64k of M32 space (MSI space) */ phb->ioda.m32_size += 0x10000; @@ -1334,14 +1769,6 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Calculate how many 32-bit TCE segments we have */ phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28; - /* Clear unusable m64 */ - hose->mem_resources[1].flags = 0; - hose->mem_resources[1].start = 0; - hose->mem_resources[1].end = 0; - hose->mem_resources[2].flags = 0; - hose->mem_resources[2].start = 0; - hose->mem_resources[2].end = 0; - #if 0 /* We should really do that ... */ rc = opal_pci_set_phb_mem_window(opal->phb_id, window_type, @@ -1351,14 +1778,21 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, segment_size); #endif - pr_info(" %d (%d) PE's M32: 0x%x [segment=0x%x]" - " IO: 0x%x [segment=0x%x]\n", - phb->ioda.total_pe, - phb->ioda.reserved_pe, - phb->ioda.m32_size, phb->ioda.m32_segsize, - phb->ioda.io_size, phb->ioda.io_segsize); + pr_info(" %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n", + phb->ioda.total_pe, phb->ioda.reserved_pe, + phb->ioda.m32_size, phb->ioda.m32_segsize); + if (phb->ioda.m64_size) + pr_info(" M64: 0x%lx [segment=0x%lx]\n", + phb->ioda.m64_size, phb->ioda.m64_segsize); + if (phb->ioda.io_size) + pr_info(" IO: 0x%x [segment=0x%x]\n", + phb->ioda.io_size, phb->ioda.io_segsize); + phb->hose->ops = &pnv_pci_ops; + phb->get_pe_state = pnv_ioda_get_pe_state; + phb->freeze_pe = pnv_ioda_freeze_pe; + phb->unfreeze_pe = pnv_ioda_unfreeze_pe; #ifdef CONFIG_EEH phb->eeh_ops = &ioda_eeh_ops; #endif @@ -1369,6 +1803,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; phb->dma_set_mask = pnv_pci_ioda_dma_set_mask; + phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask; /* Setup shutdown function for kexec */ phb->shutdown = pnv_pci_ioda_shutdown; @@ -1390,7 +1825,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, pci_add_flags(PCI_REASSIGN_ALL_RSRC); /* Reset IODA tables to a clean state */ - rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET); + rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); if (rc) pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); @@ -1404,6 +1839,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET); } + + /* Configure M64 window */ + if (phb->init_m64 && phb->init_m64(phb)) + hose->mem_resources[1].flags = 0; } void __init pnv_pci_init_ioda2_phb(struct device_node *np) diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index e3807d69393e..94ce3481490b 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -172,7 +172,8 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_p5ioc2_dma_dev_setup; pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table, - tce_mem, tce_size, 0); + tce_mem, tce_size, 0, + IOMMU_PAGE_SHIFT_4K); } void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index f91a4e5d872e..e9f509bbc078 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -132,61 +132,78 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, data = (struct OpalIoP7IOCPhbErrorData *)common; pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n", - hose->global_number, common->version); + hose->global_number, be32_to_cpu(common->version)); if (data->brdgCtl) pr_info("brdgCtl: %08x\n", - data->brdgCtl); + be32_to_cpu(data->brdgCtl)); if (data->portStatusReg || data->rootCmplxStatus || data->busAgentStatus) pr_info("UtlSts: %08x %08x %08x\n", - data->portStatusReg, data->rootCmplxStatus, - data->busAgentStatus); + be32_to_cpu(data->portStatusReg), + be32_to_cpu(data->rootCmplxStatus), + be32_to_cpu(data->busAgentStatus)); if (data->deviceStatus || data->slotStatus || data->linkStatus || data->devCmdStatus || data->devSecStatus) pr_info("RootSts: %08x %08x %08x %08x %08x\n", - data->deviceStatus, data->slotStatus, - data->linkStatus, data->devCmdStatus, - data->devSecStatus); + be32_to_cpu(data->deviceStatus), + be32_to_cpu(data->slotStatus), + be32_to_cpu(data->linkStatus), + be32_to_cpu(data->devCmdStatus), + be32_to_cpu(data->devSecStatus)); if (data->rootErrorStatus || data->uncorrErrorStatus || data->corrErrorStatus) pr_info("RootErrSts: %08x %08x %08x\n", - data->rootErrorStatus, data->uncorrErrorStatus, - data->corrErrorStatus); + be32_to_cpu(data->rootErrorStatus), + be32_to_cpu(data->uncorrErrorStatus), + be32_to_cpu(data->corrErrorStatus)); if (data->tlpHdr1 || data->tlpHdr2 || data->tlpHdr3 || data->tlpHdr4) pr_info("RootErrLog: %08x %08x %08x %08x\n", - data->tlpHdr1, data->tlpHdr2, - data->tlpHdr3, data->tlpHdr4); + be32_to_cpu(data->tlpHdr1), + be32_to_cpu(data->tlpHdr2), + be32_to_cpu(data->tlpHdr3), + be32_to_cpu(data->tlpHdr4)); if (data->sourceId || data->errorClass || data->correlator) pr_info("RootErrLog1: %08x %016llx %016llx\n", - data->sourceId, data->errorClass, - data->correlator); + be32_to_cpu(data->sourceId), + be64_to_cpu(data->errorClass), + be64_to_cpu(data->correlator)); if (data->p7iocPlssr || data->p7iocCsr) pr_info("PhbSts: %016llx %016llx\n", - data->p7iocPlssr, data->p7iocCsr); + be64_to_cpu(data->p7iocPlssr), + be64_to_cpu(data->p7iocCsr)); if (data->lemFir) pr_info("Lem: %016llx %016llx %016llx\n", - data->lemFir, data->lemErrorMask, - data->lemWOF); + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrorMask), + be64_to_cpu(data->lemWOF)); if (data->phbErrorStatus) pr_info("PhbErr: %016llx %016llx %016llx %016llx\n", - data->phbErrorStatus, data->phbFirstErrorStatus, - data->phbErrorLog0, data->phbErrorLog1); + be64_to_cpu(data->phbErrorStatus), + be64_to_cpu(data->phbFirstErrorStatus), + be64_to_cpu(data->phbErrorLog0), + be64_to_cpu(data->phbErrorLog1)); if (data->mmioErrorStatus) pr_info("OutErr: %016llx %016llx %016llx %016llx\n", - data->mmioErrorStatus, data->mmioFirstErrorStatus, - data->mmioErrorLog0, data->mmioErrorLog1); + be64_to_cpu(data->mmioErrorStatus), + be64_to_cpu(data->mmioFirstErrorStatus), + be64_to_cpu(data->mmioErrorLog0), + be64_to_cpu(data->mmioErrorLog1)); if (data->dma0ErrorStatus) pr_info("InAErr: %016llx %016llx %016llx %016llx\n", - data->dma0ErrorStatus, data->dma0FirstErrorStatus, - data->dma0ErrorLog0, data->dma0ErrorLog1); + be64_to_cpu(data->dma0ErrorStatus), + be64_to_cpu(data->dma0FirstErrorStatus), + be64_to_cpu(data->dma0ErrorLog0), + be64_to_cpu(data->dma0ErrorLog1)); if (data->dma1ErrorStatus) pr_info("InBErr: %016llx %016llx %016llx %016llx\n", - data->dma1ErrorStatus, data->dma1FirstErrorStatus, - data->dma1ErrorLog0, data->dma1ErrorLog1); + be64_to_cpu(data->dma1ErrorStatus), + be64_to_cpu(data->dma1FirstErrorStatus), + be64_to_cpu(data->dma1ErrorLog0), + be64_to_cpu(data->dma1ErrorLog1)); for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) { if ((data->pestA[i] >> 63) == 0 && @@ -194,7 +211,8 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, continue; pr_info("PE[%3d] A/B: %016llx %016llx\n", - i, data->pestA[i], data->pestB[i]); + i, be64_to_cpu(data->pestA[i]), + be64_to_cpu(data->pestB[i])); } } @@ -319,43 +337,52 @@ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) { unsigned long flags, rc; - int has_diag; + int has_diag, ret = 0; spin_lock_irqsave(&phb->lock, flags); + /* Fetch PHB diag-data */ rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); has_diag = (rc == OPAL_SUCCESS); - rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, + /* If PHB supports compound PE, to handle it */ + if (phb->unfreeze_pe) { + ret = phb->unfreeze_pe(phb, + pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); - if (rc) { - pr_warning("PCI %d: Failed to clear EEH freeze state" - " for PE#%d, err %ld\n", - phb->hose->global_number, pe_no, rc); - - /* For now, let's only display the diag buffer when we fail to clear - * the EEH status. We'll do more sensible things later when we have - * proper EEH support. We need to make sure we don't pollute ourselves - * with the normal errors generated when probing empty slots - */ - if (has_diag) - pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob); - else - pr_warning("PCI %d: No diag data available\n", - phb->hose->global_number); + } else { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (rc) { + pr_warn("%s: Failure %ld clearing frozen " + "PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, + pe_no); + ret = -EIO; + } } + /* + * For now, let's only display the diag buffer when we fail to clear + * the EEH status. We'll do more sensible things later when we have + * proper EEH support. We need to make sure we don't pollute ourselves + * with the normal errors generated when probing empty slots + */ + if (has_diag && ret) + pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob); + spin_unlock_irqrestore(&phb->lock, flags); } static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct device_node *dn) { - s64 rc; u8 fstate; __be16 pcierr; - u32 pe_no; + int pe_no; + s64 rc; /* * Get the PE#. During the PCI probe stage, we might not @@ -370,20 +397,42 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb, pe_no = phb->ioda.reserved_pe; } - /* Read freeze status */ - rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr, - NULL); - if (rc) { - pr_warning("%s: Can't read EEH status (PE#%d) for " - "%s, err %lld\n", - __func__, pe_no, dn->full_name, rc); - return; + /* + * Fetch frozen state. If the PHB support compound PE, + * we need handle that case. + */ + if (phb->get_pe_state) { + fstate = phb->get_pe_state(phb, pe_no); + } else { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe_no, + &fstate, + &pcierr, + NULL); + if (rc) { + pr_warn("%s: Failure %lld getting PHB#%x-PE#%x state\n", + __func__, rc, phb->hose->global_number, pe_no); + return; + } } + cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", (PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn), pe_no, fstate); - if (fstate != 0) + + /* Clear the frozen state if applicable */ + if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE || + fstate == OPAL_EEH_STOPPED_DMA_FREEZE || + fstate == OPAL_EEH_STOPPED_MMIO_DMA_FREEZE) { + /* + * If PHB supports compound PE, freeze it for + * consistency. + */ + if (phb->freeze_pe) + phb->freeze_pe(phb, pe_no); + pnv_pci_handle_eeh_config(phb, pe_no); + } } int pnv_pci_cfg_read(struct device_node *dn, @@ -564,10 +613,11 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages, proto_tce |= TCE_PCI_WRITE; tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; - rpn = __pa(uaddr) >> TCE_SHIFT; + rpn = __pa(uaddr) >> tbl->it_page_shift; while (npages--) - *(tcep++) = cpu_to_be64(proto_tce | (rpn++ << TCE_RPN_SHIFT)); + *(tcep++) = cpu_to_be64(proto_tce | + (rpn++ << tbl->it_page_shift)); /* Some implementations won't cache invalid TCEs and thus may not * need that flush. We'll probably turn it_type into a bit mask @@ -627,11 +677,11 @@ static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages) void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, - u64 dma_offset) + u64 dma_offset, unsigned page_shift) { tbl->it_blocksize = 16; tbl->it_base = (unsigned long)tce_mem; - tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; + tbl->it_page_shift = page_shift; tbl->it_offset = dma_offset >> tbl->it_page_shift; tbl->it_index = 0; tbl->it_size = tce_size >> 3; @@ -656,7 +706,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose) if (WARN_ON(!tbl)) return NULL; pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)), - be32_to_cpup(sizep), 0); + be32_to_cpup(sizep), 0, IOMMU_PAGE_SHIFT_4K); iommu_init_table(tbl, hose->node); iommu_register_group(tbl, pci_domain_nr(hose->bus), 0); @@ -711,6 +761,17 @@ int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) return __dma_set_mask(&pdev->dev, dma_mask); } +u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + + if (phb && phb->dma_get_required_mask) + return phb->dma_get_required_mask(phb, pdev); + + return __dma_get_required_mask(&pdev->dev); +} + void pnv_pci_shutdown(void) { struct pci_controller *hose; @@ -842,5 +903,4 @@ static int __init tce_iommu_bus_notifier_init(void) bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); return 0; } - -subsys_initcall_sync(tce_iommu_bus_notifier_init); +machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 676232c34328..34d29eb2a4de 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -21,6 +21,8 @@ enum pnv_phb_model { #define PNV_IODA_PE_DEV (1 << 0) /* PE has single PCI device */ #define PNV_IODA_PE_BUS (1 << 1) /* PE has primary PCI bus */ #define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */ +#define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */ +#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */ /* Data associated with a PE, including IOMMU tracking etc.. */ struct pnv_phb; @@ -64,6 +66,10 @@ struct pnv_ioda_pe { */ int mve_number; + /* PEs in compound case */ + struct pnv_ioda_pe *master; + struct list_head slaves; + /* Link in list of PE#s */ struct list_head dma_link; struct list_head list; @@ -79,6 +85,8 @@ struct pnv_eeh_ops { int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len); int (*configure_bridge)(struct eeh_pe *pe); + int (*err_inject)(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask); int (*next_error)(struct eeh_pe **pe); }; #endif /* CONFIG_EEH */ @@ -116,9 +124,17 @@ struct pnv_phb { void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev, u64 dma_mask); + u64 (*dma_get_required_mask)(struct pnv_phb *phb, + struct pci_dev *pdev); void (*fixup_phb)(struct pci_controller *hose); u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); void (*shutdown)(struct pnv_phb *phb); + int (*init_m64)(struct pnv_phb *phb); + void (*alloc_m64_pe)(struct pnv_phb *phb); + int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all); + int (*get_pe_state)(struct pnv_phb *phb, int pe_no); + void (*freeze_pe)(struct pnv_phb *phb, int pe_no); + int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt); union { struct { @@ -129,9 +145,20 @@ struct pnv_phb { /* Global bridge info */ unsigned int total_pe; unsigned int reserved_pe; + + /* 32-bit MMIO window */ unsigned int m32_size; unsigned int m32_segsize; unsigned int m32_pci_base; + + /* 64-bit MMIO window */ + unsigned int m64_bar_idx; + unsigned long m64_size; + unsigned long m64_segsize; + unsigned long m64_base; + unsigned long m64_bar_alloc; + + /* IO ports */ unsigned int io_size; unsigned int io_segsize; unsigned int io_pci_base; @@ -198,7 +225,7 @@ int pnv_pci_cfg_write(struct device_node *dn, int where, int size, u32 val); extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, - u64 dma_offset); + u64 dma_offset, unsigned page_shift); extern void pnv_pci_init_p5ioc2_hub(struct device_node *np); extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 75501bfede7f..6c8e2d188cd0 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -13,6 +13,7 @@ struct pci_dev; extern void pnv_pci_init(void); extern void pnv_pci_shutdown(void); extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask); +extern u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev); #else static inline void pnv_pci_init(void) { } static inline void pnv_pci_shutdown(void) { } @@ -21,6 +22,11 @@ static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) { return -ENODEV; } + +static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) +{ + return 0; +} #endif extern void pnv_lpc_init(void); diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 1cb160dc1609..80db43944afe 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -123,4 +123,4 @@ static __init int rng_init(void) return 0; } -subsys_initcall(rng_init); +machine_subsys_initcall(powernv, rng_init); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index d9b88fa7c5a3..3f9546d8a51f 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -173,6 +173,14 @@ static int pnv_dma_set_mask(struct device *dev, u64 dma_mask) return __dma_set_mask(dev, dma_mask); } +static u64 pnv_dma_get_required_mask(struct device *dev) +{ + if (dev_is_pci(dev)) + return pnv_pci_dma_get_required_mask(to_pci_dev(dev)); + + return __dma_get_required_mask(dev); +} + static void pnv_shutdown(void) { /* Let the PCI code clear up IODA tables */ @@ -264,6 +272,8 @@ static void __init pnv_setup_machdep_opal(void) ppc_md.halt = pnv_halt; ppc_md.machine_check_exception = opal_machine_check; ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; + ppc_md.hmi_exception_early = opal_hmi_exception_early; + ppc_md.handle_hmi_exception = opal_handle_hmi_exception; } #ifdef CONFIG_PPC_POWERNV_RTAS @@ -305,7 +315,7 @@ static int __init pnv_probe(void) * Returns the cpu frequency for 'cpu' in Hz. This is used by * /proc/cpuinfo */ -unsigned long pnv_get_proc_freq(unsigned int cpu) +static unsigned long pnv_get_proc_freq(unsigned int cpu) { unsigned long ret_freq; @@ -333,6 +343,7 @@ define_machine(powernv) { .power_save = power7_idle, .calibrate_decr = generic_calibrate_decr, .dma_set_mask = pnv_dma_set_mask, + .dma_get_required_mask = pnv_dma_get_required_mask, #ifdef CONFIG_KEXEC .kexec_cpu_down = pnv_kexec_cpu_down, #endif diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 5fcfcf44e3a9..4753958cd509 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -54,7 +54,7 @@ static void pnv_smp_setup_cpu(int cpu) #endif } -int pnv_smp_kick_cpu(int nr) +static int pnv_smp_kick_cpu(int nr) { unsigned int pcpu = get_hard_smp_processor_id(nr); unsigned long start_here = @@ -168,9 +168,9 @@ static void pnv_smp_cpu_kill_self(void) power7_nap(1); ppc64_runlatch_on(); - /* Reenable IRQs briefly to clear the IPI that woke us */ - local_irq_enable(); - local_irq_disable(); + /* Clear the IPI that woke us up */ + icp_native_flush_interrupt(); + local_paca->irq_happened &= PACA_IRQ_HARD_DIS; mb(); if (cpu_core_split_required()) diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 894ecb3eb596..c87f96b79d1a 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -24,6 +24,7 @@ #include <asm/smp.h> #include "subcore.h" +#include "powernv.h" /* diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 2d8bf15879fd..fc44ad0475f8 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -555,7 +555,6 @@ static int cmm_mem_going_offline(void *arg) pa_last = pa_last->next; free_page((unsigned long)cmm_page_list); cmm_page_list = pa_last; - continue; } } pa_curr = pa_curr->next; diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 022b38e6a80b..fdf01b660d59 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -17,6 +17,7 @@ #include <linux/slab.h> #include <linux/of.h> #include "offline_states.h" +#include "pseries.h" #include <asm/prom.h> #include <asm/machdep.h> @@ -86,6 +87,7 @@ static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa, } of_node_set_flag(dn, OF_DYNAMIC); + of_node_init(dn); return dn; } @@ -362,7 +364,8 @@ static int dlpar_online_cpu(struct device_node *dn) int rc = 0; unsigned int cpu; int len, nthreads, i; - const u32 *intserv; + const __be32 *intserv; + u32 thread; intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); if (!intserv) @@ -372,8 +375,9 @@ static int dlpar_online_cpu(struct device_node *dn) cpu_maps_update_begin(); for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != intserv[i]) + if (get_hard_smp_processor_id(cpu) != thread) continue; BUG_ON(get_cpu_current_state(cpu) != CPU_STATE_OFFLINE); @@ -387,7 +391,7 @@ static int dlpar_online_cpu(struct device_node *dn) } if (cpu == num_possible_cpus()) printk(KERN_WARNING "Could not find cpu to online " - "with physical id 0x%x\n", intserv[i]); + "with physical id 0x%x\n", thread); } cpu_maps_update_done(); @@ -399,10 +403,10 @@ out: static ssize_t dlpar_cpu_probe(const char *buf, size_t count) { struct device_node *dn, *parent; - unsigned long drc_index; + u32 drc_index; int rc; - rc = strict_strtoul(buf, 0, &drc_index); + rc = kstrtou32(buf, 0, &drc_index); if (rc) return -EINVAL; @@ -441,7 +445,8 @@ static int dlpar_offline_cpu(struct device_node *dn) int rc = 0; unsigned int cpu; int len, nthreads, i; - const u32 *intserv; + const __be32 *intserv; + u32 thread; intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); if (!intserv) @@ -451,8 +456,9 @@ static int dlpar_offline_cpu(struct device_node *dn) cpu_maps_update_begin(); for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != intserv[i]) + if (get_hard_smp_processor_id(cpu) != thread) continue; if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE) @@ -474,14 +480,14 @@ static int dlpar_offline_cpu(struct device_node *dn) * Upgrade it's state to CPU_STATE_OFFLINE. */ set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); - BUG_ON(plpar_hcall_norets(H_PROD, intserv[i]) + BUG_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS); __cpu_die(cpu); break; } if (cpu == num_possible_cpus()) printk(KERN_WARNING "Could not find cpu to offline " - "with physical id 0x%x\n", intserv[i]); + "with physical id 0x%x\n", thread); } cpu_maps_update_done(); @@ -493,15 +499,15 @@ out: static ssize_t dlpar_cpu_release(const char *buf, size_t count) { struct device_node *dn; - const u32 *drc_index; + u32 drc_index; int rc; dn = of_find_node_by_path(buf); if (!dn) return -EINVAL; - drc_index = of_get_property(dn, "ibm,my-drc-index", NULL); - if (!drc_index) { + rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index); + if (rc) { of_node_put(dn); return -EINVAL; } @@ -512,7 +518,7 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count) return -EINVAL; } - rc = dlpar_release_drc(*drc_index); + rc = dlpar_release_drc(drc_index); if (rc) { of_node_put(dn); return rc; @@ -520,7 +526,7 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count) rc = dlpar_detach_node(dn); if (rc) { - dlpar_acquire_drc(*drc_index); + dlpar_acquire_drc(drc_index); return rc; } diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 7d61498e45c0..1062f71f5a85 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -29,6 +29,7 @@ #include <asm/lppaca.h> #include <asm/debug.h> #include <asm/plpar_wrappers.h> +#include <asm/machdep.h> struct dtl { struct dtl_entry *buf; @@ -391,4 +392,4 @@ err_remove_dir: err: return rc; } -arch_initcall(dtl_init); +machine_arch_initcall(pseries, dtl_init); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 0bec0c02c5e7..a6c7e19f5eb3 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -88,29 +88,14 @@ static int pseries_eeh_init(void) * and its variant since the old firmware probably support address * of domain/bus/slot/function for EEH RTAS operations. */ - if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,set-eeh-option> invalid\n", - __func__); - return -EINVAL; - } else if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,set-slot-reset> invalid\n", - __func__); - return -EINVAL; - } else if (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && - ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,read-slot-reset-state2> and " - "<ibm,read-slot-reset-state> invalid\n", - __func__); - return -EINVAL; - } else if (ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,slot-error-detail> invalid\n", - __func__); - return -EINVAL; - } else if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE && - ibm_configure_bridge == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: RTAS service <ibm,configure-pe> and " - "<ibm,configure-bridge> invalid\n", - __func__); + if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE || + ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE || + (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && + ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) || + ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE || + (ibm_configure_pe == RTAS_UNKNOWN_SERVICE && + ibm_configure_bridge == RTAS_UNKNOWN_SERVICE)) { + pr_info("EEH functionality not supported\n"); return -EINVAL; } @@ -118,17 +103,17 @@ static int pseries_eeh_init(void) spin_lock_init(&slot_errbuf_lock); eeh_error_buf_size = rtas_token("rtas-error-log-max"); if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { - pr_warning("%s: unknown EEH error log size\n", + pr_info("%s: unknown EEH error log size\n", __func__); eeh_error_buf_size = 1024; } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { - pr_warning("%s: EEH error log size %d exceeds the maximal %d\n", + pr_info("%s: EEH error log size %d exceeds the maximal %d\n", __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); eeh_error_buf_size = RTAS_ERROR_LOG_MAX; } /* Set EEH probe mode */ - eeh_probe_mode_set(EEH_PROBE_MODE_DEVTREE); + eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); return 0; } @@ -270,7 +255,7 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) /* Retrieve the device address */ regs = of_get_property(dn, "reg", NULL); if (!regs) { - pr_warning("%s: OF node property %s::reg not found\n", + pr_warn("%s: OF node property %s::reg not found\n", __func__, dn->full_name); return NULL; } @@ -297,7 +282,7 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) enable = 1; if (enable) { - eeh_set_enable(true); + eeh_add_flag(EEH_ENABLED); eeh_add_to_parent_pe(edev); pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n", @@ -349,7 +334,9 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) if (pe->addr) config_addr = pe->addr; break; - + case EEH_OPT_FREEZE_PE: + /* Not support */ + return 0; default: pr_err("%s: Invalid option %d\n", __func__, option); @@ -398,7 +385,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) pe->config_addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), 0); if (ret) { - pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n", + pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n", __func__, pe->phb->global_number, pe->config_addr); return 0; } @@ -411,7 +398,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) pe->config_addr, BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), 0); if (ret) { - pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n", + pr_warn("%s: Failed to get address for PHB#%d-PE#%x\n", __func__, pe->phb->global_number, pe->config_addr); return 0; } @@ -584,17 +571,17 @@ static int pseries_eeh_wait_state(struct eeh_pe *pe, int max_wait) return ret; if (max_wait <= 0) { - pr_warning("%s: Timeout when getting PE's state (%d)\n", + pr_warn("%s: Timeout when getting PE's state (%d)\n", __func__, max_wait); return EEH_STATE_NOT_SUPPORT; } if (mwait <= 0) { - pr_warning("%s: Firmware returned bad wait value %d\n", + pr_warn("%s: Firmware returned bad wait value %d\n", __func__, mwait); mwait = EEH_STATE_MIN_WAIT_TIME; } else if (mwait > EEH_STATE_MAX_WAIT_TIME) { - pr_warning("%s: Firmware returned too long wait value %d\n", + pr_warn("%s: Firmware returned too long wait value %d\n", __func__, mwait); mwait = EEH_STATE_MAX_WAIT_TIME; } @@ -675,7 +662,7 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) } if (ret) - pr_warning("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n", + pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n", __func__, pe->phb->global_number, pe->addr, ret); return ret; @@ -729,6 +716,7 @@ static struct eeh_ops pseries_eeh_ops = { .wait_state = pseries_eeh_wait_state, .get_log = pseries_eeh_get_log, .configure_bridge = pseries_eeh_configure_bridge, + .err_inject = NULL, .read_config = pseries_eeh_read_config, .write_config = pseries_eeh_write_config, .next_error = NULL, @@ -743,10 +731,7 @@ static struct eeh_ops pseries_eeh_ops = { */ static int __init eeh_pseries_init(void) { - int ret = -EINVAL; - - if (!machine_is(pseries)) - return ret; + int ret; ret = eeh_ops_register(&pseries_eeh_ops); if (!ret) @@ -757,5 +742,4 @@ static int __init eeh_pseries_init(void) return ret; } - -early_initcall(eeh_pseries_init); +machine_early_initcall(pseries, eeh_pseries_init); diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 20d62975856f..b174fa751d26 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -90,7 +90,7 @@ static void rtas_stop_self(void) { static struct rtas_args args = { .nargs = 0, - .nret = 1, + .nret = cpu_to_be32(1), .rets = &args.args[0], }; @@ -312,7 +312,8 @@ static void pseries_remove_processor(struct device_node *np) { unsigned int cpu; int len, nthreads, i; - const u32 *intserv; + const __be32 *intserv; + u32 thread; intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); if (!intserv) @@ -322,8 +323,9 @@ static void pseries_remove_processor(struct device_node *np) cpu_maps_update_begin(); for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); for_each_present_cpu(cpu) { - if (get_hard_smp_processor_id(cpu) != intserv[i]) + if (get_hard_smp_processor_id(cpu) != thread) continue; BUG_ON(cpu_online(cpu)); set_cpu_present(cpu, false); @@ -332,7 +334,7 @@ static void pseries_remove_processor(struct device_node *np) } if (cpu >= nr_cpu_ids) printk(KERN_WARNING "Could not find cpu to remove " - "with physical id 0x%x\n", intserv[i]); + "with physical id 0x%x\n", thread); } cpu_maps_update_done(); } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 7995135170a3..187ecfab8362 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -20,6 +20,7 @@ #include <asm/machdep.h> #include <asm/prom.h> #include <asm/sparsemem.h> +#include "pseries.h" unsigned long pseries_memory_block_size(void) { @@ -113,7 +114,7 @@ out: static int pseries_remove_mem_node(struct device_node *np) { const char *type; - const unsigned int *regs; + const __be32 *regs; unsigned long base; unsigned int lmb_size; int ret = -EINVAL; @@ -132,8 +133,8 @@ static int pseries_remove_mem_node(struct device_node *np) if (!regs) return ret; - base = *(unsigned long *)regs; - lmb_size = regs[3]; + base = be64_to_cpu(*(unsigned long *)regs); + lmb_size = be32_to_cpu(regs[3]); pseries_remove_memblock(base, lmb_size); return 0; @@ -146,14 +147,14 @@ static inline int pseries_remove_memblock(unsigned long base, } static inline int pseries_remove_mem_node(struct device_node *np) { - return -EOPNOTSUPP; + return 0; } #endif /* CONFIG_MEMORY_HOTREMOVE */ static int pseries_add_mem_node(struct device_node *np) { const char *type; - const unsigned int *regs; + const __be32 *regs; unsigned long base; unsigned int lmb_size; int ret = -EINVAL; @@ -172,8 +173,8 @@ static int pseries_add_mem_node(struct device_node *np) if (!regs) return ret; - base = *(unsigned long *)regs; - lmb_size = regs[3]; + base = be64_to_cpu(*(unsigned long *)regs); + lmb_size = be32_to_cpu(regs[3]); /* * Update memory region to represent the memory add @@ -187,14 +188,14 @@ static int pseries_update_drconf_memory(struct of_prop_reconfig *pr) struct of_drconf_cell *new_drmem, *old_drmem; unsigned long memblock_size; u32 entries; - u32 *p; + __be32 *p; int i, rc = -EINVAL; memblock_size = pseries_memory_block_size(); if (!memblock_size) return -EINVAL; - p = (u32 *)of_get_property(pr->dn, "ibm,dynamic-memory", NULL); + p = (__be32 *) pr->old_prop->value; if (!p) return -EINVAL; @@ -203,28 +204,30 @@ static int pseries_update_drconf_memory(struct of_prop_reconfig *pr) * entries. Get the niumber of entries and skip to the array of * of_drconf_cell's. */ - entries = *p++; + entries = be32_to_cpu(*p++); old_drmem = (struct of_drconf_cell *)p; - p = (u32 *)pr->prop->value; + p = (__be32 *)pr->prop->value; p++; new_drmem = (struct of_drconf_cell *)p; for (i = 0; i < entries; i++) { - if ((old_drmem[i].flags & DRCONF_MEM_ASSIGNED) && - (!(new_drmem[i].flags & DRCONF_MEM_ASSIGNED))) { - rc = pseries_remove_memblock(old_drmem[i].base_addr, + if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) && + (!(be32_to_cpu(new_drmem[i].flags) & DRCONF_MEM_ASSIGNED))) { + rc = pseries_remove_memblock( + be64_to_cpu(old_drmem[i].base_addr), memblock_size); break; - } else if ((!(old_drmem[i].flags & DRCONF_MEM_ASSIGNED)) && - (new_drmem[i].flags & DRCONF_MEM_ASSIGNED)) { - rc = memblock_add(old_drmem[i].base_addr, + } else if ((!(be32_to_cpu(old_drmem[i].flags) & + DRCONF_MEM_ASSIGNED)) && + (be32_to_cpu(new_drmem[i].flags) & + DRCONF_MEM_ASSIGNED)) { + rc = memblock_add(be64_to_cpu(old_drmem[i].base_addr), memblock_size); rc = (rc < 0) ? -EINVAL : 0; break; } } - return rc; } diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 99ecf0a5a929..3fda3f17b84e 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -12,9 +12,13 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ptrace.h> +#include <asm/jump_label.h> + + .section ".text" #ifdef CONFIG_TRACEPOINTS +#ifndef CONFIG_JUMP_LABEL .section ".toc","aw" .globl hcall_tracepoint_refcount @@ -22,21 +26,13 @@ hcall_tracepoint_refcount: .llong 0 .section ".text" +#endif /* * precall must preserve all registers. use unused STK_PARAM() - * areas to save snapshots and opcode. We branch around this - * in early init (eg when populating the MMU hashtable) by using an - * unconditional cpu feature. + * areas to save snapshots and opcode. */ #define HCALL_INST_PRECALL(FIRST_REG) \ -BEGIN_FTR_SECTION; \ - b 1f; \ -END_FTR_SECTION(0, 1); \ - ld r12,hcall_tracepoint_refcount@toc(r2); \ - std r12,32(r1); \ - cmpdi r12,0; \ - beq+ 1f; \ mflr r0; \ std r3,STK_PARAM(R3)(r1); \ std r4,STK_PARAM(R4)(r1); \ @@ -50,45 +46,29 @@ END_FTR_SECTION(0, 1); \ addi r4,r1,STK_PARAM(FIRST_REG); \ stdu r1,-STACK_FRAME_OVERHEAD(r1); \ bl __trace_hcall_entry; \ - addi r1,r1,STACK_FRAME_OVERHEAD; \ - ld r0,16(r1); \ - ld r3,STK_PARAM(R3)(r1); \ - ld r4,STK_PARAM(R4)(r1); \ - ld r5,STK_PARAM(R5)(r1); \ - ld r6,STK_PARAM(R6)(r1); \ - ld r7,STK_PARAM(R7)(r1); \ - ld r8,STK_PARAM(R8)(r1); \ - ld r9,STK_PARAM(R9)(r1); \ - ld r10,STK_PARAM(R10)(r1); \ - mtlr r0; \ -1: + ld r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ + ld r4,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1); \ + ld r5,STACK_FRAME_OVERHEAD+STK_PARAM(R5)(r1); \ + ld r6,STACK_FRAME_OVERHEAD+STK_PARAM(R6)(r1); \ + ld r7,STACK_FRAME_OVERHEAD+STK_PARAM(R7)(r1); \ + ld r8,STACK_FRAME_OVERHEAD+STK_PARAM(R8)(r1); \ + ld r9,STACK_FRAME_OVERHEAD+STK_PARAM(R9)(r1); \ + ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R10)(r1) /* * postcall is performed immediately before function return which - * allows liberal use of volatile registers. We branch around this - * in early init (eg when populating the MMU hashtable) by using an - * unconditional cpu feature. + * allows liberal use of volatile registers. */ #define __HCALL_INST_POSTCALL \ -BEGIN_FTR_SECTION; \ - b 1f; \ -END_FTR_SECTION(0, 1); \ - ld r12,32(r1); \ - cmpdi r12,0; \ - beq+ 1f; \ - mflr r0; \ - ld r6,STK_PARAM(R3)(r1); \ - std r3,STK_PARAM(R3)(r1); \ + ld r0,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ + std r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \ mr r4,r3; \ - mr r3,r6; \ - std r0,16(r1); \ - stdu r1,-STACK_FRAME_OVERHEAD(r1); \ + mr r3,r0; \ bl __trace_hcall_exit; \ + ld r0,STACK_FRAME_OVERHEAD+16(r1); \ addi r1,r1,STACK_FRAME_OVERHEAD; \ - ld r0,16(r1); \ ld r3,STK_PARAM(R3)(r1); \ - mtlr r0; \ -1: + mtlr r0 #define HCALL_INST_POSTCALL_NORETS \ li r5,0; \ @@ -98,37 +78,62 @@ END_FTR_SECTION(0, 1); \ mr r5,BUFREG; \ __HCALL_INST_POSTCALL +#ifdef CONFIG_JUMP_LABEL +#define HCALL_BRANCH(LABEL) \ + ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key) +#else + +/* + * We branch around this in early init (eg when populating the MMU + * hashtable) by using an unconditional cpu feature. + */ +#define HCALL_BRANCH(LABEL) \ +BEGIN_FTR_SECTION; \ + b 1f; \ +END_FTR_SECTION(0, 1); \ + ld r12,hcall_tracepoint_refcount@toc(r2); \ + std r12,32(r1); \ + cmpdi r12,0; \ + bne- LABEL; \ +1: +#endif + #else #define HCALL_INST_PRECALL(FIRST_ARG) #define HCALL_INST_POSTCALL_NORETS #define HCALL_INST_POSTCALL(BUFREG) +#define HCALL_BRANCH(LABEL) #endif - .text - _GLOBAL_TOC(plpar_hcall_norets) HMT_MEDIUM mfcr r0 stw r0,8(r1) - - HCALL_INST_PRECALL(R4) - + HCALL_BRANCH(plpar_hcall_norets_trace) HVSC /* invoke the hypervisor */ - HCALL_INST_POSTCALL_NORETS - lwz r0,8(r1) mtcrf 0xff,r0 blr /* return r3 = status */ +#ifdef CONFIG_TRACEPOINTS +plpar_hcall_norets_trace: + HCALL_INST_PRECALL(R4) + HVSC + HCALL_INST_POSTCALL_NORETS + lwz r0,8(r1) + mtcrf 0xff,r0 + blr +#endif + _GLOBAL_TOC(plpar_hcall) HMT_MEDIUM mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL(R5) + HCALL_BRANCH(plpar_hcall_trace) std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ @@ -147,12 +152,40 @@ _GLOBAL_TOC(plpar_hcall) std r6, 16(r12) std r7, 24(r12) + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +#ifdef CONFIG_TRACEPOINTS +plpar_hcall_trace: + HCALL_INST_PRECALL(R5) + + std r4,STK_PARAM(R4)(r1) + mr r0,r4 + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + + HVSC + + ld r12,STK_PARAM(R4)(r1) + std r4,0(r12) + std r5,8(r12) + std r6,16(r12) + std r7,24(r12) + HCALL_INST_POSTCALL(r12) lwz r0,8(r1) mtcrf 0xff,r0 - blr /* return r3 = status */ + blr +#endif /* * plpar_hcall_raw can be called in real mode. kexec/kdump need some @@ -194,7 +227,7 @@ _GLOBAL_TOC(plpar_hcall9) mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL(R5) + HCALL_BRANCH(plpar_hcall9_trace) std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ @@ -222,12 +255,49 @@ _GLOBAL_TOC(plpar_hcall9) std r11,56(r12) std r0, 64(r12) + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +#ifdef CONFIG_TRACEPOINTS +plpar_hcall9_trace: + HCALL_INST_PRECALL(R5) + + std r4,STK_PARAM(R4)(r1) + mr r0,r4 + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R11)(r1) + ld r11,STACK_FRAME_OVERHEAD+STK_PARAM(R12)(r1) + ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R13)(r1) + + HVSC + + mr r0,r12 + ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1) + std r4,0(r12) + std r5,8(r12) + std r6,16(r12) + std r7,24(r12) + std r8,32(r12) + std r9,40(r12) + std r10,48(r12) + std r11,56(r12) + std r0,64(r12) + HCALL_INST_POSTCALL(r12) lwz r0,8(r1) mtcrf 0xff,r0 - blr /* return r3 = status */ + blr +#endif /* See plpar_hcall_raw to see why this is needed */ _GLOBAL(plpar_hcall9_raw) diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index cf4e7736e4f1..4575f0c9e521 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -27,6 +27,7 @@ #include <asm/firmware.h> #include <asm/cputable.h> #include <asm/trace.h> +#include <asm/machdep.h> DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); @@ -162,4 +163,4 @@ static int __init hcall_inst_init(void) return 0; } -__initcall(hcall_inst_init); +machine_device_initcall(pseries, hcall_inst_init); diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c index 4557e91626c4..eedb64594dc5 100644 --- a/arch/powerpc/platforms/pseries/hvcserver.c +++ b/arch/powerpc/platforms/pseries/hvcserver.c @@ -163,8 +163,8 @@ int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head, return retval; } - last_p_partition_ID = pi_buff[0]; - last_p_unit_address = pi_buff[1]; + last_p_partition_ID = be64_to_cpu(pi_buff[0]); + last_p_unit_address = be64_to_cpu(pi_buff[1]); /* This indicates that there are no further partners */ if (last_p_partition_ID == ~0UL diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 33b552ffbe57..de1ec54a2a57 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -329,16 +329,16 @@ struct direct_window { /* Dynamic DMA Window support */ struct ddw_query_response { - __be32 windows_available; - __be32 largest_available_block; - __be32 page_size; - __be32 migration_capable; + u32 windows_available; + u32 largest_available_block; + u32 page_size; + u32 migration_capable; }; struct ddw_create_response { - __be32 liobn; - __be32 addr_hi; - __be32 addr_lo; + u32 liobn; + u32 addr_hi; + u32 addr_lo; }; static LIST_HEAD(direct_window_list); @@ -721,20 +721,22 @@ static int __init disable_ddw_setup(char *str) early_param("disable_ddw", disable_ddw_setup); -static void remove_ddw(struct device_node *np) +static void remove_ddw(struct device_node *np, bool remove_prop) { struct dynamic_dma_window_prop *dwp; struct property *win64; - const u32 *ddw_avail; + u32 ddw_avail[3]; u64 liobn; - int len, ret; + int ret = 0; + + ret = of_property_read_u32_array(np, "ibm,ddw-applicable", + &ddw_avail[0], 3); - ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len); win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); if (!win64) return; - if (!ddw_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp)) + if (ret || win64->length < sizeof(*dwp)) goto delprop; dwp = win64->value; @@ -761,7 +763,8 @@ static void remove_ddw(struct device_node *np) np->full_name, ret, ddw_avail[2], liobn); delprop: - ret = of_remove_property(np, win64); + if (remove_prop) + ret = of_remove_property(np, win64); if (ret) pr_warning("%s: failed to remove direct window property: %d\n", np->full_name, ret); @@ -805,7 +808,7 @@ static int find_existing_ddw_windows(void) window = kzalloc(sizeof(*window), GFP_KERNEL); if (!window || len < sizeof(struct dynamic_dma_window_prop)) { kfree(window); - remove_ddw(pdn); + remove_ddw(pdn, true); continue; } @@ -871,8 +874,9 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, do { /* extra outputs are LIOBN and dma-addr (hi, lo) */ - ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, cfg_addr, - BUID_HI(buid), BUID_LO(buid), page_shift, window_shift); + ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, + cfg_addr, BUID_HI(buid), BUID_LO(buid), + page_shift, window_shift); } while (rtas_busy_delay(ret)); dev_info(&dev->dev, "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " @@ -909,7 +913,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) int page_shift; u64 dma_addr, max_addr; struct device_node *dn; - const u32 *uninitialized_var(ddw_avail); + u32 ddw_avail[3]; struct direct_window *window; struct property *win64; struct dynamic_dma_window_prop *ddwprop; @@ -941,8 +945,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) * for the given node in that order. * the property is actually in the parent, not the PE */ - ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len); - if (!ddw_avail || len < 3 * sizeof(u32)) + ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", + &ddw_avail[0], 3); + if (ret) goto out_failed; /* @@ -965,11 +970,11 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) dev_dbg(&dev->dev, "no free dynamic windows"); goto out_failed; } - if (be32_to_cpu(query.page_size) & 4) { + if (query.page_size & 4) { page_shift = 24; /* 16MB */ - } else if (be32_to_cpu(query.page_size) & 2) { + } else if (query.page_size & 2) { page_shift = 16; /* 64kB */ - } else if (be32_to_cpu(query.page_size) & 1) { + } else if (query.page_size & 1) { page_shift = 12; /* 4kB */ } else { dev_dbg(&dev->dev, "no supported direct page size in mask %x", @@ -979,7 +984,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) /* verify the window * number of ptes will map the partition */ /* check largest block * page size > max memory hotplug addr */ max_addr = memory_hotplug_max(); - if (be32_to_cpu(query.largest_available_block) < (max_addr >> page_shift)) { + if (query.largest_available_block < (max_addr >> page_shift)) { dev_dbg(&dev->dev, "can't map partiton max 0x%llx with %u " "%llu-sized pages\n", max_addr, query.largest_available_block, 1ULL << page_shift); @@ -1005,8 +1010,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (ret != 0) goto out_free_prop; - ddwprop->liobn = create.liobn; - ddwprop->dma_base = cpu_to_be64(of_read_number(&create.addr_hi, 2)); + ddwprop->liobn = cpu_to_be32(create.liobn); + ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) | + create.addr_lo); ddwprop->tce_shift = cpu_to_be32(page_shift); ddwprop->window_shift = cpu_to_be32(len); @@ -1038,14 +1044,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); - dma_addr = of_read_number(&create.addr_hi, 2); + dma_addr = be64_to_cpu(ddwprop->dma_base); goto out_unlock; out_free_window: kfree(window); out_clear_window: - remove_ddw(pdn); + remove_ddw(pdn, true); out_free_prop: kfree(win64->name); @@ -1255,7 +1261,14 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti switch (action) { case OF_RECONFIG_DETACH_NODE: - remove_ddw(np); + /* + * Removing the property will invoke the reconfig + * notifier again, which causes dead-lock on the + * read-write semaphore of the notifier chain. So + * we have to remove the property when releasing + * the device node. + */ + remove_ddw(np, false); if (pci && pci->iommu_table) iommu_free_table(pci->iommu_table, np->full_name); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index b02af9ef3ff6..8c509d5397c6 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -26,6 +26,7 @@ #include <linux/dma-mapping.h> #include <linux/console.h> #include <linux/export.h> +#include <linux/static_key.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/page.h> @@ -58,8 +59,6 @@ EXPORT_SYMBOL(plpar_hcall); EXPORT_SYMBOL(plpar_hcall9); EXPORT_SYMBOL(plpar_hcall_norets); -extern void pSeries_find_serial_port(void); - void vpa_init(int cpu) { int hwcpu = get_hard_smp_processor_id(cpu); @@ -430,16 +429,17 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); } -static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm, - unsigned char *hpte_slot_array, - unsigned long addr, int psize) +static void pSeries_lpar_hugepage_invalidate(unsigned long vsid, + unsigned long addr, + unsigned char *hpte_slot_array, + int psize, int ssize) { - int ssize = 0, i, index = 0; + int i, index = 0; unsigned long s_addr = addr; unsigned int max_hpte_count, valid; unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH]; unsigned long slot_array[PPC64_HUGE_HPTE_BATCH]; - unsigned long shift, hidx, vpn = 0, vsid, hash, slot; + unsigned long shift, hidx, vpn = 0, hash, slot; shift = mmu_psize_defs[psize].shift; max_hpte_count = 1U << (PMD_SHIFT - shift); @@ -452,15 +452,6 @@ static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm, /* get the vpn */ addr = s_addr + (i * (1ul << shift)); - if (!is_kernel_addr(addr)) { - ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); - } else { - vsid = get_kernel_vsid(addr, mmu_kernel_ssize); - ssize = mmu_kernel_ssize; - } - vpn = hpt_vpn(addr, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); if (hidx & _PTEIDX_SECONDARY) @@ -649,6 +640,19 @@ EXPORT_SYMBOL(arch_free_page); #endif #ifdef CONFIG_TRACEPOINTS +#ifdef HAVE_JUMP_LABEL +struct static_key hcall_tracepoint_key = STATIC_KEY_INIT; + +void hcall_tracepoint_regfunc(void) +{ + static_key_slow_inc(&hcall_tracepoint_key); +} + +void hcall_tracepoint_unregfunc(void) +{ + static_key_slow_dec(&hcall_tracepoint_key); +} +#else /* * We optimise our hcall path by placing hcall_tracepoint_refcount * directly in the TOC so we can check if the hcall tracepoints are @@ -658,13 +662,6 @@ EXPORT_SYMBOL(arch_free_page); /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ extern long hcall_tracepoint_refcount; -/* - * Since the tracing code might execute hcalls we need to guard against - * recursion. One example of this are spinlocks calling H_YIELD on - * shared processor partitions. - */ -static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); - void hcall_tracepoint_regfunc(void) { hcall_tracepoint_refcount++; @@ -674,6 +671,15 @@ void hcall_tracepoint_unregfunc(void) { hcall_tracepoint_refcount--; } +#endif + +/* + * Since the tracing code might execute hcalls we need to guard against + * recursion. One example of this are spinlocks calling H_YIELD on + * shared processor partitions. + */ +static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); + void __trace_hcall_entry(unsigned long opcode, unsigned long *args) { diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index bde7ebad3949..e7cb6d4a871a 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -18,6 +18,7 @@ #include <linux/delay.h> #include <linux/slab.h> +#include <asm/machdep.h> #include <asm/rtas.h> #include "pseries.h" @@ -319,7 +320,7 @@ static ssize_t migrate_store(struct class *class, struct class_attribute *attr, u64 streamid; int rc; - rc = strict_strtoull(buf, 0, &streamid); + rc = kstrtou64(buf, 0, &streamid); if (rc) return rc; @@ -362,4 +363,4 @@ static int __init mobility_sysfs_init(void) return rc; } -device_initcall(mobility_sysfs_init); +machine_device_initcall(pseries, mobility_sysfs_init); diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 0c882e83c4ce..18ff4626d74e 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -16,6 +16,7 @@ #include <asm/rtas.h> #include <asm/hw_irq.h> #include <asm/ppc-pci.h> +#include <asm/machdep.h> static int query_token, change_token; @@ -532,5 +533,4 @@ static int rtas_msi_init(void) return 0; } -arch_initcall(rtas_msi_init); - +machine_arch_initcall(pseries, rtas_msi_init); diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 0cc240b7f694..11a3b617ef5d 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -276,8 +276,10 @@ static ssize_t pSeries_nvram_get_size(void) * sequence #: The unique sequence # for each event. (until it wraps) * error log: The error log from event_scan */ -int nvram_write_os_partition(struct nvram_os_partition *part, char * buff, - int length, unsigned int err_type, unsigned int error_log_cnt) +static int nvram_write_os_partition(struct nvram_os_partition *part, + char *buff, int length, + unsigned int err_type, + unsigned int error_log_cnt) { int rc; loff_t tmp_index; @@ -330,9 +332,9 @@ int nvram_write_error_log(char * buff, int length, * * Reads nvram partition for at most 'length' */ -int nvram_read_partition(struct nvram_os_partition *part, char *buff, - int length, unsigned int *err_type, - unsigned int *error_log_cnt) +static int nvram_read_partition(struct nvram_os_partition *part, char *buff, + int length, unsigned int *err_type, + unsigned int *error_log_cnt) { int rc; loff_t tmp_index; diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index c413ec158ff5..67e48594040c 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -29,6 +29,7 @@ #include <asm/pci-bridge.h> #include <asm/prom.h> #include <asm/ppc-pci.h> +#include "pseries.h" #if 0 void pcibios_name_device(struct pci_dev *dev) diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 203cbf0dc101..89e23811199c 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -118,10 +118,10 @@ int remove_phb_dynamic(struct pci_controller *phb) } } - /* Unregister the bridge device from sysfs and remove the PCI bus */ - device_unregister(b->bridge); + /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); + device_unregister(b->bridge); /* Now release the IO resource */ if (res->flags & IORESOURCE_IO) diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c index 6d6266236446..c26eadde434c 100644 --- a/arch/powerpc/platforms/pseries/power.c +++ b/arch/powerpc/platforms/pseries/power.c @@ -25,6 +25,7 @@ #include <linux/string.h> #include <linux/errno.h> #include <linux/init.h> +#include <asm/machdep.h> unsigned long rtas_poweron_auto; /* default and normal state is 0 */ @@ -71,11 +72,11 @@ static int __init pm_init(void) return -ENOMEM; return sysfs_create_group(power_kobj, &attr_group); } -core_initcall(pm_init); +machine_core_initcall(pseries, pm_init); #else static int __init apo_pm_init(void) { return (sysfs_create_file(power_kobj, &auto_poweron_attr.attr)); } -__initcall(apo_pm_init); +machine_device_initcall(pseries, apo_pm_init); #endif diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 9c5778e6ed4b..5a4d0fc03b03 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -71,7 +71,7 @@ static int __init init_ras_IRQ(void) return 0; } -subsys_initcall(init_ras_IRQ); +machine_subsys_initcall(pseries, init_ras_IRQ); #define EPOW_SHUTDOWN_NORMAL 1 #define EPOW_SHUTDOWN_ON_UPS 2 @@ -126,7 +126,7 @@ struct epow_errorlog { #define EPOW_MAIN_ENCLOSURE 5 #define EPOW_POWER_OFF 7 -void rtas_parse_epow_errlog(struct rtas_error_log *log) +static void rtas_parse_epow_errlog(struct rtas_error_log *log) { struct pseries_errorlog *pseries_log; struct epow_errorlog *epow_log; diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 0435bb65d0aa..0f319521e002 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -69,6 +69,7 @@ static int pSeries_reconfig_add_node(const char *path, struct property *proplist np->properties = proplist; of_node_set_flag(np, OF_DYNAMIC); + of_node_init(np); np->parent = derive_parent(path); if (IS_ERR(np->parent)) { @@ -445,13 +446,10 @@ static int proc_ppc64_create_ofdt(void) { struct proc_dir_entry *ent; - if (!machine_is(pseries)) - return 0; - ent = proc_create("powerpc/ofdt", S_IWUSR, NULL, &ofdt_fops); if (ent) proc_set_size(ent, 0); return 0; } -__initcall(proc_ppc64_create_ofdt); +machine_device_initcall(pseries, proc_ppc64_create_ofdt); diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c index 72a102758d4e..e09608770909 100644 --- a/arch/powerpc/platforms/pseries/rng.c +++ b/arch/powerpc/platforms/pseries/rng.c @@ -42,4 +42,4 @@ static __init int rng_init(void) return 0; } -subsys_initcall(rng_init); +machine_subsys_initcall(pseries, rng_init); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index f2f40e64658f..125c589eeef5 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -232,8 +232,7 @@ static void __init pseries_discover_pic(void) struct device_node *np; const char *typep; - for (np = NULL; (np = of_find_node_by_name(np, - "interrupt-controller"));) { + for_each_node_by_name(np, "interrupt-controller") { typep = of_get_property(np, "compatible", NULL); if (strstr(typep, "open-pic")) { pSeries_mpic_node = of_node_get(np); @@ -351,7 +350,7 @@ static int alloc_dispatch_log_kmem_cache(void) return alloc_dispatch_logs(); } -early_initcall(alloc_dispatch_log_kmem_cache); +machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache); static void pseries_lpar_idle(void) { @@ -562,7 +561,7 @@ void pSeries_coalesce_init(void) * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions, * handle that here. (Stolen from parse_system_parameter_string) */ -void pSeries_cmo_feature_init(void) +static void pSeries_cmo_feature_init(void) { char *ptr, *key, *value, *end; int call_status; diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index b87b97849d4c..e76aefae2aa2 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -265,7 +265,7 @@ static int __init pseries_suspend_init(void) { int rc; - if (!machine_is(pseries) || !firmware_has_feature(FW_FEATURE_LPAR)) + if (!firmware_has_feature(FW_FEATURE_LPAR)) return 0; suspend_data.token = rtas_token("ibm,suspend-me"); @@ -280,5 +280,4 @@ static int __init pseries_suspend_init(void) suspend_set_ops(&pseries_suspend_ops); return 0; } - -__initcall(pseries_suspend_init); +machine_device_initcall(pseries, pseries_suspend_init); diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 47b6b9f81d43..ad56edc39919 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -314,7 +314,7 @@ axon_ram_remove(struct platform_device *device) return 0; } -static struct of_device_id axon_ram_device_id[] = { +static const struct of_device_id axon_ram_device_id[] = { { .type = "dma-memory" }, diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 62c47bb76517..9e5353ff6d1b 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -476,6 +476,11 @@ void __init alloc_dart_table(void) */ dart_tablebase = (unsigned long) __va(memblock_alloc_base(1UL<<24, 1UL<<24, 0x80000000L)); + /* + * The DART space is later unmapped from the kernel linear mapping and + * accessing dart_tablebase during kmemleak scanning will fault. + */ + kmemleak_no_scan((void *)dart_tablebase); printk(KERN_INFO "DART table allocated at: %lx\n", dart_tablebase); } diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c index e9056e438575..2d8a101b6b9e 100644 --- a/arch/powerpc/sysdev/dcr.c +++ b/arch/powerpc/sysdev/dcr.c @@ -230,5 +230,6 @@ EXPORT_SYMBOL_GPL(dcr_unmap_mmio); #ifdef CONFIG_PPC_DCR_NATIVE DEFINE_SPINLOCK(dcr_ind_lock); +EXPORT_SYMBOL_GPL(dcr_ind_lock); #endif /* defined(CONFIG_PPC_DCR_NATIVE) */ diff --git a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c index afc2dbf37011..90545ad1626e 100644 --- a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c +++ b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c @@ -171,7 +171,7 @@ static int mpc85xx_l2ctlr_of_remove(struct platform_device *dev) return 0; } -static struct of_device_id mpc85xx_l2ctlr_of_match[] = { +static const struct of_device_id mpc85xx_l2ctlr_of_match[] = { { .compatible = "fsl,p2020-l2-cache-controller", }, diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index be33c9768ea1..89cec0ed6a58 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -960,7 +960,7 @@ void mpic_set_vector(unsigned int virq, unsigned int vector) mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), vecpri); } -void mpic_set_destination(unsigned int virq, unsigned int cpuid) +static void mpic_set_destination(unsigned int virq, unsigned int cpuid) { struct mpic *mpic = mpic_from_irq(virq); unsigned int src = virq_to_hw(virq); diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index 2ff630267e9e..a7c7a9fc7530 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -143,7 +143,7 @@ void msi_bitmap_free(struct msi_bitmap *bmp) #define check(x) \ if (!(x)) printk("msi_bitmap: test failed at line %d\n", __LINE__); -void __init test_basics(void) +static void __init test_basics(void) { struct msi_bitmap bmp; int i, size = 512; @@ -188,7 +188,7 @@ void __init test_basics(void) kfree(bmp.bitmap); } -void __init test_of_node(void) +static void __init test_of_node(void) { u32 prop_data[] = { 10, 10, 25, 3, 40, 1, 100, 100, 200, 20 }; const char *expected_str = "0-9,20-24,28-39,41-99,220-255"; @@ -236,7 +236,7 @@ void __init test_of_node(void) kfree(bmp.bitmap); } -int __init msi_bitmap_selftest(void) +static int __init msi_bitmap_selftest(void) { printk(KERN_DEBUG "Running MSI bitmap self-tests ...\n"); diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c index c2dba7db71ad..026bbc3b2c47 100644 --- a/arch/powerpc/sysdev/mv64x60_dev.c +++ b/arch/powerpc/sysdev/mv64x60_dev.c @@ -23,7 +23,7 @@ /* These functions provide the necessary setup for the mv64x60 drivers. */ -static struct of_device_id __initdata of_mv64x60_devices[] = { +static const struct of_device_id of_mv64x60_devices[] __initconst = { { .compatible = "marvell,mv64306-devctrl", }, {} }; diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c index 5aaf86c03893..13e67d93a7c1 100644 --- a/arch/powerpc/sysdev/pmi.c +++ b/arch/powerpc/sysdev/pmi.c @@ -101,7 +101,7 @@ out: } -static struct of_device_id pmi_match[] = { +static const struct of_device_id pmi_match[] = { { .type = "ibm,pmi", .name = "ibm,pmi" }, { .type = "ibm,pmi" }, {}, diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index de8d9483bbe8..2fc4cf1b7557 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -155,6 +155,31 @@ static void icp_native_cause_ipi(int cpu, unsigned long data) icp_native_set_qirr(cpu, IPI_PRIORITY); } +/* + * Called when an interrupt is received on an off-line CPU to + * clear the interrupt, so that the CPU can go back to nap mode. + */ +void icp_native_flush_interrupt(void) +{ + unsigned int xirr = icp_native_get_xirr(); + unsigned int vec = xirr & 0x00ffffff; + + if (vec == XICS_IRQ_SPURIOUS) + return; + if (vec == XICS_IPI) { + /* Clear pending IPI */ + int cpu = smp_processor_id(); + kvmppc_set_host_ipi(cpu, 0); + icp_native_set_qirr(cpu, 0xff); + } else { + pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n", + vec); + xics_mask_unknown_vec(vec); + } + /* EOI the interrupt */ + icp_native_set_xirr(xirr); +} + void xics_wake_cpu(int cpu) { icp_native_set_qirr(cpu, IPI_PRIORITY); diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c index 83f943a8e0db..56f0524e47a6 100644 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ b/arch/powerpc/sysdev/xilinx_intc.c @@ -265,7 +265,7 @@ static void __init xilinx_i8259_setup_cascade(void) static inline void xilinx_i8259_setup_cascade(void) { return; } #endif /* defined(CONFIG_PPC_I8259) */ -static struct of_device_id xilinx_intc_match[] __initconst = { +static const struct of_device_id xilinx_intc_match[] __initconst = { { .compatible = "xlnx,opb-intc-1.00.c", }, { .compatible = "xlnx,xps-intc-1.00.a", }, {} diff --git a/arch/powerpc/sysdev/xilinx_pci.c b/arch/powerpc/sysdev/xilinx_pci.c index 1453b0eed220..fea5667699ed 100644 --- a/arch/powerpc/sysdev/xilinx_pci.c +++ b/arch/powerpc/sysdev/xilinx_pci.c @@ -27,7 +27,7 @@ #define PCI_HOST_ENABLE_CMD PCI_COMMAND_SERR | PCI_COMMAND_PARITY | PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY -static struct of_device_id xilinx_pci_match[] = { +static const struct of_device_id xilinx_pci_match[] = { { .compatible = "xlnx,plbv46-pci-1.03.a", }, {} }; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index d199bfa2f1fa..b988b5addf86 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -24,6 +24,7 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/bug.h> +#include <linux/nmi.h> #include <asm/ptrace.h> #include <asm/string.h> @@ -374,6 +375,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) #endif local_irq_save(flags); + hard_irq_disable(); bp = in_breakpoint_table(regs->nip, &offset); if (bp != NULL) { @@ -558,6 +560,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) #endif insert_cpu_bpts(); + touch_nmi_watchdog(); local_irq_restore(flags); return cmd != 'X' && cmd != EOF; @@ -2058,10 +2061,6 @@ static void dump_one_paca(int cpu) DUMP(p, kernel_toc, "lx"); DUMP(p, kernelbase, "lx"); DUMP(p, kernel_msr, "lx"); -#ifdef CONFIG_PPC_STD_MMU_64 - DUMP(p, stab_real, "lx"); - DUMP(p, stab_addr, "lx"); -#endif DUMP(p, emergency_sp, "p"); #ifdef CONFIG_PPC_BOOK3S_64 DUMP(p, mc_emergency_sp, "p"); @@ -2694,7 +2693,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid, } #ifdef CONFIG_PPC_BOOK3S_64 -static void dump_slb(void) +void dump_segments(void) { int i; unsigned long esid,vsid,valid; @@ -2726,34 +2725,6 @@ static void dump_slb(void) } } } - -static void dump_stab(void) -{ - int i; - unsigned long *tmp = (unsigned long *)local_paca->stab_addr; - - printf("Segment table contents of cpu 0x%x\n", smp_processor_id()); - - for (i = 0; i < PAGE_SIZE/16; i++) { - unsigned long a, b; - - a = *tmp++; - b = *tmp++; - - if (a || b) { - printf("%03d %016lx ", i, a); - printf("%016lx\n", b); - } - } -} - -void dump_segments(void) -{ - if (mmu_has_feature(MMU_FTR_SLB)) - dump_slb(); - else - dump_stab(); -} #endif #ifdef CONFIG_PPC_STD_MMU_32 |