summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/Kconfig6
-rw-r--r--arch/powerpc/boot/rs6000.h2
-rw-r--r--arch/powerpc/boot/treeboot-akebono.c2
-rw-r--r--arch/powerpc/boot/treeboot-currituck.c2
-rw-r--r--arch/powerpc/boot/treeboot-iss4xx.c2
-rw-r--r--arch/powerpc/configs/powernv_defconfig313
-rw-r--r--arch/powerpc/crypto/aes-spe-core.S4
-rw-r--r--arch/powerpc/crypto/aes-spe-glue.c2
-rw-r--r--arch/powerpc/include/asm/atomic.h159
-rw-r--r--arch/powerpc/include/asm/book3s/32/mmu-hash.h (renamed from arch/powerpc/include/asm/mmu-hash32.h)0
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-4k.h36
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-64k.h57
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h68
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h (renamed from arch/powerpc/include/asm/mmu-hash64.h)4
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h40
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-hash.h94
-rw-r--r--arch/powerpc/include/asm/cmpxchg.h237
-rw-r--r--arch/powerpc/include/asm/cputable.h20
-rw-r--r--arch/powerpc/include/asm/eeh.h6
-rw-r--r--arch/powerpc/include/asm/hugetlb.h2
-rw-r--r--arch/powerpc/include/asm/hvcall.h1
-rw-r--r--arch/powerpc/include/asm/hydra.h2
-rw-r--r--arch/powerpc/include/asm/io.h2
-rw-r--r--arch/powerpc/include/asm/machdep.h6
-rw-r--r--arch/powerpc/include/asm/mmu.h5
-rw-r--r--arch/powerpc/include/asm/module.h2
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h3
-rw-r--r--arch/powerpc/include/asm/opal.h3
-rw-r--r--arch/powerpc/include/asm/page.h111
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h9
-rw-r--r--arch/powerpc/include/asm/perf_event_server.h10
-rw-r--r--arch/powerpc/include/asm/pgalloc-64.h42
-rw-r--r--arch/powerpc/include/asm/pgtable-types.h103
-rw-r--r--arch/powerpc/include/asm/pmac_feature.h2
-rw-r--r--arch/powerpc/include/asm/processor.h2
-rw-r--r--arch/powerpc/include/asm/reg.h16
-rw-r--r--arch/powerpc/include/asm/reg_booke.h2
-rw-r--r--arch/powerpc/include/asm/smu.h2
-rw-r--r--arch/powerpc/include/asm/switch_to.h13
-rw-r--r--arch/powerpc/include/asm/tlbflush.h92
-rw-r--r--arch/powerpc/include/asm/trace.h8
-rw-r--r--arch/powerpc/include/asm/uninorth.h2
-rw-r--r--arch/powerpc/include/asm/xics.h2
-rw-r--r--arch/powerpc/include/uapi/asm/epapr_hcalls.h4
-rw-r--r--arch/powerpc/kernel/asm-offsets.c2
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S49
-rw-r--r--arch/powerpc/kernel/cputable.c27
-rw-r--r--arch/powerpc/kernel/eeh.c39
-rw-r--r--arch/powerpc/kernel/eeh_cache.c11
-rw-r--r--arch/powerpc/kernel/eeh_dev.c1
-rw-r--r--arch/powerpc/kernel/eeh_driver.c152
-rw-r--r--arch/powerpc/kernel/eeh_pe.c38
-rw-r--r--arch/powerpc/kernel/entry_64.S21
-rw-r--r--arch/powerpc/kernel/fpu.S25
-rw-r--r--arch/powerpc/kernel/head_44x.S2
-rw-r--r--arch/powerpc/kernel/idle_power7.S2
-rw-r--r--arch/powerpc/kernel/kgdb.c4
-rw-r--r--arch/powerpc/kernel/mce_power.c17
-rw-r--r--arch/powerpc/kernel/module_64.c2
-rw-r--r--arch/powerpc/kernel/pci-hotplug.c2
-rw-r--r--arch/powerpc/kernel/pci_dn.c19
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c4
-rw-r--r--arch/powerpc/kernel/process.c168
-rw-r--r--arch/powerpc/kernel/signal.c4
-rw-r--r--arch/powerpc/kernel/signal.h2
-rw-r--r--arch/powerpc/kernel/traps.c12
-rw-r--r--arch/powerpc/kernel/vector.S45
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu_host.c2
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c2
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_host.c2
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c2
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c2
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S2
-rw-r--r--arch/powerpc/kvm/book3s_xics.c2
-rw-r--r--arch/powerpc/kvm/booke.c2
-rw-r--r--arch/powerpc/kvm/e500mc.c2
-rw-r--r--arch/powerpc/mm/hash64_4k.c4
-rw-r--r--arch/powerpc/mm/hash64_64k.c15
-rw-r--r--arch/powerpc/mm/hash_utils_64.c87
-rw-r--r--arch/powerpc/mm/hugepage-hash64.c12
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c5
-rw-r--r--arch/powerpc/mm/hugetlbpage.c3
-rw-r--r--arch/powerpc/mm/init_64.c68
-rw-r--r--arch/powerpc/mm/mem.c10
-rw-r--r--arch/powerpc/mm/mmu_decl.h3
-rw-r--r--arch/powerpc/mm/pgtable_64.c43
-rw-r--r--arch/powerpc/mm/tlb_low_64e.S2
-rw-r--r--arch/powerpc/mm/tlb_nohash_low.S4
-rw-r--r--arch/powerpc/oprofile/op_model_cell.c4
-rw-r--r--arch/powerpc/perf/core-book3s.c2
-rw-r--r--arch/powerpc/perf/hv-24x7.c225
-rw-r--r--arch/powerpc/perf/hv-24x7.h3
-rw-r--r--arch/powerpc/perf/hv-gpci.c43
-rw-r--r--arch/powerpc/perf/power7-pmu.c18
-rw-r--r--arch/powerpc/perf/power8-events-list.h51
-rw-r--r--arch/powerpc/perf/power8-pmu.c112
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_pci.c2
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_cds.c2
-rw-r--r--arch/powerpc/platforms/powermac/cache.S2
-rw-r--r--arch/powerpc/platforms/powermac/feature.c6
-rw-r--r--arch/powerpc/platforms/powernv/Makefile2
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c303
-rw-r--r--arch/powerpc/platforms/powernv/idle.c6
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-msglog.c34
-rw-r--r--arch/powerpc/platforms/powernv/opal.c7
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c348
-rw-r--r--arch/powerpc/platforms/powernv/pci-p5ioc2.c271
-rw-r--r--arch/powerpc/platforms/powernv/pci.c43
-rw-r--r--arch/powerpc/platforms/powernv/pci.h153
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c2
-rw-r--r--arch/powerpc/platforms/ps3/gelic_udbg.c72
-rw-r--r--arch/powerpc/platforms/ps3/interrupt.c2
-rw-r--r--arch/powerpc/platforms/pseries/hvconsole.c2
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c9
-rw-r--r--arch/powerpc/platforms/pseries/setup.c2
-rw-r--r--arch/powerpc/sysdev/fsl_pci.c2
-rw-r--r--arch/powerpc/sysdev/fsl_rmu.c2
-rw-r--r--arch/powerpc/sysdev/i8259.c2
-rw-r--r--arch/powerpc/sysdev/mpic.c4
-rw-r--r--arch/powerpc/xmon/xmon.c120
123 files changed, 2762 insertions, 1522 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 91da283cd658..5130fa166a93 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -304,7 +304,7 @@ config ZONE_DMA32
config PGTABLE_LEVELS
int
default 2 if !PPC64
- default 3 if PPC_64K_PAGES
+ default 3 if PPC_64K_PAGES && !PPC_BOOK3S_64
default 4
source "init/Kconfig"
@@ -576,7 +576,7 @@ choice
config PPC_4K_PAGES
bool "4k page size"
- select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S
+ select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
config PPC_16K_PAGES
bool "16k page size"
@@ -585,7 +585,7 @@ config PPC_16K_PAGES
config PPC_64K_PAGES
bool "64k page size"
depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64)
- select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S
+ select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
config PPC_256K_PAGES
bool "256k page size"
diff --git a/arch/powerpc/boot/rs6000.h b/arch/powerpc/boot/rs6000.h
index 433f45084e41..d70517ccc0f7 100644
--- a/arch/powerpc/boot/rs6000.h
+++ b/arch/powerpc/boot/rs6000.h
@@ -239,5 +239,5 @@ struct external_reloc {
#define DEFAULT_DATA_SECTION_ALIGNMENT 4
#define DEFAULT_BSS_SECTION_ALIGNMENT 4
#define DEFAULT_TEXT_SECTION_ALIGNMENT 4
-/* For new sections we havn't heard of before */
+/* For new sections we haven't heard of before */
#define DEFAULT_SECTION_ALIGNMENT 4
diff --git a/arch/powerpc/boot/treeboot-akebono.c b/arch/powerpc/boot/treeboot-akebono.c
index b73174c34fe4..bcc5902f8462 100644
--- a/arch/powerpc/boot/treeboot-akebono.c
+++ b/arch/powerpc/boot/treeboot-akebono.c
@@ -38,7 +38,7 @@
BSS_STACK(4096);
-#define SPRN_PIR 0x11E /* Processor Indentification Register */
+#define SPRN_PIR 0x11E /* Processor Identification Register */
#define USERDATA_LEN 256 /* Length of userdata passed in by PIBS */
#define MAX_RANKS 0x4
#define DDR3_MR0CF 0x80010011U
diff --git a/arch/powerpc/boot/treeboot-currituck.c b/arch/powerpc/boot/treeboot-currituck.c
index 925ae43b7467..303d2074ee56 100644
--- a/arch/powerpc/boot/treeboot-currituck.c
+++ b/arch/powerpc/boot/treeboot-currituck.c
@@ -80,7 +80,7 @@ static void ibm_currituck_fixups(void)
}
}
-#define SPRN_PIR 0x11E /* Processor Indentification Register */
+#define SPRN_PIR 0x11E /* Processor Identification Register */
void platform_init(void)
{
unsigned long end_of_ram, avail_ram;
diff --git a/arch/powerpc/boot/treeboot-iss4xx.c b/arch/powerpc/boot/treeboot-iss4xx.c
index 329e710feda2..733f8bf25184 100644
--- a/arch/powerpc/boot/treeboot-iss4xx.c
+++ b/arch/powerpc/boot/treeboot-iss4xx.c
@@ -59,7 +59,7 @@ static void *iss_4xx_vmlinux_alloc(unsigned long size)
return (void *)ibm4xx_memstart;
}
-#define SPRN_PIR 0x11E /* Processor Indentification Register */
+#define SPRN_PIR 0x11E /* Processor Identification Register */
void platform_init(void)
{
unsigned long end_of_ram = 0x08000000;
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
new file mode 100644
index 000000000000..045031048f8d
--- /dev/null
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -0,0 +1,313 @@
+CONFIG_PPC64=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2048
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_IRQ_DOMAIN_DEBUG=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
+CONFIG_CGROUPS=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_USER_NS=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_OPAL_PRD=y
+# CONFIG_PPC_PSERIES is not set
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_IDLE=y
+CONFIG_HZ_100=y
+CONFIG_BINFMT_MISC=m
+CONFIG_PPC_TRANSACTIONAL_MEM=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_KEXEC=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_NUMA=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_PPC_64K_PAGES=y
+CONFIG_PPC_SUBPAGE_PROT=y
+CONFIG_SCHED_SMT=y
+CONFIG_PM=y
+CONFIG_PCI_MSI=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_ADVANCED is not set
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_MTD=y
+CONFIG_MTD_POWERNV_FLASH=y
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
+CONFIG_BLK_DEV_FD=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_VIRTIO_BLK=m
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_AMD74XX=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SRP_ATTRS=y
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_BE2ISCSI=m
+CONFIG_SCSI_MPT2SAS=m
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+CONFIG_SCSI_IPR=y
+CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
+CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+# CONFIG_ATA_SFF is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_UEVENT=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_NETCONSOLE=y
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_VHOST_NET=m
+CONFIG_VORTEX=y
+CONFIG_ACENIC=m
+CONFIG_ACENIC_OMIT_TIGON_I=y
+CONFIG_PCNET32=y
+CONFIG_TIGON3=y
+CONFIG_BNX2X=m
+CONFIG_CHELSIO_T1=m
+CONFIG_BE2NET=m
+CONFIG_S2IO=m
+CONFIG_E100=y
+CONFIG_E1000=y
+CONFIG_E1000E=y
+CONFIG_IXGB=m
+CONFIG_IXGBE=m
+CONFIG_MLX4_EN=m
+CONFIG_MYRI10GE=m
+CONFIG_QLGE=m
+CONFIG_NETXEN_NIC=m
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+CONFIG_INPUT_EVDEV=m
+CONFIG_INPUT_MISC=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_JSM=m
+CONFIG_VIRTIO_CONSOLE=m
+CONFIG_IPMI_HANDLER=y
+CONFIG_IPMI_DEVICE_INTERFACE=y
+CONFIG_IPMI_POWERNV=y
+CONFIG_RAW_DRIVER=y
+CONFIG_MAX_RAW_DEVS=1024
+CONFIG_DRM=y
+CONFIG_DRM_AST=y
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_OF=y
+CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G=y
+CONFIG_FB_RADEON=y
+CONFIG_FB_IBM_GXT4500=y
+CONFIG_LCD_PLATFORM=m
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_LOGO=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=y
+CONFIG_USB_MON=m
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=m
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_POWERNV=m
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+CONFIG_INFINIBAND_CXGB3=m
+CONFIG_INFINIBAND_CXGB4=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_INFINIBAND_IPOIB=m
+CONFIG_INFINIBAND_IPOIB_CM=y
+CONFIG_INFINIBAND_SRP=m
+CONFIG_INFINIBAND_ISER=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GENERIC=y
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_REISERFS_FS=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_OVERLAY_FS=m
+CONFIG_ISO9660_FS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_PSTORE=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_LATENCYTOP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_CODE_PATCHING_SELFTEST=y
+CONFIG_FTR_FIXUP_SELFTEST=y
+CONFIG_MSI_BITMAP_SELFTEST=y
+CONFIG_XMON=y
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_DEV_NX=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM_BOOK3S_64=m
+CONFIG_KVM_BOOK3S_64_HV=m
diff --git a/arch/powerpc/crypto/aes-spe-core.S b/arch/powerpc/crypto/aes-spe-core.S
index 5dc6bce90a77..bc6ff43a9889 100644
--- a/arch/powerpc/crypto/aes-spe-core.S
+++ b/arch/powerpc/crypto/aes-spe-core.S
@@ -61,7 +61,7 @@
* via bl/blr. It expects that caller has pre-xored input data with first
* 4 words of encryption key into rD0-rD3. Pointer/counter registers must
* have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
- * and rW0-rW3 and caller must execute a final xor on the ouput registers.
+ * and rW0-rW3 and caller must execute a final xor on the output registers.
* All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
*
*/
@@ -209,7 +209,7 @@ ppc_encrypt_block_loop:
* via bl/blr. It expects that caller has pre-xored input data with first
* 4 words of encryption key into rD0-rD3. Pointer/counter registers must
* have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
- * and rW0-rW3 and caller must execute a final xor on the ouput registers.
+ * and rW0-rW3 and caller must execute a final xor on the output registers.
* All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
*
*/
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
index 93ee046d12cd..6d99ebf2ea15 100644
--- a/arch/powerpc/crypto/aes-spe-glue.c
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -32,7 +32,7 @@
* 16 byte block block or 25 cycles per byte. Thus 768 bytes of input data
* will need an estimated maximum of 20,000 cycles. Headroom for cache misses
* included. Even with the low end model clocked at 667 MHz this equals to a
- * critical time window of less than 30us. The value has been choosen to
+ * critical time window of less than 30us. The value has been chosen to
* process a 512 byte disk block in one or a large 1400 bytes IPsec network
* packet in two runs.
*
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 55f106ed12bf..ae0751ef8788 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -12,6 +12,24 @@
#define ATOMIC_INIT(i) { (i) }
+/*
+ * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with
+ * a "bne-" instruction at the end, so an isync is enough as a acquire barrier
+ * on the platform without lwsync.
+ */
+#define __atomic_op_acquire(op, args...) \
+({ \
+ typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \
+ __asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory"); \
+ __ret; \
+})
+
+#define __atomic_op_release(op, args...) \
+({ \
+ __asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory"); \
+ op##_relaxed(args); \
+})
+
static __inline__ int atomic_read(const atomic_t *v)
{
int t;
@@ -42,27 +60,27 @@ static __inline__ void atomic_##op(int a, atomic_t *v) \
: "cc"); \
} \
-#define ATOMIC_OP_RETURN(op, asm_op) \
-static __inline__ int atomic_##op##_return(int a, atomic_t *v) \
+#define ATOMIC_OP_RETURN_RELAXED(op, asm_op) \
+static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \
{ \
int t; \
\
__asm__ __volatile__( \
- PPC_ATOMIC_ENTRY_BARRIER \
-"1: lwarx %0,0,%2 # atomic_" #op "_return\n" \
- #asm_op " %0,%1,%0\n" \
- PPC405_ERR77(0,%2) \
-" stwcx. %0,0,%2 \n" \
+"1: lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \
+ #asm_op " %0,%2,%0\n" \
+ PPC405_ERR77(0, %3) \
+" stwcx. %0,0,%3\n" \
" bne- 1b\n" \
- PPC_ATOMIC_EXIT_BARRIER \
- : "=&r" (t) \
+ : "=&r" (t), "+m" (v->counter) \
: "r" (a), "r" (&v->counter) \
- : "cc", "memory"); \
+ : "cc"); \
\
return t; \
}
-#define ATOMIC_OPS(op, asm_op) ATOMIC_OP(op, asm_op) ATOMIC_OP_RETURN(op, asm_op)
+#define ATOMIC_OPS(op, asm_op) \
+ ATOMIC_OP(op, asm_op) \
+ ATOMIC_OP_RETURN_RELAXED(op, asm_op)
ATOMIC_OPS(add, add)
ATOMIC_OPS(sub, subf)
@@ -71,8 +89,11 @@ ATOMIC_OP(and, and)
ATOMIC_OP(or, or)
ATOMIC_OP(xor, xor)
+#define atomic_add_return_relaxed atomic_add_return_relaxed
+#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+
#undef ATOMIC_OPS
-#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP_RETURN_RELAXED
#undef ATOMIC_OP
#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
@@ -92,21 +113,19 @@ static __inline__ void atomic_inc(atomic_t *v)
: "cc", "xer");
}
-static __inline__ int atomic_inc_return(atomic_t *v)
+static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
{
int t;
__asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: lwarx %0,0,%1 # atomic_inc_return\n\
- addic %0,%0,1\n"
- PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1 \n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
+"1: lwarx %0,0,%2 # atomic_inc_return_relaxed\n"
+" addic %0,%0,1\n"
+ PPC405_ERR77(0, %2)
+" stwcx. %0,0,%2\n"
+" bne- 1b"
+ : "=&r" (t), "+m" (v->counter)
: "r" (&v->counter)
- : "cc", "xer", "memory");
+ : "cc", "xer");
return t;
}
@@ -136,27 +155,34 @@ static __inline__ void atomic_dec(atomic_t *v)
: "cc", "xer");
}
-static __inline__ int atomic_dec_return(atomic_t *v)
+static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
{
int t;
__asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: lwarx %0,0,%1 # atomic_dec_return\n\
- addic %0,%0,-1\n"
- PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1\n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
+"1: lwarx %0,0,%2 # atomic_dec_return_relaxed\n"
+" addic %0,%0,-1\n"
+ PPC405_ERR77(0, %2)
+" stwcx. %0,0,%2\n"
+" bne- 1b"
+ : "=&r" (t), "+m" (v->counter)
: "r" (&v->counter)
- : "cc", "xer", "memory");
+ : "cc", "xer");
return t;
}
+#define atomic_inc_return_relaxed atomic_inc_return_relaxed
+#define atomic_dec_return_relaxed atomic_dec_return_relaxed
+
#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_cmpxchg_relaxed(v, o, n) \
+ cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define atomic_cmpxchg_acquire(v, o, n) \
+ cmpxchg_acquire(&((v)->counter), (o), (n))
+
#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
/**
* __atomic_add_unless - add unless the number is a given value
@@ -285,26 +311,27 @@ static __inline__ void atomic64_##op(long a, atomic64_t *v) \
: "cc"); \
}
-#define ATOMIC64_OP_RETURN(op, asm_op) \
-static __inline__ long atomic64_##op##_return(long a, atomic64_t *v) \
+#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \
+static inline long \
+atomic64_##op##_return_relaxed(long a, atomic64_t *v) \
{ \
long t; \
\
__asm__ __volatile__( \
- PPC_ATOMIC_ENTRY_BARRIER \
-"1: ldarx %0,0,%2 # atomic64_" #op "_return\n" \
- #asm_op " %0,%1,%0\n" \
-" stdcx. %0,0,%2 \n" \
+"1: ldarx %0,0,%3 # atomic64_" #op "_return_relaxed\n" \
+ #asm_op " %0,%2,%0\n" \
+" stdcx. %0,0,%3\n" \
" bne- 1b\n" \
- PPC_ATOMIC_EXIT_BARRIER \
- : "=&r" (t) \
+ : "=&r" (t), "+m" (v->counter) \
: "r" (a), "r" (&v->counter) \
- : "cc", "memory"); \
+ : "cc"); \
\
return t; \
}
-#define ATOMIC64_OPS(op, asm_op) ATOMIC64_OP(op, asm_op) ATOMIC64_OP_RETURN(op, asm_op)
+#define ATOMIC64_OPS(op, asm_op) \
+ ATOMIC64_OP(op, asm_op) \
+ ATOMIC64_OP_RETURN_RELAXED(op, asm_op)
ATOMIC64_OPS(add, add)
ATOMIC64_OPS(sub, subf)
@@ -312,8 +339,11 @@ ATOMIC64_OP(and, and)
ATOMIC64_OP(or, or)
ATOMIC64_OP(xor, xor)
-#undef ATOMIC64_OPS
-#undef ATOMIC64_OP_RETURN
+#define atomic64_add_return_relaxed atomic64_add_return_relaxed
+#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+
+#undef ATOPIC64_OPS
+#undef ATOMIC64_OP_RETURN_RELAXED
#undef ATOMIC64_OP
#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
@@ -332,20 +362,18 @@ static __inline__ void atomic64_inc(atomic64_t *v)
: "cc", "xer");
}
-static __inline__ long atomic64_inc_return(atomic64_t *v)
+static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
{
long t;
__asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: ldarx %0,0,%1 # atomic64_inc_return\n\
- addic %0,%0,1\n\
- stdcx. %0,0,%1 \n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
+"1: ldarx %0,0,%2 # atomic64_inc_return_relaxed\n"
+" addic %0,%0,1\n"
+" stdcx. %0,0,%2\n"
+" bne- 1b"
+ : "=&r" (t), "+m" (v->counter)
: "r" (&v->counter)
- : "cc", "xer", "memory");
+ : "cc", "xer");
return t;
}
@@ -374,24 +402,25 @@ static __inline__ void atomic64_dec(atomic64_t *v)
: "cc", "xer");
}
-static __inline__ long atomic64_dec_return(atomic64_t *v)
+static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
{
long t;
__asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: ldarx %0,0,%1 # atomic64_dec_return\n\
- addic %0,%0,-1\n\
- stdcx. %0,0,%1\n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
+"1: ldarx %0,0,%2 # atomic64_dec_return_relaxed\n"
+" addic %0,%0,-1\n"
+" stdcx. %0,0,%2\n"
+" bne- 1b"
+ : "=&r" (t), "+m" (v->counter)
: "r" (&v->counter)
- : "cc", "xer", "memory");
+ : "cc", "xer");
return t;
}
+#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
+#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
+
#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
@@ -420,7 +449,13 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
}
#define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+#define atomic64_cmpxchg_relaxed(v, o, n) \
+ cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define atomic64_cmpxchg_acquire(v, o, n) \
+ cmpxchg_acquire(&((v)->counter), (o), (n))
+
#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+#define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
/**
* atomic64_add_unless - add unless the number is a given value
diff --git a/arch/powerpc/include/asm/mmu-hash32.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index 16f513e5cbd7..16f513e5cbd7 100644
--- a/arch/powerpc/include/asm/mmu-hash32.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index ea0414d6659e..5f08a0832238 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -52,44 +52,14 @@
_PAGE_F_SECOND | _PAGE_F_GIX)
/* shift to put page number into pte */
-#define PTE_RPN_SHIFT (18)
+#define PTE_RPN_SHIFT (12)
+#define PTE_RPN_SIZE (45) /* gives 57-bit real addresses */
#define _PAGE_4K_PFN 0
#ifndef __ASSEMBLY__
/*
- * 4-level page tables related bits
+ * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
*/
-
-#define pgd_none(pgd) (!pgd_val(pgd))
-#define pgd_bad(pgd) (pgd_val(pgd) == 0)
-#define pgd_present(pgd) (pgd_val(pgd) != 0)
-#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS)
-
-static inline void pgd_clear(pgd_t *pgdp)
-{
- *pgdp = __pgd(0);
-}
-
-static inline pte_t pgd_pte(pgd_t pgd)
-{
- return __pte(pgd_val(pgd));
-}
-
-static inline pgd_t pte_pgd(pte_t pte)
-{
- return __pgd(pte_val(pte));
-}
-extern struct page *pgd_page(pgd_t pgd);
-
-#define pud_offset(pgdp, addr) \
- (((pud_t *) pgd_page_vaddr(*(pgdp))) + \
- (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
-
-#define pud_ERROR(e) \
- pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
-
-/*
- * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */
#define remap_4k_pfn(vma, addr, pfn, prot) \
remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 849bbec80f7b..0a7956a80a08 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -1,15 +1,14 @@
#ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
#define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
-#include <asm-generic/pgtable-nopud.h>
-
#define PTE_INDEX_SIZE 8
-#define PMD_INDEX_SIZE 10
-#define PUD_INDEX_SIZE 0
+#define PMD_INDEX_SIZE 5
+#define PUD_INDEX_SIZE 5
#define PGD_INDEX_SIZE 12
#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE)
#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
/* With 4k base page size, hugepage PTEs go at the PMD level */
@@ -20,13 +19,18 @@
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
-#define _PAGE_COMBO 0x00040000 /* this is a combo 4k page */
-#define _PAGE_4K_PFN 0x00080000 /* PFN is for a single 4k page */
+#define _PAGE_COMBO 0x00001000 /* this is a combo 4k page */
+#define _PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */
/*
* Used to track subpage group valid if _PAGE_COMBO is set
* This overloads _PAGE_F_GIX and _PAGE_F_SECOND
@@ -39,10 +43,12 @@
/* Shift to put page number into pte.
*
- * That gives us a max RPN of 34 bits, which means a max of 50 bits
- * of addressable physical space, or 46 bits for the special 4k PFNs.
+ * That gives us a max RPN of 41 bits, which means a max of 57 bits
+ * of addressable physical space, or 53 bits for the special 4k PFNs.
*/
-#define PTE_RPN_SHIFT (30)
+#define PTE_RPN_SHIFT (16)
+#define PTE_RPN_SIZE (41)
+
/*
* we support 16 fragments per PTE page of 64K size.
*/
@@ -54,13 +60,12 @@
#define PTE_FRAG_SIZE_SHIFT 12
#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
-/*
- * Bits to mask out from a PMD to get to the PTE page
- * PMDs point to PTE table fragments which are PTE_FRAG_SIZE aligned.
- */
-#define PMD_MASKED_BITS (PTE_FRAG_SIZE - 1)
-/* Bits to mask out from a PGD/PUD to get to the PMD page */
-#define PUD_MASKED_BITS 0x1ff
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS 0xc0000000000000ffUL
+/* Bits to mask out from a PUD to get to the PMD page */
+#define PUD_MASKED_BITS 0xc0000000000000ffUL
+/* Bits to mask out from a PGD to get to the PUD page */
+#define PGD_MASKED_BITS 0xc0000000000000ffUL
#ifndef __ASSEMBLY__
@@ -120,7 +125,7 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
(((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
#define remap_4k_pfn(vma, addr, pfn, prot) \
- (WARN_ON(((pfn) >= (1UL << (64 - PTE_RPN_SHIFT)))) ? -EINVAL : \
+ (WARN_ON(((pfn) >= (1UL << PTE_RPN_SIZE))) ? -EINVAL : \
remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, \
__pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)))
@@ -130,11 +135,9 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
#else
#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
#endif
+#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
-#define pgd_pte(pgd) (pud_pte(((pud_t){ pgd })))
-#define pte_pgd(pte) ((pgd_t)pte_pud(pte))
-
#ifdef CONFIG_HUGETLB_PAGE
/*
* We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have
@@ -208,30 +211,30 @@ static inline char *get_hpte_slot_array(pmd_t *pmdp)
/*
* The linux hugepage PMD now include the pmd entries followed by the address
* to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
- * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
+ * [ 000 | 1 bit secondary | 3 bit hidx | 1 bit valid]. We use one byte per
* each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
* with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
*
- * The last three bits are intentionally left to zero. This memory location
+ * The top three bits are intentionally left as zero. This memory location
* are also used as normal page PTE pointers. So if we have any pointers
* left around while we collapse a hugepage, we need to make sure
* _PAGE_PRESENT bit of that is zero when we look at them
*/
static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
{
- return (hpte_slot_array[index] >> 3) & 0x1;
+ return hpte_slot_array[index] & 0x1;
}
static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
int index)
{
- return hpte_slot_array[index] >> 4;
+ return hpte_slot_array[index] >> 1;
}
static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
unsigned int index, unsigned int hidx)
{
- hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
+ hpte_slot_array[index] = (hidx << 1) | 0x1;
}
/*
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 8d1c8162f0c1..d0ee6fcef823 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -4,8 +4,7 @@
/*
* Common bits between 4K and 64K pages in a linux-style PTE.
- * These match the bits in the (hardware-defined) PowerPC PTE as closely
- * as possible. Additional bits may be defined in pgtable-hash64-*.h
+ * Additional bits may be defined in pgtable-hash64-*.h
*
* Note: We only support user read/write permissions. Supervisor always
* have full read/write to pages above PAGE_OFFSET (pages below that
@@ -14,32 +13,35 @@
* We could create separate kernel read-only if we used the 3 PP bits
* combinations that newer processors provide but we currently don't.
*/
-#define _PAGE_PTE 0x00001
-#define _PAGE_PRESENT 0x00002 /* software: pte contains a translation */
-#define _PAGE_BIT_SWAP_TYPE 2
-#define _PAGE_USER 0x00004 /* matches one of the PP bits */
-#define _PAGE_EXEC 0x00008 /* No execute on POWER4 and newer (we invert) */
-#define _PAGE_GUARDED 0x00010
-/* We can derive Memory coherence from _PAGE_NO_CACHE */
+#define _PAGE_BIT_SWAP_TYPE 0
+
+#define _PAGE_EXEC 0x00001 /* execute permission */
+#define _PAGE_RW 0x00002 /* read & write access allowed */
+#define _PAGE_READ 0x00004 /* read access allowed */
+#define _PAGE_USER 0x00008 /* page may be accessed by userspace */
+#define _PAGE_GUARDED 0x00010 /* G: guarded (side-effect) page */
+/* M (memory coherence) is always set in the HPTE, so we don't need it here */
#define _PAGE_COHERENT 0x0
#define _PAGE_NO_CACHE 0x00020 /* I: cache inhibit */
#define _PAGE_WRITETHRU 0x00040 /* W: cache write-through */
#define _PAGE_DIRTY 0x00080 /* C: page changed */
#define _PAGE_ACCESSED 0x00100 /* R: page referenced */
-#define _PAGE_RW 0x00200 /* software: user write access allowed */
-#define _PAGE_HASHPTE 0x00400 /* software: pte has an associated HPTE */
+#define _PAGE_SPECIAL 0x00400 /* software: special page */
#define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */
-#define _PAGE_F_GIX 0x07000 /* full page: hidx bits */
-#define _PAGE_F_GIX_SHIFT 12
-#define _PAGE_F_SECOND 0x08000 /* Whether to use secondary hash or not */
-#define _PAGE_SPECIAL 0x10000 /* software: special page */
#ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SOFT_DIRTY 0x20000 /* software: software dirty tracking */
+#define _PAGE_SOFT_DIRTY 0x200 /* software: software dirty tracking */
#else
-#define _PAGE_SOFT_DIRTY 0x00000
+#define _PAGE_SOFT_DIRTY 0x000
#endif
+#define _PAGE_F_GIX_SHIFT 57
+#define _PAGE_F_GIX (7ul << 57) /* HPTE index within HPTEG */
+#define _PAGE_F_SECOND (1ul << 60) /* HPTE is in 2ndary HPTEG */
+#define _PAGE_HASHPTE (1ul << 61) /* PTE has associated HPTE */
+#define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */
+#define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */
+
/*
* We need to differentiate between explicit huge page and THP huge
* page, since THP huge page also need to track real subpage details
@@ -132,7 +134,7 @@
* The mask convered by the RPN must be a ULL on 32-bit platforms with
* 64-bit PTEs
*/
-#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
+#define PTE_RPN_MASK (((1UL << PTE_RPN_SIZE) - 1) << PTE_RPN_SHIFT)
/*
* _PAGE_CHG_MASK masks of bits that are to be preserved across
* pgprot changes
@@ -223,15 +225,17 @@
#define PUD_BAD_BITS (PMD_TABLE_SIZE-1)
#ifndef __ASSEMBLY__
-#define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \
- || (pmd_val(pmd) & PMD_BAD_BITS))
-#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
+#define pmd_bad(pmd) (pmd_val(pmd) & PMD_BAD_BITS)
+#define pmd_page_vaddr(pmd) __va(pmd_val(pmd) & ~PMD_MASKED_BITS)
+
+#define pud_bad(pud) (pud_val(pud) & PUD_BAD_BITS)
+#define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS)
-#define pud_bad(pud) (!is_kernel_addr(pud_val(pud)) \
- || (pud_val(pud) & PUD_BAD_BITS))
-#define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS)
+/* Pointers in the page table tree are physical addresses */
+#define __pgtable_ptr_val(ptr) __pa(ptr)
#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
+#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1))
#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1))
#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1))
@@ -360,8 +364,18 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
:"cc");
}
+static inline int pgd_bad(pgd_t pgd)
+{
+ return (pgd_val(pgd) == 0);
+}
+
#define __HAVE_ARCH_PTE_SAME
#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
+static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+{
+ return (unsigned long)__va(pgd_val(pgd) & ~PGD_MASKED_BITS);
+}
+
/* Generic accessors to PTE bits */
static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);}
@@ -402,7 +416,7 @@ static inline int pte_protnone(pte_t pte)
static inline int pte_present(pte_t pte)
{
- return pte_val(pte) & _PAGE_PRESENT;
+ return !!(pte_val(pte) & _PAGE_PRESENT);
}
/* Conversion functions: convert a page and protection to a page entry,
@@ -413,13 +427,13 @@ static inline int pte_present(pte_t pte)
*/
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
{
- return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
+ return __pte((((pte_basic_t)(pfn) << PTE_RPN_SHIFT) & PTE_RPN_MASK) |
pgprot_val(pgprot));
}
static inline unsigned long pte_pfn(pte_t pte)
{
- return pte_val(pte) >> PTE_RPN_SHIFT;
+ return (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT;
}
/* Generic modifiers for PTE bits */
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 7352d3f212df..0cea4807e26f 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -114,6 +114,7 @@
#define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */
#define POWER8_TLB_SETS 512 /* # sets in POWER8 TLB */
+#define POWER9_TLB_SETS_HASH 256 /* # sets in POWER9 TLB Hash mode */
#ifndef __ASSEMBLY__
@@ -607,6 +608,9 @@ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1;
return get_vsid(context, ea, ssize);
}
+
+unsigned htab_shift_for_mem_size(unsigned long mem_size);
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_MMU_HASH64_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 8d1c41d28318..77d3ce05798e 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -43,13 +43,8 @@
*/
#ifndef __real_pte
-#ifdef CONFIG_STRICT_MM_TYPECHECKS
#define __real_pte(e,p) ((real_pte_t){(e)})
#define __rpte_to_pte(r) ((r).pte)
-#else
-#define __real_pte(e,p) (e)
-#define __rpte_to_pte(r) (__pte(r))
-#endif
#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >>_PAGE_F_GIX_SHIFT)
#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
@@ -111,6 +106,26 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
*pgdp = __pgd(val);
}
+static inline void pgd_clear(pgd_t *pgdp)
+{
+ *pgdp = __pgd(0);
+}
+
+#define pgd_none(pgd) (!pgd_val(pgd))
+#define pgd_present(pgd) (!pgd_none(pgd))
+
+static inline pte_t pgd_pte(pgd_t pgd)
+{
+ return __pte(pgd_val(pgd));
+}
+
+static inline pgd_t pte_pgd(pte_t pte)
+{
+ return __pgd(pte_val(pte));
+}
+
+extern struct page *pgd_page(pgd_t pgd);
+
/*
* Find an entry in a page-table-directory. We combine the address region
* (the high order N bits) and the pgd portion of the address.
@@ -118,9 +133,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
+#define pud_offset(pgdp, addr) \
+ (((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr))
#define pmd_offset(pudp,addr) \
(((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr))
-
#define pte_offset_kernel(dir,addr) \
(((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
@@ -135,6 +151,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+ pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
@@ -154,10 +172,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
#define SWP_TYPE_BITS 5
#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \
& ((1UL << SWP_TYPE_BITS) - 1))
-#define __swp_offset(x) ((x).val >> PTE_RPN_SHIFT)
+#define __swp_offset(x) (((x).val & PTE_RPN_MASK) >> PTE_RPN_SHIFT)
#define __swp_entry(type, offset) ((swp_entry_t) { \
- ((type) << _PAGE_BIT_SWAP_TYPE) \
- | ((offset) << PTE_RPN_SHIFT) })
+ ((type) << _PAGE_BIT_SWAP_TYPE) \
+ | (((offset) << PTE_RPN_SHIFT) & PTE_RPN_MASK)})
/*
* swp_entry_t must be independent of pte bits. We build a swp_entry_t from
* swap type and offset we get from swap and convert that to pte to find a
@@ -281,6 +299,10 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp);
+#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
+extern void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+
#define pmd_move_must_withdraw pmd_move_must_withdraw
struct spinlock;
static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
new file mode 100644
index 000000000000..1b753f96b374
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -0,0 +1,94 @@
+#ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H
+#define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H
+
+#define MMU_NO_CONTEXT 0
+
+/*
+ * TLB flushing for 64-bit hash-MMU CPUs
+ */
+
+#include <linux/percpu.h>
+#include <asm/page.h>
+
+#define PPC64_TLB_BATCH_NR 192
+
+struct ppc64_tlb_batch {
+ int active;
+ unsigned long index;
+ struct mm_struct *mm;
+ real_pte_t pte[PPC64_TLB_BATCH_NR];
+ unsigned long vpn[PPC64_TLB_BATCH_NR];
+ unsigned int psize;
+ int ssize;
+};
+DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
+
+extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);
+
+#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+
+static inline void arch_enter_lazy_mmu_mode(void)
+{
+ struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
+
+ batch->active = 1;
+}
+
+static inline void arch_leave_lazy_mmu_mode(void)
+{
+ struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
+
+ if (batch->index)
+ __flush_tlb_pending(batch);
+ batch->active = 0;
+}
+
+#define arch_flush_lazy_mmu_mode() do {} while (0)
+
+
+extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize,
+ int ssize, unsigned long flags);
+extern void flush_hash_range(unsigned long number, int local);
+extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
+ pmd_t *pmdp, unsigned int psize, int ssize,
+ unsigned long flags);
+
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
+{
+}
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+}
+
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+}
+
+static inline void flush_tlb_page_nohash(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+ unsigned long end)
+{
+}
+
+/* Private function for use by PCI IO mapping code */
+extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end);
+extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr);
+#endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H */
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index d1a8d93cccfd..44efe739b6b9 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -5,25 +5,25 @@
#include <linux/compiler.h>
#include <asm/synch.h>
#include <asm/asm-compat.h>
+#include <linux/bug.h>
/*
* Atomic exchange
*
- * Changes the memory location '*ptr' to be val and returns
+ * Changes the memory location '*p' to be val and returns
* the previous value stored there.
*/
+
static __always_inline unsigned long
-__xchg_u32(volatile void *p, unsigned long val)
+__xchg_u32_local(volatile void *p, unsigned long val)
{
unsigned long prev;
__asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
"1: lwarx %0,0,%2 \n"
PPC405_ERR77(0,%2)
" stwcx. %3,0,%2 \n\
bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
: "=&r" (prev), "+m" (*(volatile unsigned int *)p)
: "r" (p), "r" (val)
: "cc", "memory");
@@ -31,42 +31,34 @@ __xchg_u32(volatile void *p, unsigned long val)
return prev;
}
-/*
- * Atomic exchange
- *
- * Changes the memory location '*ptr' to be val and returns
- * the previous value stored there.
- */
static __always_inline unsigned long
-__xchg_u32_local(volatile void *p, unsigned long val)
+__xchg_u32_relaxed(u32 *p, unsigned long val)
{
unsigned long prev;
__asm__ __volatile__(
-"1: lwarx %0,0,%2 \n"
- PPC405_ERR77(0,%2)
-" stwcx. %3,0,%2 \n\
- bne- 1b"
- : "=&r" (prev), "+m" (*(volatile unsigned int *)p)
+"1: lwarx %0,0,%2\n"
+ PPC405_ERR77(0, %2)
+" stwcx. %3,0,%2\n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*p)
: "r" (p), "r" (val)
- : "cc", "memory");
+ : "cc");
return prev;
}
#ifdef CONFIG_PPC64
static __always_inline unsigned long
-__xchg_u64(volatile void *p, unsigned long val)
+__xchg_u64_local(volatile void *p, unsigned long val)
{
unsigned long prev;
__asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
"1: ldarx %0,0,%2 \n"
PPC405_ERR77(0,%2)
" stdcx. %3,0,%2 \n\
bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
: "=&r" (prev), "+m" (*(volatile unsigned long *)p)
: "r" (p), "r" (val)
: "cc", "memory");
@@ -75,64 +67,52 @@ __xchg_u64(volatile void *p, unsigned long val)
}
static __always_inline unsigned long
-__xchg_u64_local(volatile void *p, unsigned long val)
+__xchg_u64_relaxed(u64 *p, unsigned long val)
{
unsigned long prev;
__asm__ __volatile__(
-"1: ldarx %0,0,%2 \n"
- PPC405_ERR77(0,%2)
-" stdcx. %3,0,%2 \n\
- bne- 1b"
- : "=&r" (prev), "+m" (*(volatile unsigned long *)p)
+"1: ldarx %0,0,%2\n"
+ PPC405_ERR77(0, %2)
+" stdcx. %3,0,%2\n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*p)
: "r" (p), "r" (val)
- : "cc", "memory");
+ : "cc");
return prev;
}
#endif
-/*
- * This function doesn't exist, so you'll get a linker error
- * if something tries to do an invalid xchg().
- */
-extern void __xchg_called_with_bad_pointer(void);
-
static __always_inline unsigned long
-__xchg(volatile void *ptr, unsigned long x, unsigned int size)
+__xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
{
switch (size) {
case 4:
- return __xchg_u32(ptr, x);
+ return __xchg_u32_local(ptr, x);
#ifdef CONFIG_PPC64
case 8:
- return __xchg_u64(ptr, x);
+ return __xchg_u64_local(ptr, x);
#endif
}
- __xchg_called_with_bad_pointer();
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __xchg");
return x;
}
static __always_inline unsigned long
-__xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
+__xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
{
switch (size) {
case 4:
- return __xchg_u32_local(ptr, x);
+ return __xchg_u32_relaxed(ptr, x);
#ifdef CONFIG_PPC64
case 8:
- return __xchg_u64_local(ptr, x);
+ return __xchg_u64_relaxed(ptr, x);
#endif
}
- __xchg_called_with_bad_pointer();
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __xchg_local");
return x;
}
-#define xchg(ptr,x) \
- ({ \
- __typeof__(*(ptr)) _x_ = (x); \
- (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \
- })
-
#define xchg_local(ptr,x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
@@ -140,6 +120,12 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
(unsigned long)_x_, sizeof(*(ptr))); \
})
+#define xchg_relaxed(ptr, x) \
+({ \
+ __typeof__(*(ptr)) _x_ = (x); \
+ (__typeof__(*(ptr))) __xchg_relaxed((ptr), \
+ (unsigned long)_x_, sizeof(*(ptr))); \
+})
/*
* Compare and exchange - if *p == old, set it to new,
* and return the old value of *p.
@@ -190,6 +176,56 @@ __cmpxchg_u32_local(volatile unsigned int *p, unsigned long old,
return prev;
}
+static __always_inline unsigned long
+__cmpxchg_u32_relaxed(u32 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lwarx %0,0,%2 # __cmpxchg_u32_relaxed\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+ PPC405_ERR77(0, %2)
+" stwcx. %4,0,%2\n"
+" bne- 1b\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc");
+
+ return prev;
+}
+
+/*
+ * cmpxchg family don't have order guarantee if cmp part fails, therefore we
+ * can avoid superfluous barriers if we use assembly code to implement
+ * cmpxchg() and cmpxchg_acquire(), however we don't do the similar for
+ * cmpxchg_release() because that will result in putting a barrier in the
+ * middle of a ll/sc loop, which is probably a bad idea. For example, this
+ * might cause the conditional store more likely to fail.
+ */
+static __always_inline unsigned long
+__cmpxchg_u32_acquire(u32 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lwarx %0,0,%2 # __cmpxchg_u32_acquire\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+ PPC405_ERR77(0, %2)
+" stwcx. %4,0,%2\n"
+" bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+ "\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
#ifdef CONFIG_PPC64
static __always_inline unsigned long
__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
@@ -233,11 +269,47 @@ __cmpxchg_u64_local(volatile unsigned long *p, unsigned long old,
return prev;
}
-#endif
-/* This function doesn't exist, so you'll get a linker error
- if something tries to do an invalid cmpxchg(). */
-extern void __cmpxchg_called_with_bad_pointer(void);
+static __always_inline unsigned long
+__cmpxchg_u64_relaxed(u64 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: ldarx %0,0,%2 # __cmpxchg_u64_relaxed\n"
+" cmpd 0,%0,%3\n"
+" bne- 2f\n"
+" stdcx. %4,0,%2\n"
+" bne- 1b\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u64_acquire(u64 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: ldarx %0,0,%2 # __cmpxchg_u64_acquire\n"
+" cmpd 0,%0,%3\n"
+" bne- 2f\n"
+" stdcx. %4,0,%2\n"
+" bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+ "\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+#endif
static __always_inline unsigned long
__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
@@ -251,7 +323,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
return __cmpxchg_u64(ptr, old, new);
#endif
}
- __cmpxchg_called_with_bad_pointer();
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg");
return old;
}
@@ -267,10 +339,41 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
return __cmpxchg_u64_local(ptr, old, new);
#endif
}
- __cmpxchg_called_with_bad_pointer();
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_local");
+ return old;
+}
+
+static __always_inline unsigned long
+__cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
+ unsigned int size)
+{
+ switch (size) {
+ case 4:
+ return __cmpxchg_u32_relaxed(ptr, old, new);
+#ifdef CONFIG_PPC64
+ case 8:
+ return __cmpxchg_u64_relaxed(ptr, old, new);
+#endif
+ }
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_relaxed");
return old;
}
+static __always_inline unsigned long
+__cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
+ unsigned int size)
+{
+ switch (size) {
+ case 4:
+ return __cmpxchg_u32_acquire(ptr, old, new);
+#ifdef CONFIG_PPC64
+ case 8:
+ return __cmpxchg_u64_acquire(ptr, old, new);
+#endif
+ }
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire");
+ return old;
+}
#define cmpxchg(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
@@ -288,6 +391,23 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
(unsigned long)_n_, sizeof(*(ptr))); \
})
+#define cmpxchg_relaxed(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) _o_ = (o); \
+ __typeof__(*(ptr)) _n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
+ (unsigned long)_o_, (unsigned long)_n_, \
+ sizeof(*(ptr))); \
+})
+
+#define cmpxchg_acquire(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) _o_ = (o); \
+ __typeof__(*(ptr)) _n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \
+ (unsigned long)_o_, (unsigned long)_n_, \
+ sizeof(*(ptr))); \
+})
#ifdef CONFIG_PPC64
#define cmpxchg64(ptr, o, n) \
({ \
@@ -299,7 +419,16 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
cmpxchg_local((ptr), (o), (n)); \
})
-#define cmpxchg64_relaxed cmpxchg64_local
+#define cmpxchg64_relaxed(ptr, o, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ cmpxchg_relaxed((ptr), (o), (n)); \
+})
+#define cmpxchg64_acquire(ptr, o, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ cmpxchg_acquire((ptr), (o), (n)); \
+})
#else
#include <asm-generic/cmpxchg-local.h>
#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index b118072670fb..94ace9b4c4e1 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -171,7 +171,7 @@ enum {
#define CPU_FTR_ARCH_201 LONG_ASM_CONST(0x0000000200000000)
#define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000400000000)
#define CPU_FTR_ARCH_207S LONG_ASM_CONST(0x0000000800000000)
-/* Free LONG_ASM_CONST(0x0000001000000000) */
+#define CPU_FTR_ARCH_300 LONG_ASM_CONST(0x0000001000000000)
#define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000002000000000)
#define CPU_FTR_CTRL LONG_ASM_CONST(0x0000004000000000)
#define CPU_FTR_SMT LONG_ASM_CONST(0x0000008000000000)
@@ -196,6 +196,7 @@ enum {
#define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000)
#define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000)
#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000)
+#define CPU_FTR_SUBCORE LONG_ASM_CONST(0x2000000000000000)
#ifndef __ASSEMBLY__
@@ -443,9 +444,19 @@ enum {
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
- CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
+ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_SUBCORE)
#define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
+#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
+ CPU_FTR_MMCRA | CPU_FTR_SMT | \
+ CPU_FTR_COHERENT_ICACHE | \
+ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
+ CPU_FTR_DSCR | CPU_FTR_SAO | \
+ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+ CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
+ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
+ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300)
#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -464,7 +475,7 @@ enum {
(CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
- CPU_FTRS_PA6T | CPU_FTR_VSX)
+ CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9)
#endif
#else
enum {
@@ -515,7 +526,8 @@ enum {
(CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
- CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE)
+ CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & \
+ CPU_FTRS_POWER9)
#endif
#else
enum {
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index c5eb86f3d452..fb9f376ae27b 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -72,6 +72,7 @@ struct pci_dn;
#define EEH_PE_PHB (1 << 1) /* PHB PE */
#define EEH_PE_DEVICE (1 << 2) /* Device PE */
#define EEH_PE_BUS (1 << 3) /* Bus PE */
+#define EEH_PE_VF (1 << 4) /* VF PE */
#define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */
#define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */
@@ -81,6 +82,7 @@ struct pci_dn;
#define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */
#define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */
#define EEH_PE_REMOVED (1 << 10) /* Removed permanently */
+#define EEH_PE_PRI_BUS (1 << 11) /* Cached primary bus */
struct eeh_pe {
int type; /* PE type: PHB/Bus/Device */
@@ -135,11 +137,15 @@ struct eeh_dev {
int pcix_cap; /* Saved PCIx capability */
int pcie_cap; /* Saved PCIe capability */
int aer_cap; /* Saved AER capability */
+ int af_cap; /* Saved AF capability */
struct eeh_pe *pe; /* Associated PE */
struct list_head list; /* Form link list in the PE */
+ struct list_head rmv_list; /* Record the removed edevs */
struct pci_controller *phb; /* Associated PHB */
struct pci_dn *pdn; /* Associated PCI device node */
struct pci_dev *pdev; /* Associated PCI device */
+ bool in_error; /* Error flag for edev */
+ struct pci_dev *physfn; /* Associated SRIOV PF */
struct pci_bus *bus; /* PCI bus for partial hotplug */
};
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 7eac89b9f02e..42814f0567cc 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -19,7 +19,7 @@ static inline pte_t *hugepd_page(hugepd_t hpd)
* We have only four bits to encode, MMU page size
*/
BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf);
- return (pte_t *)(hpd.pd & ~HUGEPD_SHIFT_MASK);
+ return __va(hpd.pd & HUGEPD_ADDR_MASK);
}
static inline unsigned int hugepd_mmu_psize(hugepd_t hpd)
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index e3b54dd4f730..0bc9c284aa10 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -94,6 +94,7 @@
#define H_SG_LIST -72
#define H_OP_MODE -73
#define H_COP_HW -74
+#define H_STATE -75
#define H_UNSUPPORTED_FLAG_START -256
#define H_UNSUPPORTED_FLAG_END -511
#define H_MULTI_THREADS_ACTIVE -9005
diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h
index 1cb39c96d155..b3b0f2d020f0 100644
--- a/arch/powerpc/include/asm/hydra.h
+++ b/arch/powerpc/include/asm/hydra.h
@@ -89,7 +89,7 @@ extern volatile struct Hydra __iomem *Hydra;
#define HYDRA_INT_EXT2 13 /* PCI IRQX */
#define HYDRA_INT_EXT3 14 /* PCI IRQY */
#define HYDRA_INT_EXT4 15 /* PCI IRQZ */
-#define HYDRA_INT_EXT5 16 /* IDE Primay/Secondary */
+#define HYDRA_INT_EXT5 16 /* IDE Primary/Secondary */
#define HYDRA_INT_EXT6 17 /* IDE Secondary */
#define HYDRA_INT_EXT7 18 /* Power Off Request */
#define HYDRA_INT_SPARE 19
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 6c1297ec374c..2fd1690b79d2 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -300,7 +300,7 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src,
* When CONFIG_PPC_INDIRECT_MMIO is set, the platform can provide hooks
* on all MMIOs. (Note that this is all 64 bits only for now)
*
- * To help platforms who may need to differenciate MMIO addresses in
+ * To help platforms who may need to differentiate MMIO addresses in
* their hooks, a bitfield is reserved for use by the platform near the
* top of MMIO addresses (not PIO, those have to cope the hard way).
*
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 3f191f573d4f..fd22442d30a9 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -54,7 +54,7 @@ struct machdep_calls {
int psize, int apsize,
int ssize);
long (*hpte_remove)(unsigned long hpte_group);
- void (*hpte_removebolted)(unsigned long ea,
+ int (*hpte_removebolted)(unsigned long ea,
int psize, int ssize);
void (*flush_hash_range)(unsigned long number, int local);
void (*hugepage_invalidate)(unsigned long vsid,
@@ -174,11 +174,11 @@ struct machdep_calls {
platform, called once per cpu. */
void (*enable_pmcs)(void);
- /* Set DABR for this platform, leave empty for default implemenation */
+ /* Set DABR for this platform, leave empty for default implementation */
int (*set_dabr)(unsigned long dabr,
unsigned long dabrx);
- /* Set DAWR for this platform, leave empty for default implemenation */
+ /* Set DAWR for this platform, leave empty for default implementation */
int (*set_dawr)(unsigned long dawr,
unsigned long dawrx);
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 3d5abfe6ba67..8ca1c983bf6c 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -97,6 +97,7 @@
#define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
#define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
#define MMU_FTRS_POWER8 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER9 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
#define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
MMU_FTR_CI_LARGE_PAGE
#define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
@@ -182,10 +183,10 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
#if defined(CONFIG_PPC_STD_MMU_64)
/* 64-bit classic hash table MMU */
-# include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#elif defined(CONFIG_PPC_STD_MMU_32)
/* 32-bit classic hash table MMU */
-# include <asm/mmu-hash32.h>
+#include <asm/book3s/32/mmu-hash.h>
#elif defined(CONFIG_40x)
/* 40x-style software loaded TLB */
# include <asm/mmu-40x.h>
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index 5b6b5a427b54..cd4ffd86765f 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -19,7 +19,7 @@
* Thanks to Paul M for explaining this.
*
* PPC can only do rel jumps += 32MB, and often the kernel and other
- * modules are furthur away than this. So, we jump to a table of
+ * modules are further away than this. So, we jump to a table of
* trampolines attached to the module (the Procedure Linkage Table)
* whenever that happens.
*/
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index b9f734dd5b81..10debb93c4a4 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -108,6 +108,9 @@
#ifndef __ASSEMBLY__
/* pte_clear moved to later in this file */
+/* Pointers in the page table tree are virtual addresses */
+#define __pgtable_ptr_val(ptr) ((unsigned long)(ptr))
+
#define PMD_BAD_BITS (PTE_TABLE_SIZE-1)
#define PUD_BAD_BITS (PMD_TABLE_SIZE-1)
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 07a99e638449..9d86c6651716 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -248,6 +248,7 @@ extern int opal_elog_init(void);
extern void opal_platform_dump_init(void);
extern void opal_sys_param_init(void);
extern void opal_msglog_init(void);
+extern void opal_msglog_sysfs_init(void);
extern int opal_async_comp_init(void);
extern int opal_sensor_init(void);
extern int opal_hmi_handler_init(void);
@@ -273,6 +274,8 @@ void opal_free_sg_list(struct opal_sg_list *sg);
extern int opal_error_code(int rc);
+ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count);
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_OPAL_H */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index e34124f6fbf2..ab3d8977bacd 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -271,6 +271,13 @@ extern long long virt_phys_offset;
#else
#define PD_HUGE 0x80000000
#endif
+
+#else /* CONFIG_PPC_BOOK3S_64 */
+/*
+ * Book3S 64 stores real addresses in the hugepd entries to
+ * avoid overlaps with _PAGE_PRESENT and _PAGE_PTE.
+ */
+#define HUGEPD_ADDR_MASK (0x0ffffffffffffffful & ~HUGEPD_SHIFT_MASK)
#endif /* CONFIG_PPC_BOOK3S_64 */
/*
@@ -281,109 +288,7 @@ extern long long virt_phys_offset;
#ifndef __ASSEMBLY__
-#ifdef CONFIG_STRICT_MM_TYPECHECKS
-/* These are used to make use of C type-checking. */
-
-/* PTE level */
-typedef struct { pte_basic_t pte; } pte_t;
-#define __pte(x) ((pte_t) { (x) })
-static inline pte_basic_t pte_val(pte_t x)
-{
- return x.pte;
-}
-
-/* 64k pages additionally define a bigger "real PTE" type that gathers
- * the "second half" part of the PTE for pseudo 64k pages
- */
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
-typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
-#else
-typedef struct { pte_t pte; } real_pte_t;
-#endif
-
-/* PMD level */
-#ifdef CONFIG_PPC64
-typedef struct { unsigned long pmd; } pmd_t;
-#define __pmd(x) ((pmd_t) { (x) })
-static inline unsigned long pmd_val(pmd_t x)
-{
- return x.pmd;
-}
-
-/* PUD level exusts only on 4k pages */
-#ifndef CONFIG_PPC_64K_PAGES
-typedef struct { unsigned long pud; } pud_t;
-#define __pud(x) ((pud_t) { (x) })
-static inline unsigned long pud_val(pud_t x)
-{
- return x.pud;
-}
-#endif /* !CONFIG_PPC_64K_PAGES */
-#endif /* CONFIG_PPC64 */
-
-/* PGD level */
-typedef struct { unsigned long pgd; } pgd_t;
-#define __pgd(x) ((pgd_t) { (x) })
-static inline unsigned long pgd_val(pgd_t x)
-{
- return x.pgd;
-}
-
-/* Page protection bits */
-typedef struct { unsigned long pgprot; } pgprot_t;
-#define pgprot_val(x) ((x).pgprot)
-#define __pgprot(x) ((pgprot_t) { (x) })
-
-#else
-
-/*
- * .. while these make it easier on the compiler
- */
-
-typedef pte_basic_t pte_t;
-#define __pte(x) (x)
-static inline pte_basic_t pte_val(pte_t pte)
-{
- return pte;
-}
-
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
-typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
-#else
-typedef pte_t real_pte_t;
-#endif
-
-
-#ifdef CONFIG_PPC64
-typedef unsigned long pmd_t;
-#define __pmd(x) (x)
-static inline unsigned long pmd_val(pmd_t pmd)
-{
- return pmd;
-}
-
-#ifndef CONFIG_PPC_64K_PAGES
-typedef unsigned long pud_t;
-#define __pud(x) (x)
-static inline unsigned long pud_val(pud_t pud)
-{
- return pud;
-}
-#endif /* !CONFIG_PPC_64K_PAGES */
-#endif /* CONFIG_PPC64 */
-
-typedef unsigned long pgd_t;
-#define __pgd(x) (x)
-static inline unsigned long pgd_val(pgd_t pgd)
-{
- return pgd;
-}
-
-typedef unsigned long pgprot_t;
-#define pgprot_val(x) (x)
-#define __pgprot(x) (x)
-
-#endif
+#include <asm/pgtable-types.h>
typedef struct { signed long pd; } hugepd_t;
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 54843ca5fa2b..9f165e8a77bf 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -212,15 +212,16 @@ struct pci_dn {
#define IODA_INVALID_PE (-1)
#ifdef CONFIG_PPC_POWERNV
int pe_number;
+ int vf_index; /* VF index in the PF */
#ifdef CONFIG_PCI_IOV
u16 vfs_expanded; /* number of VFs IOV BAR expanded */
u16 num_vfs; /* number of VFs enabled*/
- int offset; /* PE# for the first VF PE */
-#define M64_PER_IOV 4
- int m64_per_iov;
+ int *pe_num_map; /* PE# for the first VF PE or array */
+ bool m64_single_mode; /* Use M64 BAR in Single Mode */
#define IODA_INVALID_M64 (-1)
- int m64_wins[PCI_SRIOV_NUM_BARS][M64_PER_IOV];
+ int (*m64_map)[PCI_SRIOV_NUM_BARS];
#endif /* CONFIG_PCI_IOV */
+ int mps; /* Maximum Payload Size */
#endif
struct list_head child_list;
struct list_head list;
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 814622146d5a..e157489ee7a1 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -136,16 +136,24 @@ extern ssize_t power_events_sysfs_show(struct device *dev,
* event 'cpu-cycles' can have two entries in sysfs: 'cpu-cycles' and
* 'PM_CYC' where the latter is the name by which the event is known in
* POWER CPU specification.
+ *
+ * Similarly, some hardware and cache events use the same event code. Eg.
+ * on POWER8, both "cache-references" and "L1-dcache-loads" events refer
+ * to the same event, PM_LD_REF_L1. The suffix, allows us to have two
+ * sysfs objects for the same event and thus two entries/aliases in sysfs.
*/
#define EVENT_VAR(_id, _suffix) event_attr_##_id##_suffix
#define EVENT_PTR(_id, _suffix) &EVENT_VAR(_id, _suffix).attr.attr
#define EVENT_ATTR(_name, _id, _suffix) \
- PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_##_id, \
+ PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), _id, \
power_events_sysfs_show)
#define GENERIC_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _g)
#define GENERIC_EVENT_PTR(_id) EVENT_PTR(_id, _g)
+#define CACHE_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _c)
+#define CACHE_EVENT_PTR(_id) EVENT_PTR(_id, _c)
+
#define POWER_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _p)
#define POWER_EVENT_PTR(_id) EVENT_PTR(_id, _p)
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index 69ef28a81733..8d5fc3ac43da 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -53,7 +53,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
#ifndef CONFIG_PPC_64K_PAGES
-#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD)
+#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, __pgtable_ptr_val(PUD))
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
@@ -68,19 +68,19 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
- pud_set(pud, (unsigned long)pmd);
+ pud_set(pud, __pgtable_ptr_val(pmd));
}
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
{
- pmd_set(pmd, (unsigned long)pte);
+ pmd_set(pmd, __pgtable_ptr_val(pte));
}
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t pte_page)
{
- pmd_set(pmd, (unsigned long)page_address(pte_page));
+ pmd_set(pmd, __pgtable_ptr_val(page_address(pte_page)));
}
#define pmd_pgtable(pmd) pmd_page(pmd)
@@ -171,23 +171,45 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
extern void __tlb_remove_table(void *_table);
#endif
-#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd)
+#ifndef __PAGETABLE_PUD_FOLDED
+/* book3s 64 is 4 level page table */
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+ pgd_set(pgd, __pgtable_ptr_val(pud));
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+ GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+}
+#endif
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+ pud_set(pud, __pgtable_ptr_val(pmd));
+}
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
{
- pmd_set(pmd, (unsigned long)pte);
+ pmd_set(pmd, __pgtable_ptr_val(pte));
}
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t pte_page)
{
- pmd_set(pmd, (unsigned long)pte_page);
+ pmd_set(pmd, __pgtable_ptr_val(pte_page));
}
static inline pgtable_t pmd_pgtable(pmd_t pmd)
{
- return (pgtable_t)(pmd_val(pmd) & ~PMD_MASKED_BITS);
+ return (pgtable_t)pmd_page_vaddr(pmd);
}
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
@@ -233,11 +255,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
#define __pmd_free_tlb(tlb, pmd, addr) \
pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
-#ifndef CONFIG_PPC_64K_PAGES
+#ifndef __PAGETABLE_PUD_FOLDED
#define __pud_free_tlb(tlb, pud, addr) \
pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
-#endif /* CONFIG_PPC_64K_PAGES */
+#endif /* __PAGETABLE_PUD_FOLDED */
#define check_pgt_cache() do { } while (0)
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
new file mode 100644
index 000000000000..43140f8b0592
--- /dev/null
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -0,0 +1,103 @@
+#ifndef _ASM_POWERPC_PGTABLE_TYPES_H
+#define _ASM_POWERPC_PGTABLE_TYPES_H
+
+#ifdef CONFIG_STRICT_MM_TYPECHECKS
+/* These are used to make use of C type-checking. */
+
+/* PTE level */
+typedef struct { pte_basic_t pte; } pte_t;
+#define __pte(x) ((pte_t) { (x) })
+static inline pte_basic_t pte_val(pte_t x)
+{
+ return x.pte;
+}
+
+/* PMD level */
+#ifdef CONFIG_PPC64
+typedef struct { unsigned long pmd; } pmd_t;
+#define __pmd(x) ((pmd_t) { (x) })
+static inline unsigned long pmd_val(pmd_t x)
+{
+ return x.pmd;
+}
+
+/*
+ * 64 bit hash always use 4 level table. Everybody else use 4 level
+ * only for 4K page size.
+ */
+#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
+typedef struct { unsigned long pud; } pud_t;
+#define __pud(x) ((pud_t) { (x) })
+static inline unsigned long pud_val(pud_t x)
+{
+ return x.pud;
+}
+#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC64 */
+
+/* PGD level */
+typedef struct { unsigned long pgd; } pgd_t;
+#define __pgd(x) ((pgd_t) { (x) })
+static inline unsigned long pgd_val(pgd_t x)
+{
+ return x.pgd;
+}
+
+/* Page protection bits */
+typedef struct { unsigned long pgprot; } pgprot_t;
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) })
+
+#else
+
+/*
+ * .. while these make it easier on the compiler
+ */
+
+typedef pte_basic_t pte_t;
+#define __pte(x) (x)
+static inline pte_basic_t pte_val(pte_t pte)
+{
+ return pte;
+}
+
+#ifdef CONFIG_PPC64
+typedef unsigned long pmd_t;
+#define __pmd(x) (x)
+static inline unsigned long pmd_val(pmd_t pmd)
+{
+ return pmd;
+}
+
+#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
+typedef unsigned long pud_t;
+#define __pud(x) (x)
+static inline unsigned long pud_val(pud_t pud)
+{
+ return pud;
+}
+#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC64 */
+
+typedef unsigned long pgd_t;
+#define __pgd(x) (x)
+static inline unsigned long pgd_val(pgd_t pgd)
+{
+ return pgd;
+}
+
+typedef unsigned long pgprot_t;
+#define pgprot_val(x) (x)
+#define __pgprot(x) (x)
+
+#endif /* CONFIG_STRICT_MM_TYPECHECKS */
+/*
+ * With hash config 64k pages additionally define a bigger "real PTE" type that
+ * gathers the "second half" part of the PTE for pseudo 64k pages
+ */
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef struct { pte_t pte; } real_pte_t;
+#endif
+#endif /* _ASM_POWERPC_PGTABLE_TYPES_H */
diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h
index 10902c9375d0..925697968946 100644
--- a/arch/powerpc/include/asm/pmac_feature.h
+++ b/arch/powerpc/include/asm/pmac_feature.h
@@ -46,7 +46,7 @@
/* PowerSurge are the first generation of PCI Pmacs. This include
* all of the Grand-Central based machines. We currently don't
- * differenciate most of them.
+ * differentiate most of them.
*/
#define PMAC_TYPE_PSURGE 0x10 /* PowerSurge */
#define PMAC_TYPE_ANS 0x11 /* Apple Network Server */
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index ac2330820b9a..8ab8a1a9610a 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -236,7 +236,9 @@ struct thread_struct {
#endif
struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
unsigned long trap_nr; /* last trap # on this thread */
+ u8 load_fp;
#ifdef CONFIG_ALTIVEC
+ u8 load_vec;
struct thread_vr_state vr_state;
struct thread_vr_state *vr_save_area;
unsigned long vrsave;
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c4cb2ffc624e..52ed654d01ba 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -75,6 +75,14 @@
#define MSR_HV 0
#endif
+/*
+ * To be used in shared book E/book S, this avoids needing to worry about
+ * book S/book E in shared code
+ */
+#ifndef MSR_SPE
+#define MSR_SPE 0
+#endif
+
#define MSR_VEC __MASK(MSR_VEC_LG) /* Enable AltiVec */
#define MSR_VSX __MASK(MSR_VSX_LG) /* Enable VSX */
#define MSR_POW __MASK(MSR_POW_LG) /* Enable Power Management */
@@ -376,7 +384,7 @@
#define SPRN_TSCR 0x399 /* Thread Switch Control Register */
#define SPRN_DEC 0x016 /* Decrement Register */
-#define SPRN_DER 0x095 /* Debug Enable Regsiter */
+#define SPRN_DER 0x095 /* Debug Enable Register */
#define DER_RSTE 0x40000000 /* Reset Interrupt */
#define DER_CHSTPE 0x20000000 /* Check Stop */
#define DER_MCIE 0x10000000 /* Machine Check Interrupt */
@@ -401,7 +409,7 @@
#define SPRN_DPDES 0x0B0 /* Directed Priv. Doorbell Exc. State */
#define SPRN_EAR 0x11A /* External Address Register */
#define SPRN_HASH1 0x3D2 /* Primary Hash Address Register */
-#define SPRN_HASH2 0x3D3 /* Secondary Hash Address Resgister */
+#define SPRN_HASH2 0x3D3 /* Secondary Hash Address Register */
#define SPRN_HID0 0x3F0 /* Hardware Implementation Register 0 */
#define HID0_HDICE_SH (63 - 23) /* 970 HDEC interrupt enable */
#define HID0_EMCP (1<<31) /* Enable Machine Check pin */
@@ -514,7 +522,7 @@
#define ICTRL_EICP 0x00000100 /* enable icache par. check */
#define SPRN_IMISS 0x3D4 /* Instruction TLB Miss Register */
#define SPRN_IMMR 0x27E /* Internal Memory Map Register */
-#define SPRN_L2CR 0x3F9 /* Level 2 Cache Control Regsiter */
+#define SPRN_L2CR 0x3F9 /* Level 2 Cache Control Register */
#define SPRN_L2CR2 0x3f8
#define L2CR_L2E 0x80000000 /* L2 enable */
#define L2CR_L2PE 0x40000000 /* L2 parity enable */
@@ -549,7 +557,7 @@
#define L2CR_L2DO_745x 0x00010000 /* L2 data only (745x) */
#define L2CR_L2REP_745x 0x00001000 /* L2 repl. algorithm (745x) */
#define L2CR_L2HWF_745x 0x00000800 /* L2 hardware flush (745x) */
-#define SPRN_L3CR 0x3FA /* Level 3 Cache Control Regsiter */
+#define SPRN_L3CR 0x3FA /* Level 3 Cache Control Register */
#define L3CR_L3E 0x80000000 /* L3 enable */
#define L3CR_L3PE 0x40000000 /* L3 data parity enable */
#define L3CR_L3APE 0x20000000 /* L3 addr parity enable */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 2fef74b474f0..737e012ef56e 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -681,7 +681,7 @@
#define SPRN_CDBCR 0x3D7 /* Cache Debug Control Register */
#define SPRN_TBHI 0x3DC /* Time Base High */
#define SPRN_TBLO 0x3DD /* Time Base Low */
-#define SPRN_DBCR 0x3F2 /* Debug Control Regsiter */
+#define SPRN_DBCR 0x3F2 /* Debug Control Register */
#define SPRN_PBL1 0x3FC /* Protection Bound Lower 1 */
#define SPRN_PBL2 0x3FE /* Protection Bound Lower 2 */
#define SPRN_PBU1 0x3FD /* Protection Bound Upper 1 */
diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h
index 37d2da6feabf..f280dd11243f 100644
--- a/arch/powerpc/include/asm/smu.h
+++ b/arch/powerpc/include/asm/smu.h
@@ -154,7 +154,7 @@
*
* The Darwin I2C driver is less subtle though. On any non-success status
* from the response command, it waits 5ms and tries again up to 20 times,
- * it doesn't differenciate between fatal errors or "busy" status.
+ * it doesn't differentiate between fatal errors or "busy" status.
*
* This driver provides an asynchronous paramblock based i2c command
* interface to be used either directly by low level code or by a higher
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 5b268b6be74c..17c8380673a6 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -28,12 +28,14 @@ extern void giveup_all(struct task_struct *);
extern void enable_kernel_fp(void);
extern void flush_fp_to_thread(struct task_struct *);
extern void giveup_fpu(struct task_struct *);
-extern void __giveup_fpu(struct task_struct *);
+extern void save_fpu(struct task_struct *);
static inline void disable_kernel_fp(void)
{
msr_check_and_clear(MSR_FP);
}
#else
+static inline void __giveup_fpu(struct task_struct *t) { }
+static inline void save_fpu(struct task_struct *t) { }
static inline void flush_fp_to_thread(struct task_struct *t) { }
#endif
@@ -41,18 +43,19 @@ static inline void flush_fp_to_thread(struct task_struct *t) { }
extern void enable_kernel_altivec(void);
extern void flush_altivec_to_thread(struct task_struct *);
extern void giveup_altivec(struct task_struct *);
-extern void __giveup_altivec(struct task_struct *);
+extern void save_altivec(struct task_struct *);
static inline void disable_kernel_altivec(void)
{
msr_check_and_clear(MSR_VEC);
}
+#else
+static inline void save_altivec(struct task_struct *t) { }
+static inline void __giveup_altivec(struct task_struct *t) { }
#endif
#ifdef CONFIG_VSX
extern void enable_kernel_vsx(void);
extern void flush_vsx_to_thread(struct task_struct *);
-extern void giveup_vsx(struct task_struct *);
-extern void __giveup_vsx(struct task_struct *);
static inline void disable_kernel_vsx(void)
{
msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
@@ -68,6 +71,8 @@ static inline void disable_kernel_spe(void)
{
msr_check_and_clear(MSR_SPE);
}
+#else
+static inline void __giveup_spe(struct task_struct *t) { }
#endif
static inline void clear_task_ebb(struct task_struct *t)
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 23d351ca0303..9f77f85e3e99 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -78,97 +78,7 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
}
#elif defined(CONFIG_PPC_STD_MMU_64)
-
-#define MMU_NO_CONTEXT 0
-
-/*
- * TLB flushing for 64-bit hash-MMU CPUs
- */
-
-#include <linux/percpu.h>
-#include <asm/page.h>
-
-#define PPC64_TLB_BATCH_NR 192
-
-struct ppc64_tlb_batch {
- int active;
- unsigned long index;
- struct mm_struct *mm;
- real_pte_t pte[PPC64_TLB_BATCH_NR];
- unsigned long vpn[PPC64_TLB_BATCH_NR];
- unsigned int psize;
- int ssize;
-};
-DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
-
-extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);
-
-#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
-
-static inline void arch_enter_lazy_mmu_mode(void)
-{
- struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
-
- batch->active = 1;
-}
-
-static inline void arch_leave_lazy_mmu_mode(void)
-{
- struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
-
- if (batch->index)
- __flush_tlb_pending(batch);
- batch->active = 0;
-}
-
-#define arch_flush_lazy_mmu_mode() do {} while (0)
-
-
-extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize,
- int ssize, unsigned long flags);
-extern void flush_hash_range(unsigned long number, int local);
-extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
- pmd_t *pmdp, unsigned int psize, int ssize,
- unsigned long flags);
-
-static inline void local_flush_tlb_mm(struct mm_struct *mm)
-{
-}
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
-}
-
-static inline void local_flush_tlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
-}
-
-static inline void flush_tlb_page_nohash(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
-}
-
-static inline void flush_tlb_kernel_range(unsigned long start,
- unsigned long end)
-{
-}
-
-/* Private function for use by PCI IO mapping code */
-extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
- unsigned long end);
-extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd,
- unsigned long addr);
+#include <asm/book3s/64/tlbflush-hash.h>
#else
#error Unsupported MMU type
#endif
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index 8e86b48d0369..32e36b16773f 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -57,12 +57,14 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit,
extern void hcall_tracepoint_regfunc(void);
extern void hcall_tracepoint_unregfunc(void);
-TRACE_EVENT_FN(hcall_entry,
+TRACE_EVENT_FN_COND(hcall_entry,
TP_PROTO(unsigned long opcode, unsigned long *args),
TP_ARGS(opcode, args),
+ TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
TP_STRUCT__entry(
__field(unsigned long, opcode)
),
@@ -76,13 +78,15 @@ TRACE_EVENT_FN(hcall_entry,
hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
);
-TRACE_EVENT_FN(hcall_exit,
+TRACE_EVENT_FN_COND(hcall_exit,
TP_PROTO(unsigned long opcode, unsigned long retval,
unsigned long *retbuf),
TP_ARGS(opcode, retval, retbuf),
+ TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
TP_STRUCT__entry(
__field(unsigned long, opcode)
__field(unsigned long, retval)
diff --git a/arch/powerpc/include/asm/uninorth.h b/arch/powerpc/include/asm/uninorth.h
index d12b11d7641e..a1d112979fd2 100644
--- a/arch/powerpc/include/asm/uninorth.h
+++ b/arch/powerpc/include/asm/uninorth.h
@@ -132,7 +132,7 @@
/* This one _might_ return the CPU number of the CPU reading it;
* the bootROM decides whether to boot or to sleep/spinloop depending
- * on this register beeing 0 or not
+ * on this register being 0 or not
*/
#define UNI_N_CPU_NUMBER 0x0050
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 0e25bdb190bb..5d61bbced6a1 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -1,5 +1,5 @@
/*
- * Common definitions accross all variants of ICP and ICS interrupt
+ * Common definitions across all variants of ICP and ICS interrupt
* controllers.
*/
diff --git a/arch/powerpc/include/uapi/asm/epapr_hcalls.h b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
index 7f9c74b46704..b4504f394427 100644
--- a/arch/powerpc/include/uapi/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
@@ -78,7 +78,7 @@
#define EV_SUCCESS 0
#define EV_EPERM 1 /* Operation not permitted */
#define EV_ENOENT 2 /* Entry Not Found */
-#define EV_EIO 3 /* I/O error occured */
+#define EV_EIO 3 /* I/O error occurred */
#define EV_EAGAIN 4 /* The operation had insufficient
* resources to complete and should be
* retried
@@ -89,7 +89,7 @@
#define EV_ENODEV 7 /* No such device */
#define EV_EINVAL 8 /* An argument supplied to the hcall
was out of range or invalid */
-#define EV_INTERNAL 9 /* An internal error occured */
+#define EV_INTERNAL 9 /* An internal error occurred */
#define EV_CONFIG 10 /* A configuration error was detected */
#define EV_INVALID_STATE 11 /* The object is in an invalid state */
#define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 07cebc3514f3..10d5eab19458 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -95,12 +95,14 @@ int main(void)
DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state));
DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area));
DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr));
+ DEFINE(THREAD_LOAD_FP, offsetof(struct thread_struct, load_fp));
#ifdef CONFIG_ALTIVEC
DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state));
DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area));
DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr));
+ DEFINE(THREAD_LOAD_VEC, offsetof(struct thread_struct, load_vec));
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr));
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 9c9b7411b28b..584e119fa8b0 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -15,6 +15,7 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cache.h>
+#include <asm/book3s/64/mmu-hash.h>
/* Entry: r3 = crap, r4 = ptr to cputable entry
*
@@ -83,6 +84,39 @@ _GLOBAL(__restore_cpu_power8)
mtlr r11
blr
+_GLOBAL(__setup_cpu_power9)
+ mflr r11
+ bl __init_FSCR
+ bl __init_hvmode_206
+ mtlr r11
+ beqlr
+ li r0,0
+ mtspr SPRN_LPID,r0
+ mfspr r3,SPRN_LPCR
+ ori r3, r3, LPCR_PECEDH
+ bl __init_LPCR
+ bl __init_HFSCR
+ bl __init_tlb_power9
+ mtlr r11
+ blr
+
+_GLOBAL(__restore_cpu_power9)
+ mflr r11
+ bl __init_FSCR
+ mfmsr r3
+ rldicl. r0,r3,4,63
+ mtlr r11
+ beqlr
+ li r0,0
+ mtspr SPRN_LPID,r0
+ mfspr r3,SPRN_LPCR
+ ori r3, r3, LPCR_PECEDH
+ bl __init_LPCR
+ bl __init_HFSCR
+ bl __init_tlb_power9
+ mtlr r11
+ blr
+
__init_hvmode_206:
/* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */
mfmsr r3
@@ -139,7 +173,7 @@ __init_HFSCR:
* (invalidate by congruence class). P7 has 128 CCs., P8 has 512.
*/
__init_tlb_power7:
- li r6,128
+ li r6,POWER7_TLB_SETS
mtctr r6
li r7,0xc00 /* IS field = 0b11 */
ptesync
@@ -150,7 +184,18 @@ __init_tlb_power7:
1: blr
__init_tlb_power8:
- li r6,512
+ li r6,POWER8_TLB_SETS
+ mtctr r6
+ li r7,0xc00 /* IS field = 0b11 */
+ ptesync
+2: tlbiel r7
+ addi r7,r7,0x1000
+ bdnz 2b
+ ptesync
+1: blr
+
+__init_tlb_power9:
+ li r6,POWER9_TLB_SETS_HASH
mtctr r6
li r7,0xc00 /* IS field = 0b11 */
ptesync
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 7d80bfdfb15e..be4d73053bed 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -70,9 +70,12 @@ extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power7(void);
extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power8(void);
+extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
+extern void __restore_cpu_power9(void);
extern void __restore_cpu_a2(void);
extern void __flush_tlb_power7(unsigned int action);
extern void __flush_tlb_power8(unsigned int action);
+extern void __flush_tlb_power9(unsigned int action);
extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
#endif /* CONFIG_PPC64 */
@@ -116,6 +119,11 @@ extern void __restore_cpu_e6500(void);
#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
PPC_FEATURE_TRUE_LE | \
PPC_FEATURE_HAS_ALTIVEC_COMP)
+#define COMMON_USER_POWER9 COMMON_USER_POWER8
+#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \
+ PPC_FEATURE2_ARCH_3_00 | \
+ PPC_FEATURE2_HAS_IEEE128)
+
#ifdef CONFIG_PPC_BOOK3E_64
#define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE)
#else
@@ -499,6 +507,25 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
+ { /* Power9 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004e0000,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power9",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .flush_tlb = __flush_tlb_power9,
+ .platform = "power9",
+ },
{ /* Cell Broadband Engine */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00700000,
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 40e4d4a27663..6544017eb90b 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -268,13 +268,6 @@ static void *eeh_dump_pe_log(void *data, void *flag)
struct eeh_dev *edev, *tmp;
size_t *plen = flag;
- /* If the PE's config space is blocked, 0xFF's will be
- * returned. It's pointless to collect the log in this
- * case.
- */
- if (pe->state & EEH_PE_CFG_BLOCKED)
- return NULL;
-
eeh_pe_for_each_dev(pe, edev, tmp)
*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
EEH_PCI_REGS_LOG_LEN - *plen);
@@ -677,7 +670,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
/* Check if the request is finished successfully */
if (active_flag) {
rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
- if (rc <= 0)
+ if (rc < 0)
return rc;
if (rc & active_flag)
@@ -739,7 +732,7 @@ static void *eeh_restore_dev_state(void *data, void *userdata)
}
/**
- * pcibios_set_pcie_slot_reset - Set PCI-E reset state
+ * pcibios_set_pcie_reset_state - Set PCI-E reset state
* @dev: pci device struct
* @state: reset state to enter
*
@@ -761,7 +754,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
case pcie_deassert_reset:
eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
eeh_unfreeze_pe(pe, false);
- eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
+ if (!(pe->type & EEH_PE_VF))
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
break;
@@ -769,14 +763,16 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
- eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
+ if (!(pe->type & EEH_PE_VF))
+ eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
eeh_ops->reset(pe, EEH_RESET_HOT);
break;
case pcie_warm_reset:
eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
- eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
+ if (!(pe->type & EEH_PE_VF))
+ eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
break;
default:
@@ -1243,6 +1239,14 @@ void eeh_remove_device(struct pci_dev *dev)
* from the parent PE during the BAR resotre.
*/
edev->pdev = NULL;
+
+ /*
+ * The flag "in_error" is used to trace EEH devices for VFs
+ * in error state or not. It's set in eeh_report_error(). If
+ * it's not set, eeh_report_{reset,resume}() won't be called
+ * for the VF EEH device.
+ */
+ edev->in_error = false;
dev->dev.archdata.edev = NULL;
if (!(edev->pe->state & EEH_PE_KEEP))
eeh_rmv_from_parent_pe(edev);
@@ -1537,6 +1541,17 @@ int eeh_pe_get_state(struct eeh_pe *pe)
if (!eeh_ops || !eeh_ops->get_state)
return -ENOENT;
+ /*
+ * If the parent PE is owned by the host kernel and is undergoing
+ * error recovery, we should return the PE state as temporarily
+ * unavailable so that the error recovery on the guest is suspended
+ * until the recovery completes on the host.
+ */
+ if (pe->parent &&
+ !(pe->state & EEH_PE_REMOVED) &&
+ (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
+ return EEH_PE_STATE_UNAVAIL;
+
result = eeh_ops->get_state(pe, NULL);
rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
dma_en = !!(result & EEH_STATE_DMA_ENABLED);
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index a1e86e172e3c..ddbcfab7efdf 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -195,8 +195,11 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
return;
}
- /* Walk resources on this device, poke them into the tree */
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ /*
+ * Walk resources on this device, poke the first 7 (6 normal BAR and 1
+ * ROM BAR) into the tree.
+ */
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
resource_size_t start = pci_resource_start(dev,i);
resource_size_t end = pci_resource_end(dev,i);
unsigned long flags = pci_resource_flags(dev,i);
@@ -222,10 +225,6 @@ void eeh_addr_cache_insert_dev(struct pci_dev *dev)
{
unsigned long flags;
- /* Ignore PCI bridges */
- if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
- return;
-
spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
__eeh_addr_cache_insert_dev(dev);
spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index aabba94ff9cb..7815095fe3d8 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -67,6 +67,7 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data)
edev->pdn = pdn;
edev->phb = phb;
INIT_LIST_HEAD(&edev->list);
+ INIT_LIST_HEAD(&edev->rmv_list);
return NULL;
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 938742135ee0..fb6207d2c604 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -34,6 +34,11 @@
#include <asm/prom.h>
#include <asm/rtas.h>
+struct eeh_rmv_data {
+ struct list_head edev_list;
+ int removed;
+};
+
/**
* eeh_pcid_name - Retrieve name of PCI device driver
* @pdev: PCI device
@@ -190,7 +195,7 @@ static void *eeh_report_error(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
- if (!dev || eeh_dev_removed(edev))
+ if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
dev->error_state = pci_channel_io_frozen;
@@ -211,6 +216,7 @@ static void *eeh_report_error(void *data, void *userdata)
if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+ edev->in_error = true;
eeh_pcid_put(dev);
return NULL;
}
@@ -231,7 +237,7 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
- if (!dev || eeh_dev_removed(edev))
+ if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
driver = eeh_pcid_get(dev);
@@ -271,7 +277,7 @@ static void *eeh_report_reset(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver;
- if (!dev || eeh_dev_removed(edev))
+ if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
dev->error_state = pci_channel_io_normal;
@@ -282,7 +288,8 @@ static void *eeh_report_reset(void *data, void *userdata)
if (!driver->err_handler ||
!driver->err_handler->slot_reset ||
- (edev->mode & EEH_DEV_NO_HANDLER)) {
+ (edev->mode & EEH_DEV_NO_HANDLER) ||
+ (!edev->in_error)) {
eeh_pcid_put(dev);
return NULL;
}
@@ -326,20 +333,23 @@ static void *eeh_report_resume(void *data, void *userdata)
{
struct eeh_dev *edev = (struct eeh_dev *)data;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ bool was_in_error;
struct pci_driver *driver;
- if (!dev || eeh_dev_removed(edev))
+ if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev);
if (!driver) return NULL;
+ was_in_error = edev->in_error;
+ edev->in_error = false;
eeh_enable_irq(dev);
if (!driver->err_handler ||
!driver->err_handler->resume ||
- (edev->mode & EEH_DEV_NO_HANDLER)) {
+ (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
edev->mode &= ~EEH_DEV_NO_HANDLER;
eeh_pcid_put(dev);
return NULL;
@@ -365,7 +375,7 @@ static void *eeh_report_failure(void *data, void *userdata)
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct pci_driver *driver;
- if (!dev || eeh_dev_removed(edev))
+ if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
dev->error_state = pci_channel_io_perm_failure;
@@ -386,12 +396,40 @@ static void *eeh_report_failure(void *data, void *userdata)
return NULL;
}
+static void *eeh_add_virt_device(void *data, void *userdata)
+{
+ struct pci_driver *driver;
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
+ if (!(edev->physfn)) {
+ pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
+ __func__, edev->phb->global_number, pdn->busno,
+ PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+ return NULL;
+ }
+
+ driver = eeh_pcid_get(dev);
+ if (driver) {
+ eeh_pcid_put(dev);
+ if (driver->err_handler)
+ return NULL;
+ }
+
+#ifdef CONFIG_PPC_POWERNV
+ pci_iov_add_virtfn(edev->physfn, pdn->vf_index, 0);
+#endif
+ return NULL;
+}
+
static void *eeh_rmv_device(void *data, void *userdata)
{
struct pci_driver *driver;
struct eeh_dev *edev = (struct eeh_dev *)data;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
- int *removed = (int *)userdata;
+ struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
+ int *removed = rmv_data ? &rmv_data->removed : NULL;
/*
* Actually, we should remove the PCI bridges as well.
@@ -416,10 +454,13 @@ static void *eeh_rmv_device(void *data, void *userdata)
driver = eeh_pcid_get(dev);
if (driver) {
eeh_pcid_put(dev);
- if (driver->err_handler &&
+ if (removed &&
+ eeh_pe_passed(edev->pe))
+ return NULL;
+ if (removed &&
+ driver->err_handler &&
driver->err_handler->error_detected &&
- driver->err_handler->slot_reset &&
- driver->err_handler->resume)
+ driver->err_handler->slot_reset)
return NULL;
}
@@ -428,11 +469,29 @@ static void *eeh_rmv_device(void *data, void *userdata)
pci_name(dev));
edev->bus = dev->bus;
edev->mode |= EEH_DEV_DISCONNECTED;
- (*removed)++;
+ if (removed)
+ (*removed)++;
- pci_lock_rescan_remove();
- pci_stop_and_remove_bus_device(dev);
- pci_unlock_rescan_remove();
+ if (edev->physfn) {
+#ifdef CONFIG_PPC_POWERNV
+ struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
+ pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0);
+ edev->pdev = NULL;
+
+ /*
+ * We have to set the VF PE number to invalid one, which is
+ * required to plug the VF successfully.
+ */
+ pdn->pe_number = IODA_INVALID_PE;
+#endif
+ if (rmv_data)
+ list_add(&edev->rmv_list, &rmv_data->edev_list);
+ } else {
+ pci_lock_rescan_remove();
+ pci_stop_and_remove_bus_device(dev);
+ pci_unlock_rescan_remove();
+ }
return NULL;
}
@@ -546,11 +605,13 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
* During the reset, udev might be invoked because those affected
* PCI devices will be removed and then added.
*/
-static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
+static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
+ struct eeh_rmv_data *rmv_data)
{
struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
struct timeval tstamp;
- int cnt, rc, removed = 0;
+ int cnt, rc;
+ struct eeh_dev *edev;
/* pcibios will clear the counter; save the value */
cnt = pe->freeze_count;
@@ -564,11 +625,16 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
*/
eeh_pe_state_mark(pe, EEH_PE_KEEP);
if (bus) {
- pci_lock_rescan_remove();
- pcibios_remove_pci_devices(bus);
- pci_unlock_rescan_remove();
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ pci_lock_rescan_remove();
+ pcibios_remove_pci_devices(bus);
+ pci_unlock_rescan_remove();
+ }
} else if (frozen_bus) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data);
}
/*
@@ -610,14 +676,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
* PE. We should disconnect it so the binding can be
* rebuilt when adding PCI devices.
*/
+ edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
- pcibios_add_pci_devices(bus);
- } else if (frozen_bus && removed) {
+ if (pe->type & EEH_PE_VF)
+ eeh_add_virt_device(edev, NULL);
+ else
+ pcibios_add_pci_devices(bus);
+ } else if (frozen_bus && rmv_data->removed) {
pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
ssleep(5);
+ edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
- pcibios_add_pci_devices(frozen_bus);
+ if (pe->type & EEH_PE_VF)
+ eeh_add_virt_device(edev, NULL);
+ else
+ pcibios_add_pci_devices(frozen_bus);
}
eeh_pe_state_clear(pe, EEH_PE_KEEP);
@@ -636,8 +710,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
static void eeh_handle_normal_event(struct eeh_pe *pe)
{
struct pci_bus *frozen_bus;
+ struct eeh_dev *edev, *tmp;
int rc = 0;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
+ struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
frozen_bus = eeh_pe_bus_get(pe);
if (!frozen_bus) {
@@ -692,7 +768,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
*/
if (result == PCI_ERS_RESULT_NONE) {
pr_info("EEH: Reset with hotplug activity\n");
- rc = eeh_reset_device(pe, frozen_bus);
+ rc = eeh_reset_device(pe, frozen_bus, NULL);
if (rc) {
pr_warn("%s: Unable to reset, err=%d\n",
__func__, rc);
@@ -744,7 +820,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
/* If any device called out for a reset, then reset the slot */
if (result == PCI_ERS_RESULT_NEED_RESET) {
pr_info("EEH: Reset without hotplug activity\n");
- rc = eeh_reset_device(pe, NULL);
+ rc = eeh_reset_device(pe, NULL, &rmv_data);
if (rc) {
pr_warn("%s: Cannot reset, err=%d\n",
__func__, rc);
@@ -764,6 +840,15 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
goto hard_fail;
}
+ /*
+ * For those hot removed VFs, we should add back them after PF get
+ * recovered properly.
+ */
+ list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) {
+ eeh_add_virt_device(edev, NULL);
+ list_del(&edev->rmv_list);
+ }
+
/* Tell all device drivers that they can resume operations */
pr_info("EEH: Notify device driver to resume\n");
eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
@@ -803,11 +888,17 @@ perm_error:
* the their PCI config any more.
*/
if (frozen_bus) {
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- pci_lock_rescan_remove();
- pcibios_remove_pci_devices(frozen_bus);
- pci_unlock_rescan_remove();
+ pci_lock_rescan_remove();
+ pcibios_remove_pci_devices(frozen_bus);
+ pci_unlock_rescan_remove();
+ }
}
}
@@ -886,6 +977,7 @@ static void eeh_handle_special_event(void)
continue;
/* Notify all devices to be down */
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
bus = eeh_pe_bus_get(phb_pe);
eeh_pe_dev_traverse(pe,
eeh_report_failure, NULL);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index ca9e5371930e..eea48d8baf49 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -299,7 +299,10 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
* EEH device already having associated PE, but
* the direct parent EEH device doesn't have yet.
*/
- pdn = pdn ? pdn->parent : NULL;
+ if (edev->physfn)
+ pdn = pci_get_pdn(edev->physfn);
+ else
+ pdn = pdn ? pdn->parent : NULL;
while (pdn) {
/* We're poking out of PCI territory */
parent = pdn_to_eeh_dev(pdn);
@@ -382,7 +385,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
}
/* Create a new EEH PE */
- pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
+ if (edev->physfn)
+ pe = eeh_pe_alloc(edev->phb, EEH_PE_VF);
+ else
+ pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
if (!pe) {
pr_err("%s: out of memory!\n", __func__);
return -ENOMEM;
@@ -920,25 +926,21 @@ const char *eeh_pe_loc_get(struct eeh_pe *pe)
*/
struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
{
- struct pci_bus *bus = NULL;
struct eeh_dev *edev;
struct pci_dev *pdev;
- if (pe->type & EEH_PE_PHB) {
- bus = pe->phb->bus;
- } else if (pe->type & EEH_PE_BUS ||
- pe->type & EEH_PE_DEVICE) {
- if (pe->bus) {
- bus = pe->bus;
- goto out;
- }
+ if (pe->type & EEH_PE_PHB)
+ return pe->phb->bus;
- edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
- pdev = eeh_dev_to_pci_dev(edev);
- if (pdev)
- bus = pdev->bus;
- }
+ /* The primary bus might be cached during probe time */
+ if (pe->state & EEH_PE_PRI_BUS)
+ return pe->bus;
-out:
- return bus;
+ /* Retrieve the parent PCI bus of first (top) PCI device */
+ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list);
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (pdev)
+ return pdev->bus;
+
+ return NULL;
}
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index ec7f8aada697..b9b125327f27 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -210,7 +210,20 @@ system_call: /* label this so stack traces look sane */
li r11,-MAX_ERRNO
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
bne- syscall_exit_work
- cmpld r3,r11
+
+ andi. r0,r8,MSR_FP
+ beq 2f
+#ifdef CONFIG_ALTIVEC
+ andis. r0,r8,MSR_VEC@h
+ bne 3f
+#endif
+2: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl restore_math
+ ld r8,_MSR(r1)
+ ld r3,RESULT(r1)
+ li r11,-MAX_ERRNO
+
+3: cmpld r3,r11
ld r5,_CCR(r1)
bge- syscall_error
.Lsyscall_error_cont:
@@ -602,8 +615,8 @@ _GLOBAL(ret_from_except_lite)
/* Check current_thread_info()->flags */
andi. r0,r4,_TIF_USER_WORK_MASK
-#ifdef CONFIG_PPC_BOOK3E
bne 1f
+#ifdef CONFIG_PPC_BOOK3E
/*
* Check to see if the dbcr0 register is set up to debug.
* Use the internal debug mode bit to do this.
@@ -618,7 +631,9 @@ _GLOBAL(ret_from_except_lite)
mtspr SPRN_DBSR,r10
b restore
#else
- beq restore
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl restore_math
+ b restore
#endif
1: andi. r0,r4,_TIF_NEED_RESCHED
beq 2f
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 2117eaca3d28..15da2b5df85e 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -130,6 +130,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
or r12,r12,r4
std r12,_MSR(r1)
#endif
+ /* Don't care if r4 overflows, this is desired behaviour */
+ lbz r4,THREAD_LOAD_FP(r5)
+ addi r4,r4,1
+ stb r4,THREAD_LOAD_FP(r5)
addi r10,r5,THREAD_FPSTATE
lfd fr0,FPSTATE_FPSCR(r10)
MTFSF_L(fr0)
@@ -139,33 +143,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
blr
/*
- * __giveup_fpu(tsk)
- * Disable FP for the task given as the argument,
- * and save the floating-point registers in its thread_struct.
+ * save_fpu(tsk)
+ * Save the floating-point registers in its thread_struct.
* Enables the FPU for use in the kernel on return.
*/
-_GLOBAL(__giveup_fpu)
+_GLOBAL(save_fpu)
addi r3,r3,THREAD /* want THREAD of task */
PPC_LL r6,THREAD_FPSAVEAREA(r3)
PPC_LL r5,PT_REGS(r3)
PPC_LCMPI 0,r6,0
bne 2f
addi r6,r3,THREAD_FPSTATE
-2: PPC_LCMPI 0,r5,0
- SAVE_32FPVSRS(0, R4, R6)
+2: SAVE_32FPVSRS(0, R4, R6)
mffs fr0
stfd fr0,FPSTATE_FPSCR(r6)
- beq 1f
- PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
- li r3,MSR_FP|MSR_FE0|MSR_FE1
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
- oris r3,r3,MSR_VSX@h
-END_FTR_SECTION_IFSET(CPU_FTR_VSX)
-#endif
- andc r4,r4,r3 /* disable FP for previous task */
- PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
blr
/*
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index b5061abbd2e0..9cdf5c71e426 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -806,7 +806,7 @@ _GLOBAL(set_context)
_GLOBAL(init_cpu_state)
mflr r22
#ifdef CONFIG_PPC_47x
- /* We use the PVR to differenciate 44x cores from 476 */
+ /* We use the PVR to differentiate 44x cores from 476 */
mfspr r3,SPRN_PVR
srwi r3,r3,16
cmplwi cr0,r3,PVR_476FPE@h
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index cf4fb5429cf1..470ceebd2d23 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -19,7 +19,7 @@
#include <asm/kvm_book3s_asm.h>
#include <asm/opal.h>
#include <asm/cpuidle.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#undef DEBUG
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index e77c3ccf8dcf..dbf098121ce6 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -445,7 +445,11 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
* Global data
*/
struct kgdb_arch arch_kgdb_ops = {
+#ifdef __LITTLE_ENDIAN__
+ .gdb_bpt_instr = {0x08, 0x10, 0x82, 0x7d},
+#else
.gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
+#endif
};
static int kgdb_not_implemented(struct pt_regs *regs)
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 2c647b1e62e4..ee62b197502d 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -54,8 +54,8 @@ static void flush_tlb_206(unsigned int num_sets, unsigned int action)
}
/*
- * Generic routine to flush TLB on power7. This routine is used as
- * flush_tlb hook in cpu_spec for Power7 processor.
+ * Generic routines to flush TLB on POWER processors. These routines
+ * are used as flush_tlb hook in the cpu_spec.
*
* action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs.
* TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID.
@@ -65,18 +65,17 @@ void __flush_tlb_power7(unsigned int action)
flush_tlb_206(POWER7_TLB_SETS, action);
}
-/*
- * Generic routine to flush TLB on power8. This routine is used as
- * flush_tlb hook in cpu_spec for power8 processor.
- *
- * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs.
- * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID.
- */
void __flush_tlb_power8(unsigned int action)
{
flush_tlb_206(POWER8_TLB_SETS, action);
}
+void __flush_tlb_power9(unsigned int action)
+{
+ flush_tlb_206(POWER9_TLB_SETS_HASH, action);
+}
+
+
/* flush SLBs and reload */
static void flush_and_reload_slb(void)
{
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 848b47499a27..9ce9a25f58b5 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -311,7 +311,7 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
if (name[0] == '.') {
if (strcmp(name+1, "TOC.") == 0)
syms[i].st_shndx = SHN_ABS;
- memmove(name, name+1, strlen(name));
+ syms[i].st_name++;
}
}
}
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 7f9ed0c1f6b9..59c436189f46 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -55,7 +55,7 @@ void pcibios_remove_pci_devices(struct pci_bus *bus)
pr_debug("PCI: Removing devices on bus %04x:%02x\n",
pci_domain_nr(bus), bus->number);
- list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
+ list_for_each_entry_safe_reverse(dev, tmp, &bus->devices, bus_list) {
pr_debug(" Removing %s...\n", pci_name(dev));
pci_stop_and_remove_bus_device(dev);
}
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index b3b4df91b792..38102cb9baa9 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -139,6 +139,7 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
#ifdef CONFIG_PCI_IOV
static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
struct pci_dev *pdev,
+ int vf_index,
int busno, int devfn)
{
struct pci_dn *pdn;
@@ -158,6 +159,7 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
pdn->busno = busno;
pdn->devfn = devfn;
#ifdef CONFIG_PPC_POWERNV
+ pdn->vf_index = vf_index;
pdn->pe_number = IODA_INVALID_PE;
#endif
INIT_LIST_HEAD(&pdn->child_list);
@@ -179,6 +181,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
{
#ifdef CONFIG_PCI_IOV
struct pci_dn *parent, *pdn;
+ struct eeh_dev *edev;
int i;
/* Only support IOV for now */
@@ -196,7 +199,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
return NULL;
for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
- pdn = add_one_dev_pci_data(parent, NULL,
+ pdn = add_one_dev_pci_data(parent, NULL, i,
pci_iov_virtfn_bus(pdev, i),
pci_iov_virtfn_devfn(pdev, i));
if (!pdn) {
@@ -204,6 +207,12 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
__func__, i);
return NULL;
}
+
+ /* Create the EEH device for the VF */
+ eeh_dev_init(pdn, pci_bus_to_host(pdev->bus));
+ edev = pdn_to_eeh_dev(pdn);
+ BUG_ON(!edev);
+ edev->physfn = pdev;
}
#endif /* CONFIG_PCI_IOV */
@@ -215,6 +224,7 @@ void remove_dev_pci_data(struct pci_dev *pdev)
#ifdef CONFIG_PCI_IOV
struct pci_dn *parent;
struct pci_dn *pdn, *tmp;
+ struct eeh_dev *edev;
int i;
/*
@@ -256,6 +266,13 @@ void remove_dev_pci_data(struct pci_dev *pdev)
pdn->devfn != pci_iov_virtfn_devfn(pdev, i))
continue;
+ /* Release EEH device for the VF */
+ edev = pdn_to_eeh_dev(pdn);
+ if (edev) {
+ pdn->edev = NULL;
+ kfree(edev);
+ }
+
if (!list_empty(&pdn->list))
list_del(&pdn->list);
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 41e1607e800c..ef7024dacff7 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -28,10 +28,6 @@ EXPORT_SYMBOL(load_vr_state);
EXPORT_SYMBOL(store_vr_state);
#endif
-#ifdef CONFIG_VSX
-EXPORT_SYMBOL_GPL(__giveup_vsx);
-#endif
-
#ifdef CONFIG_EPAPR_PARAVIRT
EXPORT_SYMBOL(epapr_hypercall_start);
#endif
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dccc87e8fee5..d7a9df51b974 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -133,6 +133,16 @@ void __msr_check_and_clear(unsigned long bits)
EXPORT_SYMBOL(__msr_check_and_clear);
#ifdef CONFIG_PPC_FPU
+void __giveup_fpu(struct task_struct *tsk)
+{
+ save_fpu(tsk);
+ tsk->thread.regs->msr &= ~MSR_FP;
+#ifdef CONFIG_VSX
+ if (cpu_has_feature(CPU_FTR_VSX))
+ tsk->thread.regs->msr &= ~MSR_VSX;
+#endif
+}
+
void giveup_fpu(struct task_struct *tsk)
{
check_if_tm_restore_required(tsk);
@@ -187,9 +197,32 @@ void enable_kernel_fp(void)
}
}
EXPORT_SYMBOL(enable_kernel_fp);
+
+static int restore_fp(struct task_struct *tsk) {
+ if (tsk->thread.load_fp) {
+ load_fp_state(&current->thread.fp_state);
+ current->thread.load_fp++;
+ return 1;
+ }
+ return 0;
+}
+#else
+static int restore_fp(struct task_struct *tsk) { return 0; }
#endif /* CONFIG_PPC_FPU */
#ifdef CONFIG_ALTIVEC
+#define loadvec(thr) ((thr).load_vec)
+
+static void __giveup_altivec(struct task_struct *tsk)
+{
+ save_altivec(tsk);
+ tsk->thread.regs->msr &= ~MSR_VEC;
+#ifdef CONFIG_VSX
+ if (cpu_has_feature(CPU_FTR_VSX))
+ tsk->thread.regs->msr &= ~MSR_VSX;
+#endif
+}
+
void giveup_altivec(struct task_struct *tsk)
{
check_if_tm_restore_required(tsk);
@@ -229,22 +262,49 @@ void flush_altivec_to_thread(struct task_struct *tsk)
}
}
EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
+
+static int restore_altivec(struct task_struct *tsk)
+{
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) && tsk->thread.load_vec) {
+ load_vr_state(&tsk->thread.vr_state);
+ tsk->thread.used_vr = 1;
+ tsk->thread.load_vec++;
+
+ return 1;
+ }
+ return 0;
+}
+#else
+#define loadvec(thr) 0
+static inline int restore_altivec(struct task_struct *tsk) { return 0; }
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
-void giveup_vsx(struct task_struct *tsk)
+static void __giveup_vsx(struct task_struct *tsk)
{
- check_if_tm_restore_required(tsk);
-
- msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
if (tsk->thread.regs->msr & MSR_FP)
__giveup_fpu(tsk);
if (tsk->thread.regs->msr & MSR_VEC)
__giveup_altivec(tsk);
+ tsk->thread.regs->msr &= ~MSR_VSX;
+}
+
+static void giveup_vsx(struct task_struct *tsk)
+{
+ check_if_tm_restore_required(tsk);
+
+ msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
__giveup_vsx(tsk);
msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
}
-EXPORT_SYMBOL(giveup_vsx);
+
+static void save_vsx(struct task_struct *tsk)
+{
+ if (tsk->thread.regs->msr & MSR_FP)
+ save_fpu(tsk);
+ if (tsk->thread.regs->msr & MSR_VEC)
+ save_altivec(tsk);
+}
void enable_kernel_vsx(void)
{
@@ -275,6 +335,19 @@ void flush_vsx_to_thread(struct task_struct *tsk)
}
}
EXPORT_SYMBOL_GPL(flush_vsx_to_thread);
+
+static int restore_vsx(struct task_struct *tsk)
+{
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ tsk->thread.used_vsr = 1;
+ return 1;
+ }
+
+ return 0;
+}
+#else
+static inline int restore_vsx(struct task_struct *tsk) { return 0; }
+static inline void save_vsx(struct task_struct *tsk) { }
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
@@ -374,12 +447,76 @@ void giveup_all(struct task_struct *tsk)
}
EXPORT_SYMBOL(giveup_all);
+void restore_math(struct pt_regs *regs)
+{
+ unsigned long msr;
+
+ if (!current->thread.load_fp && !loadvec(current->thread))
+ return;
+
+ msr = regs->msr;
+ msr_check_and_set(msr_all_available);
+
+ /*
+ * Only reload if the bit is not set in the user MSR, the bit BEING set
+ * indicates that the registers are hot
+ */
+ if ((!(msr & MSR_FP)) && restore_fp(current))
+ msr |= MSR_FP | current->thread.fpexc_mode;
+
+ if ((!(msr & MSR_VEC)) && restore_altivec(current))
+ msr |= MSR_VEC;
+
+ if ((msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC) &&
+ restore_vsx(current)) {
+ msr |= MSR_VSX;
+ }
+
+ msr_check_and_clear(msr_all_available);
+
+ regs->msr = msr;
+}
+
+void save_all(struct task_struct *tsk)
+{
+ unsigned long usermsr;
+
+ if (!tsk->thread.regs)
+ return;
+
+ usermsr = tsk->thread.regs->msr;
+
+ if ((usermsr & msr_all_available) == 0)
+ return;
+
+ msr_check_and_set(msr_all_available);
+
+ /*
+ * Saving the way the register space is in hardware, save_vsx boils
+ * down to a save_fpu() and save_altivec()
+ */
+ if (usermsr & MSR_VSX) {
+ save_vsx(tsk);
+ } else {
+ if (usermsr & MSR_FP)
+ save_fpu(tsk);
+
+ if (usermsr & MSR_VEC)
+ save_altivec(tsk);
+ }
+
+ if (usermsr & MSR_SPE)
+ __giveup_spe(tsk);
+
+ msr_check_and_clear(msr_all_available);
+}
+
void flush_all_to_thread(struct task_struct *tsk)
{
if (tsk->thread.regs) {
preempt_disable();
BUG_ON(tsk != current);
- giveup_all(tsk);
+ save_all(tsk);
#ifdef CONFIG_SPE
if (tsk->thread.regs->msr & MSR_SPE)
@@ -832,17 +969,9 @@ void restore_tm_state(struct pt_regs *regs)
msr_diff = current->thread.ckpt_regs.msr & ~regs->msr;
msr_diff &= MSR_FP | MSR_VEC | MSR_VSX;
- if (msr_diff & MSR_FP) {
- msr_check_and_set(MSR_FP);
- load_fp_state(&current->thread.fp_state);
- msr_check_and_clear(MSR_FP);
- regs->msr |= current->thread.fpexc_mode;
- }
- if (msr_diff & MSR_VEC) {
- msr_check_and_set(MSR_VEC);
- load_vr_state(&current->thread.vr_state);
- msr_check_and_clear(MSR_VEC);
- }
+
+ restore_math(regs);
+
regs->msr |= msr_diff;
}
@@ -1006,6 +1135,10 @@ struct task_struct *__switch_to(struct task_struct *prev,
batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
+
+ if (current_thread_info()->task->thread.regs)
+ restore_math(current_thread_info()->task->thread.regs);
+
#endif /* CONFIG_PPC_BOOK3S_64 */
return last;
@@ -1307,6 +1440,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
f = ret_from_fork;
}
+ childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
sp -= STACK_FRAME_OVERHEAD;
/*
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index cf8c7e4e0b21..cb64d6feb45a 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -1,7 +1,7 @@
/*
* Common signal handling code for both 32 and 64 bits
*
- * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration
+ * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
* Extracted from signal_32.c and signal_64.c
*
* This file is subject to the terms and conditions of the GNU General
@@ -178,7 +178,7 @@ unsigned long get_tm_stackpointer(struct pt_regs *regs)
* need to use the stack pointer from the checkpointed state, rather
* than the speculated state. This ensures that the signal context
* (written tm suspended) will be written below the stack required for
- * the rollback. The transaction is aborted becuase of the treclaim,
+ * the rollback. The transaction is aborted because of the treclaim,
* so any memory written between the tbegin and the signal will be
* rolled back anyway.
*
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 51b274199dd9..be305c858e51 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration
+ * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
* Extracted from signal_32.c and signal_64.c
*
* This file is subject to the terms and conditions of the GNU General
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index b6becc795bb5..88414dde7e35 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1148,6 +1148,7 @@ void __kprobes program_check_exception(struct pt_regs *regs)
goto bail;
}
if (reason & REASON_TRAP) {
+ unsigned long bugaddr;
/* Debugger is first in line to stop recursive faults in
* rcu_lock, notify_die, or atomic_notifier_call_chain */
if (debugger_bpt(regs))
@@ -1158,8 +1159,15 @@ void __kprobes program_check_exception(struct pt_regs *regs)
== NOTIFY_STOP)
goto bail;
+ bugaddr = regs->nip;
+ /*
+ * Fixup bugaddr for BUG_ON() in real mode
+ */
+ if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR))
+ bugaddr += PAGE_OFFSET;
+
if (!(regs->msr & MSR_PR) && /* not user-mode */
- report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) {
+ report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
regs->nip += 4;
goto bail;
}
@@ -1394,7 +1402,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
* is a read DSCR attempt through a mfspr instruction, we
* just emulate the instruction instead. This code path will
* always emulate all the mfspr instructions till the user
- * has attempted atleast one mtspr instruction. This way it
+ * has attempted at least one mtspr instruction. This way it
* preserves the same behaviour when the user is accessing
* the DSCR through privilege level only SPR number (0x11)
* which is emulated through illegal instruction exception.
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 162d0f714941..1c2e7a343bf5 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -91,6 +91,10 @@ _GLOBAL(load_up_altivec)
oris r12,r12,MSR_VEC@h
std r12,_MSR(r1)
#endif
+ /* Don't care if r4 overflows, this is desired behaviour */
+ lbz r4,THREAD_LOAD_VEC(r5)
+ addi r4,r4,1
+ stb r4,THREAD_LOAD_VEC(r5)
addi r6,r5,THREAD_VRSTATE
li r4,1
li r10,VRSTATE_VSCR
@@ -102,36 +106,20 @@ _GLOBAL(load_up_altivec)
blr
/*
- * __giveup_altivec(tsk)
- * Disable VMX for the task given as the argument,
- * and save the vector registers in its thread_struct.
+ * save_altivec(tsk)
+ * Save the vector registers to its thread_struct
*/
-_GLOBAL(__giveup_altivec)
+_GLOBAL(save_altivec)
addi r3,r3,THREAD /* want THREAD of task */
PPC_LL r7,THREAD_VRSAVEAREA(r3)
PPC_LL r5,PT_REGS(r3)
PPC_LCMPI 0,r7,0
bne 2f
addi r7,r3,THREAD_VRSTATE
-2: PPC_LCMPI 0,r5,0
- SAVE_32VRS(0,r4,r7)
+2: SAVE_32VRS(0,r4,r7)
mfvscr v0
li r4,VRSTATE_VSCR
stvx v0,r4,r7
- beq 1f
- PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
- lis r3,(MSR_VEC|MSR_VSX)@h
-FTR_SECTION_ELSE
- lis r3,MSR_VEC@h
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
-#else
- lis r3,MSR_VEC@h
-#endif
- andc r4,r4,r3 /* disable FP for previous task */
- PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
blr
#ifdef CONFIG_VSX
@@ -163,23 +151,6 @@ _GLOBAL(load_up_vsx)
std r12,_MSR(r1)
b fast_exception_return
-/*
- * __giveup_vsx(tsk)
- * Disable VSX for the task given as the argument.
- * Does NOT save vsx registers.
- */
-_GLOBAL(__giveup_vsx)
- addi r3,r3,THREAD /* want THREAD of task */
- ld r5,PT_REGS(r3)
- cmpdi 0,r5,0
- beq 1f
- ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
- lis r3,MSR_VSX@h
- andc r4,r4,r3 /* disable VSX for previous task */
- std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
- blr
-
#endif /* CONFIG_VSX */
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 55c4d51ea3e2..999106991a76 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -22,7 +22,7 @@
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash32.h>
+#include <asm/book3s/32/mmu-hash.h>
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 9bf7031a67ff..b9131aa1aedf 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -26,7 +26,7 @@
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
/* #define DEBUG_MMU */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 913cd2198fa6..114edace6cdd 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -23,7 +23,7 @@
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index fb37290a57b4..c7b78d8336b2 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -32,7 +32,7 @@
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 54cf9bc94dad..9c3b76bb69d9 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -30,7 +30,7 @@
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 89e96b3e0039..039028d3ccb5 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -29,7 +29,7 @@
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 91700518bbf3..4cb8db05f3e5 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -17,7 +17,7 @@
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 6ee26de9a1de..c613fee0b9f7 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -27,7 +27,7 @@
#include <asm/asm-offsets.h>
#include <asm/exception-64s.h>
#include <asm/kvm_book3s_asm.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/tm.h>
#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 905e94a1370f..46871d554057 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -432,7 +432,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* the whole masked_pending business which is about not
* losing interrupts that occur while masked.
*
- * I don't differenciate normal deliveries and resends, this
+ * I don't differentiate normal deliveries and resends, this
* implementation will differ from PAPR and not lose such
* interrupts.
*/
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 778ef86e187e..4d66f44a1657 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -992,7 +992,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_restart_interrupt(vcpu, exit_nr);
/*
- * get last instruction before beeing preempted
+ * get last instruction before being preempted
* TODO: for e6500 check also BOOKE_INTERRUPT_LRAT_ERROR & ESR_DATA
*/
switch (exit_nr) {
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index cda695de8aa7..f48a0c22e8f9 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -182,7 +182,7 @@ int kvmppc_core_check_processor_compat(void)
r = 0;
#ifdef CONFIG_ALTIVEC
/*
- * Since guests have the priviledge to enable AltiVec, we need AltiVec
+ * Since guests have the privilege to enable AltiVec, we need AltiVec
* support in the host to save/restore their context.
* Don't use CPU_FTR_ALTIVEC to identify cores with AltiVec unit
* because it's cleared in the absence of CONFIG_ALTIVEC!
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index e7c04542ba62..47d1b26effc6 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -44,7 +44,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
* a write access. Since this is 4K insert of 64K page size
* also add _PAGE_COMBO
*/
- new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE;
+ new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
if (access & _PAGE_RW)
new_pte |= _PAGE_DIRTY;
} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
@@ -106,7 +106,7 @@ repeat:
}
}
/*
- * Hypervisor failure. Restore old pmd and return -1
+ * Hypervisor failure. Restore old pte and return -1
* similar to __hash_page_*
*/
if (unlikely(slot == -2)) {
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 0762c1e08c88..b2d659cf51c6 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -111,7 +111,13 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
*/
if (!(old_pte & _PAGE_COMBO)) {
flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
- old_pte &= ~_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND;
+ /*
+ * clear the old slot details from the old and new pte.
+ * On hash insert failure we use old pte value and we don't
+ * want slot information there if we have a insert failure.
+ */
+ old_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
+ new_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
goto htab_insert_hpte;
}
/*
@@ -182,7 +188,7 @@ repeat:
}
}
/*
- * Hypervisor failure. Restore old pmd and return -1
+ * Hypervisor failure. Restore old pte and return -1
* similar to __hash_page_*
*/
if (unlikely(slot == -2)) {
@@ -243,8 +249,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
return 0;
/*
* Try to lock the PTE, add ACCESSED and DIRTY if it was
- * a write access. Since this is 4K insert of 64K page size
- * also add _PAGE_COMBO
+ * a write access.
*/
new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
if (access & _PAGE_RW)
@@ -305,7 +310,7 @@ repeat:
}
}
/*
- * Hypervisor failure. Restore old pmd and return -1
+ * Hypervisor failure. Restore old pte and return -1
* similar to __hash_page_*
*/
if (unlikely(slot == -2)) {
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index ba59d5977f34..90dd9280894f 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -168,11 +168,11 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
rflags |= HPTE_R_N;
/*
* PP bits:
- * Linux use slb key 0 for kernel and 1 for user.
- * kernel areas are mapped by PP bits 00
- * and and there is no kernel RO (_PAGE_KERNEL_RO).
- * User area mapped by 0x2 and read only use by
- * 0x3.
+ * Linux uses slb key 0 for kernel and 1 for user.
+ * kernel areas are mapped with PP=00
+ * and there is no kernel RO (_PAGE_KERNEL_RO).
+ * User area is mapped with PP=0x2 for read/write
+ * or PP=0x3 for read-only (including writeable but clean pages).
*/
if (pteflags & _PAGE_USER) {
rflags |= 0x2;
@@ -263,28 +263,32 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
return ret < 0 ? ret : 0;
}
-#ifdef CONFIG_MEMORY_HOTPLUG
int htab_remove_mapping(unsigned long vstart, unsigned long vend,
int psize, int ssize)
{
unsigned long vaddr;
unsigned int step, shift;
+ int rc;
+ int ret = 0;
shift = mmu_psize_defs[psize].shift;
step = 1 << shift;
- if (!ppc_md.hpte_removebolted) {
- printk(KERN_WARNING "Platform doesn't implement "
- "hpte_removebolted\n");
- return -EINVAL;
- }
+ if (!ppc_md.hpte_removebolted)
+ return -ENODEV;
- for (vaddr = vstart; vaddr < vend; vaddr += step)
- ppc_md.hpte_removebolted(vaddr, psize, ssize);
+ for (vaddr = vstart; vaddr < vend; vaddr += step) {
+ rc = ppc_md.hpte_removebolted(vaddr, psize, ssize);
+ if (rc == -ENOENT) {
+ ret = -ENOENT;
+ continue;
+ }
+ if (rc < 0)
+ return rc;
+ }
- return 0;
+ return ret;
}
-#endif /* CONFIG_MEMORY_HOTPLUG */
static int __init htab_dt_scan_seg_sizes(unsigned long node,
const char *uname, int depth,
@@ -605,10 +609,28 @@ static int __init htab_dt_scan_pftsize(unsigned long node,
return 0;
}
-static unsigned long __init htab_get_table_size(void)
+unsigned htab_shift_for_mem_size(unsigned long mem_size)
{
- unsigned long mem_size, rnd_mem_size, pteg_count, psize;
+ unsigned memshift = __ilog2(mem_size);
+ unsigned pshift = mmu_psize_defs[mmu_virtual_psize].shift;
+ unsigned pteg_shift;
+
+ /* round mem_size up to next power of 2 */
+ if ((1UL << memshift) < mem_size)
+ memshift += 1;
+
+ /* aim for 2 pages / pteg */
+ pteg_shift = memshift - (pshift + 1);
+ /*
+ * 2^11 PTEGS of 128 bytes each, ie. 2^18 bytes is the minimum htab
+ * size permitted by the architecture.
+ */
+ return max(pteg_shift + 7, 18U);
+}
+
+static unsigned long __init htab_get_table_size(void)
+{
/* If hash size isn't already provided by the platform, we try to
* retrieve it from the device-tree. If it's not there neither, we
* calculate it now based on the total RAM size
@@ -618,31 +640,30 @@ static unsigned long __init htab_get_table_size(void)
if (ppc64_pft_size)
return 1UL << ppc64_pft_size;
- /* round mem_size up to next power of 2 */
- mem_size = memblock_phys_mem_size();
- rnd_mem_size = 1UL << __ilog2(mem_size);
- if (rnd_mem_size < mem_size)
- rnd_mem_size <<= 1;
-
- /* # pages / 2 */
- psize = mmu_psize_defs[mmu_virtual_psize].shift;
- pteg_count = max(rnd_mem_size >> (psize + 1), 1UL << 11);
-
- return pteg_count << 7;
+ return 1UL << htab_shift_for_mem_size(memblock_phys_mem_size());
}
#ifdef CONFIG_MEMORY_HOTPLUG
int create_section_mapping(unsigned long start, unsigned long end)
{
- return htab_bolt_mapping(start, end, __pa(start),
- pgprot_val(PAGE_KERNEL), mmu_linear_psize,
- mmu_kernel_ssize);
+ int rc = htab_bolt_mapping(start, end, __pa(start),
+ pgprot_val(PAGE_KERNEL), mmu_linear_psize,
+ mmu_kernel_ssize);
+
+ if (rc < 0) {
+ int rc2 = htab_remove_mapping(start, end, mmu_linear_psize,
+ mmu_kernel_ssize);
+ BUG_ON(rc2 && (rc2 != -ENOENT));
+ }
+ return rc;
}
int remove_section_mapping(unsigned long start, unsigned long end)
{
- return htab_remove_mapping(start, end, mmu_linear_psize,
- mmu_kernel_ssize);
+ int rc = htab_remove_mapping(start, end, mmu_linear_psize,
+ mmu_kernel_ssize);
+ WARN_ON(rc < 0);
+ return rc;
}
#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 49b152b0f926..eb2accdd76fd 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -78,9 +78,19 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
* base page size. This is because demote_segment won't flush
* hash page table entries.
*/
- if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
+ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) {
flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K,
ssize, flags);
+ /*
+ * With THP, we also clear the slot information with
+ * respect to all the 64K hash pte mapping the 16MB
+ * page. They are all invalid now. This make sure we
+ * don't find the slot valid when we fault with 4k
+ * base page size.
+ *
+ */
+ memset(hpte_slot_array, 0, PTE_FRAG_SIZE);
+ }
}
valid = hpte_valid(hpte_slot_array, index);
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index e2138c7ae70f..8555fce902fe 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -76,7 +76,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
if (old_pte & _PAGE_F_SECOND)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (old_pte & _PAGE_F_GIX) >> 12;
+ slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT;
if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
mmu_psize, ssize, flags) == -1)
@@ -105,7 +105,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
return -1;
}
- new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
+ new_pte |= (slot << _PAGE_F_GIX_SHIFT) &
+ (_PAGE_F_SECOND | _PAGE_F_GIX);
}
/*
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 744e24bcb85c..6dd272b6196f 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -107,8 +107,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
kmem_cache_free(cachep, new);
else {
#ifdef CONFIG_PPC_BOOK3S_64
- hpdp->pd = (unsigned long)new |
- (shift_to_mmu_psize(pshift) << 2);
+ hpdp->pd = __pa(new) | (shift_to_mmu_psize(pshift) << 2);
#else
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#endif
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 379a6a90644b..ba655666186d 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -85,6 +85,11 @@ static void pgd_ctor(void *addr)
memset(addr, 0, PGD_TABLE_SIZE);
}
+static void pud_ctor(void *addr)
+{
+ memset(addr, 0, PUD_TABLE_SIZE);
+}
+
static void pmd_ctor(void *addr)
{
memset(addr, 0, PMD_TABLE_SIZE);
@@ -138,14 +143,18 @@ void pgtable_cache_init(void)
{
pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
+ /*
+ * In all current configs, when the PUD index exists it's the
+ * same size as either the pgd or pmd index except with THP enabled
+ * on book3s 64
+ */
+ if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
+ pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
+
if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX))
panic("Couldn't allocate pgtable caches");
- /* In all current configs, when the PUD index exists it's the
- * same size as either the pgd or pmd index. Verify that the
- * initialization above has also created a PUD cache. This
- * will need re-examiniation if we add new possibilities for
- * the pagetable layout. */
- BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE));
+ if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
+ panic("Couldn't allocate pud pgtable caches");
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
@@ -188,9 +197,9 @@ static int __meminit vmemmap_populated(unsigned long start, int page_size)
*/
#ifdef CONFIG_PPC_BOOK3E
-static void __meminit vmemmap_create_mapping(unsigned long start,
- unsigned long page_size,
- unsigned long phys)
+static int __meminit vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys)
{
/* Create a PTE encoding without page size */
unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED |
@@ -208,6 +217,8 @@ static void __meminit vmemmap_create_mapping(unsigned long start,
*/
for (i = 0; i < page_size; i += PAGE_SIZE)
BUG_ON(map_kernel_page(start + i, phys, flags));
+
+ return 0;
}
#ifdef CONFIG_MEMORY_HOTPLUG
@@ -217,25 +228,31 @@ static void vmemmap_remove_mapping(unsigned long start,
}
#endif
#else /* CONFIG_PPC_BOOK3E */
-static void __meminit vmemmap_create_mapping(unsigned long start,
- unsigned long page_size,
- unsigned long phys)
+static int __meminit vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys)
{
- int mapped = htab_bolt_mapping(start, start + page_size, phys,
- pgprot_val(PAGE_KERNEL),
- mmu_vmemmap_psize,
- mmu_kernel_ssize);
- BUG_ON(mapped < 0);
+ int rc = htab_bolt_mapping(start, start + page_size, phys,
+ pgprot_val(PAGE_KERNEL),
+ mmu_vmemmap_psize, mmu_kernel_ssize);
+ if (rc < 0) {
+ int rc2 = htab_remove_mapping(start, start + page_size,
+ mmu_vmemmap_psize,
+ mmu_kernel_ssize);
+ BUG_ON(rc2 && (rc2 != -ENOENT));
+ }
+ return rc;
}
#ifdef CONFIG_MEMORY_HOTPLUG
static void vmemmap_remove_mapping(unsigned long start,
unsigned long page_size)
{
- int mapped = htab_remove_mapping(start, start + page_size,
- mmu_vmemmap_psize,
- mmu_kernel_ssize);
- BUG_ON(mapped < 0);
+ int rc = htab_remove_mapping(start, start + page_size,
+ mmu_vmemmap_psize,
+ mmu_kernel_ssize);
+ BUG_ON((rc < 0) && (rc != -ENOENT));
+ WARN_ON(rc == -ENOENT);
}
#endif
@@ -303,6 +320,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
for (; start < end; start += page_size) {
void *p;
+ int rc;
if (vmemmap_populated(start, page_size))
continue;
@@ -316,7 +334,13 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
pr_debug(" * %016lx..%016lx allocated at %p\n",
start, start + page_size, p);
- vmemmap_create_mapping(start, page_size, __pa(p));
+ rc = vmemmap_create_mapping(start, page_size, __pa(p));
+ if (rc < 0) {
+ pr_warning(
+ "vmemmap_populate: Unable to create vmemmap mapping: %d\n",
+ rc);
+ return -EFAULT;
+ }
}
return 0;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index d0f0a514b04e..f980da6d7569 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -119,12 +119,18 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
struct zone *zone;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+ int rc;
pgdata = NODE_DATA(nid);
start = (unsigned long)__va(start);
- if (create_section_mapping(start, start + size))
- return -EINVAL;
+ rc = create_section_mapping(start, start + size);
+ if (rc) {
+ pr_warning(
+ "Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
+ start, start + size, rc);
+ return -EFAULT;
+ }
/* this should work for most non-highmem platforms */
zone = pgdata->node_zones +
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 9f58ff44a075..898d63365cdd 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -110,7 +110,8 @@ extern unsigned long Hash_size, Hash_mask;
#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PPC64
-extern int map_kernel_page(unsigned long ea, unsigned long pa, int flags);
+extern int map_kernel_page(unsigned long ea, unsigned long pa,
+ unsigned long flags);
#endif /* CONFIG_PPC64 */
extern unsigned long ioremap_bot;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 3124a20d0fab..0eb53128ca2a 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -88,7 +88,7 @@ static __ref void *early_alloc_pgtable(unsigned long size)
* map_kernel_page adds an entry to the ioremap page table
* and adds an entry to the HPT, possibly bolting it
*/
-int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
+int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
{
pgd_t *pgdp;
pud_t *pudp;
@@ -646,6 +646,28 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
return pgtable;
}
+void pmdp_huge_split_prepare(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
+
+ /*
+ * We can't mark the pmd none here, because that will cause a race
+ * against exit_mmap. We need to continue mark pmd TRANS HUGE, while
+ * we spilt, but at the same time we wan't rest of the ppc64 code
+ * not to insert hash pte on this, because we will be modifying
+ * the deposited pgtable in the caller of this function. Hence
+ * clear the _PAGE_USER so that we move the fault handling to
+ * higher level function and that will serialize against ptl.
+ * We need to flush existing hash pte entries here even though,
+ * the translation is still valid, because we will withdraw
+ * pgtable_t after this.
+ */
+ pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0);
+}
+
+
/*
* set a new huge pmd. We should not be called for updating
* an existing pmd entry. That should go via pmd_hugepage_update.
@@ -663,10 +685,20 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}
+/*
+ * We use this to invalidate a pmdp entry before switching from a
+ * hugepte to regular pmd entry.
+ */
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
+
+ /*
+ * This ensures that generic code that rely on IRQ disabling
+ * to prevent a parallel THP split work as expected.
+ */
+ kick_all_cpus_sync();
}
/*
@@ -717,7 +749,7 @@ pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
{
unsigned long pmdv;
- pmdv = pfn << PTE_RPN_SHIFT;
+ pmdv = (pfn << PTE_RPN_SHIFT) & PTE_RPN_MASK;
return pmd_set_protbits(__pmd(pmdv), pgprot);
}
@@ -785,6 +817,13 @@ pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
int has_transparent_hugepage(void)
{
+
+ BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) >= MAX_ORDER,
+ "hugepages can't be allocated by the buddy allocator");
+
+ BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) < 2,
+ "We need more than 2 pages to do deferred thp split");
+
if (!mmu_has_feature(MMU_FTR_16M_PAGE))
return 0;
/*
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index 29d6987c37ba..eb82d787d99a 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -895,7 +895,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
BEGIN_MMU_FTR_SECTION
virt_page_table_tlb_miss_done:
- /* We have overriden MAS2:EPN but currently our primary TLB miss
+ /* We have overridden MAS2:EPN but currently our primary TLB miss
* handler will always restore it so that should not be an issue,
* if we ever optimize the primary handler to not write MAS2 on
* some cases, we'll have to restore MAS2:EPN here based on the
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
index 68c477592e43..eabecfcaef7c 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/tlb_nohash_low.S
@@ -108,7 +108,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
blr
2:
#ifdef CONFIG_PPC_47x
- oris r7,r6,0x8000 /* specify way explicitely */
+ oris r7,r6,0x8000 /* specify way explicitly */
clrrwi r4,r3,12 /* get an EPN for the hashing with V = 0 */
ori r4,r4,PPC47x_TLBE_SIZE
tlbwe r4,r7,0 /* write it */
@@ -149,7 +149,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
li r3,-1 /* Current set */
lis r10,tlb_47x_boltmap@h
ori r10,r10,tlb_47x_boltmap@l
- lis r7,0x8000 /* Specify way explicitely */
+ lis r7,0x8000 /* Specify way explicitly */
b 9f /* For each set */
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index 863d89386f60..c82497a31c54 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -208,7 +208,7 @@ static void pm_rtas_reset_signals(u32 node)
/*
* The debug bus is being set to the passthru disable state.
- * However, the FW still expects atleast one legal signal routing
+ * However, the FW still expects at least one legal signal routing
* entry or it will return an error on the arguments. If we don't
* supply a valid entry, we must ignore all return values. Ignoring
* all return values means we might miss an error we should be
@@ -1008,7 +1008,7 @@ static int initial_lfsr[] = {
*
* To avoid the time to compute the LFSR, a lookup table is used. The 24 bit
* LFSR sequence is broken into four ranges. The spacing of the precomputed
- * values is adjusted in each range so the error between the user specifed
+ * values is adjusted in each range so the error between the user specified
* number (N) of events between samples and the actual number of events based
* on the precomputed value will be les then about 6.2%. Note, if the user
* specifies N < 2^16, the LFSR value that is 2^16 from the end will be used.
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index d1e65ce545b3..97a1d40d8696 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -651,7 +651,7 @@ static void pmao_restore_workaround(bool ebb)
/*
* We are already soft-disabled in power_pmu_enable(). We need to hard
- * enable to actually prevent the PMU exception from firing.
+ * disable to actually prevent the PMU exception from firing.
*/
hard_irq_disable();
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 9f9dfda9ed2c..59012e750abe 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -27,20 +27,6 @@
#include "hv-24x7-catalog.h"
#include "hv-common.h"
-static const char *event_domain_suffix(unsigned domain)
-{
- switch (domain) {
-#define DOMAIN(n, v, x, c) \
- case HV_PERF_DOMAIN_##n: \
- return "__" #n;
-#include "hv-24x7-domains.h"
-#undef DOMAIN
- default:
- WARN(1, "unknown domain %d\n", domain);
- return "__UNKNOWN_DOMAIN_SUFFIX";
- }
-}
-
static bool domain_is_valid(unsigned domain)
{
switch (domain) {
@@ -68,6 +54,24 @@ static bool is_physical_domain(unsigned domain)
}
}
+static const char *domain_name(unsigned domain)
+{
+ if (!domain_is_valid(domain))
+ return NULL;
+
+ switch (domain) {
+ case HV_PERF_DOMAIN_PHYS_CHIP: return "Physical Chip";
+ case HV_PERF_DOMAIN_PHYS_CORE: return "Physical Core";
+ case HV_PERF_DOMAIN_VCPU_HOME_CORE: return "VCPU Home Core";
+ case HV_PERF_DOMAIN_VCPU_HOME_CHIP: return "VCPU Home Chip";
+ case HV_PERF_DOMAIN_VCPU_HOME_NODE: return "VCPU Home Node";
+ case HV_PERF_DOMAIN_VCPU_REMOTE_NODE: return "VCPU Remote Node";
+ }
+
+ WARN_ON_ONCE(domain);
+ return NULL;
+}
+
static bool catalog_entry_domain_is_valid(unsigned domain)
{
return is_physical_domain(domain);
@@ -101,6 +105,7 @@ static bool catalog_entry_domain_is_valid(unsigned domain)
EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3);
/* u16 */
EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31);
+EVENT_DEFINE_RANGE_FORMAT(chip, config, 16, 31);
EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31);
/* u32, see "data_offset" */
EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63);
@@ -115,6 +120,7 @@ static struct attribute *format_attrs[] = {
&format_attr_domain.attr,
&format_attr_offset.attr,
&format_attr_core.attr,
+ &format_attr_chip.attr,
&format_attr_vcpu.attr,
&format_attr_lpar.attr,
NULL,
@@ -274,32 +280,70 @@ static unsigned long h_get_24x7_catalog_page(char page[],
version, index);
}
-static unsigned core_domains[] = {
- HV_PERF_DOMAIN_PHYS_CORE,
- HV_PERF_DOMAIN_VCPU_HOME_CORE,
- HV_PERF_DOMAIN_VCPU_HOME_CHIP,
- HV_PERF_DOMAIN_VCPU_HOME_NODE,
- HV_PERF_DOMAIN_VCPU_REMOTE_NODE,
-};
-/* chip event data always yeilds a single event, core yeilds multiple */
-#define MAX_EVENTS_PER_EVENT_DATA ARRAY_SIZE(core_domains)
-
+/*
+ * Each event we find in the catalog, will have a sysfs entry. Format the
+ * data for this sysfs entry based on the event's domain.
+ *
+ * Events belonging to the Chip domain can only be monitored in that domain.
+ * i.e the domain for these events is a fixed/knwon value.
+ *
+ * Events belonging to the Core domain can be monitored either in the physical
+ * core or in one of the virtual CPU domains. So the domain value for these
+ * events must be specified by the user (i.e is a required parameter). Format
+ * the Core events with 'domain=?' so the perf-tool can error check required
+ * parameters.
+ *
+ * NOTE: For the Core domain events, rather than making domain a required
+ * parameter we could default it to PHYS_CORE and allowe users to
+ * override the domain to one of the VCPU domains.
+ *
+ * However, this can make the interface a little inconsistent.
+ *
+ * If we set domain=2 (PHYS_CHIP) and allow user to override this field
+ * the user may be tempted to also modify the "offset=x" field in which
+ * can lead to confusing usage. Consider the HPM_PCYC (offset=0x18) and
+ * HPM_INST (offset=0x20) events. With:
+ *
+ * perf stat -e hv_24x7/HPM_PCYC,offset=0x20/
+ *
+ * we end up monitoring HPM_INST, while the command line has HPM_PCYC.
+ *
+ * By not assigning a default value to the domain for the Core events,
+ * we can have simple guidelines:
+ *
+ * - Specifying values for parameters with "=?" is required.
+ *
+ * - Specifying (i.e overriding) values for other parameters
+ * is undefined.
+ */
static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain)
{
const char *sindex;
const char *lpar;
+ const char *domain_str;
+ char buf[8];
- if (is_physical_domain(domain)) {
+ switch (domain) {
+ case HV_PERF_DOMAIN_PHYS_CHIP:
+ snprintf(buf, sizeof(buf), "%d", domain);
+ domain_str = buf;
+ lpar = "0x0";
+ sindex = "chip";
+ break;
+ case HV_PERF_DOMAIN_PHYS_CORE:
+ domain_str = "?";
lpar = "0x0";
sindex = "core";
- } else {
+ break;
+ default:
+ domain_str = "?";
lpar = "?";
sindex = "vcpu";
}
return kasprintf(GFP_KERNEL,
- "domain=0x%x,offset=0x%x,%s=?,lpar=%s",
- domain,
+ "domain=%s,offset=0x%x,%s=?,lpar=%s",
+ domain_str,
be16_to_cpu(event->event_counter_offs) +
be16_to_cpu(event->event_group_record_offs),
sindex,
@@ -339,6 +383,15 @@ static struct attribute *device_str_attr_create_(char *name, char *str)
return &attr->attr.attr;
}
+/*
+ * Allocate and initialize strings representing event attributes.
+ *
+ * NOTE: The strings allocated here are never destroyed and continue to
+ * exist till shutdown. This is to allow us to create as many events
+ * from the catalog as possible, even if we encounter errors with some.
+ * In case of changes to error paths in future, these may need to be
+ * freed by the caller.
+ */
static struct attribute *device_str_attr_create(char *name, int name_max,
int name_nonce,
char *str, size_t str_max)
@@ -370,16 +423,6 @@ out_s:
return NULL;
}
-static void device_str_attr_destroy(struct attribute *attr)
-{
- struct dev_ext_attribute *d;
-
- d = container_of(attr, struct dev_ext_attribute, attr.attr);
- kfree(d->var);
- kfree(d->attr.attr.name);
- kfree(d);
-}
-
static struct attribute *event_to_attr(unsigned ix,
struct hv_24x7_event_data *event,
unsigned domain,
@@ -387,7 +430,6 @@ static struct attribute *event_to_attr(unsigned ix,
{
int event_name_len;
char *ev_name, *a_ev_name, *val;
- const char *ev_suffix;
struct attribute *attr;
if (!domain_is_valid(domain)) {
@@ -400,14 +442,13 @@ static struct attribute *event_to_attr(unsigned ix,
if (!val)
return NULL;
- ev_suffix = event_domain_suffix(domain);
ev_name = event_name(event, &event_name_len);
if (!nonce)
- a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s",
- (int)event_name_len, ev_name, ev_suffix);
+ a_ev_name = kasprintf(GFP_KERNEL, "%.*s",
+ (int)event_name_len, ev_name);
else
- a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s__%d",
- (int)event_name_len, ev_name, ev_suffix, nonce);
+ a_ev_name = kasprintf(GFP_KERNEL, "%.*s__%d",
+ (int)event_name_len, ev_name, nonce);
if (!a_ev_name)
goto out_val;
@@ -452,45 +493,14 @@ event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce)
return device_str_attr_create(name, nl, nonce, desc, dl);
}
-static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs,
+static int event_data_to_attrs(unsigned ix, struct attribute **attrs,
struct hv_24x7_event_data *event, int nonce)
{
- unsigned i;
-
- switch (event->domain) {
- case HV_PERF_DOMAIN_PHYS_CHIP:
- *attrs = event_to_attr(ix, event, event->domain, nonce);
- return 1;
- case HV_PERF_DOMAIN_PHYS_CORE:
- for (i = 0; i < ARRAY_SIZE(core_domains); i++) {
- attrs[i] = event_to_attr(ix, event, core_domains[i],
- nonce);
- if (!attrs[i]) {
- pr_warn("catalog event %u: individual attr %u "
- "creation failure\n", ix, i);
- for (; i; i--)
- device_str_attr_destroy(attrs[i - 1]);
- return -1;
- }
- }
- return i;
- default:
- pr_warn("catalog event %u: domain %u is not allowed in the "
- "catalog\n", ix, event->domain);
+ *attrs = event_to_attr(ix, event, event->domain, nonce);
+ if (!*attrs)
return -1;
- }
-}
-static size_t event_to_attr_ct(struct hv_24x7_event_data *event)
-{
- switch (event->domain) {
- case HV_PERF_DOMAIN_PHYS_CHIP:
- return 1;
- case HV_PERF_DOMAIN_PHYS_CORE:
- return ARRAY_SIZE(core_domains);
- default:
- return 0;
- }
+ return 0;
}
static unsigned long vmalloc_to_phys(void *v)
@@ -726,9 +736,8 @@ static int create_events_from_catalog(struct attribute ***events_,
goto e_free;
}
- if (SIZE_MAX / MAX_EVENTS_PER_EVENT_DATA - 1 < event_entry_count) {
- pr_err("event_entry_count %zu is invalid\n",
- event_entry_count);
+ if (SIZE_MAX - 1 < event_entry_count) {
+ pr_err("event_entry_count %zu is invalid\n", event_entry_count);
ret = -EIO;
goto e_free;
}
@@ -801,7 +810,7 @@ static int create_events_from_catalog(struct attribute ***events_,
continue;
}
- attr_max += event_to_attr_ct(event);
+ attr_max++;
}
event_idx_last = event_idx;
@@ -851,12 +860,12 @@ static int create_events_from_catalog(struct attribute ***events_,
nonce = event_uniq_add(&ev_uniq, name, nl, event->domain);
ct = event_data_to_attrs(event_idx, events + event_attr_ct,
event, nonce);
- if (ct <= 0) {
+ if (ct < 0) {
pr_warn("event %zu (%.*s) creation failure, skipping\n",
event_idx, nl, name);
junk_events++;
} else {
- event_attr_ct += ct;
+ event_attr_ct++;
event_descs[desc_ct] = event_to_desc_attr(event, nonce);
if (event_descs[desc_ct])
desc_ct++;
@@ -961,6 +970,27 @@ e_free:
return ret;
}
+static ssize_t domains_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ int d, n, count = 0;
+ const char *str;
+
+ for (d = 0; d < HV_PERF_DOMAIN_MAX; d++) {
+ str = domain_name(d);
+ if (!str)
+ continue;
+
+ n = sprintf(page, "%d: %s\n", d, str);
+ if (n < 0)
+ break;
+
+ count += n;
+ page += n;
+ }
+ return count;
+}
+
#define PAGE_0_ATTR(_name, _fmt, _expr) \
static ssize_t _name##_show(struct device *dev, \
struct device_attribute *dev_attr, \
@@ -989,6 +1019,7 @@ PAGE_0_ATTR(catalog_version, "%lld\n",
PAGE_0_ATTR(catalog_len, "%lld\n",
(unsigned long long)be32_to_cpu(page_0->length) * 4096);
static BIN_ATTR_RO(catalog, 0/* real length varies */);
+static DEVICE_ATTR_RO(domains);
static struct bin_attribute *if_bin_attrs[] = {
&bin_attr_catalog,
@@ -998,6 +1029,7 @@ static struct bin_attribute *if_bin_attrs[] = {
static struct attribute *if_attrs[] = {
&dev_attr_catalog_len.attr,
&dev_attr_catalog_version.attr,
+ &dev_attr_domains.attr,
NULL,
};
@@ -1089,10 +1121,16 @@ static int add_event_to_24x7_request(struct perf_event *event,
return -EINVAL;
}
- if (is_physical_domain(event_get_domain(event)))
+ switch (event_get_domain(event)) {
+ case HV_PERF_DOMAIN_PHYS_CHIP:
+ idx = event_get_chip(event);
+ break;
+ case HV_PERF_DOMAIN_PHYS_CORE:
idx = event_get_core(event);
- else
+ break;
+ default:
idx = event_get_vcpu(event);
+ }
i = request_buffer->num_requests++;
req = &request_buffer->requests[i];
@@ -1208,11 +1246,12 @@ static int h_24x7_event_init(struct perf_event *event)
return -EACCES;
}
- /* see if the event complains */
+ /* Get the initial value of the counter for this event */
if (single_24x7_request(event, &ct)) {
pr_devel("test hcall failed\n");
return -EIO;
}
+ (void)local64_xchg(&event->hw.prev_count, ct);
return 0;
}
@@ -1275,6 +1314,16 @@ static void h_24x7_event_read(struct perf_event *event)
h24x7hw = &get_cpu_var(hv_24x7_hw);
h24x7hw->events[i] = event;
put_cpu_var(h24x7hw);
+ /*
+ * Clear the event count so we can compute the _change_
+ * in the 24x7 raw counter value at the end of the txn.
+ *
+ * Note that we could alternatively read the 24x7 value
+ * now and save its value in event->hw.prev_count. But
+ * that would require issuing a hcall, which would then
+ * defeat the purpose of using the txn interface.
+ */
+ local64_set(&event->count, 0);
}
put_cpu_var(hv_24x7_reqb);
diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h
index 0f9fa21a29f2..791455e7f5cf 100644
--- a/arch/powerpc/perf/hv-24x7.h
+++ b/arch/powerpc/perf/hv-24x7.h
@@ -7,6 +7,7 @@ enum hv_perf_domains {
#define DOMAIN(n, v, x, c) HV_PERF_DOMAIN_##n = v,
#include "hv-24x7-domains.h"
#undef DOMAIN
+ HV_PERF_DOMAIN_MAX,
};
struct hv_24x7_request {
@@ -80,7 +81,7 @@ struct hv_24x7_result {
__u8 results_complete;
__be16 num_elements_returned;
- /* This is a copy of @data_size from the coresponding hv_24x7_request */
+ /* This is a copy of @data_size from the corresponding hv_24x7_request */
__be16 result_element_data_size;
__u8 reserved[0x2];
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 856fe6e03c2a..7aa37236bb70 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -127,8 +127,16 @@ static const struct attribute_group *attr_groups[] = {
NULL,
};
-#define GPCI_MAX_DATA_BYTES \
- (1024 - sizeof(struct hv_get_perf_counter_info_params))
+#define HGPCI_REQ_BUFFER_SIZE 4096
+#define HGPCI_MAX_DATA_BYTES \
+ (HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params))
+
+DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
+
+struct hv_gpci_request_buffer {
+ struct hv_get_perf_counter_info_params params;
+ uint8_t bytes[HGPCI_MAX_DATA_BYTES];
+} __packed;
static unsigned long single_gpci_request(u32 req, u32 starting_index,
u16 secondary_index, u8 version_in, u32 offset, u8 length,
@@ -137,24 +145,21 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index,
unsigned long ret;
size_t i;
u64 count;
+ struct hv_gpci_request_buffer *arg;
+
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
- struct {
- struct hv_get_perf_counter_info_params params;
- uint8_t bytes[GPCI_MAX_DATA_BYTES];
- } __packed __aligned(sizeof(uint64_t)) arg = {
- .params = {
- .counter_request = cpu_to_be32(req),
- .starting_index = cpu_to_be32(starting_index),
- .secondary_index = cpu_to_be16(secondary_index),
- .counter_info_version_in = version_in,
- }
- };
+ arg->params.counter_request = cpu_to_be32(req);
+ arg->params.starting_index = cpu_to_be32(starting_index);
+ arg->params.secondary_index = cpu_to_be16(secondary_index);
+ arg->params.counter_info_version_in = version_in;
ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
- virt_to_phys(&arg), sizeof(arg));
+ virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
if (ret) {
pr_devel("hcall failed: 0x%lx\n", ret);
- return ret;
+ goto out;
}
/*
@@ -163,9 +168,11 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index,
*/
count = 0;
for (i = offset; i < offset + length; i++)
- count |= arg.bytes[i] << (i - offset);
+ count |= arg->bytes[i] << (i - offset);
*value = count;
+out:
+ put_cpu_var(hv_gpci_reqb);
return ret;
}
@@ -245,10 +252,10 @@ static int h_gpci_event_init(struct perf_event *event)
}
/* last byte within the buffer? */
- if ((event_get_offset(event) + length) > GPCI_MAX_DATA_BYTES) {
+ if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) {
pr_devel("request outside of buffer: %zu > %zu\n",
(size_t)event_get_offset(event) + length,
- GPCI_MAX_DATA_BYTES);
+ HGPCI_MAX_DATA_BYTES);
return -EINVAL;
}
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 5b62f2389290..a383c23a9070 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -54,7 +54,7 @@
* Power7 event codes.
*/
#define EVENT(_name, _code) \
- PME_##_name = _code,
+ _name = _code,
enum {
#include "power7-events-list.h"
@@ -318,14 +318,14 @@ static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[])
}
static int power7_generic_events[] = {
- [PERF_COUNT_HW_CPU_CYCLES] = PME_PM_CYC,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PME_PM_GCT_NOSLOT_CYC,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PME_PM_CMPLU_STALL,
- [PERF_COUNT_HW_INSTRUCTIONS] = PME_PM_INST_CMPL,
- [PERF_COUNT_HW_CACHE_REFERENCES] = PME_PM_LD_REF_L1,
- [PERF_COUNT_HW_CACHE_MISSES] = PME_PM_LD_MISS_L1,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PME_PM_BRU_FIN,
- [PERF_COUNT_HW_BRANCH_MISSES] = PME_PM_BR_MPRED,
+ [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_GCT_NOSLOT_CYC,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL,
+ [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
+ [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_FIN,
+ [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED,
};
#define C(x) PERF_COUNT_HW_CACHE_##x
diff --git a/arch/powerpc/perf/power8-events-list.h b/arch/powerpc/perf/power8-events-list.h
new file mode 100644
index 000000000000..741b77edd03e
--- /dev/null
+++ b/arch/powerpc/perf/power8-events-list.h
@@ -0,0 +1,51 @@
+/*
+ * Performance counter support for POWER8 processors.
+ *
+ * Copyright 2014 Sukadev Bhattiprolu, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Power8 event codes.
+ */
+EVENT(PM_CYC, 0x0001e)
+EVENT(PM_GCT_NOSLOT_CYC, 0x100f8)
+EVENT(PM_CMPLU_STALL, 0x4000a)
+EVENT(PM_INST_CMPL, 0x00002)
+EVENT(PM_BRU_FIN, 0x10068)
+EVENT(PM_BR_MPRED_CMPL, 0x400f6)
+
+/* All L1 D cache load references counted at finish, gated by reject */
+EVENT(PM_LD_REF_L1, 0x100ee)
+/* Load Missed L1 */
+EVENT(PM_LD_MISS_L1, 0x3e054)
+/* Store Missed L1 */
+EVENT(PM_ST_MISS_L1, 0x300f0)
+/* L1 cache data prefetches */
+EVENT(PM_L1_PREF, 0x0d8b8)
+/* Instruction fetches from L1 */
+EVENT(PM_INST_FROM_L1, 0x04080)
+/* Demand iCache Miss */
+EVENT(PM_L1_ICACHE_MISS, 0x200fd)
+/* Instruction Demand sectors wriittent into IL1 */
+EVENT(PM_L1_DEMAND_WRITE, 0x0408c)
+/* Instruction prefetch written into IL1 */
+EVENT(PM_IC_PREF_WRITE, 0x0408e)
+/* The data cache was reloaded from local core's L3 due to a demand load */
+EVENT(PM_DATA_FROM_L3, 0x4c042)
+/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+EVENT(PM_DATA_FROM_L3MISS, 0x300fe)
+/* All successful D-side store dispatches for this thread */
+EVENT(PM_L2_ST, 0x17080)
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+EVENT(PM_L2_ST_MISS, 0x17082)
+/* Total HW L3 prefetches(Load+store) */
+EVENT(PM_L3_PREF_ALL, 0x4e052)
+/* Data PTEG reload */
+EVENT(PM_DTLB_MISS, 0x300fc)
+/* ITLB Reloaded */
+EVENT(PM_ITLB_MISS, 0x400fc)
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 9958ba8bf0d2..690d9186a855 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -17,48 +17,16 @@
#include <asm/firmware.h>
#include <asm/cputable.h>
-
/*
* Some power8 event codes.
*/
-#define PM_CYC 0x0001e
-#define PM_GCT_NOSLOT_CYC 0x100f8
-#define PM_CMPLU_STALL 0x4000a
-#define PM_INST_CMPL 0x00002
-#define PM_BRU_FIN 0x10068
-#define PM_BR_MPRED_CMPL 0x400f6
-
-/* All L1 D cache load references counted at finish, gated by reject */
-#define PM_LD_REF_L1 0x100ee
-/* Load Missed L1 */
-#define PM_LD_MISS_L1 0x3e054
-/* Store Missed L1 */
-#define PM_ST_MISS_L1 0x300f0
-/* L1 cache data prefetches */
-#define PM_L1_PREF 0x0d8b8
-/* Instruction fetches from L1 */
-#define PM_INST_FROM_L1 0x04080
-/* Demand iCache Miss */
-#define PM_L1_ICACHE_MISS 0x200fd
-/* Instruction Demand sectors wriittent into IL1 */
-#define PM_L1_DEMAND_WRITE 0x0408c
-/* Instruction prefetch written into IL1 */
-#define PM_IC_PREF_WRITE 0x0408e
-/* The data cache was reloaded from local core's L3 due to a demand load */
-#define PM_DATA_FROM_L3 0x4c042
-/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
-#define PM_DATA_FROM_L3MISS 0x300fe
-/* All successful D-side store dispatches for this thread */
-#define PM_L2_ST 0x17080
-/* All successful D-side store dispatches for this thread that were L2 Miss */
-#define PM_L2_ST_MISS 0x17082
-/* Total HW L3 prefetches(Load+store) */
-#define PM_L3_PREF_ALL 0x4e052
-/* Data PTEG reload */
-#define PM_DTLB_MISS 0x300fc
-/* ITLB Reloaded */
-#define PM_ITLB_MISS 0x400fc
+#define EVENT(_name, _code) _name = _code,
+
+enum {
+#include "power8-events-list.h"
+};
+#undef EVENT
/*
* Raw event encoding for POWER8:
@@ -415,7 +383,7 @@ static int power8_compute_mmcr(u64 event[], int n_ev,
pmc_inuse |= 1 << pmc;
}
- /* In continous sampling mode, update SDAR on TLB miss */
+ /* In continuous sampling mode, update SDAR on TLB miss */
mmcra = MMCRA_SDAR_MODE_TLB;
mmcr1 = mmcr2 = 0;
@@ -604,6 +572,71 @@ static void power8_disable_pmc(unsigned int pmc, unsigned long mmcr[])
mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1));
}
+GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_GCT_NOSLOT_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL);
+GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
+GENERIC_EVENT_ATTR(branch-instructions, PM_BRU_FIN);
+GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
+GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1);
+GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1);
+
+CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1);
+CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1);
+
+CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_L1_PREF);
+CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1);
+CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS);
+CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1);
+CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_WRITE);
+
+CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS);
+CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3);
+CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PREF_ALL);
+CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS);
+CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST);
+
+CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
+CACHE_EVENT_ATTR(branch-loads, PM_BRU_FIN);
+CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
+CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
+
+static struct attribute *power8_events_attr[] = {
+ GENERIC_EVENT_PTR(PM_CYC),
+ GENERIC_EVENT_PTR(PM_GCT_NOSLOT_CYC),
+ GENERIC_EVENT_PTR(PM_CMPLU_STALL),
+ GENERIC_EVENT_PTR(PM_INST_CMPL),
+ GENERIC_EVENT_PTR(PM_BRU_FIN),
+ GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+ GENERIC_EVENT_PTR(PM_LD_REF_L1),
+ GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+
+ CACHE_EVENT_PTR(PM_LD_MISS_L1),
+ CACHE_EVENT_PTR(PM_LD_REF_L1),
+ CACHE_EVENT_PTR(PM_L1_PREF),
+ CACHE_EVENT_PTR(PM_ST_MISS_L1),
+ CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+ CACHE_EVENT_PTR(PM_INST_FROM_L1),
+ CACHE_EVENT_PTR(PM_IC_PREF_WRITE),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+ CACHE_EVENT_PTR(PM_L3_PREF_ALL),
+ CACHE_EVENT_PTR(PM_L2_ST_MISS),
+ CACHE_EVENT_PTR(PM_L2_ST),
+
+ CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+ CACHE_EVENT_PTR(PM_BRU_FIN),
+
+ CACHE_EVENT_PTR(PM_DTLB_MISS),
+ CACHE_EVENT_PTR(PM_ITLB_MISS),
+ NULL
+};
+
+static struct attribute_group power8_pmu_events_group = {
+ .name = "events",
+ .attrs = power8_events_attr,
+};
+
PMU_FORMAT_ATTR(event, "config:0-49");
PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
PMU_FORMAT_ATTR(mark, "config:8");
@@ -640,6 +673,7 @@ struct attribute_group power8_pmu_format_group = {
static const struct attribute_group *power8_pmu_attr_groups[] = {
&power8_pmu_format_group,
+ &power8_pmu_events_group,
NULL,
};
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
index 6eb3b2abae90..00282c2b0cae 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pci.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
@@ -319,7 +319,7 @@ mpc52xx_pci_setup(struct pci_controller *hose,
tmp = in_be32(&pci_regs->gscr);
#if 0
- /* Reset the exteral bus ( internal PCI controller is NOT resetted ) */
+ /* Reset the exteral bus ( internal PCI controller is NOT reset ) */
/* Not necessary and can be a bad thing if for example the bootloader
is displaying a splash screen or ... Just left here for
documentation purpose if anyone need it */
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index 5ac70de3e48a..d7e87ff912d7 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -99,7 +99,7 @@ static void mpc85xx_cds_restart(char *cmd)
pci_read_config_byte(dev, 0x47, &tmp);
/*
- * At this point, the harware reset should have triggered.
+ * At this point, the hardware reset should have triggered.
* However, if it doesn't work for some mysterious reason,
* just fall through to the default reset below.
*/
diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S
index 6be1a4af3359..cc5347eb1662 100644
--- a/arch/powerpc/platforms/powermac/cache.S
+++ b/arch/powerpc/platforms/powermac/cache.S
@@ -23,7 +23,7 @@
* when going to sleep, when doing a PMU based cpufreq transition,
* or when "offlining" a CPU on SMP machines. This code is over
* paranoid, but I've had enough issues with various CPU revs and
- * bugs that I decided it was worth beeing over cautious
+ * bugs that I decided it was worth being over cautious
*/
_GLOBAL(flush_disable_caches)
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 4882bfd90e27..1e02328c3f2d 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -198,7 +198,7 @@ static long ohare_htw_scc_enable(struct device_node *node, long param,
if (htw) {
/* Side effect: this will also power up the
* modem, but it's too messy to figure out on which
- * ports this controls the tranceiver and on which
+ * ports this controls the transceiver and on which
* it controls the modem
*/
if (trans)
@@ -463,7 +463,7 @@ static long heathrow_sound_enable(struct device_node *node, long param,
unsigned long flags;
/* B&W G3 and Yikes don't support that properly (the
- * sound appear to never come back after beeing shut down).
+ * sound appear to never come back after being shut down).
*/
if (pmac_mb.model_id == PMAC_TYPE_YOSEMITE ||
pmac_mb.model_id == PMAC_TYPE_YIKES)
@@ -2770,7 +2770,7 @@ set_initial_features(void)
* but I'm not too sure it was audited for side-effects on other
* ohare based machines...
* Since I still have difficulties figuring the right way to
- * differenciate them all and since that hack was there for a long
+ * differentiate them all and since that hack was there for a long
* time, I'll keep it around
*/
if (macio_chips[0].type == macio_ohare) {
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index f1516b5ecec9..cd9711e72df6 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -5,7 +5,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
obj-y += opal-kmsg.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
-obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o npu-dma.o
+obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o
obj-$(CONFIG_EEH) += eeh-powernv.o
obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 5f152b95ca0c..950b3e539057 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -167,42 +167,26 @@ static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val)
return 0;
}
-static int pnv_eeh_outb_dbgfs_set(void *data, u64 val)
-{
- return pnv_eeh_dbgfs_set(data, 0xD10, val);
-}
-
-static int pnv_eeh_outb_dbgfs_get(void *data, u64 *val)
-{
- return pnv_eeh_dbgfs_get(data, 0xD10, val);
-}
-
-static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val)
-{
- return pnv_eeh_dbgfs_set(data, 0xD90, val);
-}
-
-static int pnv_eeh_inbA_dbgfs_get(void *data, u64 *val)
-{
- return pnv_eeh_dbgfs_get(data, 0xD90, val);
-}
-
-static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val)
-{
- return pnv_eeh_dbgfs_set(data, 0xE10, val);
-}
+#define PNV_EEH_DBGFS_ENTRY(name, reg) \
+static int pnv_eeh_dbgfs_set_##name(void *data, u64 val) \
+{ \
+ return pnv_eeh_dbgfs_set(data, reg, val); \
+} \
+ \
+static int pnv_eeh_dbgfs_get_##name(void *data, u64 *val) \
+{ \
+ return pnv_eeh_dbgfs_get(data, reg, val); \
+} \
+ \
+DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_##name, \
+ pnv_eeh_dbgfs_get_##name, \
+ pnv_eeh_dbgfs_set_##name, \
+ "0x%llx\n")
+
+PNV_EEH_DBGFS_ENTRY(outb, 0xD10);
+PNV_EEH_DBGFS_ENTRY(inbA, 0xD90);
+PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
-static int pnv_eeh_inbB_dbgfs_get(void *data, u64 *val)
-{
- return pnv_eeh_dbgfs_get(data, 0xE10, val);
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get,
- pnv_eeh_outb_dbgfs_set, "0x%llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get,
- pnv_eeh_inbA_dbgfs_set, "0x%llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, pnv_eeh_inbB_dbgfs_get,
- pnv_eeh_inbB_dbgfs_set, "0x%llx\n");
#endif /* CONFIG_DEBUG_FS */
/**
@@ -268,13 +252,13 @@ static int pnv_eeh_post_init(void)
debugfs_create_file("err_injct_outbound", 0600,
phb->dbgfs, hose,
- &pnv_eeh_outb_dbgfs_ops);
+ &pnv_eeh_dbgfs_ops_outb);
debugfs_create_file("err_injct_inboundA", 0600,
phb->dbgfs, hose,
- &pnv_eeh_inbA_dbgfs_ops);
+ &pnv_eeh_dbgfs_ops_inbA);
debugfs_create_file("err_injct_inboundB", 0600,
phb->dbgfs, hose,
- &pnv_eeh_inbB_dbgfs_ops);
+ &pnv_eeh_dbgfs_ops_inbB);
#endif /* CONFIG_DEBUG_FS */
}
@@ -387,6 +371,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
edev->mode &= 0xFFFFFF00;
edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
+ edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF);
edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
edev->mode |= EEH_DEV_BRIDGE;
@@ -444,9 +429,12 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
* PCI devices of the PE are expected to be removed prior
* to PE reset.
*/
- if (!edev->pe->bus)
+ if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
edev->pe->bus = pci_find_bus(hose->global_number,
pdn->busno);
+ if (edev->pe->bus)
+ edev->pe->state |= EEH_PE_PRI_BUS;
+ }
/*
* Enable EEH explicitly so that we will do EEH check
@@ -892,6 +880,120 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
}
}
+static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,
+ int pos, u16 mask)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ int i, status = 0;
+
+ /* Wait for Transaction Pending bit to be cleared */
+ for (i = 0; i < 4; i++) {
+ eeh_ops->read_config(pdn, pos, 2, &status);
+ if (!(status & mask))
+ return;
+
+ msleep((1 << i) * 100);
+ }
+
+ pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n",
+ __func__, type,
+ edev->phb->global_number, pdn->busno,
+ PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+}
+
+static int pnv_eeh_do_flr(struct pci_dn *pdn, int option)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ u32 reg = 0;
+
+ if (WARN_ON(!edev->pcie_cap))
+ return -ENOTTY;
+
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, &reg);
+ if (!(reg & PCI_EXP_DEVCAP_FLR))
+ return -ENOTTY;
+
+ switch (option) {
+ case EEH_RESET_HOT:
+ case EEH_RESET_FUNDAMENTAL:
+ pnv_eeh_wait_for_pending(pdn, "",
+ edev->pcie_cap + PCI_EXP_DEVSTA,
+ PCI_EXP_DEVSTA_TRPND);
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 4, &reg);
+ reg |= PCI_EXP_DEVCTL_BCR_FLR;
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 4, reg);
+ msleep(EEH_PE_RST_HOLD_TIME);
+ break;
+ case EEH_RESET_DEACTIVATE:
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 4, &reg);
+ reg &= ~PCI_EXP_DEVCTL_BCR_FLR;
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 4, reg);
+ msleep(EEH_PE_RST_SETTLE_TIME);
+ break;
+ }
+
+ return 0;
+}
+
+static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ u32 cap = 0;
+
+ if (WARN_ON(!edev->af_cap))
+ return -ENOTTY;
+
+ eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap);
+ if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR))
+ return -ENOTTY;
+
+ switch (option) {
+ case EEH_RESET_HOT:
+ case EEH_RESET_FUNDAMENTAL:
+ /*
+ * Wait for Transaction Pending bit to clear. A word-aligned
+ * test is used, so we use the conrol offset rather than status
+ * and shift the test bit to match.
+ */
+ pnv_eeh_wait_for_pending(pdn, "AF",
+ edev->af_cap + PCI_AF_CTRL,
+ PCI_AF_STATUS_TP << 8);
+ eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL,
+ 1, PCI_AF_CTRL_FLR);
+ msleep(EEH_PE_RST_HOLD_TIME);
+ break;
+ case EEH_RESET_DEACTIVATE:
+ eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0);
+ msleep(EEH_PE_RST_SETTLE_TIME);
+ break;
+ }
+
+ return 0;
+}
+
+static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option)
+{
+ struct eeh_dev *edev;
+ struct pci_dn *pdn;
+ int ret;
+
+ /* The VF PE should have only one child device */
+ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list);
+ pdn = eeh_dev_to_pdn(edev);
+ if (!pdn)
+ return -ENXIO;
+
+ ret = pnv_eeh_do_flr(pdn, option);
+ if (!ret)
+ return ret;
+
+ return pnv_eeh_do_af_flr(pdn, option);
+}
+
/**
* pnv_eeh_reset - Reset the specified PE
* @pe: EEH PE
@@ -953,7 +1055,9 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
}
bus = eeh_pe_bus_get(pe);
- if (pci_is_root_bus(bus) ||
+ if (pe->type & EEH_PE_VF)
+ ret = pnv_eeh_reset_vf_pe(pe, option);
+ else if (pci_is_root_bus(bus) ||
pci_is_root_bus(bus->parent))
ret = pnv_eeh_root_reset(hose, option);
else
@@ -1092,6 +1196,14 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn)
if (!edev || !edev->pe)
return false;
+ /*
+ * We will issue FLR or AF FLR to all VFs, which are contained
+ * in VF PE. It relies on the EEH PCI config accessors. So we
+ * can't block them during the window.
+ */
+ if (edev->physfn && (edev->pe->state & EEH_PE_RESET))
+ return false;
+
if (edev->pe->state & EEH_PE_CFG_BLOCKED)
return true;
@@ -1476,6 +1588,65 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
return ret;
}
+static int pnv_eeh_restore_vf_config(struct pci_dn *pdn)
+{
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ u32 devctl, cmd, cap2, aer_capctl;
+ int old_mps;
+
+ if (edev->pcie_cap) {
+ /* Restore MPS */
+ old_mps = (ffs(pdn->mps) - 8) << 5;
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, &devctl);
+ devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
+ devctl |= old_mps;
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, devctl);
+
+ /* Disable Completion Timeout */
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
+ 4, &cap2);
+ if (cap2 & 0x10) {
+ eeh_ops->read_config(pdn,
+ edev->pcie_cap + PCI_EXP_DEVCTL2,
+ 4, &cap2);
+ cap2 |= 0x10;
+ eeh_ops->write_config(pdn,
+ edev->pcie_cap + PCI_EXP_DEVCTL2,
+ 4, cap2);
+ }
+ }
+
+ /* Enable SERR and parity checking */
+ eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
+ cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
+ eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);
+
+ /* Enable report various errors */
+ if (edev->pcie_cap) {
+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, &devctl);
+ devctl &= ~PCI_EXP_DEVCTL_CERE;
+ devctl |= (PCI_EXP_DEVCTL_NFERE |
+ PCI_EXP_DEVCTL_FERE |
+ PCI_EXP_DEVCTL_URRE);
+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+ 2, devctl);
+ }
+
+ /* Enable ECRC generation and check */
+ if (edev->pcie_cap && edev->aer_cap) {
+ eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
+ 4, &aer_capctl);
+ aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
+ eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
+ 4, aer_capctl);
+ }
+
+ return 0;
+}
+
static int pnv_eeh_restore_config(struct pci_dn *pdn)
{
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -1485,9 +1656,21 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
if (!edev)
return -EEXIST;
- phb = edev->phb->private_data;
- ret = opal_pci_reinit(phb->opal_id,
- OPAL_REINIT_PCI_DEV, edev->config_addr);
+ /*
+ * We have to restore the PCI config space after reset since the
+ * firmware can't see SRIOV VFs.
+ *
+ * FIXME: The MPS, error routing rules, timeout setting are worthy
+ * to be exported by firmware in extendible way.
+ */
+ if (edev->physfn) {
+ ret = pnv_eeh_restore_vf_config(pdn);
+ } else {
+ phb = edev->phb->private_data;
+ ret = opal_pci_reinit(phb->opal_id,
+ OPAL_REINIT_PCI_DEV, edev->config_addr);
+ }
+
if (ret) {
pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
__func__, edev->config_addr, ret);
@@ -1516,6 +1699,40 @@ static struct eeh_ops pnv_eeh_ops = {
.restore_config = pnv_eeh_restore_config
};
+void pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+
+ if (!pdev->is_virtfn)
+ return;
+
+ /*
+ * The following operations will fail if VF's sysfs files
+ * aren't created or its resources aren't finalized.
+ */
+ eeh_add_device_early(pdn);
+ eeh_add_device_late(pdev);
+ eeh_sysfs_add_device(pdev);
+}
+
+#ifdef CONFIG_PCI_IOV
+static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev)
+{
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+ int parent_mps;
+
+ if (!pdev->is_virtfn)
+ return;
+
+ /* Synchronize MPS for VF and PF */
+ parent_mps = pcie_get_mps(pdev->physfn);
+ if ((128 << pdev->pcie_mpss) >= parent_mps)
+ pcie_set_mps(pdev, parent_mps);
+ pdn->mps = pcie_get_mps(pdev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_pci_fixup_vf_mps);
+#endif /* CONFIG_PCI_IOV */
+
/**
* eeh_powernv_init - Register platform dependent EEH operations
*
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 15bfbcd5debc..fcc8b6861b63 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -35,9 +35,9 @@ int pnv_save_sprs_for_winkle(void)
int rc;
/*
- * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross
+ * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
* all cpus at boot. Get these reg values of current cpu and use the
- * same accross all cpus.
+ * same across all cpus.
*/
uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
uint64_t hid0_val = mfspr(SPRN_HID0);
@@ -185,7 +185,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
* fastsleep workaround needs to be left in 'applied' state on all
* the cores. Do this by-
* 1. Patching out the call to 'undo' workaround in fastsleep exit path
- * 2. Sending ipi to all the cores which have atleast one online thread
+ * 2. Sending ipi to all the cores which have at least one online thread
* 3. Patching out the call to 'apply' workaround in fastsleep entry
* path
* There is no need to send ipi to cores which have all threads
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index e85aa900f5c0..7229acd9bb3a 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -278,7 +278,7 @@ static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
/*
* Enable/disable bypass mode on the NPU. The NPU only supports one
- * window per link, so bypass needs to be explicity enabled or
+ * window per link, so bypass needs to be explicitly enabled or
* disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
* active at the same time.
*/
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
index 44ed78af1a0d..39d6ff9e5630 100644
--- a/arch/powerpc/platforms/powernv/opal-msglog.c
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -31,26 +31,25 @@ struct memcons {
__be32 in_cons;
};
-static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *to,
- loff_t pos, size_t count)
+static struct memcons *opal_memcons = NULL;
+
+ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
{
- struct memcons *mc = bin_attr->private;
const char *conbuf;
ssize_t ret;
size_t first_read = 0;
uint32_t out_pos, avail;
- if (!mc)
+ if (!opal_memcons)
return -ENODEV;
- out_pos = be32_to_cpu(ACCESS_ONCE(mc->out_pos));
+ out_pos = be32_to_cpu(ACCESS_ONCE(opal_memcons->out_pos));
/* Now we've read out_pos, put a barrier in before reading the new
* data it points to in conbuf. */
smp_rmb();
- conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys));
+ conbuf = phys_to_virt(be64_to_cpu(opal_memcons->obuf_phys));
/* When the buffer has wrapped, read from the out_pos marker to the end
* of the buffer, and then read the remaining data as in the un-wrapped
@@ -58,7 +57,7 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
if (out_pos & MEMCONS_OUT_POS_WRAP) {
out_pos &= MEMCONS_OUT_POS_MASK;
- avail = be32_to_cpu(mc->obuf_size) - out_pos;
+ avail = be32_to_cpu(opal_memcons->obuf_size) - out_pos;
ret = memory_read_from_buffer(to, count, &pos,
conbuf + out_pos, avail);
@@ -76,7 +75,7 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
}
/* Sanity check. The firmware should not do this to us. */
- if (out_pos > be32_to_cpu(mc->obuf_size)) {
+ if (out_pos > be32_to_cpu(opal_memcons->obuf_size)) {
pr_err("OPAL: memory console corruption. Aborting read.\n");
return -EINVAL;
}
@@ -91,6 +90,13 @@ out:
return ret;
}
+static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *to,
+ loff_t pos, size_t count)
+{
+ return opal_msglog_copy(to, pos, count);
+}
+
static struct bin_attribute opal_msglog_attr = {
.attr = {.name = "msglog", .mode = 0444},
.read = opal_msglog_read
@@ -117,7 +123,15 @@ void __init opal_msglog_init(void)
return;
}
- opal_msglog_attr.private = mc;
+ opal_memcons = mc;
+}
+
+void __init opal_msglog_sysfs_init(void)
+{
+ if (!opal_memcons) {
+ pr_warn("OPAL: message log initialisation failed, not creating sysfs entry\n");
+ return;
+ }
if (sysfs_create_bin_file(opal_kobj, &opal_msglog_attr) != 0)
pr_warn("OPAL: sysfs file creation failed\n");
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 4e0da5af94a1..0256d0729252 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -724,6 +724,9 @@ static int __init opal_init(void)
of_node_put(leds);
}
+ /* Initialise OPAL message log interface */
+ opal_msglog_init();
+
/* Create "opal" kobject under /sys/firmware */
rc = opal_sysfs_init();
if (rc == 0) {
@@ -739,8 +742,8 @@ static int __init opal_init(void)
opal_platform_dump_init();
/* Setup system parameters interface */
opal_sys_param_init();
- /* Setup message log interface. */
- opal_msglog_init();
+ /* Setup message log sysfs interface. */
+ opal_msglog_sysfs_init();
}
/* Initialize platform devices: IPMI backend, PRD & flash interface */
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 573ae1994097..c5baaf3cc4e5 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -872,9 +872,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
if (!res->flags || !res->parent)
continue;
- if (!pnv_pci_is_mem_pref_64(res->flags))
- continue;
-
/*
* The actual IOV BAR range is determined by the start address
* and the actual size for num_vfs VFs BAR. This check is to
@@ -903,9 +900,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
if (!res->flags || !res->parent)
continue;
- if (!pnv_pci_is_mem_pref_64(res->flags))
- continue;
-
size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
res2 = *res;
res->start += size * offset;
@@ -1196,29 +1190,36 @@ static void pnv_pci_ioda_setup_PEs(void)
}
#ifdef CONFIG_PCI_IOV
-static int pnv_pci_vf_release_m64(struct pci_dev *pdev)
+static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
{
struct pci_bus *bus;
struct pci_controller *hose;
struct pnv_phb *phb;
struct pci_dn *pdn;
int i, j;
+ int m64_bars;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
phb = hose->private_data;
pdn = pci_get_pdn(pdev);
+ if (pdn->m64_single_mode)
+ m64_bars = num_vfs;
+ else
+ m64_bars = 1;
+
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
- for (j = 0; j < M64_PER_IOV; j++) {
- if (pdn->m64_wins[i][j] == IODA_INVALID_M64)
+ for (j = 0; j < m64_bars; j++) {
+ if (pdn->m64_map[j][i] == IODA_INVALID_M64)
continue;
opal_pci_phb_mmio_enable(phb->opal_id,
- OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 0);
- clear_bit(pdn->m64_wins[i][j], &phb->ioda.m64_bar_alloc);
- pdn->m64_wins[i][j] = IODA_INVALID_M64;
+ OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0);
+ clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc);
+ pdn->m64_map[j][i] = IODA_INVALID_M64;
}
+ kfree(pdn->m64_map);
return 0;
}
@@ -1235,8 +1236,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
int total_vfs;
resource_size_t size, start;
int pe_num;
- int vf_groups;
- int vf_per_group;
+ int m64_bars;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
@@ -1244,29 +1244,26 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
pdn = pci_get_pdn(pdev);
total_vfs = pci_sriov_get_totalvfs(pdev);
- /* Initialize the m64_wins to IODA_INVALID_M64 */
- for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
- for (j = 0; j < M64_PER_IOV; j++)
- pdn->m64_wins[i][j] = IODA_INVALID_M64;
+ if (pdn->m64_single_mode)
+ m64_bars = num_vfs;
+ else
+ m64_bars = 1;
+
+ pdn->m64_map = kmalloc(sizeof(*pdn->m64_map) * m64_bars, GFP_KERNEL);
+ if (!pdn->m64_map)
+ return -ENOMEM;
+ /* Initialize the m64_map to IODA_INVALID_M64 */
+ for (i = 0; i < m64_bars ; i++)
+ for (j = 0; j < PCI_SRIOV_NUM_BARS; j++)
+ pdn->m64_map[i][j] = IODA_INVALID_M64;
- if (pdn->m64_per_iov == M64_PER_IOV) {
- vf_groups = (num_vfs <= M64_PER_IOV) ? num_vfs: M64_PER_IOV;
- vf_per_group = (num_vfs <= M64_PER_IOV)? 1:
- roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
- } else {
- vf_groups = 1;
- vf_per_group = 1;
- }
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = &pdev->resource[i + PCI_IOV_RESOURCES];
if (!res->flags || !res->parent)
continue;
- if (!pnv_pci_is_mem_pref_64(res->flags))
- continue;
-
- for (j = 0; j < vf_groups; j++) {
+ for (j = 0; j < m64_bars; j++) {
do {
win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
phb->ioda.m64_bar_idx + 1, 0);
@@ -1275,12 +1272,11 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
goto m64_failed;
} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
- pdn->m64_wins[i][j] = win;
+ pdn->m64_map[j][i] = win;
- if (pdn->m64_per_iov == M64_PER_IOV) {
+ if (pdn->m64_single_mode) {
size = pci_iov_resource_size(pdev,
PCI_IOV_RESOURCES + i);
- size = size * vf_per_group;
start = res->start + size * j;
} else {
size = resource_size(res);
@@ -1288,16 +1284,16 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
}
/* Map the M64 here */
- if (pdn->m64_per_iov == M64_PER_IOV) {
- pe_num = pdn->offset + j;
+ if (pdn->m64_single_mode) {
+ pe_num = pdn->pe_num_map[j];
rc = opal_pci_map_pe_mmio_window(phb->opal_id,
pe_num, OPAL_M64_WINDOW_TYPE,
- pdn->m64_wins[i][j], 0);
+ pdn->m64_map[j][i], 0);
}
rc = opal_pci_set_phb_mem_window(phb->opal_id,
OPAL_M64_WINDOW_TYPE,
- pdn->m64_wins[i][j],
+ pdn->m64_map[j][i],
start,
0, /* unused */
size);
@@ -1309,12 +1305,12 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
goto m64_failed;
}
- if (pdn->m64_per_iov == M64_PER_IOV)
+ if (pdn->m64_single_mode)
rc = opal_pci_phb_mmio_enable(phb->opal_id,
- OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 2);
+ OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2);
else
rc = opal_pci_phb_mmio_enable(phb->opal_id,
- OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 1);
+ OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1);
if (rc != OPAL_SUCCESS) {
dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
@@ -1326,7 +1322,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
return 0;
m64_failed:
- pnv_pci_vf_release_m64(pdev);
+ pnv_pci_vf_release_m64(pdev, num_vfs);
return -EBUSY;
}
@@ -1353,15 +1349,13 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
}
-static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
+static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
{
struct pci_bus *bus;
struct pci_controller *hose;
struct pnv_phb *phb;
struct pnv_ioda_pe *pe, *pe_n;
struct pci_dn *pdn;
- u16 vf_index;
- int64_t rc;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
@@ -1371,35 +1365,6 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
if (!pdev->is_physfn)
return;
- if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) {
- int vf_group;
- int vf_per_group;
- int vf_index1;
-
- vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
-
- for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++)
- for (vf_index = vf_group * vf_per_group;
- vf_index < (vf_group + 1) * vf_per_group &&
- vf_index < num_vfs;
- vf_index++)
- for (vf_index1 = vf_group * vf_per_group;
- vf_index1 < (vf_group + 1) * vf_per_group &&
- vf_index1 < num_vfs;
- vf_index1++){
-
- rc = opal_pci_set_peltv(phb->opal_id,
- pdn->offset + vf_index,
- pdn->offset + vf_index1,
- OPAL_REMOVE_PE_FROM_DOMAIN);
-
- if (rc)
- dev_warn(&pdev->dev, "%s: Failed to unlink same group PE#%d(%lld)\n",
- __func__,
- pdn->offset + vf_index1, rc);
- }
- }
-
list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
if (pe->parent_dev != pdev)
continue;
@@ -1424,7 +1389,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
struct pnv_phb *phb;
struct pci_dn *pdn;
struct pci_sriov *iov;
- u16 num_vfs;
+ u16 num_vfs, i;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
@@ -1434,18 +1399,25 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
num_vfs = pdn->num_vfs;
/* Release VF PEs */
- pnv_ioda_release_vf_PE(pdev, num_vfs);
+ pnv_ioda_release_vf_PE(pdev);
if (phb->type == PNV_PHB_IODA2) {
- if (pdn->m64_per_iov == 1)
- pnv_pci_vf_resource_shift(pdev, -pdn->offset);
+ if (!pdn->m64_single_mode)
+ pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map);
/* Release M64 windows */
- pnv_pci_vf_release_m64(pdev);
+ pnv_pci_vf_release_m64(pdev, num_vfs);
/* Release PE numbers */
- bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
- pdn->offset = 0;
+ if (pdn->m64_single_mode) {
+ for (i = 0; i < num_vfs; i++) {
+ if (pdn->pe_num_map[i] != IODA_INVALID_PE)
+ pnv_ioda_free_pe(phb, pdn->pe_num_map[i]);
+ }
+ } else
+ bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
+ /* Releasing pe_num_map */
+ kfree(pdn->pe_num_map);
}
}
@@ -1460,7 +1432,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
int pe_num;
u16 vf_index;
struct pci_dn *pdn;
- int64_t rc;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
@@ -1472,7 +1443,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
/* Reserve PE for each VF */
for (vf_index = 0; vf_index < num_vfs; vf_index++) {
- pe_num = pdn->offset + vf_index;
+ if (pdn->m64_single_mode)
+ pe_num = pdn->pe_num_map[vf_index];
+ else
+ pe_num = *pdn->pe_num_map + vf_index;
pe = &phb->ioda.pe_array[pe_num];
pe->pe_number = pe_num;
@@ -1505,37 +1479,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
pnv_pci_ioda2_setup_dma_pe(phb, pe);
}
-
- if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) {
- int vf_group;
- int vf_per_group;
- int vf_index1;
-
- vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
-
- for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) {
- for (vf_index = vf_group * vf_per_group;
- vf_index < (vf_group + 1) * vf_per_group &&
- vf_index < num_vfs;
- vf_index++) {
- for (vf_index1 = vf_group * vf_per_group;
- vf_index1 < (vf_group + 1) * vf_per_group &&
- vf_index1 < num_vfs;
- vf_index1++) {
-
- rc = opal_pci_set_peltv(phb->opal_id,
- pdn->offset + vf_index,
- pdn->offset + vf_index1,
- OPAL_ADD_PE_TO_DOMAIN);
-
- if (rc)
- dev_warn(&pdev->dev, "%s: Failed to link same group PE#%d(%lld)\n",
- __func__,
- pdn->offset + vf_index1, rc);
- }
- }
- }
- }
}
int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
@@ -1545,6 +1488,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
struct pnv_phb *phb;
struct pci_dn *pdn;
int ret;
+ u16 i;
bus = pdev->bus;
hose = pci_bus_to_host(bus);
@@ -1552,20 +1496,59 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
pdn = pci_get_pdn(pdev);
if (phb->type == PNV_PHB_IODA2) {
+ if (!pdn->vfs_expanded) {
+ dev_info(&pdev->dev, "don't support this SRIOV device"
+ " with non 64bit-prefetchable IOV BAR\n");
+ return -ENOSPC;
+ }
+
+ /*
+ * When M64 BARs functions in Single PE mode, the number of VFs
+ * could be enabled must be less than the number of M64 BARs.
+ */
+ if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) {
+ dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n");
+ return -EBUSY;
+ }
+
+ /* Allocating pe_num_map */
+ if (pdn->m64_single_mode)
+ pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map) * num_vfs,
+ GFP_KERNEL);
+ else
+ pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL);
+
+ if (!pdn->pe_num_map)
+ return -ENOMEM;
+
+ if (pdn->m64_single_mode)
+ for (i = 0; i < num_vfs; i++)
+ pdn->pe_num_map[i] = IODA_INVALID_PE;
+
/* Calculate available PE for required VFs */
- mutex_lock(&phb->ioda.pe_alloc_mutex);
- pdn->offset = bitmap_find_next_zero_area(
- phb->ioda.pe_alloc, phb->ioda.total_pe,
- 0, num_vfs, 0);
- if (pdn->offset >= phb->ioda.total_pe) {
+ if (pdn->m64_single_mode) {
+ for (i = 0; i < num_vfs; i++) {
+ pdn->pe_num_map[i] = pnv_ioda_alloc_pe(phb);
+ if (pdn->pe_num_map[i] == IODA_INVALID_PE) {
+ ret = -EBUSY;
+ goto m64_failed;
+ }
+ }
+ } else {
+ mutex_lock(&phb->ioda.pe_alloc_mutex);
+ *pdn->pe_num_map = bitmap_find_next_zero_area(
+ phb->ioda.pe_alloc, phb->ioda.total_pe,
+ 0, num_vfs, 0);
+ if (*pdn->pe_num_map >= phb->ioda.total_pe) {
+ mutex_unlock(&phb->ioda.pe_alloc_mutex);
+ dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
+ kfree(pdn->pe_num_map);
+ return -EBUSY;
+ }
+ bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
mutex_unlock(&phb->ioda.pe_alloc_mutex);
- dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
- pdn->offset = 0;
- return -EBUSY;
}
- bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs);
pdn->num_vfs = num_vfs;
- mutex_unlock(&phb->ioda.pe_alloc_mutex);
/* Assign M64 window accordingly */
ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
@@ -1579,8 +1562,8 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
* the IOV BAR according to the PE# allocated to the VFs.
* Otherwise, the PE# for the VF will conflict with others.
*/
- if (pdn->m64_per_iov == 1) {
- ret = pnv_pci_vf_resource_shift(pdev, pdn->offset);
+ if (!pdn->m64_single_mode) {
+ ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map);
if (ret)
goto m64_failed;
}
@@ -1592,8 +1575,16 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
return 0;
m64_failed:
- bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
- pdn->offset = 0;
+ if (pdn->m64_single_mode) {
+ for (i = 0; i < num_vfs; i++) {
+ if (pdn->pe_num_map[i] != IODA_INVALID_PE)
+ pnv_ioda_free_pe(phb, pdn->pe_num_map[i]);
+ }
+ } else
+ bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
+
+ /* Releasing pe_num_map */
+ kfree(pdn->pe_num_map);
return ret;
}
@@ -1612,8 +1603,7 @@ int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
/* Allocate PCI data */
add_dev_pci_data(pdev);
- pnv_pci_sriov_enable(pdev, num_vfs);
- return 0;
+ return pnv_pci_sriov_enable(pdev, num_vfs);
}
#endif /* CONFIG_PCI_IOV */
@@ -2851,45 +2841,58 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
#ifdef CONFIG_PCI_IOV
static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
{
- struct pci_controller *hose;
- struct pnv_phb *phb;
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ const resource_size_t gate = phb->ioda.m64_segsize >> 2;
struct resource *res;
int i;
- resource_size_t size;
+ resource_size_t size, total_vf_bar_sz;
struct pci_dn *pdn;
int mul, total_vfs;
if (!pdev->is_physfn || pdev->is_added)
return;
- hose = pci_bus_to_host(pdev->bus);
- phb = hose->private_data;
-
pdn = pci_get_pdn(pdev);
pdn->vfs_expanded = 0;
+ pdn->m64_single_mode = false;
total_vfs = pci_sriov_get_totalvfs(pdev);
- pdn->m64_per_iov = 1;
mul = phb->ioda.total_pe;
+ total_vf_bar_sz = 0;
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = &pdev->resource[i + PCI_IOV_RESOURCES];
if (!res->flags || res->parent)
continue;
if (!pnv_pci_is_mem_pref_64(res->flags)) {
- dev_warn(&pdev->dev, " non M64 VF BAR%d: %pR\n",
+ dev_warn(&pdev->dev, "Don't support SR-IOV with"
+ " non M64 VF BAR%d: %pR. \n",
i, res);
- continue;
+ goto truncate_iov;
}
- size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+ total_vf_bar_sz += pci_iov_resource_size(pdev,
+ i + PCI_IOV_RESOURCES);
- /* bigger than 64M */
- if (size > (1 << 26)) {
- dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n",
- i, res);
- pdn->m64_per_iov = M64_PER_IOV;
+ /*
+ * If bigger than quarter of M64 segment size, just round up
+ * power of two.
+ *
+ * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
+ * with other devices, IOV BAR size is expanded to be
+ * (total_pe * VF_BAR_size). When VF_BAR_size is half of M64
+ * segment size , the expanded size would equal to half of the
+ * whole M64 space size, which will exhaust the M64 Space and
+ * limit the system flexibility. This is a design decision to
+ * set the boundary to quarter of the M64 segment size.
+ */
+ if (total_vf_bar_sz > gate) {
mul = roundup_pow_of_two(total_vfs);
+ dev_info(&pdev->dev,
+ "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
+ total_vf_bar_sz, gate, mul);
+ pdn->m64_single_mode = true;
break;
}
}
@@ -2898,20 +2901,31 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
res = &pdev->resource[i + PCI_IOV_RESOURCES];
if (!res->flags || res->parent)
continue;
- if (!pnv_pci_is_mem_pref_64(res->flags)) {
- dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n",
- i, res);
- continue;
- }
- dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+ /*
+ * On PHB3, the minimum size alignment of M64 BAR in single
+ * mode is 32MB.
+ */
+ if (pdn->m64_single_mode && (size < SZ_32M))
+ goto truncate_iov;
+ dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
res->end = res->start + size * mul - 1;
dev_dbg(&pdev->dev, " %pR\n", res);
dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
i, res, mul);
}
pdn->vfs_expanded = mul;
+
+ return;
+
+truncate_iov:
+ /* To save MMIO space, IOV BAR is truncated. */
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = &pdev->resource[i + PCI_IOV_RESOURCES];
+ res->flags = 0;
+ res->end = res->start - 1;
+ }
}
#endif /* CONFIG_PCI_IOV */
@@ -3125,18 +3139,35 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
int resno)
{
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(pdev);
- resource_size_t align, iov_align;
-
- iov_align = resource_size(&pdev->resource[resno]);
- if (iov_align)
- return iov_align;
+ resource_size_t align;
+ /*
+ * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
+ * SR-IOV. While from hardware perspective, the range mapped by M64
+ * BAR should be size aligned.
+ *
+ * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra
+ * powernv-specific hardware restriction is gone. But if just use the
+ * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with
+ * in one segment of M64 #15, which introduces the PE conflict between
+ * PF and VF. Based on this, the minimum alignment of an IOV BAR is
+ * m64_segsize.
+ *
+ * This function returns the total IOV BAR size if M64 BAR is in
+ * Shared PE mode or just VF BAR size if not.
+ * If the M64 BAR is in Single PE mode, return the VF BAR size or
+ * M64 segment size if IOV BAR size is less.
+ */
align = pci_iov_resource_size(pdev, resno);
- if (pdn->vfs_expanded)
- return pdn->vfs_expanded * align;
+ if (!pdn->vfs_expanded)
+ return align;
+ if (pdn->m64_single_mode)
+ return max(align, (resource_size_t)phb->ioda.m64_segsize);
- return align;
+ return pdn->vfs_expanded * align;
}
#endif /* CONFIG_PCI_IOV */
@@ -3180,6 +3211,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
+ .dma_bus_setup = pnv_pci_dma_bus_setup,
#ifdef CONFIG_PCI_MSI
.setup_msi_irqs = pnv_setup_msi_irqs,
.teardown_msi_irqs = pnv_teardown_msi_irqs,
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
deleted file mode 100644
index f2bdfea3b68d..000000000000
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Support PCI/PCIe on PowerNV platforms
- *
- * Currently supports only P5IOC2
- *
- * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/msi.h>
-
-#include <asm/sections.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/machdep.h>
-#include <asm/msi_bitmap.h>
-#include <asm/ppc-pci.h>
-#include <asm/opal.h>
-#include <asm/iommu.h>
-#include <asm/tce.h>
-
-#include "powernv.h"
-#include "pci.h"
-
-/* For now, use a fixed amount of TCE memory for each p5ioc2
- * hub, 16M will do
- */
-#define P5IOC2_TCE_MEMORY 0x01000000
-
-#ifdef CONFIG_PCI_MSI
-static int pnv_pci_p5ioc2_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
- unsigned int hwirq, unsigned int virq,
- unsigned int is_64, struct msi_msg *msg)
-{
- if (WARN_ON(!is_64))
- return -ENXIO;
- msg->data = hwirq - phb->msi_base;
- msg->address_hi = 0x10000000;
- msg->address_lo = 0;
-
- return 0;
-}
-
-static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb)
-{
- unsigned int count;
- const __be32 *prop = of_get_property(phb->hose->dn,
- "ibm,opal-msi-ranges", NULL);
- if (!prop)
- return;
-
- /* Don't do MSI's on p5ioc2 PCI-X are they are not properly
- * verified in HW
- */
- if (of_device_is_compatible(phb->hose->dn, "ibm,p5ioc2-pcix"))
- return;
- phb->msi_base = be32_to_cpup(prop);
- count = be32_to_cpup(prop + 1);
- if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
- pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
- phb->hose->global_number);
- return;
- }
- phb->msi_setup = pnv_pci_p5ioc2_msi_setup;
- phb->msi32_support = 0;
- pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
- count, phb->msi_base);
-}
-#else
-static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
-#endif /* CONFIG_PCI_MSI */
-
-static struct iommu_table_ops pnv_p5ioc2_iommu_ops = {
- .set = pnv_tce_build,
-#ifdef CONFIG_IOMMU_API
- .exchange = pnv_tce_xchg,
-#endif
- .clear = pnv_tce_free,
- .get = pnv_tce_get,
-};
-
-static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
- struct pci_dev *pdev)
-{
- struct iommu_table *tbl = phb->p5ioc2.table_group.tables[0];
-
- if (!tbl->it_map) {
- tbl->it_ops = &pnv_p5ioc2_iommu_ops;
- iommu_init_table(tbl, phb->hose->node);
- iommu_register_group(&phb->p5ioc2.table_group,
- pci_domain_nr(phb->hose->bus), phb->opal_id);
- INIT_LIST_HEAD_RCU(&tbl->it_group_list);
- pnv_pci_link_table_and_group(phb->hose->node, 0,
- tbl, &phb->p5ioc2.table_group);
- }
-
- set_iommu_table_base(&pdev->dev, tbl);
- iommu_add_device(&pdev->dev);
-}
-
-static const struct pci_controller_ops pnv_pci_p5ioc2_controller_ops = {
- .dma_dev_setup = pnv_pci_dma_dev_setup,
-#ifdef CONFIG_PCI_MSI
- .setup_msi_irqs = pnv_setup_msi_irqs,
- .teardown_msi_irqs = pnv_teardown_msi_irqs,
-#endif
-};
-
-static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
- void *tce_mem, u64 tce_size)
-{
- struct pnv_phb *phb;
- const __be64 *prop64;
- u64 phb_id;
- int64_t rc;
- static int primary = 1;
- struct iommu_table_group *table_group;
- struct iommu_table *tbl;
-
- pr_info(" Initializing p5ioc2 PHB %s\n", np->full_name);
-
- prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
- if (!prop64) {
- pr_err(" Missing \"ibm,opal-phbid\" property !\n");
- return;
- }
- phb_id = be64_to_cpup(prop64);
- pr_devel(" PHB-ID : 0x%016llx\n", phb_id);
- pr_devel(" TCE AT : 0x%016lx\n", __pa(tce_mem));
- pr_devel(" TCE SZ : 0x%016llx\n", tce_size);
-
- rc = opal_pci_set_phb_tce_memory(phb_id, __pa(tce_mem), tce_size);
- if (rc != OPAL_SUCCESS) {
- pr_err(" Failed to set TCE memory, OPAL error %lld\n", rc);
- return;
- }
-
- phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0);
- phb->hose = pcibios_alloc_controller(np);
- if (!phb->hose) {
- pr_err(" Failed to allocate PCI controller\n");
- return;
- }
-
- spin_lock_init(&phb->lock);
- phb->hose->first_busno = 0;
- phb->hose->last_busno = 0xff;
- phb->hose->private_data = phb;
- phb->hose->controller_ops = pnv_pci_p5ioc2_controller_ops;
- phb->hub_id = hub_id;
- phb->opal_id = phb_id;
- phb->type = PNV_PHB_P5IOC2;
- phb->model = PNV_PHB_MODEL_P5IOC2;
-
- phb->regs = of_iomap(np, 0);
-
- if (phb->regs == NULL)
- pr_err(" Failed to map registers !\n");
- else {
- pr_devel(" P_BUID = 0x%08x\n", in_be32(phb->regs + 0x100));
- pr_devel(" P_IOSZ = 0x%08x\n", in_be32(phb->regs + 0x1b0));
- pr_devel(" P_IO_ST = 0x%08x\n", in_be32(phb->regs + 0x1e0));
- pr_devel(" P_MEM1_H = 0x%08x\n", in_be32(phb->regs + 0x1a0));
- pr_devel(" P_MEM1_L = 0x%08x\n", in_be32(phb->regs + 0x190));
- pr_devel(" P_MSZ1_L = 0x%08x\n", in_be32(phb->regs + 0x1c0));
- pr_devel(" P_MEM_ST = 0x%08x\n", in_be32(phb->regs + 0x1d0));
- pr_devel(" P_MEM2_H = 0x%08x\n", in_be32(phb->regs + 0x2c0));
- pr_devel(" P_MEM2_L = 0x%08x\n", in_be32(phb->regs + 0x2b0));
- pr_devel(" P_MSZ2_H = 0x%08x\n", in_be32(phb->regs + 0x2d0));
- pr_devel(" P_MSZ2_L = 0x%08x\n", in_be32(phb->regs + 0x2e0));
- }
-
- /* Interpret the "ranges" property */
- /* This also maps the I/O region and sets isa_io/mem_base */
- pci_process_bridge_OF_ranges(phb->hose, np, primary);
- primary = 0;
-
- phb->hose->ops = &pnv_pci_ops;
-
- /* Setup MSI support */
- pnv_pci_init_p5ioc2_msis(phb);
-
- /* Setup TCEs */
- phb->dma_dev_setup = pnv_pci_p5ioc2_dma_dev_setup;
- pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table,
- tce_mem, tce_size, 0,
- IOMMU_PAGE_SHIFT_4K);
- /*
- * We do not allocate iommu_table as we do not support
- * hotplug or SRIOV on P5IOC2 and therefore iommu_free_table()
- * should not be called for phb->p5ioc2.table_group.tables[0] ever.
- */
- tbl = phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table;
- table_group = &phb->p5ioc2.table_group;
- table_group->tce32_start = tbl->it_offset << tbl->it_page_shift;
- table_group->tce32_size = tbl->it_size << tbl->it_page_shift;
-}
-
-void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
-{
- struct device_node *phbn;
- const __be64 *prop64;
- u64 hub_id;
- void *tce_mem;
- uint64_t tce_per_phb;
- int64_t rc;
- int phb_count = 0;
-
- pr_info("Probing p5ioc2 IO-Hub %s\n", np->full_name);
-
- prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
- if (!prop64) {
- pr_err(" Missing \"ibm,opal-hubid\" property !\n");
- return;
- }
- hub_id = be64_to_cpup(prop64);
- pr_info(" HUB-ID : 0x%016llx\n", hub_id);
-
- /* Count child PHBs and calculate TCE space per PHB */
- for_each_child_of_node(np, phbn) {
- if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
- of_device_is_compatible(phbn, "ibm,p5ioc2-pciex"))
- phb_count++;
- }
-
- if (phb_count <= 0) {
- pr_info(" No PHBs for Hub %s\n", np->full_name);
- return;
- }
-
- tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count);
- pr_info(" Allocating %lld MB of TCE memory per PHB\n",
- tce_per_phb >> 20);
-
- /* Currently allocate 16M of TCE memory for every Hub
- *
- * XXX TODO: Make it chip local if possible
- */
- tce_mem = memblock_virt_alloc(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY);
- pr_debug(" TCE : 0x%016lx..0x%016lx\n",
- __pa(tce_mem), __pa(tce_mem) + P5IOC2_TCE_MEMORY - 1);
- rc = opal_pci_set_hub_tce_memory(hub_id, __pa(tce_mem),
- P5IOC2_TCE_MEMORY);
- if (rc != OPAL_SUCCESS) {
- pr_err(" Failed to allocate TCE memory, OPAL error %lld\n", rc);
- return;
- }
-
- /* Initialize PHBs */
- for_each_child_of_node(np, phbn) {
- if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
- of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) {
- pnv_pci_init_p5ioc2_phb(phbn, hub_id,
- tce_mem, tce_per_phb);
- tce_mem += tce_per_phb;
- }
- }
-}
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 2f55c86df703..73c8dc2a353f 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -380,10 +380,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
*/
pe_no = pdn->pe_number;
if (pe_no == IODA_INVALID_PE) {
- if (phb->type == PNV_PHB_P5IOC2)
- pe_no = 0;
- else
- pe_no = phb->ioda.reserved_pe;
+ pe_no = phb->ioda.reserved_pe;
}
/*
@@ -599,6 +596,9 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
long i;
+ if (proto_tce & TCE_PCI_WRITE)
+ proto_tce |= TCE_PCI_READ;
+
for (i = 0; i < npages; i++) {
unsigned long newtce = proto_tce |
((rpn + i) << tbl->it_page_shift);
@@ -620,6 +620,9 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index,
BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
+ if (newtce & TCE_PCI_WRITE)
+ newtce |= TCE_PCI_READ;
+
oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce));
*hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE);
*direction = iommu_tce_direction(oldtce);
@@ -760,6 +763,26 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
phb->dma_dev_setup(phb, pdev);
}
+void pnv_pci_dma_bus_setup(struct pci_bus *bus)
+{
+ struct pci_controller *hose = bus->sysdata;
+ struct pnv_phb *phb = hose->private_data;
+ struct pnv_ioda_pe *pe;
+
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)))
+ continue;
+
+ if (!pe->pbus)
+ continue;
+
+ if (bus->number == ((pe->rid >> 8) & 0xFF)) {
+ pe->pbus = bus;
+ break;
+ }
+ }
+}
+
void pnv_pci_shutdown(void)
{
struct pci_controller *hose;
@@ -779,7 +802,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
void __init pnv_pci_init(void)
{
struct device_node *np;
- bool found_ioda = false;
pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN);
@@ -787,20 +809,11 @@ void __init pnv_pci_init(void)
if (!firmware_has_feature(FW_FEATURE_OPAL))
return;
- /* Look for IODA IO-Hubs. We don't support mixing IODA
- * and p5ioc2 due to the need to change some global
- * probing flags
- */
+ /* Look for IODA IO-Hubs. */
for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
pnv_pci_init_ioda_hub(np);
- found_ioda = true;
}
- /* Look for p5ioc2 IO-Hubs */
- if (!found_ioda)
- for_each_compatible_node(np, NULL, "ibm,p5ioc2")
- pnv_pci_init_p5ioc2_hub(np);
-
/* Look for ioda2 built-in PHB3's */
for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
pnv_pci_init_ioda2_phb(np);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 7f56313e8d72..3f814f382b2e 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -4,16 +4,14 @@
struct pci_dn;
enum pnv_phb_type {
- PNV_PHB_P5IOC2 = 0,
- PNV_PHB_IODA1 = 1,
- PNV_PHB_IODA2 = 2,
- PNV_PHB_NPU = 3,
+ PNV_PHB_IODA1 = 0,
+ PNV_PHB_IODA2 = 1,
+ PNV_PHB_NPU = 2,
};
/* Precise PHB model for error management */
enum pnv_phb_model {
PNV_PHB_MODEL_UNKNOWN,
- PNV_PHB_MODEL_P5IOC2,
PNV_PHB_MODEL_P7IOC,
PNV_PHB_MODEL_PHB3,
PNV_PHB_MODEL_NPU,
@@ -121,81 +119,74 @@ struct pnv_phb {
void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
- union {
- struct {
- struct iommu_table iommu_table;
- struct iommu_table_group table_group;
- } p5ioc2;
-
- struct {
- /* Global bridge info */
- unsigned int total_pe;
- unsigned int reserved_pe;
-
- /* 32-bit MMIO window */
- unsigned int m32_size;
- unsigned int m32_segsize;
- unsigned int m32_pci_base;
-
- /* 64-bit MMIO window */
- unsigned int m64_bar_idx;
- unsigned long m64_size;
- unsigned long m64_segsize;
- unsigned long m64_base;
- unsigned long m64_bar_alloc;
-
- /* IO ports */
- unsigned int io_size;
- unsigned int io_segsize;
- unsigned int io_pci_base;
-
- /* PE allocation bitmap */
- unsigned long *pe_alloc;
- /* PE allocation mutex */
- struct mutex pe_alloc_mutex;
-
- /* M32 & IO segment maps */
- unsigned int *m32_segmap;
- unsigned int *io_segmap;
- struct pnv_ioda_pe *pe_array;
-
- /* IRQ chip */
- int irq_chip_init;
- struct irq_chip irq_chip;
-
- /* Sorted list of used PE's based
- * on the sequence of creation
- */
- struct list_head pe_list;
- struct mutex pe_list_mutex;
-
- /* Reverse map of PEs, will have to extend if
- * we are to support more than 256 PEs, indexed
- * bus { bus, devfn }
- */
- unsigned char pe_rmap[0x10000];
-
- /* 32-bit TCE tables allocation */
- unsigned long tce32_count;
-
- /* Total "weight" for the sake of DMA resources
- * allocation
- */
- unsigned int dma_weight;
- unsigned int dma_pe_count;
-
- /* Sorted list of used PE's, sorted at
- * boot for resource allocation purposes
- */
- struct list_head pe_dma_list;
-
- /* TCE cache invalidate registers (physical and
- * remapped)
- */
- phys_addr_t tce_inval_reg_phys;
- __be64 __iomem *tce_inval_reg;
- } ioda;
- };
+ struct {
+ /* Global bridge info */
+ unsigned int total_pe;
+ unsigned int reserved_pe;
+
+ /* 32-bit MMIO window */
+ unsigned int m32_size;
+ unsigned int m32_segsize;
+ unsigned int m32_pci_base;
+
+ /* 64-bit MMIO window */
+ unsigned int m64_bar_idx;
+ unsigned long m64_size;
+ unsigned long m64_segsize;
+ unsigned long m64_base;
+ unsigned long m64_bar_alloc;
+
+ /* IO ports */
+ unsigned int io_size;
+ unsigned int io_segsize;
+ unsigned int io_pci_base;
+
+ /* PE allocation bitmap */
+ unsigned long *pe_alloc;
+ /* PE allocation mutex */
+ struct mutex pe_alloc_mutex;
+
+ /* M32 & IO segment maps */
+ unsigned int *m32_segmap;
+ unsigned int *io_segmap;
+ struct pnv_ioda_pe *pe_array;
+
+ /* IRQ chip */
+ int irq_chip_init;
+ struct irq_chip irq_chip;
+
+ /* Sorted list of used PE's based
+ * on the sequence of creation
+ */
+ struct list_head pe_list;
+ struct mutex pe_list_mutex;
+
+ /* Reverse map of PEs, will have to extend if
+ * we are to support more than 256 PEs, indexed
+ * bus { bus, devfn }
+ */
+ unsigned char pe_rmap[0x10000];
+
+ /* 32-bit TCE tables allocation */
+ unsigned long tce32_count;
+
+ /* Total "weight" for the sake of DMA resources
+ * allocation
+ */
+ unsigned int dma_weight;
+ unsigned int dma_pe_count;
+
+ /* Sorted list of used PE's, sorted at
+ * boot for resource allocation purposes
+ */
+ struct list_head pe_dma_list;
+
+ /* TCE cache invalidate registers (physical and
+ * remapped)
+ */
+ phys_addr_t tce_inval_reg_phys;
+ __be64 __iomem *tce_inval_reg;
+ } ioda;
/* PHB and hub status structure */
union {
@@ -232,7 +223,6 @@ extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
void *tce_mem, u64 tce_size,
u64 dma_offset, unsigned page_shift);
-extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
extern void pnv_pci_init_npu_phb(struct device_node *np);
@@ -242,6 +232,7 @@ extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev);
+extern void pnv_pci_dma_bus_setup(struct pci_bus *bus);
extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 503a73f59359..0babef11136f 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -407,7 +407,7 @@ static DEVICE_ATTR(subcores_per_core, 0644,
static int subcore_init(void)
{
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ if (!cpu_has_feature(CPU_FTR_SUBCORE))
return 0;
/*
diff --git a/arch/powerpc/platforms/ps3/gelic_udbg.c b/arch/powerpc/platforms/ps3/gelic_udbg.c
index 20b46a19a48f..09bf24d616a5 100644
--- a/arch/powerpc/platforms/ps3/gelic_udbg.c
+++ b/arch/powerpc/platforms/ps3/gelic_udbg.c
@@ -13,6 +13,12 @@
*
*/
+#include <linux/if_ether.h>
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+
#include <asm/io.h>
#include <asm/udbg.h>
#include <asm/lv1call.h>
@@ -56,39 +62,8 @@ struct debug_block {
u8 pkt[1520];
} __packed;
-struct ethhdr {
- u8 dest[6];
- u8 src[6];
- u16 type;
-} __packed;
-
-struct vlantag {
- u16 vlan;
- u16 subtype;
-} __packed;
-
-struct iphdr {
- u8 ver_len;
- u8 dscp_ecn;
- u16 total_length;
- u16 ident;
- u16 frag_off_flags;
- u8 ttl;
- u8 proto;
- u16 checksum;
- u32 src;
- u32 dest;
-} __packed;
-
-struct udphdr {
- u16 src;
- u16 dest;
- u16 len;
- u16 checksum;
-} __packed;
-
static __iomem struct ethhdr *h_eth;
-static __iomem struct vlantag *h_vlan;
+static __iomem struct vlan_hdr *h_vlan;
static __iomem struct iphdr *h_ip;
static __iomem struct udphdr *h_udp;
@@ -173,8 +148,8 @@ static void gelic_debug_init(void)
h_eth = (struct ethhdr *)dbg.pkt;
- memset(&h_eth->dest, 0xff, 6);
- memcpy(&h_eth->src, &mac, 6);
+ eth_broadcast_addr(h_eth->h_dest);
+ memcpy(&h_eth->h_source, &mac, ETH_ALEN);
header_size = sizeof(struct ethhdr);
@@ -183,28 +158,29 @@ static void gelic_debug_init(void)
GELIC_LV1_VLAN_TX_ETHERNET_0, 0, 0,
&vlan_id, &v2);
if (!result) {
- h_eth->type = 0x8100;
+ h_eth->h_proto= ETH_P_8021Q;
- header_size += sizeof(struct vlantag);
- h_vlan = (struct vlantag *)(h_eth + 1);
- h_vlan->vlan = vlan_id;
- h_vlan->subtype = 0x0800;
+ header_size += sizeof(struct vlan_hdr);
+ h_vlan = (struct vlan_hdr *)(h_eth + 1);
+ h_vlan->h_vlan_TCI = vlan_id;
+ h_vlan->h_vlan_encapsulated_proto = ETH_P_IP;
h_ip = (struct iphdr *)(h_vlan + 1);
} else {
- h_eth->type = 0x0800;
+ h_eth->h_proto= 0x0800;
h_ip = (struct iphdr *)(h_eth + 1);
}
header_size += sizeof(struct iphdr);
- h_ip->ver_len = 0x45;
+ h_ip->version = 4;
+ h_ip->ihl = 5;
h_ip->ttl = 10;
- h_ip->proto = 0x11;
- h_ip->src = 0x00000000;
- h_ip->dest = 0xffffffff;
+ h_ip->protocol = 0x11;
+ h_ip->saddr = 0x00000000;
+ h_ip->daddr = 0xffffffff;
header_size += sizeof(struct udphdr);
h_udp = (struct udphdr *)(h_ip + 1);
- h_udp->src = GELIC_DEBUG_PORT;
+ h_udp->source = GELIC_DEBUG_PORT;
h_udp->dest = GELIC_DEBUG_PORT;
pmsgc = pmsg = (char *)(h_udp + 1);
@@ -225,16 +201,16 @@ static void gelic_sendbuf(int msgsize)
int i;
dbg.descr.buf_size = header_size + msgsize;
- h_ip->total_length = msgsize + sizeof(struct udphdr) +
+ h_ip->tot_len = msgsize + sizeof(struct udphdr) +
sizeof(struct iphdr);
h_udp->len = msgsize + sizeof(struct udphdr);
- h_ip->checksum = 0;
+ h_ip->check = 0;
sum = 0;
p = (u16 *)h_ip;
for (i = 0; i < 5; i++)
sum += *p++;
- h_ip->checksum = ~(sum + (sum >> 16));
+ h_ip->check = ~(sum + (sum >> 16));
dbg.descr.dmac_cmd_status = GELIC_DESCR_DMA_CMD_NO_CHKSUM |
GELIC_DESCR_TX_DMA_FRAME_TAIL;
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index 638c4060938e..b831638e6f4a 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -78,7 +78,7 @@ struct ps3_bmp {
/**
* struct ps3_private - a per cpu data structure
* @bmp: ps3_bmp structure
- * @bmp_lock: Syncronize access to bmp.
+ * @bmp_lock: Synchronize access to bmp.
* @ipi_debug_brk_mask: Mask for debug break IPIs
* @ppe_id: HV logical_ppe_id
* @thread_id: HV thread_id
diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c
index 849b29b3e9ae..74da18de853a 100644
--- a/arch/powerpc/platforms/pseries/hvconsole.c
+++ b/arch/powerpc/platforms/pseries/hvconsole.c
@@ -31,7 +31,7 @@
#include <asm/plpar_wrappers.h>
/**
- * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper
+ * hvc_get_chars - retrieve characters from firmware for denoted vterm adapter
* @vtermno: The vtermno or unit_address of the adapter from which to fetch the
* data.
* @buf: The character buffer into which to put the character data fetched from
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 477290ad855e..2415a0d31f8f 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -505,8 +505,8 @@ static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
}
#endif
-static void pSeries_lpar_hpte_removebolted(unsigned long ea,
- int psize, int ssize)
+static int pSeries_lpar_hpte_removebolted(unsigned long ea,
+ int psize, int ssize)
{
unsigned long vpn;
unsigned long slot, vsid;
@@ -515,11 +515,14 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
vpn = hpt_vpn(ea, vsid, ssize);
slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
- BUG_ON(slot == -1);
+ if (slot == -1)
+ return -ENOENT;
+
/*
* lpar doesn't use the passed actual page size
*/
pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0);
+ return 0;
}
/*
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 36df46eaba24..6e944fc6e5f9 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -515,7 +515,7 @@ static void __init pSeries_setup_arch(void)
fwnmi_init();
- /* By default, only probe PCI (can be overriden by rtas_pci) */
+ /* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
/* Find and initialize PCI host bridges */
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index c69e88e91459..85729f49764f 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -575,7 +575,7 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary)
if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) {
/* use fsl_indirect_read_config for PCIe */
hose->ops = &fsl_indirect_pcie_ops;
- /* For PCIE read HEADER_TYPE to identify controler mode */
+ /* For PCIE read HEADER_TYPE to identify controller mode */
early_read_config_byte(hose, 0, 0, PCI_HEADER_TYPE, &hdr_type);
if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE)
goto no_bridge;
diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c
index b48197ae44d0..ffe0ee832768 100644
--- a/arch/powerpc/sysdev/fsl_rmu.c
+++ b/arch/powerpc/sysdev/fsl_rmu.c
@@ -570,7 +570,7 @@ int fsl_rio_port_write_init(struct fsl_rio_pw *pw)
out_be32(&pw->pw_regs->pwsr,
(RIO_IPWSR_TE | RIO_IPWSR_QFI | RIO_IPWSR_PWD));
- /* Configure port write contoller for snooping enable all reporting,
+ /* Configure port write controller for snooping enable all reporting,
clear queue full */
out_be32(&pw->pw_regs->pwmr,
RIO_IPWMR_SEN | RIO_IPWMR_QFIE | RIO_IPWMR_EIE | RIO_IPWMR_CQ);
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index 6f99ed3967fd..aa2c186d3115 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -238,7 +238,7 @@ void i8259_init(struct device_node *node, unsigned long intack_addr)
/* init master interrupt controller */
outb(0x11, 0x20); /* Start init sequence */
outb(0x00, 0x21); /* Vector base */
- outb(0x04, 0x21); /* edge tiggered, Cascade (slave) on IRQ2 */
+ outb(0x04, 0x21); /* edge triggered, Cascade (slave) on IRQ2 */
outb(0x01, 0x21); /* Select 8086 mode */
/* init slave interrupt controller */
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 2a0452e364ba..afe3c7cd395d 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -2,7 +2,7 @@
* arch/powerpc/kernel/mpic.c
*
* Driver for interrupt controllers following the OpenPIC standard, the
- * common implementation beeing IBM's MPIC. This driver also can deal
+ * common implementation being IBM's MPIC. This driver also can deal
* with various broken implementations of this HW.
*
* Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
@@ -1657,7 +1657,7 @@ void __init mpic_init(struct mpic *mpic)
}
}
- /* FSL mpic error interrupt intialization */
+ /* FSL mpic error interrupt initialization */
if (mpic->flags & MPIC_FSL_HAS_EIMR)
mpic_err_int_init(mpic, MPIC_FSL_ERR_INT);
}
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 07a8508cb7fa..942796fa4767 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -47,6 +47,9 @@
#include <asm/debug.h>
#include <asm/hw_breakpoint.h>
+#include <asm/opal.h>
+#include <asm/firmware.h>
+
#ifdef CONFIG_PPC64
#include <asm/hvcall.h>
#include <asm/paca.h>
@@ -119,6 +122,16 @@ static void dump(void);
static void prdump(unsigned long, long);
static int ppc_inst_dump(unsigned long, long, int);
static void dump_log_buf(void);
+
+#ifdef CONFIG_PPC_POWERNV
+static void dump_opal_msglog(void);
+#else
+static inline void dump_opal_msglog(void)
+{
+ printf("Machine is not running OPAL firmware.\n");
+}
+#endif
+
static void backtrace(struct pt_regs *);
static void excprint(struct pt_regs *);
static void prregs(struct pt_regs *);
@@ -150,6 +163,7 @@ static int cpu_cmd(void);
static void csum(void);
static void bootcmds(void);
static void proccall(void);
+static void show_tasks(void);
void dump_segments(void);
static void symbol_lookup(void);
static void xmon_show_stack(unsigned long sp, unsigned long lr,
@@ -202,6 +216,10 @@ Commands:\n\
df dump float values\n\
dd dump double values\n\
dl dump the kernel log buffer\n"
+#ifdef CONFIG_PPC_POWERNV
+ "\
+ do dump the OPAL message log\n"
+#endif
#ifdef CONFIG_PPC64
"\
dp[#] dump paca for current cpu, or cpu #\n\
@@ -221,6 +239,7 @@ Commands:\n\
mz zero a block of memory\n\
mi show information about memory allocation\n\
p call a procedure\n\
+ P list processes/tasks\n\
r print registers\n\
s single step\n"
#ifdef CONFIG_SPU_BASE
@@ -233,7 +252,7 @@ Commands:\n\
" S print special registers\n\
t print backtrace\n\
x exit monitor and recover\n\
- X exit monitor and dont recover\n"
+ X exit monitor and don't recover\n"
#if defined(CONFIG_PPC64) && !defined(CONFIG_PPC_BOOK3E)
" u dump segment table or SLB\n"
#elif defined(CONFIG_PPC_STD_MMU_32)
@@ -950,6 +969,9 @@ cmds(struct pt_regs *excp)
case 'p':
proccall();
break;
+ case 'P':
+ show_tasks();
+ break;
#ifdef CONFIG_PPC_STD_MMU
case 'u':
dump_segments();
@@ -2253,6 +2275,8 @@ dump(void)
last_cmd = "di\n";
} else if (c == 'l') {
dump_log_buf();
+ } else if (c == 'o') {
+ dump_opal_msglog();
} else if (c == 'r') {
scanhex(&ndump);
if (ndump == 0)
@@ -2395,6 +2419,45 @@ dump_log_buf(void)
catch_memory_errors = 0;
}
+#ifdef CONFIG_PPC_POWERNV
+static void dump_opal_msglog(void)
+{
+ unsigned char buf[128];
+ ssize_t res;
+ loff_t pos = 0;
+
+ if (!firmware_has_feature(FW_FEATURE_OPAL)) {
+ printf("Machine is not running OPAL firmware.\n");
+ return;
+ }
+
+ if (setjmp(bus_error_jmp) != 0) {
+ printf("Error dumping OPAL msglog!\n");
+ return;
+ }
+
+ catch_memory_errors = 1;
+ sync();
+
+ xmon_start_pagination();
+ while ((res = opal_msglog_copy(buf, pos, sizeof(buf) - 1))) {
+ if (res < 0) {
+ printf("Error dumping OPAL msglog! Error: %zd\n", res);
+ break;
+ }
+ buf[res] = '\0';
+ printf("%s", buf);
+ pos += res;
+ }
+ xmon_end_pagination();
+
+ sync();
+ /* wait a little while to see if we get a machine check */
+ __delay(200);
+ catch_memory_errors = 0;
+}
+#endif
+
/*
* Memory operations - move, set, print differences
*/
@@ -2508,6 +2571,61 @@ memzcan(void)
printf("%.8x\n", a - mskip);
}
+static void show_task(struct task_struct *tsk)
+{
+ char state;
+
+ /*
+ * Cloned from kdb_task_state_char(), which is not entirely
+ * appropriate for calling from xmon. This could be moved
+ * to a common, generic, routine used by both.
+ */
+ state = (tsk->state == 0) ? 'R' :
+ (tsk->state < 0) ? 'U' :
+ (tsk->state & TASK_UNINTERRUPTIBLE) ? 'D' :
+ (tsk->state & TASK_STOPPED) ? 'T' :
+ (tsk->state & TASK_TRACED) ? 'C' :
+ (tsk->exit_state & EXIT_ZOMBIE) ? 'Z' :
+ (tsk->exit_state & EXIT_DEAD) ? 'E' :
+ (tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
+
+ printf("%p %016lx %6d %6d %c %2d %s\n", tsk,
+ tsk->thread.ksp,
+ tsk->pid, tsk->parent->pid,
+ state, task_thread_info(tsk)->cpu,
+ tsk->comm);
+}
+
+static void show_tasks(void)
+{
+ unsigned long tskv;
+ struct task_struct *tsk = NULL;
+
+ printf(" task_struct ->thread.ksp PID PPID S P CMD\n");
+
+ if (scanhex(&tskv))
+ tsk = (struct task_struct *)tskv;
+
+ if (setjmp(bus_error_jmp) != 0) {
+ catch_memory_errors = 0;
+ printf("*** Error dumping task %p\n", tsk);
+ return;
+ }
+
+ catch_memory_errors = 1;
+ sync();
+
+ if (tsk)
+ show_task(tsk);
+ else
+ for_each_process(tsk)
+ show_task(tsk);
+
+ sync();
+ __delay(200);
+ catch_memory_errors = 0;
+}
+
static void proccall(void)
{
unsigned long args[8];