Diffstat (limited to 'arch/powerpc')
33 files changed, 270 insertions, 1214 deletions
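The largest functional change in this series converts powerpc KVM's MMU notifier hooks from hva-range handlers (kvm_unmap_hva_range() and friends) to the generic gfn-based callbacks. As a minimal sketch of the new handler shape, mirroring the book3s_64_mmu_hv.c hunk below (struct kvm_gfn_range carries the memslot plus a [start, end) window of gfns):

/*
 * Illustrative sketch, not the exact committed code: generic KVM now hands
 * arch code a struct kvm_gfn_range, so the handler walks the gfn window and
 * calls the existing per-gfn helpers directly instead of translating an
 * hva range against every memslot.
 */
bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
	gfn_t gfn;

	if (kvm_is_radix(kvm)) {
		for (gfn = range->start; gfn < range->end; gfn++)
			kvm_unmap_radix(kvm, range->slot, gfn);
	} else {
		for (gfn = range->start; gfn < range->end; gfn++)
			kvm_unmap_rmapp(kvm, range->slot, gfn);
	}

	/* Returning false tells generic KVM not to issue an extra TLB flush. */
	return false;
}

One nit worth flagging: in the hunk as committed, the hash branch passes range->start to kvm_unmap_rmapp() on every iteration of the loop; the per-gfn form shown here is presumably what was intended.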
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1e6230bea09d..088dd2afcfe4 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -118,28 +118,31 @@ config PPC # Please keep this list sorted alphabetically. # select ARCH_32BIT_OFF_T if PPC32 + select ARCH_ENABLE_MEMORY_HOTPLUG + select ARCH_ENABLE_MEMORY_HOTREMOVE + select ARCH_HAS_COPY_MC if PPC64 select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_MAP_DIRECT if PPC_PSERIES select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL - select ARCH_HAS_KCOV select ARCH_HAS_HUGEPD if HUGETLB_PAGE + select ARCH_HAS_KCOV + select ARCH_HAS_MEMBARRIER_CALLBACKS + select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_MEMREMAP_COMPAT_ALIGN select ARCH_HAS_MMIOWB if PPC64 + select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API - select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 select ARCH_HAS_PTE_SPECIAL - select ARCH_HAS_MEMBARRIER_CALLBACKS - select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE - select ARCH_HAS_COPY_MC if PPC64 select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_KEEP_MEMBLOCK @@ -162,9 +165,8 @@ config PPC select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN - select DMA_OPS if PPC64 select DMA_OPS_BYPASS if PPC64 - select ARCH_HAS_DMA_MAP_DIRECT if PPC64 && PPC_PSERIES + select DMA_OPS if PPC64 select DYNAMIC_FTRACE if FUNCTION_TRACER select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT @@ -184,23 +186,22 @@ config PPC select GENERIC_TIME_VSYSCALL select GENERIC_VDSO_TIME_NS select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14 - select HAVE_ARCH_KGDB select HAVE_ARCH_KFENCE if PPC32 + select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_NVRAM_OPS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ASM_MODVERSIONS - select HAVE_C_RECORDMCOUNT - select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13) - select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) select HAVE_CONTEXT_TRACKING if PPC64 + select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE @@ -214,10 +215,13 @@ config PPC select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC select HAVE_GENERIC_VDSO + select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK + select HAVE_IRQ_TIME_ACCOUNTING select 
HAVE_KERNEL_GZIP select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE select HAVE_KERNEL_LZO if DEFAULT_UIMAGE @@ -229,25 +233,25 @@ config PPC select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) - select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC64 && PPC_BOOK3S && SMP select HAVE_OPTPROBES select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 - select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select MMU_GATHER_RCU_TABLE_FREE - select MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE + select HAVE_RSEQ select HAVE_SOFTIRQ_ON_OWN_STACK + select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) + select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13) select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING - select HAVE_IRQ_TIME_ACCOUNTING - select HAVE_RSEQ + select HUGETLB_PAGE_SIZE_VARIABLE if PPC_BOOK3S_64 && HUGETLB_PAGE select IOMMU_HELPER if PPC64 select IRQ_DOMAIN select IRQ_FORCED_THREADING + select MMU_GATHER_PAGE_SIZE + select MMU_GATHER_RCU_TABLE_FREE select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE select NEED_SG_DMA_LENGTH @@ -420,11 +424,6 @@ config HIGHMEM source "kernel/Kconfig.hz" -config HUGETLB_PAGE_SIZE_VARIABLE - bool - depends on HUGETLB_PAGE && PPC_BOOK3S_64 - default y - config MATH_EMULATION bool "Math emulation" depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE @@ -520,12 +519,6 @@ config ARCH_CPU_PROBE_RELEASE def_bool y depends on HOTPLUG_CPU -config ARCH_ENABLE_MEMORY_HOTPLUG - def_bool y - -config ARCH_ENABLE_MEMORY_HOTREMOVE - def_bool y - config PPC64_SUPPORTS_MEMORY_FAILURE bool "Add support for memory hwpoison" depends on PPC_BOOK3S_64 @@ -705,9 +698,6 @@ config ARCH_SPARSEMEM_DEFAULT def_bool y depends on PPC_BOOK3S_64 -config SYS_SUPPORTS_HUGETLBFS - bool - config ILLEGAL_POINTER_VALUE hex # This is roughly half way between the top of user space and the bottom diff --git a/arch/powerpc/include/asm/fsl_pamu_stash.h b/arch/powerpc/include/asm/fsl_pamu_stash.h index 30a31ad2123d..c0fbadb70b5d 100644 --- a/arch/powerpc/include/asm/fsl_pamu_stash.h +++ b/arch/powerpc/include/asm/fsl_pamu_stash.h @@ -7,6 +7,8 @@ #ifndef __FSL_PAMU_STASH_H #define __FSL_PAMU_STASH_H +struct iommu_domain; + /* cache stash targets */ enum pamu_stash_target { PAMU_ATTR_CACHE_L1 = 1, @@ -14,14 +16,6 @@ enum pamu_stash_target { PAMU_ATTR_CACHE_L3, }; -/* - * This attribute allows configuring stashig specific parameters - * in the PAMU hardware. 
- */ - -struct pamu_stash_attribute { - u32 cpu; /* cpu number */ - u32 cache; /* cache to stash to: L1,L2,L3 */ -}; +int fsl_pamu_configure_l1_stash(struct iommu_domain *domain, u32 cpu); #endif /* __FSL_PAMU_STASH_H */ diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index bc76970b6ee5..debe8c4f7062 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -12,7 +12,7 @@ #ifdef __ASSEMBLY__ -/* Based off of objdump optput from glibc */ +/* Based off of objdump output from glibc */ #define MCOUNT_SAVE_FRAME \ stwu r1,-48(r1); \ @@ -52,7 +52,7 @@ extern void _mcount(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { - /* reloction of mcount call site is the same as the address */ + /* relocation of mcount call site is the same as the address */ return addr; } diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 273edd208ec5..f130783c8301 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -663,11 +663,6 @@ static inline void name at \ #define xlate_dev_mem_ptr(p) __va(p) /* - * Convert a virtual cached pointer to an uncached pointer - */ -#define xlate_dev_kmem_ptr(p) p - -/* * We don't do relaxed operations yet, at least not with this semantic */ #define readb_relaxed(addr) readb(addr) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index c58121508157..e6b53c6e21e3 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -210,12 +210,12 @@ extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid); extern int kvmppc_radix_init(void); extern void kvmppc_radix_exit(void); -extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, - unsigned long gfn); -extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, - unsigned long gfn); -extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, - unsigned long gfn); +extern void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn); +extern bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn); +extern bool kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn); extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map); extern void kvmppc_radix_flush_memslot(struct kvm *kvm, diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 05fb00d37609..1e83359f286b 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -56,13 +56,6 @@ #define KVM_ARCH_WANT_MMU_NOTIFIER -extern int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end, - unsigned flags); -extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); -extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -extern int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); - #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 #define HPTEG_HASH_BITS_PTE_LONG 12 diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9531b1c1b190..5bf8ae9bb2cc 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -281,11 +281,10 @@ struct kvmppc_ops { const struct kvm_memory_slot *old, const struct kvm_memory_slot *new, 
enum kvm_mr_change change); - int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, - unsigned long end); - int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end); - int (*test_age_hva)(struct kvm *kvm, unsigned long hva); - void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte); + bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range); + bool (*age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range); + bool (*test_age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range); + bool (*set_spte_gfn)(struct kvm *kvm, struct kvm_gfn_range *range); void (*free_memslot)(struct kvm_memory_slot *slot); int (*init_vm)(struct kvm *kvm); void (*destroy_vm)(struct kvm *kvm); diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index c76157237e22..6ea9001de9a9 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -28,9 +28,6 @@ extern struct device_node *opal_node; /* API functions */ int64_t opal_invalid_call(void); -int64_t opal_npu_destroy_context(uint64_t phb_id, uint64_t pid, uint64_t bdf); -int64_t opal_npu_init_context(uint64_t phb_id, int pasid, uint64_t msr, - uint64_t bdf); int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid, uint64_t lpcr); int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn, diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index d2a2a14e56f9..74424c14515c 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -126,7 +126,6 @@ struct pci_controller { #endif /* CONFIG_PPC64 */ void *private_data; - struct npu *npu; }; /* These are used for config access before all the PCI probing diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 6436f0b41539..d1f53260725c 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -119,11 +119,4 @@ extern void pcibios_scan_phb(struct pci_controller *hose); #endif /* __KERNEL__ */ -extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev); -extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index); -extern int pnv_npu2_init(struct pci_controller *hose); -extern int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid, - unsigned long msr); -extern int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev); - #endif /* __ASM_POWERPC_PCI_H */ diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index fab84024650c..3f35c8d20be7 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -8,6 +8,7 @@ #include <linux/moduleloader.h> #include <linux/err.h> #include <linux/vmalloc.h> +#include <linux/mm.h> #include <linux/bug.h> #include <asm/module.h> #include <linux/uaccess.h> @@ -88,17 +89,22 @@ int module_finalize(const Elf_Ehdr *hdr, return 0; } -#ifdef MODULES_VADDR static __always_inline void * __module_alloc(unsigned long size, unsigned long start, unsigned long end) { - return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, - PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, - __builtin_return_address(0)); + /* + * Don't do huge page allocations for modules yet until more testing + * is done. STRICT_MODULE_RWX may require extra work to support this + * too. 
+ */ + return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, PAGE_KERNEL_EXEC, + VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP, + NUMA_NO_NODE, __builtin_return_address(0)); } void *module_alloc(unsigned long size) { +#ifdef MODULES_VADDR unsigned long limit = (unsigned long)_etext - SZ_32M; void *ptr = NULL; @@ -112,5 +118,7 @@ void *module_alloc(unsigned long size) ptr = __module_alloc(size, MODULES_VADDR, MODULES_END); return ptr; -} +#else + return __module_alloc(size, VMALLOC_START, VMALLOC_END); #endif +} diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index f66f9c9b9d6c..2e68fbb57cc6 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -523,3 +523,6 @@ 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr 443 common quotactl_path sys_quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index f9eb49e23ec8..5056e175ca2c 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -951,6 +951,93 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image) } /** + * add_node_props - Reads node properties from device node structure and add + * them to fdt. + * @fdt: Flattened device tree of the kernel + * @node_offset: offset of the node to add a property at + * @dn: device node pointer + * + * Returns 0 on success, negative errno on error. + */ +static int add_node_props(void *fdt, int node_offset, const struct device_node *dn) +{ + int ret = 0; + struct property *pp; + + if (!dn) + return -EINVAL; + + for_each_property_of_node(dn, pp) { + ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length); + if (ret < 0) { + pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret)); + return ret; + } + } + return ret; +} + +/** + * update_cpus_node - Update cpus node of flattened device tree using of_root + * device node. + * @fdt: Flattened device tree of the kernel. + * + * Returns 0 on success, negative errno on error. 
+ */ +static int update_cpus_node(void *fdt) +{ + struct device_node *cpus_node, *dn; + int cpus_offset, cpus_subnode_offset, ret = 0; + + cpus_offset = fdt_path_offset(fdt, "/cpus"); + if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) { + pr_err("Malformed device tree: error reading /cpus node: %s\n", + fdt_strerror(cpus_offset)); + return cpus_offset; + } + + if (cpus_offset > 0) { + ret = fdt_del_node(fdt, cpus_offset); + if (ret < 0) { + pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret)); + return -EINVAL; + } + } + + /* Add cpus node to fdt */ + cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus"); + if (cpus_offset < 0) { + pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset)); + return -EINVAL; + } + + /* Add cpus node properties */ + cpus_node = of_find_node_by_path("/cpus"); + ret = add_node_props(fdt, cpus_offset, cpus_node); + of_node_put(cpus_node); + if (ret < 0) + return ret; + + /* Loop through all subnodes of cpus and add them to fdt */ + for_each_node_by_type(dn, "cpu") { + cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name); + if (cpus_subnode_offset < 0) { + pr_err("Unable to add %s subnode: %s\n", dn->full_name, + fdt_strerror(cpus_subnode_offset)); + ret = cpus_subnode_offset; + goto out; + } + + ret = add_node_props(fdt, cpus_subnode_offset, dn); + if (ret < 0) + goto out; + } +out: + of_node_put(dn); + return ret; +} + +/** * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel * being loaded. * @image: kexec image being loaded. @@ -1006,6 +1093,11 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, } } + /* Update cpus nodes information to account hotplug CPUs. */ + ret = update_cpus_node(fdt); + if (ret < 0) + goto out; + /* Update memory reserve map */ ret = get_reserved_memory_ranges(&rmem); if (ret) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 44bf567b6589..2b691f4d1f26 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -834,26 +834,24 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change); } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, - unsigned flags) +bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) { - return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); + return kvm->arch.kvm_ops->unmap_gfn_range(kvm, range); } -int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) +bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { - return kvm->arch.kvm_ops->age_hva(kvm, start, end); + return kvm->arch.kvm_ops->age_gfn(kvm, range); } -int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { - return kvm->arch.kvm_ops->test_age_hva(kvm, hva); + return kvm->arch.kvm_ops->test_age_gfn(kvm, range); } -int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { - kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte); - return 0; + return kvm->arch.kvm_ops->set_spte_gfn(kvm, range); } int kvmppc_core_init_vm(struct kvm *kvm) diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 9b6323ec8e60..740e51def5a5 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -9,12 +9,10 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, struct kvm_memory_slot *memslot); -extern int 
kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, - unsigned long end); -extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, - unsigned long end); -extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva); -extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); +extern bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range); +extern bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range); +extern bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range); +extern bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range); extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index bb6773594cf8..2d9193cd73be 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -752,51 +752,6 @@ void kvmppc_rmap_reset(struct kvm *kvm) srcu_read_unlock(&kvm->srcu, srcu_idx); } -typedef int (*hva_handler_fn)(struct kvm *kvm, struct kvm_memory_slot *memslot, - unsigned long gfn); - -static int kvm_handle_hva_range(struct kvm *kvm, - unsigned long start, - unsigned long end, - hva_handler_fn handler) -{ - int ret; - int retval = 0; - struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; - - slots = kvm_memslots(kvm); - kvm_for_each_memslot(memslot, slots) { - unsigned long hva_start, hva_end; - gfn_t gfn, gfn_end; - - hva_start = max(start, memslot->userspace_addr); - hva_end = min(end, memslot->userspace_addr + - (memslot->npages << PAGE_SHIFT)); - if (hva_start >= hva_end) - continue; - /* - * {gfn(page) | page intersects with [hva_start, hva_end)} = - * {gfn, gfn+1, ..., gfn_end-1}. - */ - gfn = hva_to_gfn_memslot(hva_start, memslot); - gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); - - for (; gfn < gfn_end; ++gfn) { - ret = handler(kvm, memslot, gfn); - retval |= ret; - } - } - - return retval; -} - -static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, - hva_handler_fn handler) -{ - return kvm_handle_hva_range(kvm, hva, hva + 1, handler); -} - /* Must be called with both HPTE and rmap locked */ static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i, struct kvm_memory_slot *memslot, @@ -840,8 +795,8 @@ static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i, } } -static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, - unsigned long gfn) +static void kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn) { unsigned long i; __be64 *hptep; @@ -874,16 +829,21 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, unlock_rmap(rmapp); __unlock_hpte(hptep, be64_to_cpu(hptep[0])); } - return 0; } -int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) +bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range) { - hva_handler_fn handler; + gfn_t gfn; - handler = kvm_is_radix(kvm) ? 
kvm_unmap_radix : kvm_unmap_rmapp; - kvm_handle_hva_range(kvm, start, end, handler); - return 0; + if (kvm_is_radix(kvm)) { + for (gfn = range->start; gfn < range->end; gfn++) + kvm_unmap_radix(kvm, range->slot, gfn); + } else { + for (gfn = range->start; gfn < range->end; gfn++) + kvm_unmap_rmapp(kvm, range->slot, range->start); + } + + return false; } void kvmppc_core_flush_memslot_hv(struct kvm *kvm, @@ -913,8 +873,8 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm, } } -static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, - unsigned long gfn) +static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn) { struct revmap_entry *rev = kvm->arch.hpt.rev; unsigned long head, i, j; @@ -968,26 +928,34 @@ static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, return ret; } -int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) +bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) { - hva_handler_fn handler; + gfn_t gfn; + bool ret = false; - handler = kvm_is_radix(kvm) ? kvm_age_radix : kvm_age_rmapp; - return kvm_handle_hva_range(kvm, start, end, handler); + if (kvm_is_radix(kvm)) { + for (gfn = range->start; gfn < range->end; gfn++) + ret |= kvm_age_radix(kvm, range->slot, gfn); + } else { + for (gfn = range->start; gfn < range->end; gfn++) + ret |= kvm_age_rmapp(kvm, range->slot, gfn); + } + + return ret; } -static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, - unsigned long gfn) +static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn) { struct revmap_entry *rev = kvm->arch.hpt.rev; unsigned long head, i, j; unsigned long *hp; - int ret = 1; + bool ret = true; unsigned long *rmapp; rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; if (*rmapp & KVMPPC_RMAP_REFERENCED) - return 1; + return true; lock_rmap(rmapp); if (*rmapp & KVMPPC_RMAP_REFERENCED) @@ -1002,27 +970,33 @@ static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, goto out; } while ((i = j) != head); } - ret = 0; + ret = false; out: unlock_rmap(rmapp); return ret; } -int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva) +bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) { - hva_handler_fn handler; + WARN_ON(range->start + 1 != range->end); - handler = kvm_is_radix(kvm) ? kvm_test_age_radix : kvm_test_age_rmapp; - return kvm_handle_hva(kvm, hva, handler); + if (kvm_is_radix(kvm)) + return kvm_test_age_radix(kvm, range->slot, range->start); + else + return kvm_test_age_rmapp(kvm, range->slot, range->start); } -void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) +bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) { - hva_handler_fn handler; + WARN_ON(range->start + 1 != range->end); + + if (kvm_is_radix(kvm)) + kvm_unmap_radix(kvm, range->slot, range->start); + else + kvm_unmap_rmapp(kvm, range->slot, range->start); - handler = kvm_is_radix(kvm) ? 
kvm_unmap_radix : kvm_unmap_rmapp; - kvm_handle_hva(kvm, hva, handler); + return false; } static int vcpus_running(struct kvm *kvm) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index e603de7ade52..d909c069363e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -993,8 +993,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu, } /* Called with kvm->mmu_lock held */ -int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, - unsigned long gfn) +void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn) { pte_t *ptep; unsigned long gpa = gfn << PAGE_SHIFT; @@ -1002,24 +1002,23 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) { uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT); - return 0; + return; } ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep)) kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, kvm->arch.lpid); - return 0; } /* Called with kvm->mmu_lock held */ -int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, - unsigned long gfn) +bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn) { pte_t *ptep; unsigned long gpa = gfn << PAGE_SHIFT; unsigned int shift; - int ref = 0; + bool ref = false; unsigned long old, *rmapp; if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) @@ -1035,26 +1034,27 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_ACCESSED, 0, old & PTE_RPN_MASK, 1UL << shift); - ref = 1; + ref = true; } return ref; } /* Called with kvm->mmu_lock held */ -int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, - unsigned long gfn) +bool kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long gfn) + { pte_t *ptep; unsigned long gpa = gfn << PAGE_SHIFT; unsigned int shift; - int ref = 0; + bool ref = false; if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) return ref; ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) - ref = 1; + ref = true; return ref; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 4a532410e128..28a80d240b76 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4812,7 +4812,7 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) kvmhv_release_all_nested(kvm); kvmppc_rmap_reset(kvm); kvm->arch.process_table = 0; - /* Mutual exclusion with kvm_unmap_hva_range etc. */ + /* Mutual exclusion with kvm_unmap_gfn_range etc. */ spin_lock(&kvm->mmu_lock); kvm->arch.radix = 0; spin_unlock(&kvm->mmu_lock); @@ -4834,7 +4834,7 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm) if (err) return err; kvmppc_rmap_reset(kvm); - /* Mutual exclusion with kvm_unmap_hva_range etc. */ + /* Mutual exclusion with kvm_unmap_gfn_range etc. 
*/ spin_lock(&kvm->mmu_lock); kvm->arch.radix = 1; spin_unlock(&kvm->mmu_lock); @@ -5699,10 +5699,10 @@ static struct kvmppc_ops kvm_ops_hv = { .flush_memslot = kvmppc_core_flush_memslot_hv, .prepare_memory_region = kvmppc_core_prepare_memory_region_hv, .commit_memory_region = kvmppc_core_commit_memory_region_hv, - .unmap_hva_range = kvm_unmap_hva_range_hv, - .age_hva = kvm_age_hva_hv, - .test_age_hva = kvm_test_age_hva_hv, - .set_spte_hva = kvm_set_spte_hva_hv, + .unmap_gfn_range = kvm_unmap_gfn_range_hv, + .age_gfn = kvm_age_gfn_hv, + .test_age_gfn = kvm_test_age_gfn_hv, + .set_spte_gfn = kvm_set_spte_gfn_hv, .free_memslot = kvmppc_core_free_memslot_hv, .init_vm = kvmppc_core_init_vm_hv, .destroy_vm = kvmppc_core_destroy_vm_hv, diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 913944dc3620..d7733b07f489 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -425,61 +425,39 @@ static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu) } /************* MMU Notifiers *************/ -static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start, - unsigned long end) +static bool do_kvm_unmap_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { long i; struct kvm_vcpu *vcpu; - struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; - slots = kvm_memslots(kvm); - kvm_for_each_memslot(memslot, slots) { - unsigned long hva_start, hva_end; - gfn_t gfn, gfn_end; + kvm_for_each_vcpu(i, vcpu, kvm) + kvmppc_mmu_pte_pflush(vcpu, range->start << PAGE_SHIFT, + range->end << PAGE_SHIFT); - hva_start = max(start, memslot->userspace_addr); - hva_end = min(end, memslot->userspace_addr + - (memslot->npages << PAGE_SHIFT)); - if (hva_start >= hva_end) - continue; - /* - * {gfn(page) | page intersects with [hva_start, hva_end)} = - * {gfn, gfn+1, ..., gfn_end-1}. 
- */ - gfn = hva_to_gfn_memslot(hva_start, memslot); - gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); - kvm_for_each_vcpu(i, vcpu, kvm) - kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT, - gfn_end << PAGE_SHIFT); - } + return false; } -static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start, - unsigned long end) +static bool kvm_unmap_gfn_range_pr(struct kvm *kvm, struct kvm_gfn_range *range) { - do_kvm_unmap_hva(kvm, start, end); - - return 0; + return do_kvm_unmap_gfn(kvm, range); } -static int kvm_age_hva_pr(struct kvm *kvm, unsigned long start, - unsigned long end) +static bool kvm_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range) { /* XXX could be more clever ;) */ - return 0; + return false; } -static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva) +static bool kvm_test_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range) { /* XXX could be more clever ;) */ - return 0; + return false; } -static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte) +static bool kvm_set_spte_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range) { /* The page will get remapped properly on its next fault */ - do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE); + return do_kvm_unmap_gfn(kvm, range); } /*****************************************/ @@ -2079,10 +2057,10 @@ static struct kvmppc_ops kvm_ops_pr = { .flush_memslot = kvmppc_core_flush_memslot_pr, .prepare_memory_region = kvmppc_core_prepare_memory_region_pr, .commit_memory_region = kvmppc_core_commit_memory_region_pr, - .unmap_hva_range = kvm_unmap_hva_range_pr, - .age_hva = kvm_age_hva_pr, - .test_age_hva = kvm_test_age_hva_pr, - .set_spte_hva = kvm_set_spte_hva_pr, + .unmap_gfn_range = kvm_unmap_gfn_range_pr, + .age_gfn = kvm_age_gfn_pr, + .test_age_gfn = kvm_test_age_gfn_pr, + .set_spte_gfn = kvm_set_spte_gfn_pr, .free_memslot = kvmppc_core_free_memslot_pr, .init_vm = kvmppc_core_init_vm_pr, .destroy_vm = kvmppc_core_destroy_vm_pr, diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index ed0c9c43d0cf..7f16afc331ef 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -721,45 +721,36 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, /************* MMU Notifiers *************/ -static int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +static bool kvm_e500_mmu_unmap_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { - trace_kvm_unmap_hva(hva); - /* * Flush all shadow tlb entries everywhere. 
This is slow, but * we are 100% sure that we catch the to be unmapped page */ - kvm_flush_remote_tlbs(kvm); - - return 0; + return true; } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, - unsigned flags) +bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) { - /* kvm_unmap_hva flushes everything anyways */ - kvm_unmap_hva(kvm, start); - - return 0; + return kvm_e500_mmu_unmap_gfn(kvm, range); } -int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) +bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { /* XXX could be more clever ;) */ - return 0; + return false; } -int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { /* XXX could be more clever ;) */ - return 0; + return false; } -int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { /* The page will get remapped properly on its next fault */ - kvm_unmap_hva(kvm, hva); - return 0; + return kvm_e500_mmu_unmap_gfn(kvm, range); } /*****************************************/ diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h index 3837842986aa..eff6e82dbcd4 100644 --- a/arch/powerpc/kvm/trace_booke.h +++ b/arch/powerpc/kvm/trace_booke.h @@ -69,21 +69,6 @@ TRACE_EVENT(kvm_exit, ) ); -TRACE_EVENT(kvm_unmap_hva, - TP_PROTO(unsigned long hva), - TP_ARGS(hva), - - TP_STRUCT__entry( - __field( unsigned long, hva ) - ), - - TP_fast_assign( - __entry->hva = hva; - ), - - TP_printk("unmap hva 0x%lx\n", __entry->hva) -); - TRACE_EVENT(kvm_booke206_stlb_write, TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3), TP_ARGS(mas0, mas8, mas1, mas2, mas7_3), diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index f2c690ee75d1..cc1a8a0f311e 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -5,6 +5,9 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) +CFLAGS_code-patching.o += -fno-stack-protector +CFLAGS_feature-fixups.o += -fno-stack-protector + CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index d142b76d507d..9a75ba078e1b 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -106,7 +106,8 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, * At this point we do the placement change only for BOOK3S 64. This would * possibly work on other subarchs. 
*/ -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, unsigned long sz) { pgd_t *pg; p4d_t *p4; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index e4b05667686e..f998e655b570 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -40,8 +40,8 @@ config PPC_85xx config PPC_8xx bool "Freescale 8xx" + select ARCH_SUPPORTS_HUGETLBFS select FSL_SOC - select SYS_SUPPORTS_HUGETLBFS select PPC_HAVE_KUEP select PPC_HAVE_KUAP select HAVE_ARCH_VMAP_STACK @@ -95,9 +95,11 @@ config PPC_BOOK3S_64 bool "Server processors" select PPC_FPU select PPC_HAVE_PMU_SUPPORT - select SYS_SUPPORTS_HUGETLBFS select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION + select ARCH_ENABLE_PMD_SPLIT_PTLOCK select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE + select ARCH_SUPPORTS_HUGETLBFS select ARCH_SUPPORTS_NUMA_BALANCING select IRQ_WORK select PPC_MM_SLICES @@ -280,9 +282,9 @@ config FSL_BOOKE # this is for common code between PPC32 & PPC64 FSL BOOKE config PPC_FSL_BOOK3E bool + select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 select FSL_EMB_PERFMON select PPC_SMP_MUXED_IPI - select SYS_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 select PPC_DOORBELL default y if FSL_BOOKE @@ -358,10 +360,6 @@ config SPE If in doubt, say Y here. -config ARCH_ENABLE_SPLIT_PMD_PTLOCK - def_bool y - depends on PPC_BOOK3S_64 - config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 @@ -421,10 +419,6 @@ config PPC_PKEY depends on PPC_BOOK3S_64 depends on PPC_MEM_KEYS || PPC_KUAP || PPC_KUEP -config ARCH_ENABLE_HUGEPAGE_MIGRATION - def_bool y - depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION - config PPC_MMU_NOHASH def_bool y diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 2eb6ae150d1f..be2546b96816 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_FA_DUMP) += opal-fadump.o obj-$(CONFIG_PRESERVE_FA_DUMP) += opal-fadump.o obj-$(CONFIG_OPAL_CORE) += opal-core.o -obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o +obj-$(CONFIG_PCI) += pci.o pci-ioda.o pci-ioda-tce.o obj-$(CONFIG_PCI_IOV) += pci-sriov.o obj-$(CONFIG_CXL_BASE) += pci-cxl.o obj-$(CONFIG_EEH) += eeh-powernv.o diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 71c1262589fe..537a4daed614 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -104,8 +104,8 @@ static void memtrace_clear_range(unsigned long start_pfn, * Before we go ahead and use this range as cache inhibited range * flush the cache. */ - flush_dcache_range_chunked(PFN_PHYS(start_pfn), - PFN_PHYS(start_pfn + nr_pages), + flush_dcache_range_chunked((unsigned long)pfn_to_kaddr(start_pfn), + (unsigned long)pfn_to_kaddr(start_pfn + nr_pages), FLUSH_CHUNK_SIZE); } diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c deleted file mode 100644 index b711dc3262a3..000000000000 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ /dev/null @@ -1,705 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * This file implements the DMA operations for NVLink devices. 
The NPU - * devices all point to the same iommu table as the parent PCI device. - * - * Copyright Alistair Popple, IBM Corporation 2015. - */ - -#include <linux/mmu_notifier.h> -#include <linux/mmu_context.h> -#include <linux/of.h> -#include <linux/pci.h> -#include <linux/memblock.h> -#include <linux/sizes.h> - -#include <asm/debugfs.h> -#include <asm/powernv.h> -#include <asm/ppc-pci.h> -#include <asm/opal.h> - -#include "pci.h" - -static struct pci_dev *get_pci_dev(struct device_node *dn) -{ - struct pci_dn *pdn = PCI_DN(dn); - struct pci_dev *pdev; - - pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus), - pdn->busno, pdn->devfn); - - /* - * pci_get_domain_bus_and_slot() increased the reference count of - * the PCI device, but callers don't need that actually as the PE - * already holds a reference to the device. Since callers aren't - * aware of the reference count change, call pci_dev_put() now to - * avoid leaks. - */ - if (pdev) - pci_dev_put(pdev); - - return pdev; -} - -/* Given a NPU device get the associated PCI device. */ -struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev) -{ - struct device_node *dn; - struct pci_dev *gpdev; - - if (WARN_ON(!npdev)) - return NULL; - - if (WARN_ON(!npdev->dev.of_node)) - return NULL; - - /* Get assoicated PCI device */ - dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0); - if (!dn) - return NULL; - - gpdev = get_pci_dev(dn); - of_node_put(dn); - - return gpdev; -} -EXPORT_SYMBOL(pnv_pci_get_gpu_dev); - -/* Given the real PCI device get a linked NPU device. */ -struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index) -{ - struct device_node *dn; - struct pci_dev *npdev; - - if (WARN_ON(!gpdev)) - return NULL; - - /* Not all PCI devices have device-tree nodes */ - if (!gpdev->dev.of_node) - return NULL; - - /* Get assoicated PCI device */ - dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index); - if (!dn) - return NULL; - - npdev = get_pci_dev(dn); - of_node_put(dn); - - return npdev; -} -EXPORT_SYMBOL(pnv_pci_get_npu_dev); - -#ifdef CONFIG_IOMMU_API -/* - * Returns the PE assoicated with the PCI device of the given - * NPU. Returns the linked pci device if pci_dev != NULL. - */ -static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe, - struct pci_dev **gpdev) -{ - struct pnv_phb *phb; - struct pci_controller *hose; - struct pci_dev *pdev; - struct pnv_ioda_pe *pe; - struct pci_dn *pdn; - - pdev = pnv_pci_get_gpu_dev(npe->pdev); - if (!pdev) - return NULL; - - pdn = pci_get_pdn(pdev); - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) - return NULL; - - hose = pci_bus_to_host(pdev->bus); - phb = hose->private_data; - pe = &phb->ioda.pe_array[pdn->pe_number]; - - if (gpdev) - *gpdev = pdev; - - return pe; -} - -static long pnv_npu_unset_window(struct iommu_table_group *table_group, - int num); - -static long pnv_npu_set_window(struct iommu_table_group *table_group, int num, - struct iommu_table *tbl) -{ - struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe, - table_group); - struct pnv_phb *phb = npe->phb; - int64_t rc; - const unsigned long size = tbl->it_indirect_levels ? - tbl->it_level_size : tbl->it_size; - const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; - const __u64 win_size = tbl->it_size << tbl->it_page_shift; - int num2 = (num == 0) ? 
1 : 0; - - /* NPU has just one TVE so if there is another table, remove it first */ - if (npe->table_group.tables[num2]) - pnv_npu_unset_window(&npe->table_group, num2); - - pe_info(npe, "Setting up window %llx..%llx pg=%lx\n", - start_addr, start_addr + win_size - 1, - IOMMU_PAGE_SIZE(tbl)); - - rc = opal_pci_map_pe_dma_window(phb->opal_id, - npe->pe_number, - npe->pe_number, - tbl->it_indirect_levels + 1, - __pa(tbl->it_base), - size << 3, - IOMMU_PAGE_SIZE(tbl)); - if (rc) { - pe_err(npe, "Failed to configure TCE table, err %lld\n", rc); - return rc; - } - pnv_pci_ioda2_tce_invalidate_entire(phb, false); - - /* Add the table to the list so its TCE cache will get invalidated */ - pnv_pci_link_table_and_group(phb->hose->node, num, - tbl, &npe->table_group); - - return 0; -} - -static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num) -{ - struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe, - table_group); - struct pnv_phb *phb = npe->phb; - int64_t rc; - - if (!npe->table_group.tables[num]) - return 0; - - pe_info(npe, "Removing DMA window\n"); - - rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number, - npe->pe_number, - 0/* levels */, 0/* table address */, - 0/* table size */, 0/* page size */); - if (rc) { - pe_err(npe, "Unmapping failed, ret = %lld\n", rc); - return rc; - } - pnv_pci_ioda2_tce_invalidate_entire(phb, false); - - pnv_pci_unlink_table_and_group(npe->table_group.tables[num], - &npe->table_group); - - return 0; -} - -/* Switch ownership from platform code to external user (e.g. VFIO) */ -static void pnv_npu_take_ownership(struct iommu_table_group *table_group) -{ - struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe, - table_group); - struct pnv_phb *phb = npe->phb; - int64_t rc; - struct pci_dev *gpdev = NULL; - - /* - * Note: NPU has just a single TVE in the hardware which means that - * while used by the kernel, it can have either 32bit window or - * DMA bypass but never both. So we deconfigure 32bit window only - * if it was enabled at the moment of ownership change. - */ - if (npe->table_group.tables[0]) { - pnv_npu_unset_window(&npe->table_group, 0); - return; - } - - /* Disable bypass */ - rc = opal_pci_map_pe_dma_window_real(phb->opal_id, - npe->pe_number, npe->pe_number, - 0 /* bypass base */, 0); - if (rc) { - pe_err(npe, "Failed to disable bypass, err %lld\n", rc); - return; - } - pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false); - - get_gpu_pci_dev_and_pe(npe, &gpdev); - if (gpdev) - pnv_npu2_unmap_lpar_dev(gpdev); -} - -static void pnv_npu_release_ownership(struct iommu_table_group *table_group) -{ - struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe, - table_group); - struct pci_dev *gpdev = NULL; - - get_gpu_pci_dev_and_pe(npe, &gpdev); - if (gpdev) - pnv_npu2_map_lpar_dev(gpdev, 0, MSR_DR | MSR_PR | MSR_HV); -} - -static struct iommu_table_group_ops pnv_pci_npu_ops = { - .set_window = pnv_npu_set_window, - .unset_window = pnv_npu_unset_window, - .take_ownership = pnv_npu_take_ownership, - .release_ownership = pnv_npu_release_ownership, -}; -#endif /* !CONFIG_IOMMU_API */ - -/* - * NPU2 ATS - */ -/* Maximum possible number of ATSD MMIO registers per NPU */ -#define NV_NMMU_ATSD_REGS 8 -#define NV_NPU_MAX_PE_NUM 16 - -/* - * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or - * up to 3 x (GPU + 2xNPUs) (POWER9). 
- */ -struct npu_comp { - struct iommu_table_group table_group; - int pe_num; - struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM]; -}; - -/* An NPU descriptor, valid for POWER9 only */ -struct npu { - int index; - struct npu_comp npucomp; -}; - -#ifdef CONFIG_IOMMU_API -static long pnv_npu_peers_create_table_userspace( - struct iommu_table_group *table_group, - int num, __u32 page_shift, __u64 window_size, __u32 levels, - struct iommu_table **ptbl) -{ - struct npu_comp *npucomp = container_of(table_group, struct npu_comp, - table_group); - - if (!npucomp->pe_num || !npucomp->pe[0] || - !npucomp->pe[0]->table_group.ops || - !npucomp->pe[0]->table_group.ops->create_table) - return -EFAULT; - - return npucomp->pe[0]->table_group.ops->create_table( - &npucomp->pe[0]->table_group, num, page_shift, - window_size, levels, ptbl); -} - -static long pnv_npu_peers_set_window(struct iommu_table_group *table_group, - int num, struct iommu_table *tbl) -{ - int i, j; - long ret = 0; - struct npu_comp *npucomp = container_of(table_group, struct npu_comp, - table_group); - - for (i = 0; i < npucomp->pe_num; ++i) { - struct pnv_ioda_pe *pe = npucomp->pe[i]; - - if (!pe->table_group.ops->set_window) - continue; - - ret = pe->table_group.ops->set_window(&pe->table_group, - num, tbl); - if (ret) - break; - } - - if (ret) { - for (j = 0; j < i; ++j) { - struct pnv_ioda_pe *pe = npucomp->pe[j]; - - if (!pe->table_group.ops->unset_window) - continue; - - ret = pe->table_group.ops->unset_window( - &pe->table_group, num); - if (ret) - break; - } - } else { - table_group->tables[num] = iommu_tce_table_get(tbl); - } - - return ret; -} - -static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group, - int num) -{ - int i, j; - long ret = 0; - struct npu_comp *npucomp = container_of(table_group, struct npu_comp, - table_group); - - for (i = 0; i < npucomp->pe_num; ++i) { - struct pnv_ioda_pe *pe = npucomp->pe[i]; - - WARN_ON(npucomp->table_group.tables[num] != - table_group->tables[num]); - if (!npucomp->table_group.tables[num]) - continue; - - if (!pe->table_group.ops->unset_window) - continue; - - ret = pe->table_group.ops->unset_window(&pe->table_group, num); - if (ret) - break; - } - - if (ret) { - for (j = 0; j < i; ++j) { - struct pnv_ioda_pe *pe = npucomp->pe[j]; - - if (!npucomp->table_group.tables[num]) - continue; - - if (!pe->table_group.ops->set_window) - continue; - - ret = pe->table_group.ops->set_window(&pe->table_group, - num, table_group->tables[num]); - if (ret) - break; - } - } else if (table_group->tables[num]) { - iommu_tce_table_put(table_group->tables[num]); - table_group->tables[num] = NULL; - } - - return ret; -} - -static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group) -{ - int i; - struct npu_comp *npucomp = container_of(table_group, struct npu_comp, - table_group); - - for (i = 0; i < npucomp->pe_num; ++i) { - struct pnv_ioda_pe *pe = npucomp->pe[i]; - - if (!pe->table_group.ops || - !pe->table_group.ops->take_ownership) - continue; - pe->table_group.ops->take_ownership(&pe->table_group); - } -} - -static void pnv_npu_peers_release_ownership( - struct iommu_table_group *table_group) -{ - int i; - struct npu_comp *npucomp = container_of(table_group, struct npu_comp, - table_group); - - for (i = 0; i < npucomp->pe_num; ++i) { - struct pnv_ioda_pe *pe = npucomp->pe[i]; - - if (!pe->table_group.ops || - !pe->table_group.ops->release_ownership) - continue; - pe->table_group.ops->release_ownership(&pe->table_group); - } -} - -static struct iommu_table_group_ops 
pnv_npu_peers_ops = { - .get_table_size = pnv_pci_ioda2_get_table_size, - .create_table = pnv_npu_peers_create_table_userspace, - .set_window = pnv_npu_peers_set_window, - .unset_window = pnv_npu_peers_unset_window, - .take_ownership = pnv_npu_peers_take_ownership, - .release_ownership = pnv_npu_peers_release_ownership, -}; - -static void pnv_comp_attach_table_group(struct npu_comp *npucomp, - struct pnv_ioda_pe *pe) -{ - if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM)) - return; - - npucomp->pe[npucomp->pe_num] = pe; - ++npucomp->pe_num; -} - -static struct iommu_table_group * - pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe) -{ - struct iommu_table_group *compound_group; - struct npu_comp *npucomp; - struct pci_dev *gpdev = NULL; - struct pci_controller *hose; - struct pci_dev *npdev = NULL; - - list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) { - npdev = pnv_pci_get_npu_dev(gpdev, 0); - if (npdev) - break; - } - - if (!npdev) - /* It is not an NPU attached device, skip */ - return NULL; - - hose = pci_bus_to_host(npdev->bus); - - if (hose->npu) { - /* P9 case: compound group is per-NPU (all gpus, all links) */ - npucomp = &hose->npu->npucomp; - } else { - /* P8 case: Compound group is per-GPU (1 gpu, 2 links) */ - npucomp = pe->npucomp = kzalloc(sizeof(*npucomp), GFP_KERNEL); - } - - compound_group = &npucomp->table_group; - if (!compound_group->group) { - compound_group->ops = &pnv_npu_peers_ops; - iommu_register_group(compound_group, hose->global_number, - pe->pe_number); - - /* Steal capabilities from a GPU PE */ - compound_group->max_dynamic_windows_supported = - pe->table_group.max_dynamic_windows_supported; - compound_group->tce32_start = pe->table_group.tce32_start; - compound_group->tce32_size = pe->table_group.tce32_size; - compound_group->max_levels = pe->table_group.max_levels; - if (!compound_group->pgsizes) - compound_group->pgsizes = pe->table_group.pgsizes; - } - - /* - * The gpu would have been added to the iommu group that's created - * for the PE. Pull it out now. - */ - iommu_del_device(&gpdev->dev); - - /* - * I'm not sure this is strictly required, but it's probably a good idea - * since the table_group for the PE is going to be attached to the - * compound table group. If we leave the PE's iommu group active then - * we might have the same table_group being modifiable via two sepeate - * iommu groups. - */ - iommu_group_put(pe->table_group.group); - - /* now put the GPU into the compound group */ - pnv_comp_attach_table_group(npucomp, pe); - iommu_add_device(compound_group, &gpdev->dev); - - return compound_group; -} - -static struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe) -{ - struct iommu_table_group *table_group; - struct npu_comp *npucomp; - struct pci_dev *gpdev = NULL; - struct pci_dev *npdev; - struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev); - - WARN_ON(!(pe->flags & PNV_IODA_PE_DEV)); - if (!gpe) - return NULL; - - /* - * IODA2 bridges get this set up from pci_controller_ops::setup_bridge - * but NPU bridges do not have this hook defined so we do it here. - * We do not setup other table group parameters as they won't be used - * anyway - NVLink bridges are subordinate PEs. - */ - pe->table_group.ops = &pnv_pci_npu_ops; - - table_group = iommu_group_get_iommudata( - iommu_group_get(&gpdev->dev)); - - /* - * On P9 NPU PHB and PCI PHB support different page sizes, - * keep only matching. We expect here that NVLink bridge PE pgsizes is - * initialized by the caller. 
- */ - table_group->pgsizes &= pe->table_group.pgsizes; - npucomp = container_of(table_group, struct npu_comp, table_group); - pnv_comp_attach_table_group(npucomp, pe); - - list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) { - struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev); - - if (gpdevtmp != gpdev) - continue; - - iommu_add_device(table_group, &npdev->dev); - } - - return table_group; -} - -void pnv_pci_npu_setup_iommu_groups(void) -{ - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - - /* - * For non-nvlink devices the IOMMU group is registered when the PE is - * configured and devices are added to the group when the per-device - * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is - * only initialise for "normal" IODA PHBs. - * - * For NVLink devices we need to ensure the NVLinks and the GPU end up - * in the same IOMMU group, so that's handled here. - */ - list_for_each_entry(hose, &hose_list, list_node) { - phb = hose->private_data; - - if (phb->type == PNV_PHB_IODA2) - list_for_each_entry(pe, &phb->ioda.pe_list, list) - pnv_try_setup_npu_table_group(pe); - } - - /* - * Now we have all PHBs discovered, time to add NPU devices to - * the corresponding IOMMU groups. - */ - list_for_each_entry(hose, &hose_list, list_node) { - unsigned long pgsizes; - - phb = hose->private_data; - - if (phb->type != PNV_PHB_NPU_NVLINK) - continue; - - pgsizes = pnv_ioda_parse_tce_sizes(phb); - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - /* - * IODA2 bridges get this set up from - * pci_controller_ops::setup_bridge but NPU bridges - * do not have this hook defined so we do it here. - */ - pe->table_group.pgsizes = pgsizes; - pnv_npu_compound_attach(pe); - } - } -} -#endif /* CONFIG_IOMMU_API */ - -int pnv_npu2_init(struct pci_controller *hose) -{ - static int npu_index; - struct npu *npu; - int ret; - - npu = kzalloc(sizeof(*npu), GFP_KERNEL); - if (!npu) - return -ENOMEM; - - npu_index++; - if (WARN_ON(npu_index >= NV_MAX_NPUS)) { - ret = -ENOSPC; - goto fail_exit; - } - npu->index = npu_index; - hose->npu = npu; - - return 0; - -fail_exit: - kfree(npu); - return ret; -} - -int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid, - unsigned long msr) -{ - int ret; - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); - struct pci_controller *hose; - struct pnv_phb *nphb; - - if (!npdev) - return -ENODEV; - - hose = pci_bus_to_host(npdev->bus); - if (hose->npu == NULL) { - dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); - return 0; - } - - nphb = hose->private_data; - - dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n", - nphb->opal_id, lparid); - /* - * Currently we only support radix and non-zero LPCR only makes sense - * for hash tables so skiboot expects the LPCR parameter to be a zero. 
- */ - ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), lparid, - 0 /* LPCR bits */); - if (ret) { - dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret); - return ret; - } - - dev_dbg(&gpdev->dev, "init context opalid=%llu msr=%lx\n", - nphb->opal_id, msr); - ret = opal_npu_init_context(nphb->opal_id, 0/*__unused*/, msr, - pci_dev_id(gpdev)); - if (ret < 0) - dev_err(&gpdev->dev, "Failed to init context: %d\n", ret); - else - ret = 0; - - return 0; -} -EXPORT_SYMBOL_GPL(pnv_npu2_map_lpar_dev); - -void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr) -{ - struct pci_dev *gpdev; - - list_for_each_entry(gpdev, &gpe->pbus->devices, bus_list) - pnv_npu2_map_lpar_dev(gpdev, 0, msr); -} - -int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev) -{ - int ret; - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); - struct pci_controller *hose; - struct pnv_phb *nphb; - - if (!npdev) - return -ENODEV; - - hose = pci_bus_to_host(npdev->bus); - if (hose->npu == NULL) { - dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); - return 0; - } - - nphb = hose->private_data; - - dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n", - nphb->opal_id); - ret = opal_npu_destroy_context(nphb->opal_id, 0/*__unused*/, - pci_dev_id(gpdev)); - if (ret < 0) { - dev_err(&gpdev->dev, "Failed to destroy context: %d\n", ret); - return ret; - } - - /* Set LPID to 0 anyway, just to be safe */ - dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=0\n", nphb->opal_id); - ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), 0 /*LPID*/, - 0 /* LPCR bits */); - if (ret) - dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret); - - return ret; -} -EXPORT_SYMBOL_GPL(pnv_npu2_unmap_lpar_dev); diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c index 5cd0f52d258f..01401e3da7ca 100644 --- a/arch/powerpc/platforms/powernv/opal-call.c +++ b/arch/powerpc/platforms/powernv/opal-call.c @@ -267,8 +267,6 @@ OPAL_CALL(opal_xive_get_queue_state, OPAL_XIVE_GET_QUEUE_STATE); OPAL_CALL(opal_xive_set_queue_state, OPAL_XIVE_SET_QUEUE_STATE); OPAL_CALL(opal_xive_get_vp_state, OPAL_XIVE_GET_VP_STATE); OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET); -OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT); -OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT); OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR); OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT); OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 66c3c3337334..7de464679292 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -47,8 +47,7 @@ #define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */ #define PNV_IODA1_DMA32_SEGSIZE 0x10000000 -static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK", - "NPU_OCAPI" }; +static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_OCAPI" }; static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); static void pnv_pci_configure_bus(struct pci_bus *bus); @@ -192,8 +191,6 @@ void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) unsigned int pe_num = pe->pe_number; WARN_ON(pe->pdev); - WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */ - kfree(pe->npucomp); memset(pe, 0, sizeof(struct pnv_ioda_pe)); mutex_lock(&phb->ioda.pe_alloc_mutex); @@ -875,7 +872,7 @@ int pnv_ioda_deconfigure_pe(struct 
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 66c3c3337334..7de464679292 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -47,8 +47,7 @@
 #define PNV_IODA1_M64_SEGS	8	/* Segments per M64 BAR */
 #define PNV_IODA1_DMA32_SEGSIZE	0x10000000
 
-static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK",
-					      "NPU_OCAPI" };
+static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_OCAPI" };
 
 static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
 static void pnv_pci_configure_bus(struct pci_bus *bus);
@@ -192,8 +191,6 @@ void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
 	unsigned int pe_num = pe->pe_number;
 
 	WARN_ON(pe->pdev);
-	WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */
-	kfree(pe->npucomp);
 	memset(pe, 0, sizeof(struct pnv_ioda_pe));
 
 	mutex_lock(&phb->ioda.pe_alloc_mutex);
@@ -875,7 +872,7 @@ int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 	 * Release from all parents PELT-V. NPUs don't have a PELTV
 	 * table
 	 */
-	if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
+	if (phb->type != PNV_PHB_NPU_OCAPI)
 		pnv_ioda_unset_peltv(phb, pe, parent);
 
 	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
@@ -946,7 +943,7 @@ int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 	 * Configure PELTV. NPUs don't have a PELTV table so skip
 	 * configuration on them.
 	 */
-	if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
+	if (phb->type != PNV_PHB_NPU_OCAPI)
 		pnv_ioda_set_peltv(phb, pe, true);
 
 	/* Setup reverse map */
@@ -1002,8 +999,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 
 	/* NOTE: We don't get a reference for the pointer in the PE
 	 * data structure, both the device and PE structures should be
-	 * destroyed at the same time. However, removing nvlink
-	 * devices will need some work.
+	 * destroyed at the same time.
 	 *
 	 * At some point we want to remove the PDN completely anyways
 	 */
@@ -1099,113 +1095,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
 	return pe;
 }
 
-static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
-{
-	int pe_num, found_pe = false, rc;
-	long rid;
-	struct pnv_ioda_pe *pe;
-	struct pci_dev *gpu_pdev;
-	struct pci_dn *npu_pdn;
-	struct pnv_phb *phb = pci_bus_to_pnvhb(npu_pdev->bus);
-
-	/*
-	 * Intentionally leak a reference on the npu device (for
-	 * nvlink only; this is not an opencapi path) to make sure it
-	 * never goes away, as it's been the case all along and some
-	 * work is needed otherwise.
-	 */
-	pci_dev_get(npu_pdev);
-
-	/*
-	 * Due to a hardware errata PE#0 on the NPU is reserved for
-	 * error handling. This means we only have three PEs remaining
-	 * which need to be assigned to four links, implying some
-	 * links must share PEs.
-	 *
-	 * To achieve this we assign PEs such that NPUs linking the
-	 * same GPU get assigned the same PE.
-	 */
-	gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev);
-	for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) {
-		pe = &phb->ioda.pe_array[pe_num];
-		if (!pe->pdev)
-			continue;
-
-		if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) {
-			/*
-			 * This device has the same peer GPU so should
-			 * be assigned the same PE as the existing
-			 * peer NPU.
-			 */
-			dev_info(&npu_pdev->dev,
-				"Associating to existing PE %x\n", pe_num);
-			npu_pdn = pci_get_pdn(npu_pdev);
-			rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
-			npu_pdn->pe_number = pe_num;
-			phb->ioda.pe_rmap[rid] = pe->pe_number;
-			pe->device_count++;
-
-			/* Map the PE to this link */
-			rc = opal_pci_set_pe(phb->opal_id, pe_num, rid,
-				OpalPciBusAll,
-				OPAL_COMPARE_RID_DEVICE_NUMBER,
-				OPAL_COMPARE_RID_FUNCTION_NUMBER,
-				OPAL_MAP_PE);
-			WARN_ON(rc != OPAL_SUCCESS);
-			found_pe = true;
-			break;
-		}
-	}
-
-	if (!found_pe)
-		/*
-		 * Could not find an existing PE so allocate a new
-		 * one.
-		 */
-		return pnv_ioda_setup_dev_PE(npu_pdev);
-	else
-		return pe;
-}
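[Note: the comment in the deleted function describes a small assignment policy: PE#0 is reserved for error handling, and links pointing at the same GPU must share a PE so that four links fit into three usable PEs. Here is a self-contained userspace model of just that policy; plain integers stand in for pci_dev pointers and nothing else of the kernel code is reproduced.]

#include <stdio.h>

#define TOTAL_PES 4			/* PE#0 reserved for error handling */

static int pe_gpu[TOTAL_PES];		/* GPU id owning each PE, 0 = free */

static int assign_pe(int gpu)
{
	int pe;

	/* A link to an already-seen GPU shares that GPU's PE. */
	for (pe = 1; pe < TOTAL_PES; pe++)
		if (pe_gpu[pe] == gpu)
			return pe;

	/* Otherwise take the first free PE. */
	for (pe = 1; pe < TOTAL_PES; pe++)
		if (!pe_gpu[pe]) {
			pe_gpu[pe] = gpu;
			return pe;
		}
	return -1;
}

int main(void)
{
	/* Four links to three GPUs fit in the three usable PEs. */
	int links[4] = { 101, 102, 103, 101 };
	int i;

	for (i = 0; i < 4; i++)
		printf("link %d (gpu %d) -> PE#%d\n", i, links[i],
		       assign_pe(links[i]));
	return 0;
}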
-
-static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
-{
-	struct pci_dev *pdev;
-
-	list_for_each_entry(pdev, &bus->devices, bus_list)
-		pnv_ioda_setup_npu_PE(pdev);
-}
-
-static void pnv_pci_ioda_setup_nvlink(void)
-{
-	struct pci_controller *hose;
-	struct pnv_phb *phb;
-	struct pnv_ioda_pe *pe;
-
-	list_for_each_entry(hose, &hose_list, list_node) {
-		phb = hose->private_data;
-		if (phb->type == PNV_PHB_NPU_NVLINK) {
-			/* PE#0 is needed for error reporting */
-			pnv_ioda_reserve_pe(phb, 0);
-			pnv_ioda_setup_npu_PEs(hose->bus);
-			if (phb->model == PNV_PHB_MODEL_NPU2)
-				WARN_ON_ONCE(pnv_npu2_init(hose));
-		}
-	}
-	list_for_each_entry(hose, &hose_list, list_node) {
-		phb = hose->private_data;
-		if (phb->type != PNV_PHB_IODA2)
-			continue;
-
-		list_for_each_entry(pe, &phb->ioda.pe_list, list)
-			pnv_npu2_map_lpar(pe, MSR_DR | MSR_PR | MSR_HV);
-	}
-
-#ifdef CONFIG_IOMMU_API
-	/* setup iommu groups so we can do nvlink pass-thru */
-	pnv_pci_npu_setup_iommu_groups();
-#endif
-}
-
 static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
 				       struct pnv_ioda_pe *pe);
@@ -1468,18 +1357,6 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
 #define PHB3_TCE_KILL_INVAL_PE		PPC_BIT(1)
 #define PHB3_TCE_KILL_INVAL_ONE		PPC_BIT(2)
 
-static void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
-{
-	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(phb, rm);
-	const unsigned long val = PHB3_TCE_KILL_INVAL_ALL;
-
-	mb(); /* Ensure previous TCE table stores are visible */
-	if (rm)
-		__raw_rm_writeq_be(val, invalidate);
-	else
-		__raw_writeq_be(val, invalidate);
-}
-
 static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe)
 {
 	/* 01xb - invalidate TCEs that match the specified PE# */
@@ -1539,20 +1416,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
 		struct pnv_phb *phb = pe->phb;
 		unsigned int shift = tbl->it_page_shift;
 
-		/*
-		 * NVLink1 can use the TCE kill register directly as
-		 * it's the same as PHB3. NVLink2 is different and
-		 * should go via the OPAL call.
-		 */
-		if (phb->model == PNV_PHB_MODEL_NPU) {
-			/*
-			 * The NVLink hardware does not support TCE kill
-			 * per TCE entry so we have to invalidate
-			 * the entire cache for it.
-			 */
-			pnv_pci_phb3_tce_invalidate_entire(phb, rm);
-			continue;
-		}
 		if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs)
 			pnv_pci_phb3_tce_invalidate(pe, rm, shift,
 						    index, npages);
@@ -1564,14 +1427,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
 	}
 }
 
-void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
-{
-	if (phb->model == PNV_PHB_MODEL_NPU || phb->model == PNV_PHB_MODEL_PHB3)
-		pnv_pci_phb3_tce_invalidate_entire(phb, rm);
-	else
-		opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL, 0, 0, 0, 0);
-}
-
 static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
 		long npages, unsigned long uaddr,
 		enum dma_data_direction direction,
@@ -2451,7 +2306,6 @@ static void pnv_pci_enable_bridges(void)
 
 static void pnv_pci_ioda_fixup(void)
 {
-	pnv_pci_ioda_setup_nvlink();
 	pnv_pci_ioda_create_dbgfs();
 
 	pnv_pci_enable_bridges();
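[Note: before this change the TCE-invalidation path dispatched three ways, as the deleted comments explain: NPU hardware cannot kill per-entry so it flushed the whole cache through the PHB3-style register, PHB3 killed matching entries through its register, and everything else went through an OPAL call. With NVLink gone only the last two remain. A small C model of that dispatch, with illustrative model names and printfs standing in for the register write and the firmware call:]

#include <stdio.h>

enum phb_model { MODEL_PHB3, MODEL_OTHER };	/* trimmed-down model list */

/* PHB3 can kill matching TCE cache entries through a register. */
static void phb3_tce_kill(int pe, long index, long npages)
{
	printf("PHB3 register kill: pe=%d index=%ld npages=%ld\n",
	       pe, index, npages);
}

/* Everything else goes through firmware. */
static void opal_tce_kill(int pe, long index, long npages)
{
	printf("OPAL tce kill: pe=%d index=%ld npages=%ld\n",
	       pe, index, npages);
}

static void tce_invalidate(enum phb_model model, int pe, long index,
			   long npages)
{
	if (model == MODEL_PHB3)
		phb3_tce_kill(pe, index, npages);
	else
		opal_tce_kill(pe, index, npages);
}

int main(void)
{
	tce_invalidate(MODEL_PHB3, 2, 0, 16);
	tce_invalidate(MODEL_OTHER, 2, 0, 16);
	return 0;
}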
@@ -2824,15 +2678,6 @@ static void pnv_pci_release_device(struct pci_dev *pdev)
 	pnv_ioda_release_pe(pe);
 }
 
-static void pnv_npu_disable_device(struct pci_dev *pdev)
-{
-	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
-	struct eeh_pe *eehpe = edev ? edev->pe : NULL;
-
-	if (eehpe && eeh_ops && eeh_ops->reset)
-		eeh_ops->reset(eehpe, EEH_RESET_HOT);
-}
-
 static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
 {
 	struct pnv_phb *phb = hose->private_data;
@@ -2874,16 +2719,6 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
 	.shutdown		= pnv_pci_ioda_shutdown,
 };
 
-static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
-	.setup_msi_irqs		= pnv_setup_msi_irqs,
-	.teardown_msi_irqs	= pnv_teardown_msi_irqs,
-	.enable_device_hook	= pnv_pci_enable_device_hook,
-	.window_alignment	= pnv_pci_window_alignment,
-	.reset_secondary_bus	= pnv_pci_reset_secondary_bus,
-	.shutdown		= pnv_pci_ioda_shutdown,
-	.disable_device		= pnv_npu_disable_device,
-};
-
 static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
 	.enable_device_hook	= pnv_ocapi_enable_device_hook,
 	.release_device		= pnv_pci_release_device,
@@ -2957,10 +2792,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 		phb->model = PNV_PHB_MODEL_P7IOC;
 	else if (of_device_is_compatible(np, "ibm,power8-pciex"))
 		phb->model = PNV_PHB_MODEL_PHB3;
-	else if (of_device_is_compatible(np, "ibm,power8-npu-pciex"))
-		phb->model = PNV_PHB_MODEL_NPU;
-	else if (of_device_is_compatible(np, "ibm,power9-npu-pciex"))
-		phb->model = PNV_PHB_MODEL_NPU2;
 	else
 		phb->model = PNV_PHB_MODEL_UNKNOWN;
 
@@ -3118,9 +2949,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
 
 	switch (phb->type) {
-	case PNV_PHB_NPU_NVLINK:
-		hose->controller_ops = pnv_npu_ioda_controller_ops;
-		break;
 	case PNV_PHB_NPU_OCAPI:
 		hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops;
 		break;
@@ -3173,11 +3001,6 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np)
 	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
 }
 
-void __init pnv_pci_init_npu_phb(struct device_node *np)
-{
-	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_NVLINK);
-}
-
 void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
 {
 	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI);
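[Note: the hunks above show the pattern at work: each PHB type gets its own const pci_controller_ops table, and init-time code selects one in a switch; dropping NVLink means dropping one table and one case label. A minimal sketch of that pattern with invented hook names and only two trimmed-down hooks:]

#include <stdio.h>

enum phb_type { PHB_IODA1, PHB_IODA2, PHB_NPU_OCAPI };

struct controller_ops {
	void (*shutdown)(void);
	void (*release_device)(void);
};

static void generic_shutdown(void) { puts("generic shutdown"); }
static void ocapi_release(void)    { puts("ocapi release_device"); }

static const struct controller_ops ioda_ops = {
	.shutdown	= generic_shutdown,
};

static const struct controller_ops ocapi_ops = {
	.shutdown	= generic_shutdown,
	.release_device	= ocapi_release,
};

/* One ops table per PHB type; the NVLink case simply no longer exists. */
static const struct controller_ops *pick_ops(enum phb_type type)
{
	switch (type) {
	case PHB_NPU_OCAPI:
		return &ocapi_ops;
	default:
		return &ioda_ops;
	}
}

int main(void)
{
	const struct controller_ops *ops = pick_ops(PHB_NPU_OCAPI);

	ops->shutdown();
	if (ops->release_device)	/* optional hooks may be NULL */
		ops->release_device();
	return 0;
}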
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 9b9bca169275..b18468dc31ff 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -926,17 +926,6 @@ void __init pnv_pci_init(void)
 	for_each_compatible_node(np, NULL, "ibm,ioda3-phb")
 		pnv_pci_init_ioda2_phb(np);
 
-	/* Look for NPU PHBs */
-	for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb")
-		pnv_pci_init_npu_phb(np);
-
-	/*
-	 * Look for NPU2 PHBs which we treat mostly as NPU PHBs with
-	 * the exception of TCE kill which requires an OPAL call.
-	 */
-	for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb")
-		pnv_pci_init_npu_phb(np);
-
 	/* Look for NPU2 OpenCAPI PHBs */
 	for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb")
 		pnv_pci_init_npu2_opencapi_phb(np);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 36d22920f5a3..c8d4f222a86f 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -10,10 +10,9 @@
 struct pci_dn;
 
 enum pnv_phb_type {
-	PNV_PHB_IODA1		= 0,
-	PNV_PHB_IODA2		= 1,
-	PNV_PHB_NPU_NVLINK	= 2,
-	PNV_PHB_NPU_OCAPI	= 3,
+	PNV_PHB_IODA1,
+	PNV_PHB_IODA2,
+	PNV_PHB_NPU_OCAPI,
 };
 
 /* Precise PHB model for error management */
@@ -21,8 +20,6 @@ enum pnv_phb_model {
 	PNV_PHB_MODEL_UNKNOWN,
 	PNV_PHB_MODEL_P7IOC,
 	PNV_PHB_MODEL_PHB3,
-	PNV_PHB_MODEL_NPU,
-	PNV_PHB_MODEL_NPU2,
 };
 
 #define PNV_PCI_DIAG_BUF_SIZE	8192
@@ -81,7 +78,6 @@ struct pnv_ioda_pe {
 
 	/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
 	struct iommu_table_group table_group;
-	struct npu_comp		*npucomp;
 
 	/* 64-bit TCE bypass region */
 	bool			tce_bypass_enabled;
@@ -289,9 +285,7 @@ extern struct iommu_table *pnv_pci_table_alloc(int nid);
 
 extern void pnv_pci_init_ioda_hub(struct device_node *np);
 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
-extern void pnv_pci_init_npu_phb(struct device_node *np);
 extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
-extern void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr);
 extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
 extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
 
@@ -314,11 +308,6 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
 #define pe_info(pe, fmt, ...) \
 	pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
 
-/* Nvlink functions */
-extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass);
-extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm);
-extern void pnv_pci_npu_setup_iommu_groups(void);
-
 /* pci-ioda-tce.c */
 #define POWERNV_IOMMU_DEFAULT_LEVELS	2
 #define POWERNV_IOMMU_MAX_LEVELS	5
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 1bffbd1c9a94..3b6800f774c2 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -224,8 +224,6 @@ static void __init pSeries_request_regions(void)
 
 void __init pSeries_final_fixup(void)
 {
-	struct pci_controller *hose;
-
 	pSeries_request_regions();
 
 	eeh_show_enabled();
@@ -234,27 +232,6 @@ void __init pSeries_final_fixup(void)
 	ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
 	ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable;
 #endif
-	list_for_each_entry(hose, &hose_list, list_node) {
-		struct device_node *dn = hose->dn, *nvdn;
-
-		while (1) {
-			dn = of_find_all_nodes(dn);
-			if (!dn)
-				break;
-			nvdn = of_parse_phandle(dn, "ibm,nvlink", 0);
-			if (!nvdn)
-				continue;
-			if (!of_device_is_compatible(nvdn, "ibm,npu-link"))
-				continue;
-			if (!of_device_is_compatible(nvdn->parent,
-						"ibm,power9-npu"))
-				continue;
-#ifdef CONFIG_PPC_POWERNV
-			WARN_ON_ONCE(pnv_npu2_init(hose));
-#endif
-			break;
-		}
-	}
 }
 
 /*
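[Note: the deleted pseries fixup walked every device-tree node, followed an "ibm,nvlink" phandle, and checked the target's compatible strings before enabling NPU support. Below is a self-contained userspace model of that walk-and-follow-phandle pattern; nodes are a toy array and is_compatible() stands in for of_device_is_compatible(), so this is an illustration of the technique rather than the OF API.]

#include <stdio.h>
#include <string.h>

struct node {
	const char *name;
	const char *compatible;
	int nvlink;		/* index of phandle target, -1 if absent */
};

/* Toy "device tree": one bridge carries an ibm,nvlink phandle. */
static struct node tree[3] = {
	{ "pci@600",  "ibm,pciex",    2 },
	{ "pci@601",  "ibm,pciex",   -1 },
	{ "npu-link", "ibm,npu-link", -1 },
};

static int is_compatible(const struct node *n, const char *compat)
{
	return n && !strcmp(n->compatible, compat);
}

int main(void)
{
	int i;

	/* Walk every node and follow the phandle, as the removed loop did. */
	for (i = 0; i < 3; i++) {
		const struct node *nv = tree[i].nvlink >= 0 ?
					&tree[tree[i].nvlink] : NULL;

		if (is_compatible(nv, "ibm,npu-link"))
			printf("%s: NVLink-attached device found\n",
			       tree[i].name);
	}
	return 0;
}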
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index 7b739cc7a8a9..1d829e257996 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -55,9 +55,9 @@ void __init svm_swiotlb_init(void)
 	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false))
 		return;
 
-	if (io_tlb_start)
-		memblock_free_early(io_tlb_start,
-				    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+
+	memblock_free_early(__pa(vstart),
+			    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	panic("SVM: Cannot allocate SWIOTLB buffer");
 }
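[Note: the svm.c hunk changes the error path to free the buffer that was just allocated, __pa(vstart), instead of consulting io_tlb_start, which may not refer to this allocation. A toy userspace model of that allocate/try/free-on-failure shape follows; malloc and an identity cast stand in for memblock and __pa(), and the failing init is forced so the error path runs.]

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define IO_TLB_SHIFT 11			/* 2 KiB slabs, as in the kernel */

/* Toy virtual-to-physical translation standing in for __pa(). */
static uintptr_t pa(void *v)
{
	return (uintptr_t)v;
}

/* Stand-in that always fails so the error path below runs. */
static int swiotlb_init_with_tbl(void *tbl, size_t nslabs)
{
	(void)tbl;
	(void)nslabs;
	return -1;
}

int main(void)
{
	size_t nslabs = 1024;
	void *vstart = malloc(nslabs << IO_TLB_SHIFT);

	if (vstart && !swiotlb_init_with_tbl(vstart, nslabs))
		return 0;

	/* Free the buffer *we* allocated, not some unrelated global. */
	printf("freeing pa=%#lx\n", (unsigned long)pa(vstart));
	free(vstart);
	return 1;	/* the kernel panics here instead */
}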