Diffstat (limited to 'arch/powerpc')
27 files changed, 273 insertions, 84 deletions
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index b1f5549a3c9c..fd6db10ef9e6 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -234,10 +234,8 @@ fi
 # suppress some warnings in recent ld versions
 nowarn="-z noexecstack"
 
-if ! ld_is_lld; then
-	if [ "$LD_VERSION" -ge "$(echo 2.39 | ld_version)" ]; then
-		nowarn="$nowarn --no-warn-rwx-segments"
-	fi
+if "${CROSS}ld" -v --no-warn-rwx-segments >/dev/null 2>&1; then
+	nowarn="$nowarn --no-warn-rwx-segments"
 fi
 
 platformo=$object/"$platform".o
diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig
index 77306be62e9e..129355f87f80 100644
--- a/arch/powerpc/configs/mpc885_ads_defconfig
+++ b/arch/powerpc/configs/mpc885_ads_defconfig
@@ -78,4 +78,4 @@ CONFIG_DEBUG_VM_PGTABLE=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_BDI_SWITCH=y
 CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_GENERIC_PTDUMP=y
+CONFIG_PTDUMP_DEBUGFS=y
diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index 46a4c85e85e2..15783f2307df 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -3,10 +3,12 @@
 menu "Accelerated Cryptographic Algorithms for CPU (powerpc)"
 
 config CRYPTO_CURVE25519_PPC64
-	tristate "Public key crypto: Curve25519 (PowerPC64)"
+	tristate
 	depends on PPC64 && CPU_LITTLE_ENDIAN
+	select CRYPTO_KPP
 	select CRYPTO_LIB_CURVE25519_GENERIC
 	select CRYPTO_ARCH_HAVE_LIB_CURVE25519
+	default CRYPTO_LIB_CURVE25519_INTERNAL
 	help
 	  Curve25519 algorithm
 
@@ -124,11 +126,12 @@ config CRYPTO_AES_GCM_P10
 	  later CPU. This module supports stitched acceleration for AES/GCM.
 
 config CRYPTO_CHACHA20_P10
-	tristate "Ciphers: ChaCha20, XChacha20, XChacha12 (P10 or later)"
+	tristate
 	depends on PPC64 && CPU_LITTLE_ENDIAN && VSX
 	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_CHACHA_GENERIC
 	select CRYPTO_ARCH_HAVE_LIB_CHACHA
+	default CRYPTO_LIB_CHACHA_INTERNAL
 	help
 	  Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
 	  stream cipher algorithms
@@ -140,6 +143,7 @@ config CRYPTO_CHACHA20_P10
 config CRYPTO_POLY1305_P10
 	tristate "Hash functions: Poly1305 (P10 or later)"
 	depends on PPC64 && CPU_LITTLE_ENDIAN && VSX
+	depends on BROKEN # Needs to be fixed to work in softirq context
 	select CRYPTO_HASH
 	select CRYPTO_LIB_POLY1305_GENERIC
 	help
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index 59808592f0a1..1e52b02d8943 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -56,3 +56,4 @@ $(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
 OBJECT_FILES_NON_STANDARD_aesp10-ppc.o := y
 OBJECT_FILES_NON_STANDARD_ghashp10-ppc.o := y
 OBJECT_FILES_NON_STANDARD_aesp8-ppc.o := y
+OBJECT_FILES_NON_STANDARD_ghashp8-ppc.o := y
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index dad2e7980f24..86326587e58d 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -45,7 +45,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
 
 #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
-					    unsigned long addr, pte_t *ptep)
+					    unsigned long addr, pte_t *ptep,
+					    unsigned long sz)
 {
 	return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1));
 }
@@ -55,8 +56,9 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
 					  unsigned long addr, pte_t *ptep)
 {
 	pte_t pte;
+	unsigned long sz = huge_page_size(hstate_vma(vma));
 
-	pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+	pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz);
 	flush_hugetlb_page(vma, addr);
 	return pte;
 }
diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h
index d99863cd6cde..049152f8d597 100644
--- a/arch/powerpc/include/asm/mmzone.h
+++ b/arch/powerpc/include/asm/mmzone.h
@@ -29,6 +29,7 @@ extern cpumask_var_t node_to_cpumask_map[];
 #ifdef CONFIG_MEMORY_HOTPLUG
 extern unsigned long max_pfn;
 u64 memory_hotplug_max(void);
+u64 hot_add_drconf_memory_max(void);
 #else
 #define memory_hotplug_max() memblock_end_of_DRAM()
 #endif
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 02897f4b0dbf..b891910fce8a 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -183,7 +183,7 @@
 /*
  * Used to name C functions called from asm
  */
-#ifdef CONFIG_PPC_KERNEL_PCREL
+#if defined(__powerpc64__) && defined(CONFIG_PPC_KERNEL_PCREL)
 #define CFUNC(name) name@notoc
 #else
 #define CFUNC(name) name
diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h
index 2c145da3b774..b5211e413829 100644
--- a/arch/powerpc/include/uapi/asm/ioctls.h
+++ b/arch/powerpc/include/uapi/asm/ioctls.h
@@ -23,10 +23,10 @@
 #define TCSETSW		_IOW('t', 21, struct termios)
 #define TCSETSF		_IOW('t', 22, struct termios)
 
-#define TCGETA		_IOR('t', 23, struct termio)
-#define TCSETA		_IOW('t', 24, struct termio)
-#define TCSETAW		_IOW('t', 25, struct termio)
-#define TCSETAF		_IOW('t', 28, struct termio)
+#define TCGETA		0x40147417 /* _IOR('t', 23, struct termio) */
+#define TCSETA		0x80147418 /* _IOW('t', 24, struct termio) */
+#define TCSETAW		0x80147419 /* _IOW('t', 25, struct termio) */
+#define TCSETAF		0x8014741c /* _IOW('t', 28, struct termio) */
 
 #define TCSBRK		_IO('t', 29)
 #define TCXONC		_IO('t', 30)
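The hard-coded TCGETA/TCSETA* values above are just the expanded forms of the old macros, and the arithmetic can be redone by hand. The following stand-alone user-space sketch (not part of the patch) assumes the powerpc ioctl layout (3 direction bits above 13 size bits, 8 type bits, 8 number bits, with read = 2 and write = 4) and sizeof(struct termio) == 20 (NCC == 10 on powerpc); under those assumptions it reproduces the four constants.

/* Stand-alone sketch: re-derive the TCGETA/TCSETA* values.
 * Assumes the powerpc ioctl layout (dir:3 | size:13 | type:8 | nr:8,
 * _IOC_READ = 2, _IOC_WRITE = 4) and sizeof(struct termio) == 20 (NCC == 10).
 */
#include <stdio.h>

#define PPC_IOC(dir, type, nr, size) \
	(((unsigned int)(dir) << 29) | ((unsigned int)(size) << 16) | \
	 ((unsigned int)(type) << 8) | (unsigned int)(nr))
#define PPC_IOC_READ	2u
#define PPC_IOC_WRITE	4u

int main(void)
{
	unsigned int sz = 20;	/* assumed sizeof(struct termio) on powerpc */

	printf("TCGETA  = %#010x\n", PPC_IOC(PPC_IOC_READ,  't', 23, sz)); /* 0x40147417 */
	printf("TCSETA  = %#010x\n", PPC_IOC(PPC_IOC_WRITE, 't', 24, sz)); /* 0x80147418 */
	printf("TCSETAW = %#010x\n", PPC_IOC(PPC_IOC_WRITE, 't', 25, sz)); /* 0x80147419 */
	printf("TCSETAF = %#010x\n", PPC_IOC(PPC_IOC_WRITE, 't', 28, sz)); /* 0x8014741c */
	return 0;
}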
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index f43c1198768c..04d6a1e8ff9a 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -162,9 +162,7 @@ endif
 
 obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM)	+= tm.o
 
-ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE)$(CONFIG_PPC_BOOK3S),)
 obj-y				+= ppc_save_regs.o
-endif
 
 obj-$(CONFIG_EPAPR_PARAVIRT)	+= epapr_paravirt.o epapr_hcalls.o
 obj-$(CONFIG_KVM_GUEST)		+= kvm.o kvm_emul.o
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 83fe99861eb1..ca7f7bb2b478 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1509,6 +1509,8 @@ int eeh_pe_configure(struct eeh_pe *pe)
 	/* Invalid PE ? */
 	if (!pe)
 		return -ENODEV;
+	else
+		ret = eeh_ops->configure_bridge(pe);
 
 	return ret;
 }
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 935568d68196..b1dc4cb9f78e 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2982,11 +2982,11 @@ static void __init fixup_device_tree_pmac(void)
 	char type[8];
 	phandle node;
 
-	// Some pmacs are missing #size-cells on escc nodes
+	// Some pmacs are missing #size-cells on escc or i2s nodes
 	for (node = 0; prom_next_node(&node); ) {
 		type[0] = '\0';
 		prom_getprop(node, "device_type", type, sizeof(type));
-		if (prom_strcmp(type, "escc"))
+		if (prom_strcmp(type, "escc") && prom_strcmp(type, "i2s"))
 			continue;
 
 		if (prom_getproplen(node, "#size-cells") != PROM_ERROR)
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
index c568cad6a22e..6ba68b28ed87 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -53,7 +53,7 @@ ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WARN)
 ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CFLAGS))
 
 CC32FLAGS := -m32
-CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc
+CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc -mpcrel
 ifdef CONFIG_CC_IS_CLANG
 # This flag is supported by clang for 64-bit but not 32-bit so it will cause
 # an unused command line flag warning for this file.
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index 9ac3266e4965..a325c1c02f96 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -359,7 +359,10 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 	if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
 		is_via_system_reset = 1;
 
-	crash_smp_send_stop();
+	if (IS_ENABLED(CONFIG_SMP))
+		crash_smp_send_stop();
+	else
+		crash_kexec_prepare();
 
 	crash_save_cpu(regs, crashing_cpu);
diff --git a/arch/powerpc/kexec/relocate_32.S b/arch/powerpc/kexec/relocate_32.S
index 104c9911f406..dd86e338307d 100644
--- a/arch/powerpc/kexec/relocate_32.S
+++ b/arch/powerpc/kexec/relocate_32.S
@@ -348,16 +348,13 @@ write_utlb:
 	rlwinm	r10, r24, 0, 22, 27
 
 	cmpwi	r10, PPC47x_TLB0_4K
-	bne	0f
 	li	r10, 0x1000	/* r10 = 4k */
-	ANNOTATE_INTRA_FUNCTION_CALL
-	bl	1f
+	beq	0f
 
-0:
 	/* Defaults to 256M */
 	lis	r10, 0x1000
 
-	bcl	20,31,$+4
+0:	bcl	20,31,$+4
 1:	mflr	r4
 	addi	r4, r4, (2f-1b)	/* virtual address of 2f */
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 6824e8139801..3708fa48bee9 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -242,7 +242,7 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
 	return tlbe->mas7_3 & (MAS3_SW|MAS3_UW);
 }
 
-static inline bool kvmppc_e500_ref_setup(struct tlbe_ref *ref,
+static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
 					 struct kvm_book3e_206_tlb_entry *gtlbe,
 					 kvm_pfn_t pfn, unsigned int wimg)
 {
@@ -252,7 +252,11 @@ static inline bool kvmppc_e500_ref_setup(struct tlbe_ref *ref,
 	/* Use guest supplied MAS2_G and MAS2_E */
 	ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg;
 
-	return tlbe_is_writable(gtlbe);
+	/* Mark the page accessed */
+	kvm_set_pfn_accessed(pfn);
+
+	if (tlbe_is_writable(gtlbe))
+		kvm_set_pfn_dirty(pfn);
 }
 
 static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
@@ -322,7 +326,6 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 {
 	struct kvm_memory_slot *slot;
 	unsigned long pfn = 0; /* silence GCC warning */
-	struct page *page = NULL;
 	unsigned long hva;
 	int pfnmap = 0;
 	int tsize = BOOK3E_PAGESZ_4K;
@@ -334,7 +337,6 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	unsigned int wimg = 0;
 	pgd_t *pgdir;
 	unsigned long flags;
-	bool writable = false;
 
 	/* used to check for invalidations in progress */
 	mmu_seq = kvm->mmu_invalidate_seq;
@@ -444,7 +446,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 	if (likely(!pfnmap)) {
 		tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT);
-		pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, NULL, &page);
+		pfn = gfn_to_pfn_memslot(slot, gfn);
 		if (is_error_noslot_pfn(pfn)) {
 			if (printk_ratelimit())
 				pr_err("%s: real page not found for gfn %lx\n",
@@ -489,7 +491,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	}
 	local_irq_restore(flags);
 
-	writable = kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
+	kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
 	kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
 				ref, gvaddr, stlbe);
 
@@ -497,8 +499,11 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	kvmppc_mmu_flush_icache(pfn);
 
 out:
-	kvm_release_faultin_page(kvm, page, !!ret, writable);
 	spin_unlock(&kvm->mmu_lock);
+
+	/* Drop refcount on page, so that mmu notifiers can clear it */
+	kvm_release_pfn_clean(pfn);
+
 	return ret;
 }
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index f14329989e9a..4b6ce4f07bc2 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -550,12 +550,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CAP_SPAPR_TCE:
+		fallthrough;
 	case KVM_CAP_SPAPR_TCE_64:
-		r = 1;
-		break;
 	case KVM_CAP_SPAPR_TCE_VFIO:
-		r = !!cpu_has_feature(CPU_FTR_HVMODE);
-		break;
 	case KVM_CAP_PPC_RTAS:
 	case KVM_CAP_PPC_FIXUP_HCALL:
 	case KVM_CAP_PPC_ENABLE_HCALL:
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index b0d927009af8..0d807bf2328d 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -988,7 +988,7 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start,
 	return 0;
 }
 
-
+#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
 bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap)
 {
 	if (radix_enabled())
@@ -996,6 +996,7 @@ bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap)
 
 	return false;
 }
+#endif
 
 int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
 				unsigned long addr, unsigned long next)
@@ -1132,6 +1133,19 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, int node,
 	pmd_t *pmd;
 	pte_t *pte;
 
+	/*
+	 * Make sure we align the start vmemmap addr so that we calculate
+	 * the correct start_pfn in altmap boundary check to decide whether
+	 * we should use altmap or RAM based backing memory allocation. Also
+	 * the address needs to be aligned for set_pte operation.
+
+	 * If the start addr is already PMD_SIZE aligned we will try to use
+	 * a pmd mapping. We don't want to be too aggressive here because
+	 * that will cause more allocations in RAM. So only if the namespace
+	 * vmemmap start addr is PMD_SIZE aligned we will use PMD mapping.
+	 */
+
+	start = ALIGN_DOWN(start, PAGE_SIZE);
 	for (addr = start; addr < end; addr = next) {
 		next = pmd_addr_end(addr, end);
 
@@ -1157,8 +1171,8 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, int node,
 		 * in altmap block allocation failures, in which case
 		 * we fallback to RAM for vmemmap allocation.
 		 */
-		if (altmap && (!IS_ALIGNED(addr, PMD_SIZE) ||
-			       altmap_cross_boundary(altmap, addr, PMD_SIZE))) {
+		if (!IS_ALIGNED(addr, PMD_SIZE) || (altmap &&
+		    altmap_cross_boundary(altmap, addr, PMD_SIZE))) {
 			/*
 			 * make sure we don't create altmap mappings
 			 * covering things outside the device.
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 3c1da08304d0..603a0f652ba6 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1336,7 +1336,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
 	return nid;
 }
 
-static u64 hot_add_drconf_memory_max(void)
+u64 hot_add_drconf_memory_max(void)
 {
 	struct device_node *memory = NULL;
 	struct device_node *dn = NULL;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 42867469752d..33d726bb99e3 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2222,6 +2222,10 @@ static struct pmu power_pmu = {
 #define PERF_SAMPLE_ADDR_TYPE  (PERF_SAMPLE_ADDR |		\
 				PERF_SAMPLE_PHYS_ADDR |		\
 				PERF_SAMPLE_DATA_PAGE_SIZE)
+
+#define SIER_TYPE_SHIFT	15
+#define SIER_TYPE_MASK	(0x7ull << SIER_TYPE_SHIFT)
+
 /*
  * A counter has overflowed; update its count and record
  * things if requested. Note that interrupts are hard-disabled
@@ -2291,6 +2295,22 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 			record = 0;
 
 	/*
+	 * SIER[46-48] presents instruction type of the sampled instruction.
+	 * In ISA v3.0 and before values "0" and "7" are considered reserved.
+	 * In ISA v3.1, value "7" has been used to indicate "larx/stcx".
+	 * Drop the sample if "type" has reserved values for this field with an
+	 * ISA version check.
+	 */
+	if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
+			ppmu->get_mem_data_src) {
+		val = (regs->dar & SIER_TYPE_MASK) >> SIER_TYPE_SHIFT;
+		if (val == 0 || (val == 7 && !cpu_has_feature(CPU_FTR_ARCH_31))) {
+			record = 0;
+			atomic64_inc(&event->lost_samples);
+		}
+	}
+
+	/*
 	 * Finally record data if requested.
 	 */
 	if (record) {
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 56301b2bc8ae..031a2b63c171 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -321,8 +321,10 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
 
 	sier = mfspr(SPRN_SIER);
 	val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
-	if (val != 1 && val != 2 && !(val == 7 && cpu_has_feature(CPU_FTR_ARCH_31)))
+	if (val != 1 && val != 2 && !(val == 7 && cpu_has_feature(CPU_FTR_ARCH_31))) {
+		dsrc->val = 0;
 		return;
+	}
 
 	idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT;
 	sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT;
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
index 0b6365d85d11..dc6f75d3ac6e 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -521,6 +521,15 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
 		return -EINVAL;
 	}
 
+	/*
+	 * Map complete page to the paste address. So the user
+	 * space should pass 0ULL to the offset parameter.
+	 */
+	if (vma->vm_pgoff) {
+		pr_debug("Page offset unsupported to map paste address\n");
+		return -EINVAL;
+	}
+
 	/* Ensure instance has an open send window */
 	if (!txwin) {
 		pr_err("No send window open?\n");
diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c
index 827d338deaf4..2c2999de6bfa 100644
--- a/arch/powerpc/platforms/cell/spufs/gang.c
+++ b/arch/powerpc/platforms/cell/spufs/gang.c
@@ -25,6 +25,7 @@ struct spu_gang *alloc_spu_gang(void)
 	mutex_init(&gang->aff_mutex);
 	INIT_LIST_HEAD(&gang->list);
 	INIT_LIST_HEAD(&gang->aff_list_head);
+	gang->alive = 1;
 
 out:
 	return gang;
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 70236d1df3d3..9f9e4b871627 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -192,13 +192,32 @@ static int spufs_fill_dir(struct dentry *dir,
 			return -ENOMEM;
 		ret = spufs_new_file(dir->d_sb, dentry, files->ops,
 					files->mode & mode, files->size, ctx);
-		if (ret)
+		if (ret) {
+			dput(dentry);
 			return ret;
+		}
 		files++;
 	}
 	return 0;
 }
 
+static void unuse_gang(struct dentry *dir)
+{
+	struct inode *inode = dir->d_inode;
+	struct spu_gang *gang = SPUFS_I(inode)->i_gang;
+
+	if (gang) {
+		bool dead;
+
+		inode_lock(inode); // exclusion with spufs_create_context()
+		dead = !--gang->alive;
+		inode_unlock(inode);
+
+		if (dead)
+			simple_recursive_removal(dir, NULL);
+	}
+}
+
 static int spufs_dir_close(struct inode *inode, struct file *file)
 {
 	struct inode *parent;
@@ -213,6 +232,7 @@ static int spufs_dir_close(struct inode *inode, struct file *file)
 	inode_unlock(parent);
 	WARN_ON(ret);
 
+	unuse_gang(dir->d_parent);
 	return dcache_dir_close(inode, file);
 }
 
@@ -405,7 +425,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
 {
 	int ret;
 	int affinity;
-	struct spu_gang *gang;
+	struct spu_gang *gang = SPUFS_I(inode)->i_gang;
 	struct spu_context *neighbor;
 	struct path path = {.mnt = mnt, .dentry = dentry};
 
@@ -420,11 +440,15 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
 	if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
 		return -ENODEV;
 
-	gang = NULL;
+	if (gang) {
+		if (!gang->alive)
+			return -ENOENT;
+		gang->alive++;
+	}
+
 	neighbor = NULL;
 	affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
 	if (affinity) {
-		gang = SPUFS_I(inode)->i_gang;
 		if (!gang)
 			return -EINVAL;
 		mutex_lock(&gang->aff_mutex);
@@ -436,8 +460,11 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
 	}
 
 	ret = spufs_mkdir(inode, dentry, flags, mode & 0777);
-	if (ret)
+	if (ret) {
+		if (neighbor)
+			put_spu_context(neighbor);
 		goto out_aff_unlock;
+	}
 
 	if (affinity) {
 		spufs_set_affinity(flags, SPUFS_I(d_inode(dentry))->i_ctx,
@@ -453,6 +480,8 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
 out_aff_unlock:
 	if (affinity)
 		mutex_unlock(&gang->aff_mutex);
+	if (ret && gang)
+		gang->alive--; // can't reach 0
 	return ret;
 }
 
@@ -482,6 +511,7 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
 	inode->i_fop = &simple_dir_operations;
 
 	d_instantiate(dentry, inode);
+	dget(dentry);
 	inc_nlink(dir);
 	inc_nlink(d_inode(dentry));
 	return ret;
@@ -492,6 +522,21 @@ out:
 	return ret;
 }
 
+static int spufs_gang_close(struct inode *inode, struct file *file)
+{
+	unuse_gang(file->f_path.dentry);
+	return dcache_dir_close(inode, file);
+}
+
+static const struct file_operations spufs_gang_fops = {
+	.open		= dcache_dir_open,
+	.release	= spufs_gang_close,
+	.llseek		= dcache_dir_lseek,
+	.read		= generic_read_dir,
+	.iterate_shared	= dcache_readdir,
+	.fsync		= noop_fsync,
+};
+
 static int spufs_gang_open(const struct path *path)
 {
 	int ret;
@@ -511,7 +556,7 @@ static int spufs_gang_open(const struct path *path)
 		return PTR_ERR(filp);
 	}
 
-	filp->f_op = &simple_dir_operations;
+	filp->f_op = &spufs_gang_fops;
 	fd_install(ret, filp);
 	return ret;
 }
@@ -526,10 +571,8 @@ static int spufs_create_gang(struct inode *inode,
 	ret = spufs_mkgang(inode, dentry, mode & 0777);
 	if (!ret) {
 		ret = spufs_gang_open(&path);
-		if (ret < 0) {
-			int err = simple_rmdir(inode, dentry);
-			WARN_ON(err);
-		}
+		if (ret < 0)
+			unuse_gang(dentry);
 	}
 	return ret;
 }
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 84958487f696..d33787c57c39 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -151,6 +151,8 @@ struct spu_gang {
 	int aff_flags;
 	struct spu *aff_ref_spu;
 	atomic_t aff_sched_count;
+
+	int alive;
 };
 
 /* Flag bits for spu_gang aff_flags */
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 877720c64515..35471b679638 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -48,11 +48,15 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
 static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct memtrace_entry *ent = filp->private_data;
+	unsigned long ent_nrpages = ent->size >> PAGE_SHIFT;
+	unsigned long vma_nrpages = vma_pages(vma);
 
-	if (ent->size < vma->vm_end - vma->vm_start)
+	/* The requested page offset should be within object's page count */
+	if (vma->vm_pgoff >= ent_nrpages)
 		return -EINVAL;
 
-	if (vma->vm_pgoff << PAGE_SHIFT >= ent->size)
+	/* The requested mapping range should remain within the bounds */
+	if (vma_nrpages > ent_nrpages - vma->vm_pgoff)
 		return -EINVAL;
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
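The rewritten memtrace_mmap() check above works in pages so that neither comparison can overflow: the offset must lie inside the object, and the mapping length must fit in what remains after the offset. Below is a stand-alone user-space sketch of that arithmetic (the helper name and the numbers are illustrative, not part of the patch).

/* Sketch of the overflow-safe bounds check used in memtrace_mmap().
 * mmap_range_ok() is a made-up helper; the logic mirrors the two tests
 * added by the patch.
 */
#include <stdbool.h>
#include <stdio.h>

static bool mmap_range_ok(unsigned long pgoff, unsigned long vma_nrpages,
			  unsigned long obj_nrpages)
{
	if (pgoff >= obj_nrpages)		/* offset starts past the object */
		return false;
	/* the subtraction cannot underflow here, so no overflow-prone addition */
	return vma_nrpages <= obj_nrpages - pgoff;
}

int main(void)
{
	printf("%d\n", mmap_range_ok(0, 16, 16));	/* 1: maps the whole object */
	printf("%d\n", mmap_range_ok(8, 16, 16));	/* 0: 8 + 16 would run past the end */
	printf("%d\n", mmap_range_ok(16, 1, 16));	/* 0: offset already out of range */
	return 0;
}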
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index ae6f7a235d8b..eec333dd2e59 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -52,7 +52,8 @@ enum {
 enum {
 	DDW_EXT_SIZE = 0,
 	DDW_EXT_RESET_DMA_WIN = 1,
-	DDW_EXT_QUERY_OUT_SIZE = 2
+	DDW_EXT_QUERY_OUT_SIZE = 2,
+	DDW_EXT_LIMITED_ADDR_MODE = 3
 };
 
 static struct iommu_table *iommu_pseries_alloc_table(int node)
@@ -196,7 +197,7 @@ static void tce_iommu_userspace_view_free(struct iommu_table *tbl)
 
 static void tce_free_pSeries(struct iommu_table *tbl)
 {
-	if (!tbl->it_userspace)
+	if (tbl->it_userspace)
 		tce_iommu_userspace_view_free(tbl);
 }
 
@@ -1284,17 +1285,13 @@ static LIST_HEAD(failed_ddw_pdn_list);
 
 static phys_addr_t ddw_memory_hotplug_max(void)
 {
-	resource_size_t max_addr = memory_hotplug_max();
-	struct device_node *memory;
+	resource_size_t max_addr;
 
-	for_each_node_by_type(memory, "memory") {
-		struct resource res;
-
-		if (of_address_to_resource(memory, 0, &res))
-			continue;
-
-		max_addr = max_t(resource_size_t, max_addr, res.end + 1);
-	}
+#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
+	max_addr = hot_add_drconf_memory_max();
+#else
+	max_addr = memblock_end_of_DRAM();
+#endif
 
 	return max_addr;
 }
@@ -1331,6 +1328,54 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
 		 ret);
 }
 
+/*
+ * Platforms support placing the PHB in limited address mode starting with the
+ * LoPAR level 2.13 implementation. In this mode, the DMA address returned by
+ * DDW is over 4GB but less than 64-bits. This benefits IO adapters that don't
+ * support 64-bits for DMA addresses.
+ */
+static int limited_dma_window(struct pci_dev *dev, struct device_node *par_dn)
+{
+	int ret;
+	u32 cfg_addr, reset_dma_win, las_supported;
+	u64 buid;
+	struct device_node *dn;
+	struct pci_dn *pdn;
+
+	ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win);
+	if (ret)
+		goto out;
+
+	ret = ddw_read_ext(par_dn, DDW_EXT_LIMITED_ADDR_MODE, &las_supported);
+
+	/* Limited Address Space extension is available on the platform but DDW
+	 * in limited addressing mode is not supported
+	 */
+	if (!ret && !las_supported)
+		ret = -EPROTO;
+
+	if (ret) {
+		dev_info(&dev->dev, "Limited Address Space for DDW not Supported, err: %d", ret);
+		goto out;
+	}
+
+	dn = pci_device_to_OF_node(dev);
+	pdn = PCI_DN(dn);
+	buid = pdn->phb->buid;
+	cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+
+	ret = rtas_call(reset_dma_win, 4, 1, NULL, cfg_addr, BUID_HI(buid),
+			BUID_LO(buid), 1);
+	if (ret)
+		dev_info(&dev->dev,
+			 "ibm,reset-pe-dma-windows(%x) for Limited Addr Support: %x %x %x returned %d ",
+			 reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid),
+			 ret);
+
+out:
+	return ret;
+}
+
 /* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */
 static int iommu_get_page_shift(u32 query_page_size)
 {
@@ -1398,7 +1443,7 @@ static struct property *ddw_property_create(const char *propname, u32 liobn, u64
  *
  * returns true if can map all pages (direct mapping), false otherwise..
  */
-static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
+static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn, u64 dma_mask)
 {
 	int len = 0, ret;
 	int max_ram_len = order_base_2(ddw_memory_hotplug_max());
@@ -1417,6 +1462,9 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	bool pmem_present;
 	struct pci_dn *pci = PCI_DN(pdn);
 	struct property *default_win = NULL;
+	bool limited_addr_req = false, limited_addr_enabled = false;
+	int dev_max_ddw;
+	int ddw_sz;
 
 	dn = of_find_node_by_type(NULL, "ibm,pmemory");
 	pmem_present = dn != NULL;
@@ -1443,7 +1491,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	 * the ibm,ddw-applicable property holds the tokens for:
 	 * ibm,query-pe-dma-window
 	 * ibm,create-pe-dma-window
-	 * ibm,remove-pe-dma-window
 	 * for the given node in that order.
	 * the property is actually in the parent, not the PE
 	 */
@@ -1463,6 +1510,20 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	if (ret != 0)
 		goto out_failed;
 
+	/* DMA Limited Addressing required? This is when the driver has
+	 * requested to create DDW but supports a mask which is less than 64-bits
+	 */
+	limited_addr_req = (dma_mask != DMA_BIT_MASK(64));
+
+	/* place the PHB in Limited Addressing mode */
+	if (limited_addr_req) {
+		if (limited_dma_window(dev, pdn))
+			goto out_failed;
+
+		/* PHB is in Limited address mode */
+		limited_addr_enabled = true;
+	}
+
 	/*
 	 * If there is no window available, remove the default DMA window,
 	 * if it's present. This will make all the resources available to the
@@ -1509,6 +1570,15 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 		goto out_failed;
 	}
 
+	/* Maximum DMA window size that the device can address (in log2) */
+	dev_max_ddw = fls64(dma_mask);
+
+	/* If the device DMA mask is less than 64-bits, make sure the DMA window
+	 * size is not bigger than what the device can access
+	 */
+	ddw_sz = min(order_base_2(query.largest_available_block << page_shift),
+			dev_max_ddw);
+
 	/*
 	 * The "ibm,pmemory" can appear anywhere in the address space.
 	 * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
@@ -1517,23 +1587,21 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	 */
 	len = max_ram_len;
 	if (pmem_present) {
-		if (query.largest_available_block >=
-		    (1ULL << (MAX_PHYSMEM_BITS - page_shift)))
+		if (ddw_sz >= MAX_PHYSMEM_BITS)
 			len = MAX_PHYSMEM_BITS;
 		else
 			dev_info(&dev->dev, "Skipping ibm,pmemory");
 	}
 
 	/* check if the available block * number of ptes will map everything */
-	if (query.largest_available_block < (1ULL << (len - page_shift))) {
+	if (ddw_sz < len) {
 		dev_dbg(&dev->dev,
 			"can't map partition max 0x%llx with %llu %llu-sized pages\n",
 			1ULL << len,
 			query.largest_available_block,
 			1ULL << page_shift);
 
-		len = order_base_2(query.largest_available_block << page_shift);
-
+		len = ddw_sz;
 		dynamic_mapping = true;
 	} else {
 		direct_mapping = !default_win_removed ||
@@ -1547,8 +1615,9 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	 */
 	if (default_win_removed && pmem_present && !direct_mapping) {
 		/* DDW is big enough to be split */
-		if ((query.largest_available_block << page_shift) >=
-		     MIN_DDW_VPMEM_DMA_WINDOW + (1ULL << max_ram_len)) {
+		if ((1ULL << ddw_sz) >=
+				MIN_DDW_VPMEM_DMA_WINDOW + (1ULL << max_ram_len)) {
+
 			direct_mapping = true;
 
 			/* offset of the Dynamic part of DDW */
@@ -1559,8 +1628,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 			dynamic_mapping = true;
 
 			/* create max size DDW possible */
-			len = order_base_2(query.largest_available_block
-							<< page_shift);
+			len = ddw_sz;
 		}
 	}
 
@@ -1600,7 +1668,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 
 	if (direct_mapping) {
 		/* DDW maps the whole partition, so enable direct DMA mapping */
-		ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
+		ret = walk_system_ram_range(0, ddw_memory_hotplug_max() >> PAGE_SHIFT,
 					    win64->value, tce_setrange_multi_pSeriesLP_walk);
 		if (ret) {
 			dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n",
@@ -1689,7 +1757,7 @@ out_remove_win:
 		__remove_dma_window(pdn, ddw_avail, create.liobn);
 
 out_failed:
-	if (default_win_removed)
+	if (default_win_removed || limited_addr_enabled)
 		reset_dma_window(dev, pdn);
 
 	fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
@@ -1708,6 +1776,9 @@ out_unlock:
 		dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset +
 			(1ULL << max_ram_len);
 
+	dev_info(&dev->dev, "lsa_required: %x, lsa_enabled: %x, direct mapping: %x\n",
+			limited_addr_req, limited_addr_enabled, direct_mapping);
+
 	return direct_mapping;
 }
 
@@ -1833,8 +1904,11 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
 {
 	struct device_node *dn = pci_device_to_OF_node(pdev), *pdn;
 
-	/* only attempt to use a new window if 64-bit DMA is requested */
-	if (dma_mask < DMA_BIT_MASK(64))
+	/* For DDW, the DMA mask should be more than 32-bits. For a mask more than
+	 * 32-bits but less than 64-bits, DMA addressing is supported in
+	 * Limited Addressing mode.
+	 */
+	if (dma_mask <= DMA_BIT_MASK(32))
 		return false;
 
 	dev_dbg(&pdev->dev, "node is %pOF\n", dn);
@@ -1847,7 +1921,7 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
 	 */
 	pdn = pci_dma_find(dn, NULL);
 	if (pdn && PCI_DN(pdn))
-		return enable_ddw(pdev, pdn);
+		return enable_ddw(pdev, pdn, dma_mask);
 
 	return false;
 }
@@ -2349,11 +2423,17 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
 	struct memory_notify *arg = data;
 	int ret = 0;
 
+	/* This notifier can get called when onlining persistent memory as well.
+	 * TCEs are not pre-mapped for persistent memory. Persistent memory will
+	 * always be above ddw_memory_hotplug_max()
+	 */
+
 	switch (action) {
 	case MEM_GOING_ONLINE:
 		spin_lock(&dma_win_list_lock);
 		list_for_each_entry(window, &dma_win_list, list) {
-			if (window->direct) {
+			if (window->direct && (arg->start_pfn << PAGE_SHIFT) <
+					ddw_memory_hotplug_max()) {
 				ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
 						arg->nr_pages, window->prop);
 			}
@@ -2365,7 +2445,8 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
 	case MEM_OFFLINE:
 		spin_lock(&dma_win_list_lock);
 		list_for_each_entry(window, &dma_win_list, list) {
-			if (window->direct) {
+			if (window->direct && (arg->start_pfn << PAGE_SHIFT) <
+					ddw_memory_hotplug_max()) {
 				ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
 						arg->nr_pages, window->prop);
 			}
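In the enable_ddw() changes above, the new window size is the smaller of what the platform offers (largest_available_block TCEs of 2^page_shift bytes each) and what the device's DMA mask can address. The stand-alone sketch below redoes that arithmetic in user space; fls64_() and order_base_2_() are stand-ins for the kernel helpers, and the input numbers are only illustrative.

/* Sketch of the DDW window-sizing arithmetic from enable_ddw().
 * fls64_() and order_base_2_() stand in for the kernel's fls64()/order_base_2();
 * the mask, block count and page shift are example values.
 */
#include <stdio.h>
#include <stdint.h>

static int fls64_(uint64_t x)        { return x ? 64 - __builtin_clzll(x) : 0; }
static int order_base_2_(uint64_t x) { return x > 1 ? fls64_(x - 1) : 0; }

int main(void)
{
	uint64_t dma_mask = (1ULL << 43) - 1;		/* device handles 43-bit DMA addresses */
	uint64_t largest_available_block = 1ULL << 36;	/* TCEs the platform can provide */
	int page_shift = 16;				/* 64K IOMMU pages */

	int dev_max_ddw = fls64_(dma_mask);				/* 43 */
	int ddw_sz = order_base_2_(largest_available_block << page_shift); /* 52 */

	if (ddw_sz > dev_max_ddw)
		ddw_sz = dev_max_ddw;	/* clamp the window to what the device can address */

	printf("DDW size: 2^%d bytes\n", ddw_sz);	/* 2^43, even though 2^52 was offered */
	return 0;
}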
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 6dfb55b52d36..ba98a680a12e 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -524,7 +524,12 @@ static struct msi_domain_info pseries_msi_domain_info = {
 
 static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
 {
-	__pci_read_msi_msg(irq_data_get_msi_desc(data), msg);
+	struct pci_dev *dev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
+
+	if (dev->current_state == PCI_D0)
+		__pci_read_msi_msg(irq_data_get_msi_desc(data), msg);
+	else
+		get_cached_msi_msg(data->irq, msg);
 }
 
 static struct irq_chip pseries_msi_irq_chip = {