diff options
Diffstat (limited to 'arch')
44 files changed, 775 insertions, 223 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 7401e921aad0..67ee6731f4e9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -80,6 +80,11 @@ config NR_IRQS /proc/interrupts. If you configure your system to have too few, drivers will fail to load or worse - handle with care. +config NMI_IPI + bool + depends on SMP && (DEBUGGER || KEXEC_CORE) + default y + config STACKTRACE_SUPPORT bool default y @@ -535,7 +540,7 @@ config KEXEC_FILE config RELOCATABLE bool "Build a relocatable kernel" - depends on (PPC64 && !COMPILE_TEST) || (FLATMEM && (44x || FSL_BOOKE)) + depends on PPC64 || (FLATMEM && (44x || FSL_BOOKE)) select NONSTATIC_KERNEL select MODULE_REL_CRCS if MODVERSIONS help @@ -569,7 +574,7 @@ config RELOCATABLE_TEST config CRASH_DUMP bool "Build a kdump crash kernel" depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP) - select RELOCATABLE if (PPC64 && !COMPILE_TEST) || 44x || FSL_BOOKE + select RELOCATABLE if PPC64 || 44x || FSL_BOOKE help Build a kernel suitable for use as a kdump capture kernel. The same kernel binary can be used as production kernel and dump diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 19b0d1a81959..3e0f0e1fadef 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -136,7 +136,7 @@ CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif ifdef CONFIG_MPROFILE_KERNEL - ifeq ($(shell $(srctree)/arch/powerpc/scripts/gcc-check-mprofile-kernel.sh $(CC) -I$(srctree)/include -D__KERNEL__),OK) + ifeq ($(shell $(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -I$(srctree)/include -D__KERNEL__),OK) CC_FLAGS_FTRACE := -pg -mprofile-kernel KBUILD_CPPFLAGS += -DCC_USING_MPROFILE_KERNEL else @@ -274,17 +274,6 @@ PHONY += $(BOOT_TARGETS1) $(BOOT_TARGETS2) boot := arch/$(ARCH)/boot -ifeq ($(CONFIG_RELOCATABLE),y) -quiet_cmd_relocs_check = CALL $< - cmd_relocs_check = $(CONFIG_SHELL) $< "$(OBJDUMP)" "$(obj)/vmlinux" - -PHONY += relocs_check -relocs_check: arch/powerpc/relocs_check.sh vmlinux - $(call cmd,relocs_check) - -zImage: relocs_check -endif - $(BOOT_TARGETS1): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) $(BOOT_TARGETS2): vmlinux diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink new file mode 100644 index 000000000000..eccfcc88afae --- /dev/null +++ b/arch/powerpc/Makefile.postlink @@ -0,0 +1,34 @@ +# =========================================================================== +# Post-link powerpc pass +# =========================================================================== +# +# 1. Check that vmlinux relocations look sane + +PHONY := __archpost +__archpost: + +-include include/config/auto.conf +include scripts/Kbuild.include + +quiet_cmd_relocs_check = CHKREL $@ + cmd_relocs_check = $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@" + +# `@true` prevents complaint when there is nothing to be done + +vmlinux: FORCE + @true +ifdef CONFIG_RELOCATABLE + $(call if_changed,relocs_check) +endif + +%.ko: FORCE + @true + +clean: + @true + +PHONY += FORCE clean + +FORCE: + +.PHONY: $(PHONY) diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 214219dff87c..9732837aaae8 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -2,9 +2,9 @@ #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H #define H_PTE_INDEX_SIZE 8 -#define H_PMD_INDEX_SIZE 5 -#define H_PUD_INDEX_SIZE 5 -#define H_PGD_INDEX_SIZE 15 +#define H_PMD_INDEX_SIZE 10 +#define H_PUD_INDEX_SIZE 7 +#define H_PGD_INDEX_SIZE 8 /* * 64k aligned address free up few of the lower bits of RPN for us diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 6d56974adf28..6981a52b3887 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -494,7 +494,7 @@ extern void slb_set_size(u16 size); * * For user processes max context id is limited to MAX_USER_CONTEXT. - * For kernel space, we use context ids 1-5 to map address as below: + * For kernel space, we use context ids 1-4 to map addresses as below: * NOTE: each context only support 64TB now. * 0x00001 - [ 0xc000000000000000 - 0xc0003fffffffffff ] * 0x00002 - [ 0xd000000000000000 - 0xd0003fffffffffff ] diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 89259817f5ef..38c5c0b33af9 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -200,6 +200,21 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define EXCEPTION_PROLOG_PSERIES_1(label, h) \ __EXCEPTION_PROLOG_PSERIES_1(label, h) +/* _NORI variant keeps MSR_RI clear */ +#define __EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \ + ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ + xori r10,r10,MSR_RI; /* Clear MSR_RI */ \ + mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ + LOAD_HANDLER(r12,label) \ + mtspr SPRN_##h##SRR0,r12; \ + mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ + mtspr SPRN_##h##SRR1,r10; \ + h##rfid; \ + b . /* prevent speculative execution */ + +#define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \ + __EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) + #define EXCEPTION_PROLOG_PSERIES(area, label, h, extra, vec) \ EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ @@ -260,6 +275,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #endif +/* Do not enable RI */ +#define EXCEPTION_PROLOG_PSERIES_NORI(area, label, h, extra, vec) \ + EXCEPTION_PROLOG_0(area); \ + EXCEPTION_PROLOG_1(area, extra, vec); \ + EXCEPTION_PROLOG_PSERIES_1_NORI(label, h); + #define __KVM_HANDLER(area, h, n) \ BEGIN_FTR_SECTION_NESTED(947) \ @@ -308,6 +329,15 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define NOTEST(n) +#define EXCEPTION_PROLOG_COMMON_1() \ + std r9,_CCR(r1); /* save CR in stackframe */ \ + std r11,_NIP(r1); /* save SRR0 in stackframe */ \ + std r12,_MSR(r1); /* save SRR1 in stackframe */ \ + std r10,0(r1); /* make stack chain pointer */ \ + std r0,GPR0(r1); /* save r0 in stackframe */ \ + std r10,GPR1(r1); /* save r1 in stackframe */ \ + + /* * The common exception prolog is used for all except a few exceptions * such as a segment miss on a kernel address. We have to be prepared @@ -332,12 +362,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) addi r3,r13,area; /* r3 -> where regs are saved*/ \ RESTORE_CTR(r1, area); \ b bad_stack; \ -3: std r9,_CCR(r1); /* save CR in stackframe */ \ - std r11,_NIP(r1); /* save SRR0 in stackframe */ \ - std r12,_MSR(r1); /* save SRR1 in stackframe */ \ - std r10,0(r1); /* make stack chain pointer */ \ - std r0,GPR0(r1); /* save r0 in stackframe */ \ - std r10,GPR1(r1); /* save r1 in stackframe */ \ +3: EXCEPTION_PROLOG_COMMON_1(); \ beq 4f; /* if from kernel mode */ \ ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \ SAVE_PPR(area, r9, r10); \ @@ -530,26 +555,39 @@ BEGIN_FTR_SECTION \ beql ppc64_runlatch_on_trampoline; \ END_FTR_SECTION_IFSET(CPU_FTR_CTRL) -#define EXCEPTION_COMMON(trap, label, hdlr, ret, additions) \ - EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ +#define EXCEPTION_COMMON(area, trap, label, hdlr, ret, additions) \ + EXCEPTION_PROLOG_COMMON(trap, area); \ /* Volatile regs are potentially clobbered here */ \ additions; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ bl hdlr; \ b ret +/* + * Exception where stack is already set in r1, r1 is saved in r10, and it + * continues rather than returns. + */ +#define EXCEPTION_COMMON_NORET_STACK(area, trap, label, hdlr, additions) \ + EXCEPTION_PROLOG_COMMON_1(); \ + EXCEPTION_PROLOG_COMMON_2(area); \ + EXCEPTION_PROLOG_COMMON_3(trap); \ + /* Volatile regs are potentially clobbered here */ \ + additions; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr + #define STD_EXCEPTION_COMMON(trap, label, hdlr) \ - EXCEPTION_COMMON(trap, label, hdlr, ret_from_except, \ - ADD_NVGPRS;ADD_RECONCILE) + EXCEPTION_COMMON(PACA_EXGEN, trap, label, hdlr, \ + ret_from_except, ADD_NVGPRS;ADD_RECONCILE) /* * Like STD_EXCEPTION_COMMON, but for exceptions that can occur * in the idle task and therefore need the special idle handling * (finish nap and runlatch) */ -#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \ - EXCEPTION_COMMON(trap, label, hdlr, ret_from_except_lite, \ - FINISH_NAP;ADD_RECONCILE;RUNLATCH_ON) +#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \ + EXCEPTION_COMMON(PACA_EXGEN, trap, label, hdlr, \ + ret_from_except_lite, FINISH_NAP;ADD_RECONCILE;RUNLATCH_ON) /* * When the idle code in power4_idle puts the CPU into NAP mode, diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 2c1d50792944..d96142572e6d 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -64,6 +64,11 @@ struct iommu_table_ops { long index, unsigned long *hpa, enum dma_data_direction *direction); + /* Real mode */ + int (*exchange_rm)(struct iommu_table *tbl, + long index, + unsigned long *hpa, + enum dma_data_direction *direction); #endif void (*clear)(struct iommu_table *tbl, long index, long npages); @@ -114,6 +119,7 @@ struct iommu_table { struct list_head it_group_list;/* List of iommu_table_group_link */ unsigned long *it_userspace; /* userspace view of the table */ struct iommu_table_ops *it_ops; + struct kref it_kref; }; #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \ @@ -146,8 +152,8 @@ static inline void *get_iommu_table_base(struct device *dev) extern int dma_iommu_dma_supported(struct device *dev, u64 mask); -/* Frees table for an individual device node */ -extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); +extern struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl); +extern int iommu_tce_table_put(struct iommu_table *tbl); /* Initializes an iommu_table based in values set in the passed-in * structure @@ -208,6 +214,8 @@ extern void iommu_del_device(struct device *dev); extern int __init tce_iommu_bus_notifier_init(void); extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, unsigned long *hpa, enum dma_data_direction *direction); +extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry, + unsigned long *hpa, enum dma_data_direction *direction); #else static inline void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index a114248de2ee..da06e0496699 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -29,10 +29,14 @@ extern void mm_iommu_init(struct mm_struct *mm); extern void mm_iommu_cleanup(struct mm_struct *mm); extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, unsigned long ua, unsigned long size); +extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm( + struct mm_struct *mm, unsigned long ua, unsigned long size); extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, unsigned long ua, unsigned long entries); extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, unsigned long ua, unsigned long *hpa); +extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, + unsigned long ua, unsigned long *hpa); extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem); #endif diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 140ddb9ae5a8..1c09f8fe2ee8 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -99,7 +99,6 @@ struct paca_struct { */ /* used for most interrupts/exceptions */ u64 exgen[13] __attribute__((aligned(0x80))); - u64 exmc[13]; /* used for machine checks */ u64 exslb[13]; /* used for SLB/segment table misses * on the linear mapping */ /* SLB related definitions */ @@ -180,15 +179,24 @@ struct paca_struct { struct paca_struct **thread_sibling_pacas; #endif +#ifdef CONFIG_PPC_STD_MMU_64 + /* Non-maskable exceptions that are not performance critical */ + u64 exnmi[13]; /* used for system reset (nmi) */ + u64 exmc[13]; /* used for machine checks */ +#endif #ifdef CONFIG_PPC_BOOK3S_64 - /* Exclusive emergency stack pointer for machine check exception. */ + /* Exclusive stacks for system reset and machine check exception. */ + void *nmi_emergency_sp; void *mc_emergency_sp; + + u16 in_nmi; /* In nmi handler */ + /* * Flag to check whether we are in machine check early handler * and already using emergency stack. */ u16 in_mce; - u8 hmi_event_available; /* HMI event is available */ + u8 hmi_event_available; /* HMI event is available */ #endif /* Stuff for accurate time accounting */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 751b2bd944fc..ebddb2111d87 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -42,6 +42,7 @@ struct smp_ops_t { #ifdef CONFIG_PPC_SMP_MUXED_IPI void (*cause_ipi)(int cpu); #endif + int (*cause_nmi_ipi)(int cpu); void (*probe)(void); int (*kick_cpu)(int nr); int (*prepare_cpu)(int nr); @@ -112,14 +113,22 @@ extern int cpu_to_core_id(int cpu); * * Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up * in /proc/interrupts will be wrong!!! --Troy */ -#define PPC_MSG_CALL_FUNCTION 0 -#define PPC_MSG_RESCHEDULE 1 +#define PPC_MSG_CALL_FUNCTION 0 +#define PPC_MSG_RESCHEDULE 1 #define PPC_MSG_TICK_BROADCAST 2 -#define PPC_MSG_DEBUGGER_BREAK 3 +#define PPC_MSG_NMI_IPI 3 /* This is only used by the powernv kernel */ #define PPC_MSG_RM_HOST_ACTION 4 +#define NMI_IPI_ALL_OTHERS -2 + +#ifdef CONFIG_NMI_IPI +extern int smp_handle_nmi_ipi(struct pt_regs *regs); +#else +static inline int smp_handle_nmi_ipi(struct pt_regs *regs) { return 0; } +#endif + /* for irq controllers that have dedicated ipis per message (4) */ extern int smp_request_message_ipi(int virq, int message); extern const char *smp_ipi_name[]; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 8e1163426ccb..439c257dec4a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -220,6 +220,7 @@ int main(void) OFFSET(PACA_EXGEN, paca_struct, exgen); OFFSET(PACA_EXMC, paca_struct, exmc); OFFSET(PACA_EXSLB, paca_struct, exslb); + OFFSET(PACA_EXNMI, paca_struct, exnmi); OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr); OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); @@ -233,7 +234,9 @@ int main(void) OFFSET(PACAEMERGSP, paca_struct, emergency_sp); #ifdef CONFIG_PPC_BOOK3S_64 OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp); + OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp); OFFSET(PACA_IN_MCE, paca_struct, in_mce); + OFFSET(PACA_IN_NMI, paca_struct, in_nmi); #endif OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 1fce4ddd2e6c..10cb2896b2ae 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -30,7 +30,7 @@ _GLOBAL(__setup_cpu_power7) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) - bl __init_LPCR + bl __init_LPCR_ISA206 bl __init_tlb_power7 mtlr r11 blr @@ -44,7 +44,7 @@ _GLOBAL(__restore_cpu_power7) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) - bl __init_LPCR + bl __init_LPCR_ISA206 bl __init_tlb_power7 mtlr r11 blr @@ -62,7 +62,7 @@ _GLOBAL(__setup_cpu_power8) mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH li r4,0 /* LPES = 0 */ - bl __init_LPCR + bl __init_LPCR_ISA206 bl __init_HFSCR bl __init_tlb_power8 bl __init_PMU_HV @@ -84,7 +84,7 @@ _GLOBAL(__restore_cpu_power8) mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH li r4,0 /* LPES = 0 */ - bl __init_LPCR + bl __init_LPCR_ISA206 bl __init_HFSCR bl __init_tlb_power8 bl __init_PMU_HV @@ -108,7 +108,7 @@ _GLOBAL(__setup_cpu_power9) LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) andc r3, r3, r4 li r4,0 /* LPES = 0 */ - bl __init_LPCR + bl __init_LPCR_ISA300 bl __init_HFSCR bl __init_tlb_power9 bl __init_PMU_HV @@ -132,7 +132,7 @@ _GLOBAL(__restore_cpu_power9) LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) andc r3, r3, r4 li r4,0 /* LPES = 0 */ - bl __init_LPCR + bl __init_LPCR_ISA300 bl __init_HFSCR bl __init_tlb_power9 bl __init_PMU_HV @@ -150,7 +150,7 @@ __init_hvmode_206: std r5,CPU_SPEC_FEATURES(r4) blr -__init_LPCR: +__init_LPCR_ISA206: /* Setup a sane LPCR: * Called with initial LPCR in R3 and desired LPES 2-bit value in R4 * @@ -163,6 +163,11 @@ __init_LPCR: * * Other bits untouched for now */ + li r5,0x10 + rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5 + + /* POWER9 has no VRMASD */ +__init_LPCR_ISA300: rldimi r3,r4, LPCR_LPES_SH, 64-LPCR_LPES_SH-2 ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) li r5,4 @@ -170,8 +175,6 @@ __init_LPCR: clrrdi r3,r3,1 /* clear HDICE */ li r5,4 rldimi r3,r5, LPCR_VC_SH, 0 - li r5,0x10 - rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5 mtspr SPRN_LPCR,r3 isync blr diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index b94887165a10..c405c79e50cd 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -724,7 +724,16 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, */ #define MAX_WAIT_FOR_RECOVERY 300 -static void eeh_handle_normal_event(struct eeh_pe *pe) +/** + * eeh_handle_normal_event - Handle EEH events on a specific PE + * @pe: EEH PE + * + * Attempts to recover the given PE. If recovery fails or the PE has failed + * too many times, remove the PE. + * + * Returns true if @pe should no longer be used, else false. + */ +static bool eeh_handle_normal_event(struct eeh_pe *pe) { struct pci_bus *frozen_bus; struct eeh_dev *edev, *tmp; @@ -736,13 +745,18 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) if (!frozen_bus) { pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); - return; + return false; } eeh_pe_update_time_stamp(pe); pe->freeze_count++; - if (pe->freeze_count > eeh_max_freezes) - goto excess_failures; + if (pe->freeze_count > eeh_max_freezes) { + pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n" + "last hour and has been permanently disabled.\n", + pe->phb->global_number, pe->addr, + pe->freeze_count); + goto hard_fail; + } pr_warn("EEH: This PCI device has failed %d times in the last hour\n", pe->freeze_count); @@ -870,27 +884,18 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Notify device driver to resume\n"); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); - return; + return false; -excess_failures: +hard_fail: /* * About 90% of all real-life EEH failures in the field * are due to poorly seated PCI cards. Only 10% or so are * due to actual, failed cards. */ - pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n" - "last hour and has been permanently disabled.\n" - "Please try reseating or replacing it.\n", - pe->phb->global_number, pe->addr, - pe->freeze_count); - goto perm_error; - -hard_fail: pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" "Please try reseating or replacing it\n", pe->phb->global_number, pe->addr); -perm_error: eeh_slot_error_detail(pe, EEH_LOG_PERM); /* Notify all devices that they're about to go down. */ @@ -915,10 +920,21 @@ perm_error: pci_lock_rescan_remove(); pci_hp_remove_devices(frozen_bus); pci_unlock_rescan_remove(); + + /* The passed PE should no longer be used */ + return true; } } + return false; } +/** + * eeh_handle_special_event - Handle EEH events without a specific failing PE + * + * Called when an EEH event is detected but can't be narrowed down to a + * specific PE. Iterates through possible failures and handles them as + * necessary. + */ static void eeh_handle_special_event(void) { struct eeh_pe *pe, *phb_pe; @@ -982,7 +998,14 @@ static void eeh_handle_special_event(void) */ if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { - eeh_handle_normal_event(pe); + /* + * eeh_handle_normal_event() can make the PE stale if it + * determines that the PE cannot possibly be recovered. + * Don't modify the PE state if that's the case. + */ + if (eeh_handle_normal_event(pe)) + continue; + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); } else { pci_lock_rescan_remove(); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 28f8d7bed6b1..ef72065f684c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -116,7 +116,11 @@ EXC_VIRT_NONE(0x4000, 0x100) EXC_REAL_BEGIN(system_reset, 0x100, 0x100) SET_SCRATCH0(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + /* + * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is + * being used, so a nested NMI exception would corrupt it. + */ + EXCEPTION_PROLOG_PSERIES_NORI(PACA_EXNMI, system_reset_common, EXC_STD, IDLETEST, 0x100) EXC_REAL_END(system_reset, 0x100, 0x100) @@ -127,7 +131,34 @@ EXC_COMMON_BEGIN(system_reset_idle_common) b pnv_powersave_wakeup #endif -EXC_COMMON(system_reset_common, 0x100, system_reset_exception) +EXC_COMMON_BEGIN(system_reset_common) + /* + * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able + * to recover, but nested NMI will notice in_nmi and not recover + * because of the use of the NMI stack. in_nmi reentrancy is tested in + * system_reset_exception. + */ + lhz r10,PACA_IN_NMI(r13) + addi r10,r10,1 + sth r10,PACA_IN_NMI(r13) + li r10,MSR_RI + mtmsrd r10,1 + + mr r10,r1 + ld r1,PACA_NMI_EMERG_SP(r13) + subi r1,r1,INT_FRAME_SIZE + EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100, + system_reset, system_reset_exception, + ADD_NVGPRS;ADD_RECONCILE) + + /* + * The stack is no longer in use, decrement in_nmi. + */ + lhz r10,PACA_IN_NMI(r13) + subi r10,r10,1 + sth r10,PACA_IN_NMI(r13) + + b ret_from_except #ifdef CONFIG_PPC_PSERIES /* @@ -135,8 +166,9 @@ EXC_COMMON(system_reset_common, 0x100, system_reset_exception) */ TRAMP_REAL_BEGIN(system_reset_fwnmi) SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, - NOTEST, 0x100) + /* See comment at system_reset exception */ + EXCEPTION_PROLOG_PSERIES_NORI(PACA_EXNMI, system_reset_common, + EXC_STD, NOTEST, 0x100) #endif /* CONFIG_PPC_PSERIES */ @@ -240,20 +272,11 @@ machine_check_fwnmi: machine_check_pSeries_0: EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200) /* - * The following is essentially EXCEPTION_PROLOG_PSERIES_1 with the - * difference that MSR_RI is not enabled, because PACA_EXMC is being - * used, so nested machine check corrupts it. machine_check_common - * enables MSR_RI. + * MSR_RI is not enabled, because PACA_EXMC is being used, so a + * nested machine check corrupts it. machine_check_common enables + * MSR_RI. */ - ld r10,PACAKMSR(r13) - xori r10,r10,MSR_RI - mfspr r11,SPRN_SRR0 - LOAD_HANDLER(r12, machine_check_common) - mtspr SPRN_SRR0,r12 - mfspr r12,SPRN_SRR1 - mtspr SPRN_SRR1,r10 - rfid - b . /* prevent speculative execution */ + EXCEPTION_PROLOG_PSERIES_1_NORI(machine_check_common, EXC_STD) TRAMP_KVM_SKIP(PACA_EXMC, 0x200) @@ -368,9 +391,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) */ BEGIN_FTR_SECTION rlwinm. r11,r12,47-31,30,31 - beq- 4f - BRANCH_TO_COMMON(r10, machine_check_idle_common) -4: + bne machine_check_idle_common END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif @@ -938,17 +959,12 @@ EXC_VIRT_NONE(0x4e60, 0x20) TRAMP_KVM_HV(PACA_EXGEN, 0xe60) TRAMP_REAL_BEGIN(hmi_exception_early) EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60) - mr r10,r1 /* Save r1 */ - ld r1,PACAEMERGSP(r13) /* Use emergency stack */ + mr r10,r1 /* Save r1 */ + ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ - std r9,_CCR(r1) /* save CR in stackframe */ mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ - std r11,_NIP(r1) /* save HSRR0 in stackframe */ - mfspr r12,SPRN_HSRR1 /* Save SRR1 */ - std r12,_MSR(r1) /* save SRR1 in stackframe */ - std r10,0(r1) /* make stack chain pointer */ - std r0,GPR0(r1) /* save r0 in stackframe */ - std r10,GPR1(r1) /* save r1 in stackframe */ + mfspr r12,SPRN_HSRR1 /* Save HSRR1 */ + EXCEPTION_PROLOG_COMMON_1() EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) EXCEPTION_PROLOG_COMMON_3(0xe60) addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 5f202a566ec5..5a3231fedf08 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -711,13 +711,16 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) return tbl; } -void iommu_free_table(struct iommu_table *tbl, const char *node_name) +static void iommu_table_free(struct kref *kref) { unsigned long bitmap_sz; unsigned int order; + struct iommu_table *tbl; - if (!tbl) - return; + tbl = container_of(kref, struct iommu_table, it_kref); + + if (tbl->it_ops->free) + tbl->it_ops->free(tbl); if (!tbl->it_map) { kfree(tbl); @@ -733,7 +736,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) /* verify that table contains no entries */ if (!bitmap_empty(tbl->it_map, tbl->it_size)) - pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); + pr_warn("%s: Unexpected TCEs\n", __func__); /* calculate bitmap size in bytes */ bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); @@ -746,6 +749,24 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) kfree(tbl); } +struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl) +{ + if (kref_get_unless_zero(&tbl->it_kref)) + return tbl; + + return NULL; +} +EXPORT_SYMBOL_GPL(iommu_tce_table_get); + +int iommu_tce_table_put(struct iommu_table *tbl) +{ + if (WARN_ON(!tbl)) + return 0; + + return kref_put(&tbl->it_kref, iommu_table_free); +} +EXPORT_SYMBOL_GPL(iommu_tce_table_put); + /* Creates TCEs for a user provided buffer. The user buffer must be * contiguous real kernel storage (not vmalloc). The address passed here * comprises a page address and offset into that page. The dma_addr_t @@ -1004,6 +1025,31 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, } EXPORT_SYMBOL_GPL(iommu_tce_xchg); +#ifdef CONFIG_PPC_BOOK3S_64 +long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry, + unsigned long *hpa, enum dma_data_direction *direction) +{ + long ret; + + ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); + + if (!ret && ((*direction == DMA_FROM_DEVICE) || + (*direction == DMA_BIDIRECTIONAL))) { + struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT); + + if (likely(pg)) { + SetPageDirty(pg); + } else { + tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); + ret = -EFAULT; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm); +#endif + int iommu_take_ownership(struct iommu_table *tbl) { unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 16eb0b508761..5f9eada3519b 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -221,6 +221,8 @@ static void machine_check_process_queued_event(struct irq_work *work) { int index; + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + /* * For now just print it to console. * TODO: log this error event to FSP or nvram. diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 729e990a019d..0f7b15860a06 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -628,6 +628,11 @@ void __init emergency_stack_init(void) paca[i].emergency_sp = (void *)ti + THREAD_SIZE; #ifdef CONFIG_PPC_BOOK3S_64 + /* emergency stack for NMI exception handling. */ + ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); + klp_init_thread_info(ti); + paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE; + /* emergency stack for machine check exception handling. */ ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); klp_init_thread_info(ti); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 2eca1e491e2b..1eef9e73e6a3 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -87,8 +87,6 @@ volatile unsigned int cpu_callin_map[NR_CPUS]; int smt_enabled_at_boot = 1; -static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL; - /* * Returns 1 if the specified cpu should be brought up during boot. * Used to inhibit booting threads if they've been disabled or @@ -159,32 +157,33 @@ static irqreturn_t tick_broadcast_ipi_action(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t debug_ipi_action(int irq, void *data) +#ifdef CONFIG_NMI_IPI +static irqreturn_t nmi_ipi_action(int irq, void *data) { - if (crash_ipi_function_ptr) { - crash_ipi_function_ptr(get_irq_regs()); - return IRQ_HANDLED; - } - -#ifdef CONFIG_DEBUGGER - debugger_ipi(get_irq_regs()); -#endif /* CONFIG_DEBUGGER */ - + smp_handle_nmi_ipi(get_irq_regs()); return IRQ_HANDLED; } +#endif static irq_handler_t smp_ipi_action[] = { [PPC_MSG_CALL_FUNCTION] = call_function_action, [PPC_MSG_RESCHEDULE] = reschedule_action, [PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action, - [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action, +#ifdef CONFIG_NMI_IPI + [PPC_MSG_NMI_IPI] = nmi_ipi_action, +#endif }; +/* + * The NMI IPI is a fallback and not truly non-maskable. It is simpler + * than going through the call function infrastructure, and strongly + * serialized, so it is more appropriate for debugging. + */ const char *smp_ipi_name[] = { [PPC_MSG_CALL_FUNCTION] = "ipi call function", [PPC_MSG_RESCHEDULE] = "ipi reschedule", [PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast", - [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger", + [PPC_MSG_NMI_IPI] = "nmi ipi", }; /* optional function to request ipi, for controllers with >= 4 ipis */ @@ -192,14 +191,13 @@ int smp_request_message_ipi(int virq, int msg) { int err; - if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) { + if (msg < 0 || msg > PPC_MSG_NMI_IPI) return -EINVAL; - } -#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC_CORE) - if (msg == PPC_MSG_DEBUGGER_BREAK) { +#ifndef CONFIG_NMI_IPI + if (msg == PPC_MSG_NMI_IPI) return 1; - } #endif + err = request_irq(virq, smp_ipi_action[msg], IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND, smp_ipi_name[msg], NULL); @@ -277,8 +275,10 @@ irqreturn_t smp_ipi_demux_relaxed(void) scheduler_ipi(); if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST)) tick_broadcast_ipi_handler(); - if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK)) - debug_ipi_action(0, NULL); +#ifdef CONFIG_NMI_IPI + if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI)) + nmi_ipi_action(0, NULL); +#endif } while (info->messages); return IRQ_HANDLED; @@ -315,6 +315,187 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) do_message_pass(cpu, PPC_MSG_CALL_FUNCTION); } +#ifdef CONFIG_NMI_IPI + +/* + * "NMI IPI" system. + * + * NMI IPIs may not be recoverable, so should not be used as ongoing part of + * a running system. They can be used for crash, debug, halt/reboot, etc. + * + * NMI IPIs are globally single threaded. No more than one in progress at + * any time. + * + * The IPI call waits with interrupts disabled until all targets enter the + * NMI handler, then the call returns. + * + * No new NMI can be initiated until targets exit the handler. + * + * The IPI call may time out without all targets entering the NMI handler. + * In that case, there is some logic to recover (and ignore subsequent + * NMI interrupts that may eventually be raised), but the platform interrupt + * handler may not be able to distinguish this from other exception causes, + * which may cause a crash. + */ + +static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0); +static struct cpumask nmi_ipi_pending_mask; +static int nmi_ipi_busy_count = 0; +static void (*nmi_ipi_function)(struct pt_regs *) = NULL; + +static void nmi_ipi_lock_start(unsigned long *flags) +{ + raw_local_irq_save(*flags); + hard_irq_disable(); + while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { + raw_local_irq_restore(*flags); + cpu_relax(); + raw_local_irq_save(*flags); + hard_irq_disable(); + } +} + +static void nmi_ipi_lock(void) +{ + while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) + cpu_relax(); +} + +static void nmi_ipi_unlock(void) +{ + smp_mb(); + WARN_ON(atomic_read(&__nmi_ipi_lock) != 1); + atomic_set(&__nmi_ipi_lock, 0); +} + +static void nmi_ipi_unlock_end(unsigned long *flags) +{ + nmi_ipi_unlock(); + raw_local_irq_restore(*flags); +} + +/* + * Platform NMI handler calls this to ack + */ +int smp_handle_nmi_ipi(struct pt_regs *regs) +{ + void (*fn)(struct pt_regs *); + unsigned long flags; + int me = raw_smp_processor_id(); + int ret = 0; + + /* + * Unexpected NMIs are possible here because the interrupt may not + * be able to distinguish NMI IPIs from other types of NMIs, or + * because the caller may have timed out. + */ + nmi_ipi_lock_start(&flags); + if (!nmi_ipi_busy_count) + goto out; + if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask)) + goto out; + + fn = nmi_ipi_function; + if (!fn) + goto out; + + cpumask_clear_cpu(me, &nmi_ipi_pending_mask); + nmi_ipi_busy_count++; + nmi_ipi_unlock(); + + ret = 1; + + fn(regs); + + nmi_ipi_lock(); + nmi_ipi_busy_count--; +out: + nmi_ipi_unlock_end(&flags); + + return ret; +} + +static void do_smp_send_nmi_ipi(int cpu) +{ + if (smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu)) + return; + + if (cpu >= 0) { + do_message_pass(cpu, PPC_MSG_NMI_IPI); + } else { + int c; + + for_each_online_cpu(c) { + if (c == raw_smp_processor_id()) + continue; + do_message_pass(c, PPC_MSG_NMI_IPI); + } + } +} + +/* + * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS. + * - fn is the target callback function. + * - delay_us > 0 is the delay before giving up waiting for targets to + * enter the handler, == 0 specifies indefinite delay. + */ +static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) +{ + unsigned long flags; + int me = raw_smp_processor_id(); + int ret = 1; + + BUG_ON(cpu == me); + BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS); + + if (unlikely(!smp_ops)) + return 0; + + /* Take the nmi_ipi_busy count/lock with interrupts hard disabled */ + nmi_ipi_lock_start(&flags); + while (nmi_ipi_busy_count) { + nmi_ipi_unlock_end(&flags); + cpu_relax(); + nmi_ipi_lock_start(&flags); + } + + nmi_ipi_function = fn; + + if (cpu < 0) { + /* ALL_OTHERS */ + cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask); + cpumask_clear_cpu(me, &nmi_ipi_pending_mask); + } else { + /* cpumask starts clear */ + cpumask_set_cpu(cpu, &nmi_ipi_pending_mask); + } + nmi_ipi_busy_count++; + nmi_ipi_unlock(); + + do_smp_send_nmi_ipi(cpu); + + while (!cpumask_empty(&nmi_ipi_pending_mask)) { + udelay(1); + if (delay_us) { + delay_us--; + if (!delay_us) + break; + } + } + + nmi_ipi_lock(); + if (!cpumask_empty(&nmi_ipi_pending_mask)) { + /* Could not gather all CPUs */ + ret = 0; + cpumask_clear(&nmi_ipi_pending_mask); + } + nmi_ipi_busy_count--; + nmi_ipi_unlock_end(&flags); + + return ret; +} +#endif /* CONFIG_NMI_IPI */ + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST void tick_broadcast(const struct cpumask *mask) { @@ -325,29 +506,22 @@ void tick_broadcast(const struct cpumask *mask) } #endif -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) -void smp_send_debugger_break(void) +#ifdef CONFIG_DEBUGGER +void debugger_ipi_callback(struct pt_regs *regs) { - int cpu; - int me = raw_smp_processor_id(); - - if (unlikely(!smp_ops)) - return; + debugger_ipi(regs); +} - for_each_online_cpu(cpu) - if (cpu != me) - do_message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); +void smp_send_debugger_break(void) +{ + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000); } #endif #ifdef CONFIG_KEXEC_CORE void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { - crash_ipi_function_ptr = crash_ipi_callback; - if (crash_ipi_callback) { - mb(); - smp_send_debugger_break(); - } + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000); } #endif @@ -441,7 +615,14 @@ int generic_cpu_disable(void) /* Update affinity of all IRQs previously aimed at this CPU */ irq_migrate_all_off_this_cpu(); - /* Give the CPU time to drain in-flight ones */ + /* + * Depending on the details of the interrupt controller, it's possible + * that one of the interrupts we just migrated away from this CPU is + * actually already pending on this CPU. If we leave it in that state + * the interrupt will never be EOI'ed, and will never fire again. So + * temporarily enable interrupts here, to allow any pending interrupt to + * be received (and EOI'ed), before we take this CPU offline. + */ local_irq_enable(); mdelay(1); local_irq_disable(); diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 949957b97ead..4437c70c7c2b 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -789,9 +789,9 @@ static int register_cpu_online(unsigned int cpu) return 0; } +#ifdef CONFIG_HOTPLUG_CPU static int unregister_cpu_online(unsigned int cpu) { -#ifdef CONFIG_HOTPLUG_CPU struct cpu *c = &per_cpu(cpu_devices, cpu); struct device *s = &c->dev; struct device_attribute *attrs, *pmc_attrs; @@ -870,9 +870,11 @@ static int unregister_cpu_online(unsigned int cpu) cacheinfo_cpu_offline(cpu); of_node_put(s->of_node); s->of_node = NULL; -#endif /* CONFIG_HOTPLUG_CPU */ return 0; } +#else /* !CONFIG_HOTPLUG_CPU */ +#define unregister_cpu_online NULL +#endif #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE ssize_t arch_cpu_probe(const char *buf, size_t count) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 76f6045b021b..d4e545d27ef9 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -279,18 +279,35 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) void system_reset_exception(struct pt_regs *regs) { + /* + * Avoid crashes in case of nested NMI exceptions. Recoverability + * is determined by RI and in_nmi + */ + bool nested = in_nmi(); + if (!nested) + nmi_enter(); + /* See if any machine dependent calls */ if (ppc_md.system_reset_exception) { if (ppc_md.system_reset_exception(regs)) - return; + goto out; } die("System Reset", regs, SIGABRT); +out: +#ifdef CONFIG_PPC_BOOK3S_64 + BUG_ON(get_paca()->in_nmi == 0); + if (get_paca()->in_nmi > 1) + panic("Unrecoverable nested System Reset"); +#endif /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) panic("Unrecoverable System Reset"); + if (!nested) + nmi_exit(); + /* What should we do here? We could issue a shutdown or hard reset. */ } @@ -306,8 +323,6 @@ long machine_check_early(struct pt_regs *regs) __this_cpu_inc(irq_stat.mce_exceptions); - add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); return handled; @@ -741,6 +756,8 @@ void machine_check_exception(struct pt_regs *regs) __this_cpu_inc(irq_stat.mce_exceptions); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + /* See if any machine dependent calls. In theory, we would want * to call the CPU first, and call the ppc_md. one if the CPU * one returns a positive number. However there is existing code diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 96f835cbf212..e0a2d8e806ed 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -314,6 +314,25 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, } EXPORT_SYMBOL_GPL(mm_iommu_lookup); +struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm, + unsigned long ua, unsigned long size) +{ + struct mm_iommu_table_group_mem_t *mem, *ret = NULL; + + list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list, + next) { + if ((mem->ua <= ua) && + (ua + size <= mem->ua + + (mem->entries << PAGE_SHIFT))) { + ret = mem; + break; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_lookup_rm); + struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, unsigned long ua, unsigned long entries) { @@ -345,6 +364,26 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, } EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa); +long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, + unsigned long ua, unsigned long *hpa) +{ + const long entry = (ua - mem->ua) >> PAGE_SHIFT; + void *va = &mem->hpas[entry]; + unsigned long *pa; + + if (entry >= mem->entries) + return -EFAULT; + + pa = (void *) vmalloc_to_phys(va); + if (!pa) + return -EFAULT; + + *hpa = *pa | (ua & ~PAGE_MASK); + + return 0; +} +EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm); + long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem) { if (atomic64_inc_not_zero(&mem->mapped)) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 5e17c4e873a5..02e71402fdd3 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -17,7 +17,6 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> -static DEFINE_RAW_SPINLOCK(native_tlbie_lock); #define RIC_FLUSH_TLB 0 #define RIC_FLUSH_PWC 1 @@ -203,15 +202,9 @@ void radix__flush_tlb_mm(struct mm_struct *mm) if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; - if (!mm_is_thread_local(mm)) { - int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); - - if (lock_tlbie) - raw_spin_lock(&native_tlbie_lock); + if (!mm_is_thread_local(mm)) _tlbie_pid(pid, RIC_FLUSH_ALL); - if (lock_tlbie) - raw_spin_unlock(&native_tlbie_lock); - } else + else _tlbiel_pid(pid, RIC_FLUSH_ALL); no_context: preempt_enable(); @@ -235,15 +228,9 @@ void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; - if (!mm_is_thread_local(mm)) { - int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); - - if (lock_tlbie) - raw_spin_lock(&native_tlbie_lock); + if (!mm_is_thread_local(mm)) _tlbie_pid(pid, RIC_FLUSH_PWC); - if (lock_tlbie) - raw_spin_unlock(&native_tlbie_lock); - } else + else tlbiel_pwc(pid); no_context: preempt_enable(); @@ -260,15 +247,9 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, pid = mm ? mm->context.id : 0; if (unlikely(pid == MMU_NO_CONTEXT)) goto bail; - if (!mm_is_thread_local(mm)) { - int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); - - if (lock_tlbie) - raw_spin_lock(&native_tlbie_lock); + if (!mm_is_thread_local(mm)) _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB); - if (lock_tlbie) - raw_spin_unlock(&native_tlbie_lock); - } else + else _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); bail: preempt_enable(); @@ -289,13 +270,7 @@ EXPORT_SYMBOL(radix__flush_tlb_page); void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) { - int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); - - if (lock_tlbie) - raw_spin_lock(&native_tlbie_lock); _tlbie_pid(0, RIC_FLUSH_ALL); - if (lock_tlbie) - raw_spin_unlock(&native_tlbie_lock); } EXPORT_SYMBOL(radix__flush_tlb_kernel_range); @@ -357,7 +332,6 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, unsigned long addr; int local = mm_is_thread_local(mm); unsigned long ap = mmu_get_ap(psize); - int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long page_size = 1UL << mmu_psize_defs[psize].shift; @@ -378,13 +352,8 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, if (local) _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB); - else { - if (lock_tlbie) - raw_spin_lock(&native_tlbie_lock); + else _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); - if (lock_tlbie) - raw_spin_unlock(&native_tlbie_lock); - } } err_out: preempt_enable(); diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig index b625a2c6f4f2..e4c745981912 100644 --- a/arch/powerpc/platforms/52xx/Kconfig +++ b/arch/powerpc/platforms/52xx/Kconfig @@ -33,7 +33,6 @@ config PPC_EFIKA bool "bPlan Efika 5k2. MPC5200B based computer" depends on PPC_MPC52xx select PPC_RTAS - select RTAS_PROC select PPC_NATIVE config PPC_LITE5200 diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 0975066f76e8..f51fd35f4618 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -344,6 +344,7 @@ done: } struct smp_ops_t smp_85xx_ops = { + .cause_nmi_ipi = NULL, .kick_cpu = smp_85xx_kick_cpu, .cpu_bootable = smp_generic_cpu_bootable, #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c index af09baee22cb..020e84a47a32 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c @@ -105,6 +105,7 @@ smp_86xx_setup_cpu(int cpu_nr) struct smp_ops_t smp_86xx_ops = { + .cause_nmi_ipi = NULL, .message_pass = smp_mpic_message_pass, .probe = smp_mpic_probe, .kick_cpu = smp_86xx_kick_cpu, diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index a6bbbaba14a3..871d38479a25 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -211,7 +211,7 @@ void iic_request_IPIs(void) iic_request_ipi(PPC_MSG_CALL_FUNCTION); iic_request_ipi(PPC_MSG_RESCHEDULE); iic_request_ipi(PPC_MSG_TICK_BROADCAST); - iic_request_ipi(PPC_MSG_DEBUGGER_BREAK); + iic_request_ipi(PPC_MSG_NMI_IPI); } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index e7d075077cb0..a88944db9fc3 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -88,11 +88,14 @@ static void cbe_power_save(void) static int cbe_system_reset_exception(struct pt_regs *regs) { switch (regs->msr & SRR1_WAKEMASK) { - case SRR1_WAKEEE: - do_IRQ(regs); - break; case SRR1_WAKEDEC: - timer_interrupt(regs); + set_dec(1); + case SRR1_WAKEEE: + /* + * Handle these when interrupts get re-enabled and we take + * them as regular exceptions. We are in an NMI context + * and can't handle these here. + */ break; case SRR1_WAKEMT: return cbe_sysreset_hack(); diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c index b6c9a0dcc924..14515040f7cd 100644 --- a/arch/powerpc/platforms/chrp/smp.c +++ b/arch/powerpc/platforms/chrp/smp.c @@ -44,6 +44,7 @@ static void smp_chrp_setup_cpu(int cpu_nr) /* CHRP with openpic */ struct smp_ops_t chrp_smp_ops = { + .cause_nmi_ipi = NULL, .message_pass = smp_mpic_message_pass, .probe = smp_mpic_probe, .kick_cpu = smp_chrp_kick_cpu, diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c index 75b296bc51af..44e0d9226f0a 100644 --- a/arch/powerpc/platforms/pasemi/idle.c +++ b/arch/powerpc/platforms/pasemi/idle.c @@ -53,11 +53,14 @@ static int pasemi_system_reset_exception(struct pt_regs *regs) regs->nip = regs->link; switch (regs->msr & SRR1_WAKEMASK) { - case SRR1_WAKEEE: - do_IRQ(regs); - break; case SRR1_WAKEDEC: - timer_interrupt(regs); + set_dec(1); + case SRR1_WAKEEE: + /* + * Handle these when interrupts get re-enabled and we take + * them as regular exceptions. We are in an NMI context + * and can't handle these here. + */ break; default: /* do system reset */ diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index a3207cea360e..2cd99eb30762 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -447,6 +447,7 @@ void __init smp_psurge_give_timebase(void) struct smp_ops_t psurge_smp_ops = { .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */ .cause_ipi = smp_psurge_cause_ipi, + .cause_nmi_ipi = NULL, .probe = smp_psurge_probe, .kick_cpu = smp_psurge_kick_cpu, .setup_cpu = smp_psurge_setup_cpu, diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 6fb5522acd70..d12ea7b9fd47 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -412,11 +412,14 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) * been set for the PE, we will set EEH_PE_CFG_BLOCKED for * that PE to block its config space. * + * Broadcom BCM5718 2-ports NICs (14e4:1656) * Broadcom Austin 4-ports NICs (14e4:1657) * Broadcom Shiner 4-ports 1G NICs (14e4:168a) * Broadcom Shiner 2-ports 10G NICs (14e4:168e) */ if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && + pdn->device_id == 0x1656) || + (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && pdn->device_id == 0x1657) || (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && pdn->device_id == 0x168a) || @@ -1102,6 +1105,13 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) return -EIO; } + /* + * If dealing with the root bus (or the bus underneath the + * root port), we reset the bus underneath the root port. + * + * The cxl driver depends on this behaviour for bi-modal card + * switching. + */ if (pci_is_root_bus(bus) || pci_is_root_bus(bus->parent)) return pnv_eeh_root_reset(hose, option); diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 4c88c3e6ec9e..067defeea691 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -203,7 +203,7 @@ long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, pe_err(npe, "Failed to configure TCE table, err %lld\n", rc); return rc; } - pnv_pci_phb3_tce_invalidate_entire(phb, false); + pnv_pci_ioda2_tce_invalidate_entire(phb, false); /* Add the table to the list so its TCE cache will get invalidated */ pnv_pci_link_table_and_group(phb->hose->node, num, @@ -227,7 +227,7 @@ long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num) pe_err(npe, "Unmapping failed, ret = %lld\n", rc); return rc; } - pnv_pci_phb3_tce_invalidate_entire(phb, false); + pnv_pci_ioda2_tce_invalidate_entire(phb, false); pnv_pci_unlink_table_and_group(npe->table_group.tables[num], &npe->table_group); @@ -293,7 +293,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) 0 /* bypass base */, top); if (rc == OPAL_SUCCESS) - pnv_pci_phb3_tce_invalidate_entire(phb, false); + pnv_pci_ioda2_tce_invalidate_entire(phb, false); return rc; } @@ -357,7 +357,7 @@ void pnv_npu_take_ownership(struct pnv_ioda_pe *npe) pe_err(npe, "Failed to disable bypass, err %lld\n", rc); return; } - pnv_pci_phb3_tce_invalidate_entire(npe->phb, false); + pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false); } struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7eebc76721ea..6fdbd383f676 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1425,8 +1425,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe iommu_group_put(pe->table_group.group); BUG_ON(pe->table_group.group); } - pnv_pci_ioda2_table_free_pages(tbl); - iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); + iommu_tce_table_put(tbl); } static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) @@ -1861,6 +1860,17 @@ static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index, return ret; } + +static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction) +{ + long ret = pnv_tce_xchg(tbl, index, hpa, direction); + + if (!ret) + pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true); + + return ret; +} #endif static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, @@ -1875,6 +1885,7 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { .set = pnv_ioda1_tce_build, #ifdef CONFIG_IOMMU_API .exchange = pnv_ioda1_tce_xchg, + .exchange_rm = pnv_ioda1_tce_xchg_rm, #endif .clear = pnv_ioda1_tce_free, .get = pnv_tce_get, @@ -1884,7 +1895,7 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { #define PHB3_TCE_KILL_INVAL_PE PPC_BIT(1) #define PHB3_TCE_KILL_INVAL_ONE PPC_BIT(2) -void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm) +static void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm) { __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(phb, rm); const unsigned long val = PHB3_TCE_KILL_INVAL_ALL; @@ -1949,7 +1960,7 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, { struct iommu_table_group_link *tgl; - list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { + list_for_each_entry_lockless(tgl, &tbl->it_group_list, next) { struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); struct pnv_phb *phb = pe->phb; @@ -1980,6 +1991,14 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, } } +void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm) +{ + if (phb->model == PNV_PHB_MODEL_NPU || phb->model == PNV_PHB_MODEL_PHB3) + pnv_pci_phb3_tce_invalidate_entire(phb, rm); + else + opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL, 0, 0, 0, 0); +} + static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, @@ -2005,6 +2024,17 @@ static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index, return ret; } + +static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction) +{ + long ret = pnv_tce_xchg(tbl, index, hpa, direction); + + if (!ret) + pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true); + + return ret; +} #endif static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, @@ -2018,13 +2048,13 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, static void pnv_ioda2_table_free(struct iommu_table *tbl) { pnv_pci_ioda2_table_free_pages(tbl); - iommu_free_table(tbl, "pnv"); } static struct iommu_table_ops pnv_ioda2_iommu_ops = { .set = pnv_ioda2_tce_build, #ifdef CONFIG_IOMMU_API .exchange = pnv_ioda2_tce_xchg, + .exchange_rm = pnv_ioda2_tce_xchg_rm, #endif .clear = pnv_ioda2_tce_free, .get = pnv_tce_get, @@ -2129,6 +2159,9 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, found: tbl = pnv_pci_table_alloc(phb->hose->node); + if (WARN_ON(!tbl)) + return; + iommu_register_group(&pe->table_group, phb->hose->global_number, pe->pe_number); pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); @@ -2204,7 +2237,7 @@ found: __free_pages(tce_mem, get_order(tce32_segsz * segs)); if (tbl) { pnv_pci_unlink_table_and_group(tbl, &pe->table_group); - iommu_free_table(tbl, "pnv"); + iommu_tce_table_put(tbl); } } @@ -2294,16 +2327,16 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, if (!tbl) return -ENOMEM; + tbl->it_ops = &pnv_ioda2_iommu_ops; + ret = pnv_pci_ioda2_table_alloc_pages(nid, bus_offset, page_shift, window_size, levels, tbl); if (ret) { - iommu_free_table(tbl, "pnv"); + iommu_tce_table_put(tbl); return ret; } - tbl->it_ops = &pnv_ioda2_iommu_ops; - *ptbl = tbl; return 0; @@ -2344,7 +2377,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) if (rc) { pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", rc); - pnv_ioda2_table_free(tbl); + iommu_tce_table_put(tbl); return rc; } @@ -2415,7 +2448,8 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, tce_table_size /= direct_table_size; tce_table_size <<= 3; - tce_table_size = _ALIGN_UP(tce_table_size, direct_table_size); + tce_table_size = max_t(unsigned long, + tce_table_size, direct_table_size); } return bytes; @@ -2432,7 +2466,7 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) pnv_pci_ioda2_unset_window(&pe->table_group, 0); if (pe->pbus) pnv_ioda_setup_bus_dma(pe, pe->pbus, false); - pnv_ioda2_table_free(tbl); + iommu_tce_table_put(tbl); } static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) @@ -3405,7 +3439,7 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe) } free_pages(tbl->it_base, get_order(tbl->it_size << 3)); - iommu_free_table(tbl, "pnv"); + iommu_tce_table_put(tbl); } static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) @@ -3432,7 +3466,7 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) } pnv_pci_ioda2_table_free_pages(tbl); - iommu_free_table(tbl, "pnv"); + iommu_tce_table_put(tbl); } static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe, diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index a43f22dc069e..935ccb249a8a 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -766,7 +766,11 @@ struct iommu_table *pnv_pci_table_alloc(int nid) struct iommu_table *tbl; tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid); + if (!tbl) + return NULL; + INIT_LIST_HEAD_RCU(&tbl->it_group_list); + kref_init(&tbl->it_kref); return tbl; } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 4eab713136d1..18c8a2fa03b8 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -242,7 +242,7 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, /* Nvlink functions */ extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass); -extern void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm); +extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm); extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe); extern long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, struct iommu_table *tbl); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 5189445db164..4aff754b6f2c 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -302,6 +302,7 @@ static void __init pnv_smp_probe(void) static struct smp_ops_t pnv_smp_ops = { .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */ .cause_ipi = NULL, /* Filled at runtime by pnv_smp_probe() */ + .cause_nmi_ipi = NULL, .probe = pnv_smp_probe, .prepare_cpu = pnv_smp_prepare_cpu, .kick_cpu = pnv_smp_kick_cpu, diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c index 60154d08debf..1d1ad5df106f 100644 --- a/arch/powerpc/platforms/ps3/smp.c +++ b/arch/powerpc/platforms/ps3/smp.c @@ -77,7 +77,7 @@ static void __init ps3_smp_probe(void) BUILD_BUG_ON(PPC_MSG_CALL_FUNCTION != 0); BUILD_BUG_ON(PPC_MSG_RESCHEDULE != 1); BUILD_BUG_ON(PPC_MSG_TICK_BROADCAST != 2); - BUILD_BUG_ON(PPC_MSG_DEBUGGER_BREAK != 3); + BUILD_BUG_ON(PPC_MSG_NMI_IPI != 3); for (i = 0; i < MSG_COUNT; i++) { result = ps3_event_receive_port_setup(cpu, &virqs[i]); @@ -96,7 +96,7 @@ static void __init ps3_smp_probe(void) ps3_register_ipi_irq(cpu, virqs[i]); } - ps3_register_ipi_debug_brk(cpu, virqs[PPC_MSG_DEBUGGER_BREAK]); + ps3_register_ipi_debug_brk(cpu, virqs[PPC_MSG_NMI_IPI]); DBG(" <- %s:%d: (%d)\n", __func__, __LINE__, cpu); } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 4d757eaa46bf..8374adee27e3 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -74,6 +74,7 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node) goto fail_exit; INIT_LIST_HEAD_RCU(&tbl->it_group_list); + kref_init(&tbl->it_kref); tgl->table_group = table_group; list_add_rcu(&tgl->next, &tbl->it_group_list); @@ -115,7 +116,7 @@ static void iommu_pseries_free_group(struct iommu_table_group *table_group, BUG_ON(table_group->group); } #endif - iommu_free_table(tbl, node_name); + iommu_tce_table_put(tbl); kfree(table_group); } @@ -550,6 +551,7 @@ static void iommu_table_setparms(struct pci_controller *phb, static void iommu_table_setparms_lpar(struct pci_controller *phb, struct device_node *dn, struct iommu_table *tbl, + struct iommu_table_group *table_group, const __be32 *dma_window) { unsigned long offset, size; @@ -563,6 +565,9 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb, tbl->it_type = TCE_PCI; tbl->it_offset = offset >> tbl->it_page_shift; tbl->it_size = size >> tbl->it_page_shift; + + table_group->tce32_start = offset; + table_group->tce32_size = size; } struct iommu_table_ops iommu_table_pseries_ops = { @@ -651,8 +656,38 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size); } +#ifdef CONFIG_IOMMU_API +static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned + long *tce, enum dma_data_direction *direction) +{ + long rc; + unsigned long ioba = (unsigned long) index << tbl->it_page_shift; + unsigned long flags, oldtce = 0; + u64 proto_tce = iommu_direction_to_tce_perm(*direction); + unsigned long newtce = *tce | proto_tce; + + spin_lock_irqsave(&tbl->large_pool.lock, flags); + + rc = plpar_tce_get((u64)tbl->it_index, ioba, &oldtce); + if (!rc) + rc = plpar_tce_put((u64)tbl->it_index, ioba, newtce); + + if (!rc) { + *direction = iommu_tce_direction(oldtce); + *tce = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE); + } + + spin_unlock_irqrestore(&tbl->large_pool.lock, flags); + + return rc; +} +#endif + struct iommu_table_ops iommu_table_lpar_multi_ops = { .set = tce_buildmulti_pSeriesLP, +#ifdef CONFIG_IOMMU_API + .exchange = tce_exchange_pseries, +#endif .clear = tce_freemulti_pSeriesLP, .get = tce_get_pSeriesLP }; @@ -689,7 +724,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) if (!ppci->table_group) { ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); tbl = ppci->table_group->tables[0]; - iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); + iommu_table_setparms_lpar(ppci->phb, pdn, tbl, + ppci->table_group, dma_window); tbl->it_ops = &iommu_table_lpar_multi_ops; iommu_init_table(tbl, ppci->phb->node); iommu_register_group(ppci->table_group, @@ -1143,7 +1179,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) if (!pci->table_group) { pci->table_group = iommu_pseries_alloc_group(pci->phb->node); tbl = pci->table_group->tables[0]; - iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); + iommu_table_setparms_lpar(pci->phb, pdn, tbl, + pci->table_group, dma_window); tbl->it_ops = &iommu_table_lpar_multi_ops; iommu_init_table(tbl, pci->phb->node); iommu_register_group(pci->table_group, diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 904a677208d1..bb70b26334f0 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -386,6 +386,10 @@ int pSeries_system_reset_exception(struct pt_regs *regs) } fwnmi_release_errinfo(); } + + if (smp_handle_nmi_ipi(regs)) + return 1; + return 0; /* need to perform reset */ } diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 12ff5bef67fd..52ca6b311d44 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -189,6 +189,27 @@ static void smp_pseries_cause_ipi(int cpu) icp_ops->cause_ipi(cpu); } +static int pseries_cause_nmi_ipi(int cpu) +{ + int hwcpu; + + if (cpu == NMI_IPI_ALL_OTHERS) { + hwcpu = H_SIGNAL_SYS_RESET_ALL_OTHERS; + } else { + if (cpu < 0) { + WARN_ONCE(true, "incorrect cpu parameter %d", cpu); + return 0; + } + + hwcpu = get_hard_smp_processor_id(cpu); + } + + if (plapr_signal_sys_reset(hwcpu) == H_SUCCESS) + return 1; + + return 0; +} + static __init void pSeries_smp_probe(void) { xics_smp_probe(); @@ -202,6 +223,7 @@ static __init void pSeries_smp_probe(void) static struct smp_ops_t pseries_smp_ops = { .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */ .cause_ipi = NULL, /* Filled at runtime by pSeries_smp_probe() */ + .cause_nmi_ipi = pseries_cause_nmi_ipi, .probe = pSeries_smp_probe, .kick_cpu = smp_pSeries_kick_cpu, .setup_cpu = smp_setup_cpu, diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 720493932486..28b09fd797ec 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1318,7 +1318,7 @@ static void vio_dev_release(struct device *dev) struct iommu_table *tbl = get_iommu_table_base(dev); if (tbl) - iommu_free_table(tbl, of_node_full_name(dev->of_node)); + iommu_tce_table_put(tbl); of_node_put(dev->of_node); kfree(to_vio_dev(dev)); } diff --git a/arch/powerpc/scripts/gcc-check-mprofile-kernel.sh b/arch/powerpc/tools/gcc-check-mprofile-kernel.sh index c658d8cf760b..c658d8cf760b 100755 --- a/arch/powerpc/scripts/gcc-check-mprofile-kernel.sh +++ b/arch/powerpc/tools/gcc-check-mprofile-kernel.sh diff --git a/arch/powerpc/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh index ec2d5c835170..ec2d5c835170 100755 --- a/arch/powerpc/relocs_check.sh +++ b/arch/powerpc/tools/relocs_check.sh diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 2b21e90fff8d..f11f65634aab 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -418,7 +418,22 @@ int cpus_are_in_xmon(void) { return !cpumask_empty(&cpus_in_xmon); } -#endif + +static bool wait_for_other_cpus(int ncpus) +{ + unsigned long timeout; + + /* We wait for 2s, which is a metric "little while" */ + for (timeout = 20000; timeout != 0; --timeout) { + if (cpumask_weight(&cpus_in_xmon) >= ncpus) + return true; + udelay(100); + barrier(); + } + + return false; +} +#endif /* CONFIG_SMP */ static inline int unrecoverable_excp(struct pt_regs *regs) { @@ -440,7 +455,6 @@ static int xmon_core(struct pt_regs *regs, int fromipi) #ifdef CONFIG_SMP int cpu; int secondary; - unsigned long timeout; #endif local_irq_save(flags); @@ -527,13 +541,17 @@ static int xmon_core(struct pt_regs *regs, int fromipi) xmon_owner = cpu; mb(); if (ncpus > 1) { - smp_send_debugger_break(); - /* wait for other cpus to come in */ - for (timeout = 100000000; timeout != 0; --timeout) { - if (cpumask_weight(&cpus_in_xmon) >= ncpus) - break; - barrier(); - } + /* + * A system reset (trap == 0x100) can be triggered on + * all CPUs, so when we come in via 0x100 try waiting + * for the other CPUs to come in before we send the + * debugger break (IPI). This is similar to + * crash_kexec_secondary(). + */ + if (TRAP(regs) != 0x100 || !wait_for_other_cpus(ncpus)) + smp_send_debugger_break(); + + wait_for_other_cpus(ncpus); } remove_bpts(); disable_surveillance(); @@ -1351,9 +1369,19 @@ const char *getvecname(unsigned long vec) case 0x100: ret = "(System Reset)"; break; case 0x200: ret = "(Machine Check)"; break; case 0x300: ret = "(Data Access)"; break; - case 0x380: ret = "(Data SLB Access)"; break; + case 0x380: + if (radix_enabled()) + ret = "(Data Access Out of Range)"; + else + ret = "(Data SLB Access)"; + break; case 0x400: ret = "(Instruction Access)"; break; - case 0x480: ret = "(Instruction SLB Access)"; break; + case 0x480: + if (radix_enabled()) + ret = "(Instruction Access Out of Range)"; + else + ret = "(Instruction SLB Access)"; + break; case 0x500: ret = "(Hardware Interrupt)"; break; case 0x600: ret = "(Alignment)"; break; case 0x700: ret = "(Program Check)"; break; @@ -2235,7 +2263,9 @@ static void dump_one_paca(int cpu) DUMP(p, kernel_msr, "lx"); DUMP(p, emergency_sp, "p"); #ifdef CONFIG_PPC_BOOK3S_64 + DUMP(p, nmi_emergency_sp, "p"); DUMP(p, mc_emergency_sp, "p"); + DUMP(p, in_nmi, "x"); DUMP(p, in_mce, "x"); DUMP(p, hmi_event_available, "x"); #endif |