From 3b702ddd066813952154c22dd76d3b0c10644940 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 4 Jan 2019 22:31:52 +0100 Subject: powerpc/hvsi: Fix spelling mistake: "lenght" should be "length" Signed-off-by: Matteo Croce Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hvsi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/hvsi.h b/arch/powerpc/include/asm/hvsi.h index 3fdc54df63c9..464a7519ed64 100644 --- a/arch/powerpc/include/asm/hvsi.h +++ b/arch/powerpc/include/asm/hvsi.h @@ -64,7 +64,7 @@ struct hvsi_priv { unsigned int inbuf_len; /* data in input buffer */ unsigned char inbuf[HVSI_INBUF_SIZE]; unsigned int inbuf_cur; /* Cursor in input buffer */ - unsigned int inbuf_pktlen; /* packet lenght from cursor */ + unsigned int inbuf_pktlen; /* packet length from cursor */ atomic_t seqno; /* packet sequence number */ unsigned int opened:1; /* driver opened */ unsigned int established:1; /* protocol established */ -- cgit v1.2.3 From 8acb88682cc00a41a677c2455a7c992d78e43035 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Jan 2019 15:08:00 +0000 Subject: powerpc/ipic: drop unused functions ipic_set_highest_priority(), ipic_enable_mcp() and ipic_disable_mcp() are unused. This patch drops them. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ipic.h | 3 --- arch/powerpc/sysdev/ipic.c | 35 ----------------------------------- 2 files changed, 38 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/ipic.h b/arch/powerpc/include/asm/ipic.h index 3dbd47f2bffe..abad50a745db 100644 --- a/arch/powerpc/include/asm/ipic.h +++ b/arch/powerpc/include/asm/ipic.h @@ -69,10 +69,7 @@ enum ipic_mcp_irq { IPIC_MCP_MU = 7, }; -extern void ipic_set_highest_priority(unsigned int irq); extern void ipic_set_default_priority(void); -extern void ipic_enable_mcp(enum ipic_mcp_irq mcp_irq); -extern void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq); extern u32 ipic_get_mcp_status(void); extern void ipic_clear_mcp_status(u32 mask); diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index 8030a0f55e96..fd129c8ecceb 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -771,21 +771,6 @@ struct ipic * __init ipic_init(struct device_node *node, unsigned int flags) return ipic; } -void ipic_set_highest_priority(unsigned int virq) -{ - struct ipic *ipic = ipic_from_irq(virq); - unsigned int src = virq_to_hw(virq); - u32 temp; - - temp = ipic_read(ipic->regs, IPIC_SICFR); - - /* clear and set HPI */ - temp &= 0x7f000000; - temp |= (src & 0x7f) << 24; - - ipic_write(ipic->regs, IPIC_SICFR, temp); -} - void ipic_set_default_priority(void) { ipic_write(primary_ipic->regs, IPIC_SIPRR_A, IPIC_PRIORITY_DEFAULT); @@ -796,26 +781,6 @@ void ipic_set_default_priority(void) ipic_write(primary_ipic->regs, IPIC_SMPRR_B, IPIC_PRIORITY_DEFAULT); } -void ipic_enable_mcp(enum ipic_mcp_irq mcp_irq) -{ - struct ipic *ipic = primary_ipic; - u32 temp; - - temp = ipic_read(ipic->regs, IPIC_SERMR); - temp |= (1 << (31 - mcp_irq)); - ipic_write(ipic->regs, IPIC_SERMR, temp); -} - -void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq) -{ - struct ipic *ipic = primary_ipic; - u32 temp; - - temp = ipic_read(ipic->regs, IPIC_SERMR); - temp &= (1 << (31 - mcp_irq)); - ipic_write(ipic->regs, IPIC_SERMR, temp); -} - u32 ipic_get_mcp_status(void) { return primary_ipic ? 
ipic_read(primary_ipic->regs, IPIC_SERSR) : 0; -- cgit v1.2.3 From cd6b8a631c5de3a6b7c8ef30337fd02bd8210a44 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 14 Jan 2019 11:38:49 +1100 Subject: powerpc/mm: Fix compile when CONFIG_PPC_RADIX_MMU is not defined This adds some stubs for hash only configs. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- .../powerpc/include/asm/book3s/64/tlbflush-radix.h | 30 +++++++++++++++++----- 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 671316f9e95d..05147cecb8df 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -13,8 +13,32 @@ static inline int mmu_get_ap(int psize) #ifdef CONFIG_PPC_RADIX_MMU extern void radix__tlbiel_all(unsigned int action); +extern void radix__flush_tlb_lpid_page(unsigned int lpid, + unsigned long addr, + unsigned long page_size); +extern void radix__flush_pwc_lpid(unsigned int lpid); +extern void radix__flush_tlb_lpid(unsigned int lpid); +extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid); #else static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); }; +static inline void radix__flush_tlb_lpid_page(unsigned int lpid, + unsigned long addr, + unsigned long page_size) +{ + WARN_ON(1); +} +static inline void radix__flush_pwc_lpid(unsigned int lpid) +{ + WARN_ON(1); +} +static inline void radix__flush_tlb_lpid(unsigned int lpid) +{ + WARN_ON(1); +} +static inline void radix__local_flush_tlb_lpid_guest(unsigned int lpid) +{ + WARN_ON(1); +} #endif extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, @@ -49,12 +73,6 @@ extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr); extern void radix__flush_tlb_all(void); -extern void radix__flush_tlb_lpid_page(unsigned int lpid, - unsigned long addr, - unsigned long page_size); -extern void radix__flush_pwc_lpid(unsigned int lpid); -extern void radix__flush_tlb_lpid(unsigned int lpid); extern void radix__local_flush_tlb_lpid(unsigned int lpid); -extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid); #endif -- cgit v1.2.3 From c35f78d7a422750917029d20d9e57000b1181d75 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 14 Jan 2019 11:40:27 +1100 Subject: powerpc/powernv: Remove never used pnv_power9_force_smt4 This removes never used symbol - pnv_power9_force_smt4. 
Note that we might still want to add stubs for: void pnv_power9_force_smt4_catch(void); void pnv_power9_force_smt4_release(void); Fixes: 7672691a08c88 "powerpc/powernv: Provide a way to force a core into SMT4 mode" Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/powernv.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h index 2f3ff7a27881..362ea12a4501 100644 --- a/arch/powerpc/include/asm/powernv.h +++ b/arch/powerpc/include/asm/powernv.h @@ -40,7 +40,6 @@ static inline int pnv_npu2_handle_fault(struct npu_context *context, } static inline void pnv_tm_init(void) { } -static inline void pnv_power9_force_smt4(void) { } #endif #endif /* _ASM_POWERNV_H */ -- cgit v1.2.3 From 81b61324922c67f73813d8a9c175f3c153f6a1c6 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Mon, 29 Oct 2018 13:43:36 -0500 Subject: powerpc/pseries: Perform full re-add of CPU for topology update post-migration On pseries systems, performing a partition migration can result in altering the nodes a CPU is assigned to on the destination system. For example, pre-migration on the source system CPUs are in nodes 1 and 3, post-migration on the destination system CPUs are in nodes 2 and 3. Handling the node change for a CPU can cause corruption in the slab cache if we hit a timing where a CPU's node is changed while cache_reap() is invoked. The corruption occurs because the slab cache code appears to rely on the CPU and slab cache pages being on the same node. The current dynamic updating of a CPU's node done in arch/powerpc/mm/numa.c does not prevent us from hitting this scenario. Changing the device tree property update notification handler that recognizes an affinity change for a CPU to do a full DLPAR remove and add of the CPU instead of dynamically changing its node resolves this issue. Signed-off-by: Nathan Fontenot Signed-off-by: Michael W. Bringmann Tested-by: Michael W. 
Bringmann Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/topology.h | 2 ++ arch/powerpc/mm/numa.c | 9 +-------- arch/powerpc/platforms/pseries/hotplug-cpu.c | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index a4a718dbfec6..f85e2b01c3df 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -132,6 +132,8 @@ static inline void shared_proc_topology_init(void) {} #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) + +int dlpar_cpu_readd(int cpu); #endif #endif diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 87f0dd004295..b5d1c45c1475 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1460,13 +1460,6 @@ static void reset_topology_timer(void) #ifdef CONFIG_SMP -static void stage_topology_update(int core_id) -{ - cpumask_or(&cpu_associativity_changes_mask, - &cpu_associativity_changes_mask, cpu_sibling_mask(core_id)); - reset_topology_timer(); -} - static int dt_update_callback(struct notifier_block *nb, unsigned long action, void *data) { @@ -1479,7 +1472,7 @@ static int dt_update_callback(struct notifier_block *nb, !of_prop_cmp(update->prop->name, "ibm,associativity")) { u32 core_id; of_property_read_u32(update->dn, "reg", &core_id); - stage_topology_update(core_id); + rc = dlpar_cpu_readd(core_id); rc = NOTIFY_OK; } break; diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 2f8e62163602..97feb6e79f1a 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -802,6 +802,25 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add) return rc; } +int dlpar_cpu_readd(int cpu) +{ + struct device_node *dn; + struct device *dev; + u32 drc_index; + int rc; + + dev = get_cpu_device(cpu); + dn = dev->of_node; + + rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index); + + rc = dlpar_cpu_remove_by_index(drc_index); + if (!rc) + rc = dlpar_cpu_add(drc_index); + + return rc; +} + int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) { u32 count, drc_index; -- cgit v1.2.3 From 865a9432d16fe2f40a1a52005fd30778056c7921 Mon Sep 17 00:00:00 2001 From: Reza Arbab Date: Mon, 28 Jan 2019 11:31:42 -0600 Subject: powerpc/mm: Add _PAGE_SAO to _PAGE_CACHE_CTL mask In htab_convert_pte_flags(), _PAGE_CACHE_CTL is used to check for the _PAGE_SAO flag: else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M); But, it isn't defined to include that flag: #define _PAGE_CACHE_CTL (_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT) This happens to work, but only because of the flag values: #define _PAGE_SAO 0x00010 /* Strong access order */ #define _PAGE_NON_IDEMPOTENT 0x00020 /* non idempotent memory */ #define _PAGE_TOLERANT 0x00030 /* tolerant memory, cache inhibited */ To prevent any issues if these particulars ever change, add _PAGE_SAO to the mask. 
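A quick arithmetic check makes the reasoning concrete. A minimal user-space sketch, assuming only the flag values quoted above rather than the real headers:

    #include <assert.h>

    #define _PAGE_SAO            0x00010 /* Strong access order */
    #define _PAGE_NON_IDEMPOTENT 0x00020 /* non idempotent memory */
    #define _PAGE_TOLERANT       0x00030 /* tolerant memory, cache inhibited */

    int main(void)
    {
            unsigned long old_mask = _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT; /* 0x30 */
            unsigned long new_mask = _PAGE_SAO | old_mask;                  /* still 0x30 today */

            /* SAO happens to match with the old mask because 0x10 & 0x30 == 0x10 ... */
            assert((_PAGE_SAO & old_mask) == _PAGE_SAO);
            /* ... but only the new mask stays correct if _PAGE_SAO ever grows a bit
             * outside 0x30. */
            assert((_PAGE_SAO & new_mask) == _PAGE_SAO);
            return 0;
    }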
Suggested-by: Charles Johns Signed-off-by: Reza Arbab Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 2e6ada28da64..1d97a2800cf8 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -811,7 +811,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, return hash__set_pte_at(mm, addr, ptep, pte, percpu); } -#define _PAGE_CACHE_CTL (_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT) +#define _PAGE_CACHE_CTL (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT) #define pgprot_noncached pgprot_noncached static inline pgprot_t pgprot_noncached(pgprot_t prot) -- cgit v1.2.3 From 26b523356f49a0117c8f9e32ca98aa6d6e496e1a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 1 Feb 2019 10:46:52 +0000 Subject: powerpc: Drop page_is_ram() and walk_system_ram_range() Since commit c40dd2f76644 ("powerpc: Add System RAM to /proc/iomem") it is possible to use the generic walk_system_ram_range() and the generic page_is_ram(). To enable the use of walk_system_ram_range() by the IBM EHEA ethernet driver, we still need an export of the generic function. As powerpc was the only user of CONFIG_ARCH_HAS_WALK_MEMORY, the ifdef around the generic walk_system_ram_range() has become useless and can be dropped. Fixes: c40dd2f76644 ("powerpc: Add System RAM to /proc/iomem") Signed-off-by: Christophe Leroy [mpe: Keep the EXPORT_SYMBOL_GPL in powerpc code] Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 3 --- arch/powerpc/include/asm/page.h | 1 - arch/powerpc/mm/mem.c | 39 ++++++--------------------------------- kernel/resource.c | 4 ---- 4 files changed, 6 insertions(+), 41 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9c70c2864657..08908219fba9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -478,9 +478,6 @@ config ARCH_CPU_PROBE_RELEASE config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y -config ARCH_HAS_WALK_MEMORY - def_bool y - config ARCH_ENABLE_MEMORY_HOTREMOVE def_bool y diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 5c5ea2413413..aa4497175bd3 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -326,7 +326,6 @@ struct page; extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *p); -extern int page_is_ram(unsigned long pfn); extern int devmem_is_allowed(unsigned long pfn); #ifdef CONFIG_PPC_SMLPAR diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 33cc6f676fa6..81f251fc4169 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -80,11 +80,6 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr) #define TOP_ZONE ZONE_NORMAL #endif -int page_is_ram(unsigned long pfn) -{ - return memblock_is_memory(__pfn_to_phys(pfn)); -} - pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -176,34 +171,6 @@ int __meminit arch_remove_memory(int nid, u64 start, u64 size, #endif #endif /* CONFIG_MEMORY_HOTPLUG */ -/* - * walk_memory_resource() needs to make sure there is no holes in a given - * memory range. PPC64 does not maintain the memory layout in /proc/iomem. 
- Instead it maintains it in memblock.memory structures. Walk through the - * memory regions, find holes and callback for contiguous regions. - */ -int -walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, - void *arg, int (*func)(unsigned long, unsigned long, void *)) -{ - struct memblock_region *reg; - unsigned long end_pfn = start_pfn + nr_pages; - unsigned long tstart, tend; - int ret = -1; - - for_each_memblock(memory, reg) { - tstart = max(start_pfn, memblock_region_memory_base_pfn(reg)); - tend = min(end_pfn, memblock_region_memory_end_pfn(reg)); - if (tstart >= tend) - continue; - ret = (*func)(tstart, tend - tstart, arg); - if (ret) - break; - } - return ret; -} -EXPORT_SYMBOL_GPL(walk_system_ram_range); - #ifndef CONFIG_NEED_MULTIPLE_NODES void __init mem_topology_setup(void) { @@ -585,3 +552,9 @@ int devmem_is_allowed(unsigned long pfn) return 0; } #endif /* CONFIG_STRICT_DEVMEM */ + +/* + * This is defined in kernel/resource.c but only powerpc needs to export it, for + * the EHEA driver. Drop this when drivers/net/ethernet/ibm/ehea is removed. + */ +EXPORT_SYMBOL_GPL(walk_system_ram_range); diff --git a/kernel/resource.c b/kernel/resource.c index 915c02e8e5dd..e81b17b53fa5 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -448,8 +448,6 @@ int walk_mem_res(u64 start, u64 end, void *arg, arg, func); } -#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY) - /* * This function calls the @func callback against all memory ranges of type * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY. */ @@ -481,8 +479,6 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, return ret; } -#endif - static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg) { return 1; -- cgit v1.2.3 From 188fdea69fa91dcd674a3d40f060a5891d4bc45a Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Thu, 29 Nov 2018 14:16:38 +1100 Subject: powerpc/eeh: remove sw_state from eeh_unfreeze_pe() eeh_unfreeze_pe() performs two operations: unfreezing a PE (which may cause firmware to unfreeze child PEs as well) and de-isolating the PE and its children. To simplify this and support future work, separate out the de-isolation and perform it at the call sites (when necessary). There should be no change in behaviour. 
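The pattern at each call site after this change is small enough to spell out; a distilled sketch using the names from the patch:

    /* Before: one helper, with a flag controlling a second side effect. */
    ret = eeh_unfreeze_pe(pe, true);

    /* After: the caller composes the two steps explicitly, so a later
     * patch can make the de-isolation conditional. */
    ret = eeh_unfreeze_pe(pe);
    if (!ret)
            eeh_pe_state_clear(pe, EEH_PE_ISOLATED);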
Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 2 +- arch/powerpc/kernel/eeh.c | 18 ++++++++++-------- arch/powerpc/kernel/eeh_driver.c | 2 +- arch/powerpc/kernel/eeh_sysfs.c | 3 ++- 4 files changed, 14 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 8b596d096ebe..2ff123f745cc 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -293,7 +293,7 @@ void eeh_add_device_late(struct pci_dev *); void eeh_add_device_tree_late(struct pci_bus *); void eeh_add_sysfs_files(struct pci_bus *); void eeh_remove_device(struct pci_dev *); -int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state); +int eeh_unfreeze_pe(struct eeh_pe *pe); int eeh_pe_reset_and_recover(struct eeh_pe *pe); int eeh_dev_open(struct pci_dev *pdev); void eeh_dev_release(struct pci_dev *pdev); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index ae05203eb4de..c56537d03017 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -823,7 +823,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat switch (state) { case pcie_deassert_reset: eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); - eeh_unfreeze_pe(pe, false); + eeh_unfreeze_pe(pe); if (!(pe->type & EEH_PE_VF)) eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); @@ -1309,7 +1309,7 @@ void eeh_remove_device(struct pci_dev *dev) edev->mode &= ~EEH_DEV_SYSFS; } -int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) +int eeh_unfreeze_pe(struct eeh_pe *pe) { int ret; @@ -1327,10 +1327,6 @@ int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) return ret; } - /* Clear software isolated state */ - if (sw_state && (pe->state & EEH_PE_ISOLATED)) - eeh_pe_state_clear(pe, EEH_PE_ISOLATED); - return ret; } @@ -1382,7 +1378,10 @@ static int eeh_pe_change_owner(struct eeh_pe *pe) } } - return eeh_unfreeze_pe(pe, true); + ret = eeh_unfreeze_pe(pe); + if (!ret) + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + return ret; } /** @@ -1639,7 +1638,10 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe) } /* The PE is still in frozen state */ - return eeh_unfreeze_pe(pe, true); + ret = eeh_unfreeze_pe(pe); + if (!ret) + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + return ret; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 3456d9c2d4da..5303429ac0e3 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -598,7 +598,7 @@ static int eeh_clear_pe_frozen_state(struct eeh_pe *root) eeh_for_each_pe(root, pe) { for (i = 0; i < 3; i++) - if (!eeh_unfreeze_pe(pe, false)) + if (!eeh_unfreeze_pe(pe)) break; if (i >= 3) return -EIO; diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index deed906dd8f1..0731d2f01dd9 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -82,8 +82,9 @@ static ssize_t eeh_pe_state_store(struct device *dev, if (!(edev->pe->state & EEH_PE_ISOLATED)) return count; - if (eeh_unfreeze_pe(edev->pe, true)) + if (eeh_unfreeze_pe(edev->pe)) return -EIO; + eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED); return count; } -- cgit v1.2.3 From 9ed5ca66aa66e5ce2e1d8758250a4d740052c8cd Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Thu, 29 Nov 2018 14:16:39 +1100 Subject: powerpc/eeh: Add include_passed to eeh_pe_state_clear() Add a parameter to eeh_pe_state_clear() that allows 
passed-through PEs to be excluded. Update callers to always pass true so that there is no change in behaviour. Also refactor to use direct traversal, to allow the removal of some boilerplate. This is to prepare for follow-up work for passed-through devices. Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-pci.h | 2 +- arch/powerpc/kernel/eeh.c | 18 +++++----- arch/powerpc/kernel/eeh_driver.c | 20 +++++------ arch/powerpc/kernel/eeh_pe.c | 68 ++++++++++++++++---------------------- arch/powerpc/kernel/eeh_sysfs.c | 2 +- 5 files changed, 50 insertions(+), 60 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index f67da277d652..08e094eaeccf 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -59,7 +59,7 @@ int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); void eeh_pe_state_mark(struct eeh_pe *pe, int state); void eeh_pe_mark_isolated(struct eeh_pe *pe); -void eeh_pe_state_clear(struct eeh_pe *pe, int state); +void eeh_pe_state_clear(struct eeh_pe *pe, int state, bool include_passed); void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state); void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index c56537d03017..8d32587b07dc 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -825,13 +825,13 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); eeh_unfreeze_pe(pe); if (!(pe->type & EEH_PE_VF)) - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true); eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); - eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true); break; case pcie_hot_reset: eeh_pe_mark_isolated(pe); - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); if (!(pe->type & EEH_PE_VF)) @@ -840,7 +840,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat break; case pcie_warm_reset: eeh_pe_mark_isolated(pe); - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); if (!(pe->type & EEH_PE_VF)) @@ -848,7 +848,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; default: - eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true); return -EINVAL; }; @@ -936,7 +936,7 @@ int eeh_pe_reset_full(struct eeh_pe *pe) __func__, state, pe->phb->global_number, pe->addr, (i + 1)); } - eeh_pe_state_clear(pe, reset_state); + eeh_pe_state_clear(pe, reset_state, true); return ret; } @@ -1380,7 +1380,7 @@ static int eeh_pe_change_owner(struct eeh_pe *pe) ret = eeh_unfreeze_pe(pe); if (!ret) - eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true); return ret; } @@ -1640,7 +1640,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe) /* The PE is still in frozen state */ ret = eeh_unfreeze_pe(pe); if 
(!ret) - eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true); return ret; } @@ -1668,7 +1668,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option) switch (option) { case EEH_RESET_DEACTIVATE: ret = eeh_ops->reset(pe, option); - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true); if (ret) break; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 5303429ac0e3..997aba0fe593 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -603,7 +603,7 @@ static int eeh_clear_pe_frozen_state(struct eeh_pe *root) if (i >= 3) return -EIO; } - eeh_pe_state_clear(root, EEH_PE_ISOLATED); + eeh_pe_state_clear(root, EEH_PE_ISOLATED, true); return 0; } @@ -624,14 +624,14 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) /* Issue reset */ ret = eeh_pe_reset_full(pe); if (ret) { - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true); return ret; } /* Unfreeze the PE */ ret = eeh_clear_pe_frozen_state(pe); if (ret) { - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true); return ret; } @@ -639,7 +639,7 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL); /* Clear recovery mode */ - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true); return 0; } @@ -730,11 +730,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, eeh_add_virt_device(edev); } else { if (!driver_eeh_aware) - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); pci_hp_add_devices(bus); } } - eeh_pe_state_clear(pe, EEH_PE_KEEP); + eeh_pe_state_clear(pe, EEH_PE_KEEP, true); pe->tstamp = tstamp; pe->freeze_count = cnt; @@ -886,7 +886,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) * is still in frozen state. Clear it before * resuming the PE. */ - eeh_pe_state_clear(pe, EEH_PE_ISOLATED); + eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true); result = PCI_ERS_RESULT_RECOVERED; } } @@ -963,7 +963,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); pci_lock_rescan_remove(); @@ -973,7 +973,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) return; } } - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true); } /** @@ -1055,7 +1055,7 @@ void eeh_handle_special_event(void) continue; /* Notify all devices to be down */ - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); eeh_set_channel_state(pe, pci_channel_io_perm_failure); eeh_pe_report( "error_detected(permanent failure)", pe, diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 6fa2032e0594..8b578891f27c 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -657,62 +657,52 @@ void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode) } /** - * __eeh_pe_state_clear - Clear state for the PE + * eeh_pe_state_clear - Clear state for the PE * @data: EEH PE - * @flag: state + * @state: state + * @include_passed: include passed-through devices? * * The function is used to clear the indicated state from the * given PE. Besides, we also clear the check count of the PE * as well. 
 */ -static void *__eeh_pe_state_clear(struct eeh_pe *pe, void *flag) +void eeh_pe_state_clear(struct eeh_pe *root, int state, bool include_passed) { - int state = *((int *)flag); + struct eeh_pe *pe; struct eeh_dev *edev, *tmp; struct pci_dev *pdev; - /* Keep the state of permanently removed PE intact */ - if (pe->state & EEH_PE_REMOVED) - return NULL; + eeh_for_each_pe(root, pe) { + /* Keep the state of permanently removed PE intact */ + if (pe->state & EEH_PE_REMOVED) + continue; - pe->state &= ~state; + if (!include_passed && eeh_pe_passed(pe)) + continue; - /* - * Special treatment on clearing isolated state. Clear - * check count since last isolation and put all affected - * devices to normal state. - */ - if (!(state & EEH_PE_ISOLATED)) - return NULL; + pe->state &= ~state; - pe->check_count = 0; - eeh_pe_for_each_dev(pe, edev, tmp) { - pdev = eeh_dev_to_pci_dev(edev); - if (!pdev) + /* + * Special treatment on clearing isolated state. Clear + * check count since last isolation and put all affected + * devices to normal state. + */ + if (!(state & EEH_PE_ISOLATED)) continue; - pdev->error_state = pci_channel_io_normal; - } - - /* Unblock PCI config access if required */ - if (pe->state & EEH_PE_CFG_RESTRICTED) - pe->state &= ~EEH_PE_CFG_BLOCKED; + pe->check_count = 0; + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; - return NULL; -} + pdev->error_state = pci_channel_io_normal; + } -/** - * eeh_pe_state_clear - Clear state for the PE and its children - * @pe: PE - * @state: state to be cleared - * - * When the PE and its children has been recovered from error, - * we need clear the error state for that. The function is used - * for the purpose. - */ -void eeh_pe_state_clear(struct eeh_pe *pe, int state) -{ - eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); + /* Unblock PCI config access if required */ + if (pe->state & EEH_PE_CFG_RESTRICTED) + pe->state &= ~EEH_PE_CFG_BLOCKED; + } } /* diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 0731d2f01dd9..3fa04dda1737 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -84,7 +84,7 @@ static ssize_t eeh_pe_state_store(struct device *dev, if (eeh_unfreeze_pe(edev->pe)) return -EIO; - eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED); + eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED, true); return count; } -- cgit v1.2.3 From 1ef52073fd25ea97090eaff2c8b528ebf401a12a Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Thu, 29 Nov 2018 14:16:41 +1100 Subject: powerpc/eeh: Improve recovery of passed-through devices Currently, the EEH recovery process considers passed-through devices as if they were not EEH-aware, which can cause them to be removed as part of recovery. Because device removal requires cooperation from the guest, this may lead to the process stalling or deadlocking. Also, if devices are removed on the host side, they will be removed from their IOMMU group, making recovery in the guest impossible. Therefore, alter the recovery process so that passed-through devices are not removed but are instead left frozen (and marked isolated) until the guest performs its own recovery. If firmware thaws a passed-through PE because its parent PE has been thawed (because it was not passed through), re-freeze it. 
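For context, the guest-driven recovery referred to here reaches the host through the VFIO sPAPR EEH ioctl (see the vfio_spapr_eeh.c hunk below). A rough user-space sketch of the reset sequence, assuming a container fd that already owns the device's IOMMU group (error handling trimmed):

    #include <sys/ioctl.h>
    #include <linux/vfio.h>

    static int recover_pe(int container_fd)
    {
            struct vfio_eeh_pe_op op = { .argsz = sizeof(op) };

            op.op = VFIO_EEH_PE_RESET_HOT;        /* assert hot reset on the frozen PE */
            if (ioctl(container_fd, VFIO_EEH_PE_OP, &op) < 0)
                    return -1;
            op.op = VFIO_EEH_PE_RESET_DEACTIVATE; /* release the reset */
            if (ioctl(container_fd, VFIO_EEH_PE_OP, &op) < 0)
                    return -1;
            op.op = VFIO_EEH_PE_CONFIGURE;        /* restore bridge/BAR config */
            return ioctl(container_fd, VFIO_EEH_PE_OP, &op);
    }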
Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 2 +- arch/powerpc/include/asm/ppc-pci.h | 2 +- arch/powerpc/kernel/eeh.c | 47 ++++++++++++++++++++++++++++++-------- arch/powerpc/kernel/eeh_driver.c | 32 +++++++++++--------------- drivers/vfio/vfio_spapr_eeh.c | 6 ++--- 5 files changed, 55 insertions(+), 34 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 2ff123f745cc..0b655810f32d 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -300,7 +300,7 @@ void eeh_dev_release(struct pci_dev *pdev); struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group); int eeh_pe_set_option(struct eeh_pe *pe, int option); int eeh_pe_get_state(struct eeh_pe *pe); -int eeh_pe_reset(struct eeh_pe *pe, int option); +int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed); int eeh_pe_configure(struct eeh_pe *pe); int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, unsigned long addr, unsigned long mask); diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 08e094eaeccf..f191ef0d2a0a 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -53,7 +53,7 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev); struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr); void eeh_slot_error_detail(struct eeh_pe *pe, int severity); int eeh_pci_enable(struct eeh_pe *pe, int function); -int eeh_pe_reset_full(struct eeh_pe *pe); +int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed); void eeh_save_bars(struct eeh_dev *edev); int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 8d32587b07dc..416d1ef49762 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -877,6 +877,24 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag) return NULL; } +static void eeh_pe_refreeze_passed(struct eeh_pe *root) +{ + struct eeh_pe *pe; + int state; + + eeh_for_each_pe(root, pe) { + if (eeh_pe_passed(pe)) { + state = eeh_ops->get_state(pe, NULL); + if (state & + (EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED)) { + pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n", + pe->phb->global_number, pe->addr); + eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE); + } + } + } +} + /** * eeh_pe_reset_full - Complete a full reset process on the indicated PE * @pe: EEH PE @@ -889,7 +907,7 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag) * * This function will attempt to reset a PE three times before failing. */ -int eeh_pe_reset_full(struct eeh_pe *pe) +int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed) { int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); int type = EEH_RESET_HOT; @@ -911,11 +929,11 @@ int eeh_pe_reset_full(struct eeh_pe *pe) /* Make three attempts at resetting the bus */ for (i = 0; i < 3; i++) { - ret = eeh_pe_reset(pe, type); + ret = eeh_pe_reset(pe, type, include_passed); if (ret) break; - ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE); + ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, include_passed); if (ret) break; @@ -936,6 +954,12 @@ int eeh_pe_reset_full(struct eeh_pe *pe) __func__, state, pe->phb->global_number, pe->addr, (i + 1)); } + /* Resetting the PE may have unfrozen child PEs. 
If those PEs have been + * (potentially) passed through to a guest, re-freeze them: + */ + if (!include_passed) + eeh_pe_refreeze_passed(pe); + eeh_pe_state_clear(pe, reset_state, true); return ret; } @@ -1611,13 +1635,12 @@ int eeh_pe_get_state(struct eeh_pe *pe) } EXPORT_SYMBOL_GPL(eeh_pe_get_state); -static int eeh_pe_reenable_devices(struct eeh_pe *pe) +static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed) { struct eeh_dev *edev, *tmp; struct pci_dev *pdev; int ret = 0; - /* Restore config space */ eeh_pe_restore_bars(pe); /* @@ -1638,9 +1661,13 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe) } /* The PE is still in frozen state */ - ret = eeh_unfreeze_pe(pe); + if (include_passed || !eeh_pe_passed(pe)) { + ret = eeh_unfreeze_pe(pe); + } else + pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n", + pe->phb->global_number, pe->addr); if (!ret) - eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true); + eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed); return ret; } @@ -1654,7 +1681,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe) * indicated type, either fundamental reset or hot reset. * PE reset is the most important part for error recovery. */ -int eeh_pe_reset(struct eeh_pe *pe, int option) +int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed) { int ret = 0; @@ -1668,11 +1695,11 @@ int eeh_pe_reset(struct eeh_pe *pe, int option) switch (option) { case EEH_RESET_DEACTIVATE: ret = eeh_ops->reset(pe, option); - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed); if (ret) break; - ret = eeh_pe_reenable_devices(pe); + ret = eeh_pe_reenable_devices(pe, include_passed); break; case EEH_RESET_HOT: case EEH_RESET_FUNDAMENTAL: diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 91629b3f3b74..89623962c727 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -510,22 +510,11 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata) * support EEH. So we just care about PCI devices for * simplicity here. */ - if (!dev || (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) - return NULL; - - /* - * We rely on count-based pcibios_release_device() to - * detach permanently offlined PEs. Unfortunately, that's - * not reliable enough. We might have the permanently - * offlined PEs attached, but we needn't take care of - * them and their child devices. 
- */ - if (eeh_dev_removed(edev)) + if (!eeh_edev_actionable(edev) || + (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) return NULL; if (rmv_data) { - if (eeh_pe_passed(edev->pe)) - return NULL; driver = eeh_pcid_get(dev); if (driver) { if (driver->err_handler && @@ -539,8 +528,8 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata) } /* Remove it from PCI subsystem */ - pr_debug("EEH: Removing %s without EEH sensitive driver\n", - pci_name(dev)); + pr_info("EEH: Removing %s without EEH sensitive driver\n", + pci_name(dev)); edev->mode |= EEH_DEV_DISCONNECTED; if (rmv_data) rmv_data->removed_dev_count++; @@ -624,7 +613,7 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL); /* Issue reset */ - ret = eeh_pe_reset_full(pe); + ret = eeh_pe_reset_full(pe, true); if (ret) { eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true); return ret; } @@ -664,6 +653,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, time64_t tstamp; int cnt, rc; struct eeh_dev *edev; + struct eeh_pe *tmp_pe; + bool any_passed = false; + + eeh_for_each_pe(pe, tmp_pe) + any_passed |= eeh_pe_passed(tmp_pe); /* pcibios will clear the counter; save the value */ cnt = pe->freeze_count; @@ -676,7 +670,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * into pci_hp_add_devices(). */ eeh_pe_state_mark(pe, EEH_PE_KEEP); - if (driver_eeh_aware || (pe->type & EEH_PE_VF)) { + if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) { eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); } else { pci_lock_rescan_remove(); @@ -693,7 +687,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * config accesses. So we prefer to block them. However, controlled * PCI config accesses initiated from EEH itself are allowed. */ - rc = eeh_pe_reset_full(pe); + rc = eeh_pe_reset_full(pe, false); if (rc) return rc; @@ -704,7 +698,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, eeh_pe_restore_bars(pe); /* Clear frozen state */ - rc = eeh_clear_pe_frozen_state(pe, true); + rc = eeh_clear_pe_frozen_state(pe, false); if (rc) { pci_unlock_rescan_remove(); return rc; } diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index 38edeb4729a9..1a742fe8f6db 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -74,13 +74,13 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, ret = eeh_pe_get_state(pe); break; case VFIO_EEH_PE_RESET_DEACTIVATE: - ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE); + ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, true); break; case VFIO_EEH_PE_RESET_HOT: - ret = eeh_pe_reset(pe, EEH_RESET_HOT); + ret = eeh_pe_reset(pe, EEH_RESET_HOT, true); break; case VFIO_EEH_PE_RESET_FUNDAMENTAL: - ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL); + ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL, true); break; case VFIO_EEH_PE_CONFIGURE: ret = eeh_pe_configure(pe); -- cgit v1.2.3 From a20f507f577b04f286c88a4885ac528e69f6f308 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:04 +0100 Subject: powerpc/dma: untangle vio_dma_mapping_ops from dma_iommu_ops vio_dma_mapping_ops currently does a lot of indirect calls through dma_iommu_ops, which not only make the code harder to follow but are also expensive in the post-spectre world. Unwind the indirect calls by calling the ppc_iommu_* or iommu_* APIs directly where applicable, or just use the dma_iommu_* methods directly where we can. 
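The "post-spectre" cost comes from retpolines turning indirect calls into slower, unpredicted branches. A minimal illustration of the transformation applied throughout vio.c, using generic names rather than the kernel's:

    struct ops { int (*map)(int); };
    extern struct ops some_ops;     /* function-pointer table, like dma_iommu_ops */
    extern int real_map(int);       /* the function some_ops.map points at */

    int via_pointer(int x)
    {
            return some_ops.map(x); /* indirect call: retpoline on mitigated kernels */
    }

    int direct(int x)
    {
            return real_map(x);     /* direct call: an ordinary, predictable branch */
    }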
Signed-off-by: Christoph Hellwig Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 1 + arch/powerpc/kernel/dma-iommu.c | 2 +- arch/powerpc/platforms/pseries/vio.c | 87 +++++++++++++++--------------------- 3 files changed, 38 insertions(+), 52 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 17524d222a7b..bd069a6542ab 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -237,6 +237,7 @@ static inline void iommu_del_device(struct device *dev) } #endif /* !CONFIG_IOMMU_API */ +u64 dma_iommu_get_required_mask(struct device *dev); #else static inline void *get_iommu_table_base(struct device *dev) diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 9c9bcaae2f75..dd8601cd20df 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -92,7 +92,7 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask) return 1; } -static u64 dma_iommu_get_required_mask(struct device *dev) +u64 dma_iommu_get_required_mask(struct device *dev) { struct iommu_table *tbl = get_iommu_table_base(dev); u64 mask; diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 1fad4649735b..7870bf99168c 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -492,7 +492,9 @@ static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size, return NULL; } - ret = dma_iommu_ops.alloc(dev, size, dma_handle, flag, attrs); + ret = iommu_alloc_coherent(dev, get_iommu_table_base(dev), size, + dma_handle, dev->coherent_dma_mask, flag, + dev_to_node(dev)); if (unlikely(ret == NULL)) { vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE)); atomic_inc(&viodev->cmo.allocs_failed); @@ -507,8 +509,7 @@ static void vio_dma_iommu_free_coherent(struct device *dev, size_t size, { struct vio_dev *viodev = to_vio_dev(dev); - dma_iommu_ops.free(dev, size, vaddr, dma_handle, attrs); - + iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle); vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE)); } @@ -518,22 +519,22 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); - struct iommu_table *tbl; + struct iommu_table *tbl = get_iommu_table_base(dev); dma_addr_t ret = DMA_MAPPING_ERROR; - tbl = get_iommu_table_base(dev); - if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) { - atomic_inc(&viodev->cmo.allocs_failed); - return ret; - } - - ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs); - if (unlikely(dma_mapping_error(dev, ret))) { - vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))); - atomic_inc(&viodev->cmo.allocs_failed); - } - + if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) + goto out_fail; + ret = iommu_map_page(dev, tbl, page, offset, size, device_to_mask(dev), + direction, attrs); + if (unlikely(ret == DMA_MAPPING_ERROR)) + goto out_deallocate; return ret; + +out_deallocate: + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))); +out_fail: + atomic_inc(&viodev->cmo.allocs_failed); + return DMA_MAPPING_ERROR; } static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, @@ -542,11 +543,9 @@ static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); - struct iommu_table *tbl; - 
- tbl = get_iommu_table_base(dev); - dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs); + struct iommu_table *tbl = get_iommu_table_base(dev); + iommu_unmap_page(tbl, dma_handle, size, direction, attrs); vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))); } @@ -555,34 +554,32 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); - struct iommu_table *tbl; + struct iommu_table *tbl = get_iommu_table_base(dev); struct scatterlist *sgl; int ret, count; size_t alloc_size = 0; - tbl = get_iommu_table_base(dev); for_each_sg(sglist, sgl, nelems, count) alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl)); - if (vio_cmo_alloc(viodev, alloc_size)) { - atomic_inc(&viodev->cmo.allocs_failed); - return 0; - } - - ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs); - - if (unlikely(!ret)) { - vio_cmo_dealloc(viodev, alloc_size); - atomic_inc(&viodev->cmo.allocs_failed); - return ret; - } + if (vio_cmo_alloc(viodev, alloc_size)) + goto out_fail; + ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, device_to_mask(dev), + direction, attrs); + if (unlikely(!ret)) + goto out_deallocate; for_each_sg(sglist, sgl, ret, count) alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl)); if (alloc_size) vio_cmo_dealloc(viodev, alloc_size); - return ret; + +out_deallocate: + vio_cmo_dealloc(viodev, alloc_size); +out_fail: + atomic_inc(&viodev->cmo.allocs_failed); + return 0; } static void vio_dma_iommu_unmap_sg(struct device *dev, @@ -591,30 +588,18 @@ static void vio_dma_iommu_unmap_sg(struct device *dev, unsigned long attrs) { struct vio_dev *viodev = to_vio_dev(dev); - struct iommu_table *tbl; + struct iommu_table *tbl = get_iommu_table_base(dev); struct scatterlist *sgl; size_t alloc_size = 0; int count; - tbl = get_iommu_table_base(dev); for_each_sg(sglist, sgl, nelems, count) alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl)); - dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs); - + ppc_iommu_unmap_sg(tbl, sglist, nelems, direction, attrs); vio_cmo_dealloc(viodev, alloc_size); } -static int vio_dma_iommu_dma_supported(struct device *dev, u64 mask) -{ - return dma_iommu_ops.dma_supported(dev, mask); -} - -static u64 vio_dma_get_required_mask(struct device *dev) -{ - return dma_iommu_ops.get_required_mask(dev); -} - static const struct dma_map_ops vio_dma_mapping_ops = { .alloc = vio_dma_iommu_alloc_coherent, .free = vio_dma_iommu_free_coherent, @@ -623,8 +608,8 @@ static const struct dma_map_ops vio_dma_mapping_ops = { .unmap_sg = vio_dma_iommu_unmap_sg, .map_page = vio_dma_iommu_map_page, .unmap_page = vio_dma_iommu_unmap_page, - .dma_supported = vio_dma_iommu_dma_supported, - .get_required_mask = vio_dma_get_required_mask, + .dma_supported = dma_iommu_dma_supported, + .get_required_mask = dma_iommu_get_required_mask, }; /** -- cgit v1.2.3 From 8617a5c5bc001e52c40d6b2ece78e8f332039217 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:05 +0100 Subject: powerpc/dma: handle iommu bypass in dma_iommu_ops Add a new iommu_bypass flag to struct dev_archdata so that the dma_iommu implementation can handle the direct mapping transparently instead of switching ops around. Setting of this flag is controlled by a new pci_controller_ops method. 
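To make the new hook concrete: a hedged sketch of a platform implementation, with illustrative names (the cell version later in this series does essentially this check against its fixed window):

    /* Hypothetical PHB hook: permit bypass only when the device can
     * address all of memory through the direct window. */
    static bool example_iommu_bypass_supported(struct pci_dev *pdev, u64 mask)
    {
            return mask == DMA_BIT_MASK(64);
    }

    /* Wired up when the PHB is set up: */
    phb->controller_ops.iommu_bypass_supported = example_iommu_bypass_supported;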
Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/device.h | 5 +++ arch/powerpc/include/asm/dma-mapping.h | 8 ++++ arch/powerpc/include/asm/pci-bridge.h | 2 + arch/powerpc/kernel/dma-iommu.c | 70 +++++++++++++++++++++++++++++++--- arch/powerpc/kernel/dma.c | 19 ++++----- 5 files changed, 87 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 0245bfcaac32..1aa53318b4bc 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -19,6 +19,11 @@ struct iommu_table; * drivers/macintosh/macio_asic.c */ struct dev_archdata { + /* + * Set to %true if the dma_iommu_ops are requested to use a direct + * window instead of dynamically mapping memory. + */ + bool iommu_bypass : 1; /* * These two used to be a union. However, with the hybrid ops we need * both so here we store both a DMA offset for direct mappings and diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index ebf66809f2d3..ff86b863eceb 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -29,6 +29,14 @@ extern int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t handle, size_t size, unsigned long attrs); +int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction, + unsigned long attrs); +dma_addr_t dma_nommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs); +int dma_nommu_dma_supported(struct device *dev, u64 mask); +u64 dma_nommu_get_required_mask(struct device *dev); #ifdef CONFIG_NOT_COHERENT_CACHE /* diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index aee4fcc24990..d7492dca6599 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -19,6 +19,8 @@ struct device_node; struct pci_controller_ops { void (*dma_dev_setup)(struct pci_dev *pdev); void (*dma_bus_setup)(struct pci_bus *bus); + bool (*iommu_bypass_supported)(struct pci_dev *pdev, + u64 mask); int (*probe_mode)(struct pci_bus *bus); diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index dd8601cd20df..fda92156b194 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -6,12 +6,30 @@ * busses using the iommu infrastructure */ +#include +#include #include /* * Generic iommu implementation */ +/* + * The coherent mask may be smaller than the real mask, check if we can + * really use a direct window. + */ +static inline bool dma_iommu_alloc_bypass(struct device *dev) +{ + return dev->archdata.iommu_bypass && + dma_nommu_dma_supported(dev, dev->coherent_dma_mask); +} + +static inline bool dma_iommu_map_bypass(struct device *dev, + unsigned long attrs) +{ + return dev->archdata.iommu_bypass; +} + /* Allocates a contiguous real buffer and creates mappings over it. * Returns the virtual address of the buffer and sets dma_handle * to the dma address (mapping) of the first page. 
@@ -20,6 +38,9 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) { + if (dma_iommu_alloc_bypass(dev)) + return __dma_nommu_alloc_coherent(dev, size, dma_handle, flag, + attrs); return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size, dma_handle, dev->coherent_dma_mask, flag, dev_to_node(dev)); @@ -29,7 +50,11 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { - iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle); + if (dma_iommu_alloc_bypass(dev)) + __dma_nommu_free_coherent(dev, size, vaddr, dma_handle, attrs); + else + iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, + dma_handle); } /* Creates TCEs for a user provided buffer. The user buffer must be @@ -42,6 +67,9 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page, enum dma_data_direction direction, unsigned long attrs) { + if (dma_iommu_map_bypass(dev, attrs)) + return dma_nommu_map_page(dev, page, offset, size, direction, + attrs); return iommu_map_page(dev, get_iommu_table_base(dev), page, offset, size, device_to_mask(dev), direction, attrs); } @@ -51,8 +79,9 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, unsigned long attrs) { - iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction, - attrs); + if (!dma_iommu_map_bypass(dev, attrs)) + iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, + direction, attrs); } @@ -60,6 +89,8 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, unsigned long attrs) { + if (dma_iommu_map_bypass(dev, attrs)) + return dma_nommu_map_sg(dev, sglist, nelems, direction, attrs); return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems, device_to_mask(dev), direction, attrs); } @@ -68,10 +99,20 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, unsigned long attrs) { - ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, + if (!dma_iommu_map_bypass(dev, attrs)) + ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction, attrs); } +static bool dma_iommu_bypass_supported(struct device *dev, u64 mask) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_controller *phb = pci_bus_to_host(pdev->bus); + + return phb->controller_ops.iommu_bypass_supported && + phb->controller_ops.iommu_bypass_supported(pdev, mask); +} + /* We support DMA to/from any memory page via the iommu */ int dma_iommu_dma_supported(struct device *dev, u64 mask) { @@ -83,22 +124,39 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask) return 0; } + if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) { + dev->archdata.iommu_bypass = true; + dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); + return 1; + } + if (tbl->it_offset > (mask >> tbl->it_page_shift)) { dev_info(dev, "Warning: IOMMU offset too big for device mask\n"); dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n", mask, tbl->it_offset << tbl->it_page_shift); return 0; - } else - return 1; + } + + dev_dbg(dev, "iommu: not 64-bit, using default ops\n"); + dev->archdata.iommu_bypass = false; + return 1; } u64 dma_iommu_get_required_mask(struct device *dev) { struct iommu_table *tbl = get_iommu_table_base(dev); u64 mask; + if (!tbl) return 0; + 
if (dev_is_pci(dev)) { + u64 bypass_mask = dma_nommu_get_required_mask(dev); + + if (dma_iommu_bypass_supported(dev, bypass_mask)) + return bypass_mask; + } + mask = 1ULL << (fls_long(tbl->it_offset + tbl->it_size) - 1); mask += mask - 1; diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index b1903ebb2e9c..e5db4d3f8bea 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -40,7 +40,7 @@ static u64 __maybe_unused get_pfn_limit(struct device *dev) return pfn; } -static int dma_nommu_dma_supported(struct device *dev, u64 mask) +int dma_nommu_dma_supported(struct device *dev, u64 mask) { #ifdef CONFIG_PPC64 u64 limit = get_dma_offset(dev) + (memblock_end_of_DRAM() - 1); @@ -178,9 +178,9 @@ int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot); } -static int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction direction, - unsigned long attrs) +int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction, + unsigned long attrs) { struct scatterlist *sg; int i; @@ -209,7 +209,7 @@ static void dma_nommu_unmap_sg(struct device *dev, struct scatterlist *sgl, __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); } -static u64 dma_nommu_get_required_mask(struct device *dev) +u64 dma_nommu_get_required_mask(struct device *dev) { u64 end, mask; @@ -221,12 +221,9 @@ static u64 dma_nommu_get_required_mask(struct device *dev) return mask; } -static inline dma_addr_t dma_nommu_map_page(struct device *dev, - struct page *page, - unsigned long offset, - size_t size, - enum dma_data_direction dir, - unsigned long attrs) +dma_addr_t dma_nommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) { if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) __dma_sync_page(page, offset, size, dir); -- cgit v1.2.3 From ba767b5283c06e1a2fcdd1835c33e42b8fccd09c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:09 +0100 Subject: powerpc/cell: use the generic iommu bypass code This gets rid of a lot of clumsy code and finally allows us to mark dma_iommu_ops const. Includes fixes from Michael Ellerman. 
Signed-off-by: Christoph Hellwig Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/dma-mapping.h | 2 +- arch/powerpc/include/asm/iommu.h | 6 ++ arch/powerpc/kernel/dma-iommu.c | 7 +- arch/powerpc/platforms/cell/iommu.c | 140 +++------------------------------ 4 files changed, 20 insertions(+), 135 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index ff86b863eceb..1d80174db8a4 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -74,7 +74,7 @@ static inline unsigned long device_to_mask(struct device *dev) * Available generic sets of operations */ #ifdef CONFIG_PPC64 -extern struct dma_map_ops dma_iommu_ops; +extern const struct dma_map_ops dma_iommu_ops; #endif extern const struct dma_map_ops dma_nommu_ops; diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index bd069a6542ab..6f00a892ebdf 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -319,5 +319,11 @@ extern void iommu_release_ownership(struct iommu_table *tbl); extern enum dma_data_direction iommu_tce_direction(unsigned long tce); extern unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir); +#ifdef CONFIG_PPC_CELL_NATIVE +extern bool iommu_fixed_is_weak; +#else +#define iommu_fixed_is_weak false +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_IOMMU_H */ diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index fda92156b194..5a0b5e863b08 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -20,14 +20,15 @@ */ static inline bool dma_iommu_alloc_bypass(struct device *dev) { - return dev->archdata.iommu_bypass && + return dev->archdata.iommu_bypass && !iommu_fixed_is_weak && dma_nommu_dma_supported(dev, dev->coherent_dma_mask); } static inline bool dma_iommu_map_bypass(struct device *dev, unsigned long attrs) { - return dev->archdata.iommu_bypass; + return dev->archdata.iommu_bypass && + (!iommu_fixed_is_weak || (attrs & DMA_ATTR_WEAK_ORDERING)); } /* Allocates a contiguous real buffer and creates mappings over it. 
@@ -163,7 +164,7 @@ u64 dma_iommu_get_required_mask(struct device *dev) return mask; } -struct dma_map_ops dma_iommu_ops = { +const struct dma_map_ops dma_iommu_ops = { .alloc = dma_iommu_alloc_coherent, .free = dma_iommu_free_coherent, .mmap = dma_nommu_mmap_coherent, diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 4c609c0db5af..6663cd3e6bb6 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -546,7 +546,7 @@ static unsigned long cell_dma_nommu_offset; static unsigned long dma_iommu_fixed_base; /* iommu_fixed_is_weak is set if booted with iommu_fixed=weak */ -static int iommu_fixed_is_weak; +bool iommu_fixed_is_weak; static struct iommu_table *cell_get_iommu_table(struct device *dev) { @@ -568,94 +568,6 @@ static struct iommu_table *cell_get_iommu_table(struct device *dev) return &window->table; } -/* A coherent allocation implies strong ordering */ - -static void *dma_fixed_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs) -{ - if (iommu_fixed_is_weak) - return iommu_alloc_coherent(dev, cell_get_iommu_table(dev), - size, dma_handle, - device_to_mask(dev), flag, - dev_to_node(dev)); - else - return dma_nommu_ops.alloc(dev, size, dma_handle, flag, - attrs); -} - -static void dma_fixed_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) -{ - if (iommu_fixed_is_weak) - iommu_free_coherent(cell_get_iommu_table(dev), size, vaddr, - dma_handle); - else - dma_nommu_ops.free(dev, size, vaddr, dma_handle, attrs); -} - -static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING)) - return dma_nommu_ops.map_page(dev, page, offset, size, - direction, attrs); - else - return iommu_map_page(dev, cell_get_iommu_table(dev), page, - offset, size, device_to_mask(dev), - direction, attrs); -} - -static void dma_fixed_unmap_page(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction direction, - unsigned long attrs) -{ - if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING)) - dma_nommu_ops.unmap_page(dev, dma_addr, size, direction, - attrs); - else - iommu_unmap_page(cell_get_iommu_table(dev), dma_addr, size, - direction, attrs); -} - -static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction, - unsigned long attrs) -{ - if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING)) - return dma_nommu_ops.map_sg(dev, sg, nents, direction, attrs); - else - return ppc_iommu_map_sg(dev, cell_get_iommu_table(dev), sg, - nents, device_to_mask(dev), - direction, attrs); -} - -static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction, - unsigned long attrs) -{ - if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING)) - dma_nommu_ops.unmap_sg(dev, sg, nents, direction, attrs); - else - ppc_iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents, - direction, attrs); -} - -static int dma_suported_and_switch(struct device *dev, u64 dma_mask); - -static const struct dma_map_ops dma_iommu_fixed_ops = { - .alloc = dma_fixed_alloc_coherent, - .free = dma_fixed_free_coherent, - .map_sg = dma_fixed_map_sg, - .unmap_sg = dma_fixed_unmap_sg, - .dma_supported = dma_suported_and_switch, - .map_page = 
dma_fixed_map_page, - .unmap_page = dma_fixed_unmap_page, -}; - static u64 cell_iommu_get_fixed_address(struct device *dev); static void cell_dma_dev_setup(struct device *dev) @@ -956,22 +868,10 @@ out: return dev_addr; } -static int dma_suported_and_switch(struct device *dev, u64 dma_mask) +static bool cell_pci_iommu_bypass_supported(struct pci_dev *pdev, u64 mask) { - if (dma_mask == DMA_BIT_MASK(64) && - cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) { - dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); - set_dma_ops(dev, &dma_iommu_fixed_ops); - return 1; - } - - if (dma_iommu_dma_supported(dev, dma_mask)) { - dev_dbg(dev, "iommu: not 64-bit, using default ops\n"); - set_dma_ops(dev, &dma_iommu_ops); - return 1; - } - - return 0; + return mask == DMA_BIT_MASK(64) && + cell_iommu_get_fixed_address(&pdev->dev) != OF_BAD_ADDR; } static void insert_16M_pte(unsigned long addr, unsigned long *ptab, @@ -1125,9 +1025,8 @@ static int __init cell_iommu_fixed_mapping_init(void) cell_iommu_setup_window(iommu, np, dbase, dsize, 0); } - dma_iommu_ops.dma_supported = dma_suported_and_switch; - set_pci_dma_ops(&dma_iommu_ops); - + cell_pci_controller_ops.iommu_bypass_supported = + cell_pci_iommu_bypass_supported; return 0; } @@ -1148,7 +1047,7 @@ static int __init setup_iommu_fixed(char *str) pciep = of_find_node_by_type(NULL, "pcie-endpoint"); if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0)) - iommu_fixed_is_weak = DMA_ATTR_WEAK_ORDERING; + iommu_fixed_is_weak = true; of_node_put(pciep); @@ -1156,26 +1055,6 @@ static int __init setup_iommu_fixed(char *str) } __setup("iommu_fixed=", setup_iommu_fixed); -static u64 cell_dma_get_required_mask(struct device *dev) -{ - const struct dma_map_ops *dma_ops; - - if (!dev->dma_mask) - return 0; - - if (!iommu_fixed_disabled && - cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) - return DMA_BIT_MASK(64); - - dma_ops = get_dma_ops(dev); - if (dma_ops->get_required_mask) - return dma_ops->get_required_mask(dev); - - WARN_ONCE(1, "no get_required_mask in %p ops", dma_ops); - - return DMA_BIT_MASK(64); -} - static int __init cell_iommu_init(void) { struct device_node *np; @@ -1192,10 +1071,9 @@ static int __init cell_iommu_init(void) /* Setup various callbacks */ cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup; - ppc_md.dma_get_required_mask = cell_dma_get_required_mask; if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0) - goto bail; + goto done; /* Create an iommu for each /axon node. */ for_each_node_by_name(np, "axon") { @@ -1212,7 +1090,7 @@ static int __init cell_iommu_init(void) continue; cell_iommu_init_one(np, SPIDER_DMA_OFFSET); } - + done: /* Setup default PCI iommu ops */ set_pci_dma_ops(&dma_iommu_ops); -- cgit v1.2.3 From ffe3dfd4e3598651a87651f3d59f144ee31f60fb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:15 +0100 Subject: powerpc/dma: stop overriding dma_get_required_mask The ppc_md and pci_controller_ops methods are unused now and can be removed. The dma_nommu implementation is identical to the generic one except for using max_pfn instead of calling into the memblock API, and all other dma_map_ops instances implement a method of their own.
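Both variants compute the same thing: the smallest all-ones mask that covers the highest DMA-visible byte. A sketch of that calculation (the helper name is made up for illustration):

    static u64 required_mask_for(u64 end)
    {
            u64 mask = 1ULL << (fls64(end) - 1);

            /* round up to all ones, e.g. 0x1_0000_0000 -> 0x1_ffff_ffff */
            mask += mask - 1;
            return mask;
    }

The only difference is where "end" comes from: max_pfn in the generic code, memblock_end_of_DRAM() plus the per-device offset in the powerpc version.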
Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/device.h | 2 -- arch/powerpc/include/asm/dma-mapping.h | 2 -- arch/powerpc/include/asm/machdep.h | 2 -- arch/powerpc/include/asm/pci-bridge.h | 1 - arch/powerpc/kernel/dma.c | 29 ----------------------------- kernel/dma/mapping.c | 2 -- 6 files changed, 38 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 1aa53318b4bc..3814e1c2d4bc 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -59,6 +59,4 @@ struct pdev_archdata { u64 dma_mask; }; -#define ARCH_HAS_DMA_GET_REQUIRED_MASK - #endif /* _ASM_POWERPC_DEVICE_H */ diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 1d80174db8a4..dc7f7bcdf65d 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -112,7 +112,5 @@ static inline void set_dma_offset(struct device *dev, dma_addr_t off) #define HAVE_ARCH_DMA_SET_MASK 1 -extern u64 __dma_get_required_mask(struct device *dev); - #endif /* __KERNEL__ */ #endif /* _ASM_DMA_MAPPING_H */ diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 8311869005fa..7b70dcbce1b9 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -47,9 +47,7 @@ struct machdep_calls { #endif #endif /* CONFIG_PPC64 */ - /* Platform set_dma_mask and dma_get_required_mask overrides */ int (*dma_set_mask)(struct device *dev, u64 dma_mask); - u64 (*dma_get_required_mask)(struct device *dev); int (*probe)(void); void (*setup_arch)(void); /* Optional, may be NULL */ diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index d7492dca6599..236a7460b6ec 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -46,7 +46,6 @@ struct pci_controller_ops { #endif int (*dma_set_mask)(struct pci_dev *pdev, u64 dma_mask); - u64 (*dma_get_required_mask)(struct pci_dev *pdev); void (*shutdown)(struct pci_controller *hose); }; diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index e5db4d3f8bea..0d52107b90f0 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -318,35 +318,6 @@ int dma_set_mask(struct device *dev, u64 dma_mask) } EXPORT_SYMBOL(dma_set_mask); -u64 __dma_get_required_mask(struct device *dev) -{ - const struct dma_map_ops *dma_ops = get_dma_ops(dev); - - if (unlikely(dma_ops == NULL)) - return 0; - - if (dma_ops->get_required_mask) - return dma_ops->get_required_mask(dev); - - return DMA_BIT_MASK(8 * sizeof(dma_addr_t)); -} - -u64 dma_get_required_mask(struct device *dev) -{ - if (ppc_md.dma_get_required_mask) - return ppc_md.dma_get_required_mask(dev); - - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - struct pci_controller *phb = pci_bus_to_host(pdev->bus); - if (phb->controller_ops.dma_get_required_mask) - return phb->controller_ops.dma_get_required_mask(pdev); - } - - return __dma_get_required_mask(dev); -} -EXPORT_SYMBOL_GPL(dma_get_required_mask); - static int __init dma_init(void) { #ifdef CONFIG_IBMVIO diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index a11006b6d8e8..40c0af744692 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -207,7 +207,6 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, } EXPORT_SYMBOL(dma_mmap_attrs); -#ifndef 
ARCH_HAS_DMA_GET_REQUIRED_MASK static u64 dma_default_get_required_mask(struct device *dev) { u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); @@ -238,7 +237,6 @@ u64 dma_get_required_mask(struct device *dev) return dma_default_get_required_mask(dev); } EXPORT_SYMBOL_GPL(dma_get_required_mask); -#endif #ifndef arch_dma_alloc_attrs #define arch_dma_alloc_attrs(dev) (true) -- cgit v1.2.3 From 662acad4067a2d2de8864c1231630945321aeef1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:16 +0100 Subject: powerpc/pci: remove the dma_set_mask pci_controller ops methods Unused now. Signed-off-by: Christoph Hellwig Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 2 -- arch/powerpc/kernel/dma.c | 7 ------- 2 files changed, 9 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 236a7460b6ec..98e8b46aff97 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -45,8 +45,6 @@ struct pci_controller_ops { void (*teardown_msi_irqs)(struct pci_dev *pdev); #endif - int (*dma_set_mask)(struct pci_dev *pdev, u64 dma_mask); - void (*shutdown)(struct pci_controller *hose); }; diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 0d52107b90f0..5eca02315b2e 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -304,13 +304,6 @@ int dma_set_mask(struct device *dev, u64 dma_mask) if (ppc_md.dma_set_mask) return ppc_md.dma_set_mask(dev, dma_mask); - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - struct pci_controller *phb = pci_bus_to_host(pdev->bus); - if (phb->controller_ops.dma_set_mask) - return phb->controller_ops.dma_set_mask(pdev, dma_mask); - } - if (!dev->dma_mask || !dma_supported(dev, dma_mask)) return -EIO; *dev->dma_mask = dma_mask; -- cgit v1.2.3 From 7c1013b48778e203d4b17ea49ef0e450dd921664 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:18 +0100 Subject: powerpc/dma: remove get_pci_dma_ops This function is only used by the Cell iommu code, which can just as well keep track internally of whether it is using the iommu.
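The replacement pattern is a file-local flag set once at init time instead of comparing against the global ops pointer; roughly (a sketch of the Cell change in the hunks below):

    static bool cell_iommu_enabled;    /* set when the iommu path is chosen */

    static void cell_dma_dev_setup(struct device *dev)
    {
            /* was: if (get_pci_dma_ops() == &dma_iommu_ops) */
            if (cell_iommu_enabled)
                    set_iommu_table_base(dev, cell_get_iommu_table(dev));
    }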
Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci.h | 2 -- arch/powerpc/kernel/pci-common.c | 6 ------ arch/powerpc/platforms/cell/iommu.c | 17 ++++++++--------- 3 files changed, 8 insertions(+), 17 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 0c72f1897063..6a1861a6301e 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -52,10 +52,8 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) #ifdef CONFIG_PCI extern void set_pci_dma_ops(const struct dma_map_ops *dma_ops); -extern const struct dma_map_ops *get_pci_dma_ops(void); #else /* CONFIG_PCI */ #define set_pci_dma_ops(d) -#define get_pci_dma_ops() NULL #endif #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 88e4f69a09e5..a84707680525 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -69,12 +69,6 @@ void set_pci_dma_ops(const struct dma_map_ops *dma_ops) pci_dma_ops = dma_ops; } -const struct dma_map_ops *get_pci_dma_ops(void) -{ - return pci_dma_ops; -} -EXPORT_SYMBOL(get_pci_dma_ops); - /* * This function should run under locking protection, specifically * hose_spinlock. diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 6663cd3e6bb6..a3c4057a8f65 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -544,6 +544,7 @@ static struct cbe_iommu *cell_iommu_for_node(int nid) static unsigned long cell_dma_nommu_offset; static unsigned long dma_iommu_fixed_base; +static bool cell_iommu_enabled; /* iommu_fixed_is_weak is set if booted with iommu_fixed=weak */ bool iommu_fixed_is_weak; @@ -572,16 +573,14 @@ static u64 cell_iommu_get_fixed_address(struct device *dev); static void cell_dma_dev_setup(struct device *dev) { - if (get_pci_dma_ops() == &dma_iommu_ops) { + if (cell_iommu_enabled) { u64 addr = cell_iommu_get_fixed_address(dev); if (addr != OF_BAD_ADDR) set_dma_offset(dev, addr + dma_iommu_fixed_base); set_iommu_table_base(dev, cell_get_iommu_table(dev)); - } else if (get_pci_dma_ops() == &dma_nommu_ops) { - set_dma_offset(dev, cell_dma_nommu_offset); } else { - BUG(); + set_dma_offset(dev, cell_dma_nommu_offset); } } @@ -599,11 +598,11 @@ static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action, if (action != BUS_NOTIFY_ADD_DEVICE) return 0; - /* We use the PCI DMA ops */ - dev->dma_ops = get_pci_dma_ops(); - + if (cell_iommu_enabled) + dev->dma_ops = &dma_iommu_ops; + else + dev->dma_ops = &dma_nommu_ops; cell_dma_dev_setup(dev); - return 0; } @@ -1093,7 +1092,7 @@ static int __init cell_iommu_init(void) done: /* Setup default PCI iommu ops */ set_pci_dma_ops(&dma_iommu_ops); - + cell_iommu_enabled = true; bail: /* Register callbacks on OF platform device addition/removal * to handle linking them to the right DMA operations -- cgit v1.2.3 From 391133fd5adaba319795cd96882d1ea405c41cf6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:19 +0100 Subject: powerpc/dma: move pci_dma_dev_setup_swiotlb to fsl_pci.c pci_dma_dev_setup_swiotlb is only used by the fsl_pci code, and closely related to it, so fsl_pci.c seems like a better place for it. 
Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/swiotlb.h | 2 -- arch/powerpc/kernel/dma-swiotlb.c | 11 ----------- arch/powerpc/sysdev/fsl_pci.c | 9 +++++++++ 3 files changed, 9 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/swiotlb.h b/arch/powerpc/include/asm/swiotlb.h index f65ecf57b66c..26a0f12b835b 100644 --- a/arch/powerpc/include/asm/swiotlb.h +++ b/arch/powerpc/include/asm/swiotlb.h @@ -18,8 +18,6 @@ extern const struct dma_map_ops powerpc_swiotlb_dma_ops; extern unsigned int ppc_swiotlb_enable; int __init swiotlb_setup_bus_notifier(void); -extern void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev); - #ifdef CONFIG_SWIOTLB void swiotlb_detect_4g(void); #else diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 7d5fc9751622..42badc4bf536 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -62,17 +62,6 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = { .get_required_mask = swiotlb_powerpc_get_required, }; -void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev) -{ - struct pci_controller *hose; - struct dev_archdata *sd; - - hose = pci_bus_to_host(pdev->bus); - sd = &pdev->dev.archdata; - sd->max_direct_dma_addr = - hose->dma_window_base_cur + hose->dma_window_size; -} - static int ppc_swiotlb_bus_notify(struct notifier_block *nb, unsigned long action, void *data) { diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 918be816b097..561f97d698cc 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -114,6 +114,15 @@ static struct pci_ops fsl_indirect_pcie_ops = static u64 pci64_dma_offset; #ifdef CONFIG_SWIOTLB +static void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct dev_archdata *sd = &pdev->dev.archdata; + + sd->max_direct_dma_addr = + hose->dma_window_base_cur + hose->dma_window_size; +} + static void setup_swiotlb_ops(struct pci_controller *hose) { if (ppc_swiotlb_enable) { -- cgit v1.2.3 From 74194cdaac41f6dfaacd9433f739dcbd83125d0b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:20 +0100 Subject: powerpc/dma: remove max_direct_dma_addr The max_direct_dma_addr duplicates the bus_dma_mask field in struct device. Use the generic field instead. 
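With the generic field, a controller whose inbound window covers only part of RAM records that limit once per device; a sketch of the setup side, with a hypothetical "foo" controller standing in for the fsl_pci hunk below:

    static void foo_pci_dma_dev_setup(struct pci_dev *pdev)
    {
            struct pci_controller *hose = pci_bus_to_host(pdev->bus);

            /* the generic core consults bus_dma_mask in dma_capable() */
            pdev->dev.bus_dma_mask =
                    hose->dma_window_base_cur + hose->dma_window_size;
    }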
Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/device.h | 3 --- arch/powerpc/include/asm/dma-direct.h | 4 +--- arch/powerpc/kernel/dma-swiotlb.c | 21 +-------------------- arch/powerpc/kernel/dma.c | 5 ++--- arch/powerpc/sysdev/fsl_pci.c | 4 ++-- 5 files changed, 6 insertions(+), 31 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 3814e1c2d4bc..a130be13ee83 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -38,9 +38,6 @@ struct dev_archdata { #ifdef CONFIG_IOMMU_API void *iommu_domain; #endif -#ifdef CONFIG_SWIOTLB - dma_addr_t max_direct_dma_addr; -#endif #ifdef CONFIG_PPC64 struct pci_dn *pci_data; #endif diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h index 7702875aabb7..e00ab5d0612d 100644 --- a/arch/powerpc/include/asm/dma-direct.h +++ b/arch/powerpc/include/asm/dma-direct.h @@ -5,9 +5,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { #ifdef CONFIG_SWIOTLB - struct dev_archdata *sd = &dev->archdata; - - if (sd->max_direct_dma_addr && addr + size > sd->max_direct_dma_addr) + if (dev->bus_dma_mask && addr + size > dev->bus_dma_mask) return false; #endif diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 42badc4bf536..0e21c318300e 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -24,21 +24,6 @@ unsigned int ppc_swiotlb_enable; -static u64 swiotlb_powerpc_get_required(struct device *dev) -{ - u64 end, mask, max_direct_dma_addr = dev->archdata.max_direct_dma_addr; - - end = memblock_end_of_DRAM(); - if (max_direct_dma_addr && end > max_direct_dma_addr) - end = max_direct_dma_addr; - end += get_dma_offset(dev); - - mask = 1ULL << (fls64(end) - 1); - mask += mask - 1; - - return mask; -} - /* * At the moment, all platforms that use this code only require * swiotlb to be used if we're operating on HIGHMEM. 
Since @@ -59,22 +44,18 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = { .sync_single_for_device = dma_direct_sync_single_for_device, .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu, .sync_sg_for_device = dma_direct_sync_sg_for_device, - .get_required_mask = swiotlb_powerpc_get_required, + .get_required_mask = dma_direct_get_required_mask, }; static int ppc_swiotlb_bus_notify(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct dev_archdata *sd; /* We are only intereted in device addition */ if (action != BUS_NOTIFY_ADD_DEVICE) return 0; - sd = &dev->archdata; - sd->max_direct_dma_addr = 0; - /* May need to bounce if the device can't address all of DRAM */ if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM()) set_dma_ops(dev, &powerpc_swiotlb_dma_ops); diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 9def69c8f602..1e191eb3f0ec 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -30,11 +30,10 @@ static u64 __maybe_unused get_pfn_limit(struct device *dev) { u64 pfn = (dev->coherent_dma_mask >> PAGE_SHIFT) + 1; - struct dev_archdata __maybe_unused *sd = &dev->archdata; #ifdef CONFIG_SWIOTLB - if (sd->max_direct_dma_addr && dev->dma_ops == &powerpc_swiotlb_dma_ops) - pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT); + if (dev->bus_dma_mask && dev->dma_ops == &powerpc_swiotlb_dma_ops) + pfn = min_t(u64, pfn, dev->bus_dma_mask >> PAGE_SHIFT); #endif return pfn; diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 561f97d698cc..b710cee023a2 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -117,9 +117,8 @@ static u64 pci64_dma_offset; static void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct dev_archdata *sd = &pdev->dev.archdata; - sd->max_direct_dma_addr = + pdev->dev.bus_dma_mask = hose->dma_window_base_cur + hose->dma_window_size; } @@ -144,6 +143,7 @@ static int fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask) * mapping that allows addressing any RAM address from across PCI. */ if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) { + dev->bus_dma_mask = 0; set_dma_ops(dev, &dma_nommu_ops); set_dma_offset(dev, pci64_dma_offset); } -- cgit v1.2.3 From 9b18114c0be0193ebe772e45b3731602f056d54e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:21 +0100 Subject: powerpc/dma: fix an off-by-one in dma_capable We need to compare the last byte in the dma range and not the one after it for the bus_dma_mask, just like we do for the regular dma_mask. Fix this cleanly by merging the two comparisons into one.
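A concrete case: with bus_dma_mask = 0xffffffff, a 4-byte mapping ending exactly at the 4 GiB boundary (addr = 0xfffffffc, size = 4) must pass. Comparing addr + size = 0x100000000 against the mask wrongly rejects it; comparing the last byte, addr + size - 1 = 0xffffffff, accepts it. The merged check, as the hunk below lands, becomes:

    return addr + size - 1 <=
            min_not_zero(*dev->dma_mask, dev->bus_dma_mask);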
Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/dma-direct.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h index e00ab5d0612d..92d8aed86422 100644 --- a/arch/powerpc/include/asm/dma-direct.h +++ b/arch/powerpc/include/asm/dma-direct.h @@ -4,15 +4,11 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { -#ifdef CONFIG_SWIOTLB - if (dev->bus_dma_mask && addr + size > dev->bus_dma_mask) - return false; -#endif - if (!dev->dma_mask) return false; - return addr + size - 1 <= *dev->dma_mask; + return addr + size - 1 <= + min_not_zero(*dev->dma_mask, dev->bus_dma_mask); } static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) -- cgit v1.2.3 From 11ddce15451eb5e3cb2c951dc5c8d86a2802017a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:22 +0100 Subject: dma-mapping, powerpc: simplify the arch dma_set_mask override Instead of letting the architecture supply all of dma_set_mask just give it an additional hook selected by Kconfig. Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/dma-mapping.h | 2 -- arch/powerpc/include/asm/machdep.h | 2 +- arch/powerpc/kernel/Makefile | 1 + arch/powerpc/kernel/dma-mask.c | 12 ++++++++++++ arch/powerpc/kernel/dma.c | 12 ------------ arch/powerpc/sysdev/fsl_pci.c | 8 +------- kernel/dma/Kconfig | 3 +++ kernel/dma/mapping.c | 9 +++++++-- 9 files changed, 26 insertions(+), 24 deletions(-) create mode 100644 arch/powerpc/kernel/dma-mask.c (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b238c63a75cc..39d07c02f7d8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -886,6 +886,7 @@ config FSL_SOC config FSL_PCI bool + select ARCH_HAS_DMA_SET_MASK select PPC_INDIRECT_PCI select PCI_QUIRKS diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index dc7f7bcdf65d..16d45518d9bb 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -110,7 +110,5 @@ static inline void set_dma_offset(struct device *dev, dma_addr_t off) dev->archdata.dma_offset = off; } -#define HAVE_ARCH_DMA_SET_MASK 1 - #endif /* __KERNEL__ */ #endif /* _ASM_DMA_MAPPING_H */ diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 7b70dcbce1b9..2f0ca6560e47 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -47,7 +47,7 @@ struct machdep_calls { #endif #endif /* CONFIG_PPC64 */ - int (*dma_set_mask)(struct device *dev, u64 dma_mask); + void (*dma_set_mask)(struct device *dev, u64 dma_mask); int (*probe)(void); void (*setup_arch)(void); /* Optional, may be NULL */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index cb7f0bb9ee71..9bb12cd642ef 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -105,6 +105,7 @@ obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o +obj-$(CONFIG_ARCH_HAS_DMA_SET_MASK) += dma-mask.o pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \ 
diff --git a/arch/powerpc/kernel/dma-mask.c b/arch/powerpc/kernel/dma-mask.c new file mode 100644 index 000000000000..ffbbbc432612 --- /dev/null +++ b/arch/powerpc/kernel/dma-mask.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +void arch_dma_set_mask(struct device *dev, u64 dma_mask) +{ + if (ppc_md.dma_set_mask) + ppc_md.dma_set_mask(dev, dma_mask); +} +EXPORT_SYMBOL(arch_dma_set_mask); diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 1e191eb3f0ec..e422ca65d1cf 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -234,18 +234,6 @@ const struct dma_map_ops dma_nommu_ops = { }; EXPORT_SYMBOL(dma_nommu_ops); -int dma_set_mask(struct device *dev, u64 dma_mask) -{ - if (ppc_md.dma_set_mask) - return ppc_md.dma_set_mask(dev, dma_mask); - - if (!dev->dma_mask || !dma_supported(dev, dma_mask)) - return -EIO; - *dev->dma_mask = dma_mask; - return 0; -} -EXPORT_SYMBOL(dma_set_mask); - static int __init dma_init(void) { #ifdef CONFIG_IBMVIO diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index b710cee023a2..0c6510f340cb 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -133,11 +133,8 @@ static void setup_swiotlb_ops(struct pci_controller *hose) static inline void setup_swiotlb_ops(struct pci_controller *hose) {} #endif -static int fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask) +static void fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask) { - if (!dev->dma_mask || !dma_supported(dev, dma_mask)) - return -EIO; - /* * Fix up PCI devices that are able to DMA to the large inbound * mapping that allows addressing any RAM address from across PCI. @@ -147,9 +144,6 @@ static int fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask) set_dma_ops(dev, &dma_nommu_ops); set_dma_offset(dev, pci64_dma_offset); } - - *dev->dma_mask = dma_mask; - return 0; } static int setup_one_atmu(struct ccsr_pci __iomem *pci, diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index ca88b867e7fe..0711d18645de 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -16,6 +16,9 @@ config ARCH_DMA_ADDR_T_64BIT config ARCH_HAS_DMA_COHERENCE_H bool +config ARCH_HAS_DMA_SET_MASK + bool + config HAVE_GENERIC_DMA_COHERENT bool diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 40c0af744692..ef2aba503467 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -316,18 +316,23 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -#ifndef HAVE_ARCH_DMA_SET_MASK +#ifdef CONFIG_ARCH_HAS_DMA_SET_MASK +void arch_dma_set_mask(struct device *dev, u64 mask); +#else +#define arch_dma_set_mask(dev, mask) do { } while (0) +#endif + int dma_set_mask(struct device *dev, u64 mask) { if (!dev->dma_mask || !dma_supported(dev, mask)) return -EIO; + arch_dma_set_mask(dev, mask); dma_check_mask(dev, mask); *dev->dma_mask = mask; return 0; } EXPORT_SYMBOL(dma_set_mask); -#endif #ifndef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK int dma_set_coherent_mask(struct device *dev, u64 mask) -- cgit v1.2.3 From 6666cc17d7802b7dcbb073e7be1eee2cf6fa64d9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:24 +0100 Subject: powerpc/dma: remove dma_nommu_mmap_coherent The coherent cache version of this function is already functionally identical to the default version, and by defining the arch_dma_coherent_to_pfn hook the same is true for the noncoherent version as well.
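With the hook defined, the generic mmap path can recover the PFN itself; conceptually (a simplified sketch, not the verbatim common code):

    static int dma_mmap_sketch(struct device *dev, struct vm_area_struct *vma,
                    void *cpu_addr, dma_addr_t handle, size_t size)
    {
            unsigned long pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, handle);

            return remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
                                   vma->vm_end - vma->vm_start,
                                   vma->vm_page_prot);
    }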
Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/dma-mapping.h | 4 ---- arch/powerpc/kernel/dma-iommu.c | 1 - arch/powerpc/kernel/dma-swiotlb.c | 1 - arch/powerpc/kernel/dma.c | 19 ------------------- arch/powerpc/mm/dma-noncoherent.c | 7 +++++-- arch/powerpc/platforms/Kconfig.cputype | 1 + arch/powerpc/platforms/pseries/vio.c | 1 - 7 files changed, 6 insertions(+), 28 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 16d45518d9bb..f19c486e7b3f 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -25,10 +25,6 @@ extern void *__dma_nommu_alloc_coherent(struct device *dev, size_t size, extern void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs); -extern int dma_nommu_mmap_coherent(struct device *dev, - struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t handle, - size_t size, unsigned long attrs); int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction direction, unsigned long attrs); diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 5a0b5e863b08..ed8b60829a90 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -167,7 +167,6 @@ u64 dma_iommu_get_required_mask(struct device *dev) const struct dma_map_ops dma_iommu_ops = { .alloc = dma_iommu_alloc_coherent, .free = dma_iommu_free_coherent, - .mmap = dma_nommu_mmap_coherent, .map_sg = dma_iommu_map_sg, .unmap_sg = dma_iommu_unmap_sg, .dma_supported = dma_iommu_dma_supported, diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 0e21c318300e..d5950a0cb758 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -34,7 +34,6 @@ unsigned int ppc_swiotlb_enable; const struct dma_map_ops powerpc_swiotlb_dma_ops = { .alloc = __dma_nommu_alloc_coherent, .free = __dma_nommu_free_coherent, - .mmap = dma_nommu_mmap_coherent, .map_sg = dma_direct_map_sg, .unmap_sg = dma_direct_unmap_sg, .dma_supported = swiotlb_dma_supported, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 10fa4e18b4e9..841c43355a7e 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -114,24 +114,6 @@ void __dma_nommu_free_coherent(struct device *dev, size_t size, } #endif /* !CONFIG_NOT_COHERENT_CACHE */ -int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t handle, size_t size, - unsigned long attrs) -{ - unsigned long pfn; - -#ifdef CONFIG_NOT_COHERENT_CACHE - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr); -#else - pfn = page_to_pfn(virt_to_page(cpu_addr)); -#endif - return remap_pfn_range(vma, vma->vm_start, - pfn + vma->vm_pgoff, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); -} - int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction direction, unsigned long attrs) @@ -218,7 +200,6 @@ static inline void dma_nommu_sync_single(struct device *dev, const struct dma_map_ops dma_nommu_ops = { .alloc = __dma_nommu_alloc_coherent, .free = __dma_nommu_free_coherent, - .mmap = dma_nommu_mmap_coherent, .map_sg = dma_nommu_map_sg, .unmap_sg = dma_nommu_unmap_sg, .dma_supported = dma_nommu_dma_supported, diff 
--git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index e955539686a4..ee95da19c82d 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -400,14 +401,16 @@ EXPORT_SYMBOL(__dma_sync_page); /* * Return the PFN for a given cpu virtual address returned by - * __dma_nommu_alloc_coherent. This is used by dma_mmap_coherent() + * __dma_nommu_alloc_coherent. */ -unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr) +long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr, + dma_addr_t dma_addr) { /* This should always be populated, so we don't test every * level. If that fails, we'll have a nice crash which * will be as good as a BUG_ON() */ + unsigned long cpu_addr = (unsigned long)vaddr; pgd_t *pgd = pgd_offset_k(cpu_addr); pud_t *pud = pud_offset(pgd, cpu_addr); pmd_t *pmd = pmd_offset(pud, cpu_addr); diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 8c7464c3f27f..48cd5aa90ad2 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -402,6 +402,7 @@ config NOT_COHERENT_CACHE bool depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \ GAMECUBE_COMMON || AMIGAONE + select ARCH_HAS_DMA_COHERENT_TO_PFN default n if PPC_47x default y diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 7870bf99168c..b7dc8bd41fd0 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -603,7 +603,6 @@ static void vio_dma_iommu_unmap_sg(struct device *dev, static const struct dma_map_ops vio_dma_mapping_ops = { .alloc = vio_dma_iommu_alloc_coherent, .free = vio_dma_iommu_free_coherent, - .mmap = dma_nommu_mmap_coherent, .map_sg = vio_dma_iommu_map_sg, .unmap_sg = vio_dma_iommu_unmap_sg, .map_page = vio_dma_iommu_map_page, -- cgit v1.2.3 From 5a47910d76f26e5fe6e9837872efdf8282ea76fb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:25 +0100 Subject: powerpc/dma: remove dma_nommu_get_required_mask This function is identical to the generic dma_direct_get_required_mask, except that the generic version also takes the bus_dma_mask into account, which could lead to incorrect results in the powerpc version.
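The behavioural difference described above amounts to an extra clamp; a rough sketch of that behaviour (illustrative only, not the verbatim generic implementation):

    static u64 direct_required_mask_sketch(struct device *dev)
    {
            u64 end = phys_to_dma(dev, memblock_end_of_DRAM() - 1);
            u64 mask = (1ULL << (fls64(end) - 1)) * 2 - 1;

            /* the generic version also honours a bus limitation */
            if (dev->bus_dma_mask)
                    mask = min(mask, dev->bus_dma_mask);
            return mask;
    }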
Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/dma-mapping.h | 1 - arch/powerpc/kernel/dma-iommu.c | 2 +- arch/powerpc/kernel/dma.c | 14 +------------- 3 files changed, 2 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index f19c486e7b3f..af9a32d4fcf8 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -32,7 +32,6 @@ dma_addr_t dma_nommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); int dma_nommu_dma_supported(struct device *dev, u64 mask); -u64 dma_nommu_get_required_mask(struct device *dev); #ifdef CONFIG_NOT_COHERENT_CACHE /* diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index ed8b60829a90..4377b69a9d42 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -152,7 +152,7 @@ u64 dma_iommu_get_required_mask(struct device *dev) return 0; if (dev_is_pci(dev)) { - u64 bypass_mask = dma_nommu_get_required_mask(dev); + u64 bypass_mask = dma_direct_get_required_mask(dev); if (dma_iommu_bypass_supported(dev, bypass_mask)) return bypass_mask; diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 841c43355a7e..46afc66cc271 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -145,18 +145,6 @@ static void dma_nommu_unmap_sg(struct device *dev, struct scatterlist *sgl, __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); } -u64 dma_nommu_get_required_mask(struct device *dev) -{ - u64 end, mask; - - end = memblock_end_of_DRAM() + get_dma_offset(dev); - - mask = 1ULL << (fls64(end) - 1); - mask += mask - 1; - - return mask; -} - dma_addr_t dma_nommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -205,7 +193,7 @@ const struct dma_map_ops dma_nommu_ops = { .dma_supported = dma_nommu_dma_supported, .map_page = dma_nommu_map_page, .unmap_page = dma_nommu_unmap_page, - .get_required_mask = dma_nommu_get_required_mask, + .get_required_mask = dma_direct_get_required_mask, #ifdef CONFIG_NOT_COHERENT_CACHE .sync_single_for_cpu = dma_nommu_sync_single, .sync_single_for_device = dma_nommu_sync_single, -- cgit v1.2.3 From 65a21b71f948406201e4f62e41f06513350ca390 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:26 +0100 Subject: powerpc/dma: remove dma_nommu_dma_supported This function is largely identical to the generic version used everywhere else. Replace it with the generic version. 
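Both versions reduce to "does the mask cover the last byte of DRAM as the device sees it"; a sketch, simplified and ignoring the FSL_SOC special case visible in the removed code below:

    static int dma_supported_sketch(struct device *dev, u64 mask)
    {
            /* last DRAM byte after applying any per-device DMA offset */
            u64 limit = phys_to_dma(dev, memblock_end_of_DRAM() - 1);

            return mask >= limit;
    }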
Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/dma-mapping.h | 1 - arch/powerpc/kernel/dma-iommu.c | 2 +- arch/powerpc/kernel/dma.c | 25 +------------------------ 3 files changed, 2 insertions(+), 26 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index af9a32d4fcf8..cdf70aaeafeb 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -31,7 +31,6 @@ int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl, dma_addr_t dma_nommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); -int dma_nommu_dma_supported(struct device *dev, u64 mask); #ifdef CONFIG_NOT_COHERENT_CACHE /* diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 4377b69a9d42..67fbfaa4e3b2 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -21,7 +21,7 @@ static inline bool dma_iommu_alloc_bypass(struct device *dev) { return dev->archdata.iommu_bypass && !iommu_fixed_is_weak && - dma_nommu_dma_supported(dev, dev->coherent_dma_mask); + dma_direct_supported(dev, dev->coherent_dma_mask); } static inline bool dma_iommu_map_bypass(struct device *dev, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 46afc66cc271..a3546a82f6d7 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -39,29 +39,6 @@ static u64 __maybe_unused get_pfn_limit(struct device *dev) return pfn; } -int dma_nommu_dma_supported(struct device *dev, u64 mask) -{ -#ifdef CONFIG_PPC64 - u64 limit = phys_to_dma(dev, (memblock_end_of_DRAM() - 1)); - - /* Limit fits in the mask, we are good */ - if (mask >= limit) - return 1; - -#ifdef CONFIG_FSL_SOC - /* - * Freescale gets another chance via ZONE_DMA, however - * that will have to be refined if/when they support iommus - */ - return 1; -#endif - /* Sorry ... */ - return 0; -#else - return 1; -#endif -} - #ifndef CONFIG_NOT_COHERENT_CACHE void *__dma_nommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, @@ -190,7 +167,7 @@ const struct dma_map_ops dma_nommu_ops = { .free = __dma_nommu_free_coherent, .map_sg = dma_nommu_map_sg, .unmap_sg = dma_nommu_unmap_sg, - .dma_supported = dma_nommu_dma_supported, + .dma_supported = dma_direct_supported, .map_page = dma_nommu_map_page, .unmap_page = dma_nommu_unmap_page, .get_required_mask = dma_direct_get_required_mask, -- cgit v1.2.3 From 31f940afda6add7a7bb182adde97e615e5355c6d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:28 +0100 Subject: powerpc/dma: use the dma-direct allocator for coherent platforms The generic code allows a few nice things such as node local allocations and dipping into the CMA area. The lookup of the right zone for a given dma mask works a little differently, but the results should be the same.
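The "different lookup" amounts to deriving a GFP zone from the coherent mask instead of walking max_zone_pfns; roughly (a sketch assuming a 24-bit ZONE_DMA for illustration, ignoring CMA and the retry logic):

    static gfp_t gfp_for_mask_sketch(u64 phys_mask, gfp_t gfp)
    {
            if (phys_mask <= DMA_BIT_MASK(24))
                    return gfp | GFP_DMA;
            if (phys_mask <= DMA_BIT_MASK(32))
                    return gfp | GFP_DMA32;
            return gfp;
    }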
Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pgtable.h | 1 - arch/powerpc/kernel/dma-iommu.c | 5 ++- arch/powerpc/kernel/dma-swiotlb.c | 4 +-- arch/powerpc/kernel/dma.c | 69 +++----------------------------------- arch/powerpc/mm/mem.c | 22 ------------ 5 files changed, 9 insertions(+), 92 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index dad1d27e196d..505550fb2935 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -66,7 +66,6 @@ extern unsigned long empty_zero_page[]; extern pgd_t swapper_pg_dir[]; -int dma_pfn_limit_to_zone(u64 pfn_limit); extern void paging_init(void); /* diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 67fbfaa4e3b2..c75ba4e3a50c 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -40,8 +40,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, unsigned long attrs) { if (dma_iommu_alloc_bypass(dev)) - return __dma_nommu_alloc_coherent(dev, size, dma_handle, flag, - attrs); + return dma_direct_alloc(dev, size, dma_handle, flag, attrs); return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size, dma_handle, dev->coherent_dma_mask, flag, dev_to_node(dev)); @@ -52,7 +51,7 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size, unsigned long attrs) { if (dma_iommu_alloc_bypass(dev)) - __dma_nommu_free_coherent(dev, size, vaddr, dma_handle, attrs); + dma_direct_free(dev, size, vaddr, dma_handle, attrs); else iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle); diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 6d2677b2daa6..3a15a7d945e9 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -32,8 +32,8 @@ unsigned int ppc_swiotlb_enable; * for everything else. */ const struct dma_map_ops powerpc_swiotlb_dma_ops = { - .alloc = __dma_nommu_alloc_coherent, - .free = __dma_nommu_free_coherent, + .alloc = dma_direct_alloc, + .free = dma_direct_free, .map_sg = dma_direct_map_sg, .unmap_sg = dma_direct_unmap_sg, .dma_supported = dma_direct_supported, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index a3546a82f6d7..f983f8d435a6 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -27,70 +27,6 @@ * default the offset is PCI_DRAM_OFFSET. */ -static u64 __maybe_unused get_pfn_limit(struct device *dev) -{ - u64 pfn = (dev->coherent_dma_mask >> PAGE_SHIFT) + 1; - -#ifdef CONFIG_SWIOTLB - if (dev->bus_dma_mask && dev->dma_ops == &powerpc_swiotlb_dma_ops) - pfn = min_t(u64, pfn, dev->bus_dma_mask >> PAGE_SHIFT); -#endif - - return pfn; -} - -#ifndef CONFIG_NOT_COHERENT_CACHE -void *__dma_nommu_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs) -{ - void *ret; - struct page *page; - int node = dev_to_node(dev); -#ifdef CONFIG_FSL_SOC - u64 pfn = get_pfn_limit(dev); - int zone; - - /* - * This code should be OK on other platforms, but we have drivers that - * don't set coherent_dma_mask. As a workaround we just ifdef it. This - * whole routine needs some serious cleanup. 
- */ - - zone = dma_pfn_limit_to_zone(pfn); - if (zone < 0) { - dev_err(dev, "%s: No suitable zone for pfn %#llx\n", - __func__, pfn); - return NULL; - } - - switch (zone) { -#ifdef CONFIG_ZONE_DMA - case ZONE_DMA: - flag |= GFP_DMA; - break; -#endif - }; -#endif /* CONFIG_FSL_SOC */ - - page = alloc_pages_node(node, flag, get_order(size)); - if (page == NULL) - return NULL; - ret = page_address(page); - memset(ret, 0, size); - *dma_handle = phys_to_dma(dev,__pa(ret)); - - return ret; -} - -void __dma_nommu_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) -{ - free_pages((unsigned long)vaddr, get_order(size)); -} -#endif /* !CONFIG_NOT_COHERENT_CACHE */ - int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction direction, unsigned long attrs) @@ -163,8 +99,13 @@ static inline void dma_nommu_sync_single(struct device *dev, #endif const struct dma_map_ops dma_nommu_ops = { +#ifdef CONFIG_NOT_COHERENT_CACHE .alloc = __dma_nommu_alloc_coherent, .free = __dma_nommu_free_coherent, +#else + .alloc = dma_direct_alloc, + .free = dma_direct_free, +#endif .map_sg = dma_nommu_map_sg, .unmap_sg = dma_nommu_unmap_sg, .dma_supported = dma_direct_supported, .map_page = dma_nommu_map_page, .unmap_page = dma_nommu_unmap_page, .get_required_mask = dma_direct_get_required_mask, #ifdef CONFIG_NOT_COHERENT_CACHE .sync_single_for_cpu = dma_nommu_sync_single, .sync_single_for_device = dma_nommu_sync_single, .sync_sg_for_cpu = dma_nommu_sync_sg, .sync_sg_for_device = dma_nommu_sync_sg, #endif }; EXPORT_SYMBOL(dma_nommu_ops); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 33cc6f676fa6..a10ee3645a6c 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -69,15 +69,12 @@ pte_t *kmap_pte; EXPORT_SYMBOL(kmap_pte); pgprot_t kmap_prot; EXPORT_SYMBOL(kmap_prot); -#define TOP_ZONE ZONE_HIGHMEM static inline pte_t *virt_to_kpte(unsigned long vaddr) { return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), vaddr), vaddr); } -#else -#define TOP_ZONE ZONE_NORMAL #endif int page_is_ram(unsigned long pfn) @@ -261,25 +258,6 @@ static int __init mark_nonram_nosave(void) */ static unsigned long max_zone_pfns[MAX_NR_ZONES]; -/* - * Find the least restrictive zone that is entirely below the - * specified pfn limit. Returns < 0 if no suitable zone is found. - * - * pfn_limit must be u64 because it can exceed 32 bits even on 32-bit - * systems -- the DMA limit can be higher than any possible real pfn. - */ -int dma_pfn_limit_to_zone(u64 pfn_limit) -{ - int i; - - for (i = TOP_ZONE; i >= 0; i--) { - if (max_zone_pfns[i] <= pfn_limit) - return i; - } - - return -EPERM; -} - /* * paging_init() sets up the page tables - in fact we've already done this. */ -- cgit v1.2.3 From 461db2bdbf3c978e76dd10a04a63fa06bb29114f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:29 +0100 Subject: powerpc/dma: use the dma_direct mapping routines Switch the streaming DMA mapping and ownership transfer methods to the functionally identical dma_direct_ versions. Factor the cache maintenance helpers into the form expected by the common code for that.
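The form the common code expects is a pair of phys_addr_t-based sync hooks called from the generic map/unmap paths; the streaming map then reduces to (a sketch, not the verbatim dma-direct code):

    static dma_addr_t direct_map_page_sketch(struct device *dev,
                    struct page *page, unsigned long offset, size_t size,
                    enum dma_data_direction dir, unsigned long attrs)
    {
            phys_addr_t phys = page_to_phys(page) + offset;

            /* flush/invalidate caches unless the caller opted out */
            if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
                    arch_sync_dma_for_device(dev, phys, size, dir);
            return phys_to_dma(dev, phys);
    }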
Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/dma-mapping.h | 30 ------------ arch/powerpc/kernel/dma-iommu.c | 4 +- arch/powerpc/kernel/dma.c | 87 ++++------------------------------ arch/powerpc/mm/dma-noncoherent.c | 29 ++++++++---- arch/powerpc/platforms/Kconfig.cputype | 2 + 5 files changed, 32 insertions(+), 120 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index cdf70aaeafeb..4de9d4ee23c1 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -25,36 +25,6 @@ extern void *__dma_nommu_alloc_coherent(struct device *dev, size_t size, extern void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs); -int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction direction, - unsigned long attrs); -dma_addr_t dma_nommu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs); - -#ifdef CONFIG_NOT_COHERENT_CACHE -/* - * DMA-consistent mapping functions for PowerPCs that don't support - * cache snooping. These allocate/free a region of uncached mapped - * memory space for use with DMA devices. Alternatively, you could - * allocate the space "normally" and use the cache management functions - * to ensure it is consistent. - */ -struct device; -extern void __dma_sync(void *vaddr, size_t size, int direction); -extern void __dma_sync_page(struct page *page, unsigned long offset, - size_t size, int direction); -extern unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr); - -#else /* ! CONFIG_NOT_COHERENT_CACHE */ -/* - * Cache coherent cores. - */ - -#define __dma_sync(addr, size, rw) ((void)0) -#define __dma_sync_page(pg, off, sz, rw) ((void)0) - -#endif /* ! CONFIG_NOT_COHERENT_CACHE */ static inline unsigned long device_to_mask(struct device *dev) { diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index c75ba4e3a50c..09231ef06d01 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -68,7 +68,7 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page, unsigned long attrs) { if (dma_iommu_map_bypass(dev, attrs)) - return dma_nommu_map_page(dev, page, offset, size, direction, + return dma_direct_map_page(dev, page, offset, size, direction, attrs); return iommu_map_page(dev, get_iommu_table_base(dev), page, offset, size, device_to_mask(dev), direction, attrs); @@ -90,7 +90,7 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, unsigned long attrs) { if (dma_iommu_map_bypass(dev, attrs)) - return dma_nommu_map_sg(dev, sglist, nelems, direction, attrs); + return dma_direct_map_sg(dev, sglist, nelems, direction, attrs); return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems, device_to_mask(dev), direction, attrs); } diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index f983f8d435a6..b9f7283e7224 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -27,77 +27,6 @@ * default the offset is PCI_DRAM_OFFSET. 
*/ -int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction direction, - unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) { - sg->dma_address = phys_to_dma(dev, sg_phys(sg)); - sg->dma_length = sg->length; - - if (attrs & DMA_ATTR_SKIP_CPU_SYNC) - continue; - - __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); - } - - return nents; -} - -static void dma_nommu_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction direction, - unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); -} - -dma_addr_t dma_nommu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - __dma_sync_page(page, offset, size, dir); - - return phys_to_dma(dev, page_to_phys(page)) + offset; -} - -static inline void dma_nommu_unmap_page(struct device *dev, - dma_addr_t dma_address, - size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - __dma_sync(bus_to_virt(dma_address), size, direction); -} - -#ifdef CONFIG_NOT_COHERENT_CACHE -static inline void dma_nommu_sync_sg(struct device *dev, - struct scatterlist *sgl, int nents, - enum dma_data_direction direction) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); -} - -static inline void dma_nommu_sync_single(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ - __dma_sync(bus_to_virt(dma_handle), size, direction); -} -#endif - const struct dma_map_ops dma_nommu_ops = { #ifdef CONFIG_NOT_COHERENT_CACHE .alloc = __dma_nommu_alloc_coherent, @@ -106,17 +35,17 @@ const struct dma_map_ops dma_nommu_ops = { .alloc = dma_direct_alloc, .free = dma_direct_free, #endif - .map_sg = dma_nommu_map_sg, - .unmap_sg = dma_nommu_unmap_sg, + .map_sg = dma_direct_map_sg, .dma_supported = dma_direct_supported, - .map_page = dma_nommu_map_page, - .unmap_page = dma_nommu_unmap_page, + .map_page = dma_direct_map_page, .get_required_mask = dma_direct_get_required_mask, #ifdef CONFIG_NOT_COHERENT_CACHE - .sync_single_for_cpu = dma_nommu_sync_single, - .sync_single_for_device = dma_nommu_sync_single, - .sync_sg_for_cpu = dma_nommu_sync_sg, - .sync_sg_for_device = dma_nommu_sync_sg, + .unmap_sg = dma_direct_unmap_sg, + .unmap_page = dma_direct_unmap_page, + .sync_single_for_cpu = dma_direct_sync_single_for_cpu, + .sync_single_for_device = dma_direct_sync_single_for_device, + .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu, + .sync_sg_for_device = dma_direct_sync_sg_for_device, #endif }; EXPORT_SYMBOL(dma_nommu_ops); diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index ee95da19c82d..c3d15d718a58 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -314,7 +314,7 @@ void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr, /* * make an area consistent. 
*/ -void __dma_sync(void *vaddr, size_t size, int direction) +static void __dma_sync(void *vaddr, size_t size, int direction) { unsigned long start = (unsigned long)vaddr; unsigned long end = start + size; @@ -340,7 +340,6 @@ void __dma_sync(void *vaddr, size_t size, int direction) break; } } -EXPORT_SYMBOL(__dma_sync); #ifdef CONFIG_HIGHMEM /* @@ -387,21 +386,33 @@ static inline void __dma_sync_page_highmem(struct page *page, * __dma_sync_page makes memory consistent. identical to __dma_sync, but * takes a struct page instead of a virtual address */ -void __dma_sync_page(struct page *page, unsigned long offset, - size_t size, int direction) +static void __dma_sync_page(phys_addr_t paddr, size_t size, int dir) { + struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); + unsigned offset = paddr & ~PAGE_MASK; + #ifdef CONFIG_HIGHMEM - __dma_sync_page_highmem(page, offset, size, direction); + __dma_sync_page_highmem(page, offset, size, dir); #else unsigned long start = (unsigned long)page_address(page) + offset; - __dma_sync((void *)start, size, direction); + __dma_sync((void *)start, size, dir); #endif } -EXPORT_SYMBOL(__dma_sync_page); + +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) +{ + __dma_sync_page(paddr, size, dir); +} + +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) +{ + __dma_sync_page(paddr, size, dir); +} /* - * Return the PFN for a given cpu virtual address returned by - * __dma_nommu_alloc_coherent. + * Return the PFN for a given cpu virtual address returned by arch_dma_alloc. */ long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr, dma_addr_t dma_addr) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 48cd5aa90ad2..47db4934c1cf 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -403,6 +403,8 @@ config NOT_COHERENT_CACHE depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \ GAMECUBE_COMMON || AMIGAONE select ARCH_HAS_DMA_COHERENT_TO_PFN + select ARCH_HAS_SYNC_DMA_FOR_DEVICE + select ARCH_HAS_SYNC_DMA_FOR_CPU default n if PPC_47x default y -- cgit v1.2.3 From 68005b67d15a1ee5b5ddff965175728e65fa73e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:30 +0100 Subject: powerpc/dma: use the generic direct mapping bypass Now that we've switched all the powerpc nommu and swiotlb methods to use the generic dma_direct_* calls we can remove these ops vectors entirely and rely on the common direct mapping bypass, which avoids indirect function calls altogether. This also allows us to remove a whole lot of boilerplate code related to setting up these operations.
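"Bypass" here means that a device with no dma_map_ops installed at all is dispatched straight to the dma_direct_* helpers, so the fast path carries no indirect call; conceptually (a sketch of the common dispatch, which spells the test as dma_is_direct()):

    const struct dma_map_ops *ops = get_dma_ops(dev);

    if (!ops)       /* direct mapping: no ops vector installed */
            return dma_direct_map_page(dev, page, offset, size, dir, attrs);
    return ops->map_page(dev, page, offset, size, dir, attrs);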
Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/dma-mapping.h | 9 ---- arch/powerpc/include/asm/swiotlb.h | 3 -- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/dma-swiotlb.c | 58 ------------------------- arch/powerpc/kernel/dma.c | 62 --------------------------- arch/powerpc/kernel/pci-common.c | 2 +- arch/powerpc/kernel/setup-common.c | 1 - arch/powerpc/mm/dma-noncoherent.c | 6 +-- arch/powerpc/platforms/44x/warp.c | 2 +- arch/powerpc/platforms/85xx/corenet_generic.c | 4 -- arch/powerpc/platforms/85xx/ge_imp3a.c | 2 - arch/powerpc/platforms/85xx/mpc8536_ds.c | 2 - arch/powerpc/platforms/85xx/mpc85xx_ds.c | 4 -- arch/powerpc/platforms/85xx/mpc85xx_mds.c | 4 -- arch/powerpc/platforms/85xx/p1010rdb.c | 1 - arch/powerpc/platforms/85xx/p1022_ds.c | 2 - arch/powerpc/platforms/85xx/p1022_rdk.c | 2 - arch/powerpc/platforms/86xx/mpc86xx_hpcn.c | 1 - arch/powerpc/platforms/cell/iommu.c | 3 -- arch/powerpc/platforms/pasemi/iommu.c | 2 +- arch/powerpc/platforms/pasemi/setup.c | 51 ---------------------- arch/powerpc/platforms/powernv/npu-dma.c | 2 +- arch/powerpc/platforms/pseries/vio.c | 7 +++ arch/powerpc/sysdev/fsl_pci.c | 5 +-- drivers/misc/cxl/vphb.c | 1 - 25 files changed, 16 insertions(+), 222 deletions(-) delete mode 100644 arch/powerpc/kernel/dma.c (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 4de9d4ee23c1..93e57e28be28 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -18,14 +18,6 @@ #include #include -/* Some dma direct funcs must be visible for use in other dma_ops */ -extern void *__dma_nommu_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs); -extern void __dma_nommu_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs); - static inline unsigned long device_to_mask(struct device *dev) { if (dev->dma_mask && *dev->dma_mask) @@ -40,7 +32,6 @@ static inline unsigned long device_to_mask(struct device *dev) #ifdef CONFIG_PPC64 extern const struct dma_map_ops dma_iommu_ops; #endif -extern const struct dma_map_ops dma_nommu_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { diff --git a/arch/powerpc/include/asm/swiotlb.h b/arch/powerpc/include/asm/swiotlb.h index 26a0f12b835b..b7d082c0ec25 100644 --- a/arch/powerpc/include/asm/swiotlb.h +++ b/arch/powerpc/include/asm/swiotlb.h @@ -13,10 +13,7 @@ #include -extern const struct dma_map_ops powerpc_swiotlb_dma_ops; - extern unsigned int ppc_swiotlb_enable; -int __init swiotlb_setup_bus_notifier(void); #ifdef CONFIG_SWIOTLB void swiotlb_detect_4g(void); diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 9bb12cd642ef..8809e287b80d 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -36,7 +36,7 @@ obj-y := cputable.o ptrace.o syscalls.o \ process.o systbl.o idle.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ - udbg.o misc.o io.o dma.o misc_$(BITS).o \ + udbg.o misc.o io.o misc_$(BITS).o \ of_platform.o prom_parse.o obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 3a15a7d945e9..132d61c91629 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -10,70 
+10,12 @@ * option) any later version. * */ - -#include #include -#include -#include -#include -#include - #include #include -#include unsigned int ppc_swiotlb_enable; -/* - * At the moment, all platforms that use this code only require - * swiotlb to be used if we're operating on HIGHMEM. Since - * we don't ever call anything other than map_sg, unmap_sg, - * map_page, and unmap_page on highmem, use normal dma_ops - * for everything else. - */ -const struct dma_map_ops powerpc_swiotlb_dma_ops = { - .alloc = dma_direct_alloc, - .free = dma_direct_free, - .map_sg = dma_direct_map_sg, - .unmap_sg = dma_direct_unmap_sg, - .dma_supported = dma_direct_supported, - .map_page = dma_direct_map_page, - .unmap_page = dma_direct_unmap_page, - .sync_single_for_cpu = dma_direct_sync_single_for_cpu, - .sync_single_for_device = dma_direct_sync_single_for_device, - .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu, - .sync_sg_for_device = dma_direct_sync_sg_for_device, - .get_required_mask = dma_direct_get_required_mask, -}; - -static int ppc_swiotlb_bus_notify(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - - /* We are only intereted in device addition */ - if (action != BUS_NOTIFY_ADD_DEVICE) - return 0; - - /* May need to bounce if the device can't address all of DRAM */ - if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM()) - set_dma_ops(dev, &powerpc_swiotlb_dma_ops); - - return NOTIFY_DONE; -} - -static struct notifier_block ppc_swiotlb_plat_bus_notifier = { - .notifier_call = ppc_swiotlb_bus_notify, - .priority = 0, -}; - -int __init swiotlb_setup_bus_notifier(void) -{ - bus_register_notifier(&platform_bus_type, - &ppc_swiotlb_plat_bus_notifier); - return 0; -} - void __init swiotlb_detect_4g(void) { if ((memblock_end_of_DRAM() - 1) > 0xffffffff) diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c deleted file mode 100644 index b9f7283e7224..000000000000 --- a/arch/powerpc/kernel/dma.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation - * - * Provide default implementations of the DMA mapping callbacks for - * directly mapped busses. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Generic direct DMA implementation - * - * This implementation supports a per-device offset that can be applied if - * the address at which memory is visible to devices is not 0. Platform code - * can set archdata.dma_data to an unsigned long holding the offset. By - * default the offset is PCI_DRAM_OFFSET. 
- */ - -const struct dma_map_ops dma_nommu_ops = { -#ifdef CONFIG_NOT_COHERENT_CACHE - .alloc = __dma_nommu_alloc_coherent, - .free = __dma_nommu_free_coherent, -#else - .alloc = dma_direct_alloc, - .free = dma_direct_free, -#endif - .map_sg = dma_direct_map_sg, - .dma_supported = dma_direct_supported, - .map_page = dma_direct_map_page, - .get_required_mask = dma_direct_get_required_mask, -#ifdef CONFIG_NOT_COHERENT_CACHE - .unmap_sg = dma_direct_unmap_sg, - .unmap_page = dma_direct_unmap_page, - .sync_single_for_cpu = dma_direct_sync_single_for_cpu, - .sync_single_for_device = dma_direct_sync_single_for_device, - .sync_sg_for_cpu = dma_direct_sync_sg_for_cpu, - .sync_sg_for_device = dma_direct_sync_sg_for_device, -#endif -}; -EXPORT_SYMBOL(dma_nommu_ops); - -static int __init dma_init(void) -{ -#ifdef CONFIG_IBMVIO - dma_debug_add_bus(&vio_bus_type); -#endif - - return 0; -} -fs_initcall(dma_init); - diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index a84707680525..23989175349c 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -62,7 +62,7 @@ resource_size_t isa_mem_base; EXPORT_SYMBOL(isa_mem_base); -static const struct dma_map_ops *pci_dma_ops = &dma_nommu_ops; +static const struct dma_map_ops *pci_dma_ops; void set_pci_dma_ops(const struct dma_map_ops *dma_ops) { diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index ca00fbb97cf8..fa606aa98f6d 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -791,7 +791,6 @@ void arch_setup_pdev_archdata(struct platform_device *pdev) { pdev->archdata.dma_mask = DMA_BIT_MASK(32); pdev->dev.dma_mask = &pdev->archdata.dma_mask; - set_dma_ops(&pdev->dev, &dma_nommu_ops); } static __init void print_system_info(void) diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index c3d15d718a58..b5d2658c26af 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -152,8 +152,8 @@ static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsi * Allocate DMA-coherent memory space and return both the kernel remapped * virtual and bus address for that space. */ -void *__dma_nommu_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) { struct page *page; struct ppc_vm_region *c; @@ -254,7 +254,7 @@ void *__dma_nommu_alloc_coherent(struct device *dev, size_t size, /* * free a page as defined by the above mapping. 
*/ -void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr, +void arch_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { struct ppc_vm_region *c; diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index f467247fd1c4..18422dbd061a 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -47,7 +47,7 @@ static int __init warp_probe(void) if (!of_machine_is_compatible("pika,warp")) return 0; - /* For __dma_nommu_alloc_coherent */ + /* For arch_dma_alloc */ ISA_DMA_THRESHOLD = ~0L; return 1; diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index b0dac307bebf..808da1e9c0a7 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -223,7 +223,3 @@ define_machine(corenet_generic) { }; machine_arch_initcall(corenet_generic, corenet_gen_publish_devices); - -#ifdef CONFIG_SWIOTLB -machine_arch_initcall(corenet_generic, swiotlb_setup_bus_notifier); -#endif diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c index f29c6f0909f3..c64fa2483ea9 100644 --- a/arch/powerpc/platforms/85xx/ge_imp3a.c +++ b/arch/powerpc/platforms/85xx/ge_imp3a.c @@ -202,8 +202,6 @@ static int __init ge_imp3a_probe(void) machine_arch_initcall(ge_imp3a, mpc85xx_common_publish_devices); -machine_arch_initcall(ge_imp3a, swiotlb_setup_bus_notifier); - define_machine(ge_imp3a) { .name = "GE_IMP3A", .probe = ge_imp3a_probe, diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c index 94a7f92c858f..94194bad4954 100644 --- a/arch/powerpc/platforms/85xx/mpc8536_ds.c +++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c @@ -57,8 +57,6 @@ static void __init mpc8536_ds_setup_arch(void) machine_arch_initcall(mpc8536_ds, mpc85xx_common_publish_devices); -machine_arch_initcall(mpc8536_ds, swiotlb_setup_bus_notifier); - /* * Called very early, device-tree isn't unflattened */ diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c index dc9e035cc637..b7e29ce1f266 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c @@ -174,10 +174,6 @@ machine_arch_initcall(mpc8544_ds, mpc85xx_common_publish_devices); machine_arch_initcall(mpc8572_ds, mpc85xx_common_publish_devices); machine_arch_initcall(p2020_ds, mpc85xx_common_publish_devices); -machine_arch_initcall(mpc8544_ds, swiotlb_setup_bus_notifier); -machine_arch_initcall(mpc8572_ds, swiotlb_setup_bus_notifier); -machine_arch_initcall(p2020_ds, swiotlb_setup_bus_notifier); - /* * Called very early, device-tree isn't unflattened */ diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index d7e440e6dba3..80939a425de5 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -367,10 +367,6 @@ machine_arch_initcall(mpc8568_mds, mpc85xx_publish_devices); machine_arch_initcall(mpc8569_mds, mpc85xx_publish_devices); machine_arch_initcall(p1021_mds, mpc85xx_common_publish_devices); -machine_arch_initcall(mpc8568_mds, swiotlb_setup_bus_notifier); -machine_arch_initcall(mpc8569_mds, swiotlb_setup_bus_notifier); -machine_arch_initcall(p1021_mds, swiotlb_setup_bus_notifier); - static void __init mpc85xx_mds_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | diff --git 
a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c index 78d13b364cd6..33ca373322e1 100644 --- a/arch/powerpc/platforms/85xx/p1010rdb.c +++ b/arch/powerpc/platforms/85xx/p1010rdb.c @@ -55,7 +55,6 @@ static void __init p1010_rdb_setup_arch(void) } machine_arch_initcall(p1010_rdb, mpc85xx_common_publish_devices); -machine_arch_initcall(p1010_rdb, swiotlb_setup_bus_notifier); /* * Called very early, device-tree isn't unflattened diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c index 9fb57f78cdbe..1f1af0557470 100644 --- a/arch/powerpc/platforms/85xx/p1022_ds.c +++ b/arch/powerpc/platforms/85xx/p1022_ds.c @@ -548,8 +548,6 @@ static void __init p1022_ds_setup_arch(void) machine_arch_initcall(p1022_ds, mpc85xx_common_publish_devices); -machine_arch_initcall(p1022_ds, swiotlb_setup_bus_notifier); - /* * Called very early, device-tree isn't unflattened */ diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c index 276e00ab3dde..fd9e3e7ef234 100644 --- a/arch/powerpc/platforms/85xx/p1022_rdk.c +++ b/arch/powerpc/platforms/85xx/p1022_rdk.c @@ -128,8 +128,6 @@ static void __init p1022_rdk_setup_arch(void) machine_arch_initcall(p1022_rdk, mpc85xx_common_publish_devices); -machine_arch_initcall(p1022_rdk, swiotlb_setup_bus_notifier); - /* * Called very early, device-tree isn't unflattened */ diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index 17c6cd3d02e6..775a92353c83 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -121,7 +121,6 @@ static int __init declare_of_platform_devices(void) return 0; } machine_arch_initcall(mpc86xx_hpcn, declare_of_platform_devices); -machine_arch_initcall(mpc86xx_hpcn, swiotlb_setup_bus_notifier); define_machine(mpc86xx_hpcn) { .name = "MPC86xx HPCN", diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index a3c4057a8f65..06abd432b830 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -600,8 +600,6 @@ static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action, if (cell_iommu_enabled) dev->dma_ops = &dma_iommu_ops; - else - dev->dma_ops = &dma_nommu_ops; cell_dma_dev_setup(dev); return 0; } @@ -727,7 +725,6 @@ static int __init cell_iommu_init_disabled(void) unsigned long base = 0, size; /* When no iommu is present, we use direct DMA ops */ - set_pci_dma_ops(&dma_nommu_ops); /* First make sure all IOC translation is turned off */ cell_disable_iommus(); diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index f2971522fb4a..bbeb6a1b0393 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -186,7 +186,7 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev) */ if (dev->vendor == 0x1959 && dev->device == 0xa007 && !firmware_has_feature(FW_FEATURE_LPAR)) { - dev->dev.dma_ops = &dma_nommu_ops; + dev->dev.dma_ops = NULL; /* * Set the coherent DMA mask to prevent the iommu * being used unnecessarily diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index c0532999f854..46dd463faaa7 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -411,55 +411,6 @@ out: return !!(srr1 & 0x2); } -#ifdef CONFIG_PCMCIA -static int pcmcia_notify(struct notifier_block *nb, unsigned long action, - void *data) 
-{ - struct device *dev = data; - struct device *parent; - struct pcmcia_device *pdev = to_pcmcia_dev(dev); - - /* We are only intereted in device addition */ - if (action != BUS_NOTIFY_ADD_DEVICE) - return 0; - - parent = pdev->socket->dev.parent; - - /* We know electra_cf devices will always have of_node set, since - * electra_cf is an of_platform driver. - */ - if (!parent->of_node) - return 0; - - if (!of_device_is_compatible(parent->of_node, "electra-cf")) - return 0; - - /* We use the direct ops for localbus */ - dev->dma_ops = &dma_nommu_ops; - - return 0; -} - -static struct notifier_block pcmcia_notifier = { - .notifier_call = pcmcia_notify, -}; - -static inline void pasemi_pcmcia_init(void) -{ - extern struct bus_type pcmcia_bus_type; - - bus_register_notifier(&pcmcia_bus_type, &pcmcia_notifier); -} - -#else - -static inline void pasemi_pcmcia_init(void) -{ -} - -#endif - - static const struct of_device_id pasemi_bus_ids[] = { /* Unfortunately needed for legacy firmwares */ { .type = "localbus", }, @@ -472,8 +423,6 @@ static const struct of_device_id pasemi_bus_ids[] = { static int __init pasemi_publish_devices(void) { - pasemi_pcmcia_init(); - /* Publish OF platform devices for SDC and other non-PCI devices */ of_platform_bus_probe(NULL, pasemi_bus_ids, NULL); diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index d7f742ed48ba..4e87e13fa0fc 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -220,7 +220,7 @@ static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe) * their parent device so drivers shouldn't be doing DMA * operations directly on these devices. */ - set_dma_ops(&npe->pdev->dev, NULL); + set_dma_ops(&npe->pdev->dev, &dma_dummy_ops); } /* diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index b7dc8bd41fd0..141795275ccb 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1699,3 +1699,10 @@ int vio_disable_interrupts(struct vio_dev *dev) } EXPORT_SYMBOL(vio_disable_interrupts); #endif /* CONFIG_PPC_PSERIES */ + +static int __init vio_init(void) +{ + dma_debug_add_bus(&vio_bus_type); + return 0; +} +fs_initcall(vio_init); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 0c6510f340cb..23000ca7f688 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -124,10 +124,8 @@ static void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev) static void setup_swiotlb_ops(struct pci_controller *hose) { - if (ppc_swiotlb_enable) { + if (ppc_swiotlb_enable) hose->controller_ops.dma_dev_setup = pci_dma_dev_setup_swiotlb; - set_pci_dma_ops(&powerpc_swiotlb_dma_ops); - } } #else static inline void setup_swiotlb_ops(struct pci_controller *hose) {} @@ -141,7 +139,6 @@ static void fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask) */ if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) { dev->bus_dma_mask = 0; - set_dma_ops(dev, &dma_nommu_ops); set_dma_offset(dev, pci64_dma_offset); } } diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 49da2f744bbf..b64adc0f0865 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -43,7 +43,6 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) return false; } - set_dma_ops(&dev->dev, &dma_nommu_ops); set_dma_offset(&dev->dev, PAGE_OFFSET); /* -- cgit v1.2.3 From 7610fdf5e056ad5764d19f39db49b11608334610 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 
13 Feb 2019 08:01:31 +0100 Subject: powerpc/dma: remove get_dma_offset Just fold the calculation into __phys_to_dma/__dma_to_phys as those are the only places that should know about it. Signed-off-by: Christoph Hellwig Acked-by: Benjamin Herrenschmidt Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/dma-direct.h | 8 ++++++-- arch/powerpc/include/asm/dma-mapping.h | 16 ---------------- 2 files changed, 6 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h index 92d8aed86422..a2912b47102c 100644 --- a/arch/powerpc/include/asm/dma-direct.h +++ b/arch/powerpc/include/asm/dma-direct.h @@ -13,11 +13,15 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { - return paddr + get_dma_offset(dev); + if (!dev) + return paddr + PCI_DRAM_OFFSET; + return paddr + dev->archdata.dma_offset; } static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) { - return daddr - get_dma_offset(dev); + if (!dev) + return daddr - PCI_DRAM_OFFSET; + return daddr - dev->archdata.dma_offset; } #endif /* ASM_POWERPC_DMA_DIRECT_H */ diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 93e57e28be28..c70f55d2f5e0 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -43,22 +43,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return NULL; } -/* - * get_dma_offset() - * - * Get the dma offset on configurations where the dma address can be determined - * from the physical address by looking at a simple offset. Direct dma and - * swiotlb use this function, but it is typically not used by implementations - * with an iommu. - */ -static inline dma_addr_t get_dma_offset(struct device *dev) -{ - if (dev) - return dev->archdata.dma_offset; - - return PCI_DRAM_OFFSET; -} - static inline void set_dma_offset(struct device *dev, dma_addr_t off) { if (dev) -- cgit v1.2.3 From 0617fc0ca412b535c0ab0e5e7b03180067f0f7fd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:32 +0100 Subject: powerpc/dma: remove set_dma_offset There is no good reason for this helper, just opencode it. 
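To make the offset handling of these two patches concrete, here is a small illustrative example built on the __phys_to_dma()/__dma_to_phys() helpers shown above; the device and all address values are invented for the illustration:

    /* Illustration only: a bus that sees system RAM at a 2 GB offset. */
    dev->archdata.dma_offset = 0x80000000;

    dma_addr_t daddr = __phys_to_dma(dev, 0x1000);	/* 0x80001000 */
    phys_addr_t paddr = __dma_to_phys(dev, daddr);	/* back to 0x1000 */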
Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/dma-mapping.h | 6 ------ arch/powerpc/kernel/pci-common.c | 2 +- arch/powerpc/platforms/cell/iommu.c | 4 ++-- arch/powerpc/platforms/powernv/pci-ioda.c | 6 +++--- arch/powerpc/platforms/pseries/iommu.c | 7 ++----- arch/powerpc/sysdev/dart_iommu.c | 2 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- drivers/misc/cxl/vphb.c | 2 +- 8 files changed, 11 insertions(+), 20 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index c70f55d2f5e0..a59c42879194 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -43,11 +43,5 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return NULL; } -static inline void set_dma_offset(struct device *dev, dma_addr_t off) -{ - if (dev) - dev->archdata.dma_offset = off; -} - #endif /* __KERNEL__ */ #endif /* _ASM_DMA_MAPPING_H */ diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 23989175349c..cbdf13d86227 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -966,7 +966,7 @@ static void pcibios_setup_device(struct pci_dev *dev) /* Hook up default DMA ops */ set_dma_ops(&dev->dev, pci_dma_ops); - set_dma_offset(&dev->dev, PCI_DRAM_OFFSET); + dev->dev.archdata.dma_offset = PCI_DRAM_OFFSET; /* Additional platform DMA/iommu setup */ phb = pci_bus_to_host(dev->bus); diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 06abd432b830..54e012e1f720 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -577,10 +577,10 @@ static void cell_dma_dev_setup(struct device *dev) u64 addr = cell_iommu_get_fixed_address(dev); if (addr != OF_BAD_ADDR) - set_dma_offset(dev, addr + dma_iommu_fixed_base); + dev->archdata.dma_offset = addr + dma_iommu_fixed_base; set_iommu_table_base(dev, cell_get_iommu_table(dev)); } else { - set_dma_offset(dev, cell_dma_nommu_offset); + dev->archdata.dma_offset = cell_dma_nommu_offset; } } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2de7fcf54c40..6438f38235e8 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1746,7 +1746,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev pe = &phb->ioda.pe_array[pdn->pe_number]; WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); - set_dma_offset(&pdev->dev, pe->tce_bypass_base); + pdev->dev.archdata.dma_offset = pe->tce_bypass_base; set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]); /* * Note: iommu_add_device() will fail here as @@ -1859,7 +1859,7 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev, if (rc) return rc; /* 4GB offset bypasses 32-bit space */ - set_dma_offset(&pdev->dev, (1ULL << 32)); + pdev->dev.archdata.dma_offset = (1ULL << 32); return true; } @@ -1872,7 +1872,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); - set_dma_offset(&dev->dev, pe->tce_bypass_base); + dev->dev.archdata.dma_offset = pe->tce_bypass_base; if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) pnv_ioda_setup_bus_dma(pe, dev->subordinate); diff --git 
a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 37d2ce3f55a3..36eb1ddbac69 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1202,7 +1202,6 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) { struct device_node *dn = pci_device_to_OF_node(pdev), *pdn; const __be32 *dma_window = NULL; - u64 dma_offset; /* only attempt to use a new window if 64-bit DMA is requested */ if (dma_mask < DMA_BIT_MASK(64)) @@ -1224,11 +1223,9 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) } if (pdn && PCI_DN(pdn)) { - dma_offset = enable_ddw(pdev, pdn); - if (dma_offset != 0) { - set_dma_offset(&pdev->dev, dma_offset); + pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn); + if (pdev->dev.archdata.dma_offset) return true; - } } return false; diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index d42ba645d51d..809797dbe169 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -386,7 +386,7 @@ static bool dart_device_on_pcie(struct device *dev) static void pci_dma_dev_setup_dart(struct pci_dev *dev) { if (dart_is_u4 && dart_device_on_pcie(&dev->dev)) - set_dma_offset(&dev->dev, DART_U4_BYPASS_BASE); + dev->dev.archdata.dma_offset = DART_U4_BYPASS_BASE; set_iommu_table_base(&dev->dev, &iommu_table_dart); } diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 23000ca7f688..a04c6dde6ed0 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -139,7 +139,7 @@ static void fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask) */ if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) { dev->bus_dma_mask = 0; - set_dma_offset(dev, pci64_dma_offset); + dev->archdata.dma_offset = pci64_dma_offset; } } diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index b64adc0f0865..631c5df246d4 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -43,7 +43,7 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) return false; } - set_dma_offset(&dev->dev, PAGE_OFFSET); + dev->dev.archdata.dma_offset = PAGE_OFFSET; /* * Allocate a context to do cxl things too. If we eventually do real -- cgit v1.2.3 From 4a605e2d1a69f5aea06da10d81e22802a90812a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Feb 2019 08:01:33 +0100 Subject: powerpc/dma: trim the fat from <asm/dma-mapping.h> There is no need to provide anything but get_arch_dma_ops to <linux/dma-mapping.h>. Move the remaining declarations to <asm/iommu.h> and drop all the includes. Signed-off-by: Christoph Hellwig Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/dma-mapping.h | 29 --------------------------- arch/powerpc/include/asm/iommu.h | 10 +++++++++ arch/powerpc/platforms/44x/ppc476.c | 1 + arch/powerpc/platforms/85xx/corenet_generic.c | 1 + arch/powerpc/platforms/85xx/qemu_e500.c | 1 + arch/powerpc/sysdev/fsl_pci.c | 1 + 6 files changed, 14 insertions(+), 29 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index a59c42879194..565d6f74b189 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -1,37 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2004 IBM - * - * Implements the generic device dma API for powerpc.
- * the pci and vio busses */ #ifndef _ASM_DMA_MAPPING_H #define _ASM_DMA_MAPPING_H -#ifdef __KERNEL__ - -#include -#include -/* need struct page definitions */ -#include -#include -#include -#include -#include - -static inline unsigned long device_to_mask(struct device *dev) -{ - if (dev->dma_mask && *dev->dma_mask) - return *dev->dma_mask; - /* Assume devices without mask can take 32 bit addresses */ - return 0xfffffffful; -} - -/* - * Available generic sets of operations - */ -#ifdef CONFIG_PPC64 -extern const struct dma_map_ops dma_iommu_ops; -#endif static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { @@ -43,5 +15,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return NULL; } -#endif /* __KERNEL__ */ #endif /* _ASM_DMA_MAPPING_H */ diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 6f00a892ebdf..0ac52392ed99 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -325,5 +325,15 @@ extern bool iommu_fixed_is_weak; #define iommu_fixed_is_weak false #endif +extern const struct dma_map_ops dma_iommu_ops; + +static inline unsigned long device_to_mask(struct device *dev) +{ + if (dev->dma_mask && *dev->dma_mask) + return *dev->dma_mask; + /* Assume devices without mask can take 32 bit addresses */ + return 0xfffffffful; +} + #endif /* __KERNEL__ */ #endif /* _ASM_IOMMU_H */ diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index e55933f9cd55..a5e61e5c16e2 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 808da1e9c0a7..785e9641220d 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c index 27631c607f3d..c52c8f9e8385 100644 --- a/arch/powerpc/platforms/85xx/qemu_e500.c +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include "smp.h" diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index a04c6dde6ed0..f49aec251a5a 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 884dfb722db899e36d8c382783347aab57f96caa Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 21 Feb 2019 13:38:49 +1100 Subject: KVM: PPC: Book3S HV: Simplify machine check handling This makes the handling of machine check interrupts that occur inside a guest simpler and more robust, with less done in assembler code and in real mode. Now, when a machine check occurs inside a guest, we always get the machine check event struct and put a copy in the vcpu struct for the vcpu where the machine check occurred. 
We no longer call machine_check_queue_event() from kvmppc_realmode_mc_power7(), because on POWER8, when a vcpu is running on an offline secondary thread and we call machine_check_queue_event(), that calls irq_work_queue(), which doesn't work because the CPU is offline, but instead triggers the WARN_ON(lazy_irq_pending()) in pnv_smp_cpu_kill_self() (which fires again and again because nothing clears the condition). All that machine_check_queue_event() actually does is to cause the event to be printed to the console. For a machine check occurring in the guest, we now print the event in kvmppc_handle_exit_hv() instead. The assembly code at label machine_check_realmode now just calls C code and then continues exiting the guest. We no longer either synthesize a machine check for the guest in assembly code or return to the guest without a machine check. The code in kvmppc_handle_exit_hv() is extended to handle the case where the guest is not FWNMI-capable. In that case we now always synthesize a machine check interrupt for the guest. Previously, if the host thinks it has recovered the machine check fully, it would return to the guest without any notification that the machine check had occurred. If the machine check was caused by some action of the guest (such as creating duplicate SLB entries), it is much better to tell the guest that it has caused a problem. Therefore we now always generate a machine check interrupt for guests that are not FWNMI-capable. Reviewed-by: Aravinda Prasad Reviewed-by: Mahesh Salgaonkar Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kvm_ppc.h | 3 +- arch/powerpc/kvm/book3s.c | 7 ++++ arch/powerpc/kvm/book3s_hv.c | 18 ++++++++-- arch/powerpc/kvm/book3s_hv_ras.c | 58 ++++++++------------------------- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 40 ++--------------------- 5 files changed, 42 insertions(+), 84 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index eb0d79f0ca45..a6c8548ed9fa 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -141,6 +141,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu); extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags); extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu); @@ -632,7 +633,7 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, unsigned int yield_count); long kvmppc_h_random(struct kvm_vcpu *vcpu); void kvmhv_commence_exit(int trap); -long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu); +void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu); void kvmppc_subcore_enter_guest(void); void kvmppc_subcore_exit_guest(void); long kvmppc_realmode_hmi_handler(void); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index bd1a677dd9e4..9a7dadbe1f17 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -192,6 +192,13 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) } EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio); +void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags) +{ + /* might as well deliver this straight away */ + kvmppc_inject_interrupt(vcpu, 
BOOK3S_INTERRUPT_MACHINE_CHECK, flags); +} +EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check); + void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) { /* might as well deliver this straight away */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 105a3f78a760..53b202415395 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1215,6 +1215,22 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_MACHINE_CHECK: + /* Print the MCE event to host console. */ + machine_check_print_event_info(&vcpu->arch.mce_evt, false); + + /* + * If the guest can do FWNMI, exit to userspace so it can + * deliver a FWNMI to the guest. + * Otherwise we synthesize a machine check for the guest + * so that it knows that the machine check occurred. + */ + if (!vcpu->kvm->arch.fwnmi_enabled) { + ulong flags = vcpu->arch.shregs.msr & 0x083c0000; + kvmppc_core_queue_machine_check(vcpu, flags); + r = RESUME_GUEST; + break; + } + /* Exit to guest with KVM_EXIT_NMI as exit reason */ run->exit_reason = KVM_EXIT_NMI; run->hw.hardware_exit_reason = vcpu->arch.trap; @@ -1227,8 +1243,6 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV; r = RESUME_HOST; - /* Print the MCE event to host console. */ - machine_check_print_event_info(&vcpu->arch.mce_evt, false); break; case BOOK3S_INTERRUPT_PROGRAM: { diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 0787f12c1a1b..8c24c3bea0bf 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -66,10 +66,8 @@ static void reload_slb(struct kvm_vcpu *vcpu) /* * On POWER7, see if we can handle a machine check that occurred inside * the guest in real mode, without switching to the host partition. - * - * Returns: 0 => exit guest, 1 => deliver machine check to guest */ -static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) +static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) { unsigned long srr1 = vcpu->arch.shregs.msr; struct machine_check_event mce_evt; @@ -111,52 +109,24 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) } /* - * See if we have already handled the condition in the linux host. - * We assume that if the condition is recovered then linux host - * will have generated an error log event that we will pick - * up and log later. - * Don't release mce event now. We will queue up the event so that - * we can log the MCE event info on host console. + * Now get the event and stash it in the vcpu struct so it can + * be handled by the primary thread in virtual mode. We can't + * call machine_check_queue_event() here if we are running on + * an offline secondary thread. */ - if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE)) - goto out; - - if (mce_evt.version == MCE_V1 && - (mce_evt.severity == MCE_SEV_NO_ERROR || - mce_evt.disposition == MCE_DISPOSITION_RECOVERED)) - handled = 1; - -out: - /* - * For guest that supports FWNMI capability, hook the MCE event into - * vcpu structure. We are going to exit the guest with KVM_EXIT_NMI - * exit reason. On our way to exit we will pull this event from vcpu - * structure and print it from thread 0 of the core/subcore. 
- * - * For guest that does not support FWNMI capability (old QEMU): - * We are now going enter guest either through machine check - * interrupt (for unhandled errors) or will continue from - * current HSRR0 (for handled errors) in guest. Hence - * queue up the event so that we can log it from host console later. - */ - if (vcpu->kvm->arch.fwnmi_enabled) { - /* - * Hook up the mce event on to vcpu structure. - * First clear the old event. - */ - memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt)); - if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) { - vcpu->arch.mce_evt = mce_evt; - } - } else - machine_check_queue_event(); + if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) { + if (handled && mce_evt.version == MCE_V1) + mce_evt.disposition = MCE_DISPOSITION_RECOVERED; + } else { + memset(&mce_evt, 0, sizeof(mce_evt)); + } - return handled; + vcpu->arch.mce_evt = mce_evt; } -long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) +void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) { - return kvmppc_realmode_mc_power7(vcpu); + kvmppc_realmode_mc_power7(vcpu); } /* Check if dynamic split is in force and return subcore size accordingly. */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 9b8d50a7cbaf..f24f6a2f8eb5 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2826,49 +2826,15 @@ kvm_cede_exit: #endif /* CONFIG_KVM_XICS */ 3: b guest_exit_cont - /* Try to handle a machine check in real mode */ + /* Try to do machine check recovery in real mode */ machine_check_realmode: mr r3, r9 /* get vcpu pointer */ bl kvmppc_realmode_machine_check nop + /* all machine checks go to virtual mode for further handling */ ld r9, HSTATE_KVM_VCPU(r13) li r12, BOOK3S_INTERRUPT_MACHINE_CHECK - /* - * For the guest that is FWNMI capable, deliver all the MCE errors - * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit - * reason. This new approach injects machine check errors in guest - * address space to guest with additional information in the form - * of RTAS event, thus enabling guest kernel to suitably handle - * such errors. - * - * For the guest that is not FWNMI capable (old QEMU) fallback - * to old behaviour for backward compatibility: - * Deliver unhandled/fatal (e.g. UE) MCE errors to guest either - * through machine check interrupt (set HSRR0 to 0x200). - * For handled errors (no-fatal), just go back to guest execution - * with current HSRR0. - * if we receive machine check with MSR(RI=0) then deliver it to - * guest as machine check causing guest to crash. - */ - ld r11, VCPU_MSR(r9) - rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */ - bne guest_exit_cont /* if so, exit to host */ - /* Check if guest is capable of handling NMI exit */ - ld r10, VCPU_KVM(r9) - lbz r10, KVM_FWNMI(r10) - cmpdi r10, 1 /* FWNMI capable? */ - beq guest_exit_cont /* if so, exit with KVM_EXIT_NMI. */ - - /* if not, fall through for backward compatibility. */ - andi. r10, r11, MSR_RI /* check for unrecoverable exception */ - beq 1f /* Deliver a machine check to guest */ - ld r10, VCPU_PC(r9) - cmpdi r3, 0 /* Did we handle MCE ? */ - bne 2f /* Continue guest execution. */ - /* If not, deliver a machine check. SRR0/1 are already set */ -1: li r10, BOOK3S_INTERRUPT_MACHINE_CHECK - bl kvmppc_msr_interrupt -2: b fast_interrupt_c_return + b guest_exit_cont /* * Call C code to handle a HMI in real mode. 
-- cgit v1.2.3 From c05772018491e5294f55d63b239ab0d532e96616 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 21 Feb 2019 13:40:20 +1100 Subject: powerpc/64s: Better printing of machine check info for guest MCEs This adds an "in_guest" parameter to machine_check_print_event_info() so that we can avoid trying to translate guest NIP values into symbolic form using the host kernel's symbol table. Reviewed-by: Aravinda Prasad Reviewed-by: Mahesh Salgaonkar Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mce.h | 2 +- arch/powerpc/kernel/mce.c | 8 +++++--- arch/powerpc/kvm/book3s_hv.c | 4 ++-- arch/powerpc/platforms/powernv/opal.c | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index a8b8903e1844..17996bc9382b 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -209,7 +209,7 @@ extern int get_mce_event(struct machine_check_event *mce, bool release); extern void release_mce_event(void); extern void machine_check_queue_event(void); extern void machine_check_print_event_info(struct machine_check_event *evt, - bool user_mode); + bool user_mode, bool in_guest); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index bd933a75f0bc..d501b48f287e 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -301,13 +301,13 @@ static void machine_check_process_queued_event(struct irq_work *work) while (__this_cpu_read(mce_queue_count) > 0) { index = __this_cpu_read(mce_queue_count) - 1; evt = this_cpu_ptr(&mce_event_queue[index]); - machine_check_print_event_info(evt, false); + machine_check_print_event_info(evt, false, false); __this_cpu_dec(mce_queue_count); } } void machine_check_print_event_info(struct machine_check_event *evt, - bool user_mode) + bool user_mode, bool in_guest) { const char *level, *sevstr, *subtype; static const char *mc_ue_types[] = { @@ -387,7 +387,9 @@ void machine_check_print_event_info(struct machine_check_event *evt, evt->disposition == MCE_DISPOSITION_RECOVERED ? "Recovered" : "Not recovered"); - if (user_mode) { + if (in_guest) { + printk("%s Guest NIP: %016llx\n", level, evt->srr0); + } else if (user_mode) { printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level, evt->srr0, current->pid, current->comm); } else { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 53b202415395..a3d5318f5d1e 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1216,7 +1216,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOK3S_INTERRUPT_MACHINE_CHECK: /* Print the MCE event to host console. */ - machine_check_print_event_info(&vcpu->arch.mce_evt, false); + machine_check_print_event_info(&vcpu->arch.mce_evt, false, true); /* * If the guest can do FWNMI, exit to userspace so it can @@ -1406,7 +1406,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Pass the machine check to the L1 guest */ r = RESUME_HOST; /* Print the MCE event to host console. 
*/ - machine_check_print_event_info(&vcpu->arch.mce_evt, false); + machine_check_print_event_info(&vcpu->arch.mce_evt, false, true); break; /* * We get these next two if the guest accesses a page which it thinks diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 79586f127521..05c85be0370f 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -587,7 +587,7 @@ int opal_machine_check(struct pt_regs *regs) evt.version); return 0; } - machine_check_print_event_info(&evt, user_mode(regs)); + machine_check_print_event_info(&evt, user_mode(regs), false); if (opal_recover_mce(regs, &evt)) return 1; -- cgit v1.2.3 From 3d8810e02b7f811be3bc9ad2f433be4e245e8267 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 13 Feb 2019 16:45:09 +0530 Subject: powerpc/mm/hash: Increase vmalloc space to 512T with hash MMU This patch updates the kernel non-linear virtual map to 512TB when we're built with 64K page size and are using the hash MMU. We allocate one context for the vmalloc region and hence the max virtual area size is limited by the context map size (512TB for 64K and 64TB for 4K page size). This patch fixes boot failures with large amounts of system RAM where we need large vmalloc space to handle per cpu allocations. Signed-off-by: Michael Ellerman Signed-off-by: Aneesh Kumar K.V Tested-by: Aneesh Kumar K.V --- arch/powerpc/include/asm/book3s/64/hash.h | 32 ++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 247aff9cc6ba..54b7af6cd27f 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -40,22 +40,36 @@ #else #define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE) #endif + /* - * Define the address range of the kernel non-linear virtual area + * Define the address range of the kernel non-linear virtual area. In contrast + * to the linear mapping, this is managed using the kernel page tables and then + * inserted into the hash page table to actually take effect, similarly to user + * mappings. */ #define H_KERN_VIRT_START ASM_CONST(0xD000000000000000) -#define H_KERN_VIRT_SIZE ASM_CONST(0x0000400000000000) /* 64T */ /* - * The vmalloc space starts at the beginning of that region, and - * occupies half of it on hash CPUs and a quarter of it on Book3E - * (we keep a quarter for the virtual memmap) + * Allow virtual mapping of one context size. 
+ 512TB for 64K page size + 64TB for 4K page size + */ +#define H_KERN_VIRT_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT) + +/* + * 8TB IO mapping size + */ +#define H_KERN_IO_SIZE ASM_CONST(0x80000000000) /* 8T */ + +/* + * The vmalloc space starts at the beginning of the kernel non-linear virtual + * region, and occupies 504T (64K) or 56T (4K) */ -#define H_VMALLOC_START H_KERN_VIRT_START -#define H_VMALLOC_SIZE ASM_CONST(0x380000000000) /* 56T */ -#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) +#define H_VMALLOC_START H_KERN_VIRT_START +#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE - H_KERN_IO_SIZE) +#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) -#define H_KERN_IO_START H_VMALLOC_END +#define H_KERN_IO_START H_VMALLOC_END /* * Region IDs -- cgit v1.2.3 From d065ee93aab6ef4c2a5af5c455b5044bd5136547 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 15 Feb 2019 10:32:02 +0000 Subject: powerpc: drop unused GENERIC_CSUM Kconfig item Commit d4fde568a34a ("powerpc/64: Use optimized checksum routines on little-endian") converted the last powerpc user of GENERIC_CSUM. This patch does a final cleanup, dropping the Kconfig GENERIC_CSUM option, which is always 'n', and the associated piece of code in asm/checksum.h. Fixes: d4fde568a34a ("powerpc/64: Use optimized checksum routines on little-endian") Reported-by: Christoph Hellwig Signed-off-by: Christophe Leroy Reviewed-by: Christoph Hellwig Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 3 --- arch/powerpc/include/asm/checksum.h | 4 ---- 2 files changed, 7 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8af6a7d93148..3db58fcfb0b2 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -247,9 +247,6 @@ config PPC_BARRIER_NOSPEC default y depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E -config GENERIC_CSUM - def_bool n - config EARLY_PRINTK bool default y diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index a78a57e5058d..72a65d744a28 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -9,9 +9,6 @@ * 2 of the License, or (at your option) any later version. */ -#ifdef CONFIG_GENERIC_CSUM -#include -#else #include #include /* @@ -217,6 +214,5 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, __u32 len, __u8 proto, __wsum sum); -#endif #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From 46ee7c3c5212b0f4f8713d60cfd595721efdf0d3 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 15 Feb 2019 11:48:11 +1100 Subject: powerpc/eeh: Use debugfs_create_u32 for eeh_max_freezes There's no need for the custom getter/setter functions, so we should remove them in favour of using the generic one. While we're here, change the type of eeh_max_freezes to u32 and print the value in decimal rather than hex, because printing it in hex makes no sense.
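Schematically, the patch below boils a hand-rolled debugfs attribute down to the stock helper; this sketch only restates the shape of the change shown in the diff:

    /* Before: a getter, a setter and DEFINE_DEBUGFS_ATTRIBUTE() per value */
    DEFINE_DEBUGFS_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
    			 eeh_freeze_dbgfs_set, "0x%llx\n");

    /* After: one call, with the u32 printed in decimal */
    debugfs_create_u32("eeh_max_freezes", 0600,
    		   powerpc_debugfs_root, &eeh_max_freezes);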
Signed-off-by: Oliver O'Halloran Reviewed-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 2 +- arch/powerpc/kernel/eeh.c | 21 +++------------------ 2 files changed, 4 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 0b655810f32d..f3b3c3537792 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -219,7 +219,7 @@ struct eeh_ops { }; extern int eeh_subsystem_flags; -extern int eeh_max_freezes; +extern u32 eeh_max_freezes; extern struct eeh_ops *eeh_ops; extern raw_spinlock_t confirm_error_lock; diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 98d8755ac4c8..15e2734b4854 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -109,7 +109,7 @@ EXPORT_SYMBOL(eeh_subsystem_flags); * frozen count in last hour exceeds this limit, the PE will * be forced to be offline permanently. */ -int eeh_max_freezes = 5; +u32 eeh_max_freezes = 5; /* Platform dependent EEH operations */ struct eeh_ops *eeh_ops = NULL; @@ -1829,22 +1829,8 @@ static int eeh_enable_dbgfs_get(void *data, u64 *val) return 0; } -static int eeh_freeze_dbgfs_set(void *data, u64 val) -{ - eeh_max_freezes = val; - return 0; -} - -static int eeh_freeze_dbgfs_get(void *data, u64 *val) -{ - *val = eeh_max_freezes; - return 0; -} - DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, eeh_enable_dbgfs_set, "0x%llx\n"); -DEFINE_DEBUGFS_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get, - eeh_freeze_dbgfs_set, "0x%llx\n"); #endif static int __init eeh_init_proc(void) @@ -1855,9 +1841,8 @@ static int __init eeh_init_proc(void) debugfs_create_file_unsafe("eeh_enable", 0600, powerpc_debugfs_root, NULL, &eeh_enable_dbgfs_ops); - debugfs_create_file_unsafe("eeh_max_freezes", 0600, - powerpc_debugfs_root, NULL, - &eeh_freeze_dbgfs_ops); + debugfs_create_u32("eeh_max_freezes", 0600, + powerpc_debugfs_root, &eeh_max_freezes); #endif } -- cgit v1.2.3 From 5ca85ae6318df34874999e3fd1760a88208e2a8e Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 15 Feb 2019 11:48:13 +1100 Subject: powerpc/eeh_cache: Add a way to dump the EEH address cache Adds a debugfs file that can be read to view the contents of the EEH address cache. This is pretty similar to the existing eeh_addr_cache_print() function, but that function is intended to debug issues inside the kernel since it's #ifdef'ed out by default, and writes into the kernel log.
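Going by the seq_printf() format in the patch below, reading the new file (which lands under the usual powerpc debugfs root, normally /sys/kernel/debug/powerpc/eeh_address_cache) should yield one line per cached range, roughly of this shape; the device names and address values here are invented:

    mem addr range [0x3fe080000000-0x3fe0807fffff]: 0001:01:00.0
    i/o addr range [0x0000000000001000-0x0000000000001fff]: 0001:01:00.1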
Signed-off-by: Oliver O'Halloran Reviewed-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 3 +++ arch/powerpc/kernel/eeh.c | 1 + arch/powerpc/kernel/eeh_cache.c | 30 ++++++++++++++++++++++++++---- 3 files changed, 30 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index f3b3c3537792..e42d643a20ac 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -460,6 +460,9 @@ static inline void eeh_readsl(const volatile void __iomem *addr, void * buf, eeh_check_failure(addr); } + +void eeh_cache_debugfs_init(void); + #endif /* CONFIG_PPC64 */ #endif /* __KERNEL__ */ #endif /* _POWERPC_EEH_H */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 15e2734b4854..8d36c50e906f 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1843,6 +1843,7 @@ static int __init eeh_init_proc(void) &eeh_enable_dbgfs_ops); debugfs_create_u32("eeh_max_freezes", 0600, powerpc_debugfs_root, &eeh_max_freezes); + eeh_cache_debugfs_init(); #endif } diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index b2c320e0fcef..5c5697cced41 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -298,9 +299,30 @@ void eeh_addr_cache_build(void) eeh_addr_cache_insert_dev(dev); eeh_sysfs_add_device(dev); } +} -#ifdef DEBUG - /* Verify tree built up above, echo back the list of addrs. */ - eeh_addr_cache_print(&pci_io_addr_cache_root); -#endif +static int eeh_addr_cache_show(struct seq_file *s, void *v) +{ + struct pci_io_addr_range *piar; + struct rb_node *n; + + spin_lock(&pci_io_addr_cache_root.piar_lock); + for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) { + piar = rb_entry(n, struct pci_io_addr_range, rb_node); + + seq_printf(s, "%s addr range [%pap-%pap]: %s\n", + (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", + &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev)); + } + spin_unlock(&pci_io_addr_cache_root.piar_lock); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache); + +void eeh_cache_debugfs_init(void) +{ + debugfs_create_file_unsafe("eeh_address_cache", 0400, + powerpc_debugfs_root, NULL, + &eeh_addr_cache_fops); } -- cgit v1.2.3 From 67060cb1ffa474c4fa1ae4db865ac1c7ed1fa899 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 15 Feb 2019 11:48:15 +1100 Subject: powerpc/pci: Add pci_find_controller_for_domain() Add a helper to find the pci_controller structure based on the domain number / phb id. 
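A minimal usage sketch of the new helper, mirroring how the eeh_force_recover patch later in this series consumes it (error handling shortened):

    struct pci_controller *hose;

    hose = pci_find_controller_for_domain(phbid);
    if (!hose)
    	return -ENODEV;	/* no PHB registered with that domain number */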
Signed-off-by: Oliver O'Halloran Reviewed-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 2 ++ arch/powerpc/kernel/pci-common.c | 11 +++++++++++ 2 files changed, 13 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 98e8b46aff97..6c0039f3a3a6 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -273,6 +273,8 @@ extern int pcibios_map_io_space(struct pci_bus *bus); extern struct pci_controller *pci_find_hose_for_OF_device( struct device_node* node); +extern struct pci_controller *pci_find_controller_for_domain(int domain_nr); + /* Fill up host controller resources from the OF node */ extern void pci_process_bridge_OF_ranges(struct pci_controller *hose, struct device_node *dev, int primary); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index cbdf13d86227..60f20c2e559a 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -351,6 +351,17 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node) return NULL; } +struct pci_controller *pci_find_controller_for_domain(int domain_nr) +{ + struct pci_controller *hose; + + list_for_each_entry(hose, &hose_list, list_node) + if (hose->global_number == domain_nr) + return hose; + + return NULL; +} + /* * Reads the interrupt pin to determine if interrupt is use by card. * If the interrupt is used, then gets the interrupt line from the -- cgit v1.2.3 From 6b493f6079a430fd41f66933b68d1bb1ad37ca8c Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 15 Feb 2019 11:48:16 +1100 Subject: powerpc/eeh: Allow disabling recovery Currently, when we detect an error, we automatically invoke the EEH recovery handler. This can be annoying when debugging EEH problems, or when working on EEH itself, so this patch adds a debugfs knob that will prevent a recovery event from being queued up when an issue is detected. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh.c | 10 ++++++++++ arch/powerpc/kernel/eeh_event.c | 9 +++++++++ 3 files changed, 20 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index e42d643a20ac..94cfcf33030a 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -220,6 +220,7 @@ struct eeh_ops { extern int eeh_subsystem_flags; extern u32 eeh_max_freezes; +extern bool eeh_debugfs_no_recover; extern struct eeh_ops *eeh_ops; extern raw_spinlock_t confirm_error_lock; diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 8d36c50e906f..0996f22b2612 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -111,6 +111,13 @@ EXPORT_SYMBOL(eeh_subsystem_flags); */ u32 eeh_max_freezes = 5; +/* + * Controls whether a recovery event should be scheduled when an + * isolated device is discovered. This is only really useful for + * debugging problems with the EEH core.
+ */ +bool eeh_debugfs_no_recover; + /* Platform dependent EEH operations */ struct eeh_ops *eeh_ops = NULL; @@ -1843,6 +1850,9 @@ static int __init eeh_init_proc(void) &eeh_enable_dbgfs_ops); debugfs_create_u32("eeh_max_freezes", 0600, powerpc_debugfs_root, &eeh_max_freezes); + debugfs_create_bool("eeh_disable_recovery", 0600, + powerpc_debugfs_root, + &eeh_debugfs_no_recover); eeh_cache_debugfs_init(); #endif } diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index 227e57f980df..19837798bb1d 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -126,6 +126,15 @@ int eeh_send_failure_event(struct eeh_pe *pe) unsigned long flags; struct eeh_event *event; + /* + * If we've manually supressed recovery events via debugfs + * then just drop it on the floor. + */ + if (eeh_debugfs_no_recover) { + pr_err("EEH: Event dropped due to no_recover setting\n"); + return 0; + } + event = kzalloc(sizeof(*event), GFP_ATOMIC); if (!event) { pr_err("EEH: out of memory, event not handled\n"); -- cgit v1.2.3 From 954bd99435b8ba99e86665d6a2ec1baa1d128325 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 15 Feb 2019 11:48:17 +1100 Subject: powerpc/eeh: Add eeh_force_recover to debugfs This patch adds a debugfs interface to force scheduling a recovery event. This can be used to recover a specific PE or schedule a "special" recovery event that checks for errors at the PHB level. To force a recovery of a normal PE, use: echo '<#phb>:<#pe>' > /sys/kernel/debug/powerpc/eeh_force_recover To force a scan for broken PHBs: echo 'hwcheck' > /sys/kernel/debug/powerpc/eeh_force_recover Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh_event.h | 1 + arch/powerpc/kernel/eeh.c | 59 ++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/eeh_event.c | 25 +++++++++------ 3 files changed, 75 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h index 9884e872686f..6d0412b846ac 100644 --- a/arch/powerpc/include/asm/eeh_event.h +++ b/arch/powerpc/include/asm/eeh_event.h @@ -33,6 +33,7 @@ struct eeh_event { int eeh_event_init(void); int eeh_send_failure_event(struct eeh_pe *pe); +int __eeh_send_failure_event(struct eeh_pe *pe); void eeh_remove_event(struct eeh_pe *pe, bool force); void eeh_handle_normal_event(struct eeh_pe *pe); void eeh_handle_special_event(void); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 0996f22b2612..289c0b37d845 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1838,6 +1838,62 @@ static int eeh_enable_dbgfs_get(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, eeh_enable_dbgfs_set, "0x%llx\n"); + +static ssize_t eeh_force_recover_write(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct pci_controller *hose; + uint32_t phbid, pe_no; + struct eeh_pe *pe; + char buf[20]; + int ret; + + ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); + if (!ret) + return -EFAULT; + + /* + * When PE is NULL the event is a "special" event. Rather than + * recovering a specific PE it forces the EEH core to scan for failed + * PHBs and recovers each. This needs to be done before any device + * recoveries can occur.
+ */ + if (!strncmp(buf, "hwcheck", 7)) { + __eeh_send_failure_event(NULL); + return count; + } + + ret = sscanf(buf, "%x:%x", &phbid, &pe_no); + if (ret != 2) + return -EINVAL; + + hose = pci_find_controller_for_domain(phbid); + if (!hose) + return -ENODEV; + + /* Retrieve PE */ + pe = eeh_pe_get(hose, pe_no, 0); + if (!pe) + return -ENODEV; + + /* + * We don't do any state checking here since the detection + * process is async to the recovery process. The recovery + * thread *should* not break even if we schedule a recovery + * from an odd state (e.g. PE removed, or recovery of a + * non-isolated PE) + */ + __eeh_send_failure_event(pe); + + return ret < 0 ? ret : count; +} + +static const struct file_operations eeh_force_recover_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = eeh_force_recover_write, +}; #endif static int __init eeh_init_proc(void) @@ -1853,6 +1909,9 @@ static int __init eeh_init_proc(void) debugfs_create_bool("eeh_disable_recovery", 0600, powerpc_debugfs_root, &eeh_debugfs_no_recover); + debugfs_create_file_unsafe("eeh_force_recover", 0600, + powerpc_debugfs_root, NULL, + &eeh_force_recover_fops); eeh_cache_debugfs_init(); #endif } diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index 19837798bb1d..539aca055d70 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -121,20 +121,11 @@ int eeh_event_init(void) * the actual event will be delivered in a normal context * (from a workqueue). */ -int eeh_send_failure_event(struct eeh_pe *pe) +int __eeh_send_failure_event(struct eeh_pe *pe) { unsigned long flags; struct eeh_event *event; - /* - * If we've manually supressed recovery events via debugfs - * then just drop it on the floor. - */ - if (eeh_debugfs_no_recover) { - pr_err("EEH: Event dropped due to no_recover setting\n"); - return 0; - } - event = kzalloc(sizeof(*event), GFP_ATOMIC); if (!event) { pr_err("EEH: out of memory, event not handled\n"); @@ -153,6 +144,20 @@ int eeh_send_failure_event(struct eeh_pe *pe) return 0; } +int eeh_send_failure_event(struct eeh_pe *pe) +{ + /* + * If we've manually supressed recovery events via debugfs + * then just drop it on the floor. + */ + if (eeh_debugfs_no_recover) { + pr_err("EEH: Event dropped due to no_recover setting\n"); + return 0; + } + + return __eeh_send_failure_event(pe); +} + /** * eeh_remove_event - Remove EEH event from the queue * @pe: Event binding to the PE -- cgit v1.2.3 From c746ca00f5eac6224eda02f39ebdc48fabfad3c5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 14 Feb 2019 12:15:40 +0530 Subject: powerpc/book3s: Remove pgd/pud/pmd_set() interfaces When updating page tables, we need to make sure we fill the page table entry valid bits. We do this by or'ing in one of PGD/PUD/PMD_VAL_BITS. The page table 'set' interfaces allow updating the raw value of page table entries without setting the valid bits, so remove those interfaces to avoid incorrect usage in future. 
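To make the change of idiom concrete, here is a before/after sketch for the PUD level (illustrative only; the PGD and PMD levels are analogous):

    /* Before: the raw 'set' interface compiled fine even when the
     * caller forgot the valid bits.
     */
    pud_set(pud, __pgtable_ptr_val(pmd));	/* bug: PUD_VAL_BITS missing */

    /* After: callers assign through __pud(), and the pgalloc helpers
     * such as pud_populate() always OR in the valid bits themselves.
     */
    *pud = __pud(__pgtable_ptr_val(pmd) | PUD_VAL_BITS);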
Signed-off-by: Aneesh Kumar K.V [mpe: Reword commit message based on mailing list discussion] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgalloc.h | 8 ++++---- arch/powerpc/include/asm/book3s/64/pgtable.h | 14 -------------- 2 files changed, 4 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 9c1173283b96..138bc2ecc0c4 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -111,7 +111,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) { - pgd_set(pgd, __pgtable_ptr_val(pud) | PGD_VAL_BITS); + *pgd = __pgd(__pgtable_ptr_val(pud) | PGD_VAL_BITS); } static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) @@ -138,7 +138,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - pud_set(pud, __pgtable_ptr_val(pmd) | PUD_VAL_BITS); + *pud = __pud(__pgtable_ptr_val(pmd) | PUD_VAL_BITS); } static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, @@ -176,13 +176,13 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { - pmd_set(pmd, __pgtable_ptr_val(pte) | PMD_VAL_BITS); + *pmd = __pmd(__pgtable_ptr_val(pte) | PMD_VAL_BITS); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page) { - pmd_set(pmd, __pgtable_ptr_val(pte_page) | PMD_VAL_BITS); + *pmd = __pmd(__pgtable_ptr_val(pte_page) | PMD_VAL_BITS); } static inline pgtable_t pmd_pgtable(pmd_t pmd) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 404e0f48f3f3..49c2c2888274 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -851,11 +851,6 @@ static inline bool pte_ci(pte_t pte) return false; } -static inline void pmd_set(pmd_t *pmdp, unsigned long val) -{ - *pmdp = __pmd(val); -} - static inline void pmd_clear(pmd_t *pmdp) { *pmdp = __pmd(0); @@ -887,11 +882,6 @@ static inline int pmd_bad(pmd_t pmd) return hash__pmd_bad(pmd); } -static inline void pud_set(pud_t *pudp, unsigned long val) -{ - *pudp = __pud(val); -} - static inline void pud_clear(pud_t *pudp) { *pudp = __pud(0); @@ -934,10 +924,6 @@ static inline bool pud_access_permitted(pud_t pud, bool write) } #define pgd_write(pgd) pte_write(pgd_pte(pgd)) -static inline void pgd_set(pgd_t *pgdp, unsigned long val) -{ - *pgdp = __pgd(val); -} static inline void pgd_clear(pgd_t *pgdp) { -- cgit v1.2.3 From 19f8a5b5be2898573a5e1dc1db93e8d40117606a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 12 Feb 2019 11:58:29 +1100 Subject: powerpc/powernv: Don't reprogram SLW image on every KVM guest entry/exit Commit 24be85a23d1f ("powerpc/powernv: Clear PECE1 in LPCR via stop-api only on Hotplug", 2017-07-21) added two calls to opal_slw_set_reg() inside pnv_cpu_offline(), with the aim of changing the LPCR value in the SLW image to disable wakeups from the decrementer while a CPU is offline. However, pnv_cpu_offline() gets called each time a secondary CPU thread is woken up to participate in running a KVM guest, that is, not just when a CPU is offlined. 
Since opal_slw_set_reg() is a very slow operation (with observed execution times around 20 milliseconds), this means that an offline secondary CPU can often be busy doing the opal_slw_set_reg() call when the primary CPU wants to grab all the secondary threads so that it can run a KVM guest. This leads to messages like "KVM: couldn't grab CPU n" being printed and guest execution failing. There is no need to reprogram the SLW image on every KVM guest entry and exit. So that we do it only when a CPU is really transitioning between online and offline, this moves the calls to pnv_program_cpu_hotplug_lpcr() into pnv_smp_cpu_kill_self(). Fixes: 24be85a23d1f ("powerpc/powernv: Clear PECE1 in LPCR via stop-api only on Hotplug") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/powernv.h | 2 ++ arch/powerpc/platforms/powernv/idle.c | 27 ++------------------------- arch/powerpc/platforms/powernv/smp.c | 25 +++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 25 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h index 362ea12a4501..05b552418519 100644 --- a/arch/powerpc/include/asm/powernv.h +++ b/arch/powerpc/include/asm/powernv.h @@ -23,6 +23,8 @@ extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea, unsigned long *flags, unsigned long *status, int count); +void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val); + void pnv_tm_init(void); #else static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { } diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 35f699ebb662..e52f9b06dd9c 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -458,7 +458,8 @@ EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_HOTPLUG_CPU -static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) + +void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) { u64 pir = get_hard_smp_processor_id(cpu); @@ -481,20 +482,6 @@ unsigned long pnv_cpu_offline(unsigned int cpu) { unsigned long srr1; u32 idle_states = pnv_get_supported_cpuidle_states(); - u64 lpcr_val; - - /* - * We don't want to take decrementer interrupts while we are - * offline, so clear LPCR:PECE1. We keep PECE2 (and - * LPCR_PECE_HVEE on P9) enabled as to let IPIs in. - * - * If the CPU gets woken up by a special wakeup, ensure that - * the SLW engine sets LPCR with decrementer bit cleared, else - * the CPU will come back to the kernel due to a spurious - * wakeup. - */ - lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; - pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val); __ppc64_runlatch_off(); @@ -526,16 +513,6 @@ unsigned long pnv_cpu_offline(unsigned int cpu) __ppc64_runlatch_on(); - /* - * Re-enable decrementer interrupts in LPCR. - * - * Further, we want stop states to be woken up by decrementer - * for non-hotplug cases. So program the LPCR via stop api as - * well. 
- */ - lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1; - pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val); - return srr1; } #endif diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 0d354e19ef92..db09c7022635 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "powernv.h" @@ -153,6 +154,7 @@ static void pnv_smp_cpu_kill_self(void) { unsigned int cpu; unsigned long srr1, wmask; + u64 lpcr_val; /* Standard hot unplug procedure */ /* @@ -174,6 +176,19 @@ static void pnv_smp_cpu_kill_self(void) if (cpu_has_feature(CPU_FTR_ARCH_207S)) wmask = SRR1_WAKEMASK_P8; + /* + * We don't want to take decrementer interrupts while we are + * offline, so clear LPCR:PECE1. We keep PECE2 (and + * LPCR_PECE_HVEE on P9) enabled so as to let IPIs in. + * + * If the CPU gets woken up by a special wakeup, ensure that + * the SLW engine sets LPCR with decrementer bit cleared, else + * the CPU will come back to the kernel due to a spurious + * wakeup. + */ + lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; + pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val); + while (!generic_check_cpu_restart(cpu)) { /* * Clear IPI flag, since we don't handle IPIs while @@ -246,6 +261,16 @@ static void pnv_smp_cpu_kill_self(void) } + /* + * Re-enable decrementer interrupts in LPCR. + * + * Further, we want stop states to be woken up by decrementer + * for non-hotplug cases. So program the LPCR via stop api as + * well. + */ + lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1; + pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val); + DBG("CPU%d coming online...\n", cpu); } -- cgit v1.2.3 From 40058337f23f79212f92ed5ef066e90a032905b1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Feb 2019 10:37:53 +0000 Subject: powerpc: simplify BDI switch There is no reason to re-read the pointer at location 0xf0 each time, as it is fixed and known. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu.h | 2 ++ arch/powerpc/kernel/head_32.S | 5 ++--- arch/powerpc/kernel/head_40x.S | 5 ++--- arch/powerpc/kernel/head_8xx.S | 1 + arch/powerpc/mm/8xx_mmu.c | 7 ++----- 5 files changed, 9 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 25607604a7a5..6d22a8e78fe2 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -356,6 +356,8 @@ extern void early_init_mmu_secondary(void); extern void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size); static inline void mmu_early_init_devtree(void) { } + +extern void *abatron_pteptrs[2]; #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 05b08db3901d..c2f564690778 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -1027,9 +1027,8 @@ _ENTRY(switch_mmu_context) * The PGDIR is passed as second argument. */ lwz r4,MM_PGD(r4) - lis r5, KERNELBASE@h - lwz r5, 0xf0(r5) - stw r4, 0x4(r5) + lis r5, abatron_pteptrs@ha + stw r4, abatron_pteptrs@l + 0x4(r5) #endif li r4,0 isync diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index b19d78410511..11dd09d0ce1a 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -953,9 +953,8 @@ _GLOBAL(set_context) /* Context switch the PTE pointer for the Abatron BDI2000.
* The PGDIR is the second parameter. */ - lis r5, KERNELBASE@h - lwz r5, 0xf0(r5) - stw r4, 0x4(r5) + lis r5, abatron_pteptrs@ha + stw r4, abatron_pteptrs@l + 0x4(r5) #endif sync mtspr SPRN_PID,r3 diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 67cbae30ebf2..7e14796bea81 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -989,5 +989,6 @@ swapper_pg_dir: /* Room for two PTE table poiners, usually the kernel and current user * pointer to their respective root page table (pgdir). */ + .globl abatron_pteptrs abatron_pteptrs: .space 8 diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index bfa503cff351..f12ec85e965c 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -162,14 +162,11 @@ void set_context(unsigned long id, pgd_t *pgd) { s16 offset = (s16)(__pa(swapper_pg_dir)); -#ifdef CONFIG_BDI_SWITCH - pgd_t **ptr = *(pgd_t ***)(KERNELBASE + 0xf0); - /* Context switch the PTE pointer for the Abatron BDI2000. * The PGDIR is passed as second argument. */ - *(ptr + 1) = pgd; -#endif + if (IS_ENABLED(CONFIG_BDI_SWITCH)) + abatron_pteptrs[1] = pgd; /* Register M_TWB will contain base address of level 1 table minus the * lower part of the kernel PGDIR base address, so that all accesses to -- cgit v1.2.3 From 0df977eafc792a5365a7f81d8d5920132e03afad Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Feb 2019 10:37:54 +0000 Subject: powerpc/6xx: Don't use SPRN_SPRG2 for storing stack pointer while in RTAS When calling RTAS, the stack pointer is stored in SPRN_SPRG2 in order to be able to restore it in case of machine check in RTAS. As machine check is not a performance-critical path, this patch frees SPRN_SPRG2 by using a field in thread struct instead.
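Conceptually, the machine check entry test becomes the following (a C model of the head_32.S assembly, for illustration only; the function name is invented):

    /* If thread.rtas_sp is non-zero we were executing inside RTAS, so
     * the handler must use that saved exception frame rather than
     * trusting r1.
     */
    static unsigned long mce_pick_frame(struct thread_struct *t, unsigned long r1)
    {
    	return t->rtas_sp ? t->rtas_sp : r1;
    }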
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/processor.h | 3 +++ arch/powerpc/include/asm/reg.h | 1 - arch/powerpc/kernel/asm-offsets.c | 3 +++ arch/powerpc/kernel/entry_32.S | 5 +++-- arch/powerpc/kernel/head_32.S | 22 ++++++++++++---------- 5 files changed, 21 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index ee58526cb6c2..e8682122ea3d 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -250,6 +250,9 @@ struct thread_struct { #ifdef CONFIG_PPC32 void *pgdir; /* root of page-table tree */ unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ +#ifdef CONFIG_PPC_RTAS + unsigned long rtas_sp; /* stack pointer for when in RTAS */ +#endif #endif /* Debug Registers */ struct debug_reg debug; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 1c98ef1f2d5b..371ef6e8248e 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1167,7 +1167,6 @@ #ifdef CONFIG_PPC_BOOK3S_32 #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 -#define SPRN_SPRG_RTAS SPRN_SPRG2 #define SPRN_SPRG_603_LRU SPRN_SPRG4 #endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 9ffc72ded73a..d6f9bdb1eb2e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -93,6 +93,9 @@ int main(void) OFFSET(THREAD_INFO, task_struct, stack); DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16)); OFFSET(KSP_LIMIT, thread_struct, ksp_limit); +#ifdef CONFIG_PPC_RTAS + OFFSET(RTAS_SP, thread_struct, rtas_sp); +#endif #endif /* CONFIG_PPC64 */ #ifdef CONFIG_LIVEPATCH diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index c2b66fbbf7f0..6c671ceb5a06 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1332,7 +1332,7 @@ _GLOBAL(enter_rtas) MTMSRD(r0) /* don't get trashed */ li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR) mtlr r6 - mtspr SPRN_SPRG_RTAS,r7 + stw r7, THREAD + RTAS_SP(r2) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 RFI @@ -1341,7 +1341,8 @@ _GLOBAL(enter_rtas) lwz r9,8(r9) /* original msr value */ addi r1,r1,INT_FRAME_SIZE li r0,0 - mtspr SPRN_SPRG_RTAS,r0 + tophys(r7, r2) + stw r0, THREAD + RTAS_SP(r7) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 RFI /* return to caller */ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index c2f564690778..04128899a0a5 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -352,9 +352,8 @@ i##n: \ * registers that might have bad values includes all the GPRs * and all the BATs. We indicate that we are in RTAS by putting * a non-zero value, the address of the exception frame to use, - * in SPRG2. The machine check handler checks SPRG2 and uses its - * value if it is non-zero. If we ever needed to free up SPRG2, - * we could use a field in the thread_info or thread_struct instead. + * in thread.rtas_sp. The machine check handler checks thread.rtas_sp + * and uses its value if it is non-zero. * (Other exception handlers assume that r1 is a valid kernel stack * pointer when we take an exception from supervisor mode.) * -- paulus. 
@@ -365,16 +364,15 @@ i##n: \ mtspr SPRN_SPRG_SCRATCH1,r11 mfcr r10 #ifdef CONFIG_PPC_CHRP - mfspr r11,SPRN_SPRG_RTAS - cmpwi 0,r11,0 - bne 7f + mfspr r11, SPRN_SPRG_THREAD + lwz r11, RTAS_SP(r11) + cmpwi cr1, r11, 0 + bne cr1, 7f #endif /* CONFIG_PPC_CHRP */ EXCEPTION_PROLOG_1 7: EXCEPTION_PROLOG_2 addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_CHRP - mfspr r4,SPRN_SPRG_RTAS - cmpwi cr1,r4,0 bne cr1,1f #endif EXC_XFER_STD(0x200, machine_check_exception) @@ -865,8 +863,10 @@ __secondary_start: tophys(r4,r2) addi r4,r4,THREAD /* phys address of our thread_struct */ mtspr SPRN_SPRG_THREAD,r4 +#ifdef CONFIG_PPC_RTAS li r3,0 - mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */ + stw r3, RTAS_SP(r4) /* 0 => not in RTAS */ +#endif /* enable MMU and jump to start_secondary */ li r4,MSR_KERNEL @@ -950,8 +950,10 @@ start_here: tophys(r4,r2) addi r4,r4,THREAD /* init task's THREAD */ mtspr SPRN_SPRG_THREAD,r4 +#ifdef CONFIG_PPC_RTAS li r3,0 - mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */ + stw r3, RTAS_SP(r4) /* 0 => not in RTAS */ +#endif /* stack */ lis r1,init_thread_union@ha -- cgit v1.2.3 From 93c4a162b014d238a287f8264adb25c009c79e61 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Feb 2019 10:37:55 +0000 Subject: powerpc/6xx: Store PGDIR physical address in a SPRG Use SPRN_SPRG2 to store the current thread PGDIR and avoid reading thread_struct.pgdir at every TLB miss. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kernel/cpu_setup_6xx.S | 4 ++++ arch/powerpc/kernel/head_32.S | 25 ++++++++++++------------- arch/powerpc/mm/hash_low_32.S | 3 +-- 4 files changed, 18 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 371ef6e8248e..1f79e1d8fb0b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1167,6 +1167,7 @@ #ifdef CONFIG_PPC_BOOK3S_32 #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 +#define SPRN_SPRG_PGDIR SPRN_SPRG2 #define SPRN_SPRG_603_LRU SPRN_SPRG4 #endif diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index 8c069e96c478..6f1c11e0691f 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -24,6 +24,10 @@ BEGIN_MMU_FTR_SECTION li r10,0 mtspr SPRN_SPRG_603_LRU,r10 /* init SW LRU tracking */ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + lis r10, (swapper_pg_dir - PAGE_OFFSET)@h + ori r10, r10, (swapper_pg_dir - PAGE_OFFSET)@l + mtspr SPRN_SPRG_PGDIR, r10 + BEGIN_FTR_SECTION bl __init_fpu_registers END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE) diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 04128899a0a5..2b0a26f66115 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -500,16 +500,15 @@ InstructionTLBMiss: mfspr r3,SPRN_IMISS lis r1,PAGE_OFFSET@h /* check if kernel address */ cmplw 0,r1,r3 - mfspr r2,SPRN_SPRG_THREAD + mfspr r2, SPRN_SPRG_PGDIR li r1,_PAGE_USER|_PAGE_PRESENT|_PAGE_EXEC /* low addresses tested as user */ - lwz r2,PGDIR(r2) bge- 112f mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ lis r2,swapper_pg_dir@ha /* if kernel address, use */ addi r2,r2,swapper_pg_dir@l /* kernel page table */ -112: tophys(r2,r2) - rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + tophys(r2,r2) +112: rlwimi r2,r3,12,20,29 /* insert top 10 
bits of address */ lwz r2,0(r2) /* get pmd entry */ rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- InstructionAddressInvalid /* return if no mapping */ @@ -574,16 +573,15 @@ DataLoadTLBMiss: mfspr r3,SPRN_DMISS lis r1,PAGE_OFFSET@h /* check if kernel address */ cmplw 0,r1,r3 - mfspr r2,SPRN_SPRG_THREAD + mfspr r2, SPRN_SPRG_PGDIR li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */ - lwz r2,PGDIR(r2) bge- 112f mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ lis r2,swapper_pg_dir@ha /* if kernel address, use */ addi r2,r2,swapper_pg_dir@l /* kernel page table */ -112: tophys(r2,r2) - rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + tophys(r2,r2) +112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- DataAddressInvalid /* return if no mapping */ @@ -658,16 +656,15 @@ DataStoreTLBMiss: mfspr r3,SPRN_DMISS lis r1,PAGE_OFFSET@h /* check if kernel address */ cmplw 0,r1,r3 - mfspr r2,SPRN_SPRG_THREAD + mfspr r2, SPRN_SPRG_PGDIR li r1,_PAGE_RW|_PAGE_USER|_PAGE_PRESENT /* access flags */ - lwz r2,PGDIR(r2) bge- 112f mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ lis r2,swapper_pg_dir@ha /* if kernel address, use */ addi r2,r2,swapper_pg_dir@l /* kernel page table */ -112: tophys(r2,r2) - rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + tophys(r2,r2) +112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- DataAddressInvalid /* return if no mapping */ @@ -1024,14 +1021,16 @@ _ENTRY(switch_mmu_context) li r0,NUM_USER_SEGMENTS mtctr r0 + lwz r4, MM_PGD(r4) #ifdef CONFIG_BDI_SWITCH /* Context switch the PTE pointer for the Abatron BDI2000. * The PGDIR is passed as second argument. */ - lwz r4,MM_PGD(r4) lis r5, abatron_pteptrs@ha stw r4, abatron_pteptrs@l + 0x4(r5) #endif + tophys(r4, r4) + mtspr SPRN_SPRG_PGDIR, r4 li r4,0 isync 3: diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index 1e2df3e9f9ea..82e7dd0c0220 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -70,9 +70,8 @@ _GLOBAL(hash_page) /* Get PTE (linux-style) and check access */ lis r0,KERNELBASE@h /* check if kernel address */ cmplw 0,r4,r0 - mfspr r8,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ - lwz r5,PGDIR(r8) /* virt page-table root */ + mfspr r5, SPRN_SPRG_PGDIR /* virt page-table root */ blt+ 112f /* assume user more likely */ lis r5,swapper_pg_dir@ha /* if kernel address, use */ addi r5,r5,swapper_pg_dir@l /* kernel page table */ -- cgit v1.2.3 From 78ca1108b10927b3d068c8da91352b0f4cd01fc5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 25 Jan 2019 12:34:20 +0000 Subject: powerpc/book3s32: Reorder _PAGE_XXX flags to simplify TLB handling For pages without _PAGE_USER, the PP field is 00. For pages with _PAGE_USER, the PP field is 10 for RW and 11 for RO. This patch sets _PAGE_USER to 0x002 and _PAGE_RW to 0x001 in order to simplify TLB handling by reducing the number of shifts. The locations of _PAGE_PRESENT and _PAGE_HASHPTE don't matter as they are only SW-related flags.
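The PP truth table described above can be summarised with a small sketch (illustrative C, not part of the patch; the real computation is done with rlwinm/andc sequences in the TLB miss handlers):

    /* New layout: _PAGE_RW = 0x001, _PAGE_USER = 0x002. */
    static unsigned int pte_to_pp(unsigned long pte)
    {
    	if (!(pte & 0x002))		/* no _PAGE_USER: supervisor only */
    		return 0;		/* PP = 00 */
    	return (pte & 0x001) ? 2 : 3;	/* PP = 10 (RW) or 11 (RO) */
    }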
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/hash.h | 8 ++++---- arch/powerpc/kernel/head_32.S | 5 +---- arch/powerpc/mm/hash_low_32.S | 6 ++---- 3 files changed, 7 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/32/hash.h b/arch/powerpc/include/asm/book3s/32/hash.h index 2a0a467d2985..a5907ea4fb40 100644 --- a/arch/powerpc/include/asm/book3s/32/hash.h +++ b/arch/powerpc/include/asm/book3s/32/hash.h @@ -17,9 +17,9 @@ * updating the accessed and modified bits in the page table tree. */ -#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */ -#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */ -#define _PAGE_USER 0x004 /* usermode access allowed */ +#define _PAGE_RW 0x001 /* PP = x1: user write access allowed */ +#define _PAGE_USER 0x002 /* PP = 1x: usermode access allowed */ +#define _PAGE_HASHPTE 0x004 /* software: hash_page has made an HPTE for this pte */ #define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ #define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ #define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ @@ -27,7 +27,7 @@ #define _PAGE_DIRTY 0x080 /* C: page changed */ #define _PAGE_ACCESSED 0x100 /* R: page referenced */ #define _PAGE_EXEC 0x200 /* software: exec allowed */ -#define _PAGE_RW 0x400 /* software: user write access allowed */ +#define _PAGE_PRESENT 0x400 /* software: pte contains a translation */ #define _PAGE_SPECIAL 0x800 /* software: Special page */ #ifdef CONFIG_PTE_64BIT diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index fdb587c96a80..e7a5b312a7db 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -522,7 +522,6 @@ InstructionTLBMiss: andc. r1,r1,r0 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ ori r1, r1, 0xe05 /* clear out reserved bits */ andc r1, r0, r1 /* PP = user? 2 : 0 */ BEGIN_FTR_SECTION @@ -590,8 +589,7 @@ DataLoadTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwinm r1, r0, 0, 31, 31 /* _PAGE_RW -> PP lsb */ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ ori r1,r1,0xe04 /* clear out reserved bits */ andc r1,r0,r1 /* PP = user? rw? 2: 3: 0 */ @@ -670,7 +668,6 @@ DataStoreTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ li r1,0xe05 /* clear out reserved bits & PP lsb */ andc r1,r0,r1 /* PP = user? 
2: 0 */ BEGIN_FTR_SECTION diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index d94fef524ef5..f4294edeca9d 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -310,11 +310,9 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64) _GLOBAL(create_hpte) /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ - rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */ rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ - and r8,r8,r0 /* writable if _RW & _DIRTY */ - rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ + and r8, r5, r0 /* writable if _RW & _DIRTY */ + rlwimi r5, r5, 32 - 1, 31, 31 /* _PAGE_USER -> PP lsb */ ori r8,r8,0xe04 /* clear out reserved bits */ andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ BEGIN_FTR_SECTION -- cgit v1.2.3 From f68e7927212fa0dbe44c00c144b643c87ab0cf43 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sat, 23 Feb 2019 20:30:50 +1100 Subject: Revert "powerpc/book3s32: Reorder _PAGE_XXX flags to simplify TLB handling" This reverts commit 78ca1108b10927b3d068c8da91352b0f4cd01fc5. It is causing boot failures with qemu mac99 in at least some configurations. --- arch/powerpc/include/asm/book3s/32/hash.h | 8 ++++---- arch/powerpc/kernel/head_32.S | 5 ++++- arch/powerpc/mm/hash_low_32.S | 6 ++++-- 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/32/hash.h b/arch/powerpc/include/asm/book3s/32/hash.h index a5907ea4fb40..2a0a467d2985 100644 --- a/arch/powerpc/include/asm/book3s/32/hash.h +++ b/arch/powerpc/include/asm/book3s/32/hash.h @@ -17,9 +17,9 @@ * updating the accessed and modified bits in the page table tree. */ -#define _PAGE_RW 0x001 /* PP = x1: user write access allowed */ -#define _PAGE_USER 0x002 /* PP = 1x: usermode access allowed */ -#define _PAGE_HASHPTE 0x004 /* software: hash_page has made an HPTE for this pte */ +#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */ +#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */ +#define _PAGE_USER 0x004 /* usermode access allowed */ #define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ #define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ #define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ @@ -27,7 +27,7 @@ #define _PAGE_DIRTY 0x080 /* C: page changed */ #define _PAGE_ACCESSED 0x100 /* R: page referenced */ #define _PAGE_EXEC 0x200 /* software: exec allowed */ -#define _PAGE_PRESENT 0x400 /* software: pte contains a translation */ +#define _PAGE_RW 0x400 /* software: user write access allowed */ #define _PAGE_SPECIAL 0x800 /* software: Special page */ #ifdef CONFIG_PTE_64BIT diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index e7a5b312a7db..fdb587c96a80 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -522,6 +522,7 @@ InstructionTLBMiss: andc. r1,r1,r0 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ ori r1, r1, 0xe05 /* clear out reserved bits */ andc r1, r0, r1 /* PP = user? 2 : 0 */ BEGIN_FTR_SECTION @@ -589,7 +590,8 @@ DataLoadTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. 
*/ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1, r0, 0, 31, 31 /* _PAGE_RW -> PP lsb */ + rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ ori r1,r1,0xe04 /* clear out reserved bits */ andc r1,r0,r1 /* PP = user? rw? 2: 3: 0 */ @@ -668,6 +670,7 @@ DataStoreTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. */ /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ li r1,0xe05 /* clear out reserved bits & PP lsb */ andc r1,r0,r1 /* PP = user? 2: 0 */ BEGIN_FTR_SECTION diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index f4294edeca9d..d94fef524ef5 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -310,9 +310,11 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64) _GLOBAL(create_hpte) /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ + rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */ rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ - and r8, r5, r0 /* writable if _RW & _DIRTY */ - rlwimi r5, r5, 32 - 1, 31, 31 /* _PAGE_USER -> PP lsb */ + and r8,r8,r0 /* writable if _RW & _DIRTY */ + rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ ori r8,r8,0xe04 /* clear out reserved bits */ andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ BEGIN_FTR_SECTION -- cgit v1.2.3 From 78a8da0600940d679bb727cea7e153685e211723 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Wed, 20 Feb 2019 12:27:00 +0530 Subject: powerpc: sstep: Add tests for addc[.] instruction This adds test cases for the addc[.] instruction. Signed-off-by: Sandipan Das Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 1 + arch/powerpc/lib/test_emulate_step.c | 192 ++++++++++++++++++++++++++++++++++ 2 files changed, 193 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 19a8834e0398..87b73aa56b53 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -326,6 +326,7 @@ #define PPC_INST_ADDI 0x38000000 #define PPC_INST_ADDIS 0x3c000000 #define PPC_INST_ADD 0x7c000214 +#define PPC_INST_ADDC 0x7c000014 #define PPC_INST_SUB 0x7c000050 #define PPC_INST_BLR 0x4e800020 #define PPC_INST_BLRL 0x4e800021 diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index ee6d5ac3b615..9992c1ea7a1d 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -53,6 +53,10 @@ ___PPC_RA(a) | ___PPC_RB(b)) #define TEST_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | \ ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#define TEST_ADDC(t, a, b) (PPC_INST_ADDC | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_ADDC_DOT(t, a, b) (PPC_INST_ADDC | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b) | 0x1) #define MAX_SUBTESTS 16 @@ -649,6 +653,194 @@ static struct compute_test compute_tests[] = { } } } + }, + { + .mnemonic = "addc", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = TEST_ADDC(20, 21, 22), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = TEST_ADDC(20, 21, 22), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX, RB = LONG_MAX", + .instr = TEST_ADDC(20, 21, 
22), + .regs = { + .gpr[21] = LONG_MAX, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, RB = ULONG_MAX", + .instr = TEST_ADDC(20, 21, 22), + .regs = { + .gpr[21] = ULONG_MAX, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, RB = 0x1", + .instr = TEST_ADDC(20, 21, 22), + .regs = { + .gpr[21] = ULONG_MAX, + .gpr[22] = 0x1, + } + }, + { + .descr = "RA = INT_MIN, RB = INT_MIN", + .instr = TEST_ADDC(20, 21, 22), + .regs = { + .gpr[21] = INT_MIN, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MIN, RB = INT_MAX", + .instr = TEST_ADDC(20, 21, 22), + .regs = { + .gpr[21] = INT_MIN, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = INT_MAX, RB = INT_MAX", + .instr = TEST_ADDC(20, 21, 22), + .regs = { + .gpr[21] = INT_MAX, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, RB = UINT_MAX", + .instr = TEST_ADDC(20, 21, 22), + .regs = { + .gpr[21] = UINT_MAX, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, RB = 0x1", + .instr = TEST_ADDC(20, 21, 22), + .regs = { + .gpr[21] = UINT_MAX, + .gpr[22] = 0x1, + } + }, + { + .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN", + .instr = TEST_ADDC(20, 21, 22), + .regs = { + .gpr[21] = LONG_MIN | (uint)INT_MIN, + .gpr[22] = LONG_MIN | (uint)INT_MIN, + } + } + } + }, + { + .mnemonic = "addc.", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .flags = IGNORE_CCR, + .instr = TEST_ADDC_DOT(20, 21, 22), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = TEST_ADDC_DOT(20, 21, 22), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX, RB = LONG_MAX", + .flags = IGNORE_CCR, + .instr = TEST_ADDC_DOT(20, 21, 22), + .regs = { + .gpr[21] = LONG_MAX, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, RB = ULONG_MAX", + .instr = TEST_ADDC_DOT(20, 21, 22), + .regs = { + .gpr[21] = ULONG_MAX, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, RB = 0x1", + .instr = TEST_ADDC_DOT(20, 21, 22), + .regs = { + .gpr[21] = ULONG_MAX, + .gpr[22] = 0x1, + } + }, + { + .descr = "RA = INT_MIN, RB = INT_MIN", + .instr = TEST_ADDC_DOT(20, 21, 22), + .regs = { + .gpr[21] = INT_MIN, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MIN, RB = INT_MAX", + .instr = TEST_ADDC_DOT(20, 21, 22), + .regs = { + .gpr[21] = INT_MIN, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = INT_MAX, RB = INT_MAX", + .instr = TEST_ADDC_DOT(20, 21, 22), + .regs = { + .gpr[21] = INT_MAX, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, RB = UINT_MAX", + .instr = TEST_ADDC_DOT(20, 21, 22), + .regs = { + .gpr[21] = UINT_MAX, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, RB = 0x1", + .instr = TEST_ADDC_DOT(20, 21, 22), + .regs = { + .gpr[21] = UINT_MAX, + .gpr[22] = 0x1, + } + }, + { + .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN", + .instr = TEST_ADDC_DOT(20, 21, 22), + .regs = { + .gpr[21] = LONG_MIN | (uint)INT_MIN, + .gpr[22] = LONG_MIN | (uint)INT_MIN, + } + } + } } }; -- cgit v1.2.3 From 02d5d13b4544dff5fc0c0aa0179085ed52b72ecd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Feb 2019 19:08:44 +0000 Subject: powerpc/32: add helper to write into segment registers This patch adds a helper which wraps the 'mtsrin' instruction to write into segment registers.
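For example, a segment register can now be read-modify-written from plain C; the sketch below mirrors what mmu_mark_initmem_nx() does later in this series (0x10000000 is the no-execute bit in the segment register format, as used by that later patch):

    /* Set the NX bit in the segment register covering 'ea' (the top
     * four bits of the effective address select the register).
     */
    static void example_set_segment_nx(u32 ea)
    {
    	mtsrin(mfsrin(ea) | 0x10000000, ea);
    }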
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 1f79e1d8fb0b..c25880e6a16a 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1425,6 +1425,11 @@ static inline void msr_check_and_clear(unsigned long bits) #define mfsrin(v) ({unsigned int rval; \ asm volatile("mfsrin %0,%1" : "=r" (rval) : "r" (v)); \ rval;}) + +static inline void mtsrin(u32 val, u32 idx) +{ + asm volatile("mtsrin %0, %1" : : "r" (val), "r" (idx)); +} #endif #define proc_trap() asm volatile("trap") -- cgit v1.2.3 From 28ea38b9cba68eec55cf550acd6b36b6f507cd17 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Feb 2019 19:08:45 +0000 Subject: powerpc/mmu: add is_strict_kernel_rwx() helper Add a helper to know whether STRICT_KERNEL_RWX is enabled. This is based on the rodata_enabled flag, which is defined only when CONFIG_STRICT_KERNEL_RWX is selected. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu.h | 11 +++++++++++ arch/powerpc/mm/init_32.c | 4 +--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 6d22a8e78fe2..d34ad1657d7b 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -289,6 +289,17 @@ static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address) } #endif /* CONFIG_PPC_MEM_KEYS */ +#ifdef CONFIG_STRICT_KERNEL_RWX +static inline bool strict_kernel_rwx_enabled(void) +{ + return rodata_enabled; +} +#else +static inline bool strict_kernel_rwx_enabled(void) +{ + return false; +} +#endif #endif /* !__ASSEMBLY__ */ /* The kernel use the constants below to index in the page sizes array. diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 3e59e5d64b01..ee5a430b9a18 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -108,12 +108,10 @@ static void __init MMU_setup(void) __map_without_bats = 1; __map_without_ltlbs = 1; } -#ifdef CONFIG_STRICT_KERNEL_RWX - if (rodata_enabled) { + if (strict_kernel_rwx_enabled()) { __map_without_bats = 1; __map_without_ltlbs = 1; } -#endif } /* -- cgit v1.2.3 From 555f4fdb93e70d39e664fcc52cda23c5b62a46cc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Feb 2019 19:08:46 +0000 Subject: powerpc/kconfig: define PAGE_SHIFT inside Kconfig This patch defines CONFIG_PPC_PAGE_SHIFT in order to be able to use the PAGE_SHIFT value inside Kconfig.
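As a sanity check (illustrative only, not part of the patch), the new Kconfig defaults can be cross-checked against the values page.h used to hard-code, at preprocessor level:

    /* A mismatch here would silently change the page size. */
    #if defined(CONFIG_PPC_256K_PAGES) && CONFIG_PPC_PAGE_SHIFT != 18
    #error "PPC_PAGE_SHIFT out of sync with PPC_256K_PAGES (256k = 1 << 18)"
    #elif defined(CONFIG_PPC_64K_PAGES) && CONFIG_PPC_PAGE_SHIFT != 16
    #error "PPC_PAGE_SHIFT out of sync with PPC_64K_PAGES (64k = 1 << 16)"
    #elif defined(CONFIG_PPC_16K_PAGES) && CONFIG_PPC_PAGE_SHIFT != 14
    #error "PPC_PAGE_SHIFT out of sync with PPC_16K_PAGES (16k = 1 << 14)"
    #endif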
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 7 +++++++ arch/powerpc/include/asm/page.h | 13 ++----------- 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3d5d63c9b797..0f933797c376 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -704,6 +704,13 @@ config PPC_256K_PAGES endchoice +config PPC_PAGE_SHIFT + int + default 18 if PPC_256K_PAGES + default 16 if PPC_64K_PAGES + default 14 if PPC_16K_PAGES + default 12 + config THREAD_SHIFT int "Thread shift" if EXPERT range 13 15 diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index aa4497175bd3..ed870468ef6f 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -20,20 +20,11 @@ /* * On regular PPC32 page size is 4K (but we support 4K/16K/64K/256K pages - * on PPC44x). For PPC64 we support either 4K or 64K software + * on PPC44x and 4K/16K on 8xx). For PPC64 we support either 4K or 64K software * page size. When using 64K pages however, whether we are really supporting * 64K pages in HW or not is irrelevant to those definitions. */ -#if defined(CONFIG_PPC_256K_PAGES) -#define PAGE_SHIFT 18 -#elif defined(CONFIG_PPC_64K_PAGES) -#define PAGE_SHIFT 16 -#elif defined(CONFIG_PPC_16K_PAGES) -#define PAGE_SHIFT 14 -#else -#define PAGE_SHIFT 12 -#endif - +#define PAGE_SHIFT CONFIG_PPC_PAGE_SHIFT #define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 5e04ae85fbed8eef209a40a63f8ef507fe623064 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Feb 2019 19:08:48 +0000 Subject: powerpc/mm/32s: add setibat() clearibat() and update_bats() setibat() and clearibat() allow manipulating IBATs independently of DBATs. update_bats() allows updating the BATs after init. This is done with MMU off.
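For illustration, the next patches in this series use these primitives roughly as in the sketch below (simplified; sizes must be a power of 2 between 128k and 256M, and setibat()/clearibat() are static to ppc_mmu_32.c, so a real caller lives in that file):

    /* Map the first 8M of kernel text through IBAT0, invalidate IBAT1,
     * then reload all BATs with the MMU off via update_bats().
     */
    static void example_text_ibat(void)
    {
    	unsigned long base = 0;		/* physical start of kernel text */
    	unsigned int size = 0x800000;	/* one 8M block */

    	setibat(0, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
    	clearibat(1);
    	update_bats();
    }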
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/mmu-hash.h | 2 ++ arch/powerpc/kernel/head_32.S | 35 +++++++++++++++++++++++++++ arch/powerpc/mm/ppc_mmu_32.c | 32 ++++++++++++++++++++++++ 3 files changed, 69 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 0c261ba2c826..5cb588395fdc 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -92,6 +92,8 @@ typedef struct { unsigned long vdso_base; } mm_context_t; +void update_bats(void); + /* patch sites */ extern s32 patch__hash_page_A0, patch__hash_page_A1, patch__hash_page_A2; extern s32 patch__hash_page_B, patch__hash_page_C; diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index fdb587c96a80..613900bb8c39 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -1096,6 +1096,41 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr +_ENTRY(update_bats) + lis r4, 1f@h + ori r4, r4, 1f@l + tophys(r4, r4) + mfmsr r6 + mflr r7 + li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR) + rlwinm r0, r6, 0, ~MSR_RI + rlwinm r0, r0, 0, ~MSR_EE + mtmsr r0 + mtspr SPRN_SRR0, r4 + mtspr SPRN_SRR1, r3 + SYNC + RFI +1: bl clear_bats + lis r3, BATS@ha + addi r3, r3, BATS@l + tophys(r3, r3) + LOAD_BAT(0, r3, r4, r5) + LOAD_BAT(1, r3, r4, r5) + LOAD_BAT(2, r3, r4, r5) + LOAD_BAT(3, r3, r4, r5) +BEGIN_MMU_FTR_SECTION + LOAD_BAT(4, r3, r4, r5) + LOAD_BAT(5, r3, r4, r5) + LOAD_BAT(6, r3, r4, r5) + LOAD_BAT(7, r3, r4, r5) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI) + mtmsr r3 + mtspr SPRN_SRR0, r7 + mtspr SPRN_SRR1, r6 + SYNC + RFI + flush_tlbs: lis r10, 0x40 1: addic. r10, r10, -0x1000 diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 9225da8bae4c..7b011280d076 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -106,6 +106,38 @@ static unsigned int block_size(unsigned long base, unsigned long top) return min3(max_size, 1U << base_shift, 1U << block_shift); } +/* + * Set up one of the IBAT (block address translation) register pairs. + * The parameters are not checked; in particular size must be a power + * of 2 between 128k and 256M. + * Only for 603+ ... + */ +static void setibat(int index, unsigned long virt, phys_addr_t phys, + unsigned int size, pgprot_t prot) +{ + unsigned int bl = (size >> 17) - 1; + int wimgxpp; + struct ppc_bat *bat = BATS[index]; + unsigned long flags = pgprot_val(prot); + + if (!cpu_has_feature(CPU_FTR_NEED_COHERENT)) + flags &= ~_PAGE_COHERENT; + + wimgxpp = (flags & _PAGE_COHERENT) | (_PAGE_EXEC ? BPP_RX : BPP_XX); + bat[0].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ + bat[0].batl = BAT_PHYS_ADDR(phys) | wimgxpp; + if (flags & _PAGE_USER) + bat[0].batu |= 1; /* Vp = 1 */ +} + +static void clearibat(int index) +{ + struct ppc_bat *bat = BATS[index]; + + bat[0].batu = 0; + bat[0].batl = 0; +} + unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { int idx; -- cgit v1.2.3 From 63b2bc619565ef7078e7b12fafb82f51867f002b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Feb 2019 19:08:49 +0000 Subject: powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX Today, STRICT_KERNEL_RWX is based on the use of regular pages to map kernel pages. 
On Book3s 32, it has three consequences: - Using pages instead of BATs for mapping kernel linear memory severely impacts performance. - Exec protection is not effective because no-execute cannot be set at page level (except on 603, which doesn't have hash tables) - Write protection is not effective because PP bits do not provide RO mode for kernel-only pages (except on 603, which handles it in software via PAGE_DIRTY) On the 603+, we have: - Independent IBATs and DBATs, allowing exec protection to be limited to the text area. - NX bit can be set in segment registers to forbid execution on memory mapped by pages. - RO mode on DBATs even for kernel-only blocks. On the 601, there is nothing much we can do other than warn the user about it, because: - BATs are common to instructions and data. - BATs do not provide RO mode for kernel-only blocks. - segment registers don't have the NX bit. In order to use IBATs for exec protection, this patch: - Aligns _etext to BAT block sizes (128kb) - Sets the NX bit in kernel segment registers (except on the vmalloc area when CONFIG_MODULES is selected) - Maps kernel text with IBATs. In order to use DBATs for write protection, this patch: - Aligns RW DATA to BAT block sizes (4M) - Maps kernel RO area with write prohibited DBATs - Maps remaining memory with remaining DBATs Here is what we get with this patch on a 832x when activating STRICT_KERNEL_RWX: Symbols: c0000000 T _stext c0680000 R __start_rodata c0680000 R _etext c0800000 T __init_begin c0800000 T _sinittext ~# cat /sys/kernel/debug/block_address_translation ---[ Instruction Block Address Translation ]--- 0: 0xc0000000-0xc03fffff 0x00000000 Kernel EXEC coherent 1: 0xc0400000-0xc05fffff 0x00400000 Kernel EXEC coherent 2: 0xc0600000-0xc067ffff 0x00600000 Kernel EXEC coherent 3: - 4: - 5: - 6: - 7: - ---[ Data Block Address Translation ]--- 0: 0xc0000000-0xc07fffff 0x00000000 Kernel RO coherent 1: 0xc0800000-0xc0ffffff 0x00800000 Kernel RW coherent 2: 0xc1000000-0xc1ffffff 0x01000000 Kernel RW coherent 3: 0xc2000000-0xc3ffffff 0x02000000 Kernel RW coherent 4: 0xc4000000-0xc7ffffff 0x04000000 Kernel RW coherent 5: 0xc8000000-0xcfffffff 0x08000000 Kernel RW coherent 6: 0xd0000000-0xdfffffff 0x10000000 Kernel RW coherent 7: - ~# cat /sys/kernel/debug/segment_registers ---[ User Segments ]--- 0x00000000-0x0fffffff Kern key 1 User key 1 VSID 0xa085d0 0x10000000-0x1fffffff Kern key 1 User key 1 VSID 0xa086e1 0x20000000-0x2fffffff Kern key 1 User key 1 VSID 0xa087f2 0x30000000-0x3fffffff Kern key 1 User key 1 VSID 0xa08903 0x40000000-0x4fffffff Kern key 1 User key 1 VSID 0xa08a14 0x50000000-0x5fffffff Kern key 1 User key 1 VSID 0xa08b25 0x60000000-0x6fffffff Kern key 1 User key 1 VSID 0xa08c36 0x70000000-0x7fffffff Kern key 1 User key 1 VSID 0xa08d47 0x80000000-0x8fffffff Kern key 1 User key 1 VSID 0xa08e58 0x90000000-0x9fffffff Kern key 1 User key 1 VSID 0xa08f69 0xa0000000-0xafffffff Kern key 1 User key 1 VSID 0xa0907a 0xb0000000-0xbfffffff Kern key 1 User key 1 VSID 0xa0918b ---[ Kernel Segments ]--- 0xc0000000-0xcfffffff Kern key 0 User key 1 No Exec VSID 0x000ccc 0xd0000000-0xdfffffff Kern key 0 User key 1 No Exec VSID 0x000ddd 0xe0000000-0xefffffff Kern key 0 User key 1 No Exec VSID 0x000eee 0xf0000000-0xffffffff Kern key 0 User key 1 No Exec VSID 0x000fff Aligning _etext to 128kb allows mapping up to 32Mb of text with 8 IBATs: 16Mb + 8Mb + 4Mb + 2Mb + 1Mb + 512kb + 256kb + 128kb (+ 128kb) = 32Mb (A 9th IBAT is unneeded as 32Mb would need only a single 32Mb block) Aligning data to 4M allows mapping up to 512Mb of data with 8 DBATs: 16Mb + 8Mb + 4Mb +
4Mb + 32Mb + 64Mb + 128Mb + 256Mb = 512Mb Because some processors only have 4 BATs and because some targets need DBATs for mapping other areas, the following patch will allow modifying _etext and data alignment. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 2 + arch/powerpc/include/asm/book3s/64/pgtable.h | 11 ++++ arch/powerpc/mm/init_32.c | 4 +- arch/powerpc/mm/mmu_decl.h | 8 +++ arch/powerpc/mm/pgtable_32.c | 10 +++- arch/powerpc/mm/ppc_mmu_32.c | 87 ++++++++++++++++++++++++++-- 6 files changed, 112 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 14223363a33d..2544dae471e1 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -723,11 +723,13 @@ config THREAD_SHIFT config ETEXT_SHIFT int + default 17 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 default PPC_PAGE_SHIFT config DATA_SHIFT int default 24 if STRICT_KERNEL_RWX && PPC64 + default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 default PPC_PAGE_SHIFT config FORCE_MAX_ZONEORDER diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 49d76adb9bc5..aa8406b8f7ba 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -174,7 +174,18 @@ static inline bool pte_user(pte_t pte) * of RAM. -- Cort */ #define VMALLOC_OFFSET (0x1000000) /* 16M */ + +/* + * With CONFIG_STRICT_KERNEL_RWX, kernel segments are set NX. But when modules + * are used, NX cannot be set on VMALLOC space. So vmalloc VM space and linear + * memory shall not share segments. + */ +#if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_MODULES) +#define VMALLOC_START ((_ALIGN((long)high_memory, 256L << 20) + VMALLOC_OFFSET) & \ + ~(VMALLOC_OFFSET - 1)) +#else #define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) #endif #define VMALLOC_END ioremap_bot #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index ee5a430b9a18..bc28995a37ea 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -108,10 +108,8 @@ static void __init MMU_setup(void) __map_without_bats = 1; __map_without_ltlbs = 1; } - if (strict_kernel_rwx_enabled()) { - __map_without_bats = 1; + if (strict_kernel_rwx_enabled()) __map_without_ltlbs = 1; - } } /* diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 61730023dde3..98fc94affc29 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -165,3 +165,11 @@ unsigned long p_block_mapped(phys_addr_t pa); static inline phys_addr_t v_block_mapped(unsigned long va) { return 0; } static inline unsigned long p_block_mapped(phys_addr_t pa) { return 0; } #endif + +#if defined(CONFIG_PPC_BOOK3S_32) +void mmu_mark_initmem_nx(void); +void mmu_mark_rodata_ro(void); +#else +static inline void mmu_mark_initmem_nx(void) { } +static inline void mmu_mark_rodata_ro(void) { } +#endif diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index a000768a5cc9..6e56a6240bfa 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -353,7 +353,10 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - change_page_attr(page, numpages, PAGE_KERNEL); + if (v_block_mapped((unsigned long)_stext + 1)) + mmu_mark_initmem_nx(); + else + change_page_attr(page, numpages, PAGE_KERNEL); }
#ifdef CONFIG_STRICT_KERNEL_RWX @@ -362,6 +365,11 @@ void mark_rodata_ro(void) struct page *page; unsigned long numpages; + if (v_block_mapped((unsigned long)_sinittext)) { + mmu_mark_rodata_ro(); + return; + } + page = virt_to_page(_stext); numpages = PFN_UP((unsigned long)_etext) - PFN_DOWN((unsigned long)_stext); diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 7b011280d076..2d5b0d50fb31 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "mmu_decl.h" @@ -138,15 +139,10 @@ static void clearibat(int index) bat[0].batl = 0; } -unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) +static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long top) { int idx; - if (__map_without_bats) { - printk(KERN_DEBUG "RAM mapped without BATs\n"); - return base; - } - while ((idx = find_free_bat()) != -1 && base != top) { unsigned int size = block_size(base, top); @@ -159,6 +155,85 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return base; } +unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) +{ + int done; + unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; + + if (__map_without_bats) { + pr_debug("RAM mapped without BATs\n"); + return base; + } + + if (!strict_kernel_rwx_enabled() || base >= border || top <= border) + return __mmu_mapin_ram(base, top); + + done = __mmu_mapin_ram(base, border); + if (done != border - base) + return done; + + return done + __mmu_mapin_ram(border, top); +} + +void mmu_mark_initmem_nx(void) +{ + int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; + int i; + unsigned long base = (unsigned long)_stext - PAGE_OFFSET; + unsigned long top = (unsigned long)_etext - PAGE_OFFSET; + unsigned long size; + + if (cpu_has_feature(CPU_FTR_601)) + return; + + for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) { + size = block_size(base, top); + setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); + base += size; + } + if (base < top) { + size = block_size(base, top); + size = max(size, 128UL << 10); + if ((top - base) > size) { + if (strict_kernel_rwx_enabled()) + pr_warn("Kernel _etext not properly aligned\n"); + size <<= 1; + } + setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); + base += size; + } + for (; i < nb; i++) + clearibat(i); + + update_bats(); + + for (i = TASK_SIZE >> 28; i < 16; i++) { + /* Do not set NX on VM space for modules */ + if (IS_ENABLED(CONFIG_MODULES) && + (VMALLOC_START & 0xf0000000) == i << 28) + break; + mtsrin(mfsrin(i << 28) | 0x10000000, i << 28); + } +} + +void mmu_mark_rodata_ro(void) +{ + int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; + int i; + + if (cpu_has_feature(CPU_FTR_601)) + return; + + for (i = 0; i < nb; i++) { + struct ppc_bat *bat = BATS[i]; + + if (bat_addrs[i].start < (unsigned long)__init_begin) + bat[1].batl = (bat[1].batl & ~BPP_RW) | BPP_RX; + } + + update_bats(); +} + /* * Set up one of the I/D BAT (block address translation) register pairs. * The parameters are not checked; in particular size must be a power -- cgit v1.2.3 From d5f17ee96447736a84bc44ffc4b0dddb1b519222 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Feb 2019 19:08:51 +0000 Subject: powerpc/8xx: don't disable large TLBs with CONFIG_STRICT_KERNEL_RWX This patch implements handling of STRICT_KERNEL_RWX with large TLBs directly in the TLB miss handlers. 
To do so, etext and sinittext are aligned on 512kB boundaries and the miss handlers use 512kB pages instead of 8Mb pages for addresses close to the boundaries. It sets RO PP flags for addresses under sinittext. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 2 ++ arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 3 +- arch/powerpc/kernel/head_8xx.S | 54 +++++++++++++++++++++------- arch/powerpc/mm/8xx_mmu.c | 31 +++++++++++++++- arch/powerpc/mm/init_32.c | 2 +- arch/powerpc/mm/mmu_decl.h | 2 +- 6 files changed, 78 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 81df0dbc8a9a..43fa82e409bf 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -735,6 +735,7 @@ config ETEXT_SHIFT int "_etext shift" if ETEXT_SHIFT_BOOL range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 default 17 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 + default 19 if STRICT_KERNEL_RWX && PPC_8xx default PPC_PAGE_SHIFT help On Book3S 32 (603+), IBATs are used to map kernel text. @@ -755,6 +756,7 @@ config DATA_SHIFT default 24 if STRICT_KERNEL_RWX && PPC64 range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 + default 19 if STRICT_KERNEL_RWX && PPC_8xx default PPC_PAGE_SHIFT help On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO. diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index b0f764c827c0..0a1a3fc54e54 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -231,9 +231,10 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) } /* patch sites */ -extern s32 patch__itlbmiss_linmem_top; +extern s32 patch__itlbmiss_linmem_top, patch__itlbmiss_linmem_top8; extern s32 patch__dtlbmiss_linmem_top, patch__dtlbmiss_immr_jmp; extern s32 patch__fixupdar_linmem_top; +extern s32 patch__dtlbmiss_romem_top, patch__dtlbmiss_romem_top8; extern s32 patch__itlbmiss_exit_1, patch__itlbmiss_exit_2; extern s32 patch__dtlbmiss_exit_1, patch__dtlbmiss_exit_2, patch__dtlbmiss_exit_3; diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 52c92913e39b..3f2d1afba2d1 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -292,6 +292,17 @@ SystemCall: */ EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD) +/* Called from DataStoreTLBMiss when perf TLB misses events are activated */ +#ifdef CONFIG_PERF_EVENTS + patch_site 0f, patch__dtlbmiss_perf +0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) + addi r10, r10, 1 + stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 + rfi +#endif + . 
= 0x1100 /* * For the MPC8xx, this is a software tablewalk to load the instruction @@ -405,10 +416,20 @@ InstructionTLBMiss: #ifndef CONFIG_PIN_TLB_TEXT ITLBMissLinear: mtcr r11 +#ifdef CONFIG_STRICT_KERNEL_RWX + patch_site 0f, patch__itlbmiss_linmem_top8 + + mfspr r10, SPRN_SRR0 +0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha + rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K + ori r11, r11, MI_PS512K | MI_SVALID + rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */ +#else /* Set 8M byte page and mark it valid */ li r11, MI_PS8MEG | MI_SVALID - mtspr SPRN_MI_TWC, r11 rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */ +#endif + mtspr SPRN_MI_TWC, r11 ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ _PAGE_PRESENT mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ @@ -494,16 +515,6 @@ DataStoreTLBMiss: rfi patch_site 0b, patch__dtlbmiss_exit_1 -#ifdef CONFIG_PERF_EVENTS - patch_site 0f, patch__dtlbmiss_perf -0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - addi r10, r10, 1 - stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 - rfi -#endif - DTLBMissIMMR: mtcr r11 /* Set 512k byte guarded page and mark it valid */ @@ -525,10 +536,29 @@ DTLBMissIMMR: DTLBMissLinear: mtcr r11 + rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */ +#ifdef CONFIG_STRICT_KERNEL_RWX + patch_site 0f, patch__dtlbmiss_romem_top8 + +0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha + rlwinm r11, r11, 0, 0xff800000 + neg r10, r11 + or r11, r11, r10 + rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K + ori r11, r11, MI_PS512K | MI_SVALID + mfspr r10, SPRN_MD_EPN + rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */ +#else /* Set 8M byte page and mark it valid */ li r11, MD_PS8MEG | MD_SVALID +#endif mtspr SPRN_MD_TWC, r11 - rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */ +#ifdef CONFIG_STRICT_KERNEL_RWX + patch_site 0f, patch__dtlbmiss_romem_top + +0: subis r11, r10, 0 + rlwimi r10, r11, 11, _PAGE_RO +#endif ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ _PAGE_PRESENT mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index ce11cbaa25d8..fe1f6443d57f 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -94,11 +94,20 @@ static void __init mmu_mapin_immr(void) map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG); } -static void __init mmu_patch_cmp_limit(s32 *site, unsigned long mapped) +static void mmu_patch_cmp_limit(s32 *site, unsigned long mapped) { modify_instruction_site(site, 0xffff, (unsigned long)__va(mapped) >> 16); } +static void mmu_patch_addis(s32 *site, long simm) +{ + unsigned int instr = *(unsigned int *)patch_site_addr(site); + + instr &= 0xffff0000; + instr |= ((unsigned long)simm) >> 16; + patch_instruction_site(site, instr); +} + unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long mapped; @@ -135,6 +144,26 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return mapped; } +void mmu_mark_initmem_nx(void) +{ + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23) + mmu_patch_addis(&patch__itlbmiss_linmem_top8, + -((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1))); + if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) + mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext)); +} + +#ifdef CONFIG_STRICT_KERNEL_RWX +void mmu_mark_rodata_ro(void) +{ + if (CONFIG_DATA_SHIFT < 23) + mmu_patch_addis(&patch__dtlbmiss_romem_top8, 
+ -__pa(((unsigned long)_sinittext) & + ~(LARGE_PAGE_SIZE_8M - 1))); + mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext)); +} +#endif + void __init setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index bc28995a37ea..41a3513cadc9 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -108,7 +108,7 @@ static void __init MMU_setup(void) __map_without_bats = 1; __map_without_ltlbs = 1; } - if (strict_kernel_rwx_enabled()) + if (strict_kernel_rwx_enabled() && !IS_ENABLED(CONFIG_PPC_8xx)) __map_without_ltlbs = 1; } diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 98fc94affc29..74ff61dabcb1 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -166,7 +166,7 @@ static inline phys_addr_t v_block_mapped(unsigned long va) { return 0; } static inline unsigned long p_block_mapped(phys_addr_t pa) { return 0; } #endif -#if defined(CONFIG_PPC_BOOK3S_32) +#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) void mmu_mark_initmem_nx(void); void mmu_mark_rodata_ro(void); #else -- cgit v1.2.3 From 92ab45c5f2db0caa68243be8cfa5e390a1de8c3a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 31 Jan 2019 10:08:48 +0000 Subject: powerpc: Avoid circular header inclusion in mmu-hash.h When activating CONFIG_THREAD_INFO_IN_TASK, linux/sched.h includes asm/current.h. This generates a circular dependency. To avoid that, asm/processor.h shall not be included in mmu-hash.h. In order to do that, this patch moves into a new header called asm/task_size_64/32.h all the TASK_SIZE related constants, which can then be included in mmu-hash.h directly. Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin [mpe: Split out all the TASK_SIZE constants not just 64-bit ones] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 2 +- arch/powerpc/include/asm/processor.h | 100 ++------------------------ arch/powerpc/include/asm/task_size_32.h | 21 ++++++ arch/powerpc/include/asm/task_size_64.h | 79 ++++++++++++++++++++ arch/powerpc/kvm/book3s_hv_hmi.c | 1 + 5 files changed, 107 insertions(+), 96 deletions(-) create mode 100644 arch/powerpc/include/asm/task_size_32.h create mode 100644 arch/powerpc/include/asm/task_size_64.h (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 12e522807f9f..a28a28079edb 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -23,7 +23,7 @@ */ #include #include -#include +#include #include /* diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index e8682122ea3d..2edab34ee288 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -77,105 +77,15 @@ extern int _chrp_type; #ifdef __KERNEL__ -struct task_struct; -void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp); -void release_thread(struct task_struct *); - -#ifdef CONFIG_PPC32 - -#if CONFIG_TASK_SIZE > CONFIG_KERNEL_START -#error User TASK_SIZE overlaps with KERNEL_START address -#endif -#define TASK_SIZE (CONFIG_TASK_SIZE) - -/* This decides where the kernel will search for a free chunk of vm - * space during mmap's. 
- */ -#define TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3) -#endif - #ifdef CONFIG_PPC64 -/* - * 64-bit user address space can have multiple limits - * For now supported values are: - */ -#define TASK_SIZE_64TB (0x0000400000000000UL) -#define TASK_SIZE_128TB (0x0000800000000000UL) -#define TASK_SIZE_512TB (0x0002000000000000UL) -#define TASK_SIZE_1PB (0x0004000000000000UL) -#define TASK_SIZE_2PB (0x0008000000000000UL) -/* - * With 52 bits in the address we can support - * upto 4PB of range. - */ -#define TASK_SIZE_4PB (0x0010000000000000UL) - -/* - * For now 512TB is only supported with book3s and 64K linux page size. - */ -#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES) -/* - * Max value currently used: - */ -#define TASK_SIZE_USER64 TASK_SIZE_4PB -#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB -#define TASK_CONTEXT_SIZE TASK_SIZE_512TB -#else -#define TASK_SIZE_USER64 TASK_SIZE_64TB -#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB -/* - * We don't need to allocate extended context ids for 4K page size, because - * we limit the max effective address on this config to 64TB. - */ -#define TASK_CONTEXT_SIZE TASK_SIZE_64TB -#endif - -/* - * 32-bit user address space is 4GB - 1 page - * (this 1 page is needed so referencing of 0xFFFFFFFF generates EFAULT - */ -#define TASK_SIZE_USER32 (0x0000000100000000UL - (1*PAGE_SIZE)) - -#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \ - TASK_SIZE_USER32 : TASK_SIZE_USER64) -#define TASK_SIZE TASK_SIZE_OF(current) -/* This decides where the kernel will search for a free chunk of vm - * space during mmap's. - */ -#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4)) -#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4)) - -#define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \ - TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 ) -#endif - -/* - * Initial task size value for user applications. For book3s 64 we start - * with 128TB and conditionally enable upto 512TB - */ -#ifdef CONFIG_PPC_BOOK3S_64 -#define DEFAULT_MAP_WINDOW ((is_32bit_task()) ? \ - TASK_SIZE_USER32 : DEFAULT_MAP_WINDOW_USER64) +#include #else -#define DEFAULT_MAP_WINDOW TASK_SIZE +#include #endif -#ifdef __powerpc64__ - -#define STACK_TOP_USER64 DEFAULT_MAP_WINDOW_USER64 -#define STACK_TOP_USER32 TASK_SIZE_USER32 - -#define STACK_TOP (is_32bit_task() ? \ - STACK_TOP_USER32 : STACK_TOP_USER64) - -#define STACK_TOP_MAX TASK_SIZE_USER64 - -#else /* __powerpc64__ */ - -#define STACK_TOP TASK_SIZE -#define STACK_TOP_MAX STACK_TOP - -#endif /* __powerpc64__ */ +struct task_struct; +void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp); +void release_thread(struct task_struct *); typedef struct { unsigned long seg; diff --git a/arch/powerpc/include/asm/task_size_32.h b/arch/powerpc/include/asm/task_size_32.h new file mode 100644 index 000000000000..de7290ee770f --- /dev/null +++ b/arch/powerpc/include/asm/task_size_32.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_TASK_SIZE_32_H +#define _ASM_POWERPC_TASK_SIZE_32_H + +#if CONFIG_TASK_SIZE > CONFIG_KERNEL_START +#error User TASK_SIZE overlaps with KERNEL_START address +#endif + +#define TASK_SIZE (CONFIG_TASK_SIZE) + +/* + * This decides where the kernel will search for a free chunk of vm space during + * mmap's. 
+ */ +#define TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3) + +#define DEFAULT_MAP_WINDOW TASK_SIZE +#define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP + +#endif /* _ASM_POWERPC_TASK_SIZE_32_H */ diff --git a/arch/powerpc/include/asm/task_size_64.h b/arch/powerpc/include/asm/task_size_64.h new file mode 100644 index 000000000000..eab4779f6b84 --- /dev/null +++ b/arch/powerpc/include/asm/task_size_64.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_TASK_SIZE_64_H +#define _ASM_POWERPC_TASK_SIZE_64_H + +/* + * 64-bit user address space can have multiple limits + * For now supported values are: + */ +#define TASK_SIZE_64TB (0x0000400000000000UL) +#define TASK_SIZE_128TB (0x0000800000000000UL) +#define TASK_SIZE_512TB (0x0002000000000000UL) +#define TASK_SIZE_1PB (0x0004000000000000UL) +#define TASK_SIZE_2PB (0x0008000000000000UL) + +/* + * With 52 bits in the address we can support up to 4PB of range. + */ +#define TASK_SIZE_4PB (0x0010000000000000UL) + +/* + * For now 512TB is only supported with book3s and 64K linux page size. + */ +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES) +/* + * Max value currently used: + */ +#define TASK_SIZE_USER64 TASK_SIZE_4PB +#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB +#define TASK_CONTEXT_SIZE TASK_SIZE_512TB +#else +#define TASK_SIZE_USER64 TASK_SIZE_64TB +#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB + +/* + * We don't need to allocate extended context ids for 4K page size, because we + * limit the max effective address on this config to 64TB. + */ +#define TASK_CONTEXT_SIZE TASK_SIZE_64TB +#endif + +/* + * 32-bit user address space is 4GB - 1 page + * (this 1 page is needed so referencing of 0xFFFFFFFF generates EFAULT + */ +#define TASK_SIZE_USER32 (0x0000000100000000UL - (1 * PAGE_SIZE)) + +#define TASK_SIZE_OF(tsk) \ + (test_tsk_thread_flag(tsk, TIF_32BIT) ? TASK_SIZE_USER32 : \ + TASK_SIZE_USER64) + +#define TASK_SIZE TASK_SIZE_OF(current) + +#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4)) +#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4)) + +/* + * This decides where the kernel will search for a free chunk of vm space during + * mmap's. + */ +#define TASK_UNMAPPED_BASE \ + ((is_32bit_task()) ? TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64) + +/* + * Initial task size value for user applications. For book3s 64 we start + * with 128TB and conditionally enable upto 512TB + */ +#ifdef CONFIG_PPC_BOOK3S_64 +#define DEFAULT_MAP_WINDOW \ + ((is_32bit_task()) ? TASK_SIZE_USER32 : DEFAULT_MAP_WINDOW_USER64) +#else +#define DEFAULT_MAP_WINDOW TASK_SIZE +#endif + +#define STACK_TOP_USER64 DEFAULT_MAP_WINDOW_USER64 +#define STACK_TOP_USER32 TASK_SIZE_USER32 +#define STACK_TOP_MAX TASK_SIZE_USER64 +#define STACK_TOP (is_32bit_task() ? STACK_TOP_USER32 : STACK_TOP_USER64) + +#endif /* _ASM_POWERPC_TASK_SIZE_64_H */ diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c index e3f738eb1cac..64b5011475c7 100644 --- a/arch/powerpc/kvm/book3s_hv_hmi.c +++ b/arch/powerpc/kvm/book3s_hv_hmi.c @@ -24,6 +24,7 @@ #include #include #include +#include void wait_for_subcore_guest_exit(void) { -- cgit v1.2.3 From 1e35f29c6b2eba72521d6f3c38f9c86f331cfd0a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Jan 2019 23:17:56 +1100 Subject: powerpc: call_do_[soft]irq() takes a pointer to the stack The purpose of the pointer given to call_do_softirq() and call_do_irq() is to point the new stack. 
Currently that's the same thing as the thread_info, but won't be with THREAD_INFO_IN_TASK. So change the parameter to void* and rename it 'sp'. Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin [mpe: Split out of larger patch] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/irq.h | 4 ++-- arch/powerpc/kernel/misc_32.S | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index ee39ce56b2a2..2efbae8d93be 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -63,8 +63,8 @@ extern struct thread_info *hardirq_ctx[NR_CPUS]; extern struct thread_info *softirq_ctx[NR_CPUS]; extern void irq_ctx_init(void); -extern void call_do_softirq(struct thread_info *tp); -extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp); +void call_do_softirq(void *sp); +void call_do_irq(struct pt_regs *regs, void *sp); extern void do_IRQ(struct pt_regs *regs); extern void __init init_IRQ(void); extern void __do_irq(struct pt_regs *regs); diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 02b8cdd73792..242f0c88010e 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -60,7 +60,7 @@ _GLOBAL(call_do_softirq) blr /* - * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp); + * void call_do_irq(struct pt_regs *regs, void *sp); */ _GLOBAL(call_do_irq) mflr r0 -- cgit v1.2.3 From 4e67bfd7aa21b4b737a43df627956dba9c742983 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Jan 2019 23:25:53 +1100 Subject: powerpc: Update comments in preparation for THREAD_INFO_IN_TASK Update a few comments that talk about current_thread_info() in preparation for THREAD_INFO_IN_TASK. 
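As context for these comment updates, a minimal sketch of where this is
heading — assuming the generic linux/thread_info.h definition of this era,
not code touched by this patch: once THREAD_INFO_IN_TASK is selected,
current_thread_info() collapses to a cast of current, because thread_info
will then sit at offset zero of task_struct:

    /* Sketch of the generic definition, shown for illustration only */
    #ifdef CONFIG_THREAD_INFO_IN_TASK
    #define current_thread_info() ((struct thread_info *)current)
    #endif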
Signed-off-by: Christophe Leroy
Reviewed-by: Nicholas Piggin
[mpe: Split out of larger patch]
Signed-off-by: Michael Ellerman
---
 arch/powerpc/include/asm/reg.h       | 2 +-
 arch/powerpc/kernel/head_32.S        | 2 +-
 arch/powerpc/kernel/head_44x.S       | 2 +-
 arch/powerpc/kernel/head_fsl_booke.S | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc/include/asm')

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c25880e6a16a..c5b2aff0ce8e 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1062,7 +1062,7 @@
  *	- SPRG9 debug exception scratch
  *
  * All 32-bit:
- *	- SPRG3 current thread_info pointer
+ *	- SPRG3 current thread_struct physical addr pointer
  *        (virtual on BookE, physical on others)
  *
  * 32-bit classic:
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index e07cfd5756d9..2112805ef1d1 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -831,7 +831,7 @@ __secondary_start:
 	bl	init_idle_6xx
 #endif /* CONFIG_PPC_BOOK3S_32 */
 
-	/* get current_thread_info and current */
+	/* get current's stack and current */
 	lis	r1,secondary_ti@ha
 	tophys(r1,r1)
 	lwz	r1,secondary_ti@l(r1)
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index bf23c19c92d6..4e8c8bf50413 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -1019,7 +1019,7 @@ _GLOBAL(start_secondary_47x)
 
 	/* Now we can get our task struct and real stack pointer */
 
-	/* Get current_thread_info and current */
+	/* Get current's stack and current */
 	lis	r1,secondary_ti@ha
 	lwz	r1,secondary_ti@l(r1)
 	lwz	r2,TI_TASK(r1)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 42d8d6fc00cb..6301bb24889a 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -1091,7 +1091,7 @@ __secondary_start:
 	mr	r4,r24		/* Why? */
 	bl	call_setup_cpu
 
-	/* get current_thread_info and current */
+	/* get current's stack and current */
 	lis	r1,secondary_ti@ha
 	lwz	r1,secondary_ti@l(r1)
 	lwz	r2,TI_TASK(r1)
-- 
cgit v1.2.3


From 5497c2536f09e733bb68362ffeba147203295ae2 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Thu, 17 Jan 2019 23:27:40 +1100
Subject: powerpc: Use sizeof(struct thread_info) in INIT_SP_LIMIT

Currently INIT_SP_LIMIT uses sizeof(init_thread_info), but that symbol
won't exist when we enable THREAD_INFO_IN_TASK. So just use sizeof() on
the type, which gives the same value but will continue to work.
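The reasoning can be stated as a hypothetical compile-time check, not part
of the patch (in kernel code this would be a BUILD_BUG_ON; C11
_Static_assert is used here for a self-contained sketch): for any object,
sizeof applied to the object and to its declared type yield the same
constant, so the substitution cannot change INIT_SP_LIMIT:

    struct thread_info init_thread_info;	/* the symbol that is going away */

    _Static_assert(sizeof(init_thread_info) == sizeof(struct thread_info),
		   "sizeof(object) equals sizeof(its type)");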
Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin [mpe: Split out of larger patch] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 2edab34ee288..c406ec3b4b3c 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -271,7 +271,7 @@ struct thread_struct { #define INIT_SP (sizeof(init_stack) + (unsigned long) &init_stack) #define INIT_SP_LIMIT \ - (_ALIGN_UP(sizeof(init_thread_info), 16) + (unsigned long) &init_stack) + (_ALIGN_UP(sizeof(struct thread_info), 16) + (unsigned long)&init_stack) #ifdef CONFIG_SPE #define SPEFSCR_INIT \ -- cgit v1.2.3 From 3733304048feb9bdfc3daff02ca4da8cfc9c4352 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Jan 2019 23:27:28 +1100 Subject: powerpc: Use linux/thread_info.h in processor.h When we enable THREAD_INFO_IN_TASK we will remove our definition of current_thread_info(). Instead it will come from linux/thread_info.h So switch processor.h to include the latter, so that it can continue to find current_thread_info(). Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin [mpe: Split out of larger patch] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index c406ec3b4b3c..2c740042b8d3 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -40,7 +40,7 @@ #ifndef __ASSEMBLY__ #include -#include +#include #include #include -- cgit v1.2.3 From b72cc2e7aea1e42a82358bdc6c41dfaf7a5fa742 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 18 Jan 2019 18:40:34 +1100 Subject: powerpc: Use task_stack_page() in current_pt_regs() Change current_pt_regs() to use task_stack_page() rather than current_thread_info() so that it keeps working once we enable THREAD_INFO_IN_TASK. Signed-off-by: Christophe Leroy [mpe: Split out of large patch] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 0b8a735b6d85..64271e562fed 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -157,7 +157,7 @@ extern int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data); #define current_pt_regs() \ - ((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE) - 1) + ((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1) /* * We use the least-significant bit of the trap field to indicate * whether we have saved the full set of registers, or only a -- cgit v1.2.3 From ed1cd6deb013a11959d17a94e35ce159197632da Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 31 Jan 2019 10:08:58 +0000 Subject: powerpc: Activate CONFIG_THREAD_INFO_IN_TASK This patch activates CONFIG_THREAD_INFO_IN_TASK which moves the thread_info into task_struct. Moving thread_info into task_struct has the following advantages: - It protects thread_info from corruption in the case of stack overflows. - Its address is harder to determine if stack addresses are leaked, making a number of attacks more difficult. 
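An illustrative sketch of the resulting layout — field names other than
thread_info and cpu are elided here, and the real task_struct carries many
more members:

    struct task_struct {
    	struct thread_info	thread_info;	/* must stay first: a pointer
    						 * to the task is also a
    						 * pointer to its thread_info */
    #ifdef CONFIG_SMP
    	unsigned int		cpu;		/* moved here from thread_info */
    #endif
    	/* ... */
    };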
This has the following consequences:
- thread_info is now located at the beginning of task_struct.
- The 'cpu' field is now in task_struct, and only exists when CONFIG_SMP
  is active.
- thread_info no longer has the 'task' field.

This patch:
- Removes all copying of the thread_info struct when the stack changes.
- Changes the CURRENT_THREAD_INFO() macro to point to current.
- Selects CONFIG_THREAD_INFO_IN_TASK.
- Modifies raw_smp_processor_id() to get ->cpu from current without
  including linux/sched.h, to avoid circular inclusion, and without
  including asm/asm-offsets.h, to avoid symbol name duplication between
  ASM constants and C constants.
- Modifies klp_init_thread_info() to take a task_struct pointer argument.

Signed-off-by: Christophe Leroy
Reviewed-by: Nicholas Piggin
[mpe: Add task_stack.h to livepatch.h to fix build fails]
Signed-off-by: Michael Ellerman
---
 arch/powerpc/Kconfig                   |  1 +
 arch/powerpc/Makefile                  |  7 +++
 arch/powerpc/include/asm/irq.h         |  4 --
 arch/powerpc/include/asm/livepatch.h   |  7 +--
 arch/powerpc/include/asm/smp.h         | 17 +++++++-
 arch/powerpc/include/asm/thread_info.h | 17 +-------
 arch/powerpc/kernel/asm-offsets.c      |  7 ++-
 arch/powerpc/kernel/entry_32.S         |  9 ++--
 arch/powerpc/kernel/exceptions-64e.S   | 11 -----
 arch/powerpc/kernel/head_32.S          |  6 +--
 arch/powerpc/kernel/head_44x.S         |  4 +-
 arch/powerpc/kernel/head_booke.h       |  8 +---
 arch/powerpc/kernel/head_fsl_booke.S   |  7 ++-
 arch/powerpc/kernel/irq.c              | 79 +---------------------------------
 arch/powerpc/kernel/kgdb.c             | 28 ------------
 arch/powerpc/kernel/machine_kexec_64.c |  6 +--
 arch/powerpc/kernel/process.c          |  2 +-
 arch/powerpc/kernel/setup-common.c     |  2 +-
 arch/powerpc/kernel/setup_64.c         | 21 ---------
 arch/powerpc/kernel/smp.c              |  2 +-
 arch/powerpc/net/bpf_jit32.h           |  5 +--
 21 files changed, 56 insertions(+), 194 deletions(-)

(limited to 'arch/powerpc/include/asm')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5b7945a7bd41..652c25260838 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -238,6 +238,7 @@ config PPC
 	select RTC_LIB
 	select SPARSE_IRQ
 	select SYSCTL_EXCEPTION_TRACE
+	select THREAD_INFO_IN_TASK
 	select VIRT_TO_BUS if !PPC64
 	#
 	# Please keep this list sorted alphabetically.
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index ac033341ed55..53ffe935f3b0 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -427,6 +427,13 @@ else
 endif
 endif
 
+ifdef CONFIG_SMP
+prepare: task_cpu_prepare
+
+task_cpu_prepare: prepare0
+	$(eval KBUILD_CFLAGS += -D_TASK_CPU=$(shell awk '{if ($$2 == "TI_CPU") print $$3;}' include/generated/asm-offsets.h))
+endif
+
 # Check toolchain versions:
 # - gcc-4.6 is the minimum kernel-wide version so nothing required.
checkbin: diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 2efbae8d93be..28a7ace0a1b9 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -51,9 +51,6 @@ struct pt_regs; extern struct thread_info *critirq_ctx[NR_CPUS]; extern struct thread_info *dbgirq_ctx[NR_CPUS]; extern struct thread_info *mcheckirq_ctx[NR_CPUS]; -extern void exc_lvl_ctx_init(void); -#else -#define exc_lvl_ctx_init() #endif /* @@ -62,7 +59,6 @@ extern void exc_lvl_ctx_init(void); extern struct thread_info *hardirq_ctx[NR_CPUS]; extern struct thread_info *softirq_ctx[NR_CPUS]; -extern void irq_ctx_init(void); void call_do_softirq(void *sp); void call_do_irq(struct pt_regs *regs, void *sp); extern void do_IRQ(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index 47a03b9b528b..5070df19d463 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -21,6 +21,7 @@ #include #include +#include #ifdef CONFIG_LIVEPATCH static inline int klp_check_compiler_support(void) @@ -43,13 +44,13 @@ static inline unsigned long klp_get_ftrace_location(unsigned long faddr) return ftrace_location_range(faddr, faddr + 16); } -static inline void klp_init_thread_info(struct thread_info *ti) +static inline void klp_init_thread_info(struct task_struct *p) { /* + 1 to account for STACK_END_MAGIC */ - ti->livepatch_sp = (unsigned long *)(ti + 1) + 1; + task_thread_info(p)->livepatch_sp = end_of_stack(p) + 1; } #else -static void klp_init_thread_info(struct thread_info *ti) { } +static inline void klp_init_thread_info(struct task_struct *p) { } #endif /* CONFIG_LIVEPATCH */ #endif /* _ASM_POWERPC_LIVEPATCH_H */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 41695745032c..0de717e16dd6 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -83,7 +83,22 @@ int is_cpu_dead(unsigned int cpu); /* 32-bit */ extern int smp_hw_index[]; -#define raw_smp_processor_id() (current_thread_info()->cpu) +/* + * This is particularly ugly: it appears we can't actually get the definition + * of task_struct here, but we need access to the CPU this task is running on. + * Instead of using task_struct we're using _TASK_CPU which is extracted from + * asm-offsets.h by kbuild to get the current processor ID. + * + * This also needs to be safeguarded when building asm-offsets.s because at + * that time _TASK_CPU is not defined yet. 
It could have been guarded by + * _TASK_CPU itself, but we want the build to fail if _TASK_CPU is missing + * when building something else than asm-offsets.s + */ +#ifdef GENERATING_ASM_OFFSETS +#define raw_smp_processor_id() (0) +#else +#define raw_smp_processor_id() (*(unsigned int *)((void *)current + _TASK_CPU)) +#endif #define hard_smp_processor_id() (smp_hw_index[smp_processor_id()]) static inline int get_hard_smp_processor_id(int cpu) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 544cac0474cb..d91523c2c7d8 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -18,9 +18,9 @@ #define THREAD_SIZE (1 << THREAD_SHIFT) #ifdef CONFIG_PPC64 -#define CURRENT_THREAD_INFO(dest, sp) stringify_in_c(clrrdi dest, sp, THREAD_SHIFT) +#define CURRENT_THREAD_INFO(dest, sp) stringify_in_c(ld dest, PACACURRENT(r13)) #else -#define CURRENT_THREAD_INFO(dest, sp) stringify_in_c(rlwinm dest, sp, 0, 0, 31-THREAD_SHIFT) +#define CURRENT_THREAD_INFO(dest, sp) stringify_in_c(mr dest, r2) #endif #ifndef __ASSEMBLY__ @@ -34,8 +34,6 @@ * low level task data. */ struct thread_info { - struct task_struct *task; /* main task structure */ - int cpu; /* cpu we're on */ int preempt_count; /* 0 => preemptable, <0 => BUG */ unsigned long local_flags; /* private flags for thread */ @@ -58,8 +56,6 @@ struct thread_info { */ #define INIT_THREAD_INFO(tsk) \ { \ - .task = &tsk, \ - .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .flags = 0, \ } @@ -67,15 +63,6 @@ struct thread_info { #define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) /* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) -{ - unsigned long val; - - asm (CURRENT_THREAD_INFO(%0,1) : "=r" (val)); - - return (struct thread_info *)val; -} - extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index ca55027f47a4..ca3fb836cbb9 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -13,6 +13,8 @@ * 2 of the License, or (at your option) any later version. */ +#define GENERATING_ASM_OFFSETS /* asm/smp.h */ + #include #include #include @@ -97,6 +99,9 @@ int main(void) #endif #endif /* CONFIG_PPC64 */ OFFSET(TASK_STACK, task_struct, stack); +#ifdef CONFIG_SMP + OFFSET(TI_CPU, task_struct, cpu); +#endif #ifdef CONFIG_LIVEPATCH OFFSET(TI_livepatch_sp, thread_info, livepatch_sp); @@ -164,8 +169,6 @@ int main(void) OFFSET(TI_FLAGS, thread_info, flags); OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); OFFSET(TI_PREEMPT, thread_info, preempt_count); - OFFSET(TI_TASK, thread_info, task); - OFFSET(TI_CPU, thread_info, cpu); #ifdef CONFIG_PPC64 OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 063100df8325..f3618353c1c4 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1165,10 +1165,6 @@ ret_from_debug_exc: mfspr r9,SPRN_SPRG_THREAD lwz r10,SAVED_KSP_LIMIT(r1) stw r10,KSP_LIMIT(r9) - lwz r9,TASK_STACK-THREAD(r9) - CURRENT_THREAD_INFO(r10, r1) - lwz r10,TI_PREEMPT(r10) - stw r10,TI_PREEMPT(r9) RESTORE_xSRR(SRR0,SRR1); RESTORE_xSRR(CSRR0,CSRR1); RESTORE_MMU_REGS; @@ -1291,10 +1287,13 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_601) lwz r3,_TRAP(r1) andi. 
r0,r3,1 - beq 4f + beq 5f SAVE_NVGPRS(r1) rlwinm r3,r3,0,0,30 stw r3,_TRAP(r1) +5: mfspr r2,SPRN_SPRG_THREAD + addi r2,r2,-THREAD + tovirt(r2,r2) /* set back r2 to current */ 4: addi r3,r1,STACK_FRAME_OVERHEAD bl unrecoverable_exception /* shouldn't return */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index afb638778f44..20f14996281d 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -77,17 +77,6 @@ special_reg_save: andi. r3,r3,MSR_PR bnelr - /* Copy info into temporary exception thread info */ - ld r11,PACAKSAVE(r13) - CURRENT_THREAD_INFO(r11, r11) - CURRENT_THREAD_INFO(r12, r1) - ld r10,TI_FLAGS(r11) - std r10,TI_FLAGS(r12) - ld r10,TI_PREEMPT(r11) - std r10,TI_PREEMPT(r12) - ld r10,TI_TASK(r11) - std r10,TI_TASK(r12) - /* * Advance to the next TLB exception frame for handler * types that don't do it automatically. diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 2112805ef1d1..888fcff3f8cc 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -834,9 +834,9 @@ __secondary_start: /* get current's stack and current */ lis r1,secondary_ti@ha tophys(r1,r1) - lwz r1,secondary_ti@l(r1) - tophys(r2,r1) - lwz r2,TI_TASK(r2) + lwz r2,secondary_ti@l(r1) + tophys(r1,r2) + lwz r1,TASK_STACK(r1) /* stack */ addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 4e8c8bf50413..f94a93b6c2f2 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -1021,8 +1021,8 @@ _GLOBAL(start_secondary_47x) /* Get current's stack and current */ lis r1,secondary_ti@ha - lwz r1,secondary_ti@l(r1) - lwz r2,TI_TASK(r1) + lwz r2,secondary_ti@l(r1) + lwz r1,TASK_STACK(r2) /* Current stack pointer */ addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 69e80e6d0d16..1b22a8dea399 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -155,13 +155,7 @@ END_BTB_FLUSH_SECTION stw r10,GPR11(r11); \ b 2f; \ /* COMING FROM PRIV MODE */ \ -1: lwz r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r11); \ - lwz r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r11); \ - stw r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r8); \ - stw r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r8); \ - lwz r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r11); \ - stw r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r8); \ - mr r11,r8; \ +1: mr r11, r8; \ 2: mfspr r8,SPRN_SPRG_RSCRATCH_##exc_level; \ stw r12,GPR12(r11); /* save various registers */\ mflr r10; \ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 6301bb24889a..11f38adbe020 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -719,8 +719,7 @@ finish_tlb_load: /* Get the next_tlbcam_idx percpu var */ #ifdef CONFIG_SMP - lwz r12, TASK_STACK-THREAD(r12) - lwz r15, TI_CPU(r12) + lwz r15, TI_CPU-THREAD(r12) lis r14, __per_cpu_offset@h ori r14, r14, __per_cpu_offset@l rlwinm r15, r15, 2, 0, 29 @@ -1093,8 +1092,8 @@ __secondary_start: /* get current's stack and current */ lis r1,secondary_ti@ha - lwz r1,secondary_ti@l(r1) - lwz r2,TI_TASK(r1) + lwz r2,secondary_ti@l(r1) + lwz r1,TASK_STACK(r2) /* stack */ addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 531e9ef153c0..85c48911938a 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -673,24 
+673,9 @@ void do_IRQ(struct pt_regs *regs) set_irq_regs(old_regs); return; } - - /* Prepare the thread_info in the irq stack */ - irqtp->task = curtp->task; - irqtp->flags = 0; - - /* Copy the preempt_count so that the [soft]irq checks work. */ - irqtp->preempt_count = curtp->preempt_count; - /* Switch stack and call */ call_do_irq(regs, irqtp); - /* Restore stack limit */ - irqtp->task = NULL; - - /* Copy back updates to the thread_info */ - if (irqtp->flags) - set_bits(irqtp->flags, &curtp->flags); - set_irq_regs(old_regs); } @@ -698,85 +683,23 @@ void __init init_IRQ(void) { if (ppc_md.init_IRQ) ppc_md.init_IRQ(); - - exc_lvl_ctx_init(); - - irq_ctx_init(); } #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) struct thread_info *critirq_ctx[NR_CPUS] __read_mostly; struct thread_info *dbgirq_ctx[NR_CPUS] __read_mostly; struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly; - -void exc_lvl_ctx_init(void) -{ - struct thread_info *tp; - int i, cpu_nr; - - for_each_possible_cpu(i) { -#ifdef CONFIG_PPC64 - cpu_nr = i; -#else -#ifdef CONFIG_SMP - cpu_nr = get_hard_smp_processor_id(i); -#else - cpu_nr = 0; -#endif -#endif - - tp = critirq_ctx[cpu_nr]; - tp->cpu = cpu_nr; - tp->preempt_count = 0; - -#ifdef CONFIG_BOOKE - tp = dbgirq_ctx[cpu_nr]; - tp->cpu = cpu_nr; - tp->preempt_count = 0; - - tp = mcheckirq_ctx[cpu_nr]; - tp->cpu = cpu_nr; - tp->preempt_count = HARDIRQ_OFFSET; -#endif - } -} #endif struct thread_info *softirq_ctx[NR_CPUS] __read_mostly; struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly; -void irq_ctx_init(void) -{ - struct thread_info *tp; - int i; - - for_each_possible_cpu(i) { - tp = softirq_ctx[i]; - tp->cpu = i; - klp_init_thread_info(tp); - - tp = hardirq_ctx[i]; - tp->cpu = i; - klp_init_thread_info(tp); - } -} - void do_softirq_own_stack(void) { - struct thread_info *curtp, *irqtp; + struct thread_info *irqtp; - curtp = current_thread_info(); irqtp = softirq_ctx[smp_processor_id()]; - irqtp->task = curtp->task; - irqtp->flags = 0; call_do_softirq(irqtp); - irqtp->task = NULL; - - /* Set any flag that may have been set on the - * alternate stack - */ - if (irqtp->flags) - set_bits(irqtp->flags, &curtp->flags); } irq_hw_number_t virq_to_hw(unsigned int virq) diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index e1865565f0ae..7dd55eb1259d 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -151,41 +151,13 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs) return 1; } -static DEFINE_PER_CPU(struct thread_info, kgdb_thread_info); static int kgdb_singlestep(struct pt_regs *regs) { - struct thread_info *thread_info, *exception_thread_info; - struct thread_info *backup_current_thread_info = - this_cpu_ptr(&kgdb_thread_info); - if (user_mode(regs)) return 0; - /* - * On Book E and perhaps other processors, singlestep is handled on - * the critical exception stack. This causes current_thread_info() - * to fail, since it it locates the thread_info by masking off - * the low bits of the current stack pointer. We work around - * this issue by copying the thread_info from the kernel stack - * before calling kgdb_handle_exception, and copying it back - * afterwards. On most processors the copy is avoided since - * exception_thread_info == thread_info. - */ - thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1)); - exception_thread_info = current_thread_info(); - - if (thread_info != exception_thread_info) { - /* Save the original current_thread_info. 
*/ - memcpy(backup_current_thread_info, exception_thread_info, sizeof *thread_info); - memcpy(exception_thread_info, thread_info, sizeof *thread_info); - } - kgdb_handle_exception(0, SIGTRAP, 0, regs); - if (thread_info != exception_thread_info) - /* Restore current_thread_info lastly. */ - memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info); - return 1; } diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index a0f6f45005bd..75692c327ba0 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -317,10 +317,8 @@ void default_machine_kexec(struct kimage *image) * We setup preempt_count to avoid using VMX in memcpy. * XXX: the task struct will likely be invalid once we do the copy! */ - kexec_stack.thread_info.task = current_thread_info()->task; - kexec_stack.thread_info.flags = 0; - kexec_stack.thread_info.preempt_count = HARDIRQ_OFFSET; - kexec_stack.thread_info.cpu = current_thread_info()->cpu; + current_thread_info()->flags = 0; + current_thread_info()->preempt_count = HARDIRQ_OFFSET; /* We need a static PACA, too; copy this CPU's PACA over and switch to * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index dc2aaaf75c87..fd07711035bd 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1634,7 +1634,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; struct thread_info *ti = task_thread_info(p); - klp_init_thread_info(ti); + klp_init_thread_info(p); /* Copy registers */ sp -= sizeof(struct pt_regs); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9a6a0859c1ef..e7534f306c8e 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -937,7 +937,7 @@ void __init setup_arch(char **cmdline_p) /* Reserve large chunks of memory for use by CMA for KVM. */ kvm_cma_reserve(); - klp_init_thread_info(&init_thread_info); + klp_init_thread_info(&init_task); init_mm.start_code = (unsigned long)_stext; init_mm.end_code = (unsigned long) _etext; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 080dd515d587..0912948a8ea6 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -689,24 +689,6 @@ void __init exc_lvl_early_init(void) } #endif -/* - * Emergency stacks are used for a range of things, from asynchronous - * NMIs (system reset, machine check) to synchronous, process context. - * We set preempt_count to zero, even though that isn't necessarily correct. To - * get the right value we'd need to copy it from the previous thread_info, but - * doing that might fault causing more problems. - * TODO: what to do with accounting? - */ -static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu) -{ - ti->task = NULL; - ti->cpu = cpu; - ti->preempt_count = 0; - ti->local_flags = 0; - ti->flags = 0; - klp_init_thread_info(ti); -} - /* * Stack space used when we detect a bad kernel stack pointer, and * early in SMP boots before relocation is enabled. Exclusive emergency @@ -737,18 +719,15 @@ void __init emergency_stack_init(void) struct thread_info *ti; ti = alloc_stack(limit, i); - emerg_stack_init_thread_info(ti, i); paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE; #ifdef CONFIG_PPC_BOOK3S_64 /* emergency stack for NMI exception handling. 
*/ ti = alloc_stack(limit, i); - emerg_stack_init_thread_info(ti, i); paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE; /* emergency stack for machine check exception handling. */ ti = alloc_stack(limit, i); - emerg_stack_init_thread_info(ti, i); paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE; #endif } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 829ef5411b50..96c25a89e877 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -988,7 +988,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle) paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) + THREAD_SIZE - STACK_FRAME_OVERHEAD; #endif - ti->cpu = cpu; + idle->cpu = cpu; secondary_ti = current_set[cpu] = ti; } diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h index 6f4daacad296..dc50a8d4b3b9 100644 --- a/arch/powerpc/net/bpf_jit32.h +++ b/arch/powerpc/net/bpf_jit32.h @@ -106,9 +106,8 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); } while (0) #else #define PPC_BPF_LOAD_CPU(r) \ - do { BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4); \ - PPC_LHZ_OFFS(r, (1 & ~(THREAD_SIZE - 1)), \ - offsetof(struct thread_info, cpu)); \ + do { BUILD_BUG_ON(FIELD_SIZEOF(struct task_struct, cpu) != 4); \ + PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu)); \ } while(0) #endif #else -- cgit v1.2.3 From a7916a1de526162d73e894b6d3ebd895d4302078 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 31 Jan 2019 10:09:00 +0000 Subject: powerpc: regain entire stack space thread_info is not anymore in the stack, so the entire stack can now be used. There is also no risk anymore of corrupting task_cpu(p) with a stack overflow so the patch removes the test. When doing this, an explicit test for NULL stack pointer is needed in validate_sp() as it is not anymore implicitely covered by the sizeof(thread_info) gap. In the meantime, with the previous patch all pointers to the stacks are not anymore pointers to thread_info so this patch changes them to void* Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/irq.h | 10 +++++----- arch/powerpc/include/asm/processor.h | 3 +-- arch/powerpc/kernel/asm-offsets.c | 1 - arch/powerpc/kernel/entry_32.S | 14 ++++---------- arch/powerpc/kernel/irq.c | 19 +++++++++---------- arch/powerpc/kernel/misc_32.S | 6 ++---- arch/powerpc/kernel/process.c | 32 +++++++++++++------------------- arch/powerpc/kernel/setup_64.c | 8 ++++---- 8 files changed, 38 insertions(+), 55 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 28a7ace0a1b9..c91a60cda4fa 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -48,16 +48,16 @@ struct pt_regs; * Per-cpu stacks for handling critical, debug and machine check * level interrupts. */ -extern struct thread_info *critirq_ctx[NR_CPUS]; -extern struct thread_info *dbgirq_ctx[NR_CPUS]; -extern struct thread_info *mcheckirq_ctx[NR_CPUS]; +extern void *critirq_ctx[NR_CPUS]; +extern void *dbgirq_ctx[NR_CPUS]; +extern void *mcheckirq_ctx[NR_CPUS]; #endif /* * Per-cpu stacks for handling hard and soft interrupts. 
*/ -extern struct thread_info *hardirq_ctx[NR_CPUS]; -extern struct thread_info *softirq_ctx[NR_CPUS]; +extern void *hardirq_ctx[NR_CPUS]; +extern void *softirq_ctx[NR_CPUS]; void call_do_softirq(void *sp); void call_do_irq(struct pt_regs *regs, void *sp); diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 2c740042b8d3..3351bcf42f2d 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -270,8 +270,7 @@ struct thread_struct { #define ARCH_MIN_TASKALIGN 16 #define INIT_SP (sizeof(init_stack) + (unsigned long) &init_stack) -#define INIT_SP_LIMIT \ - (_ALIGN_UP(sizeof(struct thread_info), 16) + (unsigned long)&init_stack) +#define INIT_SP_LIMIT ((unsigned long)&init_stack) #ifdef CONFIG_SPE #define SPEFSCR_INIT \ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index ca3fb836cbb9..1ad0cbcc5f13 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -92,7 +92,6 @@ int main(void) DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); #else - DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16)); OFFSET(KSP_LIMIT, thread_struct, ksp_limit); #ifdef CONFIG_PPC_RTAS OFFSET(RTAS_SP, thread_struct, rtas_sp); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index f3618353c1c4..424e7265e790 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -97,14 +97,11 @@ crit_transfer_to_handler: mfspr r0,SPRN_SRR1 stw r0,_SRR1(r11) - /* set the stack limit to the current stack - * and set the limit to protect the thread_info - * struct - */ + /* set the stack limit to the current stack */ mfspr r8,SPRN_SPRG_THREAD lwz r0,KSP_LIMIT(r8) stw r0,SAVED_KSP_LIMIT(r11) - rlwimi r0,r1,0,0,(31-THREAD_SHIFT) + rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) stw r0,KSP_LIMIT(r8) /* fall through */ #endif @@ -121,14 +118,11 @@ crit_transfer_to_handler: mfspr r0,SPRN_SRR1 stw r0,crit_srr1@l(0) - /* set the stack limit to the current stack - * and set the limit to protect the thread_info - * struct - */ + /* set the stack limit to the current stack */ mfspr r8,SPRN_SPRG_THREAD lwz r0,KSP_LIMIT(r8) stw r0,saved_ksp_limit@l(0) - rlwimi r0,r1,0,0,(31-THREAD_SHIFT) + rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) stw r0,KSP_LIMIT(r8) /* fall through */ #endif diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 85c48911938a..938944c6e2ee 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -618,9 +618,8 @@ static inline void check_stack_overflow(void) sp = current_stack_pointer() & (THREAD_SIZE-1); /* check for stack overflow: is there less than 2KB free? 
*/ - if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { - pr_err("do_IRQ: stack overflow: %ld\n", - sp - sizeof(struct thread_info)); + if (unlikely(sp < 2048)) { + pr_err("do_IRQ: stack overflow: %ld\n", sp); dump_stack(); } #endif @@ -660,7 +659,7 @@ void __do_irq(struct pt_regs *regs) void do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - struct thread_info *curtp, *irqtp, *sirqtp; + void *curtp, *irqtp, *sirqtp; /* Switch to the irq stack to handle this */ curtp = (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); @@ -686,17 +685,17 @@ void __init init_IRQ(void) } #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) -struct thread_info *critirq_ctx[NR_CPUS] __read_mostly; -struct thread_info *dbgirq_ctx[NR_CPUS] __read_mostly; -struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly; +void *critirq_ctx[NR_CPUS] __read_mostly; +void *dbgirq_ctx[NR_CPUS] __read_mostly; +void *mcheckirq_ctx[NR_CPUS] __read_mostly; #endif -struct thread_info *softirq_ctx[NR_CPUS] __read_mostly; -struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly; +void *softirq_ctx[NR_CPUS] __read_mostly; +void *hardirq_ctx[NR_CPUS] __read_mostly; void do_softirq_own_stack(void) { - struct thread_info *irqtp; + void *irqtp; irqtp = softirq_ctx[smp_processor_id()]; call_do_softirq(irqtp); diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index b37b50fde828..6f6127c3760c 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -46,11 +46,10 @@ _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) lwz r10,THREAD+KSP_LIMIT(r2) - addi r11,r3,THREAD_INFO_GAP + stw r3, THREAD+KSP_LIMIT(r2) stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) mr r1,r3 stw r10,8(r1) - stw r11,THREAD+KSP_LIMIT(r2) bl __do_softirq lwz r10,8(r1) lwz r1,0(r1) @@ -66,11 +65,10 @@ _GLOBAL(call_do_irq) mflr r0 stw r0,4(r1) lwz r10,THREAD+KSP_LIMIT(r2) - addi r11,r4,THREAD_INFO_GAP + stw r4, THREAD+KSP_LIMIT(r2) stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) mr r1,r4 stw r10,8(r1) - stw r11,THREAD+KSP_LIMIT(r2) bl __do_irq lwz r10,8(r1) lwz r1,0(r1) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index fd07711035bd..dd9e0d5386ee 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1691,8 +1691,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, sp -= STACK_FRAME_OVERHEAD; p->thread.ksp = sp; #ifdef CONFIG_PPC32 - p->thread.ksp_limit = (unsigned long)task_stack_page(p) + - _ALIGN_UP(sizeof(struct thread_info), 16); + p->thread.ksp_limit = (unsigned long)end_of_stack(p); #endif #ifdef CONFIG_HAVE_HW_BREAKPOINT p->thread.ptrace_bps[0] = NULL; @@ -1995,21 +1994,14 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p, unsigned long stack_page; unsigned long cpu = task_cpu(p); - /* - * Avoid crashing if the stack has overflowed and corrupted - * task_cpu(p), which is in the thread_info struct. 
-	 */
-	if (cpu < NR_CPUS && cpu_possible(cpu)) {
-		stack_page = (unsigned long) hardirq_ctx[cpu];
-		if (sp >= stack_page + sizeof(struct thread_struct)
-		    && sp <= stack_page + THREAD_SIZE - nbytes)
-			return 1;
-
-		stack_page = (unsigned long) softirq_ctx[cpu];
-		if (sp >= stack_page + sizeof(struct thread_struct)
-		    && sp <= stack_page + THREAD_SIZE - nbytes)
-			return 1;
-	}
+	stack_page = (unsigned long)hardirq_ctx[cpu];
+	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+		return 1;
+
+	stack_page = (unsigned long)softirq_ctx[cpu];
+	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+		return 1;
+
 	return 0;
 }
 
@@ -2018,8 +2010,10 @@ int validate_sp(unsigned long sp, struct task_struct *p,
 {
 	unsigned long stack_page = (unsigned long)task_stack_page(p);
 
-	if (sp >= stack_page + sizeof(struct thread_struct)
-	    && sp <= stack_page + THREAD_SIZE - nbytes)
+	if (sp < THREAD_SIZE)
+		return 0;
+
+	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
 		return 1;
 
 	return valid_irq_stack(sp, p, nbytes);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 0912948a8ea6..2db1c5f7d141 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -716,19 +716,19 @@ void __init emergency_stack_init(void)
 	limit = min(ppc64_bolted_size(), ppc64_rma_size);
 
 	for_each_possible_cpu(i) {
-		struct thread_info *ti;
+		void *ti;
 
 		ti = alloc_stack(limit, i);
-		paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE;
+		paca_ptrs[i]->emergency_sp = ti + THREAD_SIZE;
 
 #ifdef CONFIG_PPC_BOOK3S_64
 		/* emergency stack for NMI exception handling. */
 		ti = alloc_stack(limit, i);
-		paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE;
+		paca_ptrs[i]->nmi_emergency_sp = ti + THREAD_SIZE;
 
 		/* emergency stack for machine check exception handling. */
 		ti = alloc_stack(limit, i);
-		paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE;
+		paca_ptrs[i]->mc_emergency_sp = ti + THREAD_SIZE;
 #endif
 	}
 }
-- 
cgit v1.2.3


From 7c19c2e5f9c18e364a306253065474e5f6ad960c Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Thu, 31 Jan 2019 10:09:02 +0000
Subject: powerpc: 'current_set' is now a table of task_struct pointers

The table of pointers 'current_set' has been used for retrieving the
stack and current. They used to be thread_info pointers, as they were
pointing to the stack, and current was taken from the 'task' field of
the thread_info.

Now, the pointers of the 'current_set' table are both pointers to
task_struct and pointers to thread_info.

As they are used to get current, and the stack pointer is retrieved
from current's stack field, this patch changes their type to
task_struct, and renames secondary_ti to secondary_current.
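A rough C restatement of what the updated head_*.S code does with the new
pointer — the helper name is hypothetical and the real code runs in early
assembly, but the logic mirrors it (r2 <- secondary_current, r1 <- its
stack page + THREAD_SIZE - STACK_FRAME_OVERHEAD; task_stack_page() comes
from linux/sched/task_stack.h):

    /* Sketch: how a secondary CPU now derives its initial stack pointer. */
    static void *secondary_initial_sp(void)
    {
    	return task_stack_page(secondary_current) +
    	       THREAD_SIZE - STACK_FRAME_OVERHEAD;
    }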
Reviewed-by: Nicholas Piggin Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/asm-prototypes.h | 4 ++-- arch/powerpc/kernel/head_32.S | 6 +++--- arch/powerpc/kernel/head_44x.S | 4 ++-- arch/powerpc/kernel/head_fsl_booke.S | 4 ++-- arch/powerpc/kernel/smp.c | 10 ++++------ 5 files changed, 13 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 1d911f68a23b..1484df6779ab 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -23,8 +23,8 @@ #include /* SMP */ -extern struct thread_info *current_set[NR_CPUS]; -extern struct thread_info *secondary_ti; +extern struct task_struct *current_set[NR_CPUS]; +extern struct task_struct *secondary_current; void start_secondary(void *unused); /* kexec */ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 888fcff3f8cc..ce6a972f2584 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -832,9 +832,9 @@ __secondary_start: #endif /* CONFIG_PPC_BOOK3S_32 */ /* get current's stack and current */ - lis r1,secondary_ti@ha - tophys(r1,r1) - lwz r2,secondary_ti@l(r1) + lis r2,secondary_current@ha + tophys(r2,r2) + lwz r2,secondary_current@l(r2) tophys(r1,r2) lwz r1,TASK_STACK(r1) diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index f94a93b6c2f2..37117ab11584 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -1020,8 +1020,8 @@ _GLOBAL(start_secondary_47x) /* Now we can get our task struct and real stack pointer */ /* Get current's stack and current */ - lis r1,secondary_ti@ha - lwz r2,secondary_ti@l(r1) + lis r2,secondary_current@ha + lwz r2,secondary_current@l(r2) lwz r1,TASK_STACK(r2) /* Current stack pointer */ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 11f38adbe020..4ed2a7c8e89b 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -1091,8 +1091,8 @@ __secondary_start: bl call_setup_cpu /* get current's stack and current */ - lis r1,secondary_ti@ha - lwz r2,secondary_ti@l(r1) + lis r2,secondary_current@ha + lwz r2,secondary_current@l(r2) lwz r1,TASK_STACK(r2) /* stack */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 96c25a89e877..e784342bdaa1 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -76,7 +76,7 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; #endif -struct thread_info *secondary_ti; +struct task_struct *secondary_current; bool has_big_cores; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); @@ -631,7 +631,7 @@ void smp_send_stop(void) } #endif /* CONFIG_NMI_IPI */ -struct thread_info *current_set[NR_CPUS]; +struct task_struct *current_set[NR_CPUS]; static void smp_store_cpu_info(int id) { @@ -896,7 +896,7 @@ void smp_prepare_boot_cpu(void) paca_ptrs[boot_cpuid]->__current = current; #endif set_numa_node(numa_cpu_lookup_table[boot_cpuid]); - current_set[boot_cpuid] = task_thread_info(current); + current_set[boot_cpuid] = current; } #ifdef CONFIG_HOTPLUG_CPU @@ -981,15 +981,13 @@ static bool secondaries_inhibited(void) static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle) { - struct thread_info *ti = task_thread_info(idle); - #ifdef CONFIG_PPC64 paca_ptrs[cpu]->__current = idle; paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) + THREAD_SIZE - 
STACK_FRAME_OVERHEAD; #endif idle->cpu = cpu; - secondary_ti = current_set[cpu] = ti; + secondary_current = current_set[cpu] = idle; } int __cpu_up(unsigned int cpu, struct task_struct *tidle) -- cgit v1.2.3 From f7354ccac844da7b1af8cc4f09da330fa3e960e4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 31 Jan 2019 10:09:04 +0000 Subject: powerpc/32: Remove CURRENT_THREAD_INFO and rename TI_CPU Now that thread_info sits at the beginning of task_struct, its address is in r2, so the CURRENT_THREAD_INFO() macro is useless. This patch removes it. This patch also moves the 'tovirt(r2, r2)' down just before the reactivation of MMU translation, so that we keep the physical address of 'current' in r2 until then. It avoids a few calls to tophys(). At the same time, as the 'cpu' field is no longer in thread_info, TI_CPU is renamed TASK_CPU by this patch. It also makes it possible to get rid of a couple of '#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE' as ACCOUNT_CPU_USER_ENTRY() and ACCOUNT_CPU_USER_EXIT() are empty when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not defined. Signed-off-by: Christophe Leroy [mpe: Fix a missed conversion of TI_CPU in idle_6xx.S] Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 2 +- arch/powerpc/include/asm/thread_info.h | 2 -- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/entry_32.S | 55 +++++++++++----------------------- arch/powerpc/kernel/epapr_hcalls.S | 5 ++-- arch/powerpc/kernel/head_fsl_booke.S | 5 ++-- arch/powerpc/kernel/idle_6xx.S | 9 ++---- arch/powerpc/kernel/idle_e500.S | 8 ++--- arch/powerpc/kernel/misc_32.S | 3 +- arch/powerpc/mm/hash_low_32.S | 13 +++----- arch/powerpc/sysdev/6xx-suspend.S | 5 ++-- 11 files changed, 37 insertions(+), 72 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 53ffe935f3b0..7de49889bd5d 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -431,7 +431,7 @@ ifdef CONFIG_SMP prepare: task_cpu_prepare task_cpu_prepare: prepare0 - $(eval KBUILD_CFLAGS += -D_TASK_CPU=$(shell awk '{if ($$2 == "TI_CPU") print $$3;}' include/generated/asm-offsets.h)) + $(eval KBUILD_CFLAGS += -D_TASK_CPU=$(shell awk '{if ($$2 == "TASK_CPU") print $$3;}' include/generated/asm-offsets.h)) endif # Check toolchain versions: diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index d91523c2c7d8..c959b8d66cac 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -19,8 +19,6 @@ #ifdef CONFIG_PPC64 #define CURRENT_THREAD_INFO(dest, sp) stringify_in_c(ld dest, PACACURRENT(r13)) -#else -#define CURRENT_THREAD_INFO(dest, sp) stringify_in_c(mr dest, r2) #endif #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 1ad0cbcc5f13..8b688b19776a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -99,7 +99,7 @@ int main(void) #endif /* CONFIG_PPC64 */ OFFSET(TASK_STACK, task_struct, stack); #ifdef CONFIG_SMP - OFFSET(TI_CPU, task_struct, cpu); + OFFSET(TASK_CPU, task_struct, cpu); #endif #ifdef CONFIG_LIVEPATCH diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 424e7265e790..96dce6a4b61e 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -151,7 +151,6 @@ transfer_to_handler: stw r2,_XER(r11) mfspr r12,SPRN_SPRG_THREAD addi r2,r12,-THREAD - tovirt(r2,r2) /* set r2 to current */ beq 2f /* if from user, fix up THREAD.regs */ addi
r11,r1,STACK_FRAME_OVERHEAD stw r11,PT_REGS(r12) @@ -161,11 +160,7 @@ transfer_to_handler: lwz r12,THREAD_DBCR0(r12) andis. r12,r12,DBCR0_IDM@h #endif -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - CURRENT_THREAD_INFO(r9, r1) - tophys(r9, r9) - ACCOUNT_CPU_USER_ENTRY(r9, r11, r12) -#endif + ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) beq+ 3f /* From user and task is ptraced - load up global dbcr0 */ @@ -175,8 +170,7 @@ transfer_to_handler: tophys(r11,r11) addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP - CURRENT_THREAD_INFO(r9, r1) - lwz r9,TI_CPU(r9) + lwz r9,TASK_CPU(r2) slwi r9,r9,3 add r11,r11,r9 #endif @@ -197,9 +191,7 @@ transfer_to_handler: ble- stack_ovf /* then the kernel stack overflowed */ 5: #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) - CURRENT_THREAD_INFO(r9, r1) - tophys(r9,r9) /* check local flags */ - lwz r12,TI_LOCAL_FLAGS(r9) + lwz r12,TI_LOCAL_FLAGS(r2) mtcrf 0x01,r12 bt- 31-TLF_NAPPING,4f bt- 31-TLF_SLEEPING,7f @@ -208,6 +200,7 @@ transfer_to_handler: transfer_to_handler_cont: 3: mflr r9 + tovirt(r2, r2) /* set r2 to current */ lwz r11,0(r9) /* virtual address of handler */ lwz r9,4(r9) /* where to go when done */ #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) @@ -271,11 +264,11 @@ reenable_mmu: /* re-enable mmu so we can */ #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) 4: rlwinm r12,r12,0,~_TLF_NAPPING - stw r12,TI_LOCAL_FLAGS(r9) + stw r12,TI_LOCAL_FLAGS(r2) b power_save_ppc32_restore 7: rlwinm r12,r12,0,~_TLF_SLEEPING - stw r12,TI_LOCAL_FLAGS(r9) + stw r12,TI_LOCAL_FLAGS(r2) lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ rlwinm r9,r9,0,~MSR_EE lwz r12,_LINK(r11) /* and return to address in LR */ @@ -347,8 +340,7 @@ _GLOBAL(DoSyscall) mtmsr r11 1: #endif /* CONFIG_TRACE_IRQFLAGS */ - CURRENT_THREAD_INFO(r10, r1) - lwz r11,TI_FLAGS(r10) + lwz r11,TI_FLAGS(r2) andi. r11,r11,_TIF_SYSCALL_DOTRACE bne- syscall_dotrace syscall_dotrace_cont: @@ -381,13 +373,12 @@ ret_from_syscall: lwz r3,GPR3(r1) #endif mr r6,r3 - CURRENT_THREAD_INFO(r12, r1) /* disable interrupts so current_thread_info()->flags can't change */ LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ /* Note: We don't bother telling lockdep about it */ SYNC MTMSRD(r10) - lwz r9,TI_FLAGS(r12) + lwz r9,TI_FLAGS(r2) li r8,-MAX_ERRNO andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- syscall_exit_work @@ -434,8 +425,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE andi. r4,r8,MSR_PR beq 3f - CURRENT_THREAD_INFO(r4, r1) - ACCOUNT_CPU_USER_EXIT(r4, r5, r7) + ACCOUNT_CPU_USER_EXIT(r2, r5, r7) 3: #endif lwz r4,_LINK(r1) @@ -528,7 +518,7 @@ syscall_exit_work: /* Clear per-syscall TIF flags if any are set. */ li r11,_TIF_PERSYSCALL_MASK - addi r12,r12,TI_FLAGS + addi r12,r2,TI_FLAGS 3: lwarx r8,0,r12 andc r8,r8,r11 #ifdef CONFIG_IBM405_ERR77 @@ -536,7 +526,6 @@ syscall_exit_work: #endif stwcx. r8,0,r12 bne- 3b - subi r12,r12,TI_FLAGS 4: /* Anything which requires enabling interrupts? */ andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP) @@ -815,8 +804,7 @@ ret_from_except: user_exc_return: /* r10 contains MSR_KERNEL here */ /* Check current_thread_info()->flags */ - CURRENT_THREAD_INFO(r9, r1) - lwz r9,TI_FLAGS(r9) + lwz r9,TI_FLAGS(r2) andi. r0,r9,_TIF_USER_WORK_MASK bne do_work @@ -828,18 +816,14 @@ restore_user: andis. 
r10,r0,DBCR0_IDM@h bnel- load_dbcr0 #endif -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - CURRENT_THREAD_INFO(r9, r1) - ACCOUNT_CPU_USER_EXIT(r9, r10, r11) -#endif + ACCOUNT_CPU_USER_EXIT(r2, r10, r11) b restore /* N.B. the only way to get here is from the beq following ret_from_except. */ resume_kernel: /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ - CURRENT_THREAD_INFO(r9, r1) - lwz r8,TI_FLAGS(r9) + lwz r8,TI_FLAGS(r2) andis. r0,r8,_TIF_EMULATE_STACK_STORE@h beq+ 1f @@ -865,7 +849,7 @@ resume_kernel: /* Clear _TIF_EMULATE_STACK_STORE flag */ lis r11,_TIF_EMULATE_STACK_STORE@h - addi r5,r9,TI_FLAGS + addi r5,r2,TI_FLAGS 0: lwarx r8,0,r5 andc r8,r8,r11 #ifdef CONFIG_IBM405_ERR77 @@ -877,7 +861,7 @@ resume_kernel: #ifdef CONFIG_PREEMPT /* check current_thread_info->preempt_count */ - lwz r0,TI_PREEMPT(r9) + lwz r0,TI_PREEMPT(r2) cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore andi. r8,r8,_TIF_NEED_RESCHED @@ -893,8 +877,7 @@ resume_kernel: bl trace_hardirqs_off #endif 1: bl preempt_schedule_irq - CURRENT_THREAD_INFO(r9, r1) - lwz r3,TI_FLAGS(r9) + lwz r3,TI_FLAGS(r2) andi. r0,r3,_TIF_NEED_RESCHED bne- 1b #ifdef CONFIG_TRACE_IRQFLAGS @@ -1190,8 +1173,7 @@ load_dbcr0: lis r11,global_dbcr0@ha addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP - CURRENT_THREAD_INFO(r9, r1) - lwz r9,TI_CPU(r9) + lwz r9,TASK_CPU(r2) slwi r9,r9,3 add r11,r11,r9 #endif @@ -1231,8 +1213,7 @@ recheck: LOAD_MSR_KERNEL(r10,MSR_KERNEL) SYNC MTMSRD(r10) /* disable interrupts */ - CURRENT_THREAD_INFO(r9, r1) - lwz r9,TI_FLAGS(r9) + lwz r9,TI_FLAGS(r2) andi. r0,r9,_TIF_NEED_RESCHED bne- do_resched andi. r0,r9,_TIF_USER_WORK_MASK diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S index 52ca2471ee1a..d252f4663a23 100644 --- a/arch/powerpc/kernel/epapr_hcalls.S +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -21,10 +21,9 @@ #ifndef CONFIG_PPC64 /* epapr_ev_idle() was derived from e500_idle() */ _GLOBAL(epapr_ev_idle) - CURRENT_THREAD_INFO(r3, r1) - PPC_LL r4, TI_LOCAL_FLAGS(r3) /* set napping bit */ + PPC_LL r4, TI_LOCAL_FLAGS(r2) /* set napping bit */ ori r4, r4,_TLF_NAPPING /* so when we take an exception */ - PPC_STL r4, TI_LOCAL_FLAGS(r3) /* it will return to our caller */ + PPC_STL r4, TI_LOCAL_FLAGS(r2) /* it will return to our caller */ wrteei 1 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 4ed2a7c8e89b..1881127682e9 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -244,8 +244,7 @@ set_ivor: stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) #ifdef CONFIG_SMP - CURRENT_THREAD_INFO(r22, r1) - stw r24, TI_CPU(r22) + stw r24, TASK_CPU(r2) #endif bl early_init @@ -719,7 +718,7 @@ finish_tlb_load: /* Get the next_tlbcam_idx percpu var */ #ifdef CONFIG_SMP - lwz r15, TI_CPU-THREAD(r12) + lwz r15, TASK_CPU-THREAD(r12) lis r14, __per_cpu_offset@h ori r14, r14, __per_cpu_offset@l rlwinm r15, r15, 2, 0, 29 diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index d9b6e7e0b5e3..c5e7f5bb2e66 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -136,10 +136,9 @@ BEGIN_FTR_SECTION DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - CURRENT_THREAD_INFO(r9, r1) - lwz r8,TI_LOCAL_FLAGS(r9) /* set napping bit */ + lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */ ori r8,r8,_TLF_NAPPING /* so when we take an exception */ - stw r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */ + stw r8,TI_LOCAL_FLAGS(r2) /* it will return to our 
caller */ mfmsr r7 ori r7,r7,MSR_EE oris r7,r7,MSR_POW@h @@ -159,9 +158,7 @@ _GLOBAL(power_save_ppc32_restore) stw r9,_NIP(r11) /* make it do a blr */ #ifdef CONFIG_SMP - CURRENT_THREAD_INFO(r12, r1) - tophys(r12, r12) - lwz r11,TI_CPU(r12) /* get cpu number * 4 */ + lwz r11,TASK_CPU(r2) /* get cpu number * 4 */ slwi r11,r11,2 #else li r11,0 diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 583e55ac7d26..69dfcd2ca011 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -22,10 +22,9 @@ .text _GLOBAL(e500_idle) - CURRENT_THREAD_INFO(r3, r1) - lwz r4,TI_LOCAL_FLAGS(r3) /* set napping bit */ + lwz r4,TI_LOCAL_FLAGS(r2) /* set napping bit */ ori r4,r4,_TLF_NAPPING /* so when we take an exception */ - stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */ + stw r4,TI_LOCAL_FLAGS(r2) /* it will return to our caller */ #ifdef CONFIG_PPC_E500MC wrteei 1 @@ -88,8 +87,7 @@ _GLOBAL(power_save_ppc32_restore) stw r9,_NIP(r11) /* make it do a blr */ #ifdef CONFIG_SMP - CURRENT_THREAD_INFO(r12, r1) - lwz r11,TI_CPU(r12) /* get cpu number * 4 */ + lwz r11,TASK_CPU(r2) /* get cpu number * 4 */ slwi r11,r11,2 #else li r11,0 diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 6f6127c3760c..0dda4f8e3d7a 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -183,8 +183,7 @@ _GLOBAL(low_choose_750fx_pll) #ifdef CONFIG_SMP /* Store new HID1 image */ - CURRENT_THREAD_INFO(r6, r1) - lwz r6,TI_CPU(r6) + lwz r6,TASK_CPU(r2) slwi r6,r6,2 #else li r6, 0 diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index d94fef524ef5..1f13494efb2b 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -183,8 +183,7 @@ _GLOBAL(add_hash_page) add r3,r3,r0 /* note create_hpte trims to 24 bits */ #ifdef CONFIG_SMP - CURRENT_THREAD_INFO(r8, r1) /* use cpu number to make tag */ - lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */ + lwz r8,TASK_CPU(r2) /* to go in mmu_hash_lock */ oris r8,r8,12 #endif /* CONFIG_SMP */ @@ -540,9 +539,7 @@ _GLOBAL(flush_hash_pages) #ifdef CONFIG_SMP lis r9, (mmu_hash_lock - PAGE_OFFSET)@ha addi r9, r9, (mmu_hash_lock - PAGE_OFFSET)@l - CURRENT_THREAD_INFO(r8, r1) - tophys(r8, r8) - lwz r8,TI_CPU(r8) + lwz r8,TASK_CPU(r2) oris r8,r8,9 10: lwarx r0,0,r9 cmpi 0,r0,0 @@ -637,8 +634,7 @@ EXPORT_SYMBOL(flush_hash_pages) */ _GLOBAL(_tlbie) #ifdef CONFIG_SMP - CURRENT_THREAD_INFO(r8, r1) - lwz r8,TI_CPU(r8) + lwz r8,TASK_CPU(r2) oris r8,r8,11 mfmsr r10 SYNC @@ -675,8 +671,7 @@ _GLOBAL(_tlbie) */ _GLOBAL(_tlbia) #if defined(CONFIG_SMP) - CURRENT_THREAD_INFO(r8, r1) - lwz r8,TI_CPU(r8) + lwz r8,TASK_CPU(r2) oris r8,r8,10 mfmsr r10 SYNC diff --git a/arch/powerpc/sysdev/6xx-suspend.S b/arch/powerpc/sysdev/6xx-suspend.S index cf48e9cb2575..6c4aec25c4ba 100644 --- a/arch/powerpc/sysdev/6xx-suspend.S +++ b/arch/powerpc/sysdev/6xx-suspend.S @@ -29,10 +29,9 @@ _GLOBAL(mpc6xx_enter_standby) ori r5, r5, ret_from_standby@l mtlr r5 - CURRENT_THREAD_INFO(r5, r1) - lwz r6, TI_LOCAL_FLAGS(r5) + lwz r6, TI_LOCAL_FLAGS(r2) ori r6, r6, _TLF_SLEEPING - stw r6, TI_LOCAL_FLAGS(r5) + stw r6, TI_LOCAL_FLAGS(r2) mfmsr r5 ori r5, r5, MSR_EE -- cgit v1.2.3 From c911d2e128e8ab7e789a5488dcb63ae9fe130aca Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 12 Jan 2019 09:55:50 +0000 Subject: powerpc/64: Replace CURRENT_THREAD_INFO with PACA_THREAD_INFO Now that current_thread_info is located at the beginning of 'current' task struct, CURRENT_THREAD_INFO macro is 
not really needed any more. This patch replaces it by loads of the value at PACA_THREAD_INFO(r13). Signed-off-by: Christophe Leroy [mpe: Add PACA_THREAD_INFO rather than using PACACURRENT] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 4 ++-- arch/powerpc/include/asm/thread_info.h | 4 ---- arch/powerpc/kernel/asm-offsets.c | 2 ++ arch/powerpc/kernel/entry_64.S | 10 +++++----- arch/powerpc/kernel/exceptions-64e.S | 2 +- arch/powerpc/kernel/exceptions-64s.S | 2 +- arch/powerpc/kernel/idle_book3e.S | 2 +- arch/powerpc/kernel/idle_power4.S | 2 +- arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 6 +++--- 9 files changed, 16 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 3b4767ed3ec5..937bb630093f 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -671,7 +671,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define RUNLATCH_ON \ BEGIN_FTR_SECTION \ - CURRENT_THREAD_INFO(r3, r1); \ + ld r3, PACA_THREAD_INFO(r13); \ ld r4,TI_LOCAL_FLAGS(r3); \ andi. r0,r4,_TLF_RUNLATCH; \ beql ppc64_runlatch_on_trampoline; \ @@ -721,7 +721,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL) #ifdef CONFIG_PPC_970_NAP #define FINISH_NAP \ BEGIN_FTR_SECTION \ - CURRENT_THREAD_INFO(r11, r1); \ + ld r11, PACA_THREAD_INFO(r13); \ ld r9,TI_LOCAL_FLAGS(r11); \ andi. r10,r9,_TLF_NAPPING; \ bnel power4_fixup_nap; \ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index c959b8d66cac..8e1d0195ac36 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -17,10 +17,6 @@ #define THREAD_SIZE (1 << THREAD_SHIFT) -#ifdef CONFIG_PPC64 -#define CURRENT_THREAD_INFO(dest, sp) stringify_in_c(ld dest, PACACURRENT(r13)) -#endif - #ifndef __ASSEMBLY__ #include #include diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 8b688b19776a..86a61e5f8285 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -182,6 +182,8 @@ int main(void) OFFSET(PACAPROCSTART, paca_struct, cpu_start); OFFSET(PACAKSAVE, paca_struct, kstack); OFFSET(PACACURRENT, paca_struct, __current); + DEFINE(PACA_THREAD_INFO, offsetof(struct paca_struct, __current) + + offsetof(struct task_struct, thread_info)); OFFSET(PACASAVEDMSR, paca_struct, saved_msr); OFFSET(PACAR1, paca_struct, saved_r1); OFFSET(PACATOC, paca_struct, kernel_toc); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 21f1cb4d464e..15c67d2c0534 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -166,7 +166,7 @@ system_call: /* label this so stack traces look sane */ li r10,IRQS_ENABLED std r10,SOFTE(r1) - CURRENT_THREAD_INFO(r11, r1) + ld r11, PACA_THREAD_INFO(r13) ld r10,TI_FLAGS(r11) andi. 
r11,r10,_TIF_SYSCALL_DOTRACE bne .Lsyscall_dotrace /* does not return */ @@ -213,7 +213,7 @@ system_call: /* label this so stack traces look sane */ ld r3,RESULT(r1) #endif - CURRENT_THREAD_INFO(r12, r1) + ld r12, PACA_THREAD_INFO(r13) ld r8,_MSR(r1) #ifdef CONFIG_PPC_BOOK3S @@ -346,7 +346,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* Repopulate r9 and r10 for the syscall path */ addi r9,r1,STACK_FRAME_OVERHEAD - CURRENT_THREAD_INFO(r10, r1) + ld r10, PACA_THREAD_INFO(r13) ld r10,TI_FLAGS(r10) cmpldi r0,NR_syscalls @@ -740,7 +740,7 @@ _GLOBAL(ret_from_except_lite) mtmsrd r10,1 /* Update machine state */ #endif /* CONFIG_PPC_BOOK3E */ - CURRENT_THREAD_INFO(r9, r1) + ld r9, PACA_THREAD_INFO(r13) ld r3,_MSR(r1) #ifdef CONFIG_PPC_BOOK3E ld r10,PACACURRENT(r13) @@ -854,7 +854,7 @@ resume_kernel: 1: bl preempt_schedule_irq /* Re-test flags and eventually loop */ - CURRENT_THREAD_INFO(r9, r1) + ld r9, PACA_THREAD_INFO(r13) ld r4,TI_FLAGS(r9) andi. r0,r4,_TIF_NEED_RESCHED bne 1b diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 20f14996281d..4549ce8d4637 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -493,7 +493,7 @@ exc_##n##_bad_stack: \ * interrupts happen before the wait instruction. */ #define CHECK_NAPPING() \ - CURRENT_THREAD_INFO(r11, r1); \ + ld r11, PACA_THREAD_INFO(r13); \ ld r10,TI_LOCAL_FLAGS(r11); \ andi. r9,r10,_TLF_NAPPING; \ beq+ 1f; \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9e253ce27e08..b179b8b5d3f0 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1629,7 +1629,7 @@ do_hash_page: ori r0,r0,DSISR_BAD_FAULT_64S@l and. r0,r4,r0 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ - CURRENT_THREAD_INFO(r11, r1) + ld r11, PACA_THREAD_INFO(r13) lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ bne 77f /* then don't call hash_page now */ diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S index 4e0d94d02030..31e732c378ad 100644 --- a/arch/powerpc/kernel/idle_book3e.S +++ b/arch/powerpc/kernel/idle_book3e.S @@ -63,7 +63,7 @@ _GLOBAL(\name) 1: /* Let's set the _TLF_NAPPING flag so interrupts make us return * to the right spot */ - CURRENT_THREAD_INFO(r11, r1) + ld r11, PACA_THREAD_INFO(r13) ld r10,TI_LOCAL_FLAGS(r11) ori r10,r10,_TLF_NAPPING std r10,TI_LOCAL_FLAGS(r11) diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index a09b3c7ca176..a2fdb0a34b75 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -68,7 +68,7 @@ BEGIN_FTR_SECTION DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - CURRENT_THREAD_INFO(r9, r1) + ld r9, PACA_THREAD_INFO(r13) ld r8,TI_LOCAL_FLAGS(r9) /* set napping bit */ ori r8,r8,_TLF_NAPPING /* so when we take an exception */ std r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */ diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index 32476a6e4e9c..01b1224add49 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -229,7 +229,7 @@ ftrace_call: * - r0, r11 & r12 are free */ livepatch_handler: - CURRENT_THREAD_INFO(r12, r1) + ld r12, PACA_THREAD_INFO(r13) /* Allocate 3 x 8 bytes */ ld r11, TI_livepatch_sp(r12) @@ -256,7 +256,7 @@ livepatch_handler: * restore it.
*/ - CURRENT_THREAD_INFO(r12, r1) + ld r12, PACA_THREAD_INFO(r13) ld r11, TI_livepatch_sp(r12) @@ -273,7 +273,7 @@ livepatch_handler: ld r2, -24(r11) /* Pop livepatch stack frame */ - CURRENT_THREAD_INFO(r12, r1) + ld r12, PACA_THREAD_INFO(r13) subi r11, r11, 24 std r11, TI_livepatch_sp(r12) -- cgit v1.2.3 From 930d6288a26787d2e7f633705434171a506db9c5 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Fri, 22 Feb 2019 12:23:27 +0530 Subject: powerpc: sstep: Add support for maddhd, maddhdu, maddld instructions This adds emulation support for the following integer instructions: * Multiply-Add High Doubleword (maddhd) * Multiply-Add High Doubleword Unsigned (maddhdu) * Multiply-Add Low Doubleword (maddld) As suggested by Michael, this uses a raw .long for specifying the instruction word when using inline assembly to retain compatibility with older binutils. Signed-off-by: Sandipan Das Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 15 ++++++++++++++- arch/powerpc/lib/sstep.c | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 87b73aa56b53..2bc949414669 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -335,6 +335,9 @@ #define PPC_INST_MULLW 0x7c0001d6 #define PPC_INST_MULHWU 0x7c000016 #define PPC_INST_MULLI 0x1c000000 +#define PPC_INST_MADDHD 0x10000030 +#define PPC_INST_MADDHDU 0x10000031 +#define PPC_INST_MADDLD 0x10000033 #define PPC_INST_DIVWU 0x7c000396 #define PPC_INST_DIVD 0x7c0003d2 #define PPC_INST_RLWINM 0x54000000 @@ -377,6 +380,7 @@ /* macros to insert fields into opcodes */ #define ___PPC_RA(a) (((a) & 0x1f) << 16) #define ___PPC_RB(b) (((b) & 0x1f) << 11) +#define ___PPC_RC(c) (((c) & 0x1f) << 6) #define ___PPC_RS(s) (((s) & 0x1f) << 21) #define ___PPC_RT(t) ___PPC_RS(t) #define ___PPC_R(r) (((r) & 0x1) << 16) @@ -396,7 +400,7 @@ #define __PPC_WS(w) (((w) & 0x1f) << 11) #define __PPC_SH(s) __PPC_WS(s) #define __PPC_SH64(s) (__PPC_SH(s) | (((s) & 0x20) >> 4)) -#define __PPC_MB(s) (((s) & 0x1f) << 6) +#define __PPC_MB(s) ___PPC_RC(s) #define __PPC_ME(s) (((s) & 0x1f) << 1) #define __PPC_MB64(s) (__PPC_MB(s) | ((s) & 0x20)) #define __PPC_ME64(s) __PPC_MB64(s) @@ -438,6 +442,15 @@ #define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_INST_STQCX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b)) +#define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_INST_MADDHD | \ + ___PPC_RT(t) | ___PPC_RA(a) | \ + ___PPC_RB(b) | ___PPC_RC(c)) +#define PPC_MADDHDU(t, a, b, c) stringify_in_c(.long PPC_INST_MADDHDU | \ + ___PPC_RT(t) | ___PPC_RA(a) | \ + ___PPC_RB(b) | ___PPC_RC(c)) +#define PPC_MADDLD(t, a, b, c) stringify_in_c(.long PPC_INST_MADDLD | \ + ___PPC_RT(t) | ___PPC_RA(a) | \ + ___PPC_RB(b) | ___PPC_RC(c)) #define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \ ___PPC_RB(b)) #define PPC_MSGSYNC stringify_in_c(.long PPC_INST_MSGSYNC) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index d81568f783e5..67e69ebd6c00 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1169,7 +1169,7 @@ static nokprobe_inline int trap_compare(long v1, long v2) int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, unsigned int instr) { - unsigned int opcode, ra, rb, rd, spr, u; + unsigned int opcode, ra, rb, rc, rd, spr, u; unsigned long int imm; unsigned long int val, val2; unsigned int mb, me, sh; @@ 
-1292,6 +1292,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, rd = (instr >> 21) & 0x1f; ra = (instr >> 16) & 0x1f; rb = (instr >> 11) & 0x1f; + rc = (instr >> 6) & 0x1f; switch (opcode) { #ifdef __powerpc64__ @@ -1305,6 +1306,38 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, goto trap; return 1; +#ifdef __powerpc64__ + case 4: + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return -1; + + switch (instr & 0x3f) { + case 48: /* maddhd */ + asm volatile(PPC_MADDHD(%0, %1, %2, %3) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb]), "r" (regs->gpr[rc])); + goto compute_done; + + case 49: /* maddhdu */ + asm volatile(PPC_MADDHDU(%0, %1, %2, %3) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb]), "r" (regs->gpr[rc])); + goto compute_done; + + case 51: /* maddld */ + asm volatile(PPC_MADDLD(%0, %1, %2, %3) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb]), "r" (regs->gpr[rc])); + goto compute_done; + } + + /* + * There are other instructions from ISA 3.0 with the same + * primary opcode which do not have emulation support yet. + */ + return -1; +#endif + case 7: /* mulli */ op->val = regs->gpr[ra] * (short) instr; goto compute_done; -- cgit v1.2.3 From ccd477028a202993b9ddca5d2404fdaca3b7a55c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 26 Feb 2019 18:51:07 +1000 Subject: powerpc/64s: Fix HV NMI vs HV interrupt recoverability test HV interrupts that use HSRR registers do not enter with MSR[RI] clear, but their entry code is not recoverable vs NMI, due to shared use of HSPRG1 as a scratch register to save r13. This means that a system reset or machine check that hits in HSRR interrupt entry can cause r13 to be silently corrupted. Fix this by marking NMIs non-recoverable if they land in HV interrupt ranges. 
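The core of the fix is an address-window test on the interrupted NIP. A user-space sketch of just that test, with the window constants copied from the patch below (the trampoline ranges are omitted here, and the function name is illustrative); masking the top bits folds relocated addresses at the 0xc... kernel base onto their un-relocated equivalents, while the +0x4000 virt-mode entry offsets are listed as explicit windows:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified model of the NIP window test added below; the constants
 * are the fixed HSRR interrupt entry ranges from the patch. */
static bool nip_in_hsrr_entry(uint64_t nip)
{
	/* Fold relocated (0xc...) addresses onto un-relocated ones. */
	nip &= ~0xc000000000000000ULL;

	return (nip >= 0x500 && nip < 0x600) ||
	       (nip >= 0x4500 && nip < 0x4600) ||
	       (nip >= 0x980 && nip < 0xa00) ||
	       (nip >= 0x4980 && nip < 0x4a00) ||
	       (nip >= 0xe00 && nip < 0xec0) ||
	       (nip >= 0x4e00 && nip < 0x4ec0) ||
	       (nip >= 0xf80 && nip < 0xfa0) ||
	       (nip >= 0x4f80 && nip < 0x4fa0);
}

int main(void)
{
	/* Real-mode and relocated virt-mode hits, plus a miss (0x300,
	 * the data storage interrupt, which uses the SRR registers). */
	printf("%d\n", nip_in_hsrr_entry(0x500));		  /* 1 */
	printf("%d\n", nip_in_hsrr_entry(0xc000000000004500ULL)); /* 1 */
	printf("%d\n", nip_in_hsrr_entry(0x300));		  /* 0 */
	return 0;
}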
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/asm-prototypes.h | 8 ++++ arch/powerpc/include/asm/nmi.h | 2 + arch/powerpc/kernel/exceptions-64s.S | 8 ++++ arch/powerpc/kernel/mce.c | 3 ++ arch/powerpc/kernel/traps.c | 66 +++++++++++++++++++++++++++++++ 5 files changed, 87 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 1484df6779ab..e01f31fb0865 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -51,6 +51,14 @@ int exit_vmx_usercopy(void); int enter_vmx_ops(void); void *exit_vmx_ops(void *dest); +/* Exceptions */ +#ifdef CONFIG_PPC_POWERNV +extern unsigned long real_trampolines_start; +extern unsigned long real_trampolines_end; +extern unsigned long virt_trampolines_start; +extern unsigned long virt_trampolines_end; +#endif + /* Traps */ long machine_check_early(struct pt_regs *regs); long hmi_exception_realmode(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index bd9ba8defd72..84b4cfe73edd 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -14,4 +14,6 @@ extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace #endif +extern void hv_nmi_check_nonrecoverable(struct pt_regs *regs); + #endif /* _ASM_NMI_H */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index b179b8b5d3f0..76442af8c191 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -68,6 +68,14 @@ OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900) OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x4000) OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900) OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000) + +#ifdef CONFIG_PPC_POWERNV + .globl real_trampolines_start + .globl real_trampolines_end + .globl virt_trampolines_start + .globl virt_trampolines_end +#endif + #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* * Data area reserved for FWNMI option. diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index d501b48f287e..b5fec1f9751a 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -31,6 +31,7 @@ #include #include +#include static DEFINE_PER_CPU(int, mce_nest_count); static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); @@ -490,6 +491,8 @@ long machine_check_early(struct pt_regs *regs) { long handled = 0; + hv_nmi_check_nonrecoverable(regs); + /* * See if platform is capable of handling machine check. */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index b25bc8af7d38..eee8f843f3d6 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -369,6 +369,70 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) force_sig_fault(signr, code, (void __user *)addr, current); } +/* + * The interrupt architecture has a quirk in that the HV interrupts excluding + * the NMIs (0x100 and 0x200) do not clear MSR[RI] at entry. The first thing + * that an interrupt handler must do is save off a GPR into a scratch register, + * and all interrupts on POWERNV (HV=1) use the HSPRG1 register as scratch. + * Therefore an NMI can clobber an HV interrupt's live HSPRG1 without noticing + * that it is non-reentrant, which leads to random data corruption. 
+ * + * The solution is for NMI interrupts in HV mode to check if they originated + * from these critical HV interrupt regions. If so, then mark them not + * recoverable. + * + * An alternative would be for HV NMIs to use SPRG for scratch to avoid the + * HSPRG1 clobber, however this would cause guest SPRG to be clobbered. Linux + * guests should always have MSR[RI]=0 when their scratch SPRG is in use, so + * that would work. However any other guest OS that may have the SPRG live + * and MSR[RI]=1 could encounter silent corruption. + * + * Builds that do not support KVM could take this second option to increase + * the recoverability of NMIs. + */ +void hv_nmi_check_nonrecoverable(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC_POWERNV + unsigned long kbase = (unsigned long)_stext; + unsigned long nip = regs->nip; + + if (!(regs->msr & MSR_RI)) + return; + if (!(regs->msr & MSR_HV)) + return; + if (regs->msr & MSR_PR) + return; + + /* + * Now test if the interrupt has hit a range that may be using + * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The + * problem ranges all run un-relocated. Test real and virt modes + * at the same time by dropping the high bit of the nip (virt mode + * entry points still have the +0x4000 offset). + */ + nip &= ~0xc000000000000000ULL; + if ((nip >= 0x500 && nip < 0x600) || (nip >= 0x4500 && nip < 0x4600)) + goto nonrecoverable; + if ((nip >= 0x980 && nip < 0xa00) || (nip >= 0x4980 && nip < 0x4a00)) + goto nonrecoverable; + if ((nip >= 0xe00 && nip < 0xec0) || (nip >= 0x4e00 && nip < 0x4ec0)) + goto nonrecoverable; + if ((nip >= 0xf80 && nip < 0xfa0) || (nip >= 0x4f80 && nip < 0x4fa0)) + goto nonrecoverable; + /* Trampoline code runs un-relocated so subtract kbase. */ + if (nip >= real_trampolines_start - kbase && + nip < real_trampolines_end - kbase) + goto nonrecoverable; + if (nip >= virt_trampolines_start - kbase && + nip < virt_trampolines_end - kbase) + goto nonrecoverable; + return; + +nonrecoverable: + regs->msr &= ~MSR_RI; +#endif +} + void system_reset_exception(struct pt_regs *regs) { /* @@ -379,6 +443,8 @@ void system_reset_exception(struct pt_regs *regs) if (!nested) nmi_enter(); + hv_nmi_check_nonrecoverable(regs); + __this_cpu_inc(irq_stat.sreset_irqs); /* See if any machine dependent calls */ -- cgit v1.2.3 From 75d9fc7fd94eb43cdf0bec04499a27ced780af19 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 26 Feb 2019 19:30:35 +1000 Subject: powerpc/powernv: move OPAL call wrapper tracing and interrupt handling to C The OPAL call wrapper gets interrupt disabling wrong. It disables interrupts just by clearing MSR[EE], which has two problems: - It doesn't call into the IRQ tracing subsystem, which means tracing across OPAL calls does not always notice IRQs have been disabled. - It doesn't go through the IRQ soft-mask code, which causes a minor bug. MSR[EE] cannot be restored by saving the MSR then clearing MSR[EE], because a racing interrupt while soft-masked could clear MSR[EE] between the two steps. This can cause MSR[EE] to be incorrectly enabled when the OPAL call returns. Fortunately that should only result in another masked interrupt being taken to disable MSR[EE] again, but it's a bit sloppy. The existing code also saves MSR to PACA, which is not re-entrant if there is a nested OPAL call from different MSR contexts, which can happen these days with SRESET interrupts on bare metal. To fix these issues, move the tracing and IRQ handling code to C, and call into asm just for the low level call when everything is ready to go.
Save the MSR on stack rather than PACA. Performance cost is kept to a minimum with a few optimisations: - The endian switch upon return is combined with the MSR restore, which avoids an expensive context synchronizing operation for LE kernels. This makes up for the additional mtmsrd to enable interrupts with local_irq_enable(). - blr is now used to return from the opal_* functions that are called as C functions, to avoid link stack corruption. This requires a skiboot fix as well to keep the call stack balanced. A NULL call is more costly after this, (410ns->430ns on POWER9), but OPAL calls are generally not performance critical at this scale. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/asm-prototypes.h | 10 +- arch/powerpc/platforms/powernv/Makefile | 5 +- arch/powerpc/platforms/powernv/opal-call.c | 283 ++++++++++++++++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 344 +++---------------------- 4 files changed, 328 insertions(+), 314 deletions(-) create mode 100644 arch/powerpc/platforms/powernv/opal-call.c (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index e01f31fb0865..effdd096fa4c 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -37,13 +37,11 @@ void kexec_copy_flush(struct kimage *image); extern struct static_key hcall_tracepoint_key; void __trace_hcall_entry(unsigned long opcode, unsigned long *args); void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf); -/* OPAL tracing */ -#ifdef CONFIG_JUMP_LABEL -extern struct static_key opal_tracepoint_key; -#endif -void __trace_opal_entry(unsigned long opcode, unsigned long *args); -void __trace_opal_exit(long opcode, unsigned long retval); +/* OPAL */ +int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, + int64_t a4, int64_t a5, int64_t a6, int64_t a7, + int64_t opcode, uint64_t msr); /* VMX copying */ int enter_vmx_usercopy(void); diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index b540ce8eec55..da2e99efbd04 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o -obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o +obj-y += setup.o opal-call.o opal-wrappers.o opal.o opal-async.o +obj-y += idle.o opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o @@ -11,7 +11,6 @@ obj-$(CONFIG_CXL_BASE) += pci-cxl.o obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o -obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o obj-$(CONFIG_OPAL_PRD) += opal-prd.o obj-$(CONFIG_PERF_EVENTS) += opal-imc.o obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c new file mode 100644 index 000000000000..578757d403ab --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-call.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +#ifdef CONFIG_TRACEPOINTS +/* + * Since the tracing code might execute OPAL calls we need to guard 
against + * recursion. + */ +static DEFINE_PER_CPU(unsigned int, opal_trace_depth); + +static void __trace_opal_entry(s64 a0, s64 a1, s64 a2, s64 a3, + s64 a4, s64 a5, s64 a6, s64 a7, + unsigned long opcode) +{ + unsigned int *depth; + unsigned long args[8]; + + depth = this_cpu_ptr(&opal_trace_depth); + + if (*depth) + return; + + args[0] = a0; + args[1] = a1; + args[2] = a2; + args[3] = a3; + args[4] = a4; + args[5] = a5; + args[6] = a6; + args[7] = a7; + + (*depth)++; + trace_opal_entry(opcode, &args[0]); + (*depth)--; +} + +static void __trace_opal_exit(unsigned long opcode, unsigned long retval) +{ + unsigned int *depth; + + depth = this_cpu_ptr(&opal_trace_depth); + + if (*depth) + return; + + (*depth)++; + trace_opal_exit(opcode, retval); + (*depth)--; +} + +static DEFINE_STATIC_KEY_FALSE(opal_tracepoint_key); + +int opal_tracepoint_regfunc(void) +{ + static_branch_inc(&opal_tracepoint_key); + return 0; +} + +void opal_tracepoint_unregfunc(void) +{ + static_branch_dec(&opal_tracepoint_key); +} + +static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3, + s64 a4, s64 a5, s64 a6, s64 a7, + unsigned long opcode, unsigned long msr) +{ + s64 ret; + + __trace_opal_entry(a0, a1, a2, a3, a4, a5, a6, a7, opcode); + ret = __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr); + __trace_opal_exit(opcode, ret); + + return ret; +} + +#define DO_TRACE (static_branch_unlikely(&opal_tracepoint_key)) + +#else /* CONFIG_TRACEPOINTS */ + +static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3, + s64 a4, s64 a5, s64 a6, s64 a7, + unsigned long opcode, unsigned long msr) +{ +} + +#define DO_TRACE false +#endif /* CONFIG_TRACEPOINTS */ + +static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, + int64_t a4, int64_t a5, int64_t a6, int64_t a7, int64_t opcode) +{ + unsigned long flags; + unsigned long msr = mfmsr(); + bool mmu = (msr & (MSR_IR|MSR_DR)); + int64_t ret; + + msr &= ~MSR_EE; + + if (unlikely(!mmu)) + return __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr); + + local_save_flags(flags); + hard_irq_disable(); + + if (DO_TRACE) { + ret = __opal_call_trace(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr); + } else { + ret = __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr); + } + + local_irq_restore(flags); + + return ret; +} + +#define OPAL_CALL(name, opcode) \ +int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \ + int64_t a4, int64_t a5, int64_t a6, int64_t a7) \ +{ \ + return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode); \ +} + +OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); +OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); +OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ); +OPAL_CALL(opal_console_write_buffer_space, OPAL_CONSOLE_WRITE_BUFFER_SPACE); +OPAL_CALL(opal_rtc_read, OPAL_RTC_READ); +OPAL_CALL(opal_rtc_write, OPAL_RTC_WRITE); +OPAL_CALL(opal_cec_power_down, OPAL_CEC_POWER_DOWN); +OPAL_CALL(opal_cec_reboot, OPAL_CEC_REBOOT); +OPAL_CALL(opal_cec_reboot2, OPAL_CEC_REBOOT2); +OPAL_CALL(opal_read_nvram, OPAL_READ_NVRAM); +OPAL_CALL(opal_write_nvram, OPAL_WRITE_NVRAM); +OPAL_CALL(opal_handle_interrupt, OPAL_HANDLE_INTERRUPT); +OPAL_CALL(opal_poll_events, OPAL_POLL_EVENTS); +OPAL_CALL(opal_pci_set_hub_tce_memory, OPAL_PCI_SET_HUB_TCE_MEMORY); +OPAL_CALL(opal_pci_set_phb_tce_memory, OPAL_PCI_SET_PHB_TCE_MEMORY); +OPAL_CALL(opal_pci_config_read_byte, OPAL_PCI_CONFIG_READ_BYTE); +OPAL_CALL(opal_pci_config_read_half_word, OPAL_PCI_CONFIG_READ_HALF_WORD); +OPAL_CALL(opal_pci_config_read_word, OPAL_PCI_CONFIG_READ_WORD); 
+OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE); +OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD); +OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD); +OPAL_CALL(opal_set_xive, OPAL_SET_XIVE); +OPAL_CALL(opal_get_xive, OPAL_GET_XIVE); +OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER); +OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS); +OPAL_CALL(opal_pci_eeh_freeze_clear, OPAL_PCI_EEH_FREEZE_CLEAR); +OPAL_CALL(opal_pci_eeh_freeze_set, OPAL_PCI_EEH_FREEZE_SET); +OPAL_CALL(opal_pci_err_inject, OPAL_PCI_ERR_INJECT); +OPAL_CALL(opal_pci_shpc, OPAL_PCI_SHPC); +OPAL_CALL(opal_pci_phb_mmio_enable, OPAL_PCI_PHB_MMIO_ENABLE); +OPAL_CALL(opal_pci_set_phb_mem_window, OPAL_PCI_SET_PHB_MEM_WINDOW); +OPAL_CALL(opal_pci_map_pe_mmio_window, OPAL_PCI_MAP_PE_MMIO_WINDOW); +OPAL_CALL(opal_pci_set_phb_table_memory, OPAL_PCI_SET_PHB_TABLE_MEMORY); +OPAL_CALL(opal_pci_set_pe, OPAL_PCI_SET_PE); +OPAL_CALL(opal_pci_set_peltv, OPAL_PCI_SET_PELTV); +OPAL_CALL(opal_pci_set_mve, OPAL_PCI_SET_MVE); +OPAL_CALL(opal_pci_set_mve_enable, OPAL_PCI_SET_MVE_ENABLE); +OPAL_CALL(opal_pci_get_xive_reissue, OPAL_PCI_GET_XIVE_REISSUE); +OPAL_CALL(opal_pci_set_xive_reissue, OPAL_PCI_SET_XIVE_REISSUE); +OPAL_CALL(opal_pci_set_xive_pe, OPAL_PCI_SET_XIVE_PE); +OPAL_CALL(opal_get_xive_source, OPAL_GET_XIVE_SOURCE); +OPAL_CALL(opal_get_msi_32, OPAL_GET_MSI_32); +OPAL_CALL(opal_get_msi_64, OPAL_GET_MSI_64); +OPAL_CALL(opal_start_cpu, OPAL_START_CPU); +OPAL_CALL(opal_query_cpu_status, OPAL_QUERY_CPU_STATUS); +OPAL_CALL(opal_write_oppanel, OPAL_WRITE_OPPANEL); +OPAL_CALL(opal_pci_map_pe_dma_window, OPAL_PCI_MAP_PE_DMA_WINDOW); +OPAL_CALL(opal_pci_map_pe_dma_window_real, OPAL_PCI_MAP_PE_DMA_WINDOW_REAL); +OPAL_CALL(opal_pci_reset, OPAL_PCI_RESET); +OPAL_CALL(opal_pci_get_hub_diag_data, OPAL_PCI_GET_HUB_DIAG_DATA); +OPAL_CALL(opal_pci_get_phb_diag_data, OPAL_PCI_GET_PHB_DIAG_DATA); +OPAL_CALL(opal_pci_fence_phb, OPAL_PCI_FENCE_PHB); +OPAL_CALL(opal_pci_reinit, OPAL_PCI_REINIT); +OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR); +OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS); +OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS); +OPAL_CALL(opal_get_dpo_status, OPAL_GET_DPO_STATUS); +OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED); +OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR); +OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL); +OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI); +OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2); +OPAL_CALL(opal_xscom_read, OPAL_XSCOM_READ); +OPAL_CALL(opal_xscom_write, OPAL_XSCOM_WRITE); +OPAL_CALL(opal_lpc_read, OPAL_LPC_READ); +OPAL_CALL(opal_lpc_write, OPAL_LPC_WRITE); +OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU); +OPAL_CALL(opal_reinit_cpus, OPAL_REINIT_CPUS); +OPAL_CALL(opal_read_elog, OPAL_ELOG_READ); +OPAL_CALL(opal_send_ack_elog, OPAL_ELOG_ACK); +OPAL_CALL(opal_get_elog_size, OPAL_ELOG_SIZE); +OPAL_CALL(opal_resend_pending_logs, OPAL_ELOG_RESEND); +OPAL_CALL(opal_write_elog, OPAL_ELOG_WRITE); +OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE); +OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); +OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); +OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE); +OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN); +OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT); +OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO); +OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2); +OPAL_CALL(opal_dump_read, OPAL_DUMP_READ); 
+OPAL_CALL(opal_dump_ack, OPAL_DUMP_ACK); +OPAL_CALL(opal_get_msg, OPAL_GET_MSG); +OPAL_CALL(opal_write_oppanel_async, OPAL_WRITE_OPPANEL_ASYNC); +OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION); +OPAL_CALL(opal_dump_resend_notification, OPAL_DUMP_RESEND); +OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT); +OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); +OPAL_CALL(opal_get_param, OPAL_GET_PARAM); +OPAL_CALL(opal_set_param, OPAL_SET_PARAM); +OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); +OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); +OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); +OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); +OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); +OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CAPI_MODE); +OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO); +OPAL_CALL(opal_tpo_read, OPAL_READ_TPO); +OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND); +OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV); +OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST); +OPAL_CALL(opal_flash_read, OPAL_FLASH_READ); +OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE); +OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE); +OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG); +OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR); +OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR); +OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH); +OPAL_CALL(opal_get_device_tree, OPAL_GET_DEVICE_TREE); +OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE); +OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE); +OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE); +OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR); +OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR); +OPAL_CALL(opal_int_eoi, OPAL_INT_EOI); +OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR); +OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL); +OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR); +OPAL_CALL(opal_xive_reset, OPAL_XIVE_RESET); +OPAL_CALL(opal_xive_get_irq_info, OPAL_XIVE_GET_IRQ_INFO); +OPAL_CALL(opal_xive_get_irq_config, OPAL_XIVE_GET_IRQ_CONFIG); +OPAL_CALL(opal_xive_set_irq_config, OPAL_XIVE_SET_IRQ_CONFIG); +OPAL_CALL(opal_xive_get_queue_info, OPAL_XIVE_GET_QUEUE_INFO); +OPAL_CALL(opal_xive_set_queue_info, OPAL_XIVE_SET_QUEUE_INFO); +OPAL_CALL(opal_xive_donate_page, OPAL_XIVE_DONATE_PAGE); +OPAL_CALL(opal_xive_alloc_vp_block, OPAL_XIVE_ALLOCATE_VP_BLOCK); +OPAL_CALL(opal_xive_free_vp_block, OPAL_XIVE_FREE_VP_BLOCK); +OPAL_CALL(opal_xive_allocate_irq, OPAL_XIVE_ALLOCATE_IRQ); +OPAL_CALL(opal_xive_free_irq, OPAL_XIVE_FREE_IRQ); +OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO); +OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO); +OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC); +OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP); +OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET); +OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT); +OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT); +OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR); +OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT); +OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START); +OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP); +OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P); +OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP); +OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP); +OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO); +OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO); +OPAL_CALL(opal_sensor_group_clear, 
OPAL_SENSOR_GROUP_CLEAR); +OPAL_CALL(opal_quiesce, OPAL_QUIESCE); +OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP); +OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE); +OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET); +OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR); +OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR); +OPAL_CALL(opal_sensor_read_u64, OPAL_SENSOR_READ_U64); +OPAL_CALL(opal_sensor_group_enable, OPAL_SENSOR_GROUP_ENABLE); +OPAL_CALL(opal_nx_coproc_init, OPAL_NX_COPROC_INIT); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index f4875fe3f8ff..7d2052d8af9d 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -17,317 +17,51 @@ #include #include - .section ".text" - -#ifdef CONFIG_TRACEPOINTS -#ifdef CONFIG_JUMP_LABEL -#define OPAL_BRANCH(LABEL) \ - ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key) -#else - - .section ".toc","aw" - - .globl opal_tracepoint_refcount -opal_tracepoint_refcount: - .8byte 0 - - .section ".text" - -/* - * We branch around this in early init by using an unconditional cpu - * feature. - */ -#define OPAL_BRANCH(LABEL) \ -BEGIN_FTR_SECTION; \ - b 1f; \ -END_FTR_SECTION(0, 1); \ - ld r11,opal_tracepoint_refcount@toc(r2); \ - cmpdi r11,0; \ - bne- LABEL; \ -1: - -#endif - -#else -#define OPAL_BRANCH(LABEL) -#endif + .section ".text" /* - * DO_OPAL_CALL assumes: - * r0 = opal call token - * r12 = msr - * LR has been saved + * r3-r10 - OPAL call arguments + * STK_PARAM(R11) - OPAL opcode + * STK_PARAM(R12) - MSR to restore */ -#define DO_OPAL_CALL() \ - mfcr r11; \ - stw r11,8(r1); \ - li r11,0; \ - ori r11,r11,MSR_EE; \ - std r12,PACASAVEDMSR(r13); \ - andc r12,r12,r11; \ - mtmsrd r12,1; \ - LOAD_REG_ADDR(r11,opal_return); \ - mtlr r11; \ - li r11,MSR_DR|MSR_IR|MSR_LE;\ - andc r12,r12,r11; \ - mtspr SPRN_HSRR1,r12; \ - LOAD_REG_ADDR(r11,opal); \ - ld r12,8(r11); \ - ld r2,0(r11); \ - mtspr SPRN_HSRR0,r12; \ +_GLOBAL_TOC(__opal_call) + mflr r0 + std r0,PPC_LR_STKOFF(r1) + ld r12,STK_PARAM(R12)(r1) + li r0,MSR_IR|MSR_DR|MSR_LE + andc r12,r12,r0 + LOAD_REG_ADDR(r11, opal_return) + mtlr r11 + LOAD_REG_ADDR(r11, opal) + ld r2,0(r11) + ld r11,8(r11) + mtspr SPRN_HSRR0,r11 + mtspr SPRN_HSRR1,r12 + /* set token to r0 */ + ld r0,STK_PARAM(R11)(r1) hrfid - -#define OPAL_CALL(name, token) \ - _GLOBAL_TOC(name); \ - mfmsr r12; \ - mflr r0; \ - andi. r11,r12,MSR_IR|MSR_DR; \ - std r0,PPC_LR_STKOFF(r1); \ - li r0,token; \ - beq opal_real_call; \ - OPAL_BRANCH(opal_tracepoint_entry) \ - DO_OPAL_CALL() - - opal_return: /* - * Fixup endian on OPAL return... we should be able to simplify - * this by instead converting the below trampoline to a set of - * bytes (always BE) since MSR:LE will end up fixed up as a side - * effect of the rfid. + * Restore MSR on OPAL return. The MSR is set to big-endian. 
*/ - FIXUP_ENDIAN_HV - ld r2,PACATOC(r13); - lwz r4,8(r1); - ld r5,PPC_LR_STKOFF(r1); - ld r6,PACASAVEDMSR(r13); - mtcr r4; - mtspr SPRN_HSRR0,r5; - mtspr SPRN_HSRR1,r6; - hrfid - -opal_real_call: - mfcr r11 - stw r11,8(r1) - /* Set opal return address */ - LOAD_REG_ADDR(r11, opal_return_realmode) - mtlr r11 - li r11,MSR_LE - andc r12,r12,r11 - mtspr SPRN_HSRR1,r12 - LOAD_REG_ADDR(r11,opal) - ld r12,8(r11) - ld r2,0(r11) - mtspr SPRN_HSRR0,r12 - hrfid - -opal_return_realmode: - FIXUP_ENDIAN_HV - ld r2,PACATOC(r13); - lwz r11,8(r1); - ld r12,PPC_LR_STKOFF(r1) - mtcr r11; - mtlr r12 - blr - -#ifdef CONFIG_TRACEPOINTS -opal_tracepoint_entry: - stdu r1,-STACKFRAMESIZE(r1) - std r0,STK_REG(R23)(r1) - std r3,STK_REG(R24)(r1) - std r4,STK_REG(R25)(r1) - std r5,STK_REG(R26)(r1) - std r6,STK_REG(R27)(r1) - std r7,STK_REG(R28)(r1) - std r8,STK_REG(R29)(r1) - std r9,STK_REG(R30)(r1) - std r10,STK_REG(R31)(r1) - mr r3,r0 - addi r4,r1,STK_REG(R24) - bl __trace_opal_entry - ld r0,STK_REG(R23)(r1) - ld r3,STK_REG(R24)(r1) - ld r4,STK_REG(R25)(r1) - ld r5,STK_REG(R26)(r1) - ld r6,STK_REG(R27)(r1) - ld r7,STK_REG(R28)(r1) - ld r8,STK_REG(R29)(r1) - ld r9,STK_REG(R30)(r1) - ld r10,STK_REG(R31)(r1) - - /* setup LR so we return via tracepoint_return */ - LOAD_REG_ADDR(r11,opal_tracepoint_return) - std r11,16(r1) - - mfmsr r12 - DO_OPAL_CALL() - -opal_tracepoint_return: - std r3,STK_REG(R31)(r1) - mr r4,r3 - ld r3,STK_REG(R23)(r1) - bl __trace_opal_exit - ld r3,STK_REG(R31)(r1) - addi r1,r1,STACKFRAMESIZE - ld r0,16(r1) +#ifdef __BIG_ENDIAN__ + ld r11,STK_PARAM(R12)(r1) + mtmsrd r11 +#else + /* Endian can only be switched with rfi, must byte reverse MSR load */ + .short 0x4039 /* li r10,STK_PARAM(R12) */ + .byte (STK_PARAM(R12) >> 8) & 0xff + .byte STK_PARAM(R12) & 0xff + + .long 0x280c6a7d /* ldbrx r11,r10,r1 */ + .long 0x05009f42 /* bcl 20,31,$+4 */ + .long 0xa602487d /* mflr r10 */ + .long 0x14004a39 /* addi r10,r10,20 */ + .long 0xa64b5a7d /* mthsrr0 r10 */ + .long 0xa64b7b7d /* mthsrr1 r11 */ + .long 0x2402004c /* hrfid */ +#endif + ld r2,PACATOC(r13) + ld r0,PPC_LR_STKOFF(r1) mtlr r0 blr -#endif - - -OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); -OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); -OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ); -OPAL_CALL(opal_console_write_buffer_space, OPAL_CONSOLE_WRITE_BUFFER_SPACE); -OPAL_CALL(opal_rtc_read, OPAL_RTC_READ); -OPAL_CALL(opal_rtc_write, OPAL_RTC_WRITE); -OPAL_CALL(opal_cec_power_down, OPAL_CEC_POWER_DOWN); -OPAL_CALL(opal_cec_reboot, OPAL_CEC_REBOOT); -OPAL_CALL(opal_cec_reboot2, OPAL_CEC_REBOOT2); -OPAL_CALL(opal_read_nvram, OPAL_READ_NVRAM); -OPAL_CALL(opal_write_nvram, OPAL_WRITE_NVRAM); -OPAL_CALL(opal_handle_interrupt, OPAL_HANDLE_INTERRUPT); -OPAL_CALL(opal_poll_events, OPAL_POLL_EVENTS); -OPAL_CALL(opal_pci_set_hub_tce_memory, OPAL_PCI_SET_HUB_TCE_MEMORY); -OPAL_CALL(opal_pci_set_phb_tce_memory, OPAL_PCI_SET_PHB_TCE_MEMORY); -OPAL_CALL(opal_pci_config_read_byte, OPAL_PCI_CONFIG_READ_BYTE); -OPAL_CALL(opal_pci_config_read_half_word, OPAL_PCI_CONFIG_READ_HALF_WORD); -OPAL_CALL(opal_pci_config_read_word, OPAL_PCI_CONFIG_READ_WORD); -OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE); -OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD); -OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD); -OPAL_CALL(opal_set_xive, OPAL_SET_XIVE); -OPAL_CALL(opal_get_xive, OPAL_GET_XIVE); -OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER); -OPAL_CALL(opal_pci_eeh_freeze_status, 
-OPAL_CALL(opal_pci_eeh_freeze_clear, OPAL_PCI_EEH_FREEZE_CLEAR);
-OPAL_CALL(opal_pci_eeh_freeze_set, OPAL_PCI_EEH_FREEZE_SET);
-OPAL_CALL(opal_pci_err_inject, OPAL_PCI_ERR_INJECT);
-OPAL_CALL(opal_pci_shpc, OPAL_PCI_SHPC);
-OPAL_CALL(opal_pci_phb_mmio_enable, OPAL_PCI_PHB_MMIO_ENABLE);
-OPAL_CALL(opal_pci_set_phb_mem_window, OPAL_PCI_SET_PHB_MEM_WINDOW);
-OPAL_CALL(opal_pci_map_pe_mmio_window, OPAL_PCI_MAP_PE_MMIO_WINDOW);
-OPAL_CALL(opal_pci_set_phb_table_memory, OPAL_PCI_SET_PHB_TABLE_MEMORY);
-OPAL_CALL(opal_pci_set_pe, OPAL_PCI_SET_PE);
-OPAL_CALL(opal_pci_set_peltv, OPAL_PCI_SET_PELTV);
-OPAL_CALL(opal_pci_set_mve, OPAL_PCI_SET_MVE);
-OPAL_CALL(opal_pci_set_mve_enable, OPAL_PCI_SET_MVE_ENABLE);
-OPAL_CALL(opal_pci_get_xive_reissue, OPAL_PCI_GET_XIVE_REISSUE);
-OPAL_CALL(opal_pci_set_xive_reissue, OPAL_PCI_SET_XIVE_REISSUE);
-OPAL_CALL(opal_pci_set_xive_pe, OPAL_PCI_SET_XIVE_PE);
-OPAL_CALL(opal_get_xive_source, OPAL_GET_XIVE_SOURCE);
-OPAL_CALL(opal_get_msi_32, OPAL_GET_MSI_32);
-OPAL_CALL(opal_get_msi_64, OPAL_GET_MSI_64);
-OPAL_CALL(opal_start_cpu, OPAL_START_CPU);
-OPAL_CALL(opal_query_cpu_status, OPAL_QUERY_CPU_STATUS);
-OPAL_CALL(opal_write_oppanel, OPAL_WRITE_OPPANEL);
-OPAL_CALL(opal_pci_map_pe_dma_window, OPAL_PCI_MAP_PE_DMA_WINDOW);
-OPAL_CALL(opal_pci_map_pe_dma_window_real, OPAL_PCI_MAP_PE_DMA_WINDOW_REAL);
-OPAL_CALL(opal_pci_reset, OPAL_PCI_RESET);
-OPAL_CALL(opal_pci_get_hub_diag_data, OPAL_PCI_GET_HUB_DIAG_DATA);
-OPAL_CALL(opal_pci_get_phb_diag_data, OPAL_PCI_GET_PHB_DIAG_DATA);
-OPAL_CALL(opal_pci_fence_phb, OPAL_PCI_FENCE_PHB);
-OPAL_CALL(opal_pci_reinit, OPAL_PCI_REINIT);
-OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR);
-OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS);
-OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS);
-OPAL_CALL(opal_get_dpo_status, OPAL_GET_DPO_STATUS);
-OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED);
-OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR);
-OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL);
-OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI);
-OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2);
-OPAL_CALL(opal_xscom_read, OPAL_XSCOM_READ);
-OPAL_CALL(opal_xscom_write, OPAL_XSCOM_WRITE);
-OPAL_CALL(opal_lpc_read, OPAL_LPC_READ);
-OPAL_CALL(opal_lpc_write, OPAL_LPC_WRITE);
-OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU);
-OPAL_CALL(opal_reinit_cpus, OPAL_REINIT_CPUS);
-OPAL_CALL(opal_read_elog, OPAL_ELOG_READ);
-OPAL_CALL(opal_send_ack_elog, OPAL_ELOG_ACK);
-OPAL_CALL(opal_get_elog_size, OPAL_ELOG_SIZE);
-OPAL_CALL(opal_resend_pending_logs, OPAL_ELOG_RESEND);
-OPAL_CALL(opal_write_elog, OPAL_ELOG_WRITE);
-OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE);
-OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE);
-OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE);
-OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE);
-OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN);
-OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT);
-OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO);
-OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2);
-OPAL_CALL(opal_dump_read, OPAL_DUMP_READ);
-OPAL_CALL(opal_dump_ack, OPAL_DUMP_ACK);
-OPAL_CALL(opal_get_msg, OPAL_GET_MSG);
-OPAL_CALL(opal_write_oppanel_async, OPAL_WRITE_OPPANEL_ASYNC);
-OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION);
-OPAL_CALL(opal_dump_resend_notification, OPAL_DUMP_RESEND);
-OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT);
-OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
-OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
-OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
-OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
-OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
-OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
-OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
-OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
-OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CAPI_MODE);
-OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO);
-OPAL_CALL(opal_tpo_read, OPAL_READ_TPO);
-OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND);
-OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV);
-OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST);
-OPAL_CALL(opal_flash_read, OPAL_FLASH_READ);
-OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE);
-OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE);
-OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG);
-OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR);
-OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR);
-OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH);
-OPAL_CALL(opal_get_device_tree, OPAL_GET_DEVICE_TREE);
-OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE);
-OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE);
-OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE);
-OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR);
-OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR);
-OPAL_CALL(opal_int_eoi, OPAL_INT_EOI);
-OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR);
-OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL);
-OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR);
-OPAL_CALL(opal_xive_reset, OPAL_XIVE_RESET);
-OPAL_CALL(opal_xive_get_irq_info, OPAL_XIVE_GET_IRQ_INFO);
-OPAL_CALL(opal_xive_get_irq_config, OPAL_XIVE_GET_IRQ_CONFIG);
-OPAL_CALL(opal_xive_set_irq_config, OPAL_XIVE_SET_IRQ_CONFIG);
-OPAL_CALL(opal_xive_get_queue_info, OPAL_XIVE_GET_QUEUE_INFO);
-OPAL_CALL(opal_xive_set_queue_info, OPAL_XIVE_SET_QUEUE_INFO);
-OPAL_CALL(opal_xive_donate_page, OPAL_XIVE_DONATE_PAGE);
-OPAL_CALL(opal_xive_alloc_vp_block, OPAL_XIVE_ALLOCATE_VP_BLOCK);
-OPAL_CALL(opal_xive_free_vp_block, OPAL_XIVE_FREE_VP_BLOCK);
-OPAL_CALL(opal_xive_allocate_irq, OPAL_XIVE_ALLOCATE_IRQ);
-OPAL_CALL(opal_xive_free_irq, OPAL_XIVE_FREE_IRQ);
-OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
-OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
-OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
-OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
-OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET);
-OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
-OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
-OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
-OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT);
-OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START);
-OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP);
-OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P);
-OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP);
-OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP);
-OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO);
-OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO);
-OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
-OPAL_CALL(opal_quiesce, OPAL_QUIESCE);
-OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP);
-OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE);
-OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET);
-OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
-OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
-OPAL_CALL(opal_sensor_read_u64, OPAL_SENSOR_READ_U64);
-OPAL_CALL(opal_sensor_group_enable, OPAL_SENSOR_GROUP_ENABLE);
-OPAL_CALL(opal_nx_coproc_init, OPAL_NX_COPROC_INIT);
-- 
cgit v1.2.3
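A note on the hand-encoded words in the little-endian opal_return path above: they work because each 32-bit word, fetched while the CPU is still running in the opposite endian, byte-swaps into the intended instruction. The sketch below is not part of the patch; the "native" encodings were derived by hand from the Power ISA and should be treated as illustrative assumptions, as should the file name, and it relies on a GCC/Clang-style __builtin_bswap32. It swaps each stored word and checks it against the mnemonic given in the comments:

	/* check-endian.c -- cc -o check-endian check-endian.c && ./check-endian */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* stored: the word as placed in the wrong-endian stream;
		 * native: the normal encoding of the commented mnemonic. */
		static const struct {
			uint32_t stored, native;
			const char *mnem;
		} t[] = {
			{ 0x280c6a7d, 0x7d6a0c28, "ldbrx r11,r10,r1" },
			{ 0x05009f42, 0x429f0005, "bcl 20,31,$+4" },
			{ 0xa602487d, 0x7d4802a6, "mflr r10" },
			{ 0x14004a39, 0x394a0014, "addi r10,r10,20" },
			{ 0xa64b5a7d, 0x7d5a4ba6, "mthsrr0 r10" },
			{ 0xa64b7b7d, 0x7d7b4ba6, "mthsrr1 r11" },
			{ 0x2402004c, 0x4c000224, "hrfid" },
		};
		unsigned int i;

		for (i = 0; i < sizeof(t) / sizeof(t[0]); i++)
			printf("%08x -> %08x  %-18s %s\n", t[i].stored,
			       __builtin_bswap32(t[i].stored), t[i].mnem,
			       __builtin_bswap32(t[i].stored) == t[i].native ?
			       "ok" : "MISMATCH");
		return 0;
	}

The .short/.byte triplet at the head of the sequence is built the same way: assembled little-endian it lands in memory as the bytes 39 40 hh ll, which an opposite-endian fetch reads as li r10,STK_PARAM(R12).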
From bd3524feac214f0ab9693c6d4c0cb5be8e1318b9 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Fri, 1 Mar 2019 22:56:36 +1000
Subject: powerpc/64s: Fix unrelocated interrupt trampoline address test

The recent commit got this test wrong: it declared the assembler
symbols the wrong way, and also used the wrong symbol name
(xxx_start rather than start_xxx, see asm/head-64.h).

Fixes: ccd477028a ("powerpc/64s: Fix HV NMI vs HV interrupt recoverability test")
Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
---
 arch/powerpc/include/asm/asm-prototypes.h | 8 --------
 arch/powerpc/include/asm/sections.h       | 7 +++++++
 arch/powerpc/kernel/exceptions-64s.S      | 8 ++++----
 arch/powerpc/kernel/traps.c               | 9 +++++----
 4 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'arch/powerpc/include/asm')

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index effdd096fa4c..296584e6dd55 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -49,14 +49,6 @@ int exit_vmx_usercopy(void);
 int enter_vmx_ops(void);
 void *exit_vmx_ops(void *dest);
 
-/* Exceptions */
-#ifdef CONFIG_PPC_POWERNV
-extern unsigned long real_trampolines_start;
-extern unsigned long real_trampolines_end;
-extern unsigned long virt_trampolines_start;
-extern unsigned long virt_trampolines_end;
-#endif
-
 /* Traps */
 long machine_check_early(struct pt_regs *regs);
 long hmi_exception_realmode(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index e335a8f846af..4a1664a8658d 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -17,6 +17,13 @@ extern char __end_interrupts[];
 extern char __prom_init_toc_start[];
 extern char __prom_init_toc_end[];
 
+#ifdef CONFIG_PPC_POWERNV
+extern char start_real_trampolines[];
+extern char end_real_trampolines[];
+extern char start_virt_trampolines[];
+extern char end_virt_trampolines[];
+#endif
+
 static inline int in_kernel_text(unsigned long addr)
 {
 	if (addr >= (unsigned long)_stext && addr < (unsigned long)__init_end)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 99312328ee66..a5b8fbae56a0 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -70,10 +70,10 @@ OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900)
 OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000)
 
 #ifdef CONFIG_PPC_POWERNV
-	.globl real_trampolines_start
-	.globl real_trampolines_end
-	.globl virt_trampolines_start
-	.globl virt_trampolines_end
+	.globl start_real_trampolines
+	.globl end_real_trampolines
+	.globl start_virt_trampolines
+	.globl end_virt_trampolines
 #endif
 
 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index a5757bef03cd..a21200c6aaea 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -419,12 +419,13 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
 		goto nonrecoverable;
 	if ((nip >= 0xf80 && nip < 0xfa0) || (nip >= 0x4f80 && nip < 0x4fa0))
 		goto nonrecoverable;
+	/* Trampoline code runs un-relocated so subtract kbase. */
 
-	if (nip >= real_trampolines_start - kbase &&
-			nip < real_trampolines_end - kbase)
+	if (nip >= (unsigned long)(start_real_trampolines - kbase) &&
+			nip < (unsigned long)(end_real_trampolines - kbase))
 		goto nonrecoverable;
-	if (nip >= virt_trampolines_start - kbase &&
-			nip < virt_trampolines_end - kbase)
+	if (nip >= (unsigned long)(start_virt_trampolines - kbase) &&
+			nip < (unsigned long)(end_virt_trampolines - kbase))
 		goto nonrecoverable;
 	return;
-- 
cgit v1.2.3
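The root cause this fix addresses is worth spelling out. For a symbol defined by an assembler label or linker script, only its address is meaningful: declaring it as extern char foo[] makes the C expression foo evaluate to that address, while the old extern unsigned long foo declarations made every use load eight bytes from the label, so the range checks compared nip against instruction bytes rather than addresses. A self-contained sketch of the pitfall follows; the names are hypothetical, and an ordinary array stands in for the assembler label:

	/* symdemo.c -- cc -o symdemo symdemo.c && ./symdemo */
	#include <stdio.h>
	#include <string.h>

	/* Stand-in for a label such as start_real_trampolines; in the
	 * kernel the storage behind it comes from exceptions-64s.S. */
	char start_marker[8] = { 0x60, 0x00, 0x00, 0x00 }; /* nop-ish bytes */

	int main(void)
	{
		/* char[] declaration style: the symbol itself is the address. */
		unsigned long as_address = (unsigned long)start_marker;

		/* What the buggy 'extern unsigned long' style amounted to:
		 * a load from the label rather than the label's address. */
		unsigned long as_load;
		memcpy(&as_load, start_marker, sizeof(as_load));

		printf("address of label: 0x%lx\n", as_address);
		printf("load from label : 0x%lx\n", as_load);
		return 0;
	}

The (unsigned long) casts in the corrected traps.c hunk exist for the same reason: start_real_trampolines - kbase is char-pointer arithmetic, so the result must be converted before it can be compared with nip.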