From 8153a5ead0898ba5a932282e571dfccd61940bba Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Jul 2017 22:33:19 -0400 Subject: ppc: annotate ->poll() instances Signed-off-by: Al Viro --- arch/powerpc/kernel/rtasd.c | 2 +- arch/powerpc/platforms/cell/spufs/backing_ops.c | 6 +++--- arch/powerpc/platforms/cell/spufs/file.c | 16 ++++++++-------- arch/powerpc/platforms/cell/spufs/hw_ops.c | 5 ++--- arch/powerpc/platforms/cell/spufs/spufs.h | 3 +-- arch/powerpc/platforms/powernv/opal-prd.c | 2 +- 6 files changed, 16 insertions(+), 18 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 0f0b1b2f3b60..1da8b7d8c6ca 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -388,7 +388,7 @@ out: return error; } -static unsigned int rtas_log_poll(struct file *file, poll_table * wait) +static __poll_t rtas_log_poll(struct file *file, poll_table * wait) { poll_wait(file, &rtas_log_wait, wait); if (rtas_log_size) diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c index 6e8a9ef8590e..1a9a756b0b2f 100644 --- a/arch/powerpc/platforms/cell/spufs/backing_ops.c +++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c @@ -86,10 +86,10 @@ static u32 spu_backing_mbox_stat_read(struct spu_context *ctx) return ctx->csa.prob.mb_stat_R; } -static unsigned int spu_backing_mbox_stat_poll(struct spu_context *ctx, - unsigned int events) +static __poll_t spu_backing_mbox_stat_poll(struct spu_context *ctx, + __poll_t events) { - int ret; + __poll_t ret; u32 stat; ret = 0; diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 5ffcdeb1eb17..fc7772c3d068 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -762,10 +762,10 @@ out: return count; } -static unsigned int spufs_ibox_poll(struct file *file, poll_table *wait) +static __poll_t spufs_ibox_poll(struct file *file, poll_table *wait) { struct spu_context *ctx = file->private_data; - unsigned int mask; + __poll_t mask; poll_wait(file, &ctx->ibox_wq, wait); @@ -898,10 +898,10 @@ out: return count; } -static unsigned int spufs_wbox_poll(struct file *file, poll_table *wait) +static __poll_t spufs_wbox_poll(struct file *file, poll_table *wait) { struct spu_context *ctx = file->private_data; - unsigned int mask; + __poll_t mask; poll_wait(file, &ctx->wbox_wq, wait); @@ -1690,11 +1690,11 @@ out: return ret; } -static unsigned int spufs_mfc_poll(struct file *file,poll_table *wait) +static __poll_t spufs_mfc_poll(struct file *file,poll_table *wait) { struct spu_context *ctx = file->private_data; u32 free_elements, tagstatus; - unsigned int mask; + __poll_t mask; poll_wait(file, &ctx->mfc_wq, wait); @@ -2455,11 +2455,11 @@ static ssize_t spufs_switch_log_read(struct file *file, char __user *buf, return cnt == 0 ? error : cnt; } -static unsigned int spufs_switch_log_poll(struct file *file, poll_table *wait) +static __poll_t spufs_switch_log_poll(struct file *file, poll_table *wait) { struct inode *inode = file_inode(file); struct spu_context *ctx = SPUFS_I(inode)->i_ctx; - unsigned int mask = 0; + __poll_t mask = 0; int rc; poll_wait(file, &ctx->switch_log->wait, wait); diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c index 8655c4cbefc2..fff58198b5b6 100644 --- a/arch/powerpc/platforms/cell/spufs/hw_ops.c +++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c @@ -56,11 +56,10 @@ static u32 spu_hw_mbox_stat_read(struct spu_context *ctx) return in_be32(&ctx->spu->problem->mb_stat_R); } -static unsigned int spu_hw_mbox_stat_poll(struct spu_context *ctx, - unsigned int events) +static __poll_t spu_hw_mbox_stat_poll(struct spu_context *ctx, __poll_t events) { struct spu *spu = ctx->spu; - int ret = 0; + __poll_t ret = 0; u32 stat; spin_lock_irq(&spu->register_lock); diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 5e59f80e95db..2d0479ad3af4 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -185,8 +185,7 @@ struct mfc_dma_command { struct spu_context_ops { int (*mbox_read) (struct spu_context * ctx, u32 * data); u32(*mbox_stat_read) (struct spu_context * ctx); - unsigned int (*mbox_stat_poll)(struct spu_context *ctx, - unsigned int events); + __poll_t (*mbox_stat_poll)(struct spu_context *ctx, __poll_t events); int (*ibox_read) (struct spu_context * ctx, u32 * data); int (*wbox_write) (struct spu_context * ctx, u32 data); u32(*signal1_read) (struct spu_context * ctx); diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index de4dd09f4a15..c18de0a9b1bd 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -147,7 +147,7 @@ static bool opal_msg_queue_empty(void) return ret; } -static unsigned int opal_prd_poll(struct file *file, +static __poll_t opal_prd_poll(struct file *file, struct poll_table_struct *wait) { poll_wait(file, &opal_prd_msg_wait, wait); -- cgit v1.2.3 From c8ae67fe02ff4c0e9b0007d60cef3a8ef64eafaf Mon Sep 17 00:00:00 2001 From: Pankaj Bansal Date: Fri, 24 Nov 2017 18:52:11 +0530 Subject: powerpc: dts: P1010: Add endianness property to flexcan node The flexcan driver assumed that flexcan controller is big endian for powerpc architecture and little endian for other architectures. But this is not universally true. flexcan controller can be little or big endian on any architecture. Therefore the flexcan driver has been modified to check for "big-endian" device tree property for controllers that are big endian. consequently add the property to freescale P1010 SOC device tree. Signed-off-by: Pankaj Bansal Reviewed-by: Poonam Aggrwal Signed-off-by: Marc Kleine-Budde --- arch/powerpc/boot/dts/fsl/p1010si-post.dtsi | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi index af12ead88c5f..1b4aafc1f6a2 100644 --- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi @@ -137,12 +137,14 @@ compatible = "fsl,p1010-flexcan"; reg = <0x1c000 0x1000>; interrupts = <48 0x2 0 0>; + big-endian; }; can1: can@1d000 { compatible = "fsl,p1010-flexcan"; reg = <0x1d000 0x1000>; interrupts = <61 0x2 0 0>; + big-endian; }; L2: l2-cache-controller@20000 { -- cgit v1.2.3 From f2c2cbcc35d47f1471a04155ac357521f5170371 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 24 Oct 2016 21:00:08 -0700 Subject: powerpc: Use pr_warn instead of pr_warning At some point, pr_warning will be removed so all logging messages use a consistent _warn style. Update arch/powerpc/ Miscellanea: o Coalesce formats o Realign arguments o Use %s, __func__ instead of embedded function names o Remove unnecessary line continuations Signed-off-by: Joe Perches Acked-by: Geoff Levand [mpe: Rebase due to some %pOF changes.] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 4 ++-- arch/powerpc/mm/init_64.c | 5 ++--- arch/powerpc/mm/mem.c | 3 +-- arch/powerpc/platforms/512x/mpc512x_shared.c | 4 ++-- arch/powerpc/platforms/85xx/socrates_fpga_pic.c | 7 +++---- arch/powerpc/platforms/86xx/mpc86xx_hpcn.c | 2 +- arch/powerpc/platforms/pasemi/dma_lib.c | 4 ++-- arch/powerpc/platforms/powernv/opal.c | 8 ++++---- arch/powerpc/platforms/powernv/pci-ioda.c | 10 +++++----- arch/powerpc/platforms/ps3/device-init.c | 12 +++++------- arch/powerpc/platforms/ps3/mm.c | 4 ++-- arch/powerpc/platforms/ps3/os-area.c | 2 +- arch/powerpc/platforms/pseries/iommu.c | 8 ++++---- arch/powerpc/platforms/pseries/setup.c | 4 ++-- arch/powerpc/sysdev/fsl_pci.c | 8 ++++---- arch/powerpc/sysdev/mpic.c | 10 ++++------ arch/powerpc/sysdev/xics/icp-native.c | 10 ++++------ arch/powerpc/sysdev/xics/ics-opal.c | 4 ++-- arch/powerpc/sysdev/xics/ics-rtas.c | 4 ++-- arch/powerpc/sysdev/xics/xics-common.c | 8 ++++---- 20 files changed, 56 insertions(+), 65 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 0ac7aa346c69..7cd2803e2cc3 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1276,8 +1276,8 @@ static void pcibios_allocate_bus_resources(struct pci_bus *bus) i + PCI_BRIDGE_RESOURCES) == 0) continue; } - pr_warning("PCI: Cannot allocate resource region " - "%d of PCI bridge %d, will remap\n", i, bus->number); + pr_warn("PCI: Cannot allocate resource region %d of PCI bridge %d, will remap\n", + i, bus->number); clear_resource: /* The resource might be figured out when doing * reassignment based on the resources required diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a07722531b32..f6eb7e8f4c93 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -214,9 +214,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) rc = vmemmap_create_mapping(start, page_size, __pa(p)); if (rc < 0) { - pr_warning( - "vmemmap_populate: Unable to create vmemmap mapping: %d\n", - rc); + pr_warn("%s: Unable to create vmemmap mapping: %d\n", + __func__, rc); return -EFAULT; } } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 4362b86ef84c..1281c6eb3a85 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -138,8 +138,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) start = (unsigned long)__va(start); rc = create_section_mapping(start, start + size); if (rc) { - pr_warning( - "Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", + pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", start, start + size, rc); return -EFAULT; } diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index f99e79ee060e..48abb4cb304c 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -387,8 +387,8 @@ static unsigned int __init get_fifo_size(struct device_node *np, if (fp) return *fp; - pr_warning("no %s property in %pOF node, defaulting to %d\n", - prop_name, np, DEFAULT_FIFO_SIZE); + pr_warn("no %s property in %pOF node, defaulting to %d\n", + prop_name, np, DEFAULT_FIFO_SIZE); return DEFAULT_FIFO_SIZE; } diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c index 82f8490b5aa7..38d4ba9f37b5 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -252,8 +252,7 @@ static int socrates_fpga_pic_host_xlate(struct irq_domain *h, /* type is configurable */ if (intspec[1] != IRQ_TYPE_LEVEL_LOW && intspec[1] != IRQ_TYPE_LEVEL_HIGH) { - pr_warning("FPGA PIC: invalid irq type, " - "setting default active low\n"); + pr_warn("FPGA PIC: invalid irq type, setting default active low\n"); *out_flags = IRQ_TYPE_LEVEL_LOW; } else { *out_flags = intspec[1]; @@ -267,7 +266,7 @@ static int socrates_fpga_pic_host_xlate(struct irq_domain *h, if (intspec[2] <= 2) fpga_irq->irq_line = intspec[2]; else - pr_warning("FPGA PIC: invalid irq routing\n"); + pr_warn("FPGA PIC: invalid irq routing\n"); return 0; } @@ -293,7 +292,7 @@ void socrates_fpga_pic_init(struct device_node *pic) for (i = 0; i < 3; i++) { socrates_fpga_irqs[i] = irq_of_parse_and_map(pic, i); if (!socrates_fpga_irqs[i]) { - pr_warning("FPGA PIC: can't get irq%d.\n", i); + pr_warn("FPGA PIC: can't get irq%d\n", i); continue; } irq_set_chained_handler(socrates_fpga_irqs[i], diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index a0e989ed4b6f..17c6cd3d02e6 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -101,7 +101,7 @@ static int __init mpc86xx_hpcn_probe(void) /* Be nice and don't give silent boot death. Delete this in 2.6.27 */ if (of_machine_is_compatible("mpc86xx")) { - pr_warning("WARNING: your dts/dtb is old. You must update before the next kernel release\n"); + pr_warn("WARNING: your dts/dtb is old. You must update before the next kernel release.\n"); return 1; } diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c index aafa01ba062f..2c72263ad6ab 100644 --- a/arch/powerpc/platforms/pasemi/dma_lib.c +++ b/arch/powerpc/platforms/pasemi/dma_lib.c @@ -589,7 +589,7 @@ int pasemi_dma_init(void) pasemi_write_dma_reg(PAS_DMA_COM_RXCMD, 0); while (pasemi_read_dma_reg(PAS_DMA_COM_RXSTA) & 1) { if (time_after(jiffies, timeout)) { - pr_warning("Warning: Could not disable RX section\n"); + pr_warn("Warning: Could not disable RX section\n"); break; } } @@ -598,7 +598,7 @@ int pasemi_dma_init(void) pasemi_write_dma_reg(PAS_DMA_COM_TXCMD, 0); while (pasemi_read_dma_reg(PAS_DMA_COM_TXSTA) & 1) { if (time_after(jiffies, timeout)) { - pr_warning("Warning: Could not disable TX section\n"); + pr_warn("Warning: Could not disable TX section\n"); break; } } diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 041ddbd1fc57..2c1aa9cef66d 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -239,8 +239,8 @@ int opal_message_notifier_register(enum opal_msg_type msg_type, struct notifier_block *nb) { if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) { - pr_warning("%s: Invalid arguments, msg_type:%d\n", - __func__, msg_type); + pr_warn("%s: Invalid arguments, msg_type:%d\n", + __func__, msg_type); return -EINVAL; } @@ -281,8 +281,8 @@ static void opal_handle_message(void) /* check for errors. */ if (ret) { - pr_warning("%s: Failed to retrieve opal message, err=%lld\n", - __func__, ret); + pr_warn("%s: Failed to retrieve opal message, err=%lld\n", + __func__, ret); return; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 749055553064..080ee202a173 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1059,8 +1059,8 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) pe = pnv_ioda_alloc_pe(phb); if (!pe) { - pr_warning("%s: Not enough PE# available, disabling device\n", - pci_name(dev)); + pr_warn("%s: Not enough PE# available, disabling device\n", + pci_name(dev)); return NULL; } @@ -1164,7 +1164,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) pe = pnv_ioda_alloc_pe(phb); if (!pe) { - pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n", + pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n", __func__, pci_domain_nr(bus), bus->number); return NULL; } @@ -3293,7 +3293,7 @@ static void pnv_pci_ioda_create_dbgfs(void) sprintf(name, "PCI%04x", hose->global_number); phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); if (!phb->dbgfs) { - pr_warning("%s: Error on creating debugfs on PHB#%x\n", + pr_warn("%s: Error on creating debugfs on PHB#%x\n", __func__, hose->global_number); continue; } @@ -4026,7 +4026,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Reset IODA tables to a clean state */ rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); if (rc) - pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); + pr_warn(" OPAL Error %ld performing IODA table reset !\n", rc); /* * If we're running in kdump kernel, the previous kernel never diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index e48462447ff0..e7075aaff1bb 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -663,8 +663,8 @@ static void ps3_find_and_add_device(u64 bus_id, u64 dev_id) if (rem) break; } - pr_warning("%s:%u: device %llu:%llu not found\n", __func__, __LINE__, - bus_id, dev_id); + pr_warn("%s:%u: device %llu:%llu not found\n", + __func__, __LINE__, bus_id, dev_id); return; found: @@ -859,11 +859,9 @@ static int ps3_probe_thread(void *data) if (notify_event->event_type != notify_region_probe || notify_event->bus_id != dev.sbd.bus_id) { - pr_warning("%s:%u: bad notify_event: event %llu, " - "dev_id %llu, dev_type %llu\n", - __func__, __LINE__, notify_event->event_type, - notify_event->dev_id, - notify_event->dev_type); + pr_warn("%s:%u: bad notify_event: event %llu, dev_id %llu, dev_type %llu\n", + __func__, __LINE__, notify_event->event_type, + notify_event->dev_id, notify_event->dev_type); continue; } diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index b0f34663b1ae..7f870ec29daf 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -607,8 +607,8 @@ static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr, r->ioid, iopte_flag); if (result) { - pr_warning("%s:%d: lv1_put_iopte failed: %s\n", - __func__, __LINE__, ps3_result(result)); + pr_warn("%s:%d: lv1_put_iopte failed: %s\n", + __func__, __LINE__, ps3_result(result)); goto fail_map; } DBG("%s: pg=%d bus=%#lx, lpar=%#lx, ioid=%#x\n", __func__, diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c index 3db53e8aff92..cdbfc5cfd6f3 100644 --- a/arch/powerpc/platforms/ps3/os-area.c +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -699,7 +699,7 @@ static void os_area_queue_work_handler(struct work_struct *work) error = update_flash_db(); if (error) - pr_warning("%s: Could not update FLASH ROM\n", __func__); + pr_warn("%s: Could not update FLASH ROM\n", __func__); pr_debug(" <- %s:%d\n", __func__, __LINE__); } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 69921f72e2da..5fe77ac2e955 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -816,15 +816,15 @@ static void remove_ddw(struct device_node *np, bool remove_prop) ret = tce_clearrange_multi_pSeriesLP(0, 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp); if (ret) - pr_warning("%pOF failed to clear tces in window.\n", - np); + pr_warn("%pOF failed to clear tces in window.\n", + np); else pr_debug("%pOF successfully cleared tces in window.\n", np); ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn); if (ret) - pr_warning("%pOF: failed to remove direct window: rtas returned " + pr_warn("%pOF: failed to remove direct window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", np, ret, ddw_avail[2], liobn); else @@ -836,7 +836,7 @@ delprop: if (remove_prop) ret = of_remove_property(np, win64); if (ret) - pr_warning("%pOF: failed to remove direct window property: %d\n", + pr_warn("%pOF: failed to remove direct window property: %d\n", np, ret); } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 5f1beb8367ac..1d6e2de2445c 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -371,8 +371,8 @@ void pseries_disable_reloc_on_exc(void) mdelay(get_longbusy_msecs(rc)); } if (rc != H_SUCCESS) - pr_warning("Warning: Failed to disable relocation on " - "exceptions: %ld\n", rc); + pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n", + rc); } EXPORT_SYMBOL(pseries_disable_reloc_on_exc); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 22d98057f773..1e57edd4947a 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -448,7 +448,7 @@ static void setup_pci_atmu(struct pci_controller *hose) #endif /* adjusting outbound windows could reclaim space in mem map */ if (paddr_hi < 0xffffffffull) - pr_warning("%pOF: WARNING: Outbound window cfg leaves " + pr_warn("%pOF: WARNING: Outbound window cfg leaves " "gaps in memory map. Adjusting the memory map " "could reduce unnecessary bounce buffering.\n", hose->dn); @@ -531,7 +531,7 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) dev = pdev->dev.of_node; if (!of_device_is_available(dev)) { - pr_warning("%pOF: disabled\n", dev); + pr_warn("%pOF: disabled\n", dev); return -ENODEV; } @@ -808,8 +808,8 @@ int __init mpc83xx_add_bridge(struct device_node *dev) is_mpc83xx_pci = 1; if (!of_device_is_available(dev)) { - pr_warning("%pOF: disabled by the firmware.\n", - dev); + pr_warn("%pOF: disabled by the firmware.\n", + dev); return -ENODEV; } pr_debug("Adding PCI host bridge %pOF\n", dev); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index ead3e2549ebf..73067805300a 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1008,9 +1008,8 @@ static int mpic_host_map(struct irq_domain *h, unsigned int virq, if (hw == mpic->spurious_vec) return -EINVAL; if (mpic->protected && test_bit(hw, mpic->protected)) { - pr_warning("mpic: Mapping of source 0x%x failed, " - "source protected by firmware !\n",\ - (unsigned int)hw); + pr_warn("mpic: Mapping of source 0x%x failed, source protected by firmware !\n", + (unsigned int)hw); return -EPERM; } @@ -1040,9 +1039,8 @@ static int mpic_host_map(struct irq_domain *h, unsigned int virq, return 0; if (hw >= mpic->num_sources) { - pr_warning("mpic: Mapping of source 0x%x failed, " - "source out of range !\n",\ - (unsigned int)hw); + pr_warn("mpic: Mapping of source 0x%x failed, source out of range !\n", + (unsigned int)hw); return -EINVAL; } diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index 2bfb9968d562..1459f4e8b698 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -241,18 +241,16 @@ static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr, cpu, hw_id); if (!request_mem_region(addr, size, rname)) { - pr_warning("icp_native: Could not reserve ICP MMIO" - " for CPU %d, interrupt server #0x%x\n", - cpu, hw_id); + pr_warn("icp_native: Could not reserve ICP MMIO for CPU %d, interrupt server #0x%x\n", + cpu, hw_id); return -EBUSY; } icp_native_regs[cpu] = ioremap(addr, size); kvmppc_set_xics_phys(cpu, addr); if (!icp_native_regs[cpu]) { - pr_warning("icp_native: Failed ioremap for CPU %d, " - "interrupt server #0x%x, addr %#lx\n", - cpu, hw_id, addr); + pr_warn("icp_native: Failed ioremap for CPU %d, interrupt server #0x%x, addr %#lx\n", + cpu, hw_id, addr); release_mem_region(addr, size); return -ENOMEM; } diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index 1c6bf4b66f56..f85f916ba432 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -131,8 +131,8 @@ static int ics_opal_set_affinity(struct irq_data *d, wanted_server = xics_get_irq_server(d->irq, cpumask, 1); if (wanted_server < 0) { - pr_warning("%s: No online cpus in the mask %*pb for irq %d\n", - __func__, cpumask_pr_args(cpumask), d->irq); + pr_warn("%s: No online cpus in the mask %*pb for irq %d\n", + __func__, cpumask_pr_args(cpumask), d->irq); return -1; } server = ics_opal_mangle_server(wanted_server); diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index 42e0c56ff81c..6aabc74688a6 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -141,8 +141,8 @@ static int ics_rtas_set_affinity(struct irq_data *d, irq_server = xics_get_irq_server(d->irq, cpumask, 1); if (irq_server == -1) { - pr_warning("%s: No online cpus in the mask %*pb for irq %d\n", - __func__, cpumask_pr_args(cpumask), d->irq); + pr_warn("%s: No online cpus in the mask %*pb for irq %d\n", + __func__, cpumask_pr_args(cpumask), d->irq); return -1; } diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index ffe138b8b9dc..77e864d5506d 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -243,8 +243,8 @@ void xics_migrate_irqs_away(void) /* This is expected during cpu offline. */ if (cpu_online(cpu)) - pr_warning("IRQ %u affinity broken off cpu %u\n", - virq, cpu); + pr_warn("IRQ %u affinity broken off cpu %u\n", + virq, cpu); /* Reset affinity to all cpus */ raw_spin_unlock_irqrestore(&desc->lock, flags); @@ -466,7 +466,7 @@ void __init xics_init(void) rc = icp_opal_init(); } if (rc < 0) { - pr_warning("XICS: Cannot find a Presentation Controller !\n"); + pr_warn("XICS: Cannot find a Presentation Controller !\n"); return; } @@ -481,7 +481,7 @@ void __init xics_init(void) if (rc < 0) rc = ics_opal_init(); if (rc < 0) - pr_warning("XICS: Cannot find a Source Controller !\n"); + pr_warn("XICS: Cannot find a Source Controller !\n"); /* Initialize common bits */ xics_get_server_size(); -- cgit v1.2.3 From 2aaf0a198d7af53a946ad69976d3cc7e7673d13f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 31 Jan 2017 09:43:55 -0800 Subject: powerpc/powermac: fix OF node refcount leak We need to call of_node_put() for device nodes obtained with of_find_node_by_name(). Signed-off-by: Dmitry Torokhov Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powermac/pic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 5e0719b27294..666fceda5e48 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -604,6 +604,7 @@ static int pmacpic_find_viaint(void) if (np == NULL) goto not_found; viaint = irq_of_parse_and_map(np, 0); + of_node_put(np); not_found: #endif /* CONFIG_ADB_PMU */ -- cgit v1.2.3 From df26200299eb05fa7d059cd235847efc4c4baf80 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 31 Jan 2017 10:01:44 -0800 Subject: powerpc/powermac: drop useless call to of_find_node_by_name We are not using result, so this simply results in a leaked refcount. Signed-off-by: Dmitry Torokhov Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powermac/feature.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 9e3f39d36e88..80a31f581d36 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -2853,7 +2853,6 @@ set_initial_features(void) } /* Enable ATA-100 before PCI probe. */ - np = of_find_node_by_name(NULL, "ata-6"); for_each_node_by_name(np, "ata-6") { if (np->parent && of_device_is_compatible(np->parent, "uni-north") -- cgit v1.2.3 From 0aa8ff9b76282300d16e0a1403b115996ff88a4c Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 31 Jan 2017 17:54:37 -0800 Subject: powerpc: Use of for_each_node_by_name() instead of open-coding it Instead of manually coding the loop with of_find_node_by_name(), let's switch to the standard macro for iterating over nodes with given name. Signed-off-by: Dmitry Torokhov [mpe: Fix build failures due to typo in mpc832x_mds.c] Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/83xx/mpc832x_mds.c | 2 +- arch/powerpc/platforms/83xx/mpc832x_rdb.c | 2 +- arch/powerpc/platforms/83xx/mpc836x_mds.c | 2 +- arch/powerpc/platforms/cell/interrupt.c | 3 +-- arch/powerpc/platforms/cell/setup.c | 3 +-- arch/powerpc/platforms/cell/spider-pic.c | 3 +-- arch/powerpc/platforms/powermac/feature.c | 2 +- 7 files changed, 7 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c index bb7b25acf26f..74c154e67c8b 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c @@ -75,7 +75,7 @@ static void __init mpc832x_sys_setup_arch(void) par_io_init(np); of_node_put(np); - for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;) + for_each_node_by_name(np, "ucc") par_io_of_config(np); } diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index a4539c5accb0..438986593873 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -204,7 +204,7 @@ static void __init mpc832x_rdb_setup_arch(void) par_io_init(np); of_node_put(np); - for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;) + for_each_node_by_name(np, "ucc") par_io_of_config(np); } #endif /* CONFIG_QUICC_ENGINE */ diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c index 4fc3051c2b2e..fd44dd03e1f3 100644 --- a/arch/powerpc/platforms/83xx/mpc836x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c @@ -83,7 +83,7 @@ static void __init mpc836x_mds_setup_arch(void) par_io_init(np); of_node_put(np); - for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;) + for_each_node_by_name(np, "ucc") par_io_of_config(np); #ifdef CONFIG_QE_USB /* Must fixup Par IO before QE GPIO chips are registered. */ diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 6fc85e29dc08..5d4bf9aed51a 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -315,8 +315,7 @@ static int __init setup_iic(void) struct cbe_iic_regs __iomem *node_iic; const u32 *np; - for (dn = NULL; - (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) { + for_each_node_by_name(dn, "interrupt-controller") { if (!of_device_is_compatible(dn, "IBM,CBEA-Internal-Interrupt-Controller")) continue; diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index d3543e68efe8..7d31b8d14661 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -192,8 +192,7 @@ static void __init mpic_init_IRQ(void) struct device_node *dn; struct mpic *mpic; - for (dn = NULL; - (dn = of_find_node_by_name(dn, "interrupt-controller"));) { + for_each_node_by_name(dn, "interrupt-controller") { if (!of_device_is_compatible(dn, "CBEA,platform-open-pic")) continue; diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index aa44bfc46467..c137f0cb4151 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -343,8 +343,7 @@ void __init spider_init_IRQ(void) * device-tree is bogus anyway) so all we can do is pray or maybe test * the address and deduce the node-id */ - for (dn = NULL; - (dn = of_find_node_by_name(dn, "interrupt-controller"));) { + for_each_node_by_name(dn, "interrupt-controller") { if (of_device_is_compatible(dn, "CBEA,platform-spider-pic")) { if (of_address_to_resource(dn, 0, &r)) { printk(KERN_WARNING "spider-pic: Failed\n"); diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 80a31f581d36..466b84234683 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -2641,7 +2641,7 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ phys_addr_t addr; u64 size; - for (node = NULL; (node = of_find_node_by_name(node, name)) != NULL;) { + for_each_node_by_name(node, name) { if (!compat) break; if (of_device_is_compatible(node, compat)) -- cgit v1.2.3 From 5c929885f1bb4b77f85b1769c49405a0e0f154a1 Mon Sep 17 00:00:00 2001 From: Santosh Sivaraj Date: Mon, 16 Oct 2017 11:19:14 +0530 Subject: powerpc/vdso64: Add support for CLOCK_{REALTIME/MONOTONIC}_COARSE Current vDSO64 implementation does not have support for coarse clocks (CLOCK_MONOTONIC_COARSE, CLOCK_REALTIME_COARSE), for which it falls back to system call, increasing the response time, vDSO implementation reduces the cycle time. Below is a benchmark of the difference in execution times. (Non-coarse clocks are also included just for completion) clock-gettime-realtime: syscall: 172 nsec/call clock-gettime-realtime: libc: 28 nsec/call clock-gettime-realtime: vdso: 22 nsec/call clock-gettime-monotonic: syscall: 171 nsec/call clock-gettime-monotonic: libc: 30 nsec/call clock-gettime-monotonic: vdso: 25 nsec/call clock-gettime-realtime-coarse: syscall: 153 nsec/call clock-gettime-realtime-coarse: libc: 16 nsec/call clock-gettime-realtime-coarse: vdso: 10 nsec/call clock-gettime-monotonic-coarse: syscall: 167 nsec/call clock-gettime-monotonic-coarse: libc: 17 nsec/call clock-gettime-monotonic-coarse: vdso: 11 nsec/call CC: Benjamin Herrenschmidt Reviewed-by: Naveen N. Rao Signed-off-by: Santosh Sivaraj Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/asm-offsets.c | 2 + arch/powerpc/kernel/vdso64/gettimeofday.S | 67 ++++++++++++++++++++++++++----- 2 files changed, 58 insertions(+), 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6b958414b4e0..3f6316bcde4e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -396,6 +396,8 @@ int main(void) /* Other bits used by the vdso */ DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); + DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 382021324883..c002adcc694c 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -64,6 +64,12 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) cmpwi cr0,r3,CLOCK_REALTIME cmpwi cr1,r3,CLOCK_MONOTONIC cror cr0*4+eq,cr0*4+eq,cr1*4+eq + + cmpwi cr5,r3,CLOCK_REALTIME_COARSE + cmpwi cr6,r3,CLOCK_MONOTONIC_COARSE + cror cr5*4+eq,cr5*4+eq,cr6*4+eq + + cror cr0*4+eq,cr0*4+eq,cr5*4+eq bne cr0,99f mflr r12 /* r12 saves lr */ @@ -72,6 +78,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) bl V_LOCAL_FUNC(__get_datapage) /* get data page */ lis r7,NSEC_PER_SEC@h /* want nanoseconds */ ori r7,r7,NSEC_PER_SEC@l + beq cr5,70f 50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */ bne cr1,80f /* if not monotonic, all done */ @@ -97,19 +104,57 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) ld r0,CFG_TB_UPDATE_COUNT(r3) cmpld cr0,r0,r8 /* check if updated */ bne- 50b + b 78f - /* Add wall->monotonic offset and check for overflow or underflow. + /* + * For coarse clocks we get data directly from the vdso data page, so + * we don't need to call __do_get_tspec, but we still need to do the + * counter trick. */ - add r4,r4,r6 - add r5,r5,r9 - cmpd cr0,r5,r7 - cmpdi cr1,r5,0 - blt 1f - subf r5,r7,r5 - addi r4,r4,1 -1: bge cr1,80f - addi r4,r4,-1 - add r5,r5,r7 +70: ld r8,CFG_TB_UPDATE_COUNT(r3) + andi. r0,r8,1 /* pending update ? loop */ + bne- 70b + add r3,r3,r0 /* r0 is already 0 */ + + /* + * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE + * too + */ + ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3) + ld r5,STAMP_XTIME+TSPC64_TV_NSEC(r3) + bne cr6,75f + + /* CLOCK_MONOTONIC_COARSE */ + lwa r6,WTOM_CLOCK_SEC(r3) + lwa r9,WTOM_CLOCK_NSEC(r3) + + /* check if counter has updated */ + or r0,r6,r9 +75: or r0,r0,r4 + or r0,r0,r5 + xor r0,r0,r0 + add r3,r3,r0 + ld r0,CFG_TB_UPDATE_COUNT(r3) + cmpld cr0,r0,r8 /* check if updated */ + bne- 70b + + /* Counter has not updated, so continue calculating proper values for + * sec and nsec if monotonic coarse, or just return with the proper + * values for realtime. + */ + bne cr6,80f + + /* Add wall->monotonic offset and check for overflow or underflow */ +78: add r4,r4,r6 + add r5,r5,r9 + cmpd cr0,r5,r7 + cmpdi cr1,r5,0 + blt 79f + subf r5,r7,r5 + addi r4,r4,1 +79: bge cr1,80f + addi r4,r4,-1 + add r5,r5,r7 80: std r4,TSPC64_TV_SEC(r11) std r5,TSPC64_TV_NSEC(r11) -- cgit v1.2.3 From 5bb866de17db430e67032e0344f480a2dfc76b84 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 4 Dec 2017 14:43:20 -0500 Subject: ppc: for put_user() pull linux/uaccess.h, not asm/uaccess.h Signed-off-by: Al Viro --- arch/powerpc/kvm/book3s_xive.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index bf457843e032..8f016ccf5424 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From 43347d56c8d9dd732cee2f8efd384ad21dd1f6c4 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Wed, 15 Nov 2017 14:50:13 +0100 Subject: livepatch: send a fake signal to all blocking tasks Live patching consistency model is of LEAVE_PATCHED_SET and SWITCH_THREAD. This means that all tasks in the system have to be marked one by one as safe to call a new patched function. Safe means when a task is not (sleeping) in a set of patched functions. That is, no patched function is on the task's stack. Another clearly safe place is the boundary between kernel and userspace. The patching waits for all tasks to get outside of the patched set or to cross the boundary. The transition is completed afterwards. The problem is that a task can block the transition for quite a long time, if not forever. It could sleep in a set of patched functions, for example. Luckily we can force the task to leave the set by sending it a fake signal, that is a signal with no data in signal pending structures (no handler, no sign of proper signal delivered). Suspend/freezer use this to freeze the tasks as well. The task gets TIF_SIGPENDING set and is woken up (if it has been sleeping in the kernel before) or kicked by rescheduling IPI (if it was running on other CPU). This causes the task to go to kernel/userspace boundary where the signal would be handled and the task would be marked as safe in terms of live patching. There are tasks which are not affected by this technique though. The fake signal is not sent to kthreads. They should be handled differently. They can be woken up so they leave the patched set and their TIF_PATCH_PENDING can be cleared thanks to stack checking. For the sake of completeness, if the task is in TASK_RUNNING state but not currently running on some CPU it doesn't get the IPI, but it would eventually handle the signal anyway. Second, if the task runs in the kernel (in TASK_RUNNING state) it gets the IPI, but the signal is not handled on return from the interrupt. It would be handled on return to the userspace in the future when the fake signal is sent again. Stack checking deals with these cases in a better way. If the task was sleeping in a syscall it would be woken by our fake signal, it would check if TIF_SIGPENDING is set (by calling signal_pending() predicate) and return ERESTART* or EINTR. Syscalls with ERESTART* return values are restarted in case of the fake signal (see do_signal()). EINTR is propagated back to the userspace program. This could disturb the program, but... * each process dealing with signals should react accordingly to EINTR return values. * syscalls returning EINTR happen to be quite common situation in the system even if no fake signal is sent. * freezer sends the fake signal and does not deal with EINTR anyhow. Thus EINTR values are returned when the system is resumed. The very safe marking is done in architectures' "entry" on syscall and interrupt/exception exit paths, and in a stack checking functions of livepatch. TIF_PATCH_PENDING is cleared and the next recalc_sigpending() drops TIF_SIGPENDING. In connection with this, also call klp_update_patch_state() before do_signal(), so that recalc_sigpending() in dequeue_signal() can clear TIF_PATCH_PENDING immediately and thus prevent a double call of do_signal(). Note that the fake signal is not sent to stopped/traced tasks. Such task prevents the patching to finish till it continues again (is not traced anymore). Last, sending the fake signal is not automatic. It is done only when admin requests it by writing 1 to signal sysfs attribute in livepatch sysfs directory. Signed-off-by: Miroslav Benes Cc: Oleg Nesterov Cc: Michael Ellerman Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: linuxppc-dev@lists.ozlabs.org Cc: x86@kernel.org Acked-by: Michael Ellerman (powerpc) Signed-off-by: Jiri Kosina --- Documentation/ABI/testing/sysfs-kernel-livepatch | 12 +++++++ Documentation/livepatch/livepatch.txt | 11 +++++-- arch/powerpc/kernel/signal.c | 6 ++-- arch/x86/entry/common.c | 6 ++-- kernel/livepatch/core.c | 30 +++++++++++++++++ kernel/livepatch/transition.c | 41 ++++++++++++++++++++++++ kernel/livepatch/transition.h | 1 + kernel/signal.c | 4 ++- 8 files changed, 102 insertions(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/ABI/testing/sysfs-kernel-livepatch b/Documentation/ABI/testing/sysfs-kernel-livepatch index d5d39748382f..3bb9d5bc1ce3 100644 --- a/Documentation/ABI/testing/sysfs-kernel-livepatch +++ b/Documentation/ABI/testing/sysfs-kernel-livepatch @@ -33,6 +33,18 @@ Description: An attribute which indicates whether the patch is currently in transition. +What: /sys/kernel/livepatch//signal +Date: Nov 2017 +KernelVersion: 4.15.0 +Contact: live-patching@vger.kernel.org +Description: + A writable attribute that allows administrator to affect the + course of an existing transition. Writing 1 sends a fake + signal to all remaining blocking tasks. The fake signal + means that no proper signal is delivered (there is no data in + signal pending structures). Tasks are interrupted or woken up, + and forced to change their patched state. + What: /sys/kernel/livepatch// Date: Nov 2014 KernelVersion: 3.19.0 diff --git a/Documentation/livepatch/livepatch.txt b/Documentation/livepatch/livepatch.txt index ecdb18104ab0..9bcdef277a36 100644 --- a/Documentation/livepatch/livepatch.txt +++ b/Documentation/livepatch/livepatch.txt @@ -176,8 +176,12 @@ If a patch is in transition, this file shows 0 to indicate the task is unpatched and 1 to indicate it's patched. Otherwise, if no patch is in transition, it shows -1. Any tasks which are blocking the transition can be signaled with SIGSTOP and SIGCONT to force them to change their -patched state. - +patched state. This may be harmful to the system though. +/sys/kernel/livepatch//signal attribute provides a better alternative. +Writing 1 to the attribute sends a fake signal to all remaining blocking +tasks. No proper signal is actually delivered (there is no data in signal +pending structures). Tasks are interrupted or woken up, and forced to change +their patched state. 3.1 Adding consistency model support to new architectures --------------------------------------------------------- @@ -435,6 +439,9 @@ Information about the registered patches can be found under /sys/kernel/livepatch. The patches could be enabled and disabled by writing there. +/sys/kernel/livepatch//signal attribute allows administrator to affect a +patching operation. + See Documentation/ABI/testing/sysfs-kernel-livepatch for more details. diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index e9436c5e1e09..bf9c4e7792d1 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -153,6 +153,9 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); + if (thread_info_flags & _TIF_PATCH_PENDING) + klp_update_patch_state(current); + if (thread_info_flags & _TIF_SIGPENDING) { BUG_ON(regs != current->thread.regs); do_signal(current); @@ -163,9 +166,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) tracehook_notify_resume(regs); } - if (thread_info_flags & _TIF_PATCH_PENDING) - klp_update_patch_state(current); - user_enter(); } diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index d7d3cc24baf4..1e3883e45687 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -153,6 +153,9 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) if (cached_flags & _TIF_UPROBE) uprobe_notify_resume(regs); + if (cached_flags & _TIF_PATCH_PENDING) + klp_update_patch_state(current); + /* deal with pending signal delivery */ if (cached_flags & _TIF_SIGPENDING) do_signal(regs); @@ -165,9 +168,6 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) if (cached_flags & _TIF_USER_RETURN_NOTIFY) fire_user_return_notifiers(); - if (cached_flags & _TIF_PATCH_PENDING) - klp_update_patch_state(current); - /* Disable IRQs and retry */ local_irq_disable(); diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index de9e45dca70f..88766bd91803 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -454,6 +454,7 @@ EXPORT_SYMBOL_GPL(klp_enable_patch); * /sys/kernel/livepatch/ * /sys/kernel/livepatch//enabled * /sys/kernel/livepatch//transition + * /sys/kernel/livepatch//signal * /sys/kernel/livepatch// * /sys/kernel/livepatch/// */ @@ -528,11 +529,40 @@ static ssize_t transition_show(struct kobject *kobj, patch == klp_transition_patch); } +static ssize_t signal_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct klp_patch *patch; + int ret; + bool val; + + patch = container_of(kobj, struct klp_patch, kobj); + + /* + * klp_mutex lock is not grabbed here intentionally. It is not really + * needed. The race window is harmless and grabbing the lock would only + * hold the action back. + */ + if (patch != klp_transition_patch) + return -EINVAL; + + ret = kstrtobool(buf, &val); + if (ret) + return ret; + + if (val) + klp_send_signals(); + + return count; +} + static struct kobj_attribute enabled_kobj_attr = __ATTR_RW(enabled); static struct kobj_attribute transition_kobj_attr = __ATTR_RO(transition); +static struct kobj_attribute signal_kobj_attr = __ATTR_WO(signal); static struct attribute *klp_patch_attrs[] = { &enabled_kobj_attr.attr, &transition_kobj_attr.attr, + &signal_kobj_attr.attr, NULL }; diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index 56add6327736..edcfcb8ebb2d 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -608,3 +608,44 @@ void klp_copy_process(struct task_struct *child) /* TIF_PATCH_PENDING gets copied in setup_thread_stack() */ } + +/* + * Sends a fake signal to all non-kthread tasks with TIF_PATCH_PENDING set. + * Kthreads with TIF_PATCH_PENDING set are woken up. Only admin can request this + * action currently. + */ +void klp_send_signals(void) +{ + struct task_struct *g, *task; + + pr_notice("signaling remaining tasks\n"); + + read_lock(&tasklist_lock); + for_each_process_thread(g, task) { + if (!klp_patch_pending(task)) + continue; + + /* + * There is a small race here. We could see TIF_PATCH_PENDING + * set and decide to wake up a kthread or send a fake signal. + * Meanwhile the task could migrate itself and the action + * would be meaningless. It is not serious though. + */ + if (task->flags & PF_KTHREAD) { + /* + * Wake up a kthread which sleeps interruptedly and + * still has not been migrated. + */ + wake_up_state(task, TASK_INTERRUPTIBLE); + } else { + /* + * Send fake signal to all non-kthread tasks which are + * still not migrated. + */ + spin_lock_irq(&task->sighand->siglock); + signal_wake_up(task, 0); + spin_unlock_irq(&task->sighand->siglock); + } + } + read_unlock(&tasklist_lock); +} diff --git a/kernel/livepatch/transition.h b/kernel/livepatch/transition.h index 0f6e27c481f9..40522795a5f6 100644 --- a/kernel/livepatch/transition.h +++ b/kernel/livepatch/transition.h @@ -11,5 +11,6 @@ void klp_cancel_transition(void); void klp_start_transition(void); void klp_try_complete_transition(void); void klp_reverse_transition(void); +void klp_send_signals(void); #endif /* _LIVEPATCH_TRANSITION_H */ diff --git a/kernel/signal.c b/kernel/signal.c index 8dcd8825b2de..186143b06d00 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -40,6 +40,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -163,7 +164,8 @@ void recalc_sigpending_and_wake(struct task_struct *t) void recalc_sigpending(void) { - if (!recalc_sigpending_tsk(current) && !freezing(current)) + if (!recalc_sigpending_tsk(current) && !freezing(current) && + !klp_patch_pending(current)) clear_thread_flag(TIF_SIGPENDING); } -- cgit v1.2.3 From 10de741fd322c31f6c4f07869f4c6f0fa64b5bfe Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 22 Nov 2017 17:01:05 +1100 Subject: powerpc: Remove DEBUG define in 64-bit early setup code This statement causes some not very useful messages to always be printed on the serial port at boot, even on quiet boots. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 8956a9856604..d3124c302146 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -10,8 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#define DEBUG - #include #include #include -- cgit v1.2.3 From 5138b31422efec897461951f7ce72e59bbb9cb1a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 22 Nov 2017 17:01:06 +1100 Subject: powerpc: Reduce log level of "OPAL detected !" message This message isn't terribly useful. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 2c1aa9cef66d..69b5263fc9e3 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -127,7 +127,7 @@ int __init early_init_dt_scan_opal(unsigned long node, if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { powerpc_firmware_features |= FW_FEATURE_OPAL; - pr_info("OPAL detected !\n"); + pr_debug("OPAL detected !\n"); } else { panic("OPAL != V3 detected, no longer supported.\n"); } -- cgit v1.2.3 From a443bf6e8a7674b86221f4922cae82d67dc9e8ad Mon Sep 17 00:00:00 2001 From: Kamalesh Babulal Date: Tue, 14 Nov 2017 04:29:08 -0500 Subject: powerpc/modules: Add REL24 relocation support of livepatch symbols Livepatch re-uses module loader function apply_relocate_add() to write relocations, instead of managing them by arch-dependent klp_write_module_reloc() function. apply_relocate_add() doesn't understand livepatch symbols (marked with SHN_LIVEPATCH symbol section index) and assumes them to be local symbols by default for R_PPC64_REL24 relocation type. It fails with an error, when trying to calculate offset with local_entry_offset(): module_64: kpatch_meminfo: REL24 -1152921504897399800 out of range! Whereas livepatch symbols are essentially SHN_UNDEF, should be called via stub used for global calls. This issue can be fixed by teaching apply_relocate_add() to handle both SHN_UNDEF/SHN_LIVEPATCH symbols via the same stub. This patch extends SHN_UNDEF code to handle livepatch symbols too. Signed-off-by: Kamalesh Babulal Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/module_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 759104b99f9f..1685b40f3935 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -614,7 +614,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_PPC_REL24: /* FIXME: Handle weak symbols here --RR */ - if (sym->st_shndx == SHN_UNDEF) { + if (sym->st_shndx == SHN_UNDEF || + sym->st_shndx == SHN_LIVEPATCH) { /* External: go via stub */ value = stub_for_addr(sechdrs, value, me); if (!value) -- cgit v1.2.3 From b9eab08d012fa093947b230f9a87257c27fb829b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 16 Nov 2017 11:45:37 -0600 Subject: powerpc/modules: Don't try to restore r2 after a sibling call When attempting to load a livepatch module, I got the following error: module_64: patch_module: Expect noop after relocate, got 3c820000 The error was triggered by the following code in unregister_netdevice_queue(): 14c: 00 00 00 48 b 14c 14c: R_PPC64_REL24 net_set_todo 150: 00 00 82 3c addis r4,r2,0 GCC didn't insert a nop after the branch to net_set_todo() because it's a sibling call, so it never returns. The nop isn't needed after the branch in that case. Signed-off-by: Josh Poimboeuf Acked-by: Naveen N. Rao Reviewed-and-tested-by: Kamalesh Babulal Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/code-patching.h | 1 + arch/powerpc/kernel/module_64.c | 12 +++++++++++- arch/powerpc/lib/code-patching.c | 5 +++++ 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index abef812de7f8..2c895e8d07f7 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -33,6 +33,7 @@ int patch_branch(unsigned int *addr, unsigned long target, int flags); int patch_instruction(unsigned int *addr, unsigned int instr); int instr_is_relative_branch(unsigned int instr); +int instr_is_relative_link_branch(unsigned int instr); int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr); unsigned long branch_target(const unsigned int *instr); unsigned int translate_branch(const unsigned int *dest, diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 1685b40f3935..5b44668c0e45 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -487,7 +487,17 @@ static bool is_early_mcount_callsite(u32 *instruction) restore r2. */ static int restore_r2(u32 *instruction, struct module *me) { - if (is_early_mcount_callsite(instruction - 1)) + u32 *prev_insn = instruction - 1; + + if (is_early_mcount_callsite(prev_insn)) + return 1; + + /* + * Make sure the branch isn't a sibling call. Sibling calls aren't + * "link" branches and they don't return, so they don't need the r2 + * restore afterwards. + */ + if (!instr_is_relative_link_branch(*prev_insn)) return 1; if (*instruction != PPC_INST_NOP) { diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index d469224c4ada..096d4e4d31e6 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -302,6 +302,11 @@ int instr_is_relative_branch(unsigned int instr) return instr_is_branch_iform(instr) || instr_is_branch_bform(instr); } +int instr_is_relative_link_branch(unsigned int instr) +{ + return instr_is_relative_branch(instr) && (instr & BRANCH_SET_LINK); +} + static unsigned long branch_iform_target(const unsigned int *instr) { signed long imm; -- cgit v1.2.3 From 1ea61ea23985c0f15c027e4c0ac02224efdfb243 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 14 Nov 2017 04:29:10 -0500 Subject: powerpc/modules: Improve restore_r2() error message Print the function address associated with the restore_r2() error to make it easier to debug the problem. Also clarify the wording a bit. Before: module_64: patch_foo: Expect noop after relocate, got 3c820000 After: module_64: patch_foo: Expected nop after call, got 7c630034 at netdev_has_upper_dev+0x54/0xb0 [patch_foo] Signed-off-by: Josh Poimboeuf Signed-off-by: Kamalesh Babulal [mpe: Change noop to nop, as that's the name of the instruction] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/module_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 5b44668c0e45..e6c011d56629 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -501,8 +501,8 @@ static int restore_r2(u32 *instruction, struct module *me) return 1; if (*instruction != PPC_INST_NOP) { - pr_err("%s: Expect noop after relocate, got %08x\n", - me->name, *instruction); + pr_err("%s: Expected nop after call, got %08x at %pS\n", + me->name, *instruction, instruction); return 0; } /* ld r2,R2_STACK_OFFSET(r1) */ -- cgit v1.2.3 From 3d6bf693d8bc63f2e5eca7373916c4871f8ffd66 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 8 Nov 2017 22:05:27 +1100 Subject: powerpc/boot: Only build uartlite if XILINX_VIRTEX=y The serial code in uartlite.c only matches if we find one of two Xilinx (xlnx) nodes in the device tree, there's no need to build or link the code on other platforms. As far as I can tell CONFIG_XILINX_VIRTEX is the appropriate symbol to use to conditionally compile the code. Signed-off-by: Michael Ellerman --- arch/powerpc/boot/Makefile | 3 ++- arch/powerpc/boot/serial.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 08782f55b89f..16beaeee5f95 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -109,7 +109,7 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ ns16550.c serial.c simple_alloc.c div64.S util.S \ elf_util.c $(zlib-y) devtree.c stdlib.c \ oflib.c ofconsole.c cuboot.c cpm-serial.c \ - uartlite.c opal.c + opal.c src-wlib-$(CONFIG_PPC_MPC52XX) += mpc52xx-psc.c src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S ifndef CONFIG_PPC64_BOOT_WRAPPER @@ -120,6 +120,7 @@ src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c src-wlib-$(CONFIG_EMBEDDED6xx) += mpsc.c mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c +src-wlib-$(CONFIG_XILINX_VIRTEX) += uartlite.c src-plat-y := of.c epapr.c src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \ diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index 7b5c02b1afd0..71d062b48c1b 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -133,9 +133,11 @@ int serial_console_init(void) else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart")) rc = mpc5200_psc_console_init(devp, &serial_cd); #endif +#ifdef CONFIG_XILINX_VIRTEX else if (dt_is_compatible(devp, "xlnx,opb-uartlite-1.00.b") || dt_is_compatible(devp, "xlnx,xps-uartlite-1.00.a")) rc = uartlite_console_init(devp, &serial_cd); +#endif else if (dt_is_compatible(devp, "ibm,opal-console-raw")) rc = opal_console_init(devp, &serial_cd); -- cgit v1.2.3 From f8e8e69cea4934485e2dd3d25005c68d671167d9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 8 Nov 2017 22:05:28 +1100 Subject: powerpc/boot: Only build OPAL code when necessary Only build the OPAL console code in when necessary. This looks like it should use CONFIG_PPC_POWERNV, but because the opal-call.S code is 64-bit only, we must only build it when we're building the boot wrapper 64-bit. Signed-off-by: Michael Ellerman --- arch/powerpc/boot/Makefile | 6 +++--- arch/powerpc/boot/serial.c | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 16beaeee5f95..650be8300fda 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -108,10 +108,10 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ $(libfdt) libfdt-wrapper.c \ ns16550.c serial.c simple_alloc.c div64.S util.S \ elf_util.c $(zlib-y) devtree.c stdlib.c \ - oflib.c ofconsole.c cuboot.c cpm-serial.c \ - opal.c + oflib.c ofconsole.c cuboot.c cpm-serial.c + src-wlib-$(CONFIG_PPC_MPC52XX) += mpc52xx-psc.c -src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S +src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S opal.c ifndef CONFIG_PPC64_BOOT_WRAPPER src-wlib-y += crtsavres.S endif diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index 71d062b48c1b..6f2b2ecf10bd 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -138,8 +138,10 @@ int serial_console_init(void) dt_is_compatible(devp, "xlnx,xps-uartlite-1.00.a")) rc = uartlite_console_init(devp, &serial_cd); #endif +#ifdef CONFIG_PPC64_BOOT_WRAPPER else if (dt_is_compatible(devp, "ibm,opal-console-raw")) rc = opal_console_init(devp, &serial_cd); +#endif /* Add other serial console driver calls here */ -- cgit v1.2.3 From c3bb690dc61282bebc8474429ac941f42747da15 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 8 Nov 2017 22:05:29 +1100 Subject: powerpc/boot: Only build CPM code when necessary As far as I can tell CONFIG_CPM is the right symbol to use to conditionally compile the cpm-serial.c code. Signed-off-by: Michael Ellerman --- arch/powerpc/boot/Makefile | 3 ++- arch/powerpc/boot/serial.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 650be8300fda..ef6549e57157 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -108,7 +108,7 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ $(libfdt) libfdt-wrapper.c \ ns16550.c serial.c simple_alloc.c div64.S util.S \ elf_util.c $(zlib-y) devtree.c stdlib.c \ - oflib.c ofconsole.c cuboot.c cpm-serial.c + oflib.c ofconsole.c cuboot.c src-wlib-$(CONFIG_PPC_MPC52XX) += mpc52xx-psc.c src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S opal.c @@ -121,6 +121,7 @@ src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c src-wlib-$(CONFIG_EMBEDDED6xx) += mpsc.c mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c src-wlib-$(CONFIG_XILINX_VIRTEX) += uartlite.c +src-wlib-$(CONFIG_CPM) += cpm-serial.c src-plat-y := of.c epapr.c src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \ diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index 6f2b2ecf10bd..88955095ec07 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -124,11 +124,13 @@ int serial_console_init(void) else if (dt_is_compatible(devp, "marvell,mv64360-mpsc")) rc = mpsc_console_init(devp, &serial_cd); #endif +#ifdef CONFIG_CPM else if (dt_is_compatible(devp, "fsl,cpm1-scc-uart") || dt_is_compatible(devp, "fsl,cpm1-smc-uart") || dt_is_compatible(devp, "fsl,cpm2-scc-uart") || dt_is_compatible(devp, "fsl,cpm2-smc-uart")) rc = cpm_console_init(devp, &serial_cd); +#endif #ifdef CONFIG_PPC_MPC52XX else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart")) rc = mpc5200_psc_console_init(devp, &serial_cd); -- cgit v1.2.3 From a10726075dec4ceb050c2f775e54363411141f7a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 13 Nov 2017 23:06:55 +1100 Subject: powerpc/32: Add .data.rel* sections explicitly Match powerpc/64 and include .data.rel* input sections in the .data output section explicitly. This solves the warning: powerpc-linux-gnu-ld: warning: orphan section `.data.rel.ro' from `arch/powerpc/kernel/head_44x.o' being placed in section `.data.rel.ro'. Link: https://lists.01.org/pipermail/kbuild-all/2017-November/040010.html Reported-by: kbuild test robot Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 0494e1566ee2..51e4ec92ade1 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -264,6 +264,7 @@ SECTIONS #ifdef CONFIG_PPC32 .data : AT(ADDR(.data) - LOAD_OFFSET) { DATA_DATA + *(.data.rel*) *(.sdata) *(.sdata2) *(.got.plt) *(.got) -- cgit v1.2.3 From 494d82ceae7f3b9fdb5154b4469e5bb22f56040b Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Fri, 1 Dec 2017 18:58:24 +0300 Subject: powerpc/44x/fsp2: Add fsp2 headers Add cmu, plbX, l2, ddr3/4, crcs register definitions. Add mfcmu, mtcmu functions for indirect access to cmu. Add mtl2, mfl2 same for l2 cache core reg set. Signed-off-by: Ivan Mikhaylov Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/44x/fsp2.h | 272 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 arch/powerpc/platforms/44x/fsp2.h (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/44x/fsp2.h b/arch/powerpc/platforms/44x/fsp2.h new file mode 100644 index 000000000000..9e1d52754c8b --- /dev/null +++ b/arch/powerpc/platforms/44x/fsp2.h @@ -0,0 +1,272 @@ +#ifndef _ASM_POWERPC_FSP_DCR_H_ +#define _ASM_POWERPC_FSP_DCR_H_ +#ifdef __KERNEL__ +#include + +#define DCRN_CMU_ADDR 0x00C /* Chip management unic addr */ +#define DCRN_CMU_DATA 0x00D /* Chip management unic data */ + +/* PLB4 Arbiter */ +#define DCRN_PLB4_PCBI 0x010 /* PLB Crossbar ID/Rev Register */ +#define DCRN_PLB4_P0ACR 0x011 /* PLB0 Arbiter Control Register */ +#define DCRN_PLB4_P0ESRL 0x012 /* PLB0 Error Status Register Low */ +#define DCRN_PLB4_P0ESRH 0x013 /* PLB0 Error Status Register High */ +#define DCRN_PLB4_P0EARL 0x014 /* PLB0 Error Address Register Low */ +#define DCRN_PLB4_P0EARH 0x015 /* PLB0 Error Address Register High */ +#define DCRN_PLB4_P0ESRLS 0x016 /* PLB0 Error Status Register Low Set*/ +#define DCRN_PLB4_P0ESRHS 0x017 /* PLB0 Error Status Register High */ +#define DCRN_PLB4_PCBC 0x018 /* PLB Crossbar Control Register */ +#define DCRN_PLB4_P1ACR 0x019 /* PLB1 Arbiter Control Register */ +#define DCRN_PLB4_P1ESRL 0x01A /* PLB1 Error Status Register Low */ +#define DCRN_PLB4_P1ESRH 0x01B /* PLB1 Error Status Register High */ +#define DCRN_PLB4_P1EARL 0x01C /* PLB1 Error Address Register Low */ +#define DCRN_PLB4_P1EARH 0x01D /* PLB1 Error Address Register High */ +#define DCRN_PLB4_P1ESRLS 0x01E /* PLB1 Error Status Register Low Set*/ +#define DCRN_PLB4_P1ESRHS 0x01F /*PLB1 Error Status Register High Set*/ + +/* PLB4/OPB bridge 0, 1, 2, 3 */ +#define DCRN_PLB4OPB0_BASE 0x020 +#define DCRN_PLB4OPB1_BASE 0x030 +#define DCRN_PLB4OPB2_BASE 0x040 +#define DCRN_PLB4OPB3_BASE 0x050 + +#define PLB4OPB_GESR0 0x0 /* Error status 0: Master Dev 0-3 */ +#define PLB4OPB_GEAR 0x2 /* Error Address Register */ +#define PLB4OPB_GEARU 0x3 /* Error Upper Address Register */ +#define PLB4OPB_GESR1 0x4 /* Error Status 1: Master Dev 4-7 */ +#define PLB4OPB_GESR2 0xC /* Error Status 2: Master Dev 8-11 */ + +/* PLB4-to-AHB Bridge */ +#define DCRN_PLB4AHB_BASE 0x400 +#define DCRN_PLB4AHB_SEUAR (DCRN_PLB4AHB_BASE + 1) +#define DCRN_PLB4AHB_SELAR (DCRN_PLB4AHB_BASE + 2) +#define DCRN_PLB4AHB_ESR (DCRN_PLB4AHB_BASE + 3) +#define DCRN_AHBPLB4_ESR (DCRN_PLB4AHB_BASE + 8) +#define DCRN_AHBPLB4_EAR (DCRN_PLB4AHB_BASE + 9) + +/* PLB6 Controller */ +#define DCRN_PLB6_BASE 0x11111300 +#define DCRN_PLB6_CR0 (DCRN_PLB6_BASE) +#define DCRN_PLB6_ERR (DCRN_PLB6_BASE + 0x0B) +#define DCRN_PLB6_HD (DCRN_PLB6_BASE + 0x0E) +#define DCRN_PLB6_SHD (DCRN_PLB6_BASE + 0x10) + +/* PLB4-to-PLB6 Bridge */ +#define DCRN_PLB4PLB6_BASE 0x11111320 +#define DCRN_PLB4PLB6_ESR (DCRN_PLB4PLB6_BASE + 1) +#define DCRN_PLB4PLB6_EARH (DCRN_PLB4PLB6_BASE + 3) +#define DCRN_PLB4PLB6_EARL (DCRN_PLB4PLB6_BASE + 4) + +/* PLB6-to-PLB4 Bridge */ +#define DCRN_PLB6PLB4_BASE 0x11111350 +#define DCRN_PLB6PLB4_ESR (DCRN_PLB6PLB4_BASE + 1) +#define DCRN_PLB6PLB4_EARH (DCRN_PLB6PLB4_BASE + 3) +#define DCRN_PLB6PLB4_EARL (DCRN_PLB6PLB4_BASE + 4) + +/* PLB6-to-MCIF Bridge */ +#define DCRN_PLB6MCIF_BASE 0x11111380 +#define DCRN_PLB6MCIF_BESR0 (DCRN_PLB6MCIF_BASE + 0) +#define DCRN_PLB6MCIF_BESR1 (DCRN_PLB6MCIF_BASE + 1) +#define DCRN_PLB6MCIF_BEARL (DCRN_PLB6MCIF_BASE + 2) +#define DCRN_PLB6MCIF_BEARH (DCRN_PLB6MCIF_BASE + 3) + +/* Configuration Logic Registers */ +#define DCRN_CONF_BASE 0x11111400 +#define DCRN_CONF_FIR_RWC (DCRN_CONF_BASE + 0x3A) +#define DCRN_CONF_EIR_RS (DCRN_CONF_BASE + 0x3E) +#define DCRN_CONF_RPERR0 (DCRN_CONF_BASE + 0x4D) +#define DCRN_CONF_RPERR1 (DCRN_CONF_BASE + 0x4E) + +#define DCRN_L2CDCRAI 0x11111100 +#define DCRN_L2CDCRDI 0x11111104 +/* L2 indirect addresses */ +#define L2MCK 0x120 +#define L2MCKEN 0x130 +#define L2INT 0x150 +#define L2INTEN 0x160 +#define L2LOG0 0x180 +#define L2LOG1 0x184 +#define L2LOG2 0x188 +#define L2LOG3 0x18C +#define L2LOG4 0x190 +#define L2LOG5 0x194 +#define L2PLBSTAT0 0x300 +#define L2PLBSTAT1 0x304 +#define L2PLBMCKEN0 0x330 +#define L2PLBMCKEN1 0x334 +#define L2PLBINTEN0 0x360 +#define L2PLBINTEN1 0x364 +#define L2ARRSTAT0 0x500 +#define L2ARRSTAT1 0x504 +#define L2ARRSTAT2 0x508 +#define L2ARRMCKEN0 0x530 +#define L2ARRMCKEN1 0x534 +#define L2ARRMCKEN2 0x538 +#define L2ARRINTEN0 0x560 +#define L2ARRINTEN1 0x564 +#define L2ARRINTEN2 0x568 +#define L2CPUSTAT 0x700 +#define L2CPUMCKEN 0x730 +#define L2CPUINTEN 0x760 +#define L2RACSTAT0 0x900 +#define L2RACMCKEN0 0x930 +#define L2RACINTEN0 0x960 +#define L2WACSTAT0 0xD00 +#define L2WACSTAT1 0xD04 +#define L2WACSTAT2 0xD08 +#define L2WACMCKEN0 0xD30 +#define L2WACMCKEN1 0xD34 +#define L2WACMCKEN2 0xD38 +#define L2WACINTEN0 0xD60 +#define L2WACINTEN1 0xD64 +#define L2WACINTEN2 0xD68 +#define L2WDFSTAT 0xF00 +#define L2WDFMCKEN 0xF30 +#define L2WDFINTEN 0xF60 + +/* DDR3/4 Memory Controller */ +#define DCRN_DDR34_BASE 0x11120000 +#define DCRN_DDR34_MCSTAT 0x10 +#define DCRN_DDR34_MCOPT1 0x20 +#define DCRN_DDR34_MCOPT2 0x21 +#define DCRN_DDR34_PHYSTAT 0x32 +#define DCRN_DDR34_CFGR0 0x40 +#define DCRN_DDR34_CFGR1 0x41 +#define DCRN_DDR34_CFGR2 0x42 +#define DCRN_DDR34_CFGR3 0x43 +#define DCRN_DDR34_SCRUB_CNTL 0xAA +#define DCRN_DDR34_SCRUB_INT 0xAB +#define DCRN_DDR34_SCRUB_START_ADDR 0xB0 +#define DCRN_DDR34_SCRUB_END_ADDR 0xD0 +#define DCRN_DDR34_ECCERR_ADDR_PORT0 0xE0 +#define DCRN_DDR34_ECCERR_ADDR_PORT1 0xE1 +#define DCRN_DDR34_ECCERR_ADDR_PORT2 0xE2 +#define DCRN_DDR34_ECCERR_ADDR_PORT3 0xE3 +#define DCRN_DDR34_ECCERR_COUNT_PORT0 0xE4 +#define DCRN_DDR34_ECCERR_COUNT_PORT1 0xE5 +#define DCRN_DDR34_ECCERR_COUNT_PORT2 0xE6 +#define DCRN_DDR34_ECCERR_COUNT_PORT3 0xE7 +#define DCRN_DDR34_ECCERR_PORT0 0xF0 +#define DCRN_DDR34_ECCERR_PORT1 0xF2 +#define DCRN_DDR34_ECCERR_PORT2 0xF4 +#define DCRN_DDR34_ECCERR_PORT3 0xF6 +#define DCRN_DDR34_ECC_CHECK_PORT0 0xF8 +#define DCRN_DDR34_ECC_CHECK_PORT1 0xF9 +#define DCRN_DDR34_ECC_CHECK_PORT2 0xF9 +#define DCRN_DDR34_ECC_CHECK_PORT3 0xFB + +#define DDR34_SCRUB_CNTL_STOP 0x00000000 +#define DDR34_SCRUB_CNTL_SCRUB 0x80000000 +#define DDR34_SCRUB_CNTL_UE_STOP 0x20000000 +#define DDR34_SCRUB_CNTL_CE_STOP 0x10000000 +#define DDR34_SCRUB_CNTL_RANK_EN 0x00008000 + +/* PLB-Attached DDR3/4 Core Wrapper */ +#define DCRN_CW_BASE 0x11111800 +#define DCRN_CW_MCER0 0x00 +#define DCRN_CW_MCER1 0x01 +#define DCRN_CW_MCER_AND0 0x02 +#define DCRN_CW_MCER_AND1 0x03 +#define DCRN_CW_MCER_OR0 0x04 +#define DCRN_CW_MCER_OR1 0x05 +#define DCRN_CW_MCER_MASK0 0x06 +#define DCRN_CW_MCER_MASK1 0x07 +#define DCRN_CW_MCER_MASK_AND0 0x08 +#define DCRN_CW_MCER_MASK_AND1 0x09 +#define DCRN_CW_MCER_MASK_OR0 0x0A +#define DCRN_CW_MCER_MASK_OR1 0x0B +#define DCRN_CW_MCER_ACTION0 0x0C +#define DCRN_CW_MCER_ACTION1 0x0D +#define DCRN_CW_MCER_WOF0 0x0E +#define DCRN_CW_MCER_WOF1 0x0F +#define DCRN_CW_LFIR 0x10 +#define DCRN_CW_LFIR_AND 0x11 +#define DCRN_CW_LFIR_OR 0x12 +#define DCRN_CW_LFIR_MASK 0x13 +#define DCRN_CW_LFIR_MASK_AND 0x14 +#define DCRN_CW_LFIR_MASK_OR 0x15 + +#define CW_MCER0_MEM_CE 0x00020000 +/* CMU addresses */ +#define CMUN_CRCS 0x00 /* Chip Reset Control/Status */ +#define CMUN_CONFFIR0 0x20 /* Config Reg Parity FIR 0 */ +#define CMUN_CONFFIR1 0x21 /* Config Reg Parity FIR 1 */ +#define CMUN_CONFFIR2 0x22 /* Config Reg Parity FIR 2 */ +#define CMUN_CONFFIR3 0x23 /* Config Reg Parity FIR 3 */ +#define CMUN_URCR3_RS 0x24 /* Unit Reset Control Reg 3 Set */ +#define CMUN_URCR3_C 0x25 /* Unit Reset Control Reg 3 Clear */ +#define CMUN_URCR3_P 0x26 /* Unit Reset Control Reg 3 Pulse */ +#define CMUN_PW0 0x2C /* Pulse Width Register */ +#define CMUN_URCR0_P 0x2D /* Unit Reset Control Reg 0 Pulse */ +#define CMUN_URCR1_P 0x2E /* Unit Reset Control Reg 1 Pulse */ +#define CMUN_URCR2_P 0x2F /* Unit Reset Control Reg 2 Pulse */ +#define CMUN_CLS_RW 0x30 /* Code Load Status (Read/Write) */ +#define CMUN_CLS_S 0x31 /* Code Load Status (Set) */ +#define CMUN_CLS_C 0x32 /* Code Load Status (Clear */ +#define CMUN_URCR2_RS 0x33 /* Unit Reset Control Reg 2 Set */ +#define CMUN_URCR2_C 0x34 /* Unit Reset Control Reg 2 Clear */ +#define CMUN_CLKEN0 0x35 /* Clock Enable 0 */ +#define CMUN_CLKEN1 0x36 /* Clock Enable 1 */ +#define CMUN_PCD0 0x37 /* PSI clock divider 0 */ +#define CMUN_PCD1 0x38 /* PSI clock divider 1 */ +#define CMUN_TMR0 0x39 /* Reset Timer */ +#define CMUN_TVS0 0x3A /* TV Sense Reg 0 */ +#define CMUN_TVS1 0x3B /* TV Sense Reg 1 */ +#define CMUN_MCCR 0x3C /* DRAM Configuration Reg */ +#define CMUN_FIR0 0x3D /* Fault Isolation Reg 0 */ +#define CMUN_FMR0 0x3E /* FIR Mask Reg 0 */ +#define CMUN_ETDRB 0x3F /* ETDR Backdoor */ + +/* CRCS bit fields */ +#define CRCS_STAT_MASK 0xF0000000 +#define CRCS_STAT_POR 0x10000000 +#define CRCS_STAT_PHR 0x20000000 +#define CRCS_STAT_PCIE 0x30000000 +#define CRCS_STAT_CRCS_SYS 0x40000000 +#define CRCS_STAT_DBCR_SYS 0x50000000 +#define CRCS_STAT_HOST_SYS 0x60000000 +#define CRCS_STAT_CHIP_RST_B 0x70000000 +#define CRCS_STAT_CRCS_CHIP 0x80000000 +#define CRCS_STAT_DBCR_CHIP 0x90000000 +#define CRCS_STAT_HOST_CHIP 0xA0000000 +#define CRCS_STAT_PSI_CHIP 0xB0000000 +#define CRCS_STAT_CRCS_CORE 0xC0000000 +#define CRCS_STAT_DBCR_CORE 0xD0000000 +#define CRCS_STAT_HOST_CORE 0xE0000000 +#define CRCS_STAT_PCIE_HOT 0xF0000000 +#define CRCS_STAT_SELF_CORE 0x40000000 +#define CRCS_STAT_SELF_CHIP 0x50000000 +#define CRCS_WATCHE 0x08000000 +#define CRCS_CORE 0x04000000 /* Reset PPC440 core */ +#define CRCS_CHIP 0x02000000 /* Chip Reset */ +#define CRCS_SYS 0x01000000 /* System Reset */ +#define CRCS_WRCR 0x00800000 /* Watchdog reset on core reset */ +#define CRCS_EXTCR 0x00080000 /* CHIP_RST_B triggers chip reset */ +#define CRCS_PLOCK 0x00000002 /* PLL Locked */ + +#define mtcmu(reg, data) \ +do { \ + mtdcr(DCRN_CMU_ADDR, reg); \ + mtdcr(DCRN_CMU_DATA, data); \ +} while (0) + +#define mfcmu(reg)\ + ({u32 data; \ + mtdcr(DCRN_CMU_ADDR, reg); \ + data = mfdcr(DCRN_CMU_DATA); \ + data; }) + +#define mtl2(reg, data) \ +do { \ + mtdcr(DCRN_L2CDCRAI, reg); \ + mtdcr(DCRN_L2CDCRDI, data); \ +} while (0) + +#define mfl2(reg) \ + ({u32 data; \ + mtdcr(DCRN_L2CDCRAI, reg); \ + data = mfdcr(DCRN_L2CDCRDI); \ + data; }) + +#endif /* __KERNEL__ */ +#endif /* _ASM_POWERPC_FSP2_DCR_H_ */ -- cgit v1.2.3 From 9c4c374676a12db4a452534f3347323d35c32d1a Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Fri, 1 Dec 2017 18:58:25 +0300 Subject: powerpc/44x/fsp2: Interrupt handling setup * clear out any possible plb6 errors * board interrupt handling setup within l2 reg set * fsp2 parity error setup All those points are needed for correct interrupt handling on board level including error handling report. Reviewed-by: Alistair Popple Signed-off-by: Ivan Mikhaylov Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/44x/fsp2.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c index 92e98048404f..baed409c9663 100644 --- a/arch/powerpc/platforms/44x/fsp2.c +++ b/arch/powerpc/platforms/44x/fsp2.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include "fsp2.h" static __initdata struct of_device_id fsp2_of_bus[] = { { .compatible = "ibm,plb4", }, @@ -44,10 +46,45 @@ machine_device_initcall(fsp2, fsp2_device_probe); static int __init fsp2_probe(void) { + u32 val; unsigned long root = of_get_flat_dt_root(); if (!of_flat_dt_is_compatible(root, "ibm,fsp2")) return 0; + + /* Clear BC_ERR and mask snoopable request plb errors. */ + val = mfdcr(DCRN_PLB6_CR0); + val |= 0x20000000; + mtdcr(DCRN_PLB6_BASE, val); + mtdcr(DCRN_PLB6_HD, 0xffff0000); + mtdcr(DCRN_PLB6_SHD, 0xffff0000); + + /* L2 machine checks */ + mtl2(L2PLBMCKEN0, 0xffffffff); + mtl2(L2PLBMCKEN1, 0x0000ffff); + mtl2(L2ARRMCKEN0, 0xffffffff); + mtl2(L2ARRMCKEN1, 0xffffffff); + mtl2(L2ARRMCKEN2, 0xfffff000); + mtl2(L2CPUMCKEN, 0xffffffff); + mtl2(L2RACMCKEN0, 0xffffffff); + mtl2(L2WACMCKEN0, 0xffffffff); + mtl2(L2WACMCKEN1, 0xffffffff); + mtl2(L2WACMCKEN2, 0xffffffff); + mtl2(L2WDFMCKEN, 0xffffffff); + + /* L2 interrupts */ + mtl2(L2PLBINTEN1, 0xffff0000); + + /* + * At a global level, enable all L2 machine checks and interrupts + * reported by the L2 subsystems, except for the external machine check + * input (UIC0.1). + */ + mtl2(L2MCKEN, 0x000007ff); + mtl2(L2INTEN, 0x000004ff); + + /* Enable FSP-2 configuration logic parity errors */ + mtdcr(DCRN_CONF_EIR_RS, 0x80000000); return 1; } -- cgit v1.2.3 From 50f01c57d700cce04a9dc87bc55db2e283f57212 Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Fri, 1 Dec 2017 18:58:26 +0300 Subject: powerpc/44x/fsp2: tvsense workaround for dd1 TVSENSE(temperature and voltage sensors) reset is blocked (clock gated) by the POR default of the TVS sleep config bit. As a consequence, TVSENSE will provide erratic sensor values, which may result in spurious (parity) errors recorded in the CMU FIR and leading to erroneous interrupt requests once the CMU interrupt is unmasked. Purpose of this to set up CMU in working state in any cases even in case of parity errors. Reviewed-by: Alistair Popple Signed-off-by: Ivan Mikhaylov Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/44x/fsp2.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c index baed409c9663..7c9bc933d533 100644 --- a/arch/powerpc/platforms/44x/fsp2.c +++ b/arch/powerpc/platforms/44x/fsp2.c @@ -59,6 +59,23 @@ static int __init fsp2_probe(void) mtdcr(DCRN_PLB6_HD, 0xffff0000); mtdcr(DCRN_PLB6_SHD, 0xffff0000); + /* TVSENSE reset is blocked (clock gated) by the POR default of the TVS + * sleep config bit. As a consequence, TVSENSE will provide erratic + * sensor values, which may result in spurious (parity) errors + * recorded in the CMU FIR and leading to erroneous interrupt requests + * once the CMU interrupt is unmasked. + */ + + /* 1. set TVS1[UNDOZE] */ + val = mfcmu(CMUN_TVS1); + val |= 0x4; + mtcmu(CMUN_TVS1, val); + + /* 2. clear FIR[TVS] and FIR[TVSPAR] */ + val = mfcmu(CMUN_FIR0); + val |= 0x30000000; + mtcmu(CMUN_FIR0, val); + /* L2 machine checks */ mtl2(L2PLBMCKEN0, 0xffffffff); mtl2(L2PLBMCKEN1, 0x0000ffff); -- cgit v1.2.3 From 7813043e1bbcea764c86ef2fcf43aeae2b0e240d Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Fri, 1 Dec 2017 18:58:27 +0300 Subject: powerpc/44x/fsp2: Add irq error handlers Add irq error handlers for cmu, plb, opb, mcue, conf with debug information output in case of problems. Signed-off-by: Ivan Mikhaylov Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/44x/fsp2.c | 205 +++++++++++++++++++++++++++++++++++++- 1 file changed, 204 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c index 7c9bc933d533..04f0c73a9b4f 100644 --- a/arch/powerpc/platforms/44x/fsp2.c +++ b/arch/powerpc/platforms/44x/fsp2.c @@ -28,8 +28,17 @@ #include #include #include +#include +#include #include "fsp2.h" +#define FSP2_BUS_ERR "ibm,bus-error-irq" +#define FSP2_CMU_ERR "ibm,cmu-error-irq" +#define FSP2_CONF_ERR "ibm,conf-error-irq" +#define FSP2_OPBD_ERR "ibm,opbd-error-irq" +#define FSP2_MCUE "ibm,mc-ue-irq" +#define FSP2_RST_WRN "ibm,reset-warning-irq" + static __initdata struct of_device_id fsp2_of_bus[] = { { .compatible = "ibm,plb4", }, { .compatible = "ibm,plb6", }, @@ -37,6 +46,194 @@ static __initdata struct of_device_id fsp2_of_bus[] = { {}, }; +static void l2regs(void) +{ + pr_err("L2 Controller:\n"); + pr_err("MCK: 0x%08x\n", mfl2(L2MCK)); + pr_err("INT: 0x%08x\n", mfl2(L2INT)); + pr_err("PLBSTAT0: 0x%08x\n", mfl2(L2PLBSTAT0)); + pr_err("PLBSTAT1: 0x%08x\n", mfl2(L2PLBSTAT1)); + pr_err("ARRSTAT0: 0x%08x\n", mfl2(L2ARRSTAT0)); + pr_err("ARRSTAT1: 0x%08x\n", mfl2(L2ARRSTAT1)); + pr_err("ARRSTAT2: 0x%08x\n", mfl2(L2ARRSTAT2)); + pr_err("CPUSTAT: 0x%08x\n", mfl2(L2CPUSTAT)); + pr_err("RACSTAT0: 0x%08x\n", mfl2(L2RACSTAT0)); + pr_err("WACSTAT0: 0x%08x\n", mfl2(L2WACSTAT0)); + pr_err("WACSTAT1: 0x%08x\n", mfl2(L2WACSTAT1)); + pr_err("WACSTAT2: 0x%08x\n", mfl2(L2WACSTAT2)); + pr_err("WDFSTAT: 0x%08x\n", mfl2(L2WDFSTAT)); + pr_err("LOG0: 0x%08x\n", mfl2(L2LOG0)); + pr_err("LOG1: 0x%08x\n", mfl2(L2LOG1)); + pr_err("LOG2: 0x%08x\n", mfl2(L2LOG2)); + pr_err("LOG3: 0x%08x\n", mfl2(L2LOG3)); + pr_err("LOG4: 0x%08x\n", mfl2(L2LOG4)); + pr_err("LOG5: 0x%08x\n", mfl2(L2LOG5)); +} + +static void show_plbopb_regs(u32 base, int num) +{ + pr_err("\nPLBOPB Bridge %d:\n", num); + pr_err("GESR0: 0x%08x\n", mfdcr(base + PLB4OPB_GESR0)); + pr_err("GESR1: 0x%08x\n", mfdcr(base + PLB4OPB_GESR1)); + pr_err("GESR2: 0x%08x\n", mfdcr(base + PLB4OPB_GESR2)); + pr_err("GEARU: 0x%08x\n", mfdcr(base + PLB4OPB_GEARU)); + pr_err("GEAR: 0x%08x\n", mfdcr(base + PLB4OPB_GEAR)); +} + +static irqreturn_t bus_err_handler(int irq, void *data) +{ + pr_err("Bus Error\n"); + + l2regs(); + + pr_err("\nPLB6 Controller:\n"); + pr_err("BC_SHD: 0x%08x\n", mfdcr(DCRN_PLB6_SHD)); + pr_err("BC_ERR: 0x%08x\n", mfdcr(DCRN_PLB6_ERR)); + + pr_err("\nPLB6-to-PLB4 Bridge:\n"); + pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_ESR)); + pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARH)); + pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARL)); + + pr_err("\nPLB4-to-PLB6 Bridge:\n"); + pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_ESR)); + pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARH)); + pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARL)); + + pr_err("\nPLB6-to-MCIF Bridge:\n"); + pr_err("BESR0: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR0)); + pr_err("BESR1: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR1)); + pr_err("BEARH: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARH)); + pr_err("BEARL: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARL)); + + pr_err("\nPLB4 Arbiter:\n"); + pr_err("P0ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRH)); + pr_err("P0ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRL)); + pr_err("P0EARH 0x%08x\n", mfdcr(DCRN_PLB4_P0EARH)); + pr_err("P0EARH 0x%08x\n", mfdcr(DCRN_PLB4_P0EARH)); + pr_err("P1ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRH)); + pr_err("P1ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRL)); + pr_err("P1EARH 0x%08x\n", mfdcr(DCRN_PLB4_P1EARH)); + pr_err("P1EARH 0x%08x\n", mfdcr(DCRN_PLB4_P1EARH)); + + show_plbopb_regs(DCRN_PLB4OPB0_BASE, 0); + show_plbopb_regs(DCRN_PLB4OPB1_BASE, 1); + show_plbopb_regs(DCRN_PLB4OPB2_BASE, 2); + show_plbopb_regs(DCRN_PLB4OPB3_BASE, 3); + + pr_err("\nPLB4-to-AHB Bridge:\n"); + pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_ESR)); + pr_err("SEUAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SEUAR)); + pr_err("SELAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SELAR)); + + pr_err("\nAHB-to-PLB4 Bridge:\n"); + pr_err("\nESR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_ESR)); + pr_err("\nEAR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_EAR)); + panic("Bus Error\n"); +} + +static irqreturn_t cmu_err_handler(int irq, void *data) { + pr_err("CMU Error\n"); + pr_err("FIR0: 0x%08x\n", mfcmu(CMUN_FIR0)); + panic("CMU Error\n"); +} + +static irqreturn_t conf_err_handler(int irq, void *data) { + pr_err("Configuration Logic Error\n"); + pr_err("CONF_FIR: 0x%08x\n", mfdcr(DCRN_CONF_FIR_RWC)); + pr_err("RPERR0: 0x%08x\n", mfdcr(DCRN_CONF_RPERR0)); + pr_err("RPERR1: 0x%08x\n", mfdcr(DCRN_CONF_RPERR1)); + panic("Configuration Logic Error\n"); +} + +static irqreturn_t opbd_err_handler(int irq, void *data) { + panic("OPBD Error\n"); +} + +static irqreturn_t mcue_handler(int irq, void *data) { + pr_err("DDR: Uncorrectable Error\n"); + pr_err("MCSTAT: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCSTAT)); + pr_err("MCOPT1: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT1)); + pr_err("MCOPT2: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT2)); + pr_err("PHYSTAT: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_PHYSTAT)); + pr_err("CFGR0: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR0)); + pr_err("CFGR1: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR1)); + pr_err("CFGR2: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR2)); + pr_err("CFGR3: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR3)); + pr_err("SCRUB_CNTL: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_SCRUB_CNTL)); + pr_err("ECCERR_PORT0: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_PORT0)); + pr_err("ECCERR_ADDR_PORT0: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_ADDR_PORT0)); + pr_err("ECCERR_CNT_PORT0: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_COUNT_PORT0)); + pr_err("ECC_CHECK_PORT0: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECC_CHECK_PORT0)); + pr_err("MCER0: 0x%08x\n", + mfdcr(DCRN_CW_BASE + DCRN_CW_MCER0)); + pr_err("MCER1: 0x%08x\n", + mfdcr(DCRN_CW_BASE + DCRN_CW_MCER1)); + pr_err("BESR: 0x%08x\n", + mfdcr(DCRN_PLB6MCIF_BESR0)); + pr_err("BEARL: 0x%08x\n", + mfdcr(DCRN_PLB6MCIF_BEARL)); + pr_err("BEARH: 0x%08x\n", + mfdcr(DCRN_PLB6MCIF_BEARH)); + panic("DDR: Uncorrectable Error\n"); +} + +static irqreturn_t rst_wrn_handler(int irq, void *data) { + u32 crcs = mfcmu(CMUN_CRCS); + switch (crcs & CRCS_STAT_MASK) { + case CRCS_STAT_CHIP_RST_B: + panic("Received chassis-initiated reset request"); + default: + panic("Unknown external reset: CRCS=0x%x", crcs); + } +} + +static void node_irq_request(const char *compat, irq_handler_t errirq_handler) +{ + struct device_node *np; + unsigned int irq; + int32_t rc; + + for_each_compatible_node(np, NULL, compat) { + irq = irq_of_parse_and_map(np, 0); + if (irq == NO_IRQ) { + pr_err("device tree node %s is missing a interrupt", + np->name); + return; + } + + rc = request_irq(irq, errirq_handler, 0, np->name, np); + if (rc) { + pr_err("fsp_of_probe: request_irq failed: np=%s rc=%d", + np->full_name, rc); + return; + } + } +} + +static void critical_irq_setup(void) +{ + node_irq_request(FSP2_CMU_ERR, cmu_err_handler); + node_irq_request(FSP2_BUS_ERR, bus_err_handler); + node_irq_request(FSP2_CONF_ERR, conf_err_handler); + node_irq_request(FSP2_OPBD_ERR, opbd_err_handler); + node_irq_request(FSP2_MCUE, mcue_handler); + node_irq_request(FSP2_RST_WRN, rst_wrn_handler); +} + static int __init fsp2_device_probe(void) { of_platform_bus_probe(NULL, fsp2_of_bus, NULL); @@ -105,11 +302,17 @@ static int __init fsp2_probe(void) return 1; } +static void __init fsp2_irq_init(void) +{ + uic_init_tree(); + critical_irq_setup(); +} + define_machine(fsp2) { .name = "FSP-2", .probe = fsp2_probe, .progress = udbg_progress, - .init_IRQ = uic_init_tree, + .init_IRQ = fsp2_irq_init, .get_irq = uic_get_irq, .restart = ppc4xx_reset_system, .calibrate_decr = generic_calibrate_decr, -- cgit v1.2.3 From acb1feab320e38588fccc568e3767761f494976f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 17 Nov 2017 02:00:50 +1000 Subject: powerpc/64: Don't trace irqs-off at interrupt return to soft-disabled context When an interrupt is returning to a soft-disabled context (which can happen for non-maskable interrupts or synchronous interrupts), it goes through the motions of soft-disabling again, including calling TRACE_DISABLE_INTS (i.e., trace_hardirqs_off()). This is not necessary, because we must already be soft-disabled in the interrupt context, it also may be causing crashes in the irq tracing code to re-enter as an nmi. Replace it with a warning to ensure that soft-interrupts are still disabled. Fixes: 7c0482e3d055 ("powerpc/irq: Fix another case of lazy IRQ state getting out of sync") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3320bcac7192..36878b6ee8b8 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -911,9 +911,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) beq 1f rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS stb r7,PACAIRQHAPPENED(r13) -1: li r0,0 - stb r0,PACASOFTIRQEN(r13); - TRACE_DISABLE_INTS +1: +#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG) + /* The interrupt should not have soft enabled. */ + lbz r7,PACASOFTIRQEN(r13) +1: tdnei r7,0 + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING +#endif b .Ldo_restore /* -- cgit v1.2.3 From 5c45b5280196a92c4437f5648209c5bd3f08e882 Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Wed, 6 Dec 2017 17:12:28 -0200 Subject: powerpc/modules: Fix alignment of .toc section in kernel modules powerpc64 gcc can generate code that offsets an address, to access part of an object in memory. If the address is a -mcmodel=medium toc pointer relative address then code like the following is possible. addis r9,r2,var@toc@ha ld r3,var@toc@l(r9) ld r4,(var+8)@toc@l(r9) This works fine so long as var is naturally aligned, *and* r2 is sufficiently aligned. If not, there is a possibility that the offset added to access var+8 wraps over a n*64k+32k boundary. Modules don't have any guarantee that r2 is sufficiently aligned. Moreover, code generated by older compilers generates a .toc section with 2**0 alignment, which can result in relocation failures at module load time even without the wrap problem. Thus, this patch links modules with an aligned .toc section (Makefile and module.lds changes), and forces alignment for out of tree modules or those without a .toc section (module_64.c changes). Signed-off-by: Alan Modra [desnesn: updated patch to apply to powerpc-next kernel v4.15 ] Signed-off-by: Desnes A. Nunes do Rosario [mpe: Fix out-of-tree build, swap -256 for ~0xff, reflow comment] Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 1 + arch/powerpc/kernel/module.lds | 8 ++++++++ arch/powerpc/kernel/module_64.c | 16 +++++++++++----- 3 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 arch/powerpc/kernel/module.lds (limited to 'arch/powerpc') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 1381693a4a51..ccd2556bdb53 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -63,6 +63,7 @@ UTS_MACHINE := $(subst $(space),,$(machine-y)) ifdef CONFIG_PPC32 KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o else +KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/powerpc/kernel/module.lds ifeq ($(call ld-ifversion, -ge, 225000000, y),y) # Have the linker provide sfpr if possible. # There is a corresponding test in arch/powerpc/lib/Makefile diff --git a/arch/powerpc/kernel/module.lds b/arch/powerpc/kernel/module.lds new file mode 100644 index 000000000000..cea5dc124be4 --- /dev/null +++ b/arch/powerpc/kernel/module.lds @@ -0,0 +1,8 @@ +/* Force alignment of .toc section. */ +SECTIONS +{ + .toc 0 : ALIGN(256) + { + *(.got .toc) + } +} diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index e6c011d56629..d056b745743d 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -339,8 +339,11 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, char *p; if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0) me->arch.stubs_section = i; - else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) + else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) { me->arch.toc_section = i; + if (sechdrs[i].sh_addralign < 8) + sechdrs[i].sh_addralign = 8; + } else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0) dedotify_versions((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size); @@ -373,12 +376,15 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, return 0; } -/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this - gives the value maximum span in an instruction which uses a signed - offset) */ +/* + * r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the + * value maximum span in an instruction which uses a signed offset). Round down + * to a 256 byte boundary for the odd case where we are setting up r2 without a + * .toc section. + */ static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me) { - return sechdrs[me->arch.toc_section].sh_addr + 0x8000; + return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000; } /* Both low and high 16 bits are added as SIGNED additions, so if low -- cgit v1.2.3 From 988fc3ba5653278a8c14d6ccf687371775930d2b Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Thu, 9 Nov 2017 08:00:33 -0600 Subject: powerpc/pci: Separate SR-IOV Calls SR-IOV can now be enabled for the powernv platform and pseries platform. Therefore move the appropriate calls to machine dependent code instead of relying on definition at compile time. Signed-off-by: Bryant G. Ly Signed-off-by: Juan J. Alvarez Acked-by: Russell Currey Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/machdep.h | 8 +++++++ arch/powerpc/include/asm/pci-bridge.h | 4 +--- arch/powerpc/kernel/eeh_driver.c | 4 ++-- arch/powerpc/kernel/pci-common.c | 23 +++++++++++++++++++ arch/powerpc/kernel/pci_dn.c | 6 ----- arch/powerpc/platforms/powernv/eeh-powernv.c | 33 ++++++++++++++-------------- arch/powerpc/platforms/powernv/pci-ioda.c | 6 +++-- 7 files changed, 55 insertions(+), 29 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 73b92017b6d7..4298ceae6db5 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -172,11 +172,19 @@ struct machdep_calls { /* Called after scan and before resource survey */ void (*pcibios_fixup_phb)(struct pci_controller *hose); + /* + * Called after device has been added to bus and + * before sysfs has been created. + */ + void (*pcibios_bus_add_device)(struct pci_dev *pdev); + resource_size_t (*pcibios_default_alignment)(void); #ifdef CONFIG_PCI_IOV void (*pcibios_fixup_sriov)(struct pci_dev *pdev); resource_size_t (*pcibios_iov_resource_alignment)(struct pci_dev *, int resno); + int (*pcibios_sriov_enable)(struct pci_dev *pdev, u16 num_vfs); + int (*pcibios_sriov_disable)(struct pci_dev *pdev); #endif /* CONFIG_PCI_IOV */ /* Called to shutdown machine specific hardware not already controlled diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 62ed83db04ae..9f66ddebb799 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -203,10 +203,9 @@ struct pci_dn { struct eeh_dev *edev; /* eeh device */ #endif #define IODA_INVALID_PE 0xFFFFFFFF -#ifdef CONFIG_PPC_POWERNV unsigned int pe_number; - int vf_index; /* VF index in the PF */ #ifdef CONFIG_PCI_IOV + int vf_index; /* VF index in the PF */ u16 vfs_expanded; /* number of VFs IOV BAR expanded */ u16 num_vfs; /* number of VFs enabled*/ unsigned int *pe_num_map; /* PE# for the first VF PE or array */ @@ -215,7 +214,6 @@ struct pci_dn { int (*m64_map)[PCI_SRIOV_NUM_BARS]; #endif /* CONFIG_PCI_IOV */ int mps; /* Maximum Payload Size */ -#endif struct list_head child_list; struct list_head list; struct resource holes[PCI_SRIOV_NUM_BARS]; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 4f71e4c9beb7..3c0fa99c5533 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -440,7 +440,7 @@ static void *eeh_add_virt_device(void *data, void *userdata) return NULL; } -#ifdef CONFIG_PPC_POWERNV +#ifdef CONFIG_PCI_IOV pci_iov_add_virtfn(edev->physfn, pdn->vf_index); #endif return NULL; @@ -496,7 +496,7 @@ static void *eeh_rmv_device(void *data, void *userdata) (*removed)++; if (edev->physfn) { -#ifdef CONFIG_PPC_POWERNV +#ifdef CONFIG_PCI_IOV struct pci_dn *pdn = eeh_dev_to_pdn(edev); pci_iov_remove_virtfn(edev->physfn, pdn->vf_index); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 7cd2803e2cc3..68d18ff38808 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -249,8 +249,31 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *pdev, int resno) return pci_iov_resource_size(pdev, resno); } + +int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + if (ppc_md.pcibios_sriov_enable) + return ppc_md.pcibios_sriov_enable(pdev, num_vfs); + + return 0; +} + +int pcibios_sriov_disable(struct pci_dev *pdev) +{ + if (ppc_md.pcibios_sriov_disable) + return ppc_md.pcibios_sriov_disable(pdev); + + return 0; +} + #endif /* CONFIG_PCI_IOV */ +void pcibios_bus_add_device(struct pci_dev *pdev) +{ + if (ppc_md.pcibios_bus_add_device) + ppc_md.pcibios_bus_add_device(pdev); +} + static resource_size_t pcibios_io_size(const struct pci_controller *hose) { #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 0e395afbf0f4..ab147a1909c8 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -156,10 +156,8 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, pdn->parent = parent; pdn->busno = busno; pdn->devfn = devfn; -#ifdef CONFIG_PPC_POWERNV pdn->vf_index = vf_index; pdn->pe_number = IODA_INVALID_PE; -#endif INIT_LIST_HEAD(&pdn->child_list); INIT_LIST_HEAD(&pdn->list); list_add_tail(&pdn->list, &parent->child_list); @@ -226,9 +224,7 @@ void remove_dev_pci_data(struct pci_dev *pdev) */ if (pdev->is_virtfn) { pdn = pci_get_pdn(pdev); -#ifdef CONFIG_PPC_POWERNV pdn->pe_number = IODA_INVALID_PE; -#endif return; } @@ -294,9 +290,7 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, return NULL; dn->data = pdn; pdn->phb = hose; -#ifdef CONFIG_PPC_POWERNV pdn->pe_number = IODA_INVALID_PE; -#endif regs = of_get_property(dn, "reg", NULL); if (regs) { u32 addr = of_read_number(regs, 1); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 4650fb294e7a..961e64115d92 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -43,6 +43,22 @@ static int eeh_event_irq = -EINVAL; +void pnv_pcibios_bus_add_device(struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + + if (!pdev->is_virtfn) + return; + + /* + * The following operations will fail if VF's sysfs files + * aren't created or its resources aren't finalized. + */ + eeh_add_device_early(pdn); + eeh_add_device_late(pdev); + eeh_sysfs_add_device(pdev); +} + static int pnv_eeh_init(void) { struct pci_controller *hose; @@ -86,6 +102,7 @@ static int pnv_eeh_init(void) } eeh_set_pe_aux_size(max_diag_size); + ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device; return 0; } @@ -1749,22 +1766,6 @@ static struct eeh_ops pnv_eeh_ops = { .restore_config = pnv_eeh_restore_config }; -void pcibios_bus_add_device(struct pci_dev *pdev) -{ - struct pci_dn *pdn = pci_get_pdn(pdev); - - if (!pdev->is_virtfn) - return; - - /* - * The following operations will fail if VF's sysfs files - * aren't created or its resources aren't finalized. - */ - eeh_add_device_early(pdn); - eeh_add_device_late(pdev); - eeh_sysfs_add_device(pdev); -} - #ifdef CONFIG_PCI_IOV static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev) { diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 080ee202a173..b9146d5aef81 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1692,7 +1692,7 @@ m64_failed: return ret; } -int pcibios_sriov_disable(struct pci_dev *pdev) +int pnv_pcibios_sriov_disable(struct pci_dev *pdev) { pnv_pci_sriov_disable(pdev); @@ -1701,7 +1701,7 @@ int pcibios_sriov_disable(struct pci_dev *pdev) return 0; } -int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { /* Allocate PCI data */ add_dev_pci_data(pdev); @@ -4019,6 +4019,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, #ifdef CONFIG_PCI_IOV ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources; ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment; + ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable; + ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable; #endif pci_add_flags(PCI_REASSIGN_ALL_RSRC); -- cgit v1.2.3 From dae7253f9f78a731755ca20c66b2d2c40b86baea Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Thu, 9 Nov 2017 08:00:34 -0600 Subject: powerpc/pseries: Add pseries SR-IOV Machine dependent calls Add calls for pseries platform to configure/deconfigure SR-IOV. Signed-off-by: Bryant G. Ly Signed-off-by: Juan J. Alvarez Acked-by: Russell Currey Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/eeh_pseries.c | 24 +++++++++++++++++++++++ arch/powerpc/platforms/pseries/pci.c | 29 ++++++++++++++++++++++++---- 2 files changed, 49 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 6b812ad990e4..2295f117e2d3 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -55,6 +55,27 @@ static int ibm_get_config_addr_info; static int ibm_get_config_addr_info2; static int ibm_configure_pe; +void pseries_pcibios_bus_add_device(struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + + if (!pdev->is_virtfn) + return; + + pdn->device_id = pdev->device; + pdn->vendor_id = pdev->vendor; + pdn->class_code = pdev->class; + + /* + * The following operations will fail if VF's sysfs files + * aren't created or its resources aren't finalized. + */ + eeh_add_device_early(pdn); + eeh_add_device_late(pdev); + eeh_sysfs_add_device(pdev); + +} + /* * Buffer for reporting slot-error-detail rtas calls. Its here * in BSS, and not dynamically alloced, so that it ends up in @@ -120,6 +141,9 @@ static int pseries_eeh_init(void) /* Set EEH probe mode */ eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); + /* Set EEH machine dependent code */ + ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device; + return 0; } diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 09eba5a9929a..14a90cf471cc 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -3,17 +3,17 @@ * Copyright (C) 2003 Anton Blanchard , IBM * * pSeries specific routines for PCI. - * + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA @@ -54,10 +54,26 @@ void pcibios_name_device(struct pci_dev *dev) } } } -} +} DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); #endif +#ifdef CONFIG_PCI_IOV +int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + /* Allocate PCI data */ + add_dev_pci_data(pdev); + return 0; +} + +int pseries_pcibios_sriov_disable(struct pci_dev *pdev) +{ + /* Release PCI data */ + remove_dev_pci_data(pdev); + return 0; +} +#endif + static void __init pSeries_request_regions(void) { if (!isa_io_base) @@ -76,6 +92,11 @@ void __init pSeries_final_fixup(void) pSeries_request_regions(); eeh_addr_cache_build(); + +#ifdef CONFIG_PCI_IOV + ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable; + ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable; +#endif } /* -- cgit v1.2.3 From 608c0d8804ef3ca4cda8ec6ad914e47deb283d7b Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Thu, 9 Nov 2017 08:00:35 -0600 Subject: PCI/IOV: Add pci_vf_drivers_autoprobe() interface Add a pci_vf_drivers_autoprobe() interface. Setting autoprobe to false on the PF prevents drivers from binding to VFs when they are enabled. Signed-off-by: Bryant G. Ly Signed-off-by: Juan J. Alvarez Acked-by: Bjorn Helgaas Acked-by: Russell Currey Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/pci.c | 2 ++ drivers/pci/iov.c | 11 +++++++++++ include/linux/pci.h | 2 ++ 3 files changed, 15 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 14a90cf471cc..48d3af026f90 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -63,6 +63,7 @@ int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { /* Allocate PCI data */ add_dev_pci_data(pdev); + pci_vf_drivers_autoprobe(pdev, false); return 0; } @@ -70,6 +71,7 @@ int pseries_pcibios_sriov_disable(struct pci_dev *pdev) { /* Release PCI data */ remove_dev_pci_data(pdev); + pci_vf_drivers_autoprobe(pdev, true); return 0; } #endif diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 6bacb8995e96..0b7b5da63d4e 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -635,6 +635,17 @@ void pci_restore_iov_state(struct pci_dev *dev) sriov_restore_state(dev); } +/** + * pci_vf_drivers_autoprobe - set PF property drivers_autoprobe for VFs + * @dev: the PCI device + * @auto_probe: set VF drivers auto probe flag + */ +void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool auto_probe) +{ + if (dev->is_physfn) + dev->sriov->drivers_autoprobe = auto_probe; +} + /** * pci_iov_bus_range - find bus range used by Virtual Function * @bus: the PCI bus diff --git a/include/linux/pci.h b/include/linux/pci.h index 0403894147a3..e3e94467687a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1961,6 +1961,7 @@ int pci_vfs_assigned(struct pci_dev *dev); int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); int pci_sriov_get_totalvfs(struct pci_dev *dev); resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno); +void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe); #else static inline int pci_iov_virtfn_bus(struct pci_dev *dev, int id) { @@ -1988,6 +1989,7 @@ static inline int pci_sriov_get_totalvfs(struct pci_dev *dev) { return 0; } static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) { return 0; } +static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { } #endif #if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE) -- cgit v1.2.3 From 60b58afc96c9df71871df2dbad42037757ceef26 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Dec 2017 17:55:14 -0800 Subject: bpf: fix net.core.bpf_jit_enable race global bpf_jit_enable variable is tested multiple times in JITs, blinding and verifier core. The malicious root can try to toggle it while loading the programs. This race condition was accounted for and there should be no issues, but it's safer to avoid this race condition. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 2 +- arch/arm64/net/bpf_jit_comp.c | 2 +- arch/mips/net/ebpf_jit.c | 2 +- arch/powerpc/net/bpf_jit_comp64.c | 2 +- arch/s390/net/bpf_jit_comp.c | 2 +- arch/sparc/net/bpf_jit_comp_64.c | 2 +- arch/x86/net/bpf_jit_comp.c | 2 +- include/linux/filter.h | 5 +++-- kernel/bpf/core.c | 3 ++- kernel/bpf/verifier.c | 2 +- 10 files changed, 13 insertions(+), 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index c199990e12b6..4425189bb24c 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1824,7 +1824,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* If BPF JIT was not enabled then we must fall back to * the interpreter. */ - if (!bpf_jit_enable) + if (!prog->jit_requested) return orig_prog; /* If constant blinding was enabled and we failed during blinding diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index ba38d403abb2..288137cb0871 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -844,7 +844,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) int image_size; u8 *image_ptr; - if (!bpf_jit_enable) + if (!prog->jit_requested) return orig_prog; tmp = bpf_jit_blind_constants(prog); diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 962b0259b4b6..97069a1b6f43 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -1869,7 +1869,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) unsigned int image_size; u8 *image_ptr; - if (!bpf_jit_enable || !cpu_has_mips64r2) + if (!prog->jit_requested || !cpu_has_mips64r2) return prog; tmp = bpf_jit_blind_constants(prog); diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 46d74e81aff1..d5a5bc43cf8f 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -993,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_prog *tmp_fp; bool bpf_blinded = false; - if (!bpf_jit_enable) + if (!fp->jit_requested) return org_fp; tmp_fp = bpf_jit_blind_constants(org_fp); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index e81c16838b90..f4baa8c514d3 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1300,7 +1300,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_jit jit; int pass; - if (!bpf_jit_enable) + if (!fp->jit_requested) return orig_fp; tmp = bpf_jit_blind_constants(fp); diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 5765e7e711f7..a2f1b5e774a7 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1517,7 +1517,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) u8 *image_ptr; int pass; - if (!bpf_jit_enable) + if (!prog->jit_requested) return orig_prog; tmp = bpf_jit_blind_constants(prog); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 0554e8aef4d5..68859b58ab84 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1121,7 +1121,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) int pass; int i; - if (!bpf_jit_enable) + if (!prog->jit_requested) return orig_prog; tmp = bpf_jit_blind_constants(prog); diff --git a/include/linux/filter.h b/include/linux/filter.h index f26e6da1007b..3d6edc34932c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -458,6 +458,7 @@ struct bpf_binary_header { struct bpf_prog { u16 pages; /* Number of allocated pages */ u16 jited:1, /* Is our filter JIT'ed? */ + jit_requested:1,/* archs need to JIT the prog */ locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ @@ -804,7 +805,7 @@ static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) return fp->jited && bpf_jit_is_ebpf(); } -static inline bool bpf_jit_blinding_enabled(void) +static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) { /* These are the prerequisites, should someone ever have the * idea to call blinding outside of them, we make sure to @@ -812,7 +813,7 @@ static inline bool bpf_jit_blinding_enabled(void) */ if (!bpf_jit_is_ebpf()) return false; - if (!bpf_jit_enable) + if (!prog->jit_requested) return false; if (!bpf_jit_harden) return false; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index dc12c4fd006e..bda911644b1c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -94,6 +94,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) fp->pages = size / PAGE_SIZE; fp->aux = aux; fp->aux->prog = fp; + fp->jit_requested = ebpf_jit_enabled(); INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode); @@ -721,7 +722,7 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) struct bpf_insn *insn; int i, rewritten; - if (!bpf_jit_blinding_enabled()) + if (!bpf_jit_blinding_enabled(prog)) return prog; clone = bpf_prog_clone_create(prog, GFP_USER); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index cdc1f043c69b..8e0e4cd0d5e4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5080,7 +5080,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup * handlers are currently limited to 64 bit only. */ - if (ebpf_jit_enabled() && BITS_PER_LONG == 64 && + if (prog->jit_requested && BITS_PER_LONG == 64 && insn->imm == BPF_FUNC_map_lookup_elem) { map_ptr = env->insn_aux_data[i + delta].map_ptr; if (map_ptr == BPF_MAP_PTR_POISON || -- cgit v1.2.3 From dd1ea5763e65c69983697dc97ea9d6e3cb9df922 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 10 Nov 2017 19:52:29 +0200 Subject: powerpc/pci: Use for_each_pci_bridge() helper Use for_each_pci_bridge() helper to make the code slightly cleaner. No functional change intended. Requires: 24a0c654d7d6 ("PCI: Add for_each_pci_bridge() helper") Signed-off-by: Andy Shevchenko Signed-off-by: Bjorn Helgaas Acked-by: Michael Ellerman --- arch/powerpc/kernel/pci-hotplug.c | 7 ++----- arch/powerpc/kernel/pci_of_scan.c | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 2d71269e7dc1..741f47295188 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -134,11 +134,8 @@ void pci_hp_add_devices(struct pci_bus *bus) pcibios_setup_bus_devices(bus); max = bus->busn_res.start; for (pass = 0; pass < 2; pass++) { - list_for_each_entry(dev, &bus->devices, bus_list) { - if (pci_is_bridge(dev)) - max = pci_scan_bridge(bus, dev, - max, pass); - } + for_each_pci_bridge(dev, bus) + max = pci_scan_bridge(bus, dev, max, pass); } } pcibios_finish_adding_to_bus(bus); diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 0d790f8432d2..8bdaa2a6fa62 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -369,11 +369,8 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus, pcibios_setup_bus_devices(bus); /* Now scan child busses */ - list_for_each_entry(dev, &bus->devices, bus_list) { - if (pci_is_bridge(dev)) { - of_scan_pci_bridge(dev); - } - } + for_each_pci_bridge(dev, bus) + of_scan_pci_bridge(dev); } /** -- cgit v1.2.3 From 9d987eba8ed794b9fcf0d08cf1b312d2ece598de Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 10 Nov 2017 19:52:30 +0200 Subject: powerpc/pci: Unroll two pass loop when scanning bridges The current scanning code is really hard to understand because it calls the same function in a loop where pass value is changed without any comments explaining it: for (pass = 0; pass < 2; pass++) for_each_pci_bridge(dev, bus) max = pci_scan_bridge(bus, dev, max, pass); Unfamiliar reader cannot tell easily what is the purpose of this loop without looking at internals of pci_scan_bridge(). In order to make this bit easier to understand, open-code the loop in pci_scan_child_bus() and pci_hp_add_bridge() with added comments. No functional changes intended. Signed-off-by: Andy Shevchenko Signed-off-by: Bjorn Helgaas Reviewed-by: Mika Westerberg --- arch/powerpc/kernel/pci-hotplug.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 741f47295188..cf47b1aec4c2 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -104,7 +104,7 @@ EXPORT_SYMBOL_GPL(pci_hp_remove_devices); */ void pci_hp_add_devices(struct pci_bus *bus) { - int slotno, mode, pass, max; + int slotno, mode, max; struct pci_dev *dev; struct pci_controller *phb; struct device_node *dn = pci_bus_to_OF_node(bus); @@ -133,10 +133,17 @@ void pci_hp_add_devices(struct pci_bus *bus) pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); pcibios_setup_bus_devices(bus); max = bus->busn_res.start; - for (pass = 0; pass < 2; pass++) { - for_each_pci_bridge(dev, bus) - max = pci_scan_bridge(bus, dev, max, pass); - } + /* + * Scan bridges that are already configured. We don't touch + * them unless they are misconfigured (which will be done in + * the second scan below). + */ + for_each_pci_bridge(dev, bus) + max = pci_scan_bridge(bus, dev, max, 0); + + /* Scan bridges that need to be reconfigured */ + for_each_pci_bridge(dev, bus) + max = pci_scan_bridge(bus, dev, max, 1); } pcibios_finish_adding_to_bus(bus); } -- cgit v1.2.3 From 961292e664e2ebfb56cf0733d4587711d7bc4c69 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 4 Dec 2017 18:08:01 -0600 Subject: powerpc: Set I/O port resource types correctly Set I/O port resource structs to have IORESOURCE_IO in their type field. Previously we marked these as merely IORESOURCE_BUSY without indicating the type. Setting the type doesn't fix any functional problem but makes %pR on the resource work better. Signed-off-by: Bjorn Helgaas --- arch/powerpc/platforms/maple/time.c | 2 +- arch/powerpc/sysdev/i8259.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c index 81799d70a1ee..cfddc87f81bf 100644 --- a/arch/powerpc/platforms/maple/time.c +++ b/arch/powerpc/platforms/maple/time.c @@ -134,7 +134,7 @@ int maple_set_rtc_time(struct rtc_time *tm) static struct resource rtc_iores = { .name = "rtc", - .flags = IORESOURCE_BUSY, + .flags = IORESOURCE_IO | IORESOURCE_BUSY, }; unsigned long __init maple_get_boot_time(void) diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index bafb014e1a7e..cb9a8b71fd0f 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -145,21 +145,21 @@ static struct resource pic1_iores = { .name = "8259 (master)", .start = 0x20, .end = 0x21, - .flags = IORESOURCE_BUSY, + .flags = IORESOURCE_IO | IORESOURCE_BUSY, }; static struct resource pic2_iores = { .name = "8259 (slave)", .start = 0xa0, .end = 0xa1, - .flags = IORESOURCE_BUSY, + .flags = IORESOURCE_IO | IORESOURCE_BUSY, }; static struct resource pic_edgectrl_iores = { .name = "8259 edge control", .start = 0x4d0, .end = 0x4d1, - .flags = IORESOURCE_BUSY, + .flags = IORESOURCE_IO | IORESOURCE_BUSY, }; static int i8259_host_match(struct irq_domain *h, struct device_node *node, -- cgit v1.2.3 From 59aa31fd6f964ac63ac9cb90c8468f60e9bb0756 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 6 Nov 2017 00:50:45 -0800 Subject: powerpc: introduce pte_set_hidx() helper Introduce pte_set_hidx().It sets the (H_PAGE_F_SECOND|H_PAGE_F_GIX) bits at the appropriate location in the PTE of 4K PTE. For 64K PTE, it sets the bits in the second part of the PTE. Though the implementation for the former just needs the slot parameter, it does take some additional parameters to keep the prototype consistent. This function will be handy as we work towards re-arranging the bits in the subsequent patches. Acked-by: Balbir Singh Reviewed-by: Aneesh Kumar K.V Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 14 ++++++++++++++ arch/powerpc/include/asm/book3s/64/hash-64k.h | 25 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 197ced1eaaa0..2975fc172d1a 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -49,6 +49,20 @@ static inline int hash__hugepd_ok(hugepd_t hpd) } #endif +/* + * 4K PTE format is different from 64K PTE format. Saving the hash_slot is just + * a matter of returning the PTE bits that need to be modified. On 64K PTE, + * things are a little more involved and hence needs many more parameters to + * accomplish the same. However we want to abstract this out from the caller by + * keeping the prototype consistent across the two formats. + */ +static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, + unsigned int subpg_index, unsigned long hidx) +{ + return (hidx << H_PAGE_F_GIX_SHIFT) & + (H_PAGE_F_SECOND | H_PAGE_F_GIX); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline char *get_hpte_slot_array(pmd_t *pmdp) diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 8d40cf03cb67..c1bd258b7303 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -68,6 +68,8 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) return rpte; } +#define HIDX_BITS(x, index) (x << (index << 2)) + static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) { if ((pte_val(rpte.pte) & H_PAGE_COMBO)) @@ -75,6 +77,29 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf; } +/* + * Commit the hidx and return PTE bits that needs to be modified. The caller is + * expected to modify the PTE bits accordingly and commit the PTE to memory. + */ +static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, + unsigned int subpg_index, unsigned long hidx) +{ + unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); + + rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index); + *hidxp = rpte.hidx | HIDX_BITS(hidx, subpg_index); + + /* + * Anyone reading PTE must ensure hidx bits are read after reading the + * PTE by using the read-side barrier smp_rmb(). __real_pte() can be + * used for that. + */ + smp_wmb(); + + /* No PTE bits to be modified, return 0x0UL */ + return 0x0UL; +} + #define __rpte_to_pte(r) ((r).pte) extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); /* -- cgit v1.2.3 From 318995b4f5fa814b9f9aa434ca845b9a2cb0ae02 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 6 Nov 2017 00:50:46 -0800 Subject: powerpc: introduce pte_get_hash_gslot() helper Introduce pte_get_hash_gslot()() which returns the global slot number of the HPTE in the global hash table. This function will come in handy as we work towards re-arranging the PTE bits in the later patches. Reviewed-by: Aneesh Kumar K.V Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash.h | 3 +++ arch/powerpc/mm/hash_utils_64.c | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index ecb1239d74f4..9099c1f3a54e 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -167,6 +167,9 @@ static inline int hash__pte_none(pte_t pte) return (pte_val(pte) & ~H_PTE_NONE_MASK) == 0; } +unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift, + int ssize, real_pte_t rpte, unsigned int subpg_index); + /* This low level function performs the actual PTE insertion * Setting the PTE depends on the MMU type and other factors. It's * an horrible mess that I'm not going to try to clean up now but diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 655a5a9a183d..b35bd0202934 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1592,6 +1592,24 @@ static inline void tm_flush_hash_page(int local) } #endif +/* + * Return the global hash slot, corresponding to the given PTE, which contains + * the HPTE. + */ +unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift, + int ssize, real_pte_t rpte, unsigned int subpg_index) +{ + unsigned long hash, gslot, hidx; + + hash = hpt_hash(vpn, shift, ssize); + hidx = __rpte_to_hidx(rpte, subpg_index); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + gslot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + gslot += hidx & _PTEIDX_GROUP_IX; + return gslot; +} + /* WARNING: This is called from hash_low_64.S, if you change this prototype, * do not forget to update the assembly call site ! */ -- cgit v1.2.3 From 9d2edb18486febea930ea028dd128544cc28f3fe Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 6 Nov 2017 00:50:47 -0800 Subject: powerpc: Free up four 64K PTE bits in 4K backed HPTE pages Rearrange 64K PTE bits to free up bits 3, 4, 5 and 6, in the 4K backed HPTE pages.These bits continue to be used for 64K backed HPTE pages in this patch, but will be freed up in the next patch. The bit numbers are big-endian as defined in the ISA3.0 The patch does the following change to the 4k HTPE backed 64K PTE's format. H_PAGE_BUSY moves from bit 3 to bit 9 (B bit in the figure below) V0 which occupied bit 4 is not used anymore. V1 which occupied bit 5 is not used anymore. V2 which occupied bit 6 is not used anymore. V3 which occupied bit 7 is not used anymore. Before the patch, the 4k backed 64k PTE format was as follows 0 1 2 3 4 5 6 7 8 9 10...........................63 : : : : : : : : : : : : v v v v v v v v v v v v ,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-, |x|x|x|B|V0|V1|V2|V3|x| | |x|x|................|x|x|x|x| <- primary pte '_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_' |S|G|I|X|S |G |I |X |S|G|I|X|..................|S|G|I|X| <- secondary pte '_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_' After the patch, the 4k backed 64k PTE format is as follows 0 1 2 3 4 5 6 7 8 9 10...........................63 : : : : : : : : : : : : v v v v v v v v v v v v ,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-, |x|x|x| | | | | |x|B| |x|x|................|.|.|.|.| <- primary pte '_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_' |S|G|I|X|S |G |I |X |S|G|I|X|..................|S|G|I|X| <- secondary pte '_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_' the four bits S,G,I,X (one quadruplet per 4k HPTE) that cache the hash-bucket slot value, is initialized to 1,1,1,1 indicating -- an invalid slot. If a HPTE gets cached in a 1111 slot(i.e 7th slot of secondary hash bucket), it is released immediately. In other words, even though 1111 is a valid slot value in the hash bucket, we consider it invalid and release the slot and the HPTE. This gives us the opportunity to determine the validity of S,G,I,X bits based on its contents and not on any of the bits V0,V1,V2 or V3 in the primary PTE When we release a HPTE cached in the 1111 slot we also release a legitimate slot in the primary hash bucket and unmap its corresponding HPTE. This is to ensure that we do get a HPTE cached in a slot of the primary hash bucket, the next time we retry. Though treating 1111 slot as invalid, reduces the number of available slots in the hash bucket and may have an effect on the performance, the probabilty of hitting a 1111 slot is extermely low. Compared to the current scheme, the above scheme reduces the number of false hash table updates significantly and has the added advantage of releasing four valuable PTE bits for other purpose. NOTE:even though bits 3, 4, 5, 6, 7 are not used when the 64K PTE is backed by 4k HPTE, they continue to be used if the PTE gets backed by 64k HPTE. The next patch will decouple that aswell, and truely release the bits. This idea was jointly developed by Paul Mackerras, Aneesh, Michael Ellermen and myself. 4K PTE format remains unchanged currently. The patch does the following code changes a) PTE flags are split between 64k and 4k header files. b) __hash_page_4K() is reimplemented to reflect the above logic. Acked-by: Balbir Singh Reviewed-by: Aneesh Kumar K.V Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 2 + arch/powerpc/include/asm/book3s/64/hash-64k.h | 9 +-- arch/powerpc/include/asm/book3s/64/hash.h | 1 - arch/powerpc/mm/hash64_64k.c | 102 +++++++++++++------------- arch/powerpc/mm/hash_utils_64.c | 4 +- 5 files changed, 61 insertions(+), 57 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 2975fc172d1a..e0130c34766e 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -17,6 +17,8 @@ #define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) +#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ + /* PTE flags to conserve for HPTE identification */ #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \ H_PAGE_F_SECOND | H_PAGE_F_GIX) diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index c1bd258b7303..c929e6437a60 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -13,18 +13,14 @@ */ #define H_PAGE_COMBO _RPAGE_RPN0 /* this is a combo 4k page */ #define H_PAGE_4K_PFN _RPAGE_RPN1 /* PFN is for a single 4k page */ +#define H_PAGE_BUSY _RPAGE_RPN42 /* software: PTE & hash are busy */ + /* * We need to differentiate between explicit huge page and THP huge * page, since THP huge page also need to track real subpage details */ #define H_PAGE_THP_HUGE H_PAGE_4K_PFN -/* - * Used to track subpage group valid if H_PAGE_COMBO is set - * This overloads H_PAGE_F_GIX and H_PAGE_F_SECOND - */ -#define H_PAGE_COMBO_VALID (H_PAGE_F_GIX | H_PAGE_F_SECOND) - /* PTE flags to conserve for HPTE identification */ #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \ H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO) @@ -69,6 +65,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) } #define HIDX_BITS(x, index) (x << (index << 2)) +#define INVALID_RPTE_HIDX ~(0x0UL) static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) { diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 9099c1f3a54e..4e19a612a963 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -10,7 +10,6 @@ */ #define H_PTE_NONE_MASK _PAGE_HPTEFLAGS #define H_PAGE_F_GIX_SHIFT 56 -#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ #define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */ #define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44) #define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */ diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 1a68cb19b0e3..b79f50c693cf 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -15,34 +15,22 @@ #include #include #include + /* - * index from 0 - 15 + * Return true, if the entry has a slot value which + * the software considers as invalid. */ -bool __rpte_sub_valid(real_pte_t rpte, unsigned long index) +static inline bool hpte_soft_invalid(unsigned long hidx) { - unsigned long g_idx; - unsigned long ptev = pte_val(rpte.pte); - - g_idx = (ptev & H_PAGE_COMBO_VALID) >> H_PAGE_F_GIX_SHIFT; - index = index >> 2; - if (g_idx & (0x1 << index)) - return true; - else - return false; + return ((hidx & 0xfUL) == 0xfUL); } + /* * index from 0 - 15 */ -static unsigned long mark_subptegroup_valid(unsigned long ptev, unsigned long index) +bool __rpte_sub_valid(real_pte_t rpte, unsigned long index) { - unsigned long g_idx; - - if (!(ptev & H_PAGE_COMBO)) - return ptev; - index = index >> 2; - g_idx = 0x1 << index; - - return ptev | (g_idx << H_PAGE_F_GIX_SHIFT); + return !(hpte_soft_invalid(__rpte_to_hidx(rpte, index))); } int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, @@ -50,12 +38,11 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, int ssize, int subpg_prot) { real_pte_t rpte; - unsigned long *hidxp; unsigned long hpte_group; unsigned int subpg_index; - unsigned long rflags, pa, hidx; + unsigned long rflags, pa; unsigned long old_pte, new_pte, subpg_pte; - unsigned long vpn, hash, slot; + unsigned long vpn, hash, slot, gslot; unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift; /* @@ -126,18 +113,14 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, if (__rpte_sub_valid(rpte, subpg_index)) { int ret; - hash = hpt_hash(vpn, shift, ssize); - hidx = __rpte_to_hidx(rpte, subpg_index); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - - ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, + gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, + subpg_index); + ret = mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_4K, MMU_PAGE_4K, ssize, flags); + /* - *if we failed because typically the HPTE wasn't really here + * If we failed because typically the HPTE wasn't really here * we try an insertion. */ if (ret == -1) @@ -148,6 +131,14 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, } htab_insert_hpte: + + /* + * Initialize all hidx entries to invalid value, the first time + * the PTE is about to allocate a 4K HPTE. + */ + if (!(old_pte & H_PAGE_COMBO)) + rpte.hidx = INVALID_RPTE_HIDX; + /* * handle H_PAGE_4K_PFN case */ @@ -172,15 +163,39 @@ repeat: * Primary is full, try the secondary */ if (unlikely(slot == -1)) { + bool soft_invalid; + hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, HPTE_V_SECONDARY, MMU_PAGE_4K, MMU_PAGE_4K, ssize); - if (slot == -1) { - if (mftb() & 0x1) + + soft_invalid = hpte_soft_invalid(slot); + if (unlikely(soft_invalid)) { + /* + * We got a valid slot from a hardware point of view. + * but we cannot use it, because we use this special + * value; as defined by hpte_soft_invalid(), to track + * invalid slots. We cannot use it. So invalidate it. + */ + gslot = slot & _PTEIDX_GROUP_IX; + mmu_hash_ops.hpte_invalidate(hpte_group + gslot, vpn, + MMU_PAGE_4K, MMU_PAGE_4K, + ssize, 0); + } + + if (unlikely(slot == -1 || soft_invalid)) { + /* + * For soft invalid slot, let's ensure that we release a + * slot from the primary, with the hope that we will + * acquire that slot next time we try. This will ensure + * that we do not get the same soft-invalid slot. + */ + if (soft_invalid || (mftb() & 0x1)) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? @@ -198,21 +213,10 @@ repeat: MMU_PAGE_4K, MMU_PAGE_4K, old_pte); return -1; } - /* - * Insert slot number & secondary bit in PTE second half, - * clear H_PAGE_BUSY and set appropriate HPTE slot bit - * Since we have H_PAGE_BUSY set on ptep, we can be sure - * nobody is undating hidx. - */ - hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); - rpte.hidx &= ~(0xfUL << (subpg_index << 2)); - *hidxp = rpte.hidx | (slot << (subpg_index << 2)); - new_pte = mark_subptegroup_valid(new_pte, subpg_index); - new_pte |= H_PAGE_HASHPTE; - /* - * check __real_pte for details on matching smp_rmb() - */ - smp_wmb(); + + new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot); + new_pte |= H_PAGE_HASHPTE; + *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b35bd0202934..88102b6c664c 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -979,8 +979,9 @@ void __init hash__early_init_devtree(void) void __init hash__early_init_mmu(void) { +#ifndef CONFIG_PPC_64K_PAGES /* - * We have code in __hash_page_64K() and elsewhere, which assumes it can + * We have code in __hash_page_4K() and elsewhere, which assumes it can * do the following: * new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & (H_PAGE_F_SECOND | H_PAGE_F_GIX); * @@ -991,6 +992,7 @@ void __init hash__early_init_mmu(void) * with a BUILD_BUG_ON(). */ BUILD_BUG_ON(H_PAGE_F_SECOND != (1ul << (H_PAGE_F_GIX_SHIFT + 3))); +#endif /* CONFIG_PPC_64K_PAGES */ htab_init_page_sizes(); -- cgit v1.2.3 From bf9a95f9a6481bc6ec98ef7b328c14177eeb3492 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 6 Nov 2017 00:50:48 -0800 Subject: powerpc: Free up four 64K PTE bits in 64K backed HPTE pages Rearrange 64K PTE bits to free up bits 3, 4, 5 and 6 in the 64K backed HPTE pages. This along with the earlier patch will entirely free up the four bits from 64K PTE. The bit numbers are big-endian as defined in the ISA3.0 This patch does the following change to 64K PTE backed by 64K HPTE. H_PAGE_F_SECOND (S) which occupied bit 4 moves to the second part of the pte to bit 60. H_PAGE_F_GIX (G,I,X) which occupied bit 5, 6 and 7 also moves to the second part of the pte to bit 61, 62, 63, 64 respectively since bit 7 is now freed up, we move H_PAGE_BUSY (B) from bit 9 to bit 7. The second part of the PTE will hold (H_PAGE_F_SECOND|H_PAGE_F_GIX) at bit 60,61,62,63. NOTE: None of the bits in the secondary PTE were not used by 64k-HPTE backed PTE. Before the patch, the 64K HPTE backed 64k PTE format was as follows 0 1 2 3 4 5 6 7 8 9 10...........................63 : : : : : : : : : : : : v v v v v v v v v v v v ,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-, |x|x|x| |S |G |I |X |x|B| |x|x|................|x|x|x|x| <- primary pte '_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_' | | | | | | | | | | | | |..................| | | | | <- secondary pte '_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_' After the patch, the 64k HPTE backed 64k PTE format is as follows 0 1 2 3 4 5 6 7 8 9 10...........................63 : : : : : : : : : : : : v v v v v v v v v v v v ,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-, |x|x|x| | | | |B |x| | |x|x|................|.|.|.|.| <- primary pte '_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_' | | | | | | | | | | | | |..................|S|G|I|X| <- secondary pte '_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_' The above PTE changes is applicable to hugetlbpages aswell. The patch does the following code changes: a) moves the H_PAGE_F_SECOND and H_PAGE_F_GIX to 4k PTE header since it is no more needed b the 64k PTEs. b) abstracts out __real_pte() and __rpte_to_hidx() so the caller need not know the bit location of the slot. c) moves the slot bits to the secondary pte. Reviewed-by: Aneesh Kumar K.V Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 3 +++ arch/powerpc/include/asm/book3s/64/hash-64k.h | 30 +++++++++++++-------------- arch/powerpc/include/asm/book3s/64/hash.h | 3 --- arch/powerpc/mm/hash64_64k.c | 21 +++++++++---------- arch/powerpc/mm/hugetlbpage-hash64.c | 16 ++++++-------- 5 files changed, 33 insertions(+), 40 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index e0130c34766e..ed0d1cd98907 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -17,6 +17,9 @@ #define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) +#define H_PAGE_F_GIX_SHIFT 56 +#define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */ +#define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44) #define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ /* PTE flags to conserve for HPTE identification */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index c929e6437a60..73ed988e6da5 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -13,7 +13,7 @@ */ #define H_PAGE_COMBO _RPAGE_RPN0 /* this is a combo 4k page */ #define H_PAGE_4K_PFN _RPAGE_RPN1 /* PFN is for a single 4k page */ -#define H_PAGE_BUSY _RPAGE_RPN42 /* software: PTE & hash are busy */ +#define H_PAGE_BUSY _RPAGE_RPN44 /* software: PTE & hash are busy */ /* * We need to differentiate between explicit huge page and THP huge @@ -22,8 +22,7 @@ #define H_PAGE_THP_HUGE H_PAGE_4K_PFN /* PTE flags to conserve for HPTE identification */ -#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \ - H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO) +#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO) /* * we support 16 fragments per PTE page of 64K size. */ @@ -51,27 +50,26 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) unsigned long *hidxp; rpte.pte = pte; - rpte.hidx = 0; - if (pte_val(pte) & H_PAGE_COMBO) { - /* - * Make sure we order the hidx load against the H_PAGE_COMBO - * check. The store side ordering is done in __hash_page_4K - */ - smp_rmb(); - hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); - rpte.hidx = *hidxp; - } + + /* + * Ensure that we do not read the hidx before we read the PTE. Because + * the writer side is expected to finish writing the hidx first followed + * by the PTE, by using smp_wmb(). pte_set_hash_slot() ensures that. + */ + smp_rmb(); + + hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); + rpte.hidx = *hidxp; return rpte; } #define HIDX_BITS(x, index) (x << (index << 2)) +#define BITS_TO_HIDX(x, index) ((x >> (index << 2)) & 0xfUL) #define INVALID_RPTE_HIDX ~(0x0UL) static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) { - if ((pte_val(rpte.pte) & H_PAGE_COMBO)) - return (rpte.hidx >> (index<<2)) & 0xf; - return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf; + return BITS_TO_HIDX(rpte.hidx, index); } /* diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 4e19a612a963..e72474f13936 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -9,9 +9,6 @@ * */ #define H_PTE_NONE_MASK _PAGE_HPTEFLAGS -#define H_PAGE_F_GIX_SHIFT 56 -#define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */ -#define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44) #define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */ #ifdef CONFIG_PPC_64K_PAGES diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index b79f50c693cf..2253bbc6a599 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -103,8 +103,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * On hash insert failure we use old pte value and we don't * want slot information there if we have a insert failure. */ - old_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND); - new_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND); + old_pte &= ~H_PAGE_HASHPTE; + new_pte &= ~H_PAGE_HASHPTE; goto htab_insert_hpte; } /* @@ -225,6 +225,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize) { + real_pte_t rpte; unsigned long hpte_group; unsigned long rflags, pa; unsigned long old_pte, new_pte; @@ -261,6 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); rflags = htab_convert_pte_flags(new_pte); + rpte = __real_pte(__pte(old_pte), ptep); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -268,16 +270,13 @@ int __hash_page_64K(unsigned long ea, unsigned long access, vpn = hpt_vpn(ea, vsid, ssize); if (unlikely(old_pte & H_PAGE_HASHPTE)) { + unsigned long gslot; + /* * There MIGHT be an HPTE for this pte */ - hash = hpt_hash(vpn, shift, ssize); - if (old_pte & H_PAGE_F_SECOND) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - - if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, + gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0); + if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_64K, MMU_PAGE_64K, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; @@ -326,9 +325,9 @@ repeat: MMU_PAGE_64K, MMU_PAGE_64K, old_pte); return -1; } + new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; - new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & - (H_PAGE_F_SECOND | H_PAGE_F_GIX); + new_pte |= pte_set_hidx(ptep, rpte, 0, slot); } *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 0c2a91df3210..12511f5a015f 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -23,6 +23,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, unsigned int shift, unsigned int mmu_psize) { + real_pte_t rpte; unsigned long vpn; unsigned long old_pte, new_pte; unsigned long rflags, pa, sz; @@ -62,6 +63,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); rflags = htab_convert_pte_flags(new_pte); + rpte = __real_pte(__pte(old_pte), ptep); sz = ((1UL) << shift); if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -72,15 +74,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, /* Check if pte already has an hpte (case 2) */ if (unlikely(old_pte & H_PAGE_HASHPTE)) { /* There MIGHT be an HPTE for this pte */ - unsigned long hash, slot; + unsigned long gslot; - hash = hpt_hash(vpn, shift, ssize); - if (old_pte & H_PAGE_F_SECOND) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - - if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize, + gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0); + if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, mmu_psize, mmu_psize, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -107,8 +104,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, return -1; } - new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & - (H_PAGE_F_SECOND | H_PAGE_F_GIX); + new_pte |= pte_set_hidx(ptep, rpte, 0, slot); } /* -- cgit v1.2.3 From 7b84947cadf18f9a7763f281509db1f24073b4af Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 6 Nov 2017 00:50:49 -0800 Subject: powerpc: shifted-by-one hidx value 0xf is considered invalid hidx value. It indicates absence of a backing HPTE. A PTE is initialized to 0xf either a) when it is new it is newly allocated to hold 4k-backing-HPTE or b) Any time it gets demoted to a 4k-backing-HPTE This patch shifts the representation by one-modulo-0xf; i.e hidx 0 is represented as 1, 1 as 2,... , and 0xf as 0. This convention lets us initialize the secondary-part of the PTE to all zeroes. PTEs are anyway zero'd when allocated. We do not have to zero them again; thus saving on the initialization. Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-64k.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 73ed988e6da5..b78d94e5249b 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -63,13 +63,21 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) return rpte; } +/* + * shift the hidx representation by one-modulo-0xf; i.e hidx 0 is respresented + * as 1, 1 as 2,... , and 0xf as 0. This convention lets us represent a + * invalid hidx 0xf with a 0x0 bit value. PTEs are anyway zero'd when + * allocated. We dont have to zero them gain; thus save on the initialization. + */ +#define HIDX_UNSHIFT_BY_ONE(x) ((x + 0xfUL) & 0xfUL) /* shift backward by one */ +#define HIDX_SHIFT_BY_ONE(x) ((x + 0x1UL) & 0xfUL) /* shift forward by one */ #define HIDX_BITS(x, index) (x << (index << 2)) #define BITS_TO_HIDX(x, index) ((x >> (index << 2)) & 0xfUL) -#define INVALID_RPTE_HIDX ~(0x0UL) +#define INVALID_RPTE_HIDX 0x0UL static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) { - return BITS_TO_HIDX(rpte.hidx, index); + return HIDX_UNSHIFT_BY_ONE(BITS_TO_HIDX(rpte.hidx, index)); } /* @@ -82,7 +90,7 @@ static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte, unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index); - *hidxp = rpte.hidx | HIDX_BITS(hidx, subpg_index); + *hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index); /* * Anyone reading PTE must ensure hidx bits are read after reading the -- cgit v1.2.3 From 273b49368966559dd155323c64ec3421e8f4049d Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 6 Nov 2017 00:50:50 -0800 Subject: powerpc: Swizzle around 4K PTE bits to free up bit 5 and bit 6 We need PTE bits 3 ,4, 5, 6 and 57 to support protection-keys, because these are the bits we want to consolidate on across all configuration to support protection keys. Bit 3,4,5 and 6 are currently used on 4K-pte kernels. But bit 9 and 10 are available. Hence we use the two available bits and free up bit 5 and 6. We will still not be able to free up bit 3 and 4. In the absence of any other free bits, we will have to stay satisfied with what we have :-(. This means we will not be able to support 32 protection keys, but only 8. The bit numbers are big-endian as defined in the ISA3.0 This patch does the following change to 4K PTE. H_PAGE_F_SECOND (S) which occupied bit 4 moves to bit 7. H_PAGE_F_GIX (G,I,X) which occupied bit 5, 6 and 7 also moves to bit 8,9, 10 respectively. H_PAGE_HASHPTE (H) which occupied bit 8 moves to bit 4. Before the patch, the 4k PTE format was as follows 0 1 2 3 4 5 6 7 8 9 10....................57.....63 : : : : : : : : : : : : : v v v v v v v v v v v v v ,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-, |x|x|x|B|S |G |I |X |H| | |x|x|................| |x|x|x| '_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_' After the patch, the 4k PTE format is as follows 0 1 2 3 4 5 6 7 8 9 10....................57.....63 : : : : : : : : : : : : : v v v v v v v v v v v v v ,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-, |x|x|x|B|H | | |S |G|I|X|x|x|................| |.|.|.| '_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_' The patch has no code changes; just swizzles around bits. Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 7 ++++--- arch/powerpc/include/asm/book3s/64/hash-64k.h | 1 + arch/powerpc/include/asm/book3s/64/hash.h | 1 - 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index ed0d1cd98907..8a91f2475ea3 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -17,10 +17,11 @@ #define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) -#define H_PAGE_F_GIX_SHIFT 56 -#define H_PAGE_F_SECOND _RPAGE_RSV2 /* HPTE is in 2ndary HPTEG */ -#define H_PAGE_F_GIX (_RPAGE_RSV3 | _RPAGE_RSV4 | _RPAGE_RPN44) +#define H_PAGE_F_GIX_SHIFT 53 +#define H_PAGE_F_SECOND _RPAGE_RPN44 /* HPTE is in 2ndary HPTEG */ +#define H_PAGE_F_GIX (_RPAGE_RPN43 | _RPAGE_RPN42 | _RPAGE_RPN41) #define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ +#define H_PAGE_HASHPTE _RPAGE_RSV2 /* software: PTE & hash are busy */ /* PTE flags to conserve for HPTE identification */ #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index b78d94e5249b..62e101b43cf6 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -14,6 +14,7 @@ #define H_PAGE_COMBO _RPAGE_RPN0 /* this is a combo 4k page */ #define H_PAGE_4K_PFN _RPAGE_RPN1 /* PFN is for a single 4k page */ #define H_PAGE_BUSY _RPAGE_RPN44 /* software: PTE & hash are busy */ +#define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */ /* * We need to differentiate between explicit huge page and THP huge diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index e72474f13936..0920eff731b3 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -9,7 +9,6 @@ * */ #define H_PTE_NONE_MASK _PAGE_HPTEFLAGS -#define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */ #ifdef CONFIG_PPC_64K_PAGES #include -- cgit v1.2.3 From a8548686463b67d66084d51a2737dc8d49369876 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 6 Nov 2017 00:50:51 -0800 Subject: powerpc: use helper functions to get and set hash slots replace redundant code in __hash_page_4K() and flush_hash_page() with helper functions pte_get_hash_gslot() and pte_set_hidx() Reviewed-by: Aneesh Kumar K.V Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash64_4k.c | 14 ++++++-------- arch/powerpc/mm/hash_utils_64.c | 13 ++++--------- 2 files changed, 10 insertions(+), 17 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index 6fa450c12d6d..5a69b51d08a3 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -20,6 +20,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, int subpg_prot) { + real_pte_t rpte; unsigned long hpte_group; unsigned long rflags, pa; unsigned long old_pte, new_pte; @@ -54,6 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, * need to add in 0x1 if it's a read-only user page */ rflags = htab_convert_pte_flags(new_pte); + rpte = __real_pte(__pte(old_pte), ptep); if (cpu_has_feature(CPU_FTR_NOEXECUTE) && !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) @@ -64,13 +66,10 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, /* * There MIGHT be an HPTE for this pte */ - hash = hpt_hash(vpn, shift, ssize); - if (old_pte & H_PAGE_F_SECOND) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; + unsigned long gslot = pte_get_hash_gslot(vpn, shift, ssize, + rpte, 0); - if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K, + if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_4K, MMU_PAGE_4K, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -118,8 +117,7 @@ repeat: return -1; } new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; - new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & - (H_PAGE_F_SECOND | H_PAGE_F_GIX); + new_pte |= pte_set_hidx(ptep, rpte, 0, slot); } *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 88102b6c664c..0c802ded6f21 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1618,23 +1618,18 @@ unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift, void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, unsigned long flags) { - unsigned long hash, index, shift, hidx, slot; + unsigned long index, shift, gslot; int local = flags & HPTE_LOCAL_UPDATE; DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn); pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { - hash = hpt_hash(vpn, shift, ssize); - hidx = __rpte_to_hidx(pte, index); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx); + gslot = pte_get_hash_gslot(vpn, shift, ssize, pte, index); + DBG_LOW(" sub %ld: gslot=%lx\n", index, gslot); /* * We use same base page size and actual psize, because we don't * use these functions for hugepage */ - mmu_hash_ops.hpte_invalidate(slot, vpn, psize, psize, + mmu_hash_ops.hpte_invalidate(gslot, vpn, psize, psize, ssize, local); } pte_iterate_hashed_end(); -- cgit v1.2.3 From 7e4363550c2599a90f55fe3c538a7cf99fe4f473 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 6 Nov 2017 00:50:52 -0800 Subject: powerpc: capture the PTE format changes in the dump pte report The H_PAGE_F_SECOND,H_PAGE_F_GIX are not in the 64K main-PTE. capture these changes in the dump pte report. Reviewed-by: Aneesh Kumar K.V Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/mm/dump_linuxpagetables.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index c2e7dea59490..139dd1993eeb 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -213,7 +213,7 @@ static const struct flag_info flag_array[] = { .val = H_PAGE_4K_PFN, .set = "4K_pfn", }, { -#endif +#else /* CONFIG_PPC_64K_PAGES */ .mask = H_PAGE_F_GIX, .val = H_PAGE_F_GIX, .set = "f_gix", @@ -224,6 +224,7 @@ static const struct flag_info flag_array[] = { .val = H_PAGE_F_SECOND, .set = "f_second", }, { +#endif /* CONFIG_PPC_64K_PAGES */ #endif .mask = _PAGE_SPECIAL, .val = _PAGE_SPECIAL, -- cgit v1.2.3 From 5fa5b16be5b319184378870467352eab6700b1de Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 4 Dec 2017 07:49:10 +0530 Subject: powerpc/mm/hugetlb: Use pte_access_permitted for hugetlb access check No functional change in this patch. This update gup_hugepte to use the helper. This will help later when we add memory keys. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hugetlbpage.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a9b9083c5e49..c7e5afe5e118 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -855,9 +855,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, pte = READ_ONCE(*ptep); - if (!pte_present(pte) || !pte_read(pte)) - return 0; - if (write && !pte_write(pte)) + if (!pte_access_permitted(pte, write)) return 0; /* hugepages are never "special" */ -- cgit v1.2.3 From f72a85e347810c76eb246e68751233aad3b84ac8 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 4 Dec 2017 07:49:11 +0530 Subject: powerpc/mm/book3s/64: Add proper pte access check helper pte_access_premitted get called in get_user_pages_fast path. If we have marked the pte PROT_NONE, we should not allow a read access on the address. With the current implementation we are not checking the READ and only check for WRITE. This is needed on archs like ppc64 that implement PROT_NONE using RWX access instead of _PAGE_PRESENT. Also add pte_user check just to make sure we are not accessing kernel mapping. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 41 ++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 44697817ccc6..23b03449e2f1 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -546,6 +546,30 @@ static inline int pte_present(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT)); } + +#define pte_access_permitted pte_access_permitted +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + unsigned long pteval = pte_val(pte); + /* Also check for pte_user */ + unsigned long clear_pte_bits = _PAGE_PRIVILEGED; + /* + * _PAGE_READ is needed for any access and will be + * cleared for PROT_NONE + */ + unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_READ; + + if (write) + need_pte_bits |= _PAGE_WRITE; + + if ((pteval & need_pte_bits) != need_pte_bits) + return false; + + if ((pteval & clear_pte_bits) == clear_pte_bits) + return false; + return true; +} + /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. @@ -850,6 +874,11 @@ static inline int pud_bad(pud_t pud) return hash__pud_bad(pud); } +#define pud_access_permitted pud_access_permitted +static inline bool pud_access_permitted(pud_t pud, bool write) +{ + return pte_access_permitted(pud_pte(pud), write); +} #define pgd_write(pgd) pte_write(pgd_pte(pgd)) static inline void pgd_set(pgd_t *pgdp, unsigned long val) @@ -889,6 +918,12 @@ static inline int pgd_bad(pgd_t pgd) return hash__pgd_bad(pgd); } +#define pgd_access_permitted pgd_access_permitted +static inline bool pgd_access_permitted(pgd_t pgd, bool write) +{ + return pte_access_permitted(pgd_pte(pgd), write); +} + extern struct page *pgd_page(pgd_t pgd); /* Pointers in the page table tree are physical addresses */ @@ -1009,6 +1044,12 @@ static inline int pmd_protnone(pmd_t pmd) #define __pmd_write(pmd) __pte_write(pmd_pte(pmd)) #define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd)) +#define pmd_access_permitted pmd_access_permitted +static inline bool pmd_access_permitted(pmd_t pmd, bool write) +{ + return pte_access_permitted(pmd_pte(pmd), write); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); -- cgit v1.2.3 From 5769beaf180a892c3fea92937954727fb912bded Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 4 Dec 2017 07:49:12 +0530 Subject: powerpc/mm: Add proper pte access check helper for other platforms pte_access_premitted get called in get_user_pages_fast path. If we have marked the pte PROT_NONE, we should not allow a read access on the address. With the current implementation we are not checking the READ and only check for WRITE. This is needed on archs like ppc64 that implement PROT_NONE using _PAGE_USER access instead of _PAGE_PRESENT. Also add pte_user check just to make sure we are not accessing kernel mapping. Even though there is code duplication, keeping the low level pte accessors different for different platforms helps in code readability. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgtable.h | 23 +++++++++++++++++++++++ arch/powerpc/include/asm/nohash/pgtable.h | 23 +++++++++++++++++++++++ 2 files changed, 46 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 016579ef16d3..30a155c0a6b0 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -311,6 +311,29 @@ static inline int pte_present(pte_t pte) return pte_val(pte) & _PAGE_PRESENT; } +/* + * We only find page table entry in the last level + * Hence no need for other accessors + */ +#define pte_access_permitted pte_access_permitted +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + unsigned long pteval = pte_val(pte); + /* + * A read-only access is controlled by _PAGE_USER bit. + * We have _PAGE_READ set for WRITE and EXECUTE + */ + unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER; + + if (write) + need_pte_bits |= _PAGE_WRITE; + + if ((pteval & need_pte_bits) != need_pte_bits) + return false; + + return true; +} + /* Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. * diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 5c68f4a59f75..fc4376c8d444 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -45,6 +45,29 @@ static inline int pte_present(pte_t pte) return pte_val(pte) & _PAGE_PRESENT; } +/* + * We only find page table entry in the last level + * Hence no need for other accessors + */ +#define pte_access_permitted pte_access_permitted +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + unsigned long pteval = pte_val(pte); + /* + * A read-only access is controlled by _PAGE_USER bit. + * We have _PAGE_READ set for WRITE and EXECUTE + */ + unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER; + + if (write) + need_pte_bits |= _PAGE_WRITE; + + if ((pteval & need_pte_bits) != need_pte_bits) + return false; + + return true; +} + /* Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. * -- cgit v1.2.3 From e0af0c161000fa5914198d4cd1183bced68378f5 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 2 Jan 2018 15:50:06 -0800 Subject: arch: Remove clkdev.h asm-generic from Kbuild Now that every architecture is using the generic clkdev.h file and we no longer include asm/clkdev.h anywhere in the tree, we can remove it. Cc: Russell King Acked-by: Arnd Bergmann Cc: Acked-by: Geert Uytterhoeven [m68k] Signed-off-by: Stephen Boyd --- arch/alpha/include/asm/Kbuild | 1 - arch/arc/include/asm/Kbuild | 1 - arch/arm/include/asm/Kbuild | 1 - arch/arm64/include/asm/Kbuild | 1 - arch/blackfin/include/asm/Kbuild | 1 - arch/c6x/include/asm/Kbuild | 1 - arch/cris/include/asm/Kbuild | 1 - arch/frv/include/asm/Kbuild | 1 - arch/h8300/include/asm/Kbuild | 1 - arch/hexagon/include/asm/Kbuild | 1 - arch/ia64/include/asm/Kbuild | 1 - arch/m32r/include/asm/Kbuild | 1 - arch/m68k/include/asm/Kbuild | 1 - arch/metag/include/asm/Kbuild | 1 - arch/microblaze/include/asm/Kbuild | 1 - arch/mips/include/asm/Kbuild | 1 - arch/mn10300/include/asm/Kbuild | 1 - arch/nios2/include/asm/Kbuild | 1 - arch/openrisc/include/asm/Kbuild | 1 - arch/parisc/include/asm/Kbuild | 1 - arch/powerpc/include/asm/Kbuild | 1 - arch/riscv/include/asm/Kbuild | 1 - arch/s390/include/asm/Kbuild | 1 - arch/score/include/asm/Kbuild | 1 - arch/sh/include/asm/Kbuild | 1 - arch/sparc/include/asm/Kbuild | 1 - arch/tile/include/asm/Kbuild | 1 - arch/um/include/asm/Kbuild | 1 - arch/unicore32/include/asm/Kbuild | 1 - arch/x86/include/asm/Kbuild | 1 - arch/xtensa/include/asm/Kbuild | 1 - include/asm-generic/clkdev.h | 30 ------------------------------ 32 files changed, 61 deletions(-) delete mode 100644 include/asm-generic/clkdev.h (limited to 'arch/powerpc') diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 47f3fba3e41f..9b68790013e2 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -generic-y += clkdev.h generic-y += exec.h generic-y += export.h generic-y += fb.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index cd8398daccee..4bd5d4369e05 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += bugs.h -generic-y += clkdev.h generic-y += device.h generic-y += div64.h generic-y += emergency-restart.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 0f2c8a2a8131..873e3c189279 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -1,4 +1,3 @@ -generic-y += clkdev.h generic-y += current.h generic-y += early_ioremap.h generic-y += emergency-restart.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index e63d0a8312de..3a9b84d39d71 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,5 +1,4 @@ generic-y += bugs.h -generic-y += clkdev.h generic-y += delay.h generic-y += div64.h generic-y += dma.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 40e44231743f..fe736973630f 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -1,5 +1,4 @@ generic-y += bugs.h -generic-y += clkdev.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index d717329c8cf9..fd4c840de837 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += atomic.h generic-y += barrier.h generic-y += bugs.h -generic-y += clkdev.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 460349cb147f..6c14021d0f89 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += atomic.h generic-y += barrier.h -generic-y += clkdev.h generic-y += cmpxchg.h generic-y += current.h generic-y += device.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 2cf7648787b2..b16b9c48ea09 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -1,5 +1,4 @@ -generic-y += clkdev.h generic-y += device.h generic-y += exec.h generic-y += extable.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index bc077491d299..bdd7776c8d91 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -3,7 +3,6 @@ generic-y += barrier.h generic-y += bugs.h generic-y += cacheflush.h generic-y += checksum.h -generic-y += clkdev.h generic-y += current.h generic-y += delay.h generic-y += device.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 4a23e527d651..e9743f689fb8 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += barrier.h generic-y += bug.h generic-y += bugs.h -generic-y += clkdev.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 1d7641f891e1..6dd867873364 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -1,4 +1,3 @@ -generic-y += clkdev.h generic-y += exec.h generic-y += irq_work.h generic-y += mcs_spinlock.h diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 7e11b125c35e..f1b50129150b 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -1,4 +1,3 @@ -generic-y += clkdev.h generic-y += current.h generic-y += exec.h generic-y += extable.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 59d6d0d38f67..88a9d27df1ac 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -1,5 +1,4 @@ generic-y += barrier.h -generic-y += clkdev.h generic-y += device.h generic-y += emergency-restart.h generic-y += exec.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index 3fba97ed9bb2..913c779979df 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -1,5 +1,4 @@ generic-y += bugs.h -generic-y += clkdev.h generic-y += current.h generic-y += device.h generic-y += dma.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 9d66f7793841..3c80a5a308ed 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += barrier.h generic-y += bitops.h generic-y += bug.h generic-y += bugs.h -generic-y += clkdev.h generic-y += device.h generic-y += div64.h generic-y += emergency-restart.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 7c8aab23bce8..2f9e2b53a085 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -1,6 +1,5 @@ # MIPS headers generic-(CONFIG_GENERIC_CSUM) += checksum.h -generic-y += clkdev.h generic-y += current.h generic-y += dma-contiguous.h generic-y += emergency-restart.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index db5b57829a81..509c45a75d1f 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += barrier.h -generic-y += clkdev.h generic-y += device.h generic-y += exec.h generic-y += extable.h diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index 896c26ae0da9..d232da2cbb38 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -3,7 +3,6 @@ generic-y += barrier.h generic-y += bitops.h generic-y += bug.h generic-y += bugs.h -generic-y += clkdev.h generic-y += cmpxchg.h generic-y += current.h generic-y += device.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 6eb16719549e..f05c722a21f8 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += barrier.h generic-y += bug.h generic-y += bugs.h generic-y += checksum.h -generic-y += clkdev.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index a41139575ab4..2013d639e735 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -1,5 +1,4 @@ generic-y += barrier.h -generic-y += clkdev.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 2542ea15d338..3196d227e351 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -1,4 +1,3 @@ -generic-y += clkdev.h generic-y += div64.h generic-y += export.h generic-y += irq_regs.h diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 18158be62a2b..04846be29947 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += bugs.h generic-y += cacheflush.h generic-y += checksum.h -generic-y += clkdev.h generic-y += cputime.h generic-y += device.h generic-y += div64.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 41c211a4d8b1..d0abf0bac21b 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += asm-offsets.h generic-y += cacheflush.h -generic-y += clkdev.h generic-y += device.h generic-y += dma-contiguous.h generic-y += div64.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index 54b3b2039af1..1a0ee72f6a7c 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -1,5 +1,4 @@ generic-y += barrier.h -generic-y += clkdev.h generic-y += current.h generic-y += extable.h generic-y += irq_work.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 1a6f9c39feef..1efcce74997b 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -1,4 +1,3 @@ -generic-y += clkdev.h generic-y += current.h generic-y += delay.h generic-y += div64.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 80ddc01f57ac..ac67828da201 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -1,7 +1,6 @@ # User exported sparc header files -generic-y += clkdev.h generic-y += div64.h generic-y += emergency-restart.h generic-y += exec.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index d28d2b8932c7..414dfc3a1808 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += bug.h generic-y += bugs.h -generic-y += clkdev.h generic-y += emergency-restart.h generic-y += exec.h generic-y += extable.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 50a32c33d729..ca536a38486f 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += barrier.h generic-y += bug.h -generic-y += clkdev.h generic-y += current.h generic-y += delay.h generic-y += device.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index fda7e2153086..6f70c76c81fc 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += atomic.h generic-y += bugs.h -generic-y += clkdev.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 5d6a53fd7521..de690c2d2e33 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -6,7 +6,6 @@ generated-y += unistd_32_ia32.h generated-y += unistd_64_x32.h generated-y += xen-hypercalls.h -generic-y += clkdev.h generic-y += dma-contiguous.h generic-y += early_ioremap.h generic-y += mcs_spinlock.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index dff7cc39437c..436b20337168 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -1,5 +1,4 @@ generic-y += bug.h -generic-y += clkdev.h generic-y += device.h generic-y += div64.h generic-y += dma-contiguous.h diff --git a/include/asm-generic/clkdev.h b/include/asm-generic/clkdev.h deleted file mode 100644 index 4ff334749ed5..000000000000 --- a/include/asm-generic/clkdev.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * include/asm-generic/clkdev.h - * - * Based on the ARM clkdev.h: - * Copyright (C) 2008 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Helper for the clk API to assist looking up a struct clk. - */ -#ifndef __ASM_CLKDEV_H -#define __ASM_CLKDEV_H - -#include - -#ifndef CONFIG_COMMON_CLK -struct clk; - -static inline int __clk_get(struct clk *clk) { return 1; } -static inline void __clk_put(struct clk *clk) { } -#endif - -static inline struct clk_lookup_alloc *__clkdev_alloc(size_t size) -{ - return kzalloc(size, GFP_KERNEL); -} - -#endif -- cgit v1.2.3 From 24e6d5a59ac7d31adc0322de2d0117dfa370936f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:53 +0100 Subject: mm: pass the vmem_altmap to arch_add_memory and __add_pages We can just pass this on instead of having to do a radix tree lookup without proper locking 2 levels into the callchain. Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/ia64/mm/init.c | 5 +++-- arch/powerpc/mm/mem.c | 5 +++-- arch/s390/mm/init.c | 5 +++-- arch/sh/mm/init.c | 5 +++-- arch/x86/mm/init_32.c | 5 +++-- arch/x86/mm/init_64.c | 11 ++++++----- include/linux/memory_hotplug.h | 17 ++++++++++------- kernel/memremap.c | 3 ++- mm/hmm.c | 5 +++-- mm/memory_hotplug.c | 7 +++---- 10 files changed, 39 insertions(+), 29 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 7af4e05bb61e..2e2e4f532204 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -647,13 +647,14 @@ mem_init (void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", __func__, ret); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 4362b86ef84c..e670cfc2766e 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -127,7 +127,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -144,7 +145,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) return -EFAULT; } - return __add_pages(nid, start_pfn, nr_pages, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 671535e64aba..e12c5af50cd7 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -222,7 +222,8 @@ device_initcall(s390_cma_mem_init); #endif /* CONFIG_CMA */ -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = PFN_DOWN(start); unsigned long size_pages = PFN_DOWN(size); @@ -232,7 +233,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) if (rc) return rc; - rc = __add_pages(nid, start_pfn, size_pages, want_memblock); + rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock); if (rc) vmem_remove_mapping(start, size); return rc; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index afc54d593a26..552afbf55bad 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -485,14 +485,15 @@ void free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; int ret; /* We only have ZONE_NORMAL, so this is easy.. */ - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 135c9a7898c7..8a3091511a71 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -829,12 +829,13 @@ void __init mem_init(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(nid, start_pfn, nr_pages, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 8acdc35c2dfa..e80bb4189254 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -772,12 +772,12 @@ static void update_end_of_memory_vars(u64 start, u64 size) } } -int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock) +int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, bool want_memblock) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -787,14 +787,15 @@ int add_pages(int nid, unsigned long start_pfn, return ret; } -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; init_memory_mapping(start, start + size); - return add_pages(nid, start_pfn, nr_pages, want_memblock); + return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #define PAGE_INUSE 0xFD diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 58e110aee7ab..db276afbefcc 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -13,6 +13,7 @@ struct pglist_data; struct mem_section; struct memory_block; struct resource; +struct vmem_altmap; #ifdef CONFIG_MEMORY_HOTPLUG /* @@ -131,18 +132,19 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn, #endif /* CONFIG_MEMORY_HOTREMOVE */ /* reasonably generic interface to expand the physical pages */ -extern int __add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock); +extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, bool want_memblock); #ifndef CONFIG_ARCH_HAS_ADD_PAGES static inline int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock) + unsigned long nr_pages, struct vmem_altmap *altmap, + bool want_memblock) { - return __add_pages(nid, start_pfn, nr_pages, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #else /* ARCH_HAS_ADD_PAGES */ -int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock); +int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, bool want_memblock); #endif /* ARCH_HAS_ADD_PAGES */ #ifdef CONFIG_NUMA @@ -318,7 +320,8 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int add_memory(int nid, u64 start, u64 size); extern int add_memory_resource(int nid, struct resource *resource, bool online); -extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock); +extern int arch_add_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap, bool want_memblock); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); diff --git a/kernel/memremap.c b/kernel/memremap.c index 403ab9cdb949..8488cdeead16 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -382,6 +382,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (altmap) { memcpy(&page_map->altmap, altmap, sizeof(*altmap)); pgmap->altmap = &page_map->altmap; + altmap = pgmap->altmap; } pgmap->ref = ref; pgmap->res = &page_map->res; @@ -427,7 +428,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, goto err_pfn_remap; mem_hotplug_begin(); - error = arch_add_memory(nid, align_start, align_size, false); + error = arch_add_memory(nid, align_start, align_size, altmap, false); if (!error) move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], align_start >> PAGE_SHIFT, diff --git a/mm/hmm.c b/mm/hmm.c index ea19742a5d60..231aaacd1997 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -931,10 +931,11 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) * want the linear mapping and thus use arch_add_memory(). */ if (devmem->pagemap.type == MEMORY_DEVICE_PUBLIC) - ret = arch_add_memory(nid, align_start, align_size, false); + ret = arch_add_memory(nid, align_start, align_size, NULL, + false); else ret = add_pages(nid, align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT, false); + align_size >> PAGE_SHIFT, NULL, false); if (ret) { mem_hotplug_done(); goto error_add_memory; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 5c6f96e6b334..fc0485dcece1 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -292,18 +292,17 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, * add the new pages. */ int __ref __add_pages(int nid, unsigned long phys_start_pfn, - unsigned long nr_pages, bool want_memblock) + unsigned long nr_pages, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long i; int err = 0; int start_sec, end_sec; - struct vmem_altmap *altmap; /* during initialize mem_map, align hot-added range to section */ start_sec = pfn_to_section_nr(phys_start_pfn); end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); - altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn)); if (altmap) { /* * Validate altmap is within bounds of the total request @@ -1148,7 +1147,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online) } /* call arch's memory hotadd */ - ret = arch_add_memory(nid, start, size, true); + ret = arch_add_memory(nid, start, size, NULL, true); if (ret < 0) goto error; -- cgit v1.2.3 From 7b73d978a5d0d2a3637bdd57191cb6ffbad3feca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:54 +0100 Subject: mm: pass the vmem_altmap to vmemmap_populate We can just pass this on instead of having to do a radix tree lookup without proper locking a few levels into the callchain. Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/arm64/mm/mmu.c | 6 ++++-- arch/ia64/mm/discontig.c | 3 ++- arch/powerpc/mm/init_64.c | 7 ++----- arch/s390/mm/vmem.c | 3 ++- arch/sparc/mm/init_64.c | 2 +- arch/x86/mm/init_64.c | 4 ++-- include/linux/memory_hotplug.h | 3 ++- include/linux/mm.h | 6 ++++-- mm/memory_hotplug.c | 7 ++++--- mm/sparse-vmemmap.c | 7 ++++--- mm/sparse.c | 20 ++++++++++++-------- 11 files changed, 39 insertions(+), 29 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 267d2b79d52d..ec8952ff13be 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -654,12 +654,14 @@ int kern_addr_valid(unsigned long addr) } #ifdef CONFIG_SPARSEMEM_VMEMMAP #if !ARM64_SWAPPER_USES_SECTION_MAPS -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { return vmemmap_populate_basepages(start, end, node); } #else /* !ARM64_SWAPPER_USES_SECTION_MAPS */ -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long addr = start; unsigned long next; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 9b2d994cddf6..1555aecaaf85 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -754,7 +754,8 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { return vmemmap_populate_basepages(start, end, node); } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a07722531b32..779b74a96b8f 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -183,7 +183,8 @@ static __meminit void vmemmap_list_populate(unsigned long phys, vmemmap_list = vmem_back; } -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; @@ -193,16 +194,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); for (; start < end; start += page_size) { - struct vmem_altmap *altmap; void *p; int rc; if (vmemmap_populated(start, page_size)) continue; - /* altmap lookups only work at section boundaries */ - altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start)); - p = __vmemmap_alloc_block_buf(page_size, node, altmap); if (!p) return -ENOMEM; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 3316d463fc29..c44ef0e7c466 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -211,7 +211,8 @@ static void vmem_remove_range(unsigned long start, unsigned long size) /* * Add a backed mem_map array to the virtual mem_map array. */ -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long pgt_prot, sgt_prot; unsigned long address = start; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 55ba62957e64..42d27a1a042a 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2628,7 +2628,7 @@ EXPORT_SYMBOL(_PAGE_CACHE); #ifdef CONFIG_SPARSEMEM_VMEMMAP int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, - int node) + int node, struct vmem_altmap *altmap) { unsigned long pte_base; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e80bb4189254..594902ef56ef 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1411,9 +1411,9 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, return 0; } -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { - struct vmem_altmap *altmap = to_vmem_altmap(start); int err; if (boot_cpu_has(X86_FEATURE_PSE)) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index db276afbefcc..cbdd6d52e877 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -327,7 +327,8 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); extern void remove_memory(int nid, u64 start, u64 size); -extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn); +extern int sparse_add_one_section(struct pglist_data *pgdat, + unsigned long start_pfn, struct vmem_altmap *altmap); extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, unsigned long map_offset); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, diff --git a/include/linux/mm.h b/include/linux/mm.h index ea818ff739cd..2f3a7ebecbe2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2538,7 +2538,8 @@ void sparse_mem_maps_populate_node(struct page **map_map, unsigned long map_count, int nodeid); -struct page *sparse_mem_map_populate(unsigned long pnum, int nid); +struct page *sparse_mem_map_populate(unsigned long pnum, int nid, + struct vmem_altmap *altmap); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); @@ -2556,7 +2557,8 @@ static inline void *vmemmap_alloc_block_buf(unsigned long size, int node) void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); int vmemmap_populate_basepages(unsigned long start, unsigned long end, int node); -int vmemmap_populate(unsigned long start, unsigned long end, int node); +int vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap); void vmemmap_populate_print_last(void); #ifdef CONFIG_MEMORY_HOTPLUG void vmemmap_free(unsigned long start, unsigned long end); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fc0485dcece1..b36f1822c432 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -250,7 +250,7 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat) #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, - bool want_memblock) + struct vmem_altmap *altmap, bool want_memblock) { int ret; int i; @@ -258,7 +258,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, if (pfn_valid(phys_start_pfn)) return -EEXIST; - ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn); + ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap); if (ret < 0) return ret; @@ -317,7 +317,8 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn, } for (i = start_sec; i <= end_sec; i++) { - err = __add_section(nid, section_nr_to_pfn(i), want_memblock); + err = __add_section(nid, section_nr_to_pfn(i), altmap, + want_memblock); /* * EEXIST is finally dealt with by ioresource collision diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 17acf01791fa..376dcf05a39c 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -278,7 +278,8 @@ int __meminit vmemmap_populate_basepages(unsigned long start, return 0; } -struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) +struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { unsigned long start; unsigned long end; @@ -288,7 +289,7 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) start = (unsigned long)map; end = (unsigned long)(map + PAGES_PER_SECTION); - if (vmemmap_populate(start, end, nid)) + if (vmemmap_populate(start, end, nid, altmap)) return NULL; return map; @@ -318,7 +319,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, if (!present_section_nr(pnum)) continue; - map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); + map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL); if (map_map[pnum]) continue; ms = __nr_to_section(pnum); diff --git a/mm/sparse.c b/mm/sparse.c index 7a5dacaa06e3..5f4a0dac7836 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -417,7 +417,8 @@ static void __init sparse_early_usemaps_alloc_node(void *data, } #ifndef CONFIG_SPARSEMEM_VMEMMAP -struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) +struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { struct page *map; unsigned long size; @@ -472,7 +473,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, if (!present_section_nr(pnum)) continue; - map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); + map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL); if (map_map[pnum]) continue; ms = __nr_to_section(pnum); @@ -500,7 +501,7 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) struct mem_section *ms = __nr_to_section(pnum); int nid = sparse_early_nid(ms); - map = sparse_mem_map_populate(pnum, nid); + map = sparse_mem_map_populate(pnum, nid, NULL); if (map) return map; @@ -678,10 +679,11 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid) +static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { /* This will make the necessary allocations eventually. */ - return sparse_mem_map_populate(pnum, nid); + return sparse_mem_map_populate(pnum, nid, altmap); } static void __kfree_section_memmap(struct page *memmap) { @@ -721,7 +723,8 @@ got_map_ptr: return ret; } -static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid) +static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { return __kmalloc_section_memmap(); } @@ -773,7 +776,8 @@ static void free_map_bootmem(struct page *memmap) * set. If this is <=0, then that means that the passed-in * map was not consumed and must be freed. */ -int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn) +int __meminit sparse_add_one_section(struct pglist_data *pgdat, + unsigned long start_pfn, struct vmem_altmap *altmap) { unsigned long section_nr = pfn_to_section_nr(start_pfn); struct mem_section *ms; @@ -789,7 +793,7 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long st ret = sparse_index_init(section_nr, pgdat->node_id); if (ret < 0 && ret != -EEXIST) return ret; - memmap = kmalloc_section_memmap(section_nr, pgdat->node_id); + memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap); if (!memmap) return -ENOMEM; usemap = __kmalloc_section_usemap(); -- cgit v1.2.3 From da024512a1fa5c979257e442130ee1d468285057 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:55 +0100 Subject: mm: pass the vmem_altmap to arch_remove_memory and __remove_pages We can just pass this on instead of having to do a radix tree lookup without proper locking 2 levels into the callchain. Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/ia64/mm/init.c | 4 ++-- arch/powerpc/mm/mem.c | 6 ++---- arch/s390/mm/init.c | 2 +- arch/sh/mm/init.c | 4 ++-- arch/x86/mm/init_32.c | 4 ++-- arch/x86/mm/init_64.c | 6 ++---- include/linux/memory_hotplug.h | 5 +++-- kernel/memremap.c | 2 +- mm/hmm.c | 4 ++-- mm/memory_hotplug.c | 8 ++------ 10 files changed, 19 insertions(+), 26 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 2e2e4f532204..6a8ce9e1536e 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -663,7 +663,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -671,7 +671,7 @@ int arch_remove_memory(u64 start, u64 size) int ret; zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); if (ret) pr_warn("%s: Problem encountered in __remove_pages() as" " ret=%d\n", __func__, ret); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index e670cfc2766e..22aa528b78a2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -149,11 +149,10 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct vmem_altmap *altmap; struct page *page; int ret; @@ -162,11 +161,10 @@ int arch_remove_memory(u64 start, u64 size) * when querying the zone. */ page = pfn_to_page(start_pfn); - altmap = to_vmem_altmap((unsigned long) page); if (altmap) page += vmem_altmap_offset(altmap); - ret = __remove_pages(page_zone(page), start_pfn, nr_pages); + ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); if (ret) return ret; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index e12c5af50cd7..3fa3e5323612 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -240,7 +240,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { /* * There is no hardware or firmware interface which could trigger a diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 552afbf55bad..ce0bbaa7e404 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -510,7 +510,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; @@ -518,7 +518,7 @@ int arch_remove_memory(u64 start, u64 size) int ret; zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); if (unlikely(ret)) pr_warn("%s: Failed, __remove_pages() == %d\n", __func__, ret); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8a3091511a71..79cb066f40c0 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -839,14 +839,14 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct zone *zone; zone = page_zone(pfn_to_page(start_pfn)); - return __remove_pages(zone, start_pfn, nr_pages); + return __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 594902ef56ef..3c046618cc7e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1132,21 +1132,19 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) remove_pagetable(start, end, true); } -int __ref arch_remove_memory(u64 start, u64 size) +int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct page *page = pfn_to_page(start_pfn); - struct vmem_altmap *altmap; struct zone *zone; int ret; /* With altmap the first mapped page is offset from @start */ - altmap = to_vmem_altmap((unsigned long) page); if (altmap) page += vmem_altmap_offset(altmap); zone = page_zone(page); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); WARN_ON_ONCE(ret); kernel_physical_mapping_remove(start, start + size); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index cbdd6d52e877..e71927d0d46b 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -126,9 +126,10 @@ static inline bool movable_node_is_enabled(void) #ifdef CONFIG_MEMORY_HOTREMOVE extern bool is_pageblock_removable_nolock(struct page *page); -extern int arch_remove_memory(u64 start, u64 size); +extern int arch_remove_memory(u64 start, u64 size, + struct vmem_altmap *altmap); extern int __remove_pages(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages); + unsigned long nr_pages, struct vmem_altmap *altmap); #endif /* CONFIG_MEMORY_HOTREMOVE */ /* reasonably generic interface to expand the physical pages */ diff --git a/kernel/memremap.c b/kernel/memremap.c index 8488cdeead16..380fca1c4a02 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -304,7 +304,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data) align_size = ALIGN(resource_size(res), SECTION_SIZE); mem_hotplug_begin(); - arch_remove_memory(align_start, align_size); + arch_remove_memory(align_start, align_size, pgmap->altmap); mem_hotplug_done(); untrack_pfn(NULL, PHYS_PFN(align_start), align_size); diff --git a/mm/hmm.c b/mm/hmm.c index 231aaacd1997..5d17ba89062f 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -838,10 +838,10 @@ static void hmm_devmem_release(struct device *dev, void *data) mem_hotplug_begin(); if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) - __remove_pages(zone, start_pfn, npages); + __remove_pages(zone, start_pfn, npages, NULL); else arch_remove_memory(start_pfn << PAGE_SHIFT, - npages << PAGE_SHIFT); + npages << PAGE_SHIFT, NULL); mem_hotplug_done(); hmm_devmem_radix_release(resource); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b36f1822c432..eae6bf47caf7 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -569,7 +569,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms, * calling offline_pages(). */ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, - unsigned long nr_pages) + unsigned long nr_pages, struct vmem_altmap *altmap) { unsigned long i; unsigned long map_offset = 0; @@ -577,10 +577,6 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, /* In the ZONE_DEVICE case device driver owns the memory region */ if (is_dev_zone(zone)) { - struct page *page = pfn_to_page(phys_start_pfn); - struct vmem_altmap *altmap; - - altmap = to_vmem_altmap((unsigned long) page); if (altmap) map_offset = vmem_altmap_offset(altmap); } else { @@ -1890,7 +1886,7 @@ void __ref remove_memory(int nid, u64 start, u64 size) memblock_free(start, size); memblock_remove(start, size); - arch_remove_memory(start, size); + arch_remove_memory(start, size, NULL); try_offline_node(nid); -- cgit v1.2.3 From 24b6d4164348370c6b6a58b4248babd85ff9e982 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:56 +0100 Subject: mm: pass the vmem_altmap to vmemmap_free We can just pass this on instead of having to do a radix tree lookup without proper locking a few levels into the callchain. Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/arm64/mm/mmu.c | 3 +- arch/ia64/mm/discontig.c | 3 +- arch/powerpc/mm/init_64.c | 5 ++-- arch/s390/mm/vmem.c | 3 +- arch/sparc/mm/init_64.c | 3 +- arch/x86/mm/init_64.c | 67 ++++++++++++++++++++++++------------------ include/linux/memory_hotplug.h | 2 +- include/linux/mm.h | 3 +- mm/memory_hotplug.c | 7 +++-- mm/sparse.c | 23 ++++++++------- 10 files changed, 68 insertions(+), 51 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ec8952ff13be..0b1f13e0b4b3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -696,7 +696,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return 0; } #endif /* CONFIG_ARM64_64K_PAGES */ -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 1555aecaaf85..5ea0d8d0968b 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -760,7 +760,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return vmemmap_populate_basepages(start, end, node); } -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } #endif diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 779b74a96b8f..db7d4e092157 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -254,7 +254,8 @@ static unsigned long vmemmap_list_free(unsigned long start) return vmem_back->phys; } -void __ref vmemmap_free(unsigned long start, unsigned long end) +void __ref vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; unsigned long page_order = get_order(page_size); @@ -265,7 +266,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end) for (; start < end; start += page_size) { unsigned long nr_pages, addr; - struct vmem_altmap *altmap; struct page *section_base; struct page *page; @@ -285,7 +285,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end) section_base = pfn_to_page(vmemmap_section_start(start)); nr_pages = 1 << page_order; - altmap = to_vmem_altmap((unsigned long) section_base); if (altmap) { vmem_altmap_free(altmap, nr_pages); } else if (PageReserved(page)) { diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index c44ef0e7c466..db55561c5981 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -297,7 +297,8 @@ out: return ret; } -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 42d27a1a042a..995f9490334d 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2671,7 +2671,8 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, return 0; } -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3c046618cc7e..0cab4b5b59ba 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -800,11 +800,11 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, #define PAGE_INUSE 0xFD -static void __meminit free_pagetable(struct page *page, int order) +static void __meminit free_pagetable(struct page *page, int order, + struct vmem_altmap *altmap) { unsigned long magic; unsigned int nr_pages = 1 << order; - struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page); if (altmap) { vmem_altmap_free(altmap, nr_pages); @@ -826,7 +826,8 @@ static void __meminit free_pagetable(struct page *page, int order) free_pages((unsigned long)page_address(page), order); } -static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd, + struct vmem_altmap *altmap) { pte_t *pte; int i; @@ -838,13 +839,14 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) } /* free a pte talbe */ - free_pagetable(pmd_page(*pmd), 0); + free_pagetable(pmd_page(*pmd), 0, altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, + struct vmem_altmap *altmap) { pmd_t *pmd; int i; @@ -856,13 +858,14 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) } /* free a pmd talbe */ - free_pagetable(pud_page(*pud), 0); + free_pagetable(pud_page(*pud), 0, altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, + struct vmem_altmap *altmap) { pud_t *pud; int i; @@ -874,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) } /* free a pud talbe */ - free_pagetable(p4d_page(*p4d), 0); + free_pagetable(p4d_page(*p4d), 0, altmap); spin_lock(&init_mm.page_table_lock); p4d_clear(p4d); spin_unlock(&init_mm.page_table_lock); @@ -882,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pte_t *pte; @@ -913,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, * freed when offlining, or simplely not in use. */ if (!direct) - free_pagetable(pte_page(*pte), 0); + free_pagetable(pte_page(*pte), 0, altmap); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -936,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, page_addr = page_address(pte_page(*pte)); if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { - free_pagetable(pte_page(*pte), 0); + free_pagetable(pte_page(*pte), 0, altmap); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -953,7 +956,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, - bool direct) + bool direct, struct vmem_altmap *altmap) { unsigned long next, pages = 0; pte_t *pte_base; @@ -972,7 +975,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PMD_SIZE)) { if (!direct) free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE)); + get_order(PMD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -986,7 +990,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PMD_SIZE)) { free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE)); + get_order(PMD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -998,8 +1003,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, } pte_base = (pte_t *)pmd_page_vaddr(*pmd); - remove_pte_table(pte_base, addr, next, direct); - free_pte_table(pte_base, pmd); + remove_pte_table(pte_base, addr, next, altmap, direct); + free_pte_table(pte_base, pmd, altmap); } /* Call free_pmd_table() in remove_pud_table(). */ @@ -1009,7 +1014,7 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pmd_t *pmd_base; @@ -1028,7 +1033,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PUD_SIZE)) { if (!direct) free_pagetable(pud_page(*pud), - get_order(PUD_SIZE)); + get_order(PUD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1042,7 +1048,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PUD_SIZE)) { free_pagetable(pud_page(*pud), - get_order(PUD_SIZE)); + get_order(PUD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1054,8 +1061,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, } pmd_base = pmd_offset(pud, 0); - remove_pmd_table(pmd_base, addr, next, direct); - free_pmd_table(pmd_base, pud); + remove_pmd_table(pmd_base, addr, next, direct, altmap); + free_pmd_table(pmd_base, pud, altmap); } if (direct) @@ -1064,7 +1071,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, static void __meminit remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pud_t *pud_base; @@ -1080,14 +1087,14 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, BUILD_BUG_ON(p4d_large(*p4d)); pud_base = pud_offset(p4d, 0); - remove_pud_table(pud_base, addr, next, direct); + remove_pud_table(pud_base, addr, next, altmap, direct); /* * For 4-level page tables we do not want to free PUDs, but in the * 5-level case we should free them. This code will have to change * to adapt for boot-time switching between 4 and 5 level page tables. */ if (CONFIG_PGTABLE_LEVELS == 5) - free_pud_table(pud_base, p4d); + free_pud_table(pud_base, p4d, altmap); } if (direct) @@ -1096,7 +1103,8 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, /* start and end are both virtual address. */ static void __meminit -remove_pagetable(unsigned long start, unsigned long end, bool direct) +remove_pagetable(unsigned long start, unsigned long end, bool direct, + struct vmem_altmap *altmap) { unsigned long next; unsigned long addr; @@ -1111,15 +1119,16 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) continue; p4d = p4d_offset(pgd, 0); - remove_p4d_table(p4d, addr, next, direct); + remove_p4d_table(p4d, addr, next, altmap, direct); } flush_tlb_all(); } -void __ref vmemmap_free(unsigned long start, unsigned long end) +void __ref vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { - remove_pagetable(start, end, false); + remove_pagetable(start, end, false, altmap); } #ifdef CONFIG_MEMORY_HOTREMOVE @@ -1129,7 +1138,7 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) start = (unsigned long)__va(start); end = (unsigned long)__va(end); - remove_pagetable(start, end, true); + remove_pagetable(start, end, true, NULL); } int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index e71927d0d46b..20dd98ad44a0 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -331,7 +331,7 @@ extern void remove_memory(int nid, u64 start, u64 size); extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn, struct vmem_altmap *altmap); extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset); + unsigned long map_offset, struct vmem_altmap *altmap); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages, diff --git a/include/linux/mm.h b/include/linux/mm.h index 2f3a7ebecbe2..9d4cd4c1dc6d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2561,7 +2561,8 @@ int vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); void vmemmap_populate_print_last(void); #ifdef CONFIG_MEMORY_HOTPLUG -void vmemmap_free(unsigned long start, unsigned long end); +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap); #endif void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, unsigned long nr_pages); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index eae6bf47caf7..a8dde9734120 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -536,7 +536,7 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn) } static int __remove_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset) + unsigned long map_offset, struct vmem_altmap *altmap) { unsigned long start_pfn; int scn_nr; @@ -553,7 +553,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms, start_pfn = section_nr_to_pfn((unsigned long)scn_nr); __remove_zone(zone, start_pfn); - sparse_remove_one_section(zone, ms, map_offset); + sparse_remove_one_section(zone, ms, map_offset, altmap); return 0; } @@ -607,7 +607,8 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, for (i = 0; i < sections_to_remove; i++) { unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; - ret = __remove_section(zone, __pfn_to_section(pfn), map_offset); + ret = __remove_section(zone, __pfn_to_section(pfn), map_offset, + altmap); map_offset = 0; if (ret) break; diff --git a/mm/sparse.c b/mm/sparse.c index 5f4a0dac7836..06130c13dc99 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -685,12 +685,13 @@ static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, /* This will make the necessary allocations eventually. */ return sparse_mem_map_populate(pnum, nid, altmap); } -static void __kfree_section_memmap(struct page *memmap) +static void __kfree_section_memmap(struct page *memmap, + struct vmem_altmap *altmap) { unsigned long start = (unsigned long)memmap; unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); - vmemmap_free(start, end); + vmemmap_free(start, end, altmap); } #ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap) @@ -698,7 +699,7 @@ static void free_map_bootmem(struct page *memmap) unsigned long start = (unsigned long)memmap; unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); - vmemmap_free(start, end); + vmemmap_free(start, end, NULL); } #endif /* CONFIG_MEMORY_HOTREMOVE */ #else @@ -729,7 +730,8 @@ static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, return __kmalloc_section_memmap(); } -static void __kfree_section_memmap(struct page *memmap) +static void __kfree_section_memmap(struct page *memmap, + struct vmem_altmap *altmap) { if (is_vmalloc_addr(memmap)) vfree(memmap); @@ -798,7 +800,7 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, return -ENOMEM; usemap = __kmalloc_section_usemap(); if (!usemap) { - __kfree_section_memmap(memmap); + __kfree_section_memmap(memmap, altmap); return -ENOMEM; } @@ -820,7 +822,7 @@ out: pgdat_resize_unlock(pgdat, &flags); if (ret <= 0) { kfree(usemap); - __kfree_section_memmap(memmap); + __kfree_section_memmap(memmap, altmap); } return ret; } @@ -847,7 +849,8 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) } #endif -static void free_section_usemap(struct page *memmap, unsigned long *usemap) +static void free_section_usemap(struct page *memmap, unsigned long *usemap, + struct vmem_altmap *altmap) { struct page *usemap_page; @@ -861,7 +864,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap) if (PageSlab(usemap_page) || PageCompound(usemap_page)) { kfree(usemap); if (memmap) - __kfree_section_memmap(memmap); + __kfree_section_memmap(memmap, altmap); return; } @@ -875,7 +878,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap) } void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset) + unsigned long map_offset, struct vmem_altmap *altmap) { struct page *memmap = NULL; unsigned long *usemap = NULL, flags; @@ -893,7 +896,7 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, clear_hwpoisoned_pages(memmap + map_offset, PAGES_PER_SECTION - map_offset); - free_section_usemap(memmap, usemap); + free_section_usemap(memmap, usemap, altmap); } #endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTPLUG */ -- cgit v1.2.3 From a8fc357b2875da8732c91eb085862a0648d82767 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:58 +0100 Subject: mm: split altmap memory map allocation from normal case No functional changes, just untangling the call chain and document why the altmap is passed around the hotplug code. Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Signed-off-by: Dan Williams --- arch/powerpc/mm/init_64.c | 5 ++++- arch/x86/mm/init_64.c | 5 ++++- include/linux/mm.h | 9 ++------- mm/sparse-vmemmap.c | 15 +++------------ 4 files changed, 13 insertions(+), 21 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index db7d4e092157..7a2251d99ed3 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -200,7 +200,10 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, if (vmemmap_populated(start, page_size)) continue; - p = __vmemmap_alloc_block_buf(page_size, node, altmap); + if (altmap) + p = altmap_alloc_block_buf(page_size, altmap); + else + p = vmemmap_alloc_block_buf(page_size, node); if (!p) return -ENOMEM; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0cab4b5b59ba..1ab42c852069 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1385,7 +1385,10 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, if (pmd_none(*pmd)) { void *p; - p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); + if (altmap) + p = altmap_alloc_block_buf(PMD_SIZE, altmap); + else + p = vmemmap_alloc_block_buf(PMD_SIZE, node); if (p) { pte_t entry; diff --git a/include/linux/mm.h b/include/linux/mm.h index fd01135324b6..09637c353de0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2547,13 +2547,8 @@ pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); void *vmemmap_alloc_block(unsigned long size, int node); struct vmem_altmap; -void *__vmemmap_alloc_block_buf(unsigned long size, int node, - struct vmem_altmap *altmap); -static inline void *vmemmap_alloc_block_buf(unsigned long size, int node) -{ - return __vmemmap_alloc_block_buf(size, node, NULL); -} - +void *vmemmap_alloc_block_buf(unsigned long size, int node); +void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap); void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); int vmemmap_populate_basepages(unsigned long start, unsigned long end, int node); diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 376dcf05a39c..d012c9e2811b 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -74,7 +74,7 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) } /* need to make sure size is all the same during early stage */ -static void * __meminit alloc_block_buf(unsigned long size, int node) +void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node) { void *ptr; @@ -129,7 +129,7 @@ static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap, return pfn + nr_align; } -static void * __meminit altmap_alloc_block_buf(unsigned long size, +void * __meminit altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap) { unsigned long pfn, nr_pfns; @@ -153,15 +153,6 @@ static void * __meminit altmap_alloc_block_buf(unsigned long size, return ptr; } -/* need to make sure size is all the same during early stage */ -void * __meminit __vmemmap_alloc_block_buf(unsigned long size, int node, - struct vmem_altmap *altmap) -{ - if (altmap) - return altmap_alloc_block_buf(size, altmap); - return alloc_block_buf(size, node); -} - void __meminit vmemmap_verify(pte_t *pte, int node, unsigned long start, unsigned long end) { @@ -178,7 +169,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node) pte_t *pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) { pte_t entry; - void *p = alloc_block_buf(PAGE_SIZE, node); + void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node); if (!p) return NULL; entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); -- cgit v1.2.3 From 5633e85b2c3133051d8201b586ba195f1733096b Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Fri, 10 Nov 2017 08:48:27 +0900 Subject: powerpc64: Add .opd based function descriptor dereference We are moving towards separate kernel and module function descriptor dereference callbacks. This patch enables it for powerpc64. For pointers that belong to the kernel - Added __start_opd and __end_opd pointers, to track the kernel .opd section address range; - Added dereference_kernel_function_descriptor(). Now we will dereference only function pointers that are within [__start_opd, __end_opd); For pointers that belong to a module - Added dereference_module_function_descriptor() to handle module function descriptor dereference. Now we will dereference only pointers that are within [module->opd.start, module->opd.end). Link: http://lkml.kernel.org/r/20171109234830.5067-4-sergey.senozhatsky@gmail.com To: Tony Luck To: Fenghua Yu To: Helge Deller To: Benjamin Herrenschmidt To: Paul Mackerras To: Michael Ellerman To: James Bottomley Cc: Andrew Morton Cc: Jessica Yu Cc: Petr Mladek Cc: Steven Rostedt Cc: linux-ia64@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Cc: Sergey Senozhatsky Cc: Sergey Senozhatsky Signed-off-by: Sergey Senozhatsky Tested-by: Santosh Sivaraj #powerpc Signed-off-by: Petr Mladek --- arch/powerpc/include/asm/module.h | 3 +++ arch/powerpc/include/asm/sections.h | 12 ++++++++++++ arch/powerpc/kernel/module_64.c | 14 ++++++++++++++ arch/powerpc/kernel/vmlinux.lds.S | 2 ++ 4 files changed, 31 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 6c0132c7212f..7e28442827f1 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -45,6 +45,9 @@ struct mod_arch_specific { unsigned long tramp; #endif + /* For module function descriptor dereference */ + unsigned long start_opd; + unsigned long end_opd; #else /* powerpc64 */ /* Indices of PLT sections within module. */ unsigned int core_plt_section; diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 82bec63bbd4f..e335a8f846af 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -66,6 +66,9 @@ static inline int overlaps_kvm_tmp(unsigned long start, unsigned long end) } #ifdef PPC64_ELF_ABI_v1 + +#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 + #undef dereference_function_descriptor static inline void *dereference_function_descriptor(void *ptr) { @@ -76,6 +79,15 @@ static inline void *dereference_function_descriptor(void *ptr) ptr = p; return ptr; } + +#undef dereference_kernel_function_descriptor +static inline void *dereference_kernel_function_descriptor(void *ptr) +{ + if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd) + return ptr; + + return dereference_function_descriptor(ptr); +} #endif /* PPC64_ELF_ABI_v1 */ #endif diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 759104b99f9f..218971ac7e04 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -93,6 +93,15 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) { return 0; } + +void *dereference_module_function_descriptor(struct module *mod, void *ptr) +{ + if (ptr < (void *)mod->arch.start_opd || + ptr >= (void *)mod->arch.end_opd) + return ptr; + + return dereference_function_descriptor(ptr); +} #endif #define STUB_MAGIC 0x73747562 /* stub */ @@ -344,6 +353,11 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0) dedotify_versions((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size); + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".opd")) { + me->arch.start_opd = sechdrs[i].sh_addr; + me->arch.end_opd = sechdrs[i].sh_addr + + sechdrs[i].sh_size; + } /* We don't handle .init for the moment: rename to _init */ while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init"))) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 0494e1566ee2..5dac5ab22fa2 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -278,7 +278,9 @@ SECTIONS } .opd : AT(ADDR(.opd) - LOAD_OFFSET) { + __start_opd = .; *(.opd) + __end_opd = .; } . = ALIGN(256); -- cgit v1.2.3 From c91a7a405bc4f80f87b7fdda492154cfd17987e0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Dec 2017 11:12:02 +0100 Subject: powerpc: remove unused flush_write_buffers definition Signed-off-by: Christoph Hellwig --- arch/powerpc/include/asm/dma-mapping.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 5a6cbe11db6f..592c7f418aa0 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -107,9 +107,6 @@ static inline void set_dma_offset(struct device *dev, dma_addr_t off) dev->archdata.dma_offset = off; } -/* this will be removed soon */ -#define flush_write_buffers() - #define HAVE_ARCH_DMA_SET_MASK 1 extern int dma_set_mask(struct device *dev, u64 dma_mask); -- cgit v1.2.3 From ea8c64ace86647260ec4255f483e5844d62af2df Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Jan 2018 16:21:13 +0100 Subject: dma-mapping: move swiotlb arch helpers to a new header phys_to_dma, dma_to_phys and dma_capable are helpers published by architecture code for use of swiotlb and xen-swiotlb only. Drivers are not supposed to use these directly, but use the DMA API instead. Move these to a new asm/dma-direct.h helper, included by a linux/dma-direct.h wrapper that provides the default linear mapping unless the architecture wants to override it. In the MIPS case the existing dma-coherent.h is reused for now as untangling it will take a bit of work. Signed-off-by: Christoph Hellwig Acked-by: Robin Murphy --- MAINTAINERS | 1 + arch/Kconfig | 4 +++ arch/arm/Kconfig | 1 + arch/arm/include/asm/dma-direct.h | 36 ++++++++++++++++++++++ arch/arm/include/asm/dma-mapping.h | 31 ------------------- arch/arm64/include/asm/dma-mapping.h | 22 ------------- arch/arm64/mm/dma-mapping.c | 2 +- arch/ia64/include/asm/dma-mapping.h | 18 ----------- arch/mips/Kconfig | 2 ++ arch/mips/include/asm/dma-direct.h | 1 + arch/mips/include/asm/dma-mapping.h | 8 ----- .../include/asm/mach-cavium-octeon/dma-coherence.h | 8 +++++ arch/mips/include/asm/mach-generic/dma-coherence.h | 12 -------- .../include/asm/mach-loongson64/dma-coherence.h | 8 +++++ arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/dma-direct.h | 29 +++++++++++++++++ arch/powerpc/include/asm/dma-mapping.h | 25 --------------- arch/tile/include/asm/dma-mapping.h | 18 ----------- arch/unicore32/include/asm/dma-mapping.h | 18 ----------- arch/x86/Kconfig | 1 + arch/x86/include/asm/dma-direct.h | 30 ++++++++++++++++++ arch/x86/include/asm/dma-mapping.h | 26 ---------------- arch/x86/kernel/amd_gart_64.c | 1 + arch/x86/kernel/pci-dma.c | 2 +- arch/x86/kernel/pci-nommu.c | 2 +- arch/x86/kernel/pci-swiotlb.c | 2 +- arch/x86/mm/mem_encrypt.c | 2 +- arch/x86/pci/sta2x11-fixup.c | 1 + arch/xtensa/include/asm/dma-mapping.h | 10 ------ drivers/crypto/marvell/cesa.c | 1 + drivers/mtd/nand/qcom_nandc.c | 1 + drivers/xen/swiotlb-xen.c | 2 +- include/linux/dma-direct.h | 32 +++++++++++++++++++ lib/swiotlb.c | 2 +- 34 files changed, 165 insertions(+), 195 deletions(-) create mode 100644 arch/arm/include/asm/dma-direct.h create mode 100644 arch/mips/include/asm/dma-direct.h create mode 100644 arch/powerpc/include/asm/dma-direct.h create mode 100644 arch/x86/include/asm/dma-direct.h create mode 100644 include/linux/dma-direct.h (limited to 'arch/powerpc') diff --git a/MAINTAINERS b/MAINTAINERS index 95c3fa1f520f..d2cfdcce1db5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4338,6 +4338,7 @@ F: lib/dma-noop.c F: lib/dma-virt.c F: drivers/base/dma-mapping.c F: drivers/base/dma-coherent.c +F: include/linux/dma-direct.h F: include/linux/dma-mapping.h DME1737 HARDWARE MONITOR DRIVER diff --git a/arch/Kconfig b/arch/Kconfig index 400b9e1b2f27..3edf118ad777 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -938,6 +938,10 @@ config STRICT_MODULE_RWX and non-text memory will be made non-executable. This provides protection against certain security exploits (e.g. writing to text) +# select if the architecture provides an asm/dma-direct.h header +config ARCH_HAS_PHYS_TO_DMA + bool + config ARCH_HAS_REFCOUNT bool help diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 51c8df561077..00d889a37965 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -8,6 +8,7 @@ config ARM select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_SET_MEMORY + select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL select ARCH_HAS_STRICT_MODULE_RWX if MMU select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h new file mode 100644 index 000000000000..5b0a8a421894 --- /dev/null +++ b/arch/arm/include/asm/dma-direct.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_ARM_DMA_DIRECT_H +#define ASM_ARM_DMA_DIRECT_H 1 + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + unsigned int offset = paddr & ~PAGE_MASK; + return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) +{ + unsigned int offset = dev_addr & ~PAGE_MASK; + return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset; +} + +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + u64 limit, mask; + + if (!dev->dma_mask) + return 0; + + mask = *dev->dma_mask; + + limit = (mask + 1) & ~mask; + if (limit && size > limit) + return 0; + + if ((addr | (addr + size - 1)) & ~mask) + return 0; + + return 1; +} + +#endif /* ASM_ARM_DMA_DIRECT_H */ diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index daf837423a76..5fb1b7fbdfbe 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -109,37 +109,6 @@ static inline bool is_device_dma_coherent(struct device *dev) return dev->archdata.dma_coherent; } -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - unsigned int offset = paddr & ~PAGE_MASK; - return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset; -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) -{ - unsigned int offset = dev_addr & ~PAGE_MASK; - return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset; -} - -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - u64 limit, mask; - - if (!dev->dma_mask) - return 0; - - mask = *dev->dma_mask; - - limit = (mask + 1) & ~mask; - if (limit && size > limit) - return 0; - - if ((addr | (addr + size - 1)) & ~mask) - return 0; - - return 1; -} - static inline void dma_mark_clean(void *addr, size_t size) { } /** diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index eada887a93bf..400fa67d3b5a 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -50,28 +50,6 @@ static inline bool is_device_dma_coherent(struct device *dev) return dev->archdata.dma_coherent; } -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - dma_addr_t dev_addr = (dma_addr_t)paddr; - - return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) -{ - phys_addr_t paddr = (phys_addr_t)dev_addr; - - return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); -} - -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return false; - - return addr + size - 1 <= *dev->dma_mask; -} - static inline void dma_mark_clean(void *addr, size_t size) { } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index b45c5bcaeccb..f3a637b98487 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index c1bab526a046..eabee56d995c 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -27,22 +27,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return platform_dma_get_ops(NULL); } -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return 0; - - return addr + size - 1 <= *dev->dma_mask; -} - -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return paddr; -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return daddr; -} - #endif /* _ASM_IA64_DMA_MAPPING_H */ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 350a990fc719..4b0c26b2e9b7 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -429,6 +429,7 @@ config MACH_LOONGSON32 config MACH_LOONGSON64 bool "Loongson-2/3 family of machines" + select ARCH_HAS_PHYS_TO_DMA select SYS_SUPPORTS_ZBOOT help This enables the support of Loongson-2/3 family of machines. @@ -877,6 +878,7 @@ config MIKROTIK_RB532 config CAVIUM_OCTEON_SOC bool "Cavium Networks Octeon SoC based boards" select CEVT_R4K + select ARCH_HAS_PHYS_TO_DMA select ARCH_PHYS_ADDR_T_64BIT select DMA_COHERENT select SYS_SUPPORTS_64BIT_KERNEL diff --git a/arch/mips/include/asm/dma-direct.h b/arch/mips/include/asm/dma-direct.h new file mode 100644 index 000000000000..f32f15530aba --- /dev/null +++ b/arch/mips/include/asm/dma-direct.h @@ -0,0 +1 @@ +#include diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index 5c334ac15945..676c14cfc580 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -17,14 +17,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return mips_dma_map_ops; } -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return false; - - return addr + size - 1 <= *dev->dma_mask; -} - static inline void dma_mark_clean(void *addr, size_t size) {} #define arch_setup_dma_ops arch_setup_dma_ops diff --git a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h index 9110988b92a1..138edf6b5b48 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h +++ b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h @@ -61,6 +61,14 @@ static inline void plat_post_dma_flush(struct device *dev) { } +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return false; + + return addr + size - 1 <= *dev->dma_mask; +} + dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); diff --git a/arch/mips/include/asm/mach-generic/dma-coherence.h b/arch/mips/include/asm/mach-generic/dma-coherence.h index 61addb1677e9..8ad7a40ca786 100644 --- a/arch/mips/include/asm/mach-generic/dma-coherence.h +++ b/arch/mips/include/asm/mach-generic/dma-coherence.h @@ -70,16 +70,4 @@ static inline void plat_post_dma_flush(struct device *dev) } #endif -#ifdef CONFIG_SWIOTLB -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return paddr; -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return daddr; -} -#endif - #endif /* __ASM_MACH_GENERIC_DMA_COHERENCE_H */ diff --git a/arch/mips/include/asm/mach-loongson64/dma-coherence.h b/arch/mips/include/asm/mach-loongson64/dma-coherence.h index 1602a9e9e8c2..b1b575f5c6c1 100644 --- a/arch/mips/include/asm/mach-loongson64/dma-coherence.h +++ b/arch/mips/include/asm/mach-loongson64/dma-coherence.h @@ -17,6 +17,14 @@ struct device; +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return false; + + return addr + size - 1 <= *dev->dma_mask; +} + extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); extern phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr, diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c51e6ce42e7a..887285eb684a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -139,6 +139,7 @@ config PPC select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API if PPC64 select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE select ARCH_HAS_SG_CHAIN diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h new file mode 100644 index 000000000000..a5b59c765426 --- /dev/null +++ b/arch/powerpc/include/asm/dma-direct.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_POWERPC_DMA_DIRECT_H +#define ASM_POWERPC_DMA_DIRECT_H 1 + +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ +#ifdef CONFIG_SWIOTLB + struct dev_archdata *sd = &dev->archdata; + + if (sd->max_direct_dma_addr && addr + size > sd->max_direct_dma_addr) + return false; +#endif + + if (!dev->dma_mask) + return false; + + return addr + size - 1 <= *dev->dma_mask; +} + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr + get_dma_offset(dev); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr - get_dma_offset(dev); +} +#endif /* ASM_POWERPC_DMA_DIRECT_H */ diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 592c7f418aa0..f6ab51205a85 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -112,31 +112,6 @@ extern int dma_set_mask(struct device *dev, u64 dma_mask); extern u64 __dma_get_required_mask(struct device *dev); -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ -#ifdef CONFIG_SWIOTLB - struct dev_archdata *sd = &dev->archdata; - - if (sd->max_direct_dma_addr && addr + size > sd->max_direct_dma_addr) - return false; -#endif - - if (!dev->dma_mask) - return false; - - return addr + size - 1 <= *dev->dma_mask; -} - -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return paddr + get_dma_offset(dev); -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return daddr - get_dma_offset(dev); -} - #define ARCH_HAS_DMA_MMAP_COHERENT #endif /* __KERNEL__ */ diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h index 97ad62878290..75b8aaa4e70b 100644 --- a/arch/tile/include/asm/dma-mapping.h +++ b/arch/tile/include/asm/dma-mapping.h @@ -44,26 +44,8 @@ static inline void set_dma_offset(struct device *dev, dma_addr_t off) dev->archdata.dma_offset = off; } -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return paddr; -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return daddr; -} - static inline void dma_mark_clean(void *addr, size_t size) {} -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return 0; - - return addr + size - 1 <= *dev->dma_mask; -} - #define HAVE_ARCH_DMA_SET_MASK 1 int dma_set_mask(struct device *dev, u64 mask); diff --git a/arch/unicore32/include/asm/dma-mapping.h b/arch/unicore32/include/asm/dma-mapping.h index ac608c2f6af6..5cb250bf2d8c 100644 --- a/arch/unicore32/include/asm/dma-mapping.h +++ b/arch/unicore32/include/asm/dma-mapping.h @@ -25,24 +25,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return &swiotlb_dma_map_ops; } -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (dev && dev->dma_mask) - return addr + size - 1 <= *dev->dma_mask; - - return 1; -} - -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return paddr; -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return daddr; -} - static inline void dma_mark_clean(void *addr, size_t size) {} #endif /* __KERNEL__ */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d4fc98c50378..f6f4328103c0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -54,6 +54,7 @@ config X86 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 + select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API if X86_64 # Causing hangs/crashes, see the commit that added this change for details. select ARCH_HAS_REFCOUNT diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h new file mode 100644 index 000000000000..1295bc622ebe --- /dev/null +++ b/arch/x86/include/asm/dma-direct.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_X86_DMA_DIRECT_H +#define ASM_X86_DMA_DIRECT_H 1 + +#include + +#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ +bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); +#else +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + + return addr + size - 1 <= *dev->dma_mask; +} + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return __sme_set(paddr); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return __sme_clr(daddr); +} +#endif /* CONFIG_X86_DMA_REMAP */ +#endif /* ASM_X86_DMA_DIRECT_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 0350d99bb8fd..dfdc9357a349 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -12,7 +12,6 @@ #include #include #include -#include #ifdef CONFIG_ISA # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) @@ -42,31 +41,6 @@ extern void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs); -#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ -extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); -extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); -extern phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); -#else - -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return 0; - - return addr + size - 1 <= *dev->dma_mask; -} - -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return __sme_set(paddr); -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return __sme_clr(daddr); -} -#endif /* CONFIG_X86_DMA_REMAP */ - static inline unsigned long dma_alloc_coherent_mask(struct device *dev, gfp_t gfp) { diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index cc0e8bc0ea3f..ecd486cb06ab 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 599d7462eccc..8439e6de6156 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include #include #include #include diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index b0caae27e1b7..618285e475c6 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Fallback functions when the main IOMMU code is not compiled in. This code is roughly equivalent to i386. */ -#include +#include #include #include #include diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 53bd05ea90d8..9d3e35c33d94 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 391b13402e40..09532c935da0 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 53d600217973..75577c1490c4 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #define STA2X11_SWIOTLB_SIZE (4*1024*1024) diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h index 153bf2370988..44098800dad7 100644 --- a/arch/xtensa/include/asm/dma-mapping.h +++ b/arch/xtensa/include/asm/dma-mapping.h @@ -23,14 +23,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return &xtensa_dma_map_ops; } -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return (dma_addr_t)paddr; -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return (phys_addr_t)daddr; -} - #endif /* _XTENSA_DMA_MAPPING_H */ diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index 293832488cc9..3a0c40081ffb 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -24,6 +24,7 @@ #include #include #include +#include /* XXX: drivers shall never use this directly! */ #include #include #include diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c index 2656c1ac5646..411cdfd12a85 100644 --- a/drivers/mtd/nand/qcom_nandc.c +++ b/drivers/mtd/nand/qcom_nandc.c @@ -23,6 +23,7 @@ #include #include #include +#include /* XXX: drivers shall never use this directly! */ /* NANDc reg offsets */ #define NAND_FLASH_CMD 0x00 diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 82fc54f8eb77..5bb72d3f8337 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -36,7 +36,7 @@ #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt #include -#include +#include #include #include #include diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h new file mode 100644 index 000000000000..2cc1b6558944 --- /dev/null +++ b/include/linux/dma-direct.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_DMA_DIRECT_H +#define _LINUX_DMA_DIRECT_H 1 + +#include + +#ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA +#include +#else +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + dma_addr_t dev_addr = (dma_addr_t)paddr; + + return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) +{ + phys_addr_t paddr = (phys_addr_t)dev_addr; + + return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); +} + +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return false; + + return addr + size - 1 <= *dev->dma_mask; +} +#endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ +#endif /* _LINUX_DMA_DIRECT_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index cea19aaf303c..6583f3512386 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -18,7 +18,7 @@ */ #include -#include +#include #include #include #include -- cgit v1.2.3 From b49efd76248242169f28ffd20ada05064d01ed9f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Jan 2018 22:11:31 +0100 Subject: dma-mapping: move dma_mark_clean to dma-direct.h And unlike the other helpers we don't require a as this helper is a special case for ia64 only, and this keeps it as simple as possible. Signed-off-by: Christoph Hellwig --- arch/arm/include/asm/dma-mapping.h | 2 -- arch/arm64/include/asm/dma-mapping.h | 4 ---- arch/ia64/Kconfig | 1 + arch/ia64/include/asm/dma.h | 2 -- arch/mips/include/asm/dma-mapping.h | 2 -- arch/powerpc/include/asm/swiotlb.h | 2 -- arch/tile/include/asm/dma-mapping.h | 2 -- arch/unicore32/include/asm/dma-mapping.h | 2 -- arch/x86/include/asm/swiotlb.h | 2 -- include/linux/dma-direct.h | 9 +++++++++ 10 files changed, 10 insertions(+), 18 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 5fb1b7fbdfbe..e5d9020c9ee1 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -109,8 +109,6 @@ static inline bool is_device_dma_coherent(struct device *dev) return dev->archdata.dma_coherent; } -static inline void dma_mark_clean(void *addr, size_t size) { } - /** * arm_dma_alloc - allocate consistent memory for DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 400fa67d3b5a..b7847eb8a7bb 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -50,9 +50,5 @@ static inline bool is_device_dma_coherent(struct device *dev) return dev->archdata.dma_coherent; } -static inline void dma_mark_clean(void *addr, size_t size) -{ -} - #endif /* __KERNEL__ */ #endif /* __ASM_DMA_MAPPING_H */ diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 49583c5a5d44..4d18fca885ee 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -33,6 +33,7 @@ config IA64 select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_VIRT_CPU_ACCOUNTING + select ARCH_HAS_DMA_MARK_CLEAN select ARCH_HAS_SG_CHAIN select VIRT_TO_BUS select ARCH_DISCARD_MEMBLOCK diff --git a/arch/ia64/include/asm/dma.h b/arch/ia64/include/asm/dma.h index 186850eec934..23604d6a2cb2 100644 --- a/arch/ia64/include/asm/dma.h +++ b/arch/ia64/include/asm/dma.h @@ -20,6 +20,4 @@ extern unsigned long MAX_DMA_ADDRESS; #define free_dma(x) -void dma_mark_clean(void *addr, size_t size); - #endif /* _ASM_IA64_DMA_H */ diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index 676c14cfc580..886e75a383f2 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -17,8 +17,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return mips_dma_map_ops; } -static inline void dma_mark_clean(void *addr, size_t size) {} - #define arch_setup_dma_ops arch_setup_dma_ops static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, diff --git a/arch/powerpc/include/asm/swiotlb.h b/arch/powerpc/include/asm/swiotlb.h index 01d45a5fd00b..9341ee804d19 100644 --- a/arch/powerpc/include/asm/swiotlb.h +++ b/arch/powerpc/include/asm/swiotlb.h @@ -15,8 +15,6 @@ extern const struct dma_map_ops swiotlb_dma_ops; -static inline void dma_mark_clean(void *addr, size_t size) {} - extern unsigned int ppc_swiotlb_enable; int __init swiotlb_setup_bus_notifier(void); diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h index 75b8aaa4e70b..d25fce101fc0 100644 --- a/arch/tile/include/asm/dma-mapping.h +++ b/arch/tile/include/asm/dma-mapping.h @@ -44,8 +44,6 @@ static inline void set_dma_offset(struct device *dev, dma_addr_t off) dev->archdata.dma_offset = off; } -static inline void dma_mark_clean(void *addr, size_t size) {} - #define HAVE_ARCH_DMA_SET_MASK 1 int dma_set_mask(struct device *dev, u64 mask); diff --git a/arch/unicore32/include/asm/dma-mapping.h b/arch/unicore32/include/asm/dma-mapping.h index 5cb250bf2d8c..f2bfec273aa7 100644 --- a/arch/unicore32/include/asm/dma-mapping.h +++ b/arch/unicore32/include/asm/dma-mapping.h @@ -25,7 +25,5 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return &swiotlb_dma_map_ops; } -static inline void dma_mark_clean(void *addr, size_t size) {} - #endif /* __KERNEL__ */ #endif diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index bdf9aed40403..1c6a6cb230ff 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -28,8 +28,6 @@ static inline void pci_swiotlb_late_init(void) } #endif -static inline void dma_mark_clean(void *addr, size_t size) {} - extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs); diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 2cc1b6558944..10e924b7cba7 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -29,4 +29,13 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) return addr + size - 1 <= *dev->dma_mask; } #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ + +#ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN +void dma_mark_clean(void *addr, size_t size); +#else +static inline void dma_mark_clean(void *addr, size_t size) +{ +} +#endif /* CONFIG_ARCH_HAS_DMA_MARK_CLEAN */ + #endif /* _LINUX_DMA_DIRECT_H */ -- cgit v1.2.3 From 2d9d6f6c9e8ae9490a8df8727f7e00310a5efb5f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 Dec 2017 10:58:24 +0100 Subject: powerpc: rename dma_direct_ to dma_nommu_ We want to use the dma_direct_ namespace for a generic implementation, so rename powerpc to the second best choice: dma_nommu_. Signed-off-by: Christoph Hellwig --- arch/powerpc/include/asm/dma-mapping.h | 8 ++-- arch/powerpc/kernel/dma-iommu.c | 2 +- arch/powerpc/kernel/dma-swiotlb.c | 6 +-- arch/powerpc/kernel/dma.c | 68 +++++++++++++++---------------- arch/powerpc/kernel/pci-common.c | 2 +- arch/powerpc/kernel/setup-common.c | 2 +- arch/powerpc/platforms/cell/iommu.c | 28 ++++++------- arch/powerpc/platforms/pasemi/iommu.c | 2 +- arch/powerpc/platforms/pasemi/setup.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 4 +- arch/powerpc/platforms/pseries/iommu.c | 2 +- arch/powerpc/platforms/pseries/vio.c | 2 +- arch/powerpc/sysdev/dart_iommu.c | 4 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- drivers/misc/cxl/vphb.c | 2 +- 15 files changed, 68 insertions(+), 68 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index f6ab51205a85..8fa394520af6 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -19,13 +19,13 @@ #include /* Some dma direct funcs must be visible for use in other dma_ops */ -extern void *__dma_direct_alloc_coherent(struct device *dev, size_t size, +extern void *__dma_nommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs); -extern void __dma_direct_free_coherent(struct device *dev, size_t size, +extern void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs); -extern int dma_direct_mmap_coherent(struct device *dev, +extern int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t handle, size_t size, unsigned long attrs); @@ -73,7 +73,7 @@ static inline unsigned long device_to_mask(struct device *dev) #ifdef CONFIG_PPC64 extern struct dma_map_ops dma_iommu_ops; #endif -extern const struct dma_map_ops dma_direct_ops; +extern const struct dma_map_ops dma_nommu_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 66f33e7f8d40..f9fe2080ceb9 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -114,7 +114,7 @@ int dma_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr) struct dma_map_ops dma_iommu_ops = { .alloc = dma_iommu_alloc_coherent, .free = dma_iommu_free_coherent, - .mmap = dma_direct_mmap_coherent, + .mmap = dma_nommu_mmap_coherent, .map_sg = dma_iommu_map_sg, .unmap_sg = dma_iommu_unmap_sg, .dma_supported = dma_iommu_dma_supported, diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index d0ea7860e02b..f1e99b9cee97 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -47,9 +47,9 @@ static u64 swiotlb_powerpc_get_required(struct device *dev) * for everything else. */ const struct dma_map_ops swiotlb_dma_ops = { - .alloc = __dma_direct_alloc_coherent, - .free = __dma_direct_free_coherent, - .mmap = dma_direct_mmap_coherent, + .alloc = __dma_nommu_alloc_coherent, + .free = __dma_nommu_free_coherent, + .mmap = dma_nommu_mmap_coherent, .map_sg = swiotlb_map_sg_attrs, .unmap_sg = swiotlb_unmap_sg_attrs, .dma_supported = swiotlb_dma_supported, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 4194bbbbdb10..6d5d04ccf3b4 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -40,7 +40,7 @@ static u64 __maybe_unused get_pfn_limit(struct device *dev) return pfn; } -static int dma_direct_dma_supported(struct device *dev, u64 mask) +static int dma_nommu_dma_supported(struct device *dev, u64 mask) { #ifdef CONFIG_PPC64 u64 limit = get_dma_offset(dev) + (memblock_end_of_DRAM() - 1); @@ -62,7 +62,7 @@ static int dma_direct_dma_supported(struct device *dev, u64 mask) #endif } -void *__dma_direct_alloc_coherent(struct device *dev, size_t size, +void *__dma_nommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) { @@ -119,7 +119,7 @@ void *__dma_direct_alloc_coherent(struct device *dev, size_t size, #endif } -void __dma_direct_free_coherent(struct device *dev, size_t size, +void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { @@ -130,7 +130,7 @@ void __dma_direct_free_coherent(struct device *dev, size_t size, #endif } -static void *dma_direct_alloc_coherent(struct device *dev, size_t size, +static void *dma_nommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) { @@ -139,8 +139,8 @@ static void *dma_direct_alloc_coherent(struct device *dev, size_t size, /* The coherent mask may be smaller than the real mask, check if * we can really use the direct ops */ - if (dma_direct_dma_supported(dev, dev->coherent_dma_mask)) - return __dma_direct_alloc_coherent(dev, size, dma_handle, + if (dma_nommu_dma_supported(dev, dev->coherent_dma_mask)) + return __dma_nommu_alloc_coherent(dev, size, dma_handle, flag, attrs); /* Ok we can't ... do we have an iommu ? If not, fail */ @@ -154,15 +154,15 @@ static void *dma_direct_alloc_coherent(struct device *dev, size_t size, dev_to_node(dev)); } -static void dma_direct_free_coherent(struct device *dev, size_t size, +static void dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { struct iommu_table *iommu; - /* See comments in dma_direct_alloc_coherent() */ - if (dma_direct_dma_supported(dev, dev->coherent_dma_mask)) - return __dma_direct_free_coherent(dev, size, vaddr, dma_handle, + /* See comments in dma_nommu_alloc_coherent() */ + if (dma_nommu_dma_supported(dev, dev->coherent_dma_mask)) + return __dma_nommu_free_coherent(dev, size, vaddr, dma_handle, attrs); /* Maybe we used an iommu ... */ iommu = get_iommu_table_base(dev); @@ -175,7 +175,7 @@ static void dma_direct_free_coherent(struct device *dev, size_t size, iommu_free_coherent(iommu, size, vaddr, dma_handle); } -int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma, +int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t handle, size_t size, unsigned long attrs) { @@ -193,7 +193,7 @@ int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot); } -static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, +static int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction direction, unsigned long attrs) { @@ -213,13 +213,13 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, return nents; } -static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg, +static void dma_nommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, unsigned long attrs) { } -static u64 dma_direct_get_required_mask(struct device *dev) +static u64 dma_nommu_get_required_mask(struct device *dev) { u64 end, mask; @@ -231,7 +231,7 @@ static u64 dma_direct_get_required_mask(struct device *dev) return mask; } -static inline dma_addr_t dma_direct_map_page(struct device *dev, +static inline dma_addr_t dma_nommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, @@ -246,7 +246,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev, return page_to_phys(page) + offset + get_dma_offset(dev); } -static inline void dma_direct_unmap_page(struct device *dev, +static inline void dma_nommu_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction, @@ -255,7 +255,7 @@ static inline void dma_direct_unmap_page(struct device *dev, } #ifdef CONFIG_NOT_COHERENT_CACHE -static inline void dma_direct_sync_sg(struct device *dev, +static inline void dma_nommu_sync_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction direction) { @@ -266,7 +266,7 @@ static inline void dma_direct_sync_sg(struct device *dev, __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); } -static inline void dma_direct_sync_single(struct device *dev, +static inline void dma_nommu_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) { @@ -274,24 +274,24 @@ static inline void dma_direct_sync_single(struct device *dev, } #endif -const struct dma_map_ops dma_direct_ops = { - .alloc = dma_direct_alloc_coherent, - .free = dma_direct_free_coherent, - .mmap = dma_direct_mmap_coherent, - .map_sg = dma_direct_map_sg, - .unmap_sg = dma_direct_unmap_sg, - .dma_supported = dma_direct_dma_supported, - .map_page = dma_direct_map_page, - .unmap_page = dma_direct_unmap_page, - .get_required_mask = dma_direct_get_required_mask, +const struct dma_map_ops dma_nommu_ops = { + .alloc = dma_nommu_alloc_coherent, + .free = dma_nommu_free_coherent, + .mmap = dma_nommu_mmap_coherent, + .map_sg = dma_nommu_map_sg, + .unmap_sg = dma_nommu_unmap_sg, + .dma_supported = dma_nommu_dma_supported, + .map_page = dma_nommu_map_page, + .unmap_page = dma_nommu_unmap_page, + .get_required_mask = dma_nommu_get_required_mask, #ifdef CONFIG_NOT_COHERENT_CACHE - .sync_single_for_cpu = dma_direct_sync_single, - .sync_single_for_device = dma_direct_sync_single, - .sync_sg_for_cpu = dma_direct_sync_sg, - .sync_sg_for_device = dma_direct_sync_sg, + .sync_single_for_cpu = dma_nommu_sync_single, + .sync_single_for_device = dma_nommu_sync_single, + .sync_sg_for_cpu = dma_nommu_sync_sg, + .sync_sg_for_device = dma_nommu_sync_sg, #endif }; -EXPORT_SYMBOL(dma_direct_ops); +EXPORT_SYMBOL(dma_nommu_ops); int dma_set_coherent_mask(struct device *dev, u64 mask) { @@ -302,7 +302,7 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) * is no dma_op->set_coherent_mask() so we have to do * things the hard way: */ - if (get_dma_ops(dev) != &dma_direct_ops || + if (get_dma_ops(dev) != &dma_nommu_ops || get_iommu_table_base(dev) == NULL || !dma_iommu_dma_supported(dev, mask)) return -EIO; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 0ac7aa346c69..590f4d0a6cb1 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -60,7 +60,7 @@ resource_size_t isa_mem_base; EXPORT_SYMBOL(isa_mem_base); -static const struct dma_map_ops *pci_dma_ops = &dma_direct_ops; +static const struct dma_map_ops *pci_dma_ops = &dma_nommu_ops; void set_pci_dma_ops(const struct dma_map_ops *dma_ops) { diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9d213542a48b..9b89df1e71ab 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -791,7 +791,7 @@ void arch_setup_pdev_archdata(struct platform_device *pdev) { pdev->archdata.dma_mask = DMA_BIT_MASK(32); pdev->dev.dma_mask = &pdev->archdata.dma_mask; - set_dma_ops(&pdev->dev, &dma_direct_ops); + set_dma_ops(&pdev->dev, &dma_nommu_ops); } static __init void print_system_info(void) diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 4b91ad08eefd..12352a58072a 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -541,7 +541,7 @@ static struct cbe_iommu *cell_iommu_for_node(int nid) return NULL; } -static unsigned long cell_dma_direct_offset; +static unsigned long cell_dma_nommu_offset; static unsigned long dma_iommu_fixed_base; @@ -580,7 +580,7 @@ static void *dma_fixed_alloc_coherent(struct device *dev, size_t size, device_to_mask(dev), flag, dev_to_node(dev)); else - return dma_direct_ops.alloc(dev, size, dma_handle, flag, + return dma_nommu_ops.alloc(dev, size, dma_handle, flag, attrs); } @@ -592,7 +592,7 @@ static void dma_fixed_free_coherent(struct device *dev, size_t size, iommu_free_coherent(cell_get_iommu_table(dev), size, vaddr, dma_handle); else - dma_direct_ops.free(dev, size, vaddr, dma_handle, attrs); + dma_nommu_ops.free(dev, size, vaddr, dma_handle, attrs); } static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page, @@ -601,7 +601,7 @@ static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page, unsigned long attrs) { if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING)) - return dma_direct_ops.map_page(dev, page, offset, size, + return dma_nommu_ops.map_page(dev, page, offset, size, direction, attrs); else return iommu_map_page(dev, cell_get_iommu_table(dev), page, @@ -614,7 +614,7 @@ static void dma_fixed_unmap_page(struct device *dev, dma_addr_t dma_addr, unsigned long attrs) { if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING)) - dma_direct_ops.unmap_page(dev, dma_addr, size, direction, + dma_nommu_ops.unmap_page(dev, dma_addr, size, direction, attrs); else iommu_unmap_page(cell_get_iommu_table(dev), dma_addr, size, @@ -626,7 +626,7 @@ static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg, unsigned long attrs) { if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING)) - return dma_direct_ops.map_sg(dev, sg, nents, direction, attrs); + return dma_nommu_ops.map_sg(dev, sg, nents, direction, attrs); else return ppc_iommu_map_sg(dev, cell_get_iommu_table(dev), sg, nents, device_to_mask(dev), @@ -638,7 +638,7 @@ static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg, unsigned long attrs) { if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING)) - dma_direct_ops.unmap_sg(dev, sg, nents, direction, attrs); + dma_nommu_ops.unmap_sg(dev, sg, nents, direction, attrs); else ppc_iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents, direction, attrs); @@ -661,8 +661,8 @@ static void cell_dma_dev_setup(struct device *dev) { if (get_pci_dma_ops() == &dma_iommu_ops) set_iommu_table_base(dev, cell_get_iommu_table(dev)); - else if (get_pci_dma_ops() == &dma_direct_ops) - set_dma_offset(dev, cell_dma_direct_offset); + else if (get_pci_dma_ops() == &dma_nommu_ops) + set_dma_offset(dev, cell_dma_nommu_offset); else BUG(); } @@ -810,14 +810,14 @@ static int __init cell_iommu_init_disabled(void) unsigned long base = 0, size; /* When no iommu is present, we use direct DMA ops */ - set_pci_dma_ops(&dma_direct_ops); + set_pci_dma_ops(&dma_nommu_ops); /* First make sure all IOC translation is turned off */ cell_disable_iommus(); /* If we have no Axon, we set up the spider DMA magic offset */ if (of_find_node_by_name(NULL, "axon") == NULL) - cell_dma_direct_offset = SPIDER_DMA_OFFSET; + cell_dma_nommu_offset = SPIDER_DMA_OFFSET; /* Now we need to check to see where the memory is mapped * in PCI space. We assume that all busses use the same dma @@ -851,13 +851,13 @@ static int __init cell_iommu_init_disabled(void) return -ENODEV; } - cell_dma_direct_offset += base; + cell_dma_nommu_offset += base; - if (cell_dma_direct_offset != 0) + if (cell_dma_nommu_offset != 0) cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup; printk("iommu: disabled, direct DMA offset is 0x%lx\n", - cell_dma_direct_offset); + cell_dma_nommu_offset); return 0; } diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index 7fec04de27fc..78b80cbd9768 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -186,7 +186,7 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev) */ if (dev->vendor == 0x1959 && dev->device == 0xa007 && !firmware_has_feature(FW_FEATURE_LPAR)) { - dev->dev.dma_ops = &dma_direct_ops; + dev->dev.dma_ops = &dma_nommu_ops; /* * Set the coherent DMA mask to prevent the iommu * being used unnecessarily diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index c4a3e93dc324..d0b8ae53660d 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -363,7 +363,7 @@ static int pcmcia_notify(struct notifier_block *nb, unsigned long action, return 0; /* We use the direct ops for localbus */ - dev->dma_ops = &dma_direct_ops; + dev->dma_ops = &dma_nommu_ops; return 0; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 749055553064..9582aeb1fe4c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1850,7 +1850,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) if (bypass) { dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n"); - set_dma_ops(&pdev->dev, &dma_direct_ops); + set_dma_ops(&pdev->dev, &dma_nommu_ops); } else { /* * If the device can't set the TCE bypass bit but still wants @@ -1868,7 +1868,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) return rc; /* 4GB offset bypasses 32-bit space */ set_dma_offset(&pdev->dev, (1ULL << 32)); - set_dma_ops(&pdev->dev, &dma_direct_ops); + set_dma_ops(&pdev->dev, &dma_nommu_ops); } else if (dma_mask >> 32 && dma_mask != DMA_BIT_MASK(64)) { /* * Fail the request if a DMA mask between 32 and 64 bits diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 69921f72e2da..eaa11334fc8c 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1231,7 +1231,7 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) if (dma_offset != 0) { dev_info(dev, "Using 64-bit direct DMA at offset %llx\n", dma_offset); set_dma_offset(dev, dma_offset); - set_dma_ops(dev, &dma_direct_ops); + set_dma_ops(dev, &dma_nommu_ops); ddw_enabled = true; } } diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index d86938260a86..49e04ec19238 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -618,7 +618,7 @@ static u64 vio_dma_get_required_mask(struct device *dev) static const struct dma_map_ops vio_dma_mapping_ops = { .alloc = vio_dma_iommu_alloc_coherent, .free = vio_dma_iommu_free_coherent, - .mmap = dma_direct_mmap_coherent, + .mmap = dma_nommu_mmap_coherent, .map_sg = vio_dma_iommu_map_sg, .unmap_sg = vio_dma_iommu_unmap_sg, .map_page = vio_dma_iommu_map_page, diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 3573d54b2770..a6198d4f0f03 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -402,7 +402,7 @@ static int dart_dma_set_mask(struct device *dev, u64 dma_mask) */ if (dart_device_on_pcie(dev) && dma_mask >= DMA_BIT_MASK(40)) { dev_info(dev, "Using 64-bit DMA iommu bypass\n"); - set_dma_ops(dev, &dma_direct_ops); + set_dma_ops(dev, &dma_nommu_ops); } else { dev_info(dev, "Using 32-bit DMA via iommu\n"); set_dma_ops(dev, &dma_iommu_ops); @@ -446,7 +446,7 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) controller_ops->dma_bus_setup = NULL; /* Setup pci_dma ops */ - set_pci_dma_ops(&dma_direct_ops); + set_pci_dma_ops(&dma_nommu_ops); } #ifdef CONFIG_PM diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 22d98057f773..e4d0133bbeeb 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -135,7 +135,7 @@ static int fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask) * mapping that allows addressing any RAM address from across PCI. */ if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) { - set_dma_ops(dev, &dma_direct_ops); + set_dma_ops(dev, &dma_nommu_ops); set_dma_offset(dev, pci64_dma_offset); } diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 512a4897dbf6..7fd0bdc1436a 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -54,7 +54,7 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) return false; } - set_dma_ops(&dev->dev, &dma_direct_ops); + set_dma_ops(&dev->dev, &dma_nommu_ops); set_dma_offset(&dev->dev, PAGE_OFFSET); return _cxl_pci_associate_default_context(dev, afu); -- cgit v1.2.3 From fdabc3fe998203038a78763c1b3d6ace517e0eea Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 15 Dec 2017 15:31:30 -0600 Subject: PCI: Add #defines for Completion Timeout Disable feature Add #defines for the Completion Timeout Disable feature and use them. No functional change intended. Signed-off-by: Bjorn Helgaas Acked-by: Michael Ellerman --- arch/powerpc/platforms/powernv/eeh-powernv.c | 6 +++--- include/uapi/linux/pci_regs.h | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 4650fb294e7a..2f7cd0ef3cdc 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1654,14 +1654,14 @@ static int pnv_eeh_restore_vf_config(struct pci_dn *pdn) eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, 2, devctl); - /* Disable Completion Timeout */ + /* Disable Completion Timeout if possible */ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, 4, &cap2); - if (cap2 & 0x10) { + if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) { eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL2, 4, &cap2); - cap2 |= 0x10; + cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS; eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL2, 4, cap2); diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 70c2b2ade048..9dc67643fc18 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -622,6 +622,7 @@ * safely. */ #define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ +#define PCI_EXP_DEVCAP2_COMP_TMOUT_DIS 0x00000010 /* Completion Timeout Disable supported */ #define PCI_EXP_DEVCAP2_ARI 0x00000020 /* Alternative Routing-ID */ #define PCI_EXP_DEVCAP2_ATOMIC_ROUTE 0x00000040 /* Atomic Op routing */ #define PCI_EXP_DEVCAP2_ATOMIC_COMP64 0x00000100 /* Atomic 64-bit compare */ @@ -631,6 +632,7 @@ #define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ +#define PCI_EXP_DEVCTL2_COMP_TMOUT_DIS 0x0010 /* Completion Timeout Disable */ #define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ #define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 /* Set Atomic requests */ #define PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK 0x0080 /* Block atomic egress */ -- cgit v1.2.3 From 50ed5780d50baa8a51c3d0e8731c09bb0bc1b4b5 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Tue, 19 Dec 2017 00:37:38 -0500 Subject: powerpc/PCI: Deprecate pci_get_bus_and_slot() pci_get_bus_and_slot() is restrictive such that it assumes domain=0 as where a PCI device is present. This restricts the device drivers to be reused for other domain numbers. Getting ready to remove pci_get_bus_and_slot() function in favor of pci_get_domain_bus_and_slot(). Use pci_get_domain_bus_and_slot() with a domain number of 0 as the code is not ready to consume multiple domains and existing code used domain number 0. Signed-off-by: Sinan Kaya Signed-off-by: Bjorn Helgaas Acked-by: Michael Ellerman --- arch/powerpc/kernel/pci_32.c | 3 ++- arch/powerpc/platforms/powermac/feature.c | 2 +- arch/powerpc/sysdev/mv64x60_pci.c | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 1d817f4d97d9..85ad2f78b889 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -96,7 +96,8 @@ make_one_node_map(struct device_node* node, u8 pci_bus) reg = of_get_property(node, "reg", NULL); if (!reg) continue; - dev = pci_get_bus_and_slot(pci_bus, ((reg[0] >> 8) & 0xff)); + dev = pci_get_domain_bus_and_slot(0, pci_bus, + ((reg[0] >> 8) & 0xff)); if (!dev || !dev->subordinate) { pci_dev_put(dev); continue; diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 9e3f39d36e88..ed8b16694153 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -829,7 +829,7 @@ core99_ata100_enable(struct device_node *node, long value) if (value) { if (pci_device_from_OF_node(node, &pbus, &pid) == 0) - pdev = pci_get_bus_and_slot(pbus, pid); + pdev = pci_get_domain_bus_and_slot(0, pbus, pid); if (pdev == NULL) return 0; rc = pci_enable_device(pdev); diff --git a/arch/powerpc/sysdev/mv64x60_pci.c b/arch/powerpc/sysdev/mv64x60_pci.c index d52b3b81e05f..6fe9104632a0 100644 --- a/arch/powerpc/sysdev/mv64x60_pci.c +++ b/arch/powerpc/sysdev/mv64x60_pci.c @@ -37,7 +37,7 @@ static ssize_t mv64x60_hs_reg_read(struct file *filp, struct kobject *kobj, if (count < MV64X60_VAL_LEN_MAX) return -EINVAL; - phb = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0)); + phb = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0)); if (!phb) return -ENODEV; pci_read_config_dword(phb, MV64X60_PCICFG_CPCI_HOTSWAP, &v); @@ -61,7 +61,7 @@ static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj, if (sscanf(buf, "%i", &v) != 1) return -EINVAL; - phb = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0)); + phb = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0)); if (!phb) return -ENODEV; pci_write_config_dword(phb, MV64X60_PCICFG_CPCI_HOTSWAP, v); -- cgit v1.2.3 From 12c1f339cd49119e39063ae67f02d936f988c079 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 12 Jan 2018 13:39:27 +1100 Subject: powerpc/xive: Move definition of ESB bits From xive.h to xive-regs.h since it's a HW register definition and it can be used from assembly Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/xive-regs.h | 35 +++++++++++++++++++++++++++++++++++ arch/powerpc/include/asm/xive.h | 35 ----------------------------------- 2 files changed, 35 insertions(+), 35 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h index 1d3f2be5ae39..fa4288822b68 100644 --- a/arch/powerpc/include/asm/xive-regs.h +++ b/arch/powerpc/include/asm/xive-regs.h @@ -9,6 +9,41 @@ #ifndef _ASM_POWERPC_XIVE_REGS_H #define _ASM_POWERPC_XIVE_REGS_H +/* + * "magic" Event State Buffer (ESB) MMIO offsets. + * + * Each interrupt source has a 2-bit state machine called ESB + * which can be controlled by MMIO. It's made of 2 bits, P and + * Q. P indicates that an interrupt is pending (has been sent + * to a queue and is waiting for an EOI). Q indicates that the + * interrupt has been triggered while pending. + * + * This acts as a coalescing mechanism in order to guarantee + * that a given interrupt only occurs at most once in a queue. + * + * When doing an EOI, the Q bit will indicate if the interrupt + * needs to be re-triggered. + * + * The following offsets into the ESB MMIO allow to read or + * manipulate the PQ bits. They must be used with an 8-bytes + * load instruction. They all return the previous state of the + * interrupt (atomically). + * + * Additionally, some ESB pages support doing an EOI via a + * store at 0 and some ESBs support doing a trigger via a + * separate trigger page. + */ +#define XIVE_ESB_STORE_EOI 0x400 /* Store */ +#define XIVE_ESB_LOAD_EOI 0x000 /* Load */ +#define XIVE_ESB_GET 0x800 /* Load */ +#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */ +#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */ +#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */ +#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */ + +#define XIVE_ESB_VAL_P 0x2 +#define XIVE_ESB_VAL_Q 0x1 + /* * Thread Management (aka "TM") registers */ diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 371fbebf1ec9..0e77005cf021 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -72,41 +72,6 @@ struct xive_q { atomic_t pending_count; }; -/* - * "magic" Event State Buffer (ESB) MMIO offsets. - * - * Each interrupt source has a 2-bit state machine called ESB - * which can be controlled by MMIO. It's made of 2 bits, P and - * Q. P indicates that an interrupt is pending (has been sent - * to a queue and is waiting for an EOI). Q indicates that the - * interrupt has been triggered while pending. - * - * This acts as a coalescing mechanism in order to guarantee - * that a given interrupt only occurs at most once in a queue. - * - * When doing an EOI, the Q bit will indicate if the interrupt - * needs to be re-triggered. - * - * The following offsets into the ESB MMIO allow to read or - * manipulate the PQ bits. They must be used with an 8-bytes - * load instruction. They all return the previous state of the - * interrupt (atomically). - * - * Additionally, some ESB pages support doing an EOI via a - * store at 0 and some ESBs support doing a trigger via a - * separate trigger page. - */ -#define XIVE_ESB_STORE_EOI 0x400 /* Store */ -#define XIVE_ESB_LOAD_EOI 0x000 /* Load */ -#define XIVE_ESB_GET 0x800 /* Load */ -#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */ -#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */ -#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */ -#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */ - -#define XIVE_ESB_VAL_P 0x2 -#define XIVE_ESB_VAL_Q 0x1 - /* Global enable flags for the XIVE support */ extern bool __xive_enabled; -- cgit v1.2.3 From 7f1c410da59090f9bb2300efebbc3b717594d64c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 12 Jan 2018 13:39:28 +1100 Subject: powerpc/xive: Add interrupt flag to disable automatic EOI This will be used by KVM in order to keep escalation interrupts in the non-EOI (masked) state after they fire. They will be re-enabled directly in HW by KVM when needed. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/xive.h | 3 +++ arch/powerpc/sysdev/xive/common.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 0e77005cf021..b619a5585cd6 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -58,6 +58,9 @@ struct xive_irq_data { #define XIVE_IRQ_FLAG_EOI_FW 0x10 #define XIVE_IRQ_FLAG_H_INT_ESB 0x20 +/* Special flag set by KVM for excalation interrupts */ +#define XIVE_IRQ_NO_EOI 0x80 + #define XIVE_INVALID_CHIP_ID -1 /* A queue tracking structure in a CPU */ diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index a3b8d7d1316e..2547b6021e6a 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d) * EOI the source if it hasn't been disabled and hasn't * been passed-through to a KVM guest */ - if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d)) + if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) && + !(xd->flags & XIVE_IRQ_NO_EOI)) xive_do_source_eoi(irqd_to_hwirq(d), xd); /* -- cgit v1.2.3 From a208fa8f33031b9e0aba44c7d1b7e68eb0cbd29e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 Jan 2018 11:16:26 -0800 Subject: crypto: hash - annotate algorithms taking optional key We need to consistently enforce that keyed hashes cannot be used without setting the key. To do this we need a reliable way to determine whether a given hash algorithm is keyed or not. AF_ALG currently does this by checking for the presence of a ->setkey() method. However, this is actually slightly broken because the CRC-32 algorithms implement ->setkey() but can also be used without a key. (The CRC-32 "key" is not actually a cryptographic key but rather represents the initial state. If not overridden, then a default initial state is used.) Prepare to fix this by introducing a flag CRYPTO_ALG_OPTIONAL_KEY which indicates that the algorithm has a ->setkey() method, but it is not required to be called. Then set it on all the CRC-32 algorithms. The same also applies to the Adler-32 implementation in Lustre. Also, the cryptd and mcryptd templates have to pass through the flag from their underlying algorithm. Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm/crypto/crc32-ce-glue.c | 2 ++ arch/arm64/crypto/crc32-ce-glue.c | 2 ++ arch/powerpc/crypto/crc32c-vpmsum_glue.c | 1 + arch/s390/crypto/crc32-vx.c | 3 +++ arch/sparc/crypto/crc32c_glue.c | 1 + arch/x86/crypto/crc32-pclmul_glue.c | 1 + arch/x86/crypto/crc32c-intel_glue.c | 1 + crypto/crc32_generic.c | 1 + crypto/crc32c_generic.c | 1 + crypto/cryptd.c | 7 +++---- crypto/mcryptd.c | 7 +++---- drivers/crypto/bfin_crc.c | 3 ++- drivers/crypto/stm32/stm32_crc32.c | 2 ++ drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c | 1 + include/linux/crypto.h | 6 ++++++ 15 files changed, 30 insertions(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c index 1b0e0e86ee9c..96e62ec105d0 100644 --- a/arch/arm/crypto/crc32-ce-glue.c +++ b/arch/arm/crypto/crc32-ce-glue.c @@ -188,6 +188,7 @@ static struct shash_alg crc32_pmull_algs[] = { { .base.cra_name = "crc32", .base.cra_driver_name = "crc32-arm-ce", .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .base.cra_blocksize = 1, .base.cra_module = THIS_MODULE, }, { @@ -203,6 +204,7 @@ static struct shash_alg crc32_pmull_algs[] = { { .base.cra_name = "crc32c", .base.cra_driver_name = "crc32c-arm-ce", .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .base.cra_blocksize = 1, .base.cra_module = THIS_MODULE, } }; diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c index 624f4137918c..34b4e3d46aab 100644 --- a/arch/arm64/crypto/crc32-ce-glue.c +++ b/arch/arm64/crypto/crc32-ce-glue.c @@ -185,6 +185,7 @@ static struct shash_alg crc32_pmull_algs[] = { { .base.cra_name = "crc32", .base.cra_driver_name = "crc32-arm64-ce", .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .base.cra_blocksize = 1, .base.cra_module = THIS_MODULE, }, { @@ -200,6 +201,7 @@ static struct shash_alg crc32_pmull_algs[] = { { .base.cra_name = "crc32c", .base.cra_driver_name = "crc32c-arm64-ce", .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .base.cra_blocksize = 1, .base.cra_module = THIS_MODULE, } }; diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c index f058e0c3e4d4..fd1d6c83f0c0 100644 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -141,6 +141,7 @@ static struct shash_alg alg = { .cra_name = "crc32c", .cra_driver_name = "crc32c-vpmsum", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c index 436865926c26..423ee05887e6 100644 --- a/arch/s390/crypto/crc32-vx.c +++ b/arch/s390/crypto/crc32-vx.c @@ -239,6 +239,7 @@ static struct shash_alg crc32_vx_algs[] = { .cra_name = "crc32", .cra_driver_name = "crc32-vx", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CRC32_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crc_ctx), .cra_module = THIS_MODULE, @@ -259,6 +260,7 @@ static struct shash_alg crc32_vx_algs[] = { .cra_name = "crc32be", .cra_driver_name = "crc32be-vx", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CRC32_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crc_ctx), .cra_module = THIS_MODULE, @@ -279,6 +281,7 @@ static struct shash_alg crc32_vx_algs[] = { .cra_name = "crc32c", .cra_driver_name = "crc32c-vx", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CRC32_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crc_ctx), .cra_module = THIS_MODULE, diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c index d1064e46efe8..8aa664638c3c 100644 --- a/arch/sparc/crypto/crc32c_glue.c +++ b/arch/sparc/crypto/crc32c_glue.c @@ -133,6 +133,7 @@ static struct shash_alg alg = { .cra_name = "crc32c", .cra_driver_name = "crc32c-sparc64", .cra_priority = SPARC_CR_OPCODE_PRIORITY, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_alignmask = 7, diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index 27226df3f7d8..c8d9cdacbf10 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -162,6 +162,7 @@ static struct shash_alg alg = { .cra_name = "crc32", .cra_driver_name = "crc32-pclmul", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index c194d5717ae5..5773e1161072 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -226,6 +226,7 @@ static struct shash_alg alg = { .cra_name = "crc32c", .cra_driver_name = "crc32c-intel", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c index aa2a25fc7482..718cbce8d169 100644 --- a/crypto/crc32_generic.c +++ b/crypto/crc32_generic.c @@ -133,6 +133,7 @@ static struct shash_alg alg = { .cra_name = "crc32", .cra_driver_name = "crc32-generic", .cra_priority = 100, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c index 4c0a0e271876..372320399622 100644 --- a/crypto/crc32c_generic.c +++ b/crypto/crc32c_generic.c @@ -146,6 +146,7 @@ static struct shash_alg alg = { .cra_name = "crc32c", .cra_driver_name = "crc32c-generic", .cra_priority = 100, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_alignmask = 3, .cra_ctxsize = sizeof(struct chksum_ctx), diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 457ae3e66a41..addca7bae33f 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -896,10 +896,9 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, if (err) goto out_free_inst; - type = CRYPTO_ALG_ASYNC; - if (alg->cra_flags & CRYPTO_ALG_INTERNAL) - type |= CRYPTO_ALG_INTERNAL; - inst->alg.halg.base.cra_flags = type; + inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC | + (alg->cra_flags & (CRYPTO_ALG_INTERNAL | + CRYPTO_ALG_OPTIONAL_KEY)); inst->alg.halg.digestsize = salg->digestsize; inst->alg.halg.statesize = salg->statesize; diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index ace346b976b3..fe5129d6ff4e 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -516,10 +516,9 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, if (err) goto out_free_inst; - type = CRYPTO_ALG_ASYNC; - if (alg->cra_flags & CRYPTO_ALG_INTERNAL) - type |= CRYPTO_ALG_INTERNAL; - inst->alg.halg.base.cra_flags = type; + inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC | + (alg->cra_flags & (CRYPTO_ALG_INTERNAL | + CRYPTO_ALG_OPTIONAL_KEY)); inst->alg.halg.digestsize = halg->digestsize; inst->alg.halg.statesize = halg->statesize; diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c index a118b9bed669..bfbf8bf77f03 100644 --- a/drivers/crypto/bfin_crc.c +++ b/drivers/crypto/bfin_crc.c @@ -494,7 +494,8 @@ static struct ahash_alg algs = { .cra_driver_name = DRIVER_NAME, .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_AHASH | - CRYPTO_ALG_ASYNC, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(struct bfin_crypto_crc_ctx), .cra_alignmask = 3, diff --git a/drivers/crypto/stm32/stm32_crc32.c b/drivers/crypto/stm32/stm32_crc32.c index 090582baecfe..8f09b8430893 100644 --- a/drivers/crypto/stm32/stm32_crc32.c +++ b/drivers/crypto/stm32/stm32_crc32.c @@ -208,6 +208,7 @@ static struct shash_alg algs[] = { .cra_name = "crc32", .cra_driver_name = DRIVER_NAME, .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_alignmask = 3, .cra_ctxsize = sizeof(struct stm32_crc_ctx), @@ -229,6 +230,7 @@ static struct shash_alg algs[] = { .cra_name = "crc32c", .cra_driver_name = DRIVER_NAME, .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_alignmask = 3, .cra_ctxsize = sizeof(struct stm32_crc_ctx), diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c index 2e5d311d2438..db81ed527452 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto-adler.c @@ -120,6 +120,7 @@ static struct shash_alg alg = { .cra_name = "adler32", .cra_driver_name = "adler32-zlib", .cra_priority = 100, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 231e59f90d32..d2e33a90825b 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -106,6 +106,12 @@ */ #define CRYPTO_ALG_INTERNAL 0x00002000 +/* + * Set if the algorithm has a ->setkey() method but can be used without + * calling it first, i.e. there is a default key. + */ +#define CRYPTO_ALG_OPTIONAL_KEY 0x00004000 + /* * Transform masks and values (for crt_flags). */ -- cgit v1.2.3 From cf4674c46c66e45f238f8f7e81af2a444b970c0a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 19 Aug 2017 15:26:01 -0500 Subject: signal/powerpc: Document conflicts with SI_USER and SIGFPE and SIGTRAP Setting si_code to 0 results in a userspace seeing an si_code of 0. This is the same si_code as SI_USER. Posix and common sense requires that SI_USER not be a signal specific si_code. As such this use of 0 for the si_code is a pretty horribly broken ABI. Further use of si_code == 0 guaranteed that copy_siginfo_to_user saw a value of __SI_KILL and now sees a value of SIL_KILL with the result that uid and pid fields are copied and which might copying the si_addr field by accident but certainly not by design. Making this a very flakey implementation. Utilizing FPE_FIXME and TRAP_FIXME, siginfo_layout() will now return SIL_FAULT and the appropriate fields will be reliably copied. Possible ABI fixes includee: - Send the signal without siginfo - Don't generate a signal - Possibly assign and use an appropriate si_code - Don't handle cases which can't happen Cc: Paul Mackerras Cc: Kumar Gala Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: linuxppc-dev@lists.ozlabs.org Ref: 9bad068c24d7 ("[PATCH] ppc32: support for e500 and 85xx") Ref: 0ed70f6105ef ("PPC32: Provide proper siginfo information on various exceptions.") History Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Signed-off-by: "Eric W. Biederman" --- arch/powerpc/include/uapi/asm/siginfo.h | 15 +++++++++++++++ arch/powerpc/kernel/traps.c | 10 +++++----- 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/uapi/asm/siginfo.h b/arch/powerpc/include/uapi/asm/siginfo.h index 1a691141e49f..444ca6c9989a 100644 --- a/arch/powerpc/include/uapi/asm/siginfo.h +++ b/arch/powerpc/include/uapi/asm/siginfo.h @@ -18,4 +18,19 @@ #undef NSIGTRAP #define NSIGTRAP 4 +/* + * SIGFPE si_codes + */ +#ifdef __KERNEL__ +#define FPE_FIXME 0 /* Broken dup of SI_USER */ +#endif /* __KERNEL__ */ + +/* + * SIGTRAP si_codes + */ +#ifdef __KERNEL__ +#define TRAP_FIXME 0 /* Broken dup of SI_USER */ +#endif /* __KERNEL__ */ + + #endif /* _ASM_POWERPC_SIGINFO_H */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index f3eb61be0d30..f2e6e1838952 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -917,7 +917,7 @@ void unknown_exception(struct pt_regs *regs) printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", regs->nip, regs->msr, regs->trap); - _exception(SIGTRAP, regs, 0, 0); + _exception(SIGTRAP, regs, TRAP_FIXME, 0); exception_exit(prev_state); } @@ -939,7 +939,7 @@ bail: void RunModeException(struct pt_regs *regs) { - _exception(SIGTRAP, regs, 0, 0); + _exception(SIGTRAP, regs, TRAP_FIXME, 0); } void single_step_exception(struct pt_regs *regs) @@ -978,7 +978,7 @@ static void emulate_single_step(struct pt_regs *regs) static inline int __parse_fpscr(unsigned long fpscr) { - int ret = 0; + int ret = FPE_FIXME; /* Invalid operation */ if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX)) @@ -1929,7 +1929,7 @@ void SPEFloatingPointException(struct pt_regs *regs) extern int do_spe_mathemu(struct pt_regs *regs); unsigned long spefscr; int fpexc_mode; - int code = 0; + int code = FPE_FIXME; int err; flush_spe_to_thread(current); @@ -1998,7 +1998,7 @@ void SPEFloatingPointRoundException(struct pt_regs *regs) printk(KERN_ERR "unrecognized spe instruction " "in %s at %lx\n", current->comm, regs->nip); } else { - _exception(SIGFPE, regs, 0, regs->nip); + _exception(SIGFPE, regs, FPE_FIXME, regs->nip); return; } } -- cgit v1.2.3 From 2f82a46f66c8754dfe690d469899d12819b19c58 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 19 Jul 2017 22:18:51 -0500 Subject: signal: Remove _sys_private and _overrun_incr from struct compat_siginfo We have never passed either field to or from userspace so just remove them. Signed-off-by: "Eric W. Biederman" --- arch/arm64/include/asm/compat.h | 1 - arch/parisc/include/asm/compat.h | 1 - arch/powerpc/include/asm/compat.h | 1 - arch/s390/include/asm/compat.h | 1 - arch/sparc/include/asm/compat.h | 1 - arch/tile/include/asm/compat.h | 2 -- arch/x86/include/asm/compat.h | 2 -- arch/x86/kernel/signal_compat.c | 2 +- 8 files changed, 1 insertion(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index a3c7f271ad4c..dd32fe19ec58 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -174,7 +174,6 @@ typedef struct compat_siginfo { compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ } _timer; /* POSIX.1b signals */ diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h index acf8aa07cbe0..cf3bcacec027 100644 --- a/arch/parisc/include/asm/compat.h +++ b/arch/parisc/include/asm/compat.h @@ -155,7 +155,6 @@ typedef struct compat_siginfo { int _overrun; /* overrun count */ char _pad[sizeof(unsigned int) - sizeof(int)]; compat_sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ } _timer; /* POSIX.1b signals */ diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index 8a2aecfe9b02..e02de2fd56e3 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -145,7 +145,6 @@ typedef struct compat_siginfo { compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ } _timer; /* POSIX.1b signals */ diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 5e6a63641a5f..3a187c4932a5 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -213,7 +213,6 @@ typedef struct compat_siginfo { compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ } _timer; /* POSIX.1b signals */ diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index fa38c78de0f0..2d9f4fd5f74a 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -175,7 +175,6 @@ typedef struct compat_siginfo { compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ } _timer; /* POSIX.1b signals */ diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index 62a7b83025dd..59ab9fa784b3 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -136,8 +136,6 @@ typedef struct compat_siginfo { compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ - int _overrun_incr; /* amount to add to overrun */ } _timer; /* POSIX.1b signals */ diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 2cbd75dd2fd3..1b6886a78562 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -151,8 +151,6 @@ typedef struct compat_siginfo { compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ - int _overrun_incr; /* amount to add to overrun */ } _timer; /* POSIX.1b signals */ diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 8c6da1a643da..85425ea30661 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -64,7 +64,7 @@ static inline void signal_compat_build_tests(void) CHECK_SI_SIZE (_kill, 2*sizeof(int)); CHECK_CSI_OFFSET(_timer); - CHECK_CSI_SIZE (_timer, 5*sizeof(int)); + CHECK_CSI_SIZE (_timer, 3*sizeof(int)); CHECK_SI_SIZE (_timer, 6*sizeof(int)); CHECK_CSI_OFFSET(_rt); -- cgit v1.2.3 From 57bf5a8963f80fb3828c46c3e3a5b2dd790e09a7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 Dec 2017 16:05:15 +0100 Subject: dma-mapping: clear harmful GFP_* flags in common code Lift the code from x86 so that we behave consistently. In the future we should probably warn if any of these is set. Signed-off-by: Christoph Hellwig Acked-by: Jesper Nilsson Acked-by: Geert Uytterhoeven [m68k] --- arch/cris/arch-v32/drivers/pci/dma.c | 3 --- arch/h8300/kernel/dma.c | 3 --- arch/m68k/kernel/dma.c | 2 -- arch/mips/cavium-octeon/dma-octeon.c | 3 --- arch/mips/loongson64/common/dma-swiotlb.c | 3 --- arch/mips/mm/dma-default.c | 3 --- arch/mips/netlogic/common/nlm-dma.c | 3 --- arch/mn10300/mm/dma-alloc.c | 3 --- arch/nios2/mm/dma-mapping.c | 3 --- arch/powerpc/kernel/dma.c | 3 --- arch/x86/kernel/pci-dma.c | 2 -- include/linux/dma-mapping.h | 7 +++++++ 12 files changed, 7 insertions(+), 31 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/cris/arch-v32/drivers/pci/dma.c b/arch/cris/arch-v32/drivers/pci/dma.c index dbbd3816cc0b..8c3802244ef3 100644 --- a/arch/cris/arch-v32/drivers/pci/dma.c +++ b/arch/cris/arch-v32/drivers/pci/dma.c @@ -22,9 +22,6 @@ static void *v32_dma_alloc(struct device *dev, size_t size, { void *ret; - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) gfp |= GFP_DMA; diff --git a/arch/h8300/kernel/dma.c b/arch/h8300/kernel/dma.c index 225dd0a188dc..d44ba5db4ac3 100644 --- a/arch/h8300/kernel/dma.c +++ b/arch/h8300/kernel/dma.c @@ -16,9 +16,6 @@ static void *dma_alloc(struct device *dev, size_t size, { void *ret; - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - if (dev == NULL || (*dev->dma_mask < 0xffffffff)) gfp |= GFP_DMA; ret = (void *)__get_free_pages(gfp, get_order(size)); diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c index 87ef73a93856..c01b9b8f97bf 100644 --- a/arch/m68k/kernel/dma.c +++ b/arch/m68k/kernel/dma.c @@ -76,8 +76,6 @@ static void *m68k_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { void *ret; - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); if (dev == NULL || (*dev->dma_mask < 0xffffffff)) gfp |= GFP_DMA; diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index c64bd87f0b6e..5baf79fce643 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ b/arch/mips/cavium-octeon/dma-octeon.c @@ -161,9 +161,6 @@ static void *octeon_dma_alloc_coherent(struct device *dev, size_t size, { void *ret; - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); - if (IS_ENABLED(CONFIG_ZONE_DMA) && dev == NULL) gfp |= __GFP_DMA; else if (IS_ENABLED(CONFIG_ZONE_DMA) && diff --git a/arch/mips/loongson64/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c index ef07740cee61..15388c24a504 100644 --- a/arch/mips/loongson64/common/dma-swiotlb.c +++ b/arch/mips/loongson64/common/dma-swiotlb.c @@ -15,9 +15,6 @@ static void *loongson_dma_alloc_coherent(struct device *dev, size_t size, { void *ret; - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); - if ((IS_ENABLED(CONFIG_ISA) && dev == NULL) || (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask < DMA_BIT_MASK(32))) diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index e3e94d05f0fd..237532e89919 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -93,9 +93,6 @@ static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) { gfp_t dma_flag; - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); - #ifdef CONFIG_ISA if (dev == NULL) dma_flag = __GFP_DMA; diff --git a/arch/mips/netlogic/common/nlm-dma.c b/arch/mips/netlogic/common/nlm-dma.c index 0ec9d9da6d51..49c975b6aa28 100644 --- a/arch/mips/netlogic/common/nlm-dma.c +++ b/arch/mips/netlogic/common/nlm-dma.c @@ -47,9 +47,6 @@ static char *nlm_swiotlb; static void *nlm_dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); - #ifdef CONFIG_ZONE_DMA32 if (dev->coherent_dma_mask <= DMA_BIT_MASK(32)) gfp |= __GFP_DMA32; diff --git a/arch/mn10300/mm/dma-alloc.c b/arch/mn10300/mm/dma-alloc.c index 86108d2496b3..e3910d4db102 100644 --- a/arch/mn10300/mm/dma-alloc.c +++ b/arch/mn10300/mm/dma-alloc.c @@ -37,9 +37,6 @@ static void *mn10300_dma_alloc(struct device *dev, size_t size, goto done; } - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - if (dev == NULL || dev->coherent_dma_mask < 0xffffffff) gfp |= GFP_DMA; diff --git a/arch/nios2/mm/dma-mapping.c b/arch/nios2/mm/dma-mapping.c index 7040c1adbb5e..4be815519dd4 100644 --- a/arch/nios2/mm/dma-mapping.c +++ b/arch/nios2/mm/dma-mapping.c @@ -63,9 +63,6 @@ static void *nios2_dma_alloc(struct device *dev, size_t size, { void *ret; - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - /* optimized page clearing */ gfp |= __GFP_ZERO; diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 6d5d04ccf3b4..76079841d3d0 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -105,9 +105,6 @@ void *__dma_nommu_alloc_coherent(struct device *dev, size_t size, }; #endif /* CONFIG_FSL_SOC */ - /* ignore region specifiers */ - flag &= ~(__GFP_HIGHMEM); - page = alloc_pages_node(node, flag, get_order(size)); if (page == NULL) return NULL; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 8439e6de6156..61a8f1cb3829 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -87,7 +87,6 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_mask = dma_alloc_coherent_mask(dev, flag); - flag &= ~__GFP_ZERO; again: page = NULL; /* CMA can be used only in the context which permits sleeping */ @@ -139,7 +138,6 @@ bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp) if (!*dev) *dev = &x86_dma_fallback_dev; - *gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp); if (!is_device_dma_capable(*dev)) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 16add66d5b68..d036e78b9ae9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -518,6 +518,13 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size, if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) return cpu_addr; + /* + * Let the implementation decide on the zone to allocate from, and + * decide on the way of zeroing the memory given that the memory + * returned should always be zeroed. + */ + flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM | __GFP_ZERO); + if (!arch_dma_alloc_attrs(&dev, &flag)) return NULL; if (!ops->alloc) -- cgit v1.2.3 From a37a3710f3f91bfd14b0b20018e464a529d72471 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Dec 2017 14:01:48 +0100 Subject: powerpc: rename swiotlb_dma_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'll need that name for a generic implementation soon. Signed-off-by: Christoph Hellwig Acked-by: Christian König Reviewed-by: Konrad Rzeszutek Wilk --- arch/powerpc/include/asm/swiotlb.h | 2 +- arch/powerpc/kernel/dma-swiotlb.c | 4 ++-- arch/powerpc/kernel/dma.c | 2 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/swiotlb.h b/arch/powerpc/include/asm/swiotlb.h index 9341ee804d19..f65ecf57b66c 100644 --- a/arch/powerpc/include/asm/swiotlb.h +++ b/arch/powerpc/include/asm/swiotlb.h @@ -13,7 +13,7 @@ #include -extern const struct dma_map_ops swiotlb_dma_ops; +extern const struct dma_map_ops powerpc_swiotlb_dma_ops; extern unsigned int ppc_swiotlb_enable; int __init swiotlb_setup_bus_notifier(void); diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index f1e99b9cee97..506ac4fafac5 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -46,7 +46,7 @@ static u64 swiotlb_powerpc_get_required(struct device *dev) * map_page, and unmap_page on highmem, use normal dma_ops * for everything else. */ -const struct dma_map_ops swiotlb_dma_ops = { +const struct dma_map_ops powerpc_swiotlb_dma_ops = { .alloc = __dma_nommu_alloc_coherent, .free = __dma_nommu_free_coherent, .mmap = dma_nommu_mmap_coherent, @@ -89,7 +89,7 @@ static int ppc_swiotlb_bus_notify(struct notifier_block *nb, /* May need to bounce if the device can't address all of DRAM */ if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM()) - set_dma_ops(dev, &swiotlb_dma_ops); + set_dma_ops(dev, &powerpc_swiotlb_dma_ops); return NOTIFY_DONE; } diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 76079841d3d0..da20569de9d4 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -33,7 +33,7 @@ static u64 __maybe_unused get_pfn_limit(struct device *dev) struct dev_archdata __maybe_unused *sd = &dev->archdata; #ifdef CONFIG_SWIOTLB - if (sd->max_direct_dma_addr && dev->dma_ops == &swiotlb_dma_ops) + if (sd->max_direct_dma_addr && dev->dma_ops == &powerpc_swiotlb_dma_ops) pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT); #endif diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index e4d0133bbeeb..61e07c78d64f 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -118,7 +118,7 @@ static void setup_swiotlb_ops(struct pci_controller *hose) { if (ppc_swiotlb_enable) { hose->controller_ops.dma_dev_setup = pci_dma_dev_setup_swiotlb; - set_pci_dma_ops(&swiotlb_dma_ops); + set_pci_dma_ops(&powerpc_swiotlb_dma_ops); } } #else -- cgit v1.2.3 From 7f2c8bbd321f18e4ccfd262748bd58fb7d4bb1db Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Dec 2017 14:14:54 +0100 Subject: swiotlb: rename swiotlb_free to swiotlb_exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christoph Hellwig Acked-by: Christian König Reviewed-by: Konrad Rzeszutek Wilk --- arch/powerpc/kernel/dma-swiotlb.c | 2 +- arch/x86/kernel/pci-swiotlb.c | 2 +- include/linux/swiotlb.h | 4 ++-- lib/swiotlb.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 506ac4fafac5..88f3963ca30f 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -121,7 +121,7 @@ static int __init check_swiotlb_enabled(void) if (ppc_swiotlb_enable) swiotlb_print_info(); else - swiotlb_free(); + swiotlb_exit(); return 0; } diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 0d77603c2f50..0ee0f8f34251 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -120,7 +120,7 @@ void __init pci_swiotlb_late_init(void) { /* An IOMMU turned us off. */ if (!swiotlb) - swiotlb_free(); + swiotlb_exit(); else { printk(KERN_INFO "PCI-DMA: " "Using software bounce buffering for IO (SWIOTLB)\n"); diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 24ed817082ee..606375e35d87 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -115,10 +115,10 @@ extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); #ifdef CONFIG_SWIOTLB -extern void __init swiotlb_free(void); +extern void __init swiotlb_exit(void); unsigned int swiotlb_max_segment(void); #else -static inline void swiotlb_free(void) { } +static inline void swiotlb_exit(void) { } static inline unsigned int swiotlb_max_segment(void) { return 0; } #endif diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 125c1062119f..cf5311908fa9 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -417,7 +417,7 @@ cleanup2: return -ENOMEM; } -void __init swiotlb_free(void) +void __init swiotlb_exit(void) { if (!io_tlb_orig_addr) return; -- cgit v1.2.3 From b713da69e4c91d9addada4e58d26df1c9b5cd840 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Jul 2017 15:53:17 -0400 Subject: signal: unify compat_siginfo_t --EWB Added #ifdef CONFIG_X86_X32_ABI to arch/x86/kernel/signal_compat.c Changed #ifdef CONFIG_X86_X32 to #ifdef CONFIG_X86_X32_ABI in linux/compat.h CONFIG_X86_X32 is set when the user requests X32 support. CONFIG_X86_X32_ABI is set when the user requests X32 support and the tool-chain has X32 allowing X32 support to be built. Signed-off-by: Al Viro Signed-off-by: Eric W. Biederman --- arch/arm64/include/asm/compat.h | 63 --------------------------- arch/mips/include/asm/compat.h | 71 ------------------------------ arch/parisc/include/asm/compat.h | 63 --------------------------- arch/powerpc/include/asm/compat.h | 62 --------------------------- arch/s390/include/asm/compat.h | 72 ------------------------------- arch/sparc/include/asm/compat.h | 56 ------------------------ arch/tile/include/asm/compat.h | 58 ------------------------- arch/x86/include/asm/compat.h | 80 ---------------------------------- arch/x86/kernel/signal_compat.c | 4 ++ include/linux/compat.h | 90 +++++++++++++++++++++++++++++++++++++++ 10 files changed, 94 insertions(+), 525 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index dd32fe19ec58..c00c62e1a4a3 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -150,69 +150,6 @@ typedef u32 compat_old_sigset_t; typedef u32 compat_sigset_word; -typedef union compat_sigval { - compat_int_t sival_int; - compat_uptr_t sival_ptr; -} compat_sigval_t; - -typedef struct compat_siginfo { - int si_signo; - int si_errno; - int si_code; - - union { - int _pad[128/sizeof(int) - 3]; - - /* kill() */ - struct { - compat_pid_t _pid; /* sender's pid */ - __compat_uid32_t _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - compat_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ - compat_sigval_t _sigval; /* same as below */ - } _timer; - - /* POSIX.1b signals */ - struct { - compat_pid_t _pid; /* sender's pid */ - __compat_uid32_t _uid; /* sender's uid */ - compat_sigval_t _sigval; - } _rt; - - /* SIGCHLD */ - struct { - compat_pid_t _pid; /* which child */ - __compat_uid32_t _uid; /* sender's uid */ - int _status; /* exit code */ - compat_clock_t _utime; - compat_clock_t _stime; - } _sigchld; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ - struct { - compat_uptr_t _addr; /* faulting insn/memory ref. */ - short _addr_lsb; /* LSB of the reported address */ - } _sigfault; - - /* SIGPOLL */ - struct { - compat_long_t _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - - /* SIGSYS */ - struct { - compat_uptr_t _call_addr; /* calling user insn */ - int _syscall; /* triggering system call number */ - compat_uint_t _arch; /* AUDIT_ARCH_* of syscall */ - } _sigsys; - } _sifields; -} compat_siginfo_t; - #define COMPAT_OFF_T_MAX 0x7fffffff /* diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index 49691331ada4..fe7d445f675f 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -126,79 +126,8 @@ typedef u32 compat_old_sigset_t; /* at least 32 bits */ typedef u32 compat_sigset_word; -typedef union compat_sigval { - compat_int_t sival_int; - compat_uptr_t sival_ptr; -} compat_sigval_t; - -/* Can't use the generic version because si_code and si_errno are swapped */ - #define SI_PAD_SIZE32 (128/sizeof(int) - 3) -typedef struct compat_siginfo { - int si_signo; - int si_code; - int si_errno; - - union { - int _pad[128 / sizeof(int) - 3]; - - /* kill() */ - struct { - compat_pid_t _pid; /* sender's pid */ - __compat_uid32_t _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - compat_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ - compat_sigval_t _sigval; /* same as below */ - } _timer; - - /* POSIX.1b signals */ - struct { - compat_pid_t _pid; /* sender's pid */ - __compat_uid32_t _uid; /* sender's uid */ - compat_sigval_t _sigval; - } _rt; - - /* SIGCHLD */ - struct { - compat_pid_t _pid; /* which child */ - __compat_uid32_t _uid; /* sender's uid */ - int _status; /* exit code */ - compat_clock_t _utime; - compat_clock_t _stime; - } _sigchld; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ - struct { - compat_uptr_t _addr; /* faulting insn/memory ref. */ -#ifdef __ARCH_SI_TRAPNO - int _trapno; /* TRAP # which caused the signal */ -#endif - short _addr_lsb; /* LSB of the reported address */ - struct { - compat_uptr_t _lower; - compat_uptr_t _upper; - } _addr_bnd; - } _sigfault; - - /* SIGPOLL */ - struct { - compat_long_t _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - - struct { - compat_uptr_t _call_addr; /* calling insn */ - int _syscall; /* triggering system call number */ - compat_uint_t _arch; /* AUDIT_ARCH_* of syscall */ - } _sigsys; - } _sifields; -} compat_siginfo_t; - #define COMPAT_OFF_T_MAX 0x7fffffff /* diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h index cf3bcacec027..c22db5323244 100644 --- a/arch/parisc/include/asm/compat.h +++ b/arch/parisc/include/asm/compat.h @@ -130,69 +130,6 @@ typedef u32 compat_old_sigset_t; /* at least 32 bits */ typedef u32 compat_sigset_word; -typedef union compat_sigval { - compat_int_t sival_int; - compat_uptr_t sival_ptr; -} compat_sigval_t; - -typedef struct compat_siginfo { - int si_signo; - int si_errno; - int si_code; - - union { - int _pad[128/sizeof(int) - 3]; - - /* kill() */ - struct { - unsigned int _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - compat_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ - char _pad[sizeof(unsigned int) - sizeof(int)]; - compat_sigval_t _sigval; /* same as below */ - } _timer; - - /* POSIX.1b signals */ - struct { - unsigned int _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - compat_sigval_t _sigval; - } _rt; - - /* SIGCHLD */ - struct { - unsigned int _pid; /* which child */ - unsigned int _uid; /* sender's uid */ - int _status; /* exit code */ - compat_clock_t _utime; - compat_clock_t _stime; - } _sigchld; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ - struct { - unsigned int _addr; /* faulting insn/memory ref. */ - } _sigfault; - - /* SIGPOLL */ - struct { - int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - - /* SIGSYS */ - struct { - compat_uptr_t _call_addr; /* calling user insn */ - int _syscall; /* triggering system call number */ - compat_uint_t _arch; /* AUDIT_ARCH_* of syscall */ - } _sigsys; - } _sifields; -} compat_siginfo_t; - #define COMPAT_OFF_T_MAX 0x7fffffff struct compat_ipc64_perm { diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index e02de2fd56e3..8a2363221b0c 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -119,70 +119,8 @@ typedef u32 compat_old_sigset_t; typedef u32 compat_sigset_word; -typedef union compat_sigval { - compat_int_t sival_int; - compat_uptr_t sival_ptr; -} compat_sigval_t; - #define SI_PAD_SIZE32 (128/sizeof(int) - 3) -typedef struct compat_siginfo { - int si_signo; - int si_errno; - int si_code; - - union { - int _pad[SI_PAD_SIZE32]; - - /* kill() */ - struct { - compat_pid_t _pid; /* sender's pid */ - __compat_uid_t _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - compat_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ - compat_sigval_t _sigval; /* same as below */ - } _timer; - - /* POSIX.1b signals */ - struct { - compat_pid_t _pid; /* sender's pid */ - __compat_uid_t _uid; /* sender's uid */ - compat_sigval_t _sigval; - } _rt; - - /* SIGCHLD */ - struct { - compat_pid_t _pid; /* which child */ - __compat_uid_t _uid; /* sender's uid */ - int _status; /* exit code */ - compat_clock_t _utime; - compat_clock_t _stime; - } _sigchld; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */ - struct { - unsigned int _addr; /* faulting insn/memory ref. */ - } _sigfault; - - /* SIGPOLL */ - struct { - int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - - /* SIGSYS */ - struct { - unsigned int _call_addr; /* calling insn */ - int _syscall; /* triggering system call number */ - unsigned int _arch; /* AUDIT_ARCH_* of syscall */ - } _sigsys; - } _sifields; -} compat_siginfo_t; - #define COMPAT_OFF_T_MAX 0x7fffffff /* diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 3a187c4932a5..9830fb6b076e 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -189,78 +189,6 @@ typedef u32 compat_old_sigset_t; /* at least 32 bits */ typedef u32 compat_sigset_word; -typedef union compat_sigval { - compat_int_t sival_int; - compat_uptr_t sival_ptr; -} compat_sigval_t; - -typedef struct compat_siginfo { - int si_signo; - int si_errno; - int si_code; - - union { - int _pad[128/sizeof(int) - 3]; - - /* kill() */ - struct { - pid_t _pid; /* sender's pid */ - uid_t _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - compat_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ - compat_sigval_t _sigval; /* same as below */ - } _timer; - - /* POSIX.1b signals */ - struct { - pid_t _pid; /* sender's pid */ - uid_t _uid; /* sender's uid */ - compat_sigval_t _sigval; - } _rt; - - /* SIGCHLD */ - struct { - pid_t _pid; /* which child */ - uid_t _uid; /* sender's uid */ - int _status;/* exit code */ - compat_clock_t _utime; - compat_clock_t _stime; - } _sigchld; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ - struct { - __u32 _addr; /* faulting insn/memory ref. - pointer */ - } _sigfault; - - /* SIGPOLL */ - struct { - int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - } _sifields; -} compat_siginfo_t; - -/* - * How these fields are to be accessed. - */ -#define si_pid _sifields._kill._pid -#define si_uid _sifields._kill._uid -#define si_status _sifields._sigchld._status -#define si_utime _sifields._sigchld._utime -#define si_stime _sifields._sigchld._stime -#define si_value _sifields._rt._sigval -#define si_int _sifields._rt._sigval.sival_int -#define si_ptr _sifields._rt._sigval.sival_ptr -#define si_addr _sifields._sigfault._addr -#define si_band _sifields._sigpoll._band -#define si_fd _sifields._sigpoll._fd -#define si_tid _sifields._timer._tid -#define si_overrun _sifields._timer._overrun - #define COMPAT_OFF_T_MAX 0x7fffffff /* diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index 2d9f4fd5f74a..c3688ae98b90 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -149,64 +149,8 @@ typedef u32 compat_old_sigset_t; typedef u32 compat_sigset_word; -typedef union compat_sigval { - compat_int_t sival_int; - compat_uptr_t sival_ptr; -} compat_sigval_t; - #define SI_PAD_SIZE32 (128/sizeof(int) - 3) -typedef struct compat_siginfo { - int si_signo; - int si_errno; - int si_code; - - union { - int _pad[SI_PAD_SIZE32]; - - /* kill() */ - struct { - compat_pid_t _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - compat_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ - compat_sigval_t _sigval; /* same as below */ - } _timer; - - /* POSIX.1b signals */ - struct { - compat_pid_t _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - compat_sigval_t _sigval; - } _rt; - - /* SIGCHLD */ - struct { - compat_pid_t _pid; /* which child */ - unsigned int _uid; /* sender's uid */ - int _status; /* exit code */ - compat_clock_t _utime; - compat_clock_t _stime; - } _sigchld; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */ - struct { - u32 _addr; /* faulting insn/memory ref. */ - int _trapno; - } _sigfault; - - /* SIGPOLL */ - struct { - int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - } _sifields; -} compat_siginfo_t; - #define COMPAT_OFF_T_MAX 0x7fffffff /* diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index 59ab9fa784b3..c6b7613256b4 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -110,66 +110,8 @@ struct compat_flock64 { typedef u32 compat_sigset_word; -typedef union compat_sigval { - compat_int_t sival_int; - compat_uptr_t sival_ptr; -} compat_sigval_t; - #define COMPAT_SI_PAD_SIZE (128/sizeof(int) - 3) -typedef struct compat_siginfo { - int si_signo; - int si_errno; - int si_code; - - union { - int _pad[COMPAT_SI_PAD_SIZE]; - - /* kill() */ - struct { - unsigned int _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - compat_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ - compat_sigval_t _sigval; /* same as below */ - } _timer; - - /* POSIX.1b signals */ - struct { - unsigned int _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - compat_sigval_t _sigval; - } _rt; - - /* SIGCHLD */ - struct { - unsigned int _pid; /* which child */ - unsigned int _uid; /* sender's uid */ - int _status; /* exit code */ - compat_clock_t _utime; - compat_clock_t _stime; - } _sigchld; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ - struct { - unsigned int _addr; /* faulting insn/memory ref. */ -#ifdef __ARCH_SI_TRAPNO - int _trapno; /* TRAP # which caused the signal */ -#endif - } _sigfault; - - /* SIGPOLL */ - struct { - int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - } _sifields; -} compat_siginfo_t; - #define COMPAT_OFF_T_MAX 0x7fffffff struct compat_ipc64_perm { diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 1b6886a78562..0b76fc91f672 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -127,86 +127,6 @@ typedef u32 compat_old_sigset_t; /* at least 32 bits */ typedef u32 compat_sigset_word; -typedef union compat_sigval { - compat_int_t sival_int; - compat_uptr_t sival_ptr; -} compat_sigval_t; - -typedef struct compat_siginfo { - int si_signo; - int si_errno; - int si_code; - - union { - int _pad[128/sizeof(int) - 3]; - - /* kill() */ - struct { - unsigned int _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - compat_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ - compat_sigval_t _sigval; /* same as below */ - } _timer; - - /* POSIX.1b signals */ - struct { - unsigned int _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - compat_sigval_t _sigval; - } _rt; - - /* SIGCHLD */ - struct { - unsigned int _pid; /* which child */ - unsigned int _uid; /* sender's uid */ - int _status; /* exit code */ - compat_clock_t _utime; - compat_clock_t _stime; - } _sigchld; - - /* SIGCHLD (x32 version) */ - struct { - unsigned int _pid; /* which child */ - unsigned int _uid; /* sender's uid */ - int _status; /* exit code */ - compat_s64 _utime; - compat_s64 _stime; - } _sigchld_x32; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ - struct { - unsigned int _addr; /* faulting insn/memory ref. */ - short int _addr_lsb; /* Valid LSB of the reported address. */ - union { - /* used when si_code=SEGV_BNDERR */ - struct { - compat_uptr_t _lower; - compat_uptr_t _upper; - } _addr_bnd; - /* used when si_code=SEGV_PKUERR */ - compat_u32 _pkey; - }; - } _sigfault; - - /* SIGPOLL */ - struct { - int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - - struct { - unsigned int _call_addr; /* calling insn */ - int _syscall; /* triggering system call number */ - unsigned int _arch; /* AUDIT_ARCH_* of syscall */ - } _sigsys; - } _sifields; -} compat_siginfo_t; - #define COMPAT_OFF_T_MAX 0x7fffffff struct compat_ipc64_perm { diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 85425ea30661..27495909932d 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -75,9 +75,11 @@ static inline void signal_compat_build_tests(void) CHECK_CSI_SIZE (_sigchld, 5*sizeof(int)); CHECK_SI_SIZE (_sigchld, 8*sizeof(int)); +#ifdef CONFIG_X86_X32_ABI CHECK_CSI_OFFSET(_sigchld_x32); CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int)); /* no _sigchld_x32 in the generic siginfo_t */ +#endif CHECK_CSI_OFFSET(_sigfault); CHECK_CSI_SIZE (_sigfault, 4*sizeof(int)); @@ -169,9 +171,11 @@ int __copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from, if (!x32_ABI) { put_user_ex(from->si_utime, &to->si_utime); put_user_ex(from->si_stime, &to->si_stime); +#ifdef CONFIG_X86_X32_ABI } else { put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime); put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime); +#endif } put_user_ex(from->si_status, &to->si_status); /* FALL THROUGH */ diff --git a/include/linux/compat.h b/include/linux/compat.h index 0fc36406f32c..8f8e3ef247de 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -157,6 +157,96 @@ struct compat_sigaction { compat_sigset_t sa_mask __packed; }; +typedef union compat_sigval { + compat_int_t sival_int; + compat_uptr_t sival_ptr; +} compat_sigval_t; + +typedef struct compat_siginfo { + int si_signo; +#ifndef __ARCH_HAS_SWAPPED_SIGINFO + int si_errno; + int si_code; +#else + int si_code; + int si_errno; +#endif + + union { + int _pad[128/sizeof(int) - 3]; + + /* kill() */ + struct { + compat_pid_t _pid; /* sender's pid */ + __compat_uid32_t _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + } _timer; + + /* POSIX.1b signals */ + struct { + compat_pid_t _pid; /* sender's pid */ + __compat_uid32_t _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + compat_pid_t _pid; /* which child */ + __compat_uid32_t _uid; /* sender's uid */ + int _status; /* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + +#ifdef CONFIG_X86_X32_ABI + /* SIGCHLD (x32 version) */ + struct { + compat_pid_t _pid; /* which child */ + __compat_uid32_t _uid; /* sender's uid */ + int _status; /* exit code */ + compat_s64 _utime; + compat_s64 _stime; + } _sigchld_x32; +#endif + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT */ + struct { + compat_uptr_t _addr; /* faulting insn/memory ref. */ +#ifdef __ARCH_SI_TRAPNO + int _trapno; /* TRAP # which caused the signal */ +#endif + short int _addr_lsb; /* Valid LSB of the reported address. */ + union { + /* used when si_code=SEGV_BNDERR */ + struct { + compat_uptr_t _lower; + compat_uptr_t _upper; + } _addr_bnd; + /* used when si_code=SEGV_PKUERR */ + u32 _pkey; + }; + } _sigfault; + + /* SIGPOLL */ + struct { + compat_long_t _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + + struct { + compat_uptr_t _call_addr; /* calling user insn */ + int _syscall; /* triggering system call number */ + unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + } _sigsys; + } _sifields; +} compat_siginfo_t; + /* * These functions operate on 32- or 64-bit specs depending on * COMPAT_USE_64BIT_TIME, hence the void user pointer arguments. -- cgit v1.2.3 From ad2b1ab57ddb9cd0a9e93aae8b5f3ca9a0c77f1d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 13 Jan 2018 15:12:46 -0600 Subject: signal/powerpc: Remove redefinition of NSIGTRAP on powerpc NSIGTRAP is 4 in the generic siginfo and powerpc just undefines NSGTRAP and redefine it as 4. That accomplishes nothing so remove the duplication. Signed-off-by: "Eric W. Biederman" --- arch/powerpc/include/uapi/asm/siginfo.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/uapi/asm/siginfo.h b/arch/powerpc/include/uapi/asm/siginfo.h index 444ca6c9989a..9f142451a01f 100644 --- a/arch/powerpc/include/uapi/asm/siginfo.h +++ b/arch/powerpc/include/uapi/asm/siginfo.h @@ -15,9 +15,6 @@ #include -#undef NSIGTRAP -#define NSIGTRAP 4 - /* * SIGFPE si_codes */ -- cgit v1.2.3 From 212a36a17efe4d696d1e3c31ebd79a9fb0cbb14b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 31 Jul 2017 17:15:31 -0500 Subject: signal: Unify and correct copy_siginfo_from_user32 The function copy_siginfo_from_user32 is used for two things, in ptrace since the dawn of siginfo for arbirarily modifying a signal that user space sees, and in sigqueueinfo to send a signal with arbirary siginfo data. Create a single copy of copy_siginfo_from_user32 that all architectures share, and teach it to handle all of the cases in the siginfo union. In the generic version of copy_siginfo_from_user32 ensure that all of the fields in siginfo are initialized so that the siginfo structure can be safely copied to userspace if necessary. When copying the embedded sigval union copy the si_int member. That ensures the 32bit values passes through the kernel unchanged. Signed-off-by: "Eric W. Biederman" --- arch/arm64/kernel/signal32.c | 10 ----- arch/mips/kernel/signal32.c | 10 ----- arch/parisc/kernel/signal32.c | 44 ---------------------- arch/parisc/kernel/signal32.h | 1 - arch/powerpc/kernel/signal_32.c | 9 ----- arch/s390/kernel/compat_signal.c | 48 ------------------------ arch/sparc/kernel/signal32.c | 16 -------- arch/tile/kernel/compat_signal.c | 18 --------- arch/x86/kernel/signal_compat.c | 21 ----------- include/linux/compat.h | 2 +- kernel/signal.c | 81 ++++++++++++++++++++++++++++++++++++++++ 11 files changed, 82 insertions(+), 178 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 22711ee8e36c..4377907dbb70 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -195,16 +195,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) return err; } -int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) -{ - if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) || - copy_from_user(to->_sifields._pad, - from->_sifields._pad, SI_PAD_SIZE)) - return -EFAULT; - - return 0; -} - /* * VFP save/restore code. * diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index cf5c7c05e5a3..500b5e4634ea 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -133,13 +133,3 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) } return err; } - -int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) -{ - if (copy_from_user(to, from, 3*sizeof(int)) || - copy_from_user(to->_sifields._pad, - from->_sifields._pad, SI_PAD_SIZE32)) - return -EFAULT; - - return 0; -} diff --git a/arch/parisc/kernel/signal32.c b/arch/parisc/kernel/signal32.c index 41afa9cd1f55..558e32475c35 100644 --- a/arch/parisc/kernel/signal32.c +++ b/arch/parisc/kernel/signal32.c @@ -261,50 +261,6 @@ setup_sigcontext32(struct compat_sigcontext __user *sc, struct compat_regfile __ return err; } -int -copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from) -{ - compat_uptr_t addr; - int err; - - if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) - return -EFAULT; - - err = __get_user(to->si_signo, &from->si_signo); - err |= __get_user(to->si_errno, &from->si_errno); - err |= __get_user(to->si_code, &from->si_code); - - if (to->si_code < 0) - err |= __copy_from_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); - else { - switch (siginfo_layout(to->si_signo, to->si_code)) { - case SIL_CHLD: - err |= __get_user(to->si_utime, &from->si_utime); - err |= __get_user(to->si_stime, &from->si_stime); - err |= __get_user(to->si_status, &from->si_status); - default: - case SIL_KILL: - err |= __get_user(to->si_pid, &from->si_pid); - err |= __get_user(to->si_uid, &from->si_uid); - break; - case SIL_FAULT: - err |= __get_user(addr, &from->si_addr); - to->si_addr = compat_ptr(addr); - break; - case SIL_POLL: - err |= __get_user(to->si_band, &from->si_band); - err |= __get_user(to->si_fd, &from->si_fd); - break; - case SIL_RT: - err |= __get_user(to->si_pid, &from->si_pid); - err |= __get_user(to->si_uid, &from->si_uid); - err |= __get_user(to->si_int, &from->si_int); - break; - } - } - return err; -} - int copy_siginfo_to_user32 (compat_siginfo_t __user *to, const siginfo_t *from) { diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h index 719e7417732c..d25858e4db63 100644 --- a/arch/parisc/kernel/signal32.h +++ b/arch/parisc/kernel/signal32.h @@ -35,7 +35,6 @@ struct compat_ucontext { /* ELF32 signal handling */ int copy_siginfo_to_user32 (compat_siginfo_t __user *to, const siginfo_t *from); -int copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from); /* In a deft move of uber-hackery, we decide to carry the top half of all * 64-bit registers in a non-portable, non-ABI, hidden structure. diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 9ffd73296f64..ee62ff7b296c 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -933,15 +933,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s) #define copy_siginfo_to_user copy_siginfo_to_user32 -int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) -{ - if (copy_from_user(to, from, 3*sizeof(int)) || - copy_from_user(to->_sifields._pad, - from->_sifields._pad, SI_PAD_SIZE32)) - return -EFAULT; - - return 0; -} #endif /* CONFIG_PPC64 */ /* diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index ef246940b44c..d77ce14ffa5c 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -102,54 +102,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) return err ? -EFAULT : 0; } -int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) -{ - int err; - u32 tmp; - - err = __get_user(to->si_signo, &from->si_signo); - err |= __get_user(to->si_errno, &from->si_errno); - err |= __get_user(to->si_code, &from->si_code); - - if (to->si_code < 0) - err |= __copy_from_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); - else { - switch (siginfo_layout(to->si_signo, to->si_code)) { - case SIL_RT: - err |= __get_user(to->si_int, &from->si_int); - /* fallthrough */ - case SIL_KILL: - err |= __get_user(to->si_pid, &from->si_pid); - err |= __get_user(to->si_uid, &from->si_uid); - break; - case SIL_CHLD: - err |= __get_user(to->si_pid, &from->si_pid); - err |= __get_user(to->si_uid, &from->si_uid); - err |= __get_user(to->si_utime, &from->si_utime); - err |= __get_user(to->si_stime, &from->si_stime); - err |= __get_user(to->si_status, &from->si_status); - break; - case SIL_FAULT: - err |= __get_user(tmp, &from->si_addr); - to->si_addr = (void __force __user *) - (u64) (tmp & PSW32_ADDR_INSN); - break; - case SIL_POLL: - err |= __get_user(to->si_band, &from->si_band); - err |= __get_user(to->si_fd, &from->si_fd); - break; - case SIL_TIMER: - err |= __get_user(to->si_tid, &from->si_tid); - err |= __get_user(to->si_overrun, &from->si_overrun); - err |= __get_user(to->si_int, &from->si_int); - break; - default: - break; - } - } - return err ? -EFAULT : 0; -} - /* Store registers needed to create the signal frame */ static void store_sigregs(void) { diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 54a6159b9cd8..8022bb4c65a5 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -123,22 +123,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) return err; } -/* CAUTION: This is just a very minimalist implementation for the - * sake of compat_sys_rt_sigqueueinfo() - */ -int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) -{ - if (!access_ok(VERIFY_WRITE, from, sizeof(compat_siginfo_t))) - return -EFAULT; - - if (copy_from_user(to, from, 3*sizeof(int)) || - copy_from_user(to->_sifields._pad, from->_sifields._pad, - SI_PAD_SIZE)) - return -EFAULT; - - return 0; -} - /* Checks if the fp is valid. We always build signal frames which are * 16-byte aligned, therefore we can always enforce that the restore * frame has that property as well. diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index 971d87a1d8cf..4e7f40a10eb3 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -105,24 +105,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *fr return err; } -int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) -{ - int err; - - if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo))) - return -EFAULT; - - err = __get_user(to->si_signo, &from->si_signo); - err |= __get_user(to->si_errno, &from->si_errno); - err |= __get_user(to->si_code, &from->si_code); - - err |= __get_user(to->si_pid, &from->si_pid); - err |= __get_user(to->si_uid, &from->si_uid); - err |= __get_user(to->si_int, &from->si_int); - - return err; -} - /* The assembly shim for this function arranges to ignore the return value. */ long compat_sys_rt_sigreturn(void) { diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index feb3ac135d0c..59148de2d83f 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -207,24 +207,3 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) return __copy_siginfo_to_user32(to, from, in_x32_syscall()); } -int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) -{ - int err = 0; - u32 ptr32; - - if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) - return -EFAULT; - - get_user_try { - get_user_ex(to->si_signo, &from->si_signo); - get_user_ex(to->si_errno, &from->si_errno); - get_user_ex(to->si_code, &from->si_code); - - get_user_ex(to->si_pid, &from->si_pid); - get_user_ex(to->si_uid, &from->si_uid); - get_user_ex(ptr32, &from->si_ptr); - to->si_ptr = compat_ptr(ptr32); - } get_user_catch(err); - - return err; -} diff --git a/include/linux/compat.h b/include/linux/compat.h index e698ec1908d9..8a9643857c4a 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -510,7 +510,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask, unsigned long bitmap_size); long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask, unsigned long bitmap_size); -int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from); +int copy_siginfo_from_user32(siginfo_t *to, const struct compat_siginfo __user *from); int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from); int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); diff --git a/kernel/signal.c b/kernel/signal.c index 4c3f4448c5f1..5211b1b57163 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2814,6 +2814,87 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from) return err; } +#ifdef CONFIG_COMPAT +int copy_siginfo_from_user32(struct siginfo *to, + const struct compat_siginfo __user *ufrom) +{ + struct compat_siginfo from; + + if (copy_from_user(&from, ufrom, sizeof(struct compat_siginfo))) + return -EFAULT; + + clear_siginfo(to); + to->si_signo = from.si_signo; + to->si_errno = from.si_errno; + to->si_code = from.si_code; + switch(siginfo_layout(from.si_signo, from.si_code)) { + case SIL_KILL: + to->si_pid = from.si_pid; + to->si_uid = from.si_uid; + break; + case SIL_TIMER: + to->si_tid = from.si_tid; + to->si_overrun = from.si_overrun; + to->si_int = from.si_int; + break; + case SIL_POLL: + to->si_band = from.si_band; + to->si_fd = from.si_fd; + break; + case SIL_FAULT: + to->si_addr = compat_ptr(from.si_addr); +#ifdef __ARCH_SI_TRAPNO + to->si_trapno = from.si_trapno; +#endif +#ifdef BUS_MCEERR_AR + if ((from.si_signo == SIGBUS) && (from.si_code == BUS_MCEERR_AR)) + to->si_addr_lsb = from.si_addr_lsb; +#endif +#ifdef BUS_MCEER_AO + if ((from.si_signo == SIGBUS) && (from.si_code == BUS_MCEERR_AO)) + to->si_addr_lsb = from.si_addr_lsb; +#endif +#ifdef SEGV_BNDERR + if ((from.si_signo == SIGSEGV) && (from.si_code == SEGV_BNDERR)) { + to->si_lower = compat_ptr(from.si_lower); + to->si_upper = compat_ptr(from.si_upper); + } +#endif +#ifdef SEGV_PKUERR + if ((from.si_signo == SIGSEGV) && (from.si_code == SEGV_PKUERR)) + to->si_pkey = from.si_pkey; +#endif + break; + case SIL_CHLD: + to->si_pid = from.si_pid; + to->si_uid = from.si_uid; + to->si_status = from.si_status; +#ifdef CONFIG_X86_X32_ABI + if (in_x32_syscall()) { + to->si_utime = from._sifields._sigchld_x32._utime; + to->si_stime = from._sifields._sigchld_x32._stime; + } else +#endif + { + to->si_utime = from.si_utime; + to->si_stime = from.si_stime; + } + break; + case SIL_RT: + to->si_pid = from.si_pid; + to->si_uid = from.si_uid; + to->si_int = from.si_int; + break; + case SIL_SYS: + to->si_call_addr = compat_ptr(from.si_call_addr); + to->si_syscall = from.si_syscall; + to->si_arch = from.si_arch; + break; + } + return 0; +} +#endif /* CONFIG_COMPAT */ + /** * do_sigtimedwait - wait for queued signals specified in @which * @which: queued signals to wait for -- cgit v1.2.3 From ea64d5acc8f033cd586182ae31531246cdeaea73 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 15 Jan 2018 18:03:33 -0600 Subject: signal: Unify and correct copy_siginfo_to_user32 Among the existing architecture specific versions of copy_siginfo_to_user32 there are several different implementation problems. Some architectures fail to handle all of the cases in in the siginfo union. Some architectures perform a blind copy of the siginfo union when the si_code is negative. A blind copy suggests the data is expected to be in 32bit siginfo format, which means that receiving such a signal via signalfd won't work, or that the data is in 64bit siginfo and the code is copying nonsense to userspace. Create a single instance of copy_siginfo_to_user32 that all of the architectures can share, and teach it to handle all of the cases in the siginfo union correctly, with the assumption that siginfo is stored internally to the kernel is 64bit siginfo format. A special case is made for x86 x32 format. This is needed as presence of both x32 and ia32 on x86_64 results in two different 32bit signal formats. By allowing this small special case there winds up being exactly one code base that needs to be maintained between all of the architectures. Vastly increasing the testing base and the chances of finding bugs. As the x86 copy of copy_siginfo_to_user32 the call of the x86 signal_compat_build_tests were moved into sigaction_compat_abi, so that they will keep running. Signed-off-by: "Eric W. Biederman" --- arch/arm64/kernel/signal32.c | 70 ---------------------------- arch/mips/include/asm/compat.h | 2 - arch/mips/kernel/signal32.c | 57 ----------------------- arch/parisc/kernel/signal32.c | 62 ------------------------- arch/parisc/kernel/signal32.h | 2 - arch/powerpc/include/asm/compat.h | 2 - arch/powerpc/kernel/signal_32.c | 57 ----------------------- arch/s390/kernel/compat_signal.c | 52 --------------------- arch/sparc/include/asm/compat.h | 2 - arch/sparc/kernel/signal32.c | 53 --------------------- arch/tile/include/asm/compat.h | 2 - arch/tile/kernel/compat_signal.c | 55 ---------------------- arch/x86/include/asm/compat.h | 4 ++ arch/x86/include/asm/fpu/signal.h | 6 --- arch/x86/kernel/signal_compat.c | 96 +-------------------------------------- kernel/signal.c | 90 ++++++++++++++++++++++++++++++++++++ 16 files changed, 96 insertions(+), 516 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 4377907dbb70..cbc4edd1b1eb 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -125,76 +125,6 @@ static inline int get_sigset_t(sigset_t *set, return 0; } -int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) -{ - int err; - - if (!access_ok(VERIFY_WRITE, to, sizeof(*to))) - return -EFAULT; - - /* If you change siginfo_t structure, please be sure - * this code is fixed accordingly. - * It should never copy any pad contained in the structure - * to avoid security leaks, but must copy the generic - * 3 ints plus the relevant union member. - * This routine must convert siginfo from 64bit to 32bit as well - * at the same time. - */ - err = __put_user(from->si_signo, &to->si_signo); - err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user(from->si_code, &to->si_code); - if (from->si_code < 0) - err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, - SI_PAD_SIZE); - else switch (siginfo_layout(from->si_signo, from->si_code)) { - case SIL_KILL: - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - break; - case SIL_TIMER: - err |= __put_user(from->si_tid, &to->si_tid); - err |= __put_user(from->si_overrun, &to->si_overrun); - err |= __put_user(from->si_int, &to->si_int); - break; - case SIL_POLL: - err |= __put_user(from->si_band, &to->si_band); - err |= __put_user(from->si_fd, &to->si_fd); - break; - case SIL_FAULT: - err |= __put_user((compat_uptr_t)(unsigned long)from->si_addr, - &to->si_addr); -#ifdef BUS_MCEERR_AO - /* - * Other callers might not initialize the si_lsb field, - * so check explicitly for the right codes here. - */ - if (from->si_signo == SIGBUS && - (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)) - err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); -#endif - break; - case SIL_CHLD: - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(from->si_status, &to->si_status); - err |= __put_user(from->si_utime, &to->si_utime); - err |= __put_user(from->si_stime, &to->si_stime); - break; - case SIL_RT: - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(from->si_int, &to->si_int); - break; - case SIL_SYS: - err |= __put_user((compat_uptr_t)(unsigned long) - from->si_call_addr, &to->si_call_addr); - err |= __put_user(from->si_syscall, &to->si_syscall); - err |= __put_user(from->si_arch, &to->si_arch); - break; - } - return err; -} - /* * VFP save/restore code. * diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index fe7d445f675f..946681db8dc3 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -126,8 +126,6 @@ typedef u32 compat_old_sigset_t; /* at least 32 bits */ typedef u32 compat_sigset_word; -#define SI_PAD_SIZE32 (128/sizeof(int) - 3) - #define COMPAT_OFF_T_MAX 0x7fffffff /* diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index 500b5e4634ea..c4db910a8794 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -76,60 +76,3 @@ SYSCALL_DEFINE3(32_sigaction, long, sig, const struct compat_sigaction __user *, return ret; } - -int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) -{ - int err; - - if (!access_ok (VERIFY_WRITE, to, sizeof(compat_siginfo_t))) - return -EFAULT; - - /* If you change siginfo_t structure, please be sure - this code is fixed accordingly. - It should never copy any pad contained in the structure - to avoid security leaks, but must copy the generic - 3 ints plus the relevant union member. - This routine must convert siginfo from 64bit to 32bit as well - at the same time. */ - err = __put_user(from->si_signo, &to->si_signo); - err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user(from->si_code, &to->si_code); - if (from->si_code < 0) - err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); - else { - switch (siginfo_layout(from->si_signo, from->si_code)) { - case SIL_TIMER: - err |= __put_user(from->si_tid, &to->si_tid); - err |= __put_user(from->si_overrun, &to->si_overrun); - err |= __put_user(from->si_int, &to->si_int); - break; - case SIL_CHLD: - err |= __put_user(from->si_utime, &to->si_utime); - err |= __put_user(from->si_stime, &to->si_stime); - err |= __put_user(from->si_status, &to->si_status); - case SIL_KILL: - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - break; - case SIL_FAULT: - err |= __put_user((unsigned long)from->si_addr, &to->si_addr); - break; - case SIL_POLL: - err |= __put_user(from->si_band, &to->si_band); - err |= __put_user(from->si_fd, &to->si_fd); - break; - case SIL_RT: - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(from->si_int, &to->si_int); - break; - case SIL_SYS: - err |= __copy_to_user(&to->si_call_addr, &from->si_call_addr, - sizeof(compat_uptr_t)); - err |= __put_user(from->si_syscall, &to->si_syscall); - err |= __put_user(from->si_arch, &to->si_arch); - break; - } - } - return err; -} diff --git a/arch/parisc/kernel/signal32.c b/arch/parisc/kernel/signal32.c index 558e32475c35..e8ef3eb69449 100644 --- a/arch/parisc/kernel/signal32.c +++ b/arch/parisc/kernel/signal32.c @@ -260,65 +260,3 @@ setup_sigcontext32(struct compat_sigcontext __user *sc, struct compat_regfile __ return err; } - -int -copy_siginfo_to_user32 (compat_siginfo_t __user *to, const siginfo_t *from) -{ - compat_uptr_t addr; - compat_int_t val; - int err; - - if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) - return -EFAULT; - - /* If you change siginfo_t structure, please be sure - this code is fixed accordingly. - It should never copy any pad contained in the structure - to avoid security leaks, but must copy the generic - 3 ints plus the relevant union member. - This routine must convert siginfo from 64bit to 32bit as well - at the same time. */ - err = __put_user(from->si_signo, &to->si_signo); - err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user(from->si_code, &to->si_code); - if (from->si_code < 0) - err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); - else { - switch (siginfo_layout(from->si_signo, from->si_code)) { - case SIL_CHLD: - err |= __put_user(from->si_utime, &to->si_utime); - err |= __put_user(from->si_stime, &to->si_stime); - err |= __put_user(from->si_status, &to->si_status); - case SIL_KILL: - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - break; - case SIL_FAULT: - addr = ptr_to_compat(from->si_addr); - err |= __put_user(addr, &to->si_addr); - break; - case SIL_POLL: - err |= __put_user(from->si_band, &to->si_band); - err |= __put_user(from->si_fd, &to->si_fd); - break; - case SIL_TIMER: - err |= __put_user(from->si_tid, &to->si_tid); - err |= __put_user(from->si_overrun, &to->si_overrun); - val = (compat_int_t)from->si_int; - err |= __put_user(val, &to->si_int); - break; - case SIL_RT: - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(from->si_pid, &to->si_pid); - val = (compat_int_t)from->si_int; - err |= __put_user(val, &to->si_int); - break; - case SIL_SYS: - err |= __put_user(ptr_to_compat(from->si_call_addr), &to->si_call_addr); - err |= __put_user(from->si_syscall, &to->si_syscall); - err |= __put_user(from->si_arch, &to->si_arch); - break; - } - } - return err; -} diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h index d25858e4db63..a271dc0976ce 100644 --- a/arch/parisc/kernel/signal32.h +++ b/arch/parisc/kernel/signal32.h @@ -34,8 +34,6 @@ struct compat_ucontext { /* ELF32 signal handling */ -int copy_siginfo_to_user32 (compat_siginfo_t __user *to, const siginfo_t *from); - /* In a deft move of uber-hackery, we decide to carry the top half of all * 64-bit registers in a non-portable, non-ABI, hidden structure. * Userspace can read the hidden structure if it *wants* but is never diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index 8a2363221b0c..62168e1158f1 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -119,8 +119,6 @@ typedef u32 compat_old_sigset_t; typedef u32 compat_sigset_word; -#define SI_PAD_SIZE32 (128/sizeof(int) - 3) - #define COMPAT_OFF_T_MAX 0x7fffffff /* diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index ee62ff7b296c..aded81169648 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -873,63 +873,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, #endif #ifdef CONFIG_PPC64 -int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s) -{ - int err; - - if (!access_ok (VERIFY_WRITE, d, sizeof(*d))) - return -EFAULT; - - /* If you change siginfo_t structure, please be sure - * this code is fixed accordingly. - * It should never copy any pad contained in the structure - * to avoid security leaks, but must copy the generic - * 3 ints plus the relevant union member. - * This routine must convert siginfo from 64bit to 32bit as well - * at the same time. - */ - err = __put_user(s->si_signo, &d->si_signo); - err |= __put_user(s->si_errno, &d->si_errno); - err |= __put_user(s->si_code, &d->si_code); - if (s->si_code < 0) - err |= __copy_to_user(&d->_sifields._pad, &s->_sifields._pad, - SI_PAD_SIZE32); - else switch(siginfo_layout(s->si_signo, s->si_code)) { - case SIL_CHLD: - err |= __put_user(s->si_pid, &d->si_pid); - err |= __put_user(s->si_uid, &d->si_uid); - err |= __put_user(s->si_utime, &d->si_utime); - err |= __put_user(s->si_stime, &d->si_stime); - err |= __put_user(s->si_status, &d->si_status); - break; - case SIL_FAULT: - err |= __put_user((unsigned int)(unsigned long)s->si_addr, - &d->si_addr); - break; - case SIL_POLL: - err |= __put_user(s->si_band, &d->si_band); - err |= __put_user(s->si_fd, &d->si_fd); - break; - case SIL_TIMER: - err |= __put_user(s->si_tid, &d->si_tid); - err |= __put_user(s->si_overrun, &d->si_overrun); - err |= __put_user(s->si_int, &d->si_int); - break; - case SIL_SYS: - err |= __put_user(ptr_to_compat(s->si_call_addr), &d->si_call_addr); - err |= __put_user(s->si_syscall, &d->si_syscall); - err |= __put_user(s->si_arch, &d->si_arch); - break; - case SIL_RT: - err |= __put_user(s->si_int, &d->si_int); - /* fallthrough */ - case SIL_KILL: - err |= __put_user(s->si_pid, &d->si_pid); - err |= __put_user(s->si_uid, &d->si_uid); - break; - } - return err; -} #define copy_siginfo_to_user copy_siginfo_to_user32 diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index d77ce14ffa5c..18c1eeb847b2 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -50,58 +50,6 @@ typedef struct struct ucontext32 uc; } rt_sigframe32; -int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) -{ - int err; - - /* If you change siginfo_t structure, please be sure - this code is fixed accordingly. - It should never copy any pad contained in the structure - to avoid security leaks, but must copy the generic - 3 ints plus the relevant union member. - This routine must convert siginfo from 64bit to 32bit as well - at the same time. */ - err = __put_user(from->si_signo, &to->si_signo); - err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user(from->si_code, &to->si_code); - if (from->si_code < 0) - err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); - else { - switch (siginfo_layout(from->si_signo, from->si_code)) { - case SIL_RT: - err |= __put_user(from->si_int, &to->si_int); - /* fallthrough */ - case SIL_KILL: - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - break; - case SIL_CHLD: - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(from->si_utime, &to->si_utime); - err |= __put_user(from->si_stime, &to->si_stime); - err |= __put_user(from->si_status, &to->si_status); - break; - case SIL_FAULT: - err |= __put_user((unsigned long) from->si_addr, - &to->si_addr); - break; - case SIL_POLL: - err |= __put_user(from->si_band, &to->si_band); - err |= __put_user(from->si_fd, &to->si_fd); - break; - case SIL_TIMER: - err |= __put_user(from->si_tid, &to->si_tid); - err |= __put_user(from->si_overrun, &to->si_overrun); - err |= __put_user(from->si_int, &to->si_int); - break; - default: - break; - } - } - return err ? -EFAULT : 0; -} - /* Store registers needed to create the signal frame */ static void store_sigregs(void) { diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index c3688ae98b90..615283e16f22 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -149,8 +149,6 @@ typedef u32 compat_old_sigset_t; typedef u32 compat_sigset_word; -#define SI_PAD_SIZE32 (128/sizeof(int) - 3) - #define COMPAT_OFF_T_MAX 0x7fffffff /* diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 8022bb4c65a5..44d379db3f64 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -70,59 +70,6 @@ struct rt_signal_frame32 { /* __siginfo_rwin_t * */u32 rwin_save; } __attribute__((aligned(8))); -int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) -{ - int err; - - if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) - return -EFAULT; - - /* If you change siginfo_t structure, please be sure - this code is fixed accordingly. - It should never copy any pad contained in the structure - to avoid security leaks, but must copy the generic - 3 ints plus the relevant union member. - This routine must convert siginfo from 64bit to 32bit as well - at the same time. */ - err = __put_user(from->si_signo, &to->si_signo); - err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user(from->si_code, &to->si_code); - if (from->si_code < 0) - err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); - else { - switch (siginfo_layout(from->si_signo, from->si_code)) { - case SIL_TIMER: - err |= __put_user(from->si_tid, &to->si_tid); - err |= __put_user(from->si_overrun, &to->si_overrun); - err |= __put_user(from->si_int, &to->si_int); - break; - case SIL_CHLD: - err |= __put_user(from->si_utime, &to->si_utime); - err |= __put_user(from->si_stime, &to->si_stime); - err |= __put_user(from->si_status, &to->si_status); - default: - case SIL_KILL: - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - break; - case SIL_FAULT: - err |= __put_user(from->si_trapno, &to->si_trapno); - err |= __put_user((unsigned long)from->si_addr, &to->si_addr); - break; - case SIL_POLL: - err |= __put_user(from->si_band, &to->si_band); - err |= __put_user(from->si_fd, &to->si_fd); - break; - case SIL_RT: - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(from->si_int, &to->si_int); - break; - } - } - return err; -} - /* Checks if the fp is valid. We always build signal frames which are * 16-byte aligned, therefore we can always enforce that the restore * frame has that property as well. diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index c6b7613256b4..769ff6ac0bf5 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -110,8 +110,6 @@ struct compat_flock64 { typedef u32 compat_sigset_word; -#define COMPAT_SI_PAD_SIZE (128/sizeof(int) - 3) - #define COMPAT_OFF_T_MAX 0x7fffffff struct compat_ipc64_perm { diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index 4e7f40a10eb3..a703bd0e0488 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -50,61 +50,6 @@ struct compat_rt_sigframe { struct compat_ucontext uc; }; -int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from) -{ - int err; - - if (!access_ok(VERIFY_WRITE, to, sizeof(struct compat_siginfo))) - return -EFAULT; - - /* If you change siginfo_t structure, please make sure that - this code is fixed accordingly. - It should never copy any pad contained in the structure - to avoid security leaks, but must copy the generic - 3 ints plus the relevant union member. */ - err = __put_user(from->si_signo, &to->si_signo); - err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user(from->si_code, &to->si_code); - - if (from->si_code < 0) { - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(from->si_int, &to->si_int); - } else { - /* - * First 32bits of unions are always present: - * si_pid === si_band === si_tid === si_addr(LS half) - */ - err |= __put_user(from->_sifields._pad[0], - &to->_sifields._pad[0]); - switch (siginfo_layout(from->si_signo, from->si_code)) { - case SIL_FAULT: - break; - case SIL_CHLD: - err |= __put_user(from->si_utime, &to->si_utime); - err |= __put_user(from->si_stime, &to->si_stime); - err |= __put_user(from->si_status, &to->si_status); - /* FALL THROUGH */ - default: - case SIL_KILL: - err |= __put_user(from->si_uid, &to->si_uid); - break; - case SIL_POLL: - err |= __put_user(from->si_fd, &to->si_fd); - break; - case SIL_TIMER: - err |= __put_user(from->si_overrun, &to->si_overrun); - err |= __put_user(from->si_int, &to->si_int); - break; - case SIL_RT: - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(from->si_int, &to->si_int); - break; - } - } - return err; -} - /* The assembly shim for this function arranges to ignore the return value. */ long compat_sys_rt_sigreturn(void) { diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 0b76fc91f672..e1c8dab86670 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -249,4 +249,8 @@ static inline bool in_compat_syscall(void) } #define in_compat_syscall in_compat_syscall /* override the generic impl */ +struct compat_siginfo; +int __copy_siginfo_to_user32(struct compat_siginfo __user *to, + const siginfo_t *from, bool x32_ABI); + #endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 4df2754ef380..44bbc39a57b3 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -20,12 +20,6 @@ int ia32_setup_frame(int sig, struct ksignal *ksig, # define ia32_setup_rt_frame __setup_rt_frame #endif -#ifdef CONFIG_COMPAT -int __copy_siginfo_to_user32(compat_siginfo_t __user *to, - const siginfo_t *from, bool x32_ABI); -#endif - - extern void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk); extern void convert_to_fxsr(struct task_struct *tsk, diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 59148de2d83f..ac057f9b0763 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -98,6 +98,8 @@ static inline void signal_compat_build_tests(void) void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact) { + signal_compat_build_tests(); + /* Don't leak in-kernel non-uapi flags to user-space */ if (oact) oact->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI); @@ -113,97 +115,3 @@ void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact) if (in_x32_syscall()) act->sa.sa_flags |= SA_X32_ABI; } - -int __copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from, - bool x32_ABI) -{ - int err = 0; - - signal_compat_build_tests(); - - if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) - return -EFAULT; - - put_user_try { - /* If you change siginfo_t structure, please make sure that - this code is fixed accordingly. - It should never copy any pad contained in the structure - to avoid security leaks, but must copy the generic - 3 ints plus the relevant union member. */ - put_user_ex(from->si_signo, &to->si_signo); - put_user_ex(from->si_errno, &to->si_errno); - put_user_ex(from->si_code, &to->si_code); - - if (from->si_code < 0) { - put_user_ex(from->si_pid, &to->si_pid); - put_user_ex(from->si_uid, &to->si_uid); - put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr); - } else { - /* - * First 32bits of unions are always present: - * si_pid === si_band === si_tid === si_addr(LS half) - */ - put_user_ex(from->_sifields._pad[0], - &to->_sifields._pad[0]); - switch (siginfo_layout(from->si_signo, from->si_code)) { - case SIL_FAULT: - if (from->si_signo == SIGBUS && - (from->si_code == BUS_MCEERR_AR || - from->si_code == BUS_MCEERR_AO)) - put_user_ex(from->si_addr_lsb, &to->si_addr_lsb); - - if (from->si_signo == SIGSEGV) { - if (from->si_code == SEGV_BNDERR) { - compat_uptr_t lower = (unsigned long)from->si_lower; - compat_uptr_t upper = (unsigned long)from->si_upper; - put_user_ex(lower, &to->si_lower); - put_user_ex(upper, &to->si_upper); - } - if (from->si_code == SEGV_PKUERR) - put_user_ex(from->si_pkey, &to->si_pkey); - } - break; - case SIL_SYS: - put_user_ex(from->si_syscall, &to->si_syscall); - put_user_ex(from->si_arch, &to->si_arch); - break; - case SIL_CHLD: - if (!x32_ABI) { - put_user_ex(from->si_utime, &to->si_utime); - put_user_ex(from->si_stime, &to->si_stime); -#ifdef CONFIG_X86_X32_ABI - } else { - put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime); - put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime); -#endif - } - put_user_ex(from->si_status, &to->si_status); - /* FALL THROUGH */ - case SIL_KILL: - put_user_ex(from->si_uid, &to->si_uid); - break; - case SIL_POLL: - put_user_ex(from->si_fd, &to->si_fd); - break; - case SIL_TIMER: - put_user_ex(from->si_overrun, &to->si_overrun); - put_user_ex(ptr_to_compat(from->si_ptr), - &to->si_ptr); - break; - case SIL_RT: - put_user_ex(from->si_uid, &to->si_uid); - put_user_ex(from->si_int, &to->si_int); - break; - } - } - } put_user_catch(err); - - return err; -} - -/* from syscall's path, where we know the ABI */ -int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) -{ - return __copy_siginfo_to_user32(to, from, in_x32_syscall()); -} - diff --git a/kernel/signal.c b/kernel/signal.c index bebe44265b8b..4976f05aa09b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2815,6 +2815,96 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from) } #ifdef CONFIG_COMPAT +int copy_siginfo_to_user32(struct compat_siginfo __user *to, + const struct siginfo *from) +#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) +{ + return __copy_siginfo_to_user32(to, from, in_x32_syscall()); +} +int __copy_siginfo_to_user32(struct compat_siginfo __user *to, + const struct siginfo *from, bool x32_ABI) +#endif +{ + struct compat_siginfo new; + memset(&new, 0, sizeof(new)); + + new.si_signo = from->si_signo; + new.si_errno = from->si_errno; + new.si_code = from->si_code; + switch(siginfo_layout(from->si_signo, from->si_code)) { + case SIL_KILL: + new.si_pid = from->si_pid; + new.si_uid = from->si_uid; + break; + case SIL_TIMER: + new.si_tid = from->si_tid; + new.si_overrun = from->si_overrun; + new.si_int = from->si_int; + break; + case SIL_POLL: + new.si_band = from->si_band; + new.si_fd = from->si_fd; + break; + case SIL_FAULT: + new.si_addr = ptr_to_compat(from->si_addr); +#ifdef __ARCH_SI_TRAPNO + new.si_trapno = from->si_trapno; +#endif +#ifdef BUS_MCEERR_AR + if ((from->si_signo == SIGBUS) && (from->si_code == BUS_MCEERR_AR)) + new.si_addr_lsb = from->si_addr_lsb; +#endif +#ifdef BUS_MCEERR_AO + if ((from->si_signo == SIGBUS) && (from->si_code == BUS_MCEERR_AO)) + new.si_addr_lsb = from->si_addr_lsb; +#endif +#ifdef SEGV_BNDERR + if ((from->si_signo == SIGSEGV) && + (from->si_code == SEGV_BNDERR)) { + new.si_lower = ptr_to_compat(from->si_lower); + new.si_upper = ptr_to_compat(from->si_upper); + } +#endif +#ifdef SEGV_PKUERR + if ((from->si_signo == SIGSEGV) && + (from->si_code == SEGV_PKUERR)) + new.si_pkey = from->si_pkey; +#endif + + break; + case SIL_CHLD: + new.si_pid = from->si_pid; + new.si_uid = from->si_uid; + new.si_status = from->si_status; +#ifdef CONFIG_X86_X32_ABI + if (x32_ABI) { + new._sifields._sigchld_x32._utime = from->si_utime; + new._sifields._sigchld_x32._stime = from->si_stime; + } else +#endif + { + new.si_utime = from->si_utime; + new.si_stime = from->si_stime; + } + break; + case SIL_RT: + new.si_pid = from->si_pid; + new.si_uid = from->si_uid; + new.si_int = from->si_int; + break; + case SIL_SYS: + new.si_call_addr = ptr_to_compat(from->si_call_addr); + new.si_syscall = from->si_syscall; + new.si_arch = from->si_arch; + break; + } + + if (copy_to_user(to, &new, sizeof(struct compat_siginfo))) + return -EFAULT; + + return 0; +} + int copy_siginfo_from_user32(struct siginfo *to, const struct compat_siginfo __user *ufrom) { -- cgit v1.2.3 From 35f80debaef07bdaeffbbb20a6e999d8ac47972c Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 1 Dec 2017 10:46:35 -0600 Subject: powerpc/numa: Look up device node in of_get_assoc_arrays() Look up the device node for the associativity array property instead of having it passed in as a parameter. This changes precedes an update in which the calling routines for of_get_assoc_arrays() will not have the device node pointer to pass in. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index adb6364f4091..a0214aa2dd78 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -466,19 +466,27 @@ struct assoc_arrays { * indicating the size of each associativity array, followed by a list * of N associativity arrays. */ -static int of_get_assoc_arrays(struct device_node *memory, - struct assoc_arrays *aa) +static int of_get_assoc_arrays(struct assoc_arrays *aa) { + struct device_node *memory; const __be32 *prop; u32 len; + memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (!memory) + return -1; + prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len); - if (!prop || len < 2 * sizeof(unsigned int)) + if (!prop || len < 2 * sizeof(unsigned int)) { + of_node_put(memory); return -1; + } aa->n_arrays = of_read_number(prop++, 1); aa->array_sz = of_read_number(prop++, 1); + of_node_put(memory); + /* Now that we know the number of arrays and size of each array, * revalidate the size of the property read in. */ @@ -661,7 +669,7 @@ static void __init parse_drconf_memory(struct device_node *memory) if (!lmb_size) return; - rc = of_get_assoc_arrays(memory, &aa); + rc = of_get_assoc_arrays(&aa); if (rc) return; @@ -996,7 +1004,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, if (!lmb_size) return -1; - rc = of_get_assoc_arrays(memory, &aa); + rc = of_get_assoc_arrays(&aa); if (rc) return -1; -- cgit v1.2.3 From 22508f3dc985f6ed948293297f728605075fe6a7 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 1 Dec 2017 10:46:44 -0600 Subject: powerpc/numa: Look up device node in of_get_usable_memory() Look up the device node for the usable memory property instead of having it passed in as a parameter. This changes precedes an update in which the calling routines for of_get_usable_memory() will not have the device node pointer to pass in. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index a0214aa2dd78..baba6403488b 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -184,11 +184,19 @@ static const __be32 *of_get_associativity(struct device_node *dev) * it exists (the property exists only in kexec/kdump kernels, * added by kexec-tools) */ -static const __be32 *of_get_usable_memory(struct device_node *memory) +static const __be32 *of_get_usable_memory(void) { + struct device_node *memory; const __be32 *prop; u32 len; + + memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (!memory) + return NULL; + prop = of_get_property(memory, "linux,drconf-usable-memory", &len); + of_node_put(memory); + if (!prop || len < sizeof(unsigned int)) return NULL; return prop; @@ -674,7 +682,7 @@ static void __init parse_drconf_memory(struct device_node *memory) return; /* check if this is a kexec/kdump kernel */ - usm = of_get_usable_memory(memory); + usm = of_get_usable_memory(); if (usm != NULL) is_kexec_kdump = 1; -- cgit v1.2.3 From b88fc309d6ad0abee74053b4a8f78a1a4ed5e5db Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 1 Dec 2017 10:46:53 -0600 Subject: powerpc/numa: Look up associativity array in of_drconf_to_nid_single Look up the associativity arrays in of_drconf_to_nid_single when deriving the nid for a LMB instead of having it passed in as a parameter. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index baba6403488b..d25278adaead 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -509,26 +509,30 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa) * This is like of_node_to_nid_single() for memory represented in the * ibm,dynamic-reconfiguration-memory node. */ -static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, - struct assoc_arrays *aa) +static int of_drconf_to_nid_single(struct of_drconf_cell *drmem) { + struct assoc_arrays aa = { .arrays = NULL }; int default_nid = 0; int nid = default_nid; - int index; + int rc, index; + + rc = of_get_assoc_arrays(&aa); + if (rc) + return default_nid; - if (min_common_depth > 0 && min_common_depth <= aa->array_sz && + if (min_common_depth > 0 && min_common_depth <= aa.array_sz && !(drmem->flags & DRCONF_MEM_AI_INVALID) && - drmem->aa_index < aa->n_arrays) { - index = drmem->aa_index * aa->array_sz + min_common_depth - 1; - nid = of_read_number(&aa->arrays[index], 1); + drmem->aa_index < aa.n_arrays) { + index = drmem->aa_index * aa.array_sz + min_common_depth - 1; + nid = of_read_number(&aa.arrays[index], 1); if (nid == 0xffff || nid >= MAX_NUMNODES) nid = default_nid; if (nid > 0) { - index = drmem->aa_index * aa->array_sz; + index = drmem->aa_index * aa.array_sz; initialize_distance_lookup_table(nid, - &aa->arrays[index]); + &aa.arrays[index]); } } @@ -664,10 +668,9 @@ static inline int __init read_usm_ranges(const __be32 **usm) static void __init parse_drconf_memory(struct device_node *memory) { const __be32 *uninitialized_var(dm), *usm; - unsigned int n, rc, ranges, is_kexec_kdump = 0; + unsigned int n, ranges, is_kexec_kdump = 0; unsigned long lmb_size, base, size, sz; int nid; - struct assoc_arrays aa = { .arrays = NULL }; n = of_get_drconf_memory(memory, &dm); if (!n) @@ -677,10 +680,6 @@ static void __init parse_drconf_memory(struct device_node *memory) if (!lmb_size) return; - rc = of_get_assoc_arrays(&aa); - if (rc) - return; - /* check if this is a kexec/kdump kernel */ usm = of_get_usable_memory(); if (usm != NULL) @@ -711,7 +710,7 @@ static void __init parse_drconf_memory(struct device_node *memory) base = read_n_cells(n_mem_addr_cells, &usm); size = read_n_cells(n_mem_size_cells, &usm); } - nid = of_drconf_to_nid_single(&drmem, &aa); + nid = of_drconf_to_nid_single(&drmem); fake_numa_create_new_node( ((base + size) >> PAGE_SHIFT), &nid); @@ -999,9 +998,8 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, unsigned long scn_addr) { const __be32 *dm; - unsigned int drconf_cell_cnt, rc; + unsigned int drconf_cell_cnt; unsigned long lmb_size; - struct assoc_arrays aa; int nid = -1; drconf_cell_cnt = of_get_drconf_memory(memory, &dm); @@ -1012,10 +1010,6 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, if (!lmb_size) return -1; - rc = of_get_assoc_arrays(&aa); - if (rc) - return -1; - for (; drconf_cell_cnt != 0; --drconf_cell_cnt) { struct of_drconf_cell drmem; @@ -1031,7 +1025,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, || (scn_addr >= (drmem.base_addr + lmb_size))) continue; - nid = of_drconf_to_nid_single(&drmem, &aa); + nid = of_drconf_to_nid_single(&drmem); break; } -- cgit v1.2.3 From 6c6ea53725b357fa3deac96d8d2d4ee785b67c6b Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 1 Dec 2017 10:47:08 -0600 Subject: powerpc/mm: Separate ibm, dynamic-memory data from DT format We currently have code to parse the dynamic reconfiguration LMB information from the ibm,dynamic-meory device tree property in multiple locations; numa.c, prom.c, and pseries/hotplug-memory.c. In anticipation of adding support for a version 2 of the ibm,dynamic-memory property this patch aims to separate the device tree information from the device tree format. Doing this requires a two step process to avoid a possibly very large bootmem allocation early in boot. During initial boot, new routines are provided to walk the device tree property and make a call-back for each LMB. The second step (introduced in later patches) will allocate an array of LMB information that can be used directly without needing to know the DT format. This approach provides the benefit of consolidating the device tree property parsing to a single location and (eventually) providing a common data structure for retrieving LMB information. This patch introduces a routine to walk the ibm,dynamic-memory property in the flattened device tree and updates the prom.c code to use this to initialize memory. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/drmem.h | 48 ++++++++++++++++ arch/powerpc/kernel/prom.c | 115 +++++++++++++++++---------------------- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/drmem.c | 76 ++++++++++++++++++++++++++ 4 files changed, 174 insertions(+), 67 deletions(-) create mode 100644 arch/powerpc/include/asm/drmem.h create mode 100644 arch/powerpc/mm/drmem.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h new file mode 100644 index 000000000000..8b1fe465ec9d --- /dev/null +++ b/arch/powerpc/include/asm/drmem.h @@ -0,0 +1,48 @@ +/* + * drmem.h: Power specific logical memory block representation + * + * Copyright 2017 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ASM_POWERPC_LMB_H +#define _ASM_POWERPC_LMB_H + +struct drmem_lmb { + u64 base_addr; + u32 drc_index; + u32 aa_index; + u32 flags; +}; + +struct drmem_lmb_info { + struct drmem_lmb *lmbs; + int n_lmbs; + u32 lmb_size; +}; + +extern struct drmem_lmb_info *drmem_info; + +#define for_each_drmem_lmb_in_range(lmb, start, end) \ + for ((lmb) = (start); (lmb) <= (end); (lmb)++) + +#define for_each_drmem_lmb(lmb) \ + for_each_drmem_lmb_in_range((lmb), \ + &drmem_info->lmbs[0], \ + &drmem_info->lmbs[drmem_info->n_lmbs - 1]) + +static inline u32 drmem_lmb_size(void) +{ + return drmem_info->lmb_size; +} + +#ifdef CONFIG_PPC_PSERIES +void __init walk_drmem_lmbs_early(unsigned long node, + void (*func)(struct drmem_lmb *, const __be32 **)); +#endif + +#endif /* _ASM_POWERPC_LMB_H */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index b15bae265c90..4dffef947b8a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -59,6 +59,7 @@ #include #include #include +#include #include @@ -455,92 +456,74 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node, #ifdef CONFIG_PPC_PSERIES /* - * Interpret the ibm,dynamic-memory property in the - * /ibm,dynamic-reconfiguration-memory node. + * Interpret the ibm dynamic reconfiguration memory LMBs. * This contains a list of memory blocks along with NUMA affinity * information. */ -static int __init early_init_dt_scan_drconf_memory(unsigned long node) +static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, + const __be32 **usm) { - const __be32 *dm, *ls, *usm; - int l; - unsigned long n, flags; - u64 base, size, memblock_size; - unsigned int is_kexec_kdump = 0, rngs; - - ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l); - if (ls == NULL || l < dt_root_size_cells * sizeof(__be32)) - return 0; - memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls); + u64 base, size; + int is_kexec_kdump = 0, rngs; - dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l); - if (dm == NULL || l < sizeof(__be32)) - return 0; + base = lmb->base_addr; + size = drmem_lmb_size(); + rngs = 1; - n = of_read_number(dm++, 1); /* number of entries */ - if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(__be32)) - return 0; + /* + * Skip this block if the reserved bit is set in flags + * or if the block is not assigned to this partition. + */ + if ((lmb->flags & DRCONF_MEM_RESERVED) || + !(lmb->flags & DRCONF_MEM_ASSIGNED)) + return; - /* check if this is a kexec/kdump kernel. */ - usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", - &l); - if (usm != NULL) + if (*usm) is_kexec_kdump = 1; - for (; n != 0; --n) { - base = dt_mem_next_cell(dt_root_addr_cells, &dm); - flags = of_read_number(&dm[3], 1); - /* skip DRC index, pad, assoc. list index, flags */ - dm += 4; - /* skip this block if the reserved bit is set in flags - or if the block is not assigned to this partition */ - if ((flags & DRCONF_MEM_RESERVED) || - !(flags & DRCONF_MEM_ASSIGNED)) - continue; - size = memblock_size; - rngs = 1; + if (is_kexec_kdump) { + /* + * For each memblock in ibm,dynamic-memory, a + * corresponding entry in linux,drconf-usable-memory + * property contains a counter 'p' followed by 'p' + * (base, size) duple. Now read the counter from + * linux,drconf-usable-memory property + */ + rngs = dt_mem_next_cell(dt_root_size_cells, usm); + if (!rngs) /* there are no (base, size) duple */ + return; + } + + do { if (is_kexec_kdump) { - /* - * For each memblock in ibm,dynamic-memory, a corresponding - * entry in linux,drconf-usable-memory property contains - * a counter 'p' followed by 'p' (base, size) duple. - * Now read the counter from - * linux,drconf-usable-memory property - */ - rngs = dt_mem_next_cell(dt_root_size_cells, &usm); - if (!rngs) /* there are no (base, size) duple */ + base = dt_mem_next_cell(dt_root_addr_cells, usm); + size = dt_mem_next_cell(dt_root_size_cells, usm); + } + + if (iommu_is_off) { + if (base >= 0x80000000ul) continue; + if ((base + size) > 0x80000000ul) + size = 0x80000000ul - base; } - do { - if (is_kexec_kdump) { - base = dt_mem_next_cell(dt_root_addr_cells, - &usm); - size = dt_mem_next_cell(dt_root_size_cells, - &usm); - } - if (iommu_is_off) { - if (base >= 0x80000000ul) - continue; - if ((base + size) > 0x80000000ul) - size = 0x80000000ul - base; - } - memblock_add(base, size); - } while (--rngs); - } - memblock_dump_all(); - return 0; + + DBG("Adding: %llx -> %llx\n", base, size); + memblock_add(base, size); + } while (--rngs); } -#else -#define early_init_dt_scan_drconf_memory(node) 0 #endif /* CONFIG_PPC_PSERIES */ static int __init early_init_dt_scan_memory_ppc(unsigned long node, const char *uname, int depth, void *data) { +#ifdef CONFIG_PPC_PSERIES if (depth == 1 && - strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) - return early_init_dt_scan_drconf_memory(node); + strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) { + walk_drmem_lmbs_early(node, early_init_drmem_lmb); + return 0; + } +#endif return early_init_dt_scan_memory(node, uname, depth, data); } diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 76a6b057d454..8d271bfe2d94 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -9,7 +9,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(BITS).o pgtable_$(BITS).o \ - init-common.o mmu_context.o + init-common.o mmu_context.o drmem.o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ tlb_nohash_low.o obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c new file mode 100644 index 000000000000..f8ee0f355405 --- /dev/null +++ b/arch/powerpc/mm/drmem.c @@ -0,0 +1,76 @@ +/* + * Dynamic reconfiguration memory support + * + * Copyright 2017 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "drmem: " fmt + +#include +#include +#include +#include +#include +#include + +static struct drmem_lmb_info __drmem_info; +struct drmem_lmb_info *drmem_info = &__drmem_info; + +#ifdef CONFIG_PPC_PSERIES +static void __init read_drconf_v1_cell(struct drmem_lmb *lmb, + const __be32 **prop) +{ + const __be32 *p = *prop; + + lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p); + lmb->drc_index = of_read_number(p++, 1); + + p++; /* skip reserved field */ + + lmb->aa_index = of_read_number(p++, 1); + lmb->flags = of_read_number(p++, 1); + + *prop = p; +} + +static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, + void (*func)(struct drmem_lmb *, const __be32 **)) +{ + struct drmem_lmb lmb; + u32 i, n_lmbs; + + n_lmbs = of_read_number(prop++, 1); + + for (i = 0; i < n_lmbs; i++) { + read_drconf_v1_cell(&lmb, &prop); + func(&lmb, &usm); + } +} + +void __init walk_drmem_lmbs_early(unsigned long node, + void (*func)(struct drmem_lmb *, const __be32 **)) +{ + const __be32 *prop, *usm; + int len; + + prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len); + if (!prop || len < dt_root_size_cells * sizeof(__be32)) + return; + + drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop); + + usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len); + + prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &len); + if (prop) + __walk_drmem_v1_lmbs(prop, usm, func); + + memblock_dump_all(); +} + +#endif -- cgit v1.2.3 From 514a9cb3316a08d63063a40a70f11b4318d3c06c Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 1 Dec 2017 10:47:21 -0600 Subject: powerpc/numa: Update numa code use walk_drmem_lmbs Update code in powerpc/numa.c to use the walk_drmem_lmbs() routine instead of parsing the device tree directly. This is in anticipation of introducing a new ibm,dynamic-memory-v2 property with a different format. This will allow the numa code to use a single initialization routine per-LMB irregardless of the device tree format. Additionally, to support additional routines in numa.c that need to look up LMB information, an late_init routine is added to drmem.c to allocate the array of LMB information. This LMB array will provide per-LMB information to separate the LMB data from the device tree format. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/drmem.h | 4 + arch/powerpc/mm/drmem.c | 100 +++++++++++++++++- arch/powerpc/mm/numa.c | 223 +++++++++------------------------------ 3 files changed, 155 insertions(+), 172 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 8b1fe465ec9d..9dbfd38fa385 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -40,6 +40,10 @@ static inline u32 drmem_lmb_size(void) return drmem_info->lmb_size; } +u64 drmem_lmb_memory_max(void); +void __init walk_drmem_lmbs(struct device_node *dn, + void (*func)(struct drmem_lmb *, const __be32 **)); + #ifdef CONFIG_PPC_PSERIES void __init walk_drmem_lmbs_early(unsigned long node, void (*func)(struct drmem_lmb *, const __be32 **)); diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index f8ee0f355405..5888ac3ca8a9 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -21,7 +21,14 @@ static struct drmem_lmb_info __drmem_info; struct drmem_lmb_info *drmem_info = &__drmem_info; -#ifdef CONFIG_PPC_PSERIES +u64 drmem_lmb_memory_max(void) +{ + struct drmem_lmb *last_lmb; + + last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1]; + return last_lmb->base_addr + drmem_lmb_size(); +} + static void __init read_drconf_v1_cell(struct drmem_lmb *lmb, const __be32 **prop) { @@ -52,6 +59,7 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, } } +#ifdef CONFIG_PPC_PSERIES void __init walk_drmem_lmbs_early(unsigned long node, void (*func)(struct drmem_lmb *, const __be32 **)) { @@ -74,3 +82,93 @@ void __init walk_drmem_lmbs_early(unsigned long node, } #endif + +static int __init init_drmem_lmb_size(struct device_node *dn) +{ + const __be32 *prop; + int len; + + if (drmem_info->lmb_size) + return 0; + + prop = of_get_property(dn, "ibm,lmb-size", &len); + if (!prop || len < dt_root_size_cells * sizeof(__be32)) { + pr_info("Could not determine LMB size\n"); + return -1; + } + + drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop); + return 0; +} + +/* + * Returns the property linux,drconf-usable-memory if + * it exists (the property exists only in kexec/kdump kernels, + * added by kexec-tools) + */ +static const __be32 *of_get_usable_memory(struct device_node *dn) +{ + const __be32 *prop; + u32 len; + + prop = of_get_property(dn, "linux,drconf-usable-memory", &len); + if (!prop || len < sizeof(unsigned int)) + return NULL; + + return prop; +} + +void __init walk_drmem_lmbs(struct device_node *dn, + void (*func)(struct drmem_lmb *, const __be32 **)) +{ + const __be32 *prop, *usm; + + if (init_drmem_lmb_size(dn)) + return; + + usm = of_get_usable_memory(dn); + + prop = of_get_property(dn, "ibm,dynamic-memory", NULL); + if (prop) + __walk_drmem_v1_lmbs(prop, usm, func); +} + +static void __init init_drmem_v1_lmbs(const __be32 *prop) +{ + struct drmem_lmb *lmb; + + drmem_info->n_lmbs = of_read_number(prop++, 1); + + drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb), + GFP_KERNEL); + if (!drmem_info->lmbs) + return; + + for_each_drmem_lmb(lmb) + read_drconf_v1_cell(lmb, &prop); +} + +static int __init drmem_init(void) +{ + struct device_node *dn; + const __be32 *prop; + + dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (!dn) { + pr_info("No dynamic reconfiguration memory found\n"); + return 0; + } + + if (init_drmem_lmb_size(dn)) { + of_node_put(dn); + return 0; + } + + prop = of_get_property(dn, "ibm,dynamic-memory", NULL); + if (prop) + init_drmem_v1_lmbs(prop); + + of_node_put(dn); + return 0; +} +late_initcall(drmem_init); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index d25278adaead..268c7a2d9a5b 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -40,6 +40,7 @@ #include #include #include +#include static int numa_enabled = 1; @@ -179,29 +180,6 @@ static const __be32 *of_get_associativity(struct device_node *dev) return of_get_property(dev, "ibm,associativity", NULL); } -/* - * Returns the property linux,drconf-usable-memory if - * it exists (the property exists only in kexec/kdump kernels, - * added by kexec-tools) - */ -static const __be32 *of_get_usable_memory(void) -{ - struct device_node *memory; - const __be32 *prop; - u32 len; - - memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); - if (!memory) - return NULL; - - prop = of_get_property(memory, "linux,drconf-usable-memory", &len); - of_node_put(memory); - - if (!prop || len < sizeof(unsigned int)) - return NULL; - return prop; -} - int __node_distance(int a, int b) { int i; @@ -395,69 +373,6 @@ static unsigned long read_n_cells(int n, const __be32 **buf) return result; } -/* - * Read the next memblock list entry from the ibm,dynamic-memory property - * and return the information in the provided of_drconf_cell structure. - */ -static void read_drconf_cell(struct of_drconf_cell *drmem, const __be32 **cellp) -{ - const __be32 *cp; - - drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp); - - cp = *cellp; - drmem->drc_index = of_read_number(cp, 1); - drmem->reserved = of_read_number(&cp[1], 1); - drmem->aa_index = of_read_number(&cp[2], 1); - drmem->flags = of_read_number(&cp[3], 1); - - *cellp = cp + 4; -} - -/* - * Retrieve and validate the ibm,dynamic-memory property of the device tree. - * - * The layout of the ibm,dynamic-memory property is a number N of memblock - * list entries followed by N memblock list entries. Each memblock list entry - * contains information as laid out in the of_drconf_cell struct above. - */ -static int of_get_drconf_memory(struct device_node *memory, const __be32 **dm) -{ - const __be32 *prop; - u32 len, entries; - - prop = of_get_property(memory, "ibm,dynamic-memory", &len); - if (!prop || len < sizeof(unsigned int)) - return 0; - - entries = of_read_number(prop++, 1); - - /* Now that we know the number of entries, revalidate the size - * of the property read in to ensure we have everything - */ - if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int)) - return 0; - - *dm = prop; - return entries; -} - -/* - * Retrieve and validate the ibm,lmb-size property for drconf memory - * from the device tree. - */ -static u64 of_get_lmb_size(struct device_node *memory) -{ - const __be32 *prop; - u32 len; - - prop = of_get_property(memory, "ibm,lmb-size", &len); - if (!prop || len < sizeof(unsigned int)) - return 0; - - return read_n_cells(n_mem_size_cells, &prop); -} - struct assoc_arrays { u32 n_arrays; u32 array_sz; @@ -509,7 +424,7 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa) * This is like of_node_to_nid_single() for memory represented in the * ibm,dynamic-reconfiguration-memory node. */ -static int of_drconf_to_nid_single(struct of_drconf_cell *drmem) +static int of_drconf_to_nid_single(struct drmem_lmb *lmb) { struct assoc_arrays aa = { .arrays = NULL }; int default_nid = 0; @@ -521,16 +436,16 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem) return default_nid; if (min_common_depth > 0 && min_common_depth <= aa.array_sz && - !(drmem->flags & DRCONF_MEM_AI_INVALID) && - drmem->aa_index < aa.n_arrays) { - index = drmem->aa_index * aa.array_sz + min_common_depth - 1; + !(lmb->flags & DRCONF_MEM_AI_INVALID) && + lmb->aa_index < aa.n_arrays) { + index = lmb->aa_index * aa.array_sz + min_common_depth - 1; nid = of_read_number(&aa.arrays[index], 1); if (nid == 0xffff || nid >= MAX_NUMNODES) nid = default_nid; if (nid > 0) { - index = drmem->aa_index * aa.array_sz; + index = lmb->aa_index * aa.array_sz; initialize_distance_lookup_table(nid, &aa.arrays[index]); } @@ -665,62 +580,48 @@ static inline int __init read_usm_ranges(const __be32 **usm) * Extract NUMA information from the ibm,dynamic-reconfiguration-memory * node. This assumes n_mem_{addr,size}_cells have been set. */ -static void __init parse_drconf_memory(struct device_node *memory) +static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, + const __be32 **usm) { - const __be32 *uninitialized_var(dm), *usm; - unsigned int n, ranges, is_kexec_kdump = 0; - unsigned long lmb_size, base, size, sz; + unsigned int ranges, is_kexec_kdump = 0; + unsigned long base, size, sz; int nid; - n = of_get_drconf_memory(memory, &dm); - if (!n) - return; - - lmb_size = of_get_lmb_size(memory); - if (!lmb_size) + /* + * Skip this block if the reserved bit is set in flags (0x80) + * or if the block is not assigned to this partition (0x8) + */ + if ((lmb->flags & DRCONF_MEM_RESERVED) + || !(lmb->flags & DRCONF_MEM_ASSIGNED)) return; - /* check if this is a kexec/kdump kernel */ - usm = of_get_usable_memory(); - if (usm != NULL) + if (*usm) is_kexec_kdump = 1; - for (; n != 0; --n) { - struct of_drconf_cell drmem; - - read_drconf_cell(&drmem, &dm); + base = lmb->base_addr; + size = drmem_lmb_size(); + ranges = 1; - /* skip this block if the reserved bit is set in flags (0x80) - or if the block is not assigned to this partition (0x8) */ - if ((drmem.flags & DRCONF_MEM_RESERVED) - || !(drmem.flags & DRCONF_MEM_ASSIGNED)) - continue; - - base = drmem.base_addr; - size = lmb_size; - ranges = 1; + if (is_kexec_kdump) { + ranges = read_usm_ranges(usm); + if (!ranges) /* there are no (base, size) duple */ + return; + } + do { if (is_kexec_kdump) { - ranges = read_usm_ranges(&usm); - if (!ranges) /* there are no (base, size) duple */ - continue; + base = read_n_cells(n_mem_addr_cells, usm); + size = read_n_cells(n_mem_size_cells, usm); } - do { - if (is_kexec_kdump) { - base = read_n_cells(n_mem_addr_cells, &usm); - size = read_n_cells(n_mem_size_cells, &usm); - } - nid = of_drconf_to_nid_single(&drmem); - fake_numa_create_new_node( - ((base + size) >> PAGE_SHIFT), - &nid); - node_set_online(nid); - sz = numa_enforce_memory_limit(base, size); - if (sz) - memblock_set_node(base, sz, - &memblock.memory, nid); - } while (--ranges); - } + + nid = of_drconf_to_nid_single(lmb); + fake_numa_create_new_node(((base + size) >> PAGE_SHIFT), + &nid); + node_set_online(nid); + sz = numa_enforce_memory_limit(base, size); + if (sz) + memblock_set_node(base, sz, &memblock.memory, nid); + } while (--ranges); } static int __init parse_numa_properties(void) @@ -815,8 +716,10 @@ new_range: * ibm,dynamic-reconfiguration-memory node. */ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); - if (memory) - parse_drconf_memory(memory); + if (memory) { + walk_drmem_lmbs(memory, numa_setup_drmem_lmb); + of_node_put(memory); + } return 0; } @@ -994,38 +897,26 @@ early_param("topology_updates", early_topology_updates); * memory represented in the device tree by the property * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory. */ -static int hot_add_drconf_scn_to_nid(struct device_node *memory, - unsigned long scn_addr) +static int hot_add_drconf_scn_to_nid(unsigned long scn_addr) { - const __be32 *dm; - unsigned int drconf_cell_cnt; + struct drmem_lmb *lmb; unsigned long lmb_size; int nid = -1; - drconf_cell_cnt = of_get_drconf_memory(memory, &dm); - if (!drconf_cell_cnt) - return -1; - - lmb_size = of_get_lmb_size(memory); - if (!lmb_size) - return -1; - - for (; drconf_cell_cnt != 0; --drconf_cell_cnt) { - struct of_drconf_cell drmem; - - read_drconf_cell(&drmem, &dm); + lmb_size = drmem_lmb_size(); + for_each_drmem_lmb(lmb) { /* skip this block if it is reserved or not assigned to * this partition */ - if ((drmem.flags & DRCONF_MEM_RESERVED) - || !(drmem.flags & DRCONF_MEM_ASSIGNED)) + if ((lmb->flags & DRCONF_MEM_RESERVED) + || !(lmb->flags & DRCONF_MEM_ASSIGNED)) continue; - if ((scn_addr < drmem.base_addr) - || (scn_addr >= (drmem.base_addr + lmb_size))) + if ((scn_addr < lmb->base_addr) + || (scn_addr >= (lmb->base_addr + lmb_size))) continue; - nid = of_drconf_to_nid_single(&drmem); + nid = of_drconf_to_nid_single(lmb); break; } @@ -1090,7 +981,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (memory) { - nid = hot_add_drconf_scn_to_nid(memory, scn_addr); + nid = hot_add_drconf_scn_to_nid(scn_addr); of_node_put(memory); } else { nid = hot_add_node_scn_to_nid(scn_addr); @@ -1106,11 +997,7 @@ static u64 hot_add_drconf_memory_max(void) { struct device_node *memory = NULL; struct device_node *dn = NULL; - unsigned int drconf_cell_cnt = 0; - u64 lmb_size = 0; - const __be32 *dm = NULL; const __be64 *lrdr = NULL; - struct of_drconf_cell drmem; dn = of_find_node_by_path("/rtas"); if (dn) { @@ -1122,14 +1009,8 @@ static u64 hot_add_drconf_memory_max(void) memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (memory) { - drconf_cell_cnt = of_get_drconf_memory(memory, &dm); - lmb_size = of_get_lmb_size(memory); - - /* Advance to the last cell, each cell has 6 32 bit integers */ - dm += (drconf_cell_cnt - 1) * 6; - read_drconf_cell(&drmem, &dm); of_node_put(memory); - return drmem.base_addr + lmb_size; + return drmem_lmb_memory_max(); } return 0; } -- cgit v1.2.3 From 6195a5001f1d11e1ff6a7e47a865f4b42c1bb28c Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 1 Dec 2017 10:47:31 -0600 Subject: powerpc/pseries: Update memory hotplug code to use drmem LMB array Update the pseries memory hotplug code to use the newly added dynamic reconfiguration LMB array. Doing this is required for the upcoming support of version 2 of the dynamic reconfiguration device tree property. In addition, making this change cleans up the code that parses the LMB information as we no longer need to worry about device tree format. This allows us to discard one of the first steps on memory hotplug where we make a working copy of the device tree property and convert the entire property to cpu format. Instead we just use the LMB array directly while holding the memory hotplug lock. This patch also moves the updating of the device tree property to powerpc/mm/drmem.c. This allows to the hotplug code to work without needing to know the device tree format and provides a single routine for updating the device tree property. This new routine will handle determination of the proper device tree format and generate a properly formatted device tree property. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/drmem.h | 18 + arch/powerpc/mm/drmem.c | 81 ++++ arch/powerpc/platforms/pseries/hotplug-memory.c | 516 +++++++++--------------- 3 files changed, 297 insertions(+), 318 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 9dbfd38fa385..464e061a6616 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -40,9 +40,27 @@ static inline u32 drmem_lmb_size(void) return drmem_info->lmb_size; } +#define DRMEM_LMB_RESERVED 0x80000000 + +static inline void drmem_mark_lmb_reserved(struct drmem_lmb *lmb) +{ + lmb->flags |= DRMEM_LMB_RESERVED; +} + +static inline void drmem_remove_lmb_reservation(struct drmem_lmb *lmb) +{ + lmb->flags &= ~DRMEM_LMB_RESERVED; +} + +static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb) +{ + return lmb->flags & DRMEM_LMB_RESERVED; +} + u64 drmem_lmb_memory_max(void); void __init walk_drmem_lmbs(struct device_node *dn, void (*func)(struct drmem_lmb *, const __be32 **)); +int drmem_update_dt(void); #ifdef CONFIG_PPC_PSERIES void __init walk_drmem_lmbs_early(unsigned long node, diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 5888ac3ca8a9..05ba0c7dcbed 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -29,6 +29,87 @@ u64 drmem_lmb_memory_max(void) return last_lmb->base_addr + drmem_lmb_size(); } +static u32 drmem_lmb_flags(struct drmem_lmb *lmb) +{ + /* + * Return the value of the lmb flags field minus the reserved + * bit used internally for hotplug processing. + */ + return lmb->flags & ~DRMEM_LMB_RESERVED; +} + +static struct property *clone_property(struct property *prop, u32 prop_sz) +{ + struct property *new_prop; + + new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); + if (!new_prop) + return NULL; + + new_prop->name = kstrdup(prop->name, GFP_KERNEL); + new_prop->value = kzalloc(prop_sz, GFP_KERNEL); + if (!new_prop->name || !new_prop->value) { + kfree(new_prop->name); + kfree(new_prop->value); + kfree(new_prop); + return NULL; + } + + new_prop->length = prop_sz; +#if defined(CONFIG_OF_DYNAMIC) + of_property_set_flag(new_prop, OF_DYNAMIC); +#endif + return new_prop; +} + +static int drmem_update_dt_v1(struct device_node *memory, + struct property *prop) +{ + struct property *new_prop; + struct of_drconf_cell *dr_cell; + struct drmem_lmb *lmb; + u32 *p; + + new_prop = clone_property(prop, prop->length); + if (!new_prop) + return -1; + + p = new_prop->value; + *p++ = cpu_to_be32(drmem_info->n_lmbs); + + dr_cell = (struct of_drconf_cell *)p; + + for_each_drmem_lmb(lmb) { + dr_cell->base_addr = cpu_to_be64(lmb->base_addr); + dr_cell->drc_index = cpu_to_be32(lmb->drc_index); + dr_cell->aa_index = cpu_to_be32(lmb->aa_index); + dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb)); + + dr_cell++; + } + + of_update_property(memory, new_prop); + return 0; +} + +int drmem_update_dt(void) +{ + struct device_node *memory; + struct property *prop; + int rc = -1; + + memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (!memory) + return -1; + + prop = of_find_property(memory, "ibm,dynamic-memory", NULL); + if (prop) + rc = drmem_update_dt_v1(memory, prop); + + of_node_put(memory); + return rc; +} + static void __init read_drconf_v1_cell(struct drmem_lmb *lmb, const __be32 **prop) { diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 1d48ab424bd9..2043bc2b77b3 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "pseries.h" static bool rtas_hp_event; @@ -100,100 +101,6 @@ static struct property *dlpar_clone_property(struct property *prop, return new_prop; } -static struct property *dlpar_clone_drconf_property(struct device_node *dn) -{ - struct property *prop, *new_prop; - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; - int i; - - prop = of_find_property(dn, "ibm,dynamic-memory", NULL); - if (!prop) - return NULL; - - new_prop = dlpar_clone_property(prop, prop->length); - if (!new_prop) - return NULL; - - /* Convert the property to cpu endian-ness */ - p = new_prop->value; - *p = be32_to_cpu(*p); - - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - - for (i = 0; i < num_lmbs; i++) { - lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr); - lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index); - lmbs[i].aa_index = be32_to_cpu(lmbs[i].aa_index); - lmbs[i].flags = be32_to_cpu(lmbs[i].flags); - } - - return new_prop; -} - -static void dlpar_update_drconf_property(struct device_node *dn, - struct property *prop) -{ - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; - int i; - - /* Convert the property back to BE */ - p = prop->value; - num_lmbs = *p; - *p = cpu_to_be32(*p); - p++; - - lmbs = (struct of_drconf_cell *)p; - for (i = 0; i < num_lmbs; i++) { - lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr); - lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index); - lmbs[i].aa_index = cpu_to_be32(lmbs[i].aa_index); - lmbs[i].flags = cpu_to_be32(lmbs[i].flags); - } - - rtas_hp_event = true; - of_update_property(dn, prop); - rtas_hp_event = false; -} - -static int dlpar_update_device_tree_lmb(struct of_drconf_cell *lmb) -{ - struct device_node *dn; - struct property *prop; - struct of_drconf_cell *lmbs; - u32 *p, num_lmbs; - int i; - - dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); - if (!dn) - return -ENODEV; - - prop = dlpar_clone_drconf_property(dn); - if (!prop) { - of_node_put(dn); - return -ENODEV; - } - - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - - for (i = 0; i < num_lmbs; i++) { - if (lmbs[i].drc_index == lmb->drc_index) { - lmbs[i].flags = lmb->flags; - lmbs[i].aa_index = lmb->aa_index; - - dlpar_update_drconf_property(dn, prop); - break; - } - } - - of_node_put(dn); - return 0; -} - static u32 find_aa_index(struct device_node *dr_node, struct property *ala_prop, const u32 *lmb_assoc) { @@ -256,7 +163,7 @@ static u32 find_aa_index(struct device_node *dr_node, return aa_index; } -static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb) +static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb) { struct device_node *parent, *lmb_node, *dr_node; struct property *ala_prop; @@ -299,9 +206,9 @@ static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb) return aa_index; } -static int dlpar_add_device_tree_lmb(struct of_drconf_cell *lmb) +static int dlpar_add_device_tree_lmb(struct drmem_lmb *lmb) { - int aa_index; + int rc, aa_index; lmb->flags |= DRCONF_MEM_ASSIGNED; @@ -313,17 +220,29 @@ static int dlpar_add_device_tree_lmb(struct of_drconf_cell *lmb) } lmb->aa_index = aa_index; - return dlpar_update_device_tree_lmb(lmb); + + rtas_hp_event = true; + rc = drmem_update_dt(); + rtas_hp_event = false; + + return rc; } -static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb) +static int dlpar_remove_device_tree_lmb(struct drmem_lmb *lmb) { + int rc; + lmb->flags &= ~DRCONF_MEM_ASSIGNED; lmb->aa_index = 0xffffffff; - return dlpar_update_device_tree_lmb(lmb); + + rtas_hp_event = true; + rc = drmem_update_dt(); + rtas_hp_event = false; + + return rc; } -static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) +static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb) { unsigned long section_nr; struct mem_section *mem_sect; @@ -336,7 +255,36 @@ static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) return mem_block; } -static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online) +static int get_lmb_range(u32 drc_index, int n_lmbs, + struct drmem_lmb **start_lmb, + struct drmem_lmb **end_lmb) +{ + struct drmem_lmb *lmb, *start, *end; + struct drmem_lmb *last_lmb; + + start = NULL; + for_each_drmem_lmb(lmb) { + if (lmb->drc_index == drc_index) { + start = lmb; + break; + } + } + + if (!start) + return -EINVAL; + + end = &start[n_lmbs - 1]; + + last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1]; + if (end > last_lmb) + return -EINVAL; + + *start_lmb = start; + *end_lmb = end; + return 0; +} + +static int dlpar_change_lmb_state(struct drmem_lmb *lmb, bool online) { struct memory_block *mem_block; int rc; @@ -357,13 +305,13 @@ static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online) return rc; } -static int dlpar_online_lmb(struct of_drconf_cell *lmb) +static int dlpar_online_lmb(struct drmem_lmb *lmb) { return dlpar_change_lmb_state(lmb, true); } #ifdef CONFIG_MEMORY_HOTREMOVE -static int dlpar_offline_lmb(struct of_drconf_cell *lmb) +static int dlpar_offline_lmb(struct drmem_lmb *lmb) { return dlpar_change_lmb_state(lmb, false); } @@ -426,7 +374,7 @@ static int pseries_remove_mem_node(struct device_node *np) return 0; } -static bool lmb_is_removable(struct of_drconf_cell *lmb) +static bool lmb_is_removable(struct drmem_lmb *lmb) { int i, scns_per_block; int rc = 1; @@ -458,9 +406,9 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb) return rc ? true : false; } -static int dlpar_add_lmb(struct of_drconf_cell *); +static int dlpar_add_lmb(struct drmem_lmb *); -static int dlpar_remove_lmb(struct of_drconf_cell *lmb) +static int dlpar_remove_lmb(struct drmem_lmb *lmb) { unsigned long block_sz; int nid, rc; @@ -484,28 +432,25 @@ static int dlpar_remove_lmb(struct of_drconf_cell *lmb) return 0; } -static int dlpar_memory_remove_by_count(u32 lmbs_to_remove, - struct property *prop) +static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) { - struct of_drconf_cell *lmbs; + struct drmem_lmb *lmb; int lmbs_removed = 0; int lmbs_available = 0; - u32 num_lmbs, *p; - int i, rc; + int rc; pr_info("Attempting to hot-remove %d LMB(s)\n", lmbs_to_remove); if (lmbs_to_remove == 0) return -EINVAL; - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - /* Validate that there are enough LMBs to satisfy the request */ - for (i = 0; i < num_lmbs; i++) { - if (lmb_is_removable(&lmbs[i])) + for_each_drmem_lmb(lmb) { + if (lmb_is_removable(lmb)) lmbs_available++; + + if (lmbs_available == lmbs_to_remove) + break; } if (lmbs_available < lmbs_to_remove) { @@ -514,45 +459,47 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove, return -EINVAL; } - for (i = 0; i < num_lmbs && lmbs_removed < lmbs_to_remove; i++) { - rc = dlpar_remove_lmb(&lmbs[i]); + for_each_drmem_lmb(lmb) { + rc = dlpar_remove_lmb(lmb); if (rc) continue; - lmbs_removed++; - /* Mark this lmb so we can add it later if all of the * requested LMBs cannot be removed. */ - lmbs[i].reserved = 1; + drmem_mark_lmb_reserved(lmb); + + lmbs_removed++; + if (lmbs_removed == lmbs_to_remove) + break; } if (lmbs_removed != lmbs_to_remove) { pr_err("Memory hot-remove failed, adding LMB's back\n"); - for (i = 0; i < num_lmbs; i++) { - if (!lmbs[i].reserved) + for_each_drmem_lmb(lmb) { + if (!drmem_lmb_reserved(lmb)) continue; - rc = dlpar_add_lmb(&lmbs[i]); + rc = dlpar_add_lmb(lmb); if (rc) pr_err("Failed to add LMB back, drc index %x\n", - lmbs[i].drc_index); + lmb->drc_index); - lmbs[i].reserved = 0; + drmem_remove_lmb_reservation(lmb); } rc = -EINVAL; } else { - for (i = 0; i < num_lmbs; i++) { - if (!lmbs[i].reserved) + for_each_drmem_lmb(lmb) { + if (!drmem_lmb_reserved(lmb)) continue; - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); pr_info("Memory at %llx was hot-removed\n", - lmbs[i].base_addr); + lmb->base_addr); - lmbs[i].reserved = 0; + drmem_remove_lmb_reservation(lmb); } rc = 0; } @@ -560,26 +507,21 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove, return rc; } -static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) +static int dlpar_memory_remove_by_index(u32 drc_index) { - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; + struct drmem_lmb *lmb; int lmb_found; - int i, rc; + int rc; pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index); - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - lmb_found = 0; - for (i = 0; i < num_lmbs; i++) { - if (lmbs[i].drc_index == drc_index) { + for_each_drmem_lmb(lmb) { + if (lmb->drc_index == drc_index) { lmb_found = 1; - rc = dlpar_remove_lmb(&lmbs[i]); + rc = dlpar_remove_lmb(lmb); if (!rc) - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); break; } @@ -590,35 +532,30 @@ static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) if (rc) pr_info("Failed to hot-remove memory at %llx\n", - lmbs[i].base_addr); + lmb->base_addr); else - pr_info("Memory at %llx was hot-removed\n", lmbs[i].base_addr); + pr_info("Memory at %llx was hot-removed\n", lmb->base_addr); return rc; } -static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop) +static int dlpar_memory_readd_by_index(u32 drc_index) { - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; + struct drmem_lmb *lmb; int lmb_found; - int i, rc; + int rc; pr_info("Attempting to update LMB, drc index %x\n", drc_index); - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - lmb_found = 0; - for (i = 0; i < num_lmbs; i++) { - if (lmbs[i].drc_index == drc_index) { + for_each_drmem_lmb(lmb) { + if (lmb->drc_index == drc_index) { lmb_found = 1; - rc = dlpar_remove_lmb(&lmbs[i]); + rc = dlpar_remove_lmb(lmb); if (!rc) { - rc = dlpar_add_lmb(&lmbs[i]); + rc = dlpar_add_lmb(lmb); if (rc) - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); } break; } @@ -629,20 +566,18 @@ static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop) if (rc) pr_info("Failed to update memory at %llx\n", - lmbs[i].base_addr); + lmb->base_addr); else - pr_info("Memory at %llx was updated\n", lmbs[i].base_addr); + pr_info("Memory at %llx was updated\n", lmb->base_addr); return rc; } -static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index, - struct property *prop) +static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) { - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; - int i, rc, start_lmb_found; - int lmbs_available = 0, start_index = 0, end_index; + struct drmem_lmb *lmb, *start_lmb, *end_lmb; + int lmbs_available = 0; + int rc; pr_info("Attempting to hot-remove %u LMB(s) at %x\n", lmbs_to_remove, drc_index); @@ -650,29 +585,13 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index, if (lmbs_to_remove == 0) return -EINVAL; - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - start_lmb_found = 0; - - /* Navigate to drc_index */ - while (start_index < num_lmbs) { - if (lmbs[start_index].drc_index == drc_index) { - start_lmb_found = 1; - break; - } - - start_index++; - } - - if (!start_lmb_found) + rc = get_lmb_range(drc_index, lmbs_to_remove, &start_lmb, &end_lmb); + if (rc) return -EINVAL; - end_index = start_index + lmbs_to_remove; - /* Validate that there are enough LMBs to satisfy the request */ - for (i = start_index; i < end_index; i++) { - if (lmbs[i].flags & DRCONF_MEM_RESERVED) + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (lmb->flags & DRCONF_MEM_RESERVED) break; lmbs_available++; @@ -681,42 +600,43 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index, if (lmbs_available < lmbs_to_remove) return -EINVAL; - for (i = start_index; i < end_index; i++) { - if (!(lmbs[i].flags & DRCONF_MEM_ASSIGNED)) + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) continue; - rc = dlpar_remove_lmb(&lmbs[i]); + rc = dlpar_remove_lmb(lmb); if (rc) break; - lmbs[i].reserved = 1; + drmem_mark_lmb_reserved(lmb); } if (rc) { pr_err("Memory indexed-count-remove failed, adding any removed LMBs\n"); - for (i = start_index; i < end_index; i++) { - if (!lmbs[i].reserved) + + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (!drmem_lmb_reserved(lmb)) continue; - rc = dlpar_add_lmb(&lmbs[i]); + rc = dlpar_add_lmb(lmb); if (rc) pr_err("Failed to add LMB, drc index %x\n", - be32_to_cpu(lmbs[i].drc_index)); + lmb->drc_index); - lmbs[i].reserved = 0; + drmem_remove_lmb_reservation(lmb); } rc = -EINVAL; } else { - for (i = start_index; i < end_index; i++) { - if (!lmbs[i].reserved) + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (!drmem_lmb_reserved(lmb)) continue; - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); pr_info("Memory at %llx (drc index %x) was hot-removed\n", - lmbs[i].base_addr, lmbs[i].drc_index); + lmb->base_addr, lmb->drc_index); - lmbs[i].reserved = 0; + drmem_remove_lmb_reservation(lmb); } } @@ -737,32 +657,30 @@ static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog) { return -EOPNOTSUPP; } -static int dlpar_remove_lmb(struct of_drconf_cell *lmb) +static int dlpar_remove_lmb(struct drmem_lmb *lmb) { return -EOPNOTSUPP; } -static int dlpar_memory_remove_by_count(u32 lmbs_to_remove, - struct property *prop) +static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) { return -EOPNOTSUPP; } -static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) +static int dlpar_memory_remove_by_index(u32 drc_index) { return -EOPNOTSUPP; } -static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop) +static int dlpar_memory_readd_by_index(u32 drc_index) { return -EOPNOTSUPP; } -static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index, - struct property *prop) +static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) { return -EOPNOTSUPP; } #endif /* CONFIG_MEMORY_HOTREMOVE */ -static int dlpar_add_lmb(struct of_drconf_cell *lmb) +static int dlpar_add_lmb(struct drmem_lmb *lmb) { unsigned long block_sz; int nid, rc; @@ -801,77 +719,79 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb) return rc; } -static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop) +static int dlpar_memory_add_by_count(u32 lmbs_to_add) { - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; + struct drmem_lmb *lmb; int lmbs_available = 0; int lmbs_added = 0; - int i, rc; + int rc; pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add); if (lmbs_to_add == 0) return -EINVAL; - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - /* Validate that there are enough LMBs to satisfy the request */ - for (i = 0; i < num_lmbs; i++) { - if (!(lmbs[i].flags & DRCONF_MEM_ASSIGNED)) + for_each_drmem_lmb(lmb) { + if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) lmbs_available++; + + if (lmbs_available == lmbs_to_add) + break; } if (lmbs_available < lmbs_to_add) return -EINVAL; - for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) { - if (lmbs[i].flags & DRCONF_MEM_ASSIGNED) + for_each_drmem_lmb(lmb) { + if (lmb->flags & DRCONF_MEM_ASSIGNED) continue; - rc = dlpar_acquire_drc(lmbs[i].drc_index); + rc = dlpar_acquire_drc(lmb->drc_index); if (rc) continue; - rc = dlpar_add_lmb(&lmbs[i]); + rc = dlpar_add_lmb(lmb); if (rc) { - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); continue; } - lmbs_added++; - /* Mark this lmb so we can remove it later if all of the * requested LMBs cannot be added. */ - lmbs[i].reserved = 1; + drmem_mark_lmb_reserved(lmb); + + lmbs_added++; + if (lmbs_added == lmbs_to_add) + break; } if (lmbs_added != lmbs_to_add) { pr_err("Memory hot-add failed, removing any added LMBs\n"); - for (i = 0; i < num_lmbs; i++) { - if (!lmbs[i].reserved) + for_each_drmem_lmb(lmb) { + if (!drmem_lmb_reserved(lmb)) continue; - rc = dlpar_remove_lmb(&lmbs[i]); + rc = dlpar_remove_lmb(lmb); if (rc) pr_err("Failed to remove LMB, drc index %x\n", - be32_to_cpu(lmbs[i].drc_index)); + lmb->drc_index); else - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); + + drmem_remove_lmb_reservation(lmb); } rc = -EINVAL; } else { - for (i = 0; i < num_lmbs; i++) { - if (!lmbs[i].reserved) + for_each_drmem_lmb(lmb) { + if (!drmem_lmb_reserved(lmb)) continue; pr_info("Memory at %llx (drc index %x) was hot-added\n", - lmbs[i].base_addr, lmbs[i].drc_index); - lmbs[i].reserved = 0; + lmb->base_addr, lmb->drc_index); + drmem_remove_lmb_reservation(lmb); } rc = 0; } @@ -879,28 +799,22 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add, struct property *prop) return rc; } -static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop) +static int dlpar_memory_add_by_index(u32 drc_index) { - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; - int i, lmb_found; - int rc; + struct drmem_lmb *lmb; + int rc, lmb_found; pr_info("Attempting to hot-add LMB, drc index %x\n", drc_index); - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - lmb_found = 0; - for (i = 0; i < num_lmbs; i++) { - if (lmbs[i].drc_index == drc_index) { + for_each_drmem_lmb(lmb) { + if (lmb->drc_index == drc_index) { lmb_found = 1; - rc = dlpar_acquire_drc(lmbs[i].drc_index); + rc = dlpar_acquire_drc(lmb->drc_index); if (!rc) { - rc = dlpar_add_lmb(&lmbs[i]); + rc = dlpar_add_lmb(lmb); if (rc) - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); } break; @@ -914,18 +828,16 @@ static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop) pr_info("Failed to hot-add memory, drc index %x\n", drc_index); else pr_info("Memory at %llx (drc index %x) was hot-added\n", - lmbs[i].base_addr, drc_index); + lmb->base_addr, drc_index); return rc; } -static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index, - struct property *prop) +static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index) { - struct of_drconf_cell *lmbs; - u32 num_lmbs, *p; - int i, rc, start_lmb_found; - int lmbs_available = 0, start_index = 0, end_index; + struct drmem_lmb *lmb, *start_lmb, *end_lmb; + int lmbs_available = 0; + int rc; pr_info("Attempting to hot-add %u LMB(s) at index %x\n", lmbs_to_add, drc_index); @@ -933,29 +845,13 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index, if (lmbs_to_add == 0) return -EINVAL; - p = prop->value; - num_lmbs = *p++; - lmbs = (struct of_drconf_cell *)p; - start_lmb_found = 0; - - /* Navigate to drc_index */ - while (start_index < num_lmbs) { - if (lmbs[start_index].drc_index == drc_index) { - start_lmb_found = 1; - break; - } - - start_index++; - } - - if (!start_lmb_found) + rc = get_lmb_range(drc_index, lmbs_to_add, &start_lmb, &end_lmb); + if (rc) return -EINVAL; - end_index = start_index + lmbs_to_add; - /* Validate that the LMBs in this range are not reserved */ - for (i = start_index; i < end_index; i++) { - if (lmbs[i].flags & DRCONF_MEM_RESERVED) + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (lmb->flags & DRCONF_MEM_RESERVED) break; lmbs_available++; @@ -964,46 +860,48 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index, if (lmbs_available < lmbs_to_add) return -EINVAL; - for (i = start_index; i < end_index; i++) { - if (lmbs[i].flags & DRCONF_MEM_ASSIGNED) + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (lmb->flags & DRCONF_MEM_ASSIGNED) continue; - rc = dlpar_acquire_drc(lmbs[i].drc_index); + rc = dlpar_acquire_drc(lmb->drc_index); if (rc) break; - rc = dlpar_add_lmb(&lmbs[i]); + rc = dlpar_add_lmb(lmb); if (rc) { - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); break; } - lmbs[i].reserved = 1; + drmem_mark_lmb_reserved(lmb); } if (rc) { pr_err("Memory indexed-count-add failed, removing any added LMBs\n"); - for (i = start_index; i < end_index; i++) { - if (!lmbs[i].reserved) + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (!drmem_lmb_reserved(lmb)) continue; - rc = dlpar_remove_lmb(&lmbs[i]); + rc = dlpar_remove_lmb(lmb); if (rc) pr_err("Failed to remove LMB, drc index %x\n", - be32_to_cpu(lmbs[i].drc_index)); + lmb->drc_index); else - dlpar_release_drc(lmbs[i].drc_index); + dlpar_release_drc(lmb->drc_index); + + drmem_remove_lmb_reservation(lmb); } rc = -EINVAL; } else { - for (i = start_index; i < end_index; i++) { - if (!lmbs[i].reserved) + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (!drmem_lmb_reserved(lmb)) continue; pr_info("Memory at %llx (drc index %x) was hot-added\n", - lmbs[i].base_addr, lmbs[i].drc_index); - lmbs[i].reserved = 0; + lmb->base_addr, lmb->drc_index); + drmem_remove_lmb_reservation(lmb); } } @@ -1012,37 +910,23 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index, int dlpar_memory(struct pseries_hp_errorlog *hp_elog) { - struct device_node *dn; - struct property *prop; u32 count, drc_index; int rc; lock_device_hotplug(); - dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); - if (!dn) { - rc = -EINVAL; - goto dlpar_memory_out; - } - - prop = dlpar_clone_drconf_property(dn); - if (!prop) { - rc = -EINVAL; - goto dlpar_memory_out; - } - switch (hp_elog->action) { case PSERIES_HP_ELOG_ACTION_ADD: if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) { count = hp_elog->_drc_u.drc_count; - rc = dlpar_memory_add_by_count(count, prop); + rc = dlpar_memory_add_by_count(count); } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) { drc_index = hp_elog->_drc_u.drc_index; - rc = dlpar_memory_add_by_index(drc_index, prop); + rc = dlpar_memory_add_by_index(drc_index); } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) { count = hp_elog->_drc_u.ic.count; drc_index = hp_elog->_drc_u.ic.index; - rc = dlpar_memory_add_by_ic(count, drc_index, prop); + rc = dlpar_memory_add_by_ic(count, drc_index); } else { rc = -EINVAL; } @@ -1051,14 +935,14 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) case PSERIES_HP_ELOG_ACTION_REMOVE: if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) { count = hp_elog->_drc_u.drc_count; - rc = dlpar_memory_remove_by_count(count, prop); + rc = dlpar_memory_remove_by_count(count); } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) { drc_index = hp_elog->_drc_u.drc_index; - rc = dlpar_memory_remove_by_index(drc_index, prop); + rc = dlpar_memory_remove_by_index(drc_index); } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) { count = hp_elog->_drc_u.ic.count; drc_index = hp_elog->_drc_u.ic.index; - rc = dlpar_memory_remove_by_ic(count, drc_index, prop); + rc = dlpar_memory_remove_by_ic(count, drc_index); } else { rc = -EINVAL; } @@ -1066,7 +950,7 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) break; case PSERIES_HP_ELOG_ACTION_READD: drc_index = hp_elog->_drc_u.drc_index; - rc = dlpar_memory_readd_by_index(drc_index, prop); + rc = dlpar_memory_readd_by_index(drc_index); break; default: pr_err("Invalid action (%d) specified\n", hp_elog->action); @@ -1074,10 +958,6 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) break; } - dlpar_free_property(prop); - -dlpar_memory_out: - of_node_put(dn); unlock_device_hotplug(); return rc; } -- cgit v1.2.3 From 2c77721552e565e900705f4499067f8d37be3976 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 1 Dec 2017 10:47:42 -0600 Subject: powerpc: Move of_drconf_cell struct to asm/drmem.h Now that the powerpc code parses dynamic reconfiguration memory LMB information from the LMB array and not the device tree directly we can move the of_drconf_cell struct to drmem.h where it fits better. In addition, the struct is renamed to of_drconf_cell_v1 in anticipation of upcoming support for version 2 of the dynamic reconfiguration property and the members are typed as __be* values to reflect how they exist in the device tree. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/drmem.h | 19 +++++++++++++++++++ arch/powerpc/include/asm/prom.h | 16 ---------------- arch/powerpc/mm/drmem.c | 4 ++-- arch/powerpc/platforms/pseries/hotplug-memory.c | 6 +++--- 4 files changed, 24 insertions(+), 21 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 464e061a6616..54241456420e 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -35,6 +35,25 @@ extern struct drmem_lmb_info *drmem_info; &drmem_info->lmbs[0], \ &drmem_info->lmbs[drmem_info->n_lmbs - 1]) +/* + * The of_drconf_cell_v1 struct defines the layout of the LMB data + * specified in the ibm,dynamic-memory device tree property. + * The property itself is a 32-bit value specifying the number of + * LMBs followed by an array of of_drconf_cell_v1 entries, one + * per LMB. + */ +struct of_drconf_cell_v1 { + __be64 base_addr; + __be32 drc_index; + __be32 reserved; + __be32 aa_index; + __be32 flags; +}; + +#define DRCONF_MEM_ASSIGNED 0x00000008 +#define DRCONF_MEM_AI_INVALID 0x00000040 +#define DRCONF_MEM_RESERVED 0x00000080 + static inline u32 drmem_lmb_size(void) { return drmem_info->lmb_size; diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 825bd5998701..f0a30a003bd8 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -80,22 +80,6 @@ extern void of_instantiate_rtc(void); extern int of_get_ibm_chip_id(struct device_node *np); -/* The of_drconf_cell struct defines the layout of the LMB array - * specified in the device tree property - * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory - */ -struct of_drconf_cell { - u64 base_addr; - u32 drc_index; - u32 reserved; - u32 aa_index; - u32 flags; -}; - -#define DRCONF_MEM_ASSIGNED 0x00000008 -#define DRCONF_MEM_AI_INVALID 0x00000040 -#define DRCONF_MEM_RESERVED 0x00000080 - /* * There are two methods for telling firmware what our capabilities are. * Newer machines have an "ibm,client-architecture-support" method on the diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 05ba0c7dcbed..61bc183013ae 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -66,7 +66,7 @@ static int drmem_update_dt_v1(struct device_node *memory, struct property *prop) { struct property *new_prop; - struct of_drconf_cell *dr_cell; + struct of_drconf_cell_v1 *dr_cell; struct drmem_lmb *lmb; u32 *p; @@ -77,7 +77,7 @@ static int drmem_update_dt_v1(struct device_node *memory, p = new_prop->value; *p++ = cpu_to_be32(drmem_info->n_lmbs); - dr_cell = (struct of_drconf_cell *)p; + dr_cell = (struct of_drconf_cell_v1 *)p; for_each_drmem_lmb(lmb) { dr_cell->base_addr = cpu_to_be64(lmb->base_addr); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 2043bc2b77b3..c1578f54c626 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -996,7 +996,7 @@ static int pseries_add_mem_node(struct device_node *np) static int pseries_update_drconf_memory(struct of_reconfig_data *pr) { - struct of_drconf_cell *new_drmem, *old_drmem; + struct of_drconf_cell_v1 *new_drmem, *old_drmem; unsigned long memblock_size; u32 entries; __be32 *p; @@ -1019,11 +1019,11 @@ static int pseries_update_drconf_memory(struct of_reconfig_data *pr) * of_drconf_cell's. */ entries = be32_to_cpu(*p++); - old_drmem = (struct of_drconf_cell *)p; + old_drmem = (struct of_drconf_cell_v1 *)p; p = (__be32 *)pr->prop->value; p++; - new_drmem = (struct of_drconf_cell *)p; + new_drmem = (struct of_drconf_cell_v1 *)p; for (i = 0; i < entries; i++) { if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) && -- cgit v1.2.3 From 2b31e3aec1dbaae5f0a6f0e485fcb1ff1aceb6cf Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 1 Dec 2017 10:47:53 -0600 Subject: powerpc/drmem: Add support for ibm, dynamic-memory-v2 property The Power Hypervisor has introduced a new device tree format for the property describing the dynamic reconfiguration LMBs for a system, ibm,dynamic-memory-v2. This new format condenses the size of the property, especially on large memory systems, by reporting sets of LMBs that have the same properties (flags and associativity array index). This patch updates the powerpc/mm/drmem.c code to provide routines that can parse the new device tree format during the walk_drmem_lmb* routines used during boot, the creation of the LMB array, and updating the device tree to create a new property in the proper format for ibm,dynamic-memory-v2. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/drmem.h | 13 +++ arch/powerpc/mm/drmem.c | 192 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 201 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 54241456420e..ce242b9ea8c6 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -50,6 +50,19 @@ struct of_drconf_cell_v1 { __be32 flags; }; +/* + * Version 2 of the ibm,dynamic-memory property is defined as a + * 32-bit value specifying the number of LMB sets followed by an + * array of of_drconf_cell_v2 entries, one per LMB set. + */ +struct of_drconf_cell_v2 { + u32 seq_lmbs; + u64 base_addr; + u32 drc_index; + u32 aa_index; + u32 flags; +} __packed; + #define DRCONF_MEM_ASSIGNED 0x00000008 #define DRCONF_MEM_AI_INVALID 0x00000040 #define DRCONF_MEM_RESERVED 0x00000080 diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 61bc183013ae..1604110c4238 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -92,6 +92,84 @@ static int drmem_update_dt_v1(struct device_node *memory, return 0; } +static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, + struct drmem_lmb *lmb) +{ + dr_cell->base_addr = cpu_to_be64(lmb->base_addr); + dr_cell->drc_index = cpu_to_be32(lmb->drc_index); + dr_cell->aa_index = cpu_to_be32(lmb->aa_index); + dr_cell->flags = cpu_to_be32(lmb->flags); +} + +static int drmem_update_dt_v2(struct device_node *memory, + struct property *prop) +{ + struct property *new_prop; + struct of_drconf_cell_v2 *dr_cell; + struct drmem_lmb *lmb, *prev_lmb; + u32 lmb_sets, prop_sz, seq_lmbs; + u32 *p; + + /* First pass, determine how many LMB sets are needed. */ + lmb_sets = 0; + prev_lmb = NULL; + for_each_drmem_lmb(lmb) { + if (!prev_lmb) { + prev_lmb = lmb; + lmb_sets++; + continue; + } + + if (prev_lmb->aa_index != lmb->aa_index || + prev_lmb->flags != lmb->flags) + lmb_sets++; + + prev_lmb = lmb; + } + + prop_sz = lmb_sets * sizeof(*dr_cell) + sizeof(__be32); + new_prop = clone_property(prop, prop_sz); + if (!new_prop) + return -1; + + p = new_prop->value; + *p++ = cpu_to_be32(lmb_sets); + + dr_cell = (struct of_drconf_cell_v2 *)p; + + /* Second pass, populate the LMB set data */ + prev_lmb = NULL; + seq_lmbs = 0; + for_each_drmem_lmb(lmb) { + if (prev_lmb == NULL) { + /* Start of first LMB set */ + prev_lmb = lmb; + init_drconf_v2_cell(dr_cell, lmb); + seq_lmbs++; + continue; + } + + if (prev_lmb->aa_index != lmb->aa_index || + prev_lmb->flags != lmb->flags) { + /* end of one set, start of another */ + dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs); + dr_cell++; + + init_drconf_v2_cell(dr_cell, lmb); + seq_lmbs = 1; + } else { + seq_lmbs++; + } + + prev_lmb = lmb; + } + + /* close out last LMB set */ + dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs); + of_update_property(memory, new_prop); + return 0; +} + int drmem_update_dt(void) { struct device_node *memory; @@ -103,8 +181,13 @@ int drmem_update_dt(void) return -1; prop = of_find_property(memory, "ibm,dynamic-memory", NULL); - if (prop) + if (prop) { rc = drmem_update_dt_v1(memory, prop); + } else { + prop = of_find_property(memory, "ibm,dynamic-memory-v2", NULL); + if (prop) + rc = drmem_update_dt_v2(memory, prop); + } of_node_put(memory); return rc; @@ -140,6 +223,47 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, } } +static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, + const __be32 **prop) +{ + const __be32 *p = *prop; + + dr_cell->seq_lmbs = of_read_number(p++, 1); + dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p); + dr_cell->drc_index = of_read_number(p++, 1); + dr_cell->aa_index = of_read_number(p++, 1); + dr_cell->flags = of_read_number(p++, 1); + + *prop = p; +} + +static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, + void (*func)(struct drmem_lmb *, const __be32 **)) +{ + struct of_drconf_cell_v2 dr_cell; + struct drmem_lmb lmb; + u32 i, j, lmb_sets; + + lmb_sets = of_read_number(prop++, 1); + + for (i = 0; i < lmb_sets; i++) { + read_drconf_v2_cell(&dr_cell, &prop); + + for (j = 0; j < dr_cell.seq_lmbs; j++) { + lmb.base_addr = dr_cell.base_addr; + dr_cell.base_addr += drmem_lmb_size(); + + lmb.drc_index = dr_cell.drc_index; + dr_cell.drc_index++; + + lmb.aa_index = dr_cell.aa_index; + lmb.flags = dr_cell.flags; + + func(&lmb, &usm); + } + } +} + #ifdef CONFIG_PPC_PSERIES void __init walk_drmem_lmbs_early(unsigned long node, void (*func)(struct drmem_lmb *, const __be32 **)) @@ -156,8 +280,14 @@ void __init walk_drmem_lmbs_early(unsigned long node, usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len); prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &len); - if (prop) + if (prop) { __walk_drmem_v1_lmbs(prop, usm, func); + } else { + prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory-v2", + &len); + if (prop) + __walk_drmem_v2_lmbs(prop, usm, func); + } memblock_dump_all(); } @@ -210,8 +340,13 @@ void __init walk_drmem_lmbs(struct device_node *dn, usm = of_get_usable_memory(dn); prop = of_get_property(dn, "ibm,dynamic-memory", NULL); - if (prop) + if (prop) { __walk_drmem_v1_lmbs(prop, usm, func); + } else { + prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL); + if (prop) + __walk_drmem_v2_lmbs(prop, usm, func); + } } static void __init init_drmem_v1_lmbs(const __be32 *prop) @@ -229,6 +364,50 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop) read_drconf_v1_cell(lmb, &prop); } +static void __init init_drmem_v2_lmbs(const __be32 *prop) +{ + struct drmem_lmb *lmb; + struct of_drconf_cell_v2 dr_cell; + const __be32 *p; + u32 i, j, lmb_sets; + int lmb_index; + + lmb_sets = of_read_number(prop++, 1); + + /* first pass, calculate the number of LMBs */ + p = prop; + for (i = 0; i < lmb_sets; i++) { + read_drconf_v2_cell(&dr_cell, &p); + drmem_info->n_lmbs += dr_cell.seq_lmbs; + } + + drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb), + GFP_KERNEL); + if (!drmem_info->lmbs) + return; + + /* second pass, read in the LMB information */ + lmb_index = 0; + p = prop; + + for (i = 0; i < lmb_sets; i++) { + read_drconf_v2_cell(&dr_cell, &p); + + for (j = 0; j < dr_cell.seq_lmbs; j++) { + lmb = &drmem_info->lmbs[lmb_index++]; + + lmb->base_addr = dr_cell.base_addr; + dr_cell.base_addr += drmem_info->lmb_size; + + lmb->drc_index = dr_cell.drc_index; + dr_cell.drc_index++; + + lmb->aa_index = dr_cell.aa_index; + lmb->flags = dr_cell.flags; + } + } +} + static int __init drmem_init(void) { struct device_node *dn; @@ -246,8 +425,13 @@ static int __init drmem_init(void) } prop = of_get_property(dn, "ibm,dynamic-memory", NULL); - if (prop) + if (prop) { init_drmem_v1_lmbs(prop); + } else { + prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL); + if (prop) + init_drmem_v2_lmbs(prop); + } of_node_put(dn); return 0; -- cgit v1.2.3 From 0c38ed6f6f0b78a404fe46767d21504b37af8705 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 1 Dec 2017 10:48:03 -0600 Subject: powerpc/pseries: Enable support of ibm,dynamic-memory-v2 Add required bits to the architecture vector to enable support of the ibm,dynamic-memory-v2 device tree property. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/firmware.h | 3 ++- arch/powerpc/include/asm/prom.h | 1 + arch/powerpc/kernel/prom_init.c | 1 + arch/powerpc/platforms/pseries/firmware.c | 1 + 4 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 8645897472b1..832df61f30ef 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -51,6 +51,7 @@ #define FW_FEATURE_BEST_ENERGY ASM_CONST(0x0000000080000000) #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000) #define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000) +#define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000) #ifndef __ASSEMBLY__ @@ -67,7 +68,7 @@ enum { FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO | FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY | FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN | - FW_FEATURE_HPT_RESIZE, + FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL, FW_FEATURE_POWERNV_ALWAYS = 0, diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index f0a30a003bd8..9f27866e3126 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -143,6 +143,7 @@ extern int of_get_ibm_chip_id(struct device_node *np); #define OV5_PFO_HW_842 0x1140 /* PFO Compression Accelerator */ #define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */ #define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */ +#define OV5_DRMEM_V2 0x1680 /* ibm,dynamic-reconfiguration-v2 */ #define OV5_XIVE_SUPPORT 0x17C0 /* XIVE Exploitation Support Mask */ #define OV5_XIVE_LEGACY 0x1700 /* XIVE legacy mode Only */ #define OV5_XIVE_EXPLOIT 0x1740 /* XIVE exploitation mode Only */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 02190e90c7ae..acf4b2e0530c 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -869,6 +869,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .reserved2 = 0, .reserved3 = 0, .subprocessors = 1, + .byte22 = OV5_FEAT(OV5_DRMEM_V2), .intarch = 0, .mmu = 0, .hash_ext = 0, diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index 63cc82ad58ac..aac3ea2911b2 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -114,6 +114,7 @@ static __initdata struct vec5_fw_feature vec5_fw_features_table[] = { {FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY}, {FW_FEATURE_PRRN, OV5_PRRN}, + {FW_FEATURE_DRMEM_V2, OV5_DRMEM_V2}, }; static void __init fw_vec5_feature_init(const char *vec5, unsigned long len) -- cgit v1.2.3 From 94d3084a0f8cbca9ceea2835ac842c7c92c8790c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 8 Dec 2017 17:34:29 +0100 Subject: powerpc/32s: Fix compile error with CONFIG_PPC_PTDUMP This patch remove CONFIG_PPC_HTDUMP if not PPC_BOOK3S_64 to avoid below compile failure on BOOK3S_32: In file included from arch/powerpc/mm/dump_hashpagetable.c:27:0: ./arch/powerpc/include/asm/plpar_wrappers.h: In function 'get_cede_latency_hint': ./arch/powerpc/include/asm/plpar_wrappers.h:27:2: error: implicit declaration of function 'get_lppaca' [-Werror=implicit-function-declaration] ... arch/powerpc/mm/dump_hashpagetable.c: At top level: arch/powerpc/mm/dump_hashpagetable.c:69:13: error: 'SLB_VSID_B' undeclared here (not in a function) ... arch/powerpc/mm/dump_hashpagetable.c:506:38: error: 'VMEMMAP_BASE' undeclared (first use in this function) arch/powerpc/mm/dump_hashpagetable.c:506:35: error: assignment makes integer from pointer without a cast [-Werror] Fixes: dd5ac03e0955 ("powerpc/mm: Fix page table dump build on non-Book3S") Signed-off-by: Christophe Leroy [mpe: Trim change log] Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 657c33cd4eee..5d18169fff58 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -368,7 +368,7 @@ config PPC_PTDUMP config PPC_HTDUMP def_bool y - depends on PPC_PTDUMP && PPC_BOOK3S + depends on PPC_PTDUMP && PPC_BOOK3S_64 config PPC_FAST_ENDIAN_SWITCH bool "Deprecated fast endian-switch syscall" -- cgit v1.2.3 From 9625e69a3818cc00fd85632719b4c6c12f7f1b1e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 31 Jan 2017 17:54:38 -0800 Subject: powerpc: make use of for_each_node_by_type() instead of open-coding it Instead of manually coding the loop with of_find_node_by_type(), let's switch to the standard macro for iterating over nodes with given type. Also fixed a couple of refcount leaks in the aforementioned loops. Signed-off-by: Dmitry Torokhov Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup-common.c | 9 +++++++-- arch/powerpc/platforms/cell/spu_manage.c | 4 ++-- arch/powerpc/platforms/powermac/pic.c | 7 ++++--- arch/powerpc/platforms/powermac/smp.c | 4 ++-- arch/powerpc/platforms/pseries/lparcfg.c | 4 ++-- 5 files changed, 17 insertions(+), 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 2075322cd225..a20e390cb0dd 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -470,13 +470,13 @@ static void __init cpu_init_thread_core_maps(int tpc) */ void __init smp_setup_cpu_maps(void) { - struct device_node *dn = NULL; + struct device_node *dn; int cpu = 0; int nthreads = 1; DBG("smp_setup_cpu_maps()\n"); - while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) { + for_each_node_by_type(dn, "cpu") { const __be32 *intserv; __be32 cpu_be; int j, len; @@ -516,6 +516,11 @@ void __init smp_setup_cpu_maps(void) set_cpu_possible(cpu, true); cpu++; } + + if (cpu >= nr_cpu_ids) { + of_node_put(dn); + break; + } } /* If no SMT supported, nthreads is forced to 1 */ diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c index f636ee22b203..5c409c98cca8 100644 --- a/arch/powerpc/platforms/cell/spu_manage.c +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -292,12 +292,12 @@ static int __init of_enumerate_spus(int (*fn)(void *data)) unsigned int n = 0; ret = -ENODEV; - for (node = of_find_node_by_type(NULL, "spe"); - node; node = of_find_node_by_type(node, "spe")) { + for_each_node_by_type(node, "spe") { ret = fn(node); if (ret) { printk(KERN_WARNING "%s: Error initializing %s\n", __func__, node->name); + of_node_put(node); break; } n++; diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 666fceda5e48..57bbff465964 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -486,15 +486,16 @@ static int __init pmac_pic_probe_mpic(void) struct device_node *np, *master = NULL, *slave = NULL; /* We can have up to 2 MPICs cascaded */ - for (np = NULL; (np = of_find_node_by_type(np, "open-pic")) - != NULL;) { + for_each_node_by_type(np, "open-pic") { if (master == NULL && of_get_property(np, "interrupts", NULL) == NULL) master = of_node_get(np); else if (slave == NULL) slave = of_node_get(np); - if (master && slave) + if (master && slave) { + of_node_put(np); break; + } } /* Check for bogus setups */ diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 2cd99eb30762..95275e0e2efa 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -774,8 +774,8 @@ static void __init smp_core99_probe(void) if (ppc_md.progress) ppc_md.progress("smp_core99_probe", 0x345); /* Count CPUs in the device-tree */ - for (cpus = NULL; (cpus = of_find_node_by_type(cpus, "cpu")) != NULL;) - ++ncpus; + for_each_node_by_type(cpus, "cpu") + ++ncpus; printk(KERN_INFO "PowerMac SMP probe found %d cpus\n", ncpus); diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index b2706c483067..f43eafe30c7f 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -370,10 +370,10 @@ static void parse_system_parameter_string(struct seq_file *m) */ static int lparcfg_count_active_processors(void) { - struct device_node *cpus_dn = NULL; + struct device_node *cpus_dn; int count = 0; - while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) { + for_each_node_by_type(cpus_dn, "cpu") { #ifdef LPARCFG_DEBUG printk(KERN_ERR "cpus_dn %p\n", cpus_dn); #endif -- cgit v1.2.3 From 04b9c96eae72d862726f2f4bfcec2078240c33c5 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 15 Dec 2017 19:14:54 +1100 Subject: powerpc/crash: Remove the test for cpu_online in the IPI callback Our check was extra cautious, we've audited crash_send_ipi and it sends an IPI only to online CPU's. Removal of this check should have not functional impact on crash kdump. Signed-off-by: Balbir Singh Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/crash.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index cbabb5adccd9..29c56ca2ddfd 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -69,9 +69,6 @@ static void crash_ipi_callback(struct pt_regs *regs) int cpu = smp_processor_id(); - if (!cpu_online(cpu)) - return; - hard_irq_disable(); if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { crash_save_cpu(regs, cpu); -- cgit v1.2.3 From 4145f358644b970fcff293c09fdcc7939e8527d2 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 15 Dec 2017 19:14:55 +1100 Subject: powernv/kdump: Fix cases where the kdump kernel can get HMI's Certain HMI's such as malfunction error propagate through all threads/core on the system. If a thread was offline prior to us crashing the system and jumping to the kdump kernel, bad things happen when it wakes up due to an HMI in the kdump kernel. There are several possible ways to solve this problem 1. Put the offline cores in a state such that they are not woken up for machine check and HMI errors. This does not work, since we might need to wake up offline threads to handle TB errors 2. Ignore HMI errors, setup HMEER to mask HMI errors, but this still leads the window open for any MCEs and masking them for the duration of the dump might be a concern 3. Wake up offline CPUs, as in send them to crash_ipi_callback (not wake them up as in mark them online as seen by the hotplug). kexec does a wake_online_cpus() call, this patch does something similar, but instead sends an IPI and forces them to crash_ipi_callback() This patch takes approach #3. Care is taken to enable this only for powenv platforms via crash_wake_offline (a global value set at setup time). The crash code sends out IPI's to all CPU's which then move to crash_ipi_callback and kexec_smp_wait(). Signed-off-by: Balbir Singh Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kexec.h | 2 ++ arch/powerpc/kernel/crash.c | 13 ++++++++++++- arch/powerpc/kernel/smp.c | 18 ++++++++++++++++++ arch/powerpc/platforms/powernv/smp.c | 28 ++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 4419d435639a..9dcbfa6bbb91 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -73,6 +73,8 @@ extern void kexec_smp_wait(void); /* get and clear naca physid, wait for master to copy new code to 0 */ extern int crashing_cpu; extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)); +extern void crash_ipi_callback(struct pt_regs *); +extern int crash_wake_offline; struct kimage; struct pt_regs; diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 29c56ca2ddfd..00b215125d3e 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -44,6 +44,14 @@ #define REAL_MODE_TIMEOUT 10000 static int time_to_dump; +/* + * crash_wake_offline should be set to 1 by platforms that intend to wake + * up offline cpus prior to jumping to a kdump kernel. Currently powernv + * sets it to 1, since we want to avoid things from happening when an + * offline CPU wakes up due to something like an HMI (malfunction error), + * which propagates to all threads. + */ +int crash_wake_offline; #define CRASH_HANDLER_MAX 3 /* List of shutdown handles */ @@ -63,7 +71,7 @@ static int handle_fault(struct pt_regs *regs) #ifdef CONFIG_SMP static atomic_t cpus_in_crash; -static void crash_ipi_callback(struct pt_regs *regs) +void crash_ipi_callback(struct pt_regs *regs) { static cpumask_t cpus_state_saved = CPU_MASK_NONE; @@ -106,6 +114,9 @@ static void crash_kexec_prepare_cpus(int cpu) printk(KERN_EMERG "Sending IPI to other CPUs\n"); + if (crash_wake_offline) + ncpus = num_present_cpus() - 1; + crash_send_ipi(crash_ipi_callback); smp_wmb(); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index e0a4c1f82e25..bbe7634b3a43 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -543,7 +543,25 @@ void smp_send_debugger_break(void) #ifdef CONFIG_KEXEC_CORE void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { + int cpu; + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000); + if (kdump_in_progress() && crash_wake_offline) { + for_each_present_cpu(cpu) { + if (cpu_online(cpu)) + continue; + /* + * crash_ipi_callback will wait for + * all cpus, including offline CPUs. + * We don't care about nmi_ipi_function. + * Offline cpus will jump straight into + * crash_ipi_callback, we can skip the + * entire NMI dance and waiting for + * cpus to clear pending mask, etc. + */ + do_smp_send_nmi_ipi(cpu); + } + } } #endif diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index ba030669eca1..9664c8461f03 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include "powernv.h" @@ -209,9 +211,32 @@ static void pnv_smp_cpu_kill_self(void) } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); + } else if ((srr1 & wmask) == SRR1_WAKERESET) { + irq_set_pending_from_srr1(srr1); + /* Does not return */ } + smp_mb(); + /* + * For kdump kernels, we process the ipi and jump to + * crash_ipi_callback + */ + if (kdump_in_progress()) { + /* + * If we got to this point, we've not used + * NMI's, otherwise we would have gone + * via the SRR1_WAKERESET path. We are + * using regular IPI's for waking up offline + * threads. + */ + struct pt_regs regs; + + ppc_save_regs(®s); + crash_ipi_callback(®s); + /* Does not return */ + } + if (cpu_core_split_required()) continue; @@ -371,5 +396,8 @@ void __init pnv_smp_init(void) #ifdef CONFIG_HOTPLUG_CPU ppc_md.cpu_die = pnv_smp_cpu_kill_self; +#ifdef CONFIG_KEXEC_CORE + crash_wake_offline = 1; +#endif #endif } -- cgit v1.2.3 From 9a3b849bfe5cba18492acf5add6ba544e43e024b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 16 Jan 2018 08:29:49 +0100 Subject: powerpc/8xx: do not select CONFIG_PPC_LIB_RHEAP Since commit 0e6e01ff694ee ("CPM/QE: use genalloc to manage CPM/QE muram"), rheap is not used anymore. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/Kconfig.cputype | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index ae07470fde3c..c838581f81f1 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -33,7 +33,6 @@ config PPC_85xx config PPC_8xx bool "Freescale 8xx" select FSL_SOC - select PPC_LIB_RHEAP select SYS_SUPPORTS_HUGETLBFS config 40x -- cgit v1.2.3 From 2a45addd21de25f41c8f21a6f08f0980e583d596 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Jan 2018 13:45:19 +0100 Subject: powerpc/8xx: Remove CPU6 ERRATA Workaround CPU6 ERRATA affects only MPC860 revisions prior to C.0. Manufacturing of those revisiosn was stopped in 1999-2000. Therefore, it has been almost 20 years since this ERRATA has been fixed in the silicon. This patch removes the workaround for that ERRATA. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/configs/mpc866_ads_defconfig | 1 - arch/powerpc/include/asm/reg_8xx.h | 82 ------------------------------- arch/powerpc/kernel/head_8xx.S | 54 +++++--------------- arch/powerpc/platforms/8xx/Kconfig | 12 ----- 4 files changed, 12 insertions(+), 137 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig index f1f176c29fa3..5320735395e7 100644 --- a/arch/powerpc/configs/mpc866_ads_defconfig +++ b/arch/powerpc/configs/mpc866_ads_defconfig @@ -13,7 +13,6 @@ CONFIG_EXPERT=y CONFIG_PARTITION_ADVANCED=y CONFIG_MPC86XADS=y CONFIG_8xx_COPYBACK=y -CONFIG_8xx_CPU6=y CONFIG_GEN_RTC=y CONFIG_HZ_1000=y CONFIG_MATH_EMULATION=y diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h index 53a7e2955d3e..7192eece6c3e 100644 --- a/arch/powerpc/include/asm/reg_8xx.h +++ b/arch/powerpc/include/asm/reg_8xx.h @@ -66,86 +66,4 @@ #define DC_DFWT 0x40000000 /* Data cache is forced write through */ #define DC_LES 0x20000000 /* Caches are little endian mode */ -#ifdef CONFIG_8xx_CPU6 -#define do_mtspr_cpu6(rn, rn_addr, v) \ - do { \ - int _reg_cpu6 = rn_addr, _tmp_cpu6; \ - asm volatile("stw %0, %1;" \ - "lwz %0, %1;" \ - "mtspr " __stringify(rn) ",%2" : \ - : "r" (_reg_cpu6), "m"(_tmp_cpu6), \ - "r" ((unsigned long)(v)) \ - : "memory"); \ - } while (0) - -#define do_mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : \ - : "r" ((unsigned long)(v)) \ - : "memory") -#define mtspr(rn, v) \ - do { \ - if (rn == SPRN_IMMR) \ - do_mtspr_cpu6(rn, 0x3d30, v); \ - else if (rn == SPRN_IC_CST) \ - do_mtspr_cpu6(rn, 0x2110, v); \ - else if (rn == SPRN_IC_ADR) \ - do_mtspr_cpu6(rn, 0x2310, v); \ - else if (rn == SPRN_IC_DAT) \ - do_mtspr_cpu6(rn, 0x2510, v); \ - else if (rn == SPRN_DC_CST) \ - do_mtspr_cpu6(rn, 0x3110, v); \ - else if (rn == SPRN_DC_ADR) \ - do_mtspr_cpu6(rn, 0x3310, v); \ - else if (rn == SPRN_DC_DAT) \ - do_mtspr_cpu6(rn, 0x3510, v); \ - else if (rn == SPRN_MI_CTR) \ - do_mtspr_cpu6(rn, 0x2180, v); \ - else if (rn == SPRN_MI_AP) \ - do_mtspr_cpu6(rn, 0x2580, v); \ - else if (rn == SPRN_MI_EPN) \ - do_mtspr_cpu6(rn, 0x2780, v); \ - else if (rn == SPRN_MI_TWC) \ - do_mtspr_cpu6(rn, 0x2b80, v); \ - else if (rn == SPRN_MI_RPN) \ - do_mtspr_cpu6(rn, 0x2d80, v); \ - else if (rn == SPRN_MI_CAM) \ - do_mtspr_cpu6(rn, 0x2190, v); \ - else if (rn == SPRN_MI_RAM0) \ - do_mtspr_cpu6(rn, 0x2390, v); \ - else if (rn == SPRN_MI_RAM1) \ - do_mtspr_cpu6(rn, 0x2590, v); \ - else if (rn == SPRN_MD_CTR) \ - do_mtspr_cpu6(rn, 0x3180, v); \ - else if (rn == SPRN_M_CASID) \ - do_mtspr_cpu6(rn, 0x3380, v); \ - else if (rn == SPRN_MD_AP) \ - do_mtspr_cpu6(rn, 0x3580, v); \ - else if (rn == SPRN_MD_EPN) \ - do_mtspr_cpu6(rn, 0x3780, v); \ - else if (rn == SPRN_M_TWB) \ - do_mtspr_cpu6(rn, 0x3980, v); \ - else if (rn == SPRN_MD_TWC) \ - do_mtspr_cpu6(rn, 0x3b80, v); \ - else if (rn == SPRN_MD_RPN) \ - do_mtspr_cpu6(rn, 0x3d80, v); \ - else if (rn == SPRN_M_TW) \ - do_mtspr_cpu6(rn, 0x3f80, v); \ - else if (rn == SPRN_MD_CAM) \ - do_mtspr_cpu6(rn, 0x3190, v); \ - else if (rn == SPRN_MD_RAM0) \ - do_mtspr_cpu6(rn, 0x3390, v); \ - else if (rn == SPRN_MD_RAM1) \ - do_mtspr_cpu6(rn, 0x3590, v); \ - else if (rn == SPRN_DEC) \ - do_mtspr_cpu6(rn, 0x2c00, v); \ - else if (rn == SPRN_TBWL) \ - do_mtspr_cpu6(rn, 0x3880, v); \ - else if (rn == SPRN_TBWU) \ - do_mtspr_cpu6(rn, 0x3a80, v); \ - else if (rn == SPRN_DPDR) \ - do_mtspr_cpu6(rn, 0x2d30, v); \ - else \ - do_mtspr(rn, v); \ - } while (0) -#endif - #endif /* _ASM_POWERPC_REG_8xx_H */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 4fee00d414e8..728b513c07b8 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -33,23 +33,6 @@ #include #include -/* Macro to make the code more readable. */ -#ifdef CONFIG_8xx_CPU6 -#define SPRN_MI_TWC_ADDR 0x2b80 -#define SPRN_MI_RPN_ADDR 0x2d80 -#define SPRN_MD_TWC_ADDR 0x3b80 -#define SPRN_MD_RPN_ADDR 0x3d80 - -#define MTSPR_CPU6(spr, reg, treg) \ - li treg, spr##_ADDR; \ - stw treg, 12(r0); \ - lwz treg, 12(r0); \ - mtspr spr, reg -#else -#define MTSPR_CPU6(spr, reg, treg) \ - mtspr spr, reg -#endif - #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 /* By simply checking Address >= 0x80000000, we know if its a kernel address */ #define SIMPLE_KERNEL_ADDRESS 1 @@ -326,7 +309,7 @@ SystemCall: #endif InstructionTLBMiss: -#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 @@ -393,7 +376,7 @@ _ENTRY(ITLBMiss_cmp) /* Insert the APG into the TWC from the Linux PTE. */ rlwimi r11, r10, 0, 25, 26 /* Load the MI_TWC with the attributes for this "segment." */ - MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ + mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ #if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) rlwimi r10, r11, 1, MI_SPS16K @@ -415,10 +398,10 @@ _ENTRY(ITLBMiss_cmp) #else rlwimi r10, r11, 0, 0x0ff8 /* Set 24-27, clear 20-23,28 */ #endif - MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ + mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ /* Restore registers */ -#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mfspr r3, SPRN_SPRG_SCRATCH2 #endif EXCEPTION_EPILOG_0 @@ -512,7 +495,7 @@ _ENTRY(DTLBMiss_jmp) * It is bit 25 in the Linux PTE and bit 30 in the TWC */ rlwimi r11, r10, 32-5, 30, 30 - MTSPR_CPU6(SPRN_MD_TWC, r11, r3) + mtspr SPRN_MD_TWC, r11 /* In 4k pages mode, SPS (bit 28) in RPN must match PS[1] (bit 29) * In 16k pages mode, SPS is always 1 */ @@ -546,7 +529,7 @@ _ENTRY(DTLBMiss_jmp) rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ #endif rlwimi r10, r11, 0, 20, 20 /* clear 20 */ - MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ + mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ /* Restore registers */ mfspr r3, SPRN_SPRG_SCRATCH2 @@ -684,12 +667,12 @@ DTLBMissIMMR: mtcr r3 /* Set 512k byte guarded page and mark it valid */ li r10, MD_PS512K | MD_GUARDED | MD_SVALID - MTSPR_CPU6(SPRN_MD_TWC, r10, r11) + mtspr SPRN_MD_TWC, r10 mfspr r10, SPRN_IMMR /* Get current IMMR */ rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ _PAGE_PRESENT | _PAGE_NO_CACHE - MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */ + mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ li r11, RPN_PATTERN mtspr SPRN_DAR, r11 /* Tag DAR */ @@ -701,11 +684,11 @@ DTLBMissLinear: mtcr r3 /* Set 8M byte page and mark it valid */ li r11, MD_PS8MEG | MD_SVALID - MTSPR_CPU6(SPRN_MD_TWC, r11, r3) + mtspr SPRN_MD_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ _PAGE_PRESENT - MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */ + mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ li r11, RPN_PATTERN mtspr SPRN_DAR, r11 /* Tag DAR */ @@ -718,11 +701,11 @@ ITLBMissLinear: mtcr r3 /* Set 8M byte page and mark it valid */ li r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC - MTSPR_CPU6(SPRN_MI_TWC, r11, r3) + mtspr SPRN_MI_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ _PAGE_PRESENT - MTSPR_CPU6(SPRN_MI_RPN, r10, r11) /* Update TLB entry */ + mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ mfspr r3, SPRN_SPRG_SCRATCH2 EXCEPTION_EPILOG_0 @@ -933,13 +916,6 @@ start_here: */ lis r6, swapper_pg_dir@ha tophys(r6,r6) -#ifdef CONFIG_8xx_CPU6 - lis r4, cpu6_errata_word@h - ori r4, r4, cpu6_errata_word@l - li r3, 0x3f80 - stw r3, 12(r4) - lwz r3, 12(r4) -#endif mtspr SPRN_M_TW, r6 lis r4,2f@h ori r4,r4,2f@l @@ -1094,12 +1070,6 @@ swapper_pg_dir: abatron_pteptrs: .space 8 -#ifdef CONFIG_8xx_CPU6 - .globl cpu6_errata_word -cpu6_errata_word: - .space 16 -#endif - #ifdef CONFIG_PPC_8xx_PERF_EVENT .globl itlb_miss_counter itlb_miss_counter: diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index e2089d3de00c..d408162d5af4 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -116,18 +116,6 @@ config 8xx_GPIO If in doubt, say Y here. -config 8xx_CPU6 - bool "CPU6 Silicon Errata (860 Pre Rev. C)" - help - MPC860 CPUs, prior to Rev C have some bugs in the silicon, which - require workarounds for Linux (and most other OSes to work). If you - get a BUG() very early in boot, this might fix the problem. For - more details read the document entitled "MPC860 Family Device Errata - Reference" on Freescale's website. This option also incurs a - performance hit. - - If in doubt, say N here. - config 8xx_CPU15 bool "CPU15 Silicon Errata" depends on !HUGETLB_PAGE -- cgit v1.2.3 From bb9b5a8332533ac7935ca6edfc7ebf15a5366f11 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Jan 2018 13:45:21 +0100 Subject: powerpc/8xx: remove EXCEPTION_PROLOG/EPILOG_0 and change r3 to r12 EXCEPTION_PROLOG_0 and EXCEPTION_EPILOG_0 were added some time ago in order to regroup the two mtspr/mfspr to SCRATCH0 and SCRATCH1 and the mfcr/mtcr in order to ease entry and exit of function not using the full EXCEPTION_PROLOG. Since then, the mfcr/mtcr has been taken out, hence just leaving the two mtspr/mfspr in the macro. In order to improve readability of the exception functions, we remove those two macros and copy back the two mtspr/mfspr instead. As r10 and r11 are used for SCRATCH0 and SCRATCH1, lets also use r12 for SCRATCH2. It will also improve the readability/maintenance. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/head_8xx.S | 78 ++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 38 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 728b513c07b8..eda582b96dbf 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -117,15 +117,12 @@ turn_on_mmu: * task's thread_struct. */ #define EXCEPTION_PROLOG \ - EXCEPTION_PROLOG_0; \ + mtspr SPRN_SPRG_SCRATCH0, r10; \ + mtspr SPRN_SPRG_SCRATCH1, r11; \ mfcr r10; \ EXCEPTION_PROLOG_1; \ EXCEPTION_PROLOG_2 -#define EXCEPTION_PROLOG_0 \ - mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11 - #define EXCEPTION_PROLOG_1 \ mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ andi. r11,r11,MSR_PR; \ @@ -159,13 +156,6 @@ turn_on_mmu: SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) -/* - * Exception exit code. - */ -#define EXCEPTION_EPILOG_0 \ - mfspr r10,SPRN_SPRG_SCRATCH0; \ - mfspr r11,SPRN_SPRG_SCRATCH1 - /* * Note: code which follows this uses cr0.eq (set if from kernel), * r11, r12 (SRR0), and r9 (SRR1). @@ -309,10 +299,11 @@ SystemCall: #endif InstructionTLBMiss: + mtspr SPRN_SPRG_SCRATCH0, r10 + mtspr SPRN_SPRG_SCRATCH1, r11 #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) - mtspr SPRN_SPRG_SCRATCH2, r3 + mtspr SPRN_SPRG_SCRATCH2, r12 #endif - EXCEPTION_PROLOG_0 #ifdef CONFIG_PPC_8xx_PERF_EVENT lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) @@ -328,7 +319,7 @@ InstructionTLBMiss: /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) - mfcr r3 + mfcr r12 #endif #ifdef ITLB_MISS_KERNEL #if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT) @@ -371,7 +362,7 @@ _ENTRY(ITLBMiss_cmp) lwz r10, 0(r10) /* Get the pte */ 4: #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) - mtcr r3 + mtcr r12 #endif /* Insert the APG into the TWC from the Linux PTE. */ rlwimi r11, r10, 0, 25, 26 @@ -401,10 +392,11 @@ _ENTRY(ITLBMiss_cmp) mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ /* Restore registers */ + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) - mfspr r3, SPRN_SPRG_SCRATCH2 + mfspr r12, SPRN_SPRG_SCRATCH2 #endif - EXCEPTION_EPILOG_0 rfi #ifdef CONFIG_HUGETLB_PAGE @@ -434,15 +426,16 @@ _ENTRY(ITLBMiss_cmp) . = 0x1200 DataStoreTLBMiss: - mtspr SPRN_SPRG_SCRATCH2, r3 - EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH0, r10 + mtspr SPRN_SPRG_SCRATCH1, r11 + mtspr SPRN_SPRG_SCRATCH2, r12 #ifdef CONFIG_PPC_8xx_PERF_EVENT lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) addi r11, r11, 1 stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) #endif - mfcr r3 + mfcr r12 /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -482,7 +475,7 @@ _ENTRY(DTLBMiss_jmp) rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ 4: - mtcr r3 + mtcr r12 /* Insert the Guarded flag and APG into the TWC from the Linux PTE. * It is bit 26-27 of both the Linux PTE and the TWC (at least @@ -532,9 +525,10 @@ _ENTRY(DTLBMiss_jmp) mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ /* Restore registers */ - mfspr r3, SPRN_SPRG_SCRATCH2 mtspr SPRN_DAR, r11 /* Tag DAR */ - EXCEPTION_EPILOG_0 + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r12, SPRN_SPRG_SCRATCH2 rfi #ifdef CONFIG_HUGETLB_PAGE @@ -584,7 +578,8 @@ itlbie: */ . = 0x1400 DataTLBError: - EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH0, r10 + mtspr SPRN_SPRG_SCRATCH1, r11 mfcr r10 mfspr r11, SPRN_DAR @@ -619,7 +614,8 @@ dtlbie: */ . = 0x1c00 DataBreakpoint: - EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH0, r10 + mtspr SPRN_SPRG_SCRATCH1, r11 mfcr r10 mfspr r11, SPRN_SRR0 cmplwi cr0, r11, (dtlbie - PAGE_OFFSET)@l @@ -635,13 +631,15 @@ DataBreakpoint: EXC_XFER_EE(0x1c00, do_break) 11: mtcr r10 - EXCEPTION_EPILOG_0 + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 rfi #ifdef CONFIG_PPC_8xx_PERF_EVENT . = 0x1d00 InstructionBreakpoint: - EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH0, r10 + mtspr SPRN_SPRG_SCRATCH1, r11 lis r10, (instruction_counter - PAGE_OFFSET)@ha lwz r11, (instruction_counter - PAGE_OFFSET)@l(r10) addi r11, r11, -1 @@ -649,7 +647,8 @@ InstructionBreakpoint: lis r10, 0xffff ori r10, r10, 0x01 mtspr SPRN_COUNTA, r10 - EXCEPTION_EPILOG_0 + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 rfi #else EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) @@ -664,7 +663,7 @@ InstructionBreakpoint: * not enough space in the DataStoreTLBMiss area. */ DTLBMissIMMR: - mtcr r3 + mtcr r12 /* Set 512k byte guarded page and mark it valid */ li r10, MD_PS512K | MD_GUARDED | MD_SVALID mtspr SPRN_MD_TWC, r10 @@ -676,12 +675,13 @@ DTLBMissIMMR: li r11, RPN_PATTERN mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r3, SPRN_SPRG_SCRATCH2 - EXCEPTION_EPILOG_0 + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r12, SPRN_SPRG_SCRATCH2 rfi DTLBMissLinear: - mtcr r3 + mtcr r12 /* Set 8M byte page and mark it valid */ li r11, MD_PS8MEG | MD_SVALID mtspr SPRN_MD_TWC, r11 @@ -692,13 +692,14 @@ DTLBMissLinear: li r11, RPN_PATTERN mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r3, SPRN_SPRG_SCRATCH2 - EXCEPTION_EPILOG_0 + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r12, SPRN_SPRG_SCRATCH2 rfi #ifndef CONFIG_PIN_TLB_TEXT ITLBMissLinear: - mtcr r3 + mtcr r12 /* Set 8M byte page and mark it valid */ li r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC mtspr SPRN_MI_TWC, r11 @@ -707,8 +708,9 @@ ITLBMissLinear: _PAGE_PRESENT mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ - mfspr r3, SPRN_SPRG_SCRATCH2 - EXCEPTION_EPILOG_0 + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r12, SPRN_SPRG_SCRATCH2 rfi #endif -- cgit v1.2.3 From cd99ddbea250ee79027df6c469f51ad9e5452738 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Jan 2018 13:45:23 +0100 Subject: powerpc/8xx: Only perform perf counting when perf is in use. In TLB miss handlers, updating the perf counter is only useful when performing a perf analysis. As it has a noticeable overhead, let's only do it when needed. In order to do so, the exit of the miss handlers will be patched when starting/stopping 'perf': the first register restore instruction of each exit point will be replaced by a jump to the counting code. Once this is done, CONFIG_PPC_8xx_PERF_EVENT becomes useless as this feature doesn't add any overhead. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 2 ++ arch/powerpc/kernel/entry_32.S | 10 +++---- arch/powerpc/kernel/head_8xx.S | 47 ++++++++++++++++++++---------- arch/powerpc/perf/8xx-pmu.c | 52 +++++++++++++++++++++++++++++++--- arch/powerpc/perf/Makefile | 2 +- arch/powerpc/platforms/Kconfig.cputype | 7 ----- 6 files changed, 88 insertions(+), 32 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index ce0930d68857..ab5c1588b487 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -236,6 +236,7 @@ #define PPC_INST_RFCI 0x4c000066 #define PPC_INST_RFDI 0x4c00004e #define PPC_INST_RFMCI 0x4c00004c +#define PPC_INST_MFSPR 0x7c0002a6 #define PPC_INST_MFSPR_DSCR 0x7c1102a6 #define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR 0x7c1103a6 @@ -383,6 +384,7 @@ #define __PPC_ME64(s) __PPC_MB64(s) #define __PPC_BI(s) (((s) & 0x1f) << 16) #define __PPC_CT(t) (((t) & 0x0f) << 21) +#define __PPC_SPR(r) ((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11)) /* * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index e780e1fbf6c2..eb8d01bae8c6 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -211,7 +211,7 @@ transfer_to_handler_cont: mflr r9 lwz r11,0(r9) /* virtual address of handler */ lwz r9,4(r9) /* where to go when done */ -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 #endif #ifdef CONFIG_TRACE_IRQFLAGS @@ -301,7 +301,7 @@ stack_ovf: lis r9,StackOverflow@ha addi r9,r9,StackOverflow@l LOAD_MSR_KERNEL(r10,MSR_KERNEL) -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 #endif mtspr SPRN_SRR0,r9 @@ -430,7 +430,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) lwz r7,_NIP(r1) lwz r2,GPR2(r1) lwz r1,GPR1(r1) -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 #endif mtspr SPRN_SRR0,r7 @@ -727,7 +727,7 @@ fast_exception_return: lwz r10,_LINK(r11) mtlr r10 REST_GPR(10, r11) -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 #endif mtspr SPRN_SRR1,r9 @@ -978,7 +978,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) .globl exc_exit_restart exc_exit_restart: lwz r12,_NIP(r1) -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) mtspr SPRN_NRI, r0 #endif mtspr SPRN_SRR0,r12 diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index eda582b96dbf..641c9a9d4db2 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -304,12 +304,6 @@ InstructionTLBMiss: #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mtspr SPRN_SPRG_SCRATCH2, r12 #endif -#ifdef CONFIG_PPC_8xx_PERF_EVENT - lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha - lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) - addi r11, r11, 1 - stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) -#endif /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -392,6 +386,20 @@ _ENTRY(ITLBMiss_cmp) mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ /* Restore registers */ +_ENTRY(itlb_miss_exit_1) + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) + mfspr r12, SPRN_SPRG_SCRATCH2 +#endif + rfi +#ifdef CONFIG_PERF_EVENTS +_ENTRY(itlb_miss_perf) + lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha + lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) + addi r11, r11, 1 + stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) +#endif mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) @@ -429,12 +437,6 @@ DataStoreTLBMiss: mtspr SPRN_SPRG_SCRATCH0, r10 mtspr SPRN_SPRG_SCRATCH1, r11 mtspr SPRN_SPRG_SCRATCH2, r12 -#ifdef CONFIG_PPC_8xx_PERF_EVENT - lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha - lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) - addi r11, r11, 1 - stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) -#endif mfcr r12 /* If we are faulting a kernel address, we have to use the @@ -526,6 +528,18 @@ _ENTRY(DTLBMiss_jmp) /* Restore registers */ mtspr SPRN_DAR, r11 /* Tag DAR */ +_ENTRY(dtlb_miss_exit_1) + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r12, SPRN_SPRG_SCRATCH2 + rfi +#ifdef CONFIG_PERF_EVENTS +_ENTRY(dtlb_miss_perf) + lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha + lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) + addi r11, r11, 1 + stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) +#endif mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r12, SPRN_SPRG_SCRATCH2 @@ -635,7 +649,7 @@ DataBreakpoint: mfspr r11, SPRN_SPRG_SCRATCH1 rfi -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#ifdef CONFIG_PERF_EVENTS . = 0x1d00 InstructionBreakpoint: mtspr SPRN_SPRG_SCRATCH0, r10 @@ -675,6 +689,7 @@ DTLBMissIMMR: li r11, RPN_PATTERN mtspr SPRN_DAR, r11 /* Tag DAR */ +_ENTRY(dtlb_miss_exit_2) mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r12, SPRN_SPRG_SCRATCH2 @@ -692,6 +707,7 @@ DTLBMissLinear: li r11, RPN_PATTERN mtspr SPRN_DAR, r11 /* Tag DAR */ +_ENTRY(dtlb_miss_exit_3) mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r12, SPRN_SPRG_SCRATCH2 @@ -708,6 +724,7 @@ ITLBMissLinear: _PAGE_PRESENT mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ +_ENTRY(itlb_miss_exit_2) mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r12, SPRN_SPRG_SCRATCH2 @@ -1039,7 +1056,7 @@ initial_mmu: #endif /* Disable debug mode entry on breakpoints */ mfspr r8, SPRN_DER -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#ifdef CONFIG_PERF_EVENTS rlwinm r8, r8, 0, ~0xc #else rlwinm r8, r8, 0, ~0x8 @@ -1072,7 +1089,7 @@ swapper_pg_dir: abatron_pteptrs: .space 8 -#ifdef CONFIG_PPC_8xx_PERF_EVENT +#ifdef CONFIG_PERF_EVENTS .globl itlb_miss_counter itlb_miss_counter: .space 4 diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index 3c39f05f0af3..6c0020d1c561 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -18,6 +18,7 @@ #include #include #include +#include #define PERF_8xx_ID_CPU_CYCLES 1 #define PERF_8xx_ID_HW_INSTRUCTIONS 2 @@ -30,8 +31,13 @@ extern unsigned long itlb_miss_counter, dtlb_miss_counter; extern atomic_t instruction_counter; +extern unsigned int itlb_miss_perf, dtlb_miss_perf; +extern unsigned int itlb_miss_exit_1, itlb_miss_exit_2; +extern unsigned int dtlb_miss_exit_1, dtlb_miss_exit_2, dtlb_miss_exit_3; static atomic_t insn_ctr_ref; +static atomic_t itlb_miss_ref; +static atomic_t dtlb_miss_ref; static s64 get_insn_ctr(void) { @@ -96,9 +102,24 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags) val = get_insn_ctr(); break; case PERF_8xx_ID_ITLB_LOAD_MISS: + if (atomic_inc_return(&itlb_miss_ref) == 1) { + unsigned long target = (unsigned long)&itlb_miss_perf; + + patch_branch(&itlb_miss_exit_1, target, 0); +#ifndef CONFIG_PIN_TLB_TEXT + patch_branch(&itlb_miss_exit_2, target, 0); +#endif + } val = itlb_miss_counter; break; case PERF_8xx_ID_DTLB_LOAD_MISS: + if (atomic_inc_return(&dtlb_miss_ref) == 1) { + unsigned long target = (unsigned long)&dtlb_miss_perf; + + patch_branch(&dtlb_miss_exit_1, target, 0); + patch_branch(&dtlb_miss_exit_2, target, 0); + patch_branch(&dtlb_miss_exit_3, target, 0); + } val = dtlb_miss_counter; break; } @@ -143,13 +164,36 @@ static void mpc8xx_pmu_read(struct perf_event *event) static void mpc8xx_pmu_del(struct perf_event *event, int flags) { + /* mfspr r10, SPRN_SPRG_SCRATCH0 */ + unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) | + __PPC_SPR(SPRN_SPRG_SCRATCH0); + mpc8xx_pmu_read(event); - if (event_type(event) != PERF_8xx_ID_HW_INSTRUCTIONS) - return; /* If it was the last user, stop counting to avoid useles overhead */ - if (atomic_dec_return(&insn_ctr_ref) == 0) - mtspr(SPRN_ICTRL, 7); + switch (event_type(event)) { + case PERF_8xx_ID_CPU_CYCLES: + break; + case PERF_8xx_ID_HW_INSTRUCTIONS: + if (atomic_dec_return(&insn_ctr_ref) == 0) + mtspr(SPRN_ICTRL, 7); + break; + case PERF_8xx_ID_ITLB_LOAD_MISS: + if (atomic_dec_return(&itlb_miss_ref) == 0) { + patch_instruction(&itlb_miss_exit_1, insn); +#ifndef CONFIG_PIN_TLB_TEXT + patch_instruction(&itlb_miss_exit_2, insn); +#endif + } + break; + case PERF_8xx_ID_DTLB_LOAD_MISS: + if (atomic_dec_return(&dtlb_miss_ref) == 0) { + patch_instruction(&dtlb_miss_exit_1, insn); + patch_instruction(&dtlb_miss_exit_2, insn); + patch_instruction(&dtlb_miss_exit_3, insn); + } + break; + } } static struct pmu mpc8xx_pmu = { diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 225c9c86d7c0..57ebc655d2ac 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -15,7 +15,7 @@ obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o -obj-$(CONFIG_PPC_8xx_PERF_EVENT) += 8xx-pmu.o +obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC32) += $(obj32-y) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index c838581f81f1..a429d859f15d 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -167,13 +167,6 @@ config PPC_FPU bool default y if PPC64 -config PPC_8xx_PERF_EVENT - bool "PPC 8xx perf events" - depends on PPC_8xx && PERF_EVENTS - help - This is Performance Events support for PPC 8xx. The 8xx doesn't - have a PMU but some events are emulated using 8xx features. - config FSL_EMB_PERFMON bool "Freescale Embedded Perfmon" depends on E500 || PPC_83xx -- cgit v1.2.3 From 5f356497c38448874f2d491cd214e9c283f5592d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Jan 2018 13:45:25 +0100 Subject: powerpc/8xx: remove unused _PAGE_WRITETHRU _PAGE_WRITETHRU is only used in: * AMIGA_Z2RAM block driver which is never activated on powerPC * Video/FB driver which is for PPC_PMAC Therefore, no need to spend time in 8xx TLB miss handlers for handling it. And by removing it, we free up bit 20 which then avoids having to clear it on each TLB miss. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/32/pte-8xx.h | 3 +-- arch/powerpc/include/asm/nohash/pgtable.h | 2 ++ arch/powerpc/kernel/head_8xx.S | 5 ----- 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 6dc0180fd5c7..19a5ecaef265 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -41,8 +41,7 @@ #define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */ #define _PAGE_USER 0x0020 /* Copied to L1 APG lsb */ #define _PAGE_EXEC 0x0040 /* Copied to L1 APG */ -#define _PAGE_WRITETHRU 0x0080 /* software: caching is write through */ -#define _PAGE_ACCESSED 0x0800 /* software: page referenced */ +#define _PAGE_ACCESSED 0x0080 /* software: page referenced */ #define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index fc4376c8d444..e0864e683cf9 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -235,8 +235,10 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addre #define pgprot_cached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \ _PAGE_COHERENT)) +#if _PAGE_WRITETHRU != 0 #define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \ _PAGE_COHERENT | _PAGE_WRITETHRU)) +#endif #define pgprot_cached_noncoherent(prot) \ (__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL)) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 641c9a9d4db2..6399dcadf51d 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -486,10 +486,6 @@ _ENTRY(DTLBMiss_jmp) * above. */ rlwimi r11, r10, 0, 26, 27 - /* Insert the WriteThru flag into the TWC from the Linux PTE. - * It is bit 25 in the Linux PTE and bit 30 in the TWC - */ - rlwimi r11, r10, 32-5, 30, 30 mtspr SPRN_MD_TWC, r11 /* In 4k pages mode, SPS (bit 28) in RPN must match PS[1] (bit 29) @@ -523,7 +519,6 @@ _ENTRY(DTLBMiss_jmp) #else rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ #endif - rlwimi r10, r11, 0, 20, 20 /* clear 20 */ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ /* Restore registers */ -- cgit v1.2.3 From 812fadcb941a81d1f3948b10a95a4dce663da3e4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Jan 2018 13:45:27 +0100 Subject: powerpc/mm: extend _PAGE_PRIVILEGED to all CPUs commit ac29c64089b74 ("powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED") introduced _PAGE_PRIVILEGED for BOOK3S/64 This patch generalises _PAGE_PRIVILEGED for all CPUs, allowing to have either _PAGE_PRIVILEGED or _PAGE_USER or both. PPC_8xx has a _PAGE_SHARED flag which is set for and only for all non user pages. Lets rename it _PAGE_PRIVILEGED to remove confusion as it has nothing to do with Linux shared pages. On BookE, there's a _PAGE_BAP_SR which has to be set for kernel pages: defining _PAGE_PRIVILEGED as _PAGE_BAP_SR will make this generic Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 2 +- arch/powerpc/include/asm/nohash/32/pte-8xx.h | 10 +--------- arch/powerpc/include/asm/nohash/pte-book3e.h | 1 + arch/powerpc/include/asm/pte-common.h | 24 ++++++++++++++++-------- arch/powerpc/kernel/head_8xx.S | 6 +++--- arch/powerpc/mm/8xx_mmu.c | 2 +- arch/powerpc/mm/dump_linuxpagetables.c | 11 +---------- arch/powerpc/mm/pgtable.c | 3 ++- arch/powerpc/mm/pgtable_32.c | 9 +-------- arch/powerpc/mm/pgtable_64.c | 14 +------------- 10 files changed, 28 insertions(+), 54 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 23b03449e2f1..8e28febedac3 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -15,7 +15,7 @@ #define _PAGE_BIT_SWAP_TYPE 0 #define _PAGE_RO 0 -#define _PAGE_SHARED 0 +#define _PAGE_USER 0 #define _PAGE_EXEC 0x00001 /* execute permission */ #define _PAGE_WRITE 0x00002 /* write access allowed */ diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 19a5ecaef265..7c7040f015e2 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -31,7 +31,7 @@ /* Definitions for 8xx embedded chips. */ #define _PAGE_PRESENT 0x0001 /* Page is valid */ #define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */ -#define _PAGE_SHARED 0x0004 /* No ASID (context) compare */ +#define _PAGE_PRIVILEGED 0x0004 /* No ASID (context) compare */ #define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */ #define _PAGE_DIRTY 0x0100 /* C: page changed */ @@ -54,13 +54,5 @@ /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 -/* We need to add _PAGE_SHARED to kernel pages */ -#define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_RO) -#define _PAGE_KERNEL_ROX (_PAGE_SHARED | _PAGE_RO | _PAGE_EXEC) -#define _PAGE_KERNEL_RW (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ - _PAGE_HWWRITE) -#define _PAGE_KERNEL_RWX (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ - _PAGE_HWWRITE | _PAGE_EXEC) - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_32_PTE_8xx_H */ diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h index 2da4532ca377..ccee8eb509bb 100644 --- a/arch/powerpc/include/asm/nohash/pte-book3e.h +++ b/arch/powerpc/include/asm/nohash/pte-book3e.h @@ -55,6 +55,7 @@ #define _PAGE_KERNEL_RWX (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX) #define _PAGE_KERNEL_ROX (_PAGE_BAP_SR | _PAGE_BAP_SX) #define _PAGE_USER (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */ +#define _PAGE_PRIVILEGED (_PAGE_BAP_SR) #define _PAGE_HASHPTE 0 #define _PAGE_BUSY 0 diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index ce142ef99ba7..0e6595a1b9d8 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -8,9 +8,6 @@ #ifndef _PAGE_HASHPTE #define _PAGE_HASHPTE 0 #endif -#ifndef _PAGE_SHARED -#define _PAGE_SHARED 0 -#endif #ifndef _PAGE_HWWRITE #define _PAGE_HWWRITE 0 #endif @@ -45,6 +42,14 @@ #ifndef _PAGE_PTE #define _PAGE_PTE 0 #endif +/* At least one of _PAGE_PRIVILEGED or _PAGE_USER must be defined */ +#ifndef _PAGE_PRIVILEGED +#define _PAGE_PRIVILEGED 0 +#else +#ifndef _PAGE_USER +#define _PAGE_USER 0 +#endif +#endif #ifndef _PMD_PRESENT_MASK #define _PMD_PRESENT_MASK _PMD_PRESENT @@ -54,16 +59,18 @@ #define PMD_PAGE_SIZE(pmd) bad_call_to_PMD_PAGE_SIZE() #endif #ifndef _PAGE_KERNEL_RO -#define _PAGE_KERNEL_RO (_PAGE_RO) +#define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_RO) #endif #ifndef _PAGE_KERNEL_ROX -#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_RO) +#define _PAGE_KERNEL_ROX (_PAGE_PRIVILEGED | _PAGE_RO | _PAGE_EXEC) #endif #ifndef _PAGE_KERNEL_RW -#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE) +#define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \ + _PAGE_HWWRITE) #endif #ifndef _PAGE_KERNEL_RWX -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | _PAGE_EXEC) +#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \ + _PAGE_HWWRITE | _PAGE_EXEC) #endif #ifndef _PAGE_HPTEFLAGS #define _PAGE_HPTEFLAGS _PAGE_HASHPTE @@ -85,7 +92,7 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); */ static inline bool pte_user(pte_t pte) { - return (pte_val(pte) & _PAGE_USER) == _PAGE_USER; + return (pte_val(pte) & (_PAGE_USER | _PAGE_PRIVILEGED)) == _PAGE_USER; } #endif /* __ASSEMBLY__ */ @@ -116,6 +123,7 @@ static inline bool pte_user(pte_t pte) #define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \ _PAGE_WRITETHRU | _PAGE_ENDIAN | _PAGE_4K_PFN | \ _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | \ + _PAGE_PRIVILEGED | \ _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC) /* diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 6399dcadf51d..642680389b7e 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -678,7 +678,7 @@ DTLBMissIMMR: mtspr SPRN_MD_TWC, r10 mfspr r10, SPRN_IMMR /* Get current IMMR */ rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ + ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_PRESENT | _PAGE_NO_CACHE mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ @@ -696,7 +696,7 @@ DTLBMissLinear: li r11, MD_PS8MEG | MD_SVALID mtspr SPRN_MD_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ + ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_PRESENT mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ @@ -715,7 +715,7 @@ ITLBMissLinear: li r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC mtspr SPRN_MI_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ - ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ + ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_PRESENT mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index f29212e40f40..c2d9a6d709c3 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -67,7 +67,7 @@ void __init MMU_init_hw(void) /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ #ifdef CONFIG_PIN_TLB_DATA unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; - unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY; + unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY; #ifdef CONFIG_PIN_TLB_IMMR int i = 29; #else diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index 139dd1993eeb..d87ab2cabaa6 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -112,13 +112,8 @@ struct flag_info { static const struct flag_info flag_array[] = { { -#ifdef CONFIG_PPC_BOOK3S_64 - .mask = _PAGE_PRIVILEGED, - .val = 0, -#else - .mask = _PAGE_USER, + .mask = _PAGE_USER | _PAGE_PRIVILEGED, .val = _PAGE_USER, -#endif .set = "user", .clear = " ", }, { @@ -229,10 +224,6 @@ static const struct flag_info flag_array[] = { .mask = _PAGE_SPECIAL, .val = _PAGE_SPECIAL, .set = "special", - }, { - .mask = _PAGE_SHARED, - .val = _PAGE_SHARED, - .set = "shared", } }; diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index a03ff3d99e0c..9f361ae571e9 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -54,7 +54,8 @@ static inline int pte_looks_normal(pte_t pte) return 0; #else return (pte_val(pte) & - (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) == + (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER | + _PAGE_PRIVILEGED)) == (_PAGE_PRESENT | _PAGE_USER); #endif } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index f6c7f54c0515..d35d9ad3c1cd 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -98,14 +98,7 @@ ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ flags &= ~(_PAGE_USER | _PAGE_EXEC); - -#ifdef _PAGE_BAP_SR - /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format - * which means that we just cleared supervisor access... oops ;-) This - * restores it - */ - flags |= _PAGE_BAP_SR; -#endif + flags |= _PAGE_PRIVILEGED; return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 813ea22c3e00..c9a623c2d8a2 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -244,20 +244,8 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, /* * Force kernel mapping. */ -#if defined(CONFIG_PPC_BOOK3S_64) - flags |= _PAGE_PRIVILEGED; -#else flags &= ~_PAGE_USER; -#endif - - -#ifdef _PAGE_BAP_SR - /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format - * which means that we just cleared supervisor access... oops ;-) This - * restores it - */ - flags |= _PAGE_BAP_SR; -#endif + flags |= _PAGE_PRIVILEGED; if (ppc_md.ioremap) return ppc_md.ioremap(addr, size, flags, caller); -- cgit v1.2.3 From 351750331fc1580cdb60d2efef04238f5faa89fe Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Jan 2018 13:45:29 +0100 Subject: powerpc/mm: Introduce _PAGE_NA Today, PAGE_NONE is defined as a page not having _PAGE_USER. In some circunstances, when the CPU supports it, it might be better to be able to flag a page with NO ACCESS. In a following patch, the 8xx will switch user access being flagged in the PMD, therefore it will not be possible anymore to use _PAGE_USER as a way to flag a page with no access. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 + arch/powerpc/include/asm/nohash/32/pgtable.h | 2 +- arch/powerpc/include/asm/pte-common.h | 7 +++++-- arch/powerpc/mm/dump_linuxpagetables.c | 18 +++++++++++------- 4 files changed, 18 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 8e28febedac3..60a0d7fd82bc 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -14,6 +14,7 @@ */ #define _PAGE_BIT_SWAP_TYPE 0 +#define _PAGE_NA 0 #define _PAGE_RO 0 #define _PAGE_USER 0 diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index cc2bfec3aa3b..504a3c36ce5c 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -282,7 +282,7 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, { unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); - unsigned long clr = ~pte_val(entry) & _PAGE_RO; + unsigned long clr = ~pte_val(entry) & (_PAGE_RO | _PAGE_NA); pte_update(ptep, clr, set); } diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index 0e6595a1b9d8..426a902816c5 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -50,6 +50,9 @@ #define _PAGE_USER 0 #endif #endif +#ifndef _PAGE_NA +#define _PAGE_NA 0 +#endif #ifndef _PMD_PRESENT_MASK #define _PMD_PRESENT_MASK _PMD_PRESENT @@ -122,7 +125,7 @@ static inline bool pte_user(pte_t pte) /* Mask of bits returned by pte_pgprot() */ #define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \ _PAGE_WRITETHRU | _PAGE_ENDIAN | _PAGE_4K_PFN | \ - _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | \ + _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | _PAGE_NA | \ _PAGE_PRIVILEGED | \ _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC) @@ -150,7 +153,7 @@ static inline bool pte_user(pte_t pte) * * Note due to the way vm flags are laid out, the bits are XWR */ -#define PAGE_NONE __pgprot(_PAGE_BASE) +#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA) #define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) #define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \ _PAGE_EXEC) diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index d87ab2cabaa6..876e2a3c79f2 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -117,16 +117,20 @@ static const struct flag_info flag_array[] = { .set = "user", .clear = " ", }, { -#if _PAGE_RO == 0 - .mask = _PAGE_RW, + .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA, .val = _PAGE_RW, -#else - .mask = _PAGE_RO, - .val = 0, -#endif .set = "rw", - .clear = "ro", }, { + .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA, + .val = _PAGE_RO, + .set = "ro", + }, { +#if _PAGE_NA != 0 + .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA, + .val = _PAGE_RO, + .set = "na", + }, { +#endif .mask = _PAGE_EXEC, .val = _PAGE_EXEC, .set = " X ", -- cgit v1.2.3 From de0f93873937e999fadaba011d368bc042af37b2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Jan 2018 13:45:31 +0100 Subject: powerpc/8xx: Remove _PAGE_USER and handle user access at PMD level As Linux kernel separates KERNEL and USER address spaces, there is therefore no need to flag USER access at page level. Today, the 8xx TLB handlers already handle user access in the L1 entry through Access Protection Groups, it is then natural to move the user access handling at PMD level once _PAGE_NA allows to handle PAGE_NONE protection without _PAGE_USER In the mean time, as we free up one bit in the PTE, we can use it to include SPS (page size flag) in the PTE and avoid handling it at every TLB miss hence removing special handling based on compiled page size. For _PAGE_EXEC, we rework it to use PP PTE bits, avoiding the copy of _PAGE_EXEC bit into the L1 entry. Unfortunatly we are not able to put it at the correct location as it conflicts with NA/RO/RW bits for data entries. Upper bits of APG in L1 entry overlap with PMD base address. In order to avoid having to filter that out, we set up all groups so that upper bits can have any value. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hugetlb.h | 3 +- arch/powerpc/include/asm/mmu-8xx.h | 34 ++++++++++---------- arch/powerpc/include/asm/nohash/32/pgalloc.h | 3 +- arch/powerpc/include/asm/nohash/32/pte-8xx.h | 14 ++++++--- arch/powerpc/include/asm/nohash/pgtable.h | 2 +- arch/powerpc/include/asm/pte-common.h | 6 ++++ arch/powerpc/kernel/head_8xx.S | 46 ++++++---------------------- arch/powerpc/mm/hugetlbpage.c | 2 +- 8 files changed, 48 insertions(+), 62 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 14c9d44f355b..1a4847f67ea8 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -47,8 +47,7 @@ static inline pte_t *hugepd_page(hugepd_t hpd) { BUG_ON(!hugepd_ok(hpd)); #ifdef CONFIG_PPC_8xx - return (pte_t *)__va(hpd_val(hpd) & - ~(_PMD_PAGE_MASK | _PMD_PRESENT_MASK)); + return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK); #else return (pte_t *)((hpd_val(hpd) & ~HUGEPD_SHIFT_MASK) | PD_HUGE); diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 5bb3dbede41a..a568ff5c5fb8 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -29,17 +29,17 @@ #define MI_Kp 0x40000000 /* Should always be set */ /* - * All pages' PP exec bits are set to 000, which means Execute for Supervisor - * and no Execute for User. - * Then we use the APG to say whether accesses are according to Page rules, - * "all Supervisor" rules (Exec for all) and "all User" rules (Exec for noone) - * Therefore, we define 4 APG groups. msb is _PAGE_EXEC, lsb is _PAGE_USER - * 0 (00) => Not User, no exec => 11 (all accesses performed as user) - * 1 (01) => User but no exec => 11 (all accesses performed as user) - * 2 (10) => Not User, exec => 01 (rights according to page definition) - * 3 (11) => User, exec => 00 (all accesses performed as supervisor) - */ -#define MI_APG_INIT 0xf4ffffff + * All pages' PP data bits are set to either 001 or 011 by copying _PAGE_EXEC + * into bit 21 in the ITLBmiss handler (bit 21 is the middle bit), which means + * respectively NA for All or X for Supervisor and no access for User. + * Then we use the APG to say whether accesses are according to Page rules or + * "all Supervisor" rules (Access to all) + * Therefore, we define 2 APG groups. lsb is _PMD_USER + * 0 => No user => 01 (all accesses performed according to page definition) + * 1 => User => 00 (all accesses performed as supervisor iaw page definition) + * We define all 16 groups so that all other bits of APG can take any value + */ +#define MI_APG_INIT 0x44444444 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in @@ -102,17 +102,17 @@ #define MD_Kp 0x40000000 /* Should always be set */ /* - * All pages' PP data bits are set to either 000 or 011, which means + * All pages' PP data bits are set to either 000 or 011 or 001, which means * respectively RW for Supervisor and no access for User, or RO for - * Supervisor and no access for user. + * Supervisor and no access for user and NA for ALL. * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) - * Therefore, we define 2 APG groups. lsb is _PAGE_USER + * Therefore, we define 2 APG groups. lsb is _PMD_USER * 0 => No user => 01 (all accesses performed according to page definition) - * 1 => User => 00 (all accesses performed as supervisor - * according to page definition) + * 1 => User => 00 (all accesses performed as supervisor iaw page definition) + * We define all 16 groups so that all other bits of APG can take any value */ -#define MD_APG_INIT 0x4fffffff +#define MD_APG_INIT 0x44444444 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index d072139ff2e5..29d37bd1f3b3 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -61,7 +61,8 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pte_page) { - *pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_PRESENT); + *pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_USER | + _PMD_PRESENT); } #define pmd_pgtable(pmd) pmd_page(pmd) diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 7c7040f015e2..f04cb46ae8a1 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -32,27 +32,33 @@ #define _PAGE_PRESENT 0x0001 /* Page is valid */ #define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */ #define _PAGE_PRIVILEGED 0x0004 /* No ASID (context) compare */ -#define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */ +#define _PAGE_HUGE 0x0008 /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/ #define _PAGE_DIRTY 0x0100 /* C: page changed */ /* These 4 software bits must be masked out when the L2 entry is loaded * into the TLB. */ #define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */ -#define _PAGE_USER 0x0020 /* Copied to L1 APG lsb */ -#define _PAGE_EXEC 0x0040 /* Copied to L1 APG */ +#define _PAGE_SPECIAL 0x0020 /* SW entry */ +#define _PAGE_EXEC 0x0040 /* Copied to PP (bit 21) in ITLB */ #define _PAGE_ACCESSED 0x0080 /* software: page referenced */ +#define _PAGE_NA 0x0200 /* Supervisor NA, User no access */ #define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ #define _PMD_PRESENT 0x0001 -#define _PMD_BAD 0x0ff0 +#define _PMD_BAD 0x0fd0 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c #define _PMD_PAGE_512K 0x0004 +#define _PMD_USER 0x0020 /* APG 1 */ /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 +#ifdef CONFIG_PPC_16K_PAGES +#define _PAGE_PSIZE _PAGE_HUGE +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_32_PTE_8xx_H */ diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index e0864e683cf9..c56de1e8026f 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -126,7 +126,7 @@ static inline pte_t pte_mkspecial(pte_t pte) static inline pte_t pte_mkhuge(pte_t pte) { - return pte; + return __pte(pte_val(pte) | _PAGE_HUGE); } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index 426a902816c5..c4a72c7a8c83 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -53,6 +53,9 @@ #ifndef _PAGE_NA #define _PAGE_NA 0 #endif +#ifndef _PAGE_HUGE +#define _PAGE_HUGE 0 +#endif #ifndef _PMD_PRESENT_MASK #define _PMD_PRESENT_MASK _PMD_PRESENT @@ -61,6 +64,9 @@ #define _PMD_SIZE 0 #define PMD_PAGE_SIZE(pmd) bad_call_to_PMD_PAGE_SIZE() #endif +#ifndef _PMD_USER +#define _PMD_USER 0 +#endif #ifndef _PAGE_KERNEL_RO #define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_RO) #endif diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 642680389b7e..c3b831bb8bad 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -52,11 +52,7 @@ * Value for the bits that have fixed value in RPN entries. * Also used for tagging DAR for DTLBerror. */ -#ifdef CONFIG_PPC_16K_PAGES -#define RPN_PATTERN (0x00f0 | MD_SPS16K) -#else #define RPN_PATTERN 0x00f0 -#endif #define PAGE_SHIFT_512K 19 #define PAGE_SHIFT_8M 23 @@ -358,31 +354,23 @@ _ENTRY(ITLBMiss_cmp) #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mtcr r12 #endif - /* Insert the APG into the TWC from the Linux PTE. */ - rlwimi r11, r10, 0, 25, 26 /* Load the MI_TWC with the attributes for this "segment." */ mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ -#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) - rlwimi r10, r11, 1, MI_SPS16K -#endif #ifdef CONFIG_SWAP rlwinm r11, r10, 32-5, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT #endif - li r11, RPN_PATTERN + li r11, RPN_PATTERN | 0x200 /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 20-23 and 28 must be clear. - * Software indicator bits 24, 25, 26, and 27 must be + * Software indicator bits 20 and 23 must be clear. + * Software indicator bits 22, 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. */ -#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) - rlwimi r10, r11, 0, 0x0ff0 /* Set 24-27, clear 20-23 */ -#else - rlwimi r10, r11, 0, 0x0ff8 /* Set 24-27, clear 20-23,28 */ -#endif + rlwimi r11, r10, 4, 0x0400 /* Copy _PAGE_EXEC into bit 21 */ + rlwimi r10, r11, 0, 0x0ff0 /* Set 22, 24-27, clear 20,23 */ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ /* Restore registers */ @@ -419,7 +407,6 @@ _ENTRY(itlb_miss_perf) rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK #endif lwz r10, 0(r10) /* Get the pte */ - rlwinm r11, r11, 0, 0xf b 4b 20: /* 512k pages */ @@ -428,7 +415,6 @@ _ENTRY(itlb_miss_perf) /* Add level 2 base */ rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 lwz r10, 0(r10) /* Get the pte */ - rlwinm r11, r11, 0, 0xf b 4b #endif @@ -479,20 +465,15 @@ _ENTRY(DTLBMiss_jmp) 4: mtcr r12 - /* Insert the Guarded flag and APG into the TWC from the Linux PTE. - * It is bit 26-27 of both the Linux PTE and the TWC (at least + /* Insert the Guarded flag into the TWC from the Linux PTE. + * It is bit 27 of both the Linux PTE and the TWC (at least * I got that right :-). It will be better when we can put * this into the Linux pgd/pmd and load it in the operation * above. */ - rlwimi r11, r10, 0, 26, 27 + rlwimi r11, r10, 0, _PAGE_GUARDED mtspr SPRN_MD_TWC, r11 - /* In 4k pages mode, SPS (bit 28) in RPN must match PS[1] (bit 29) - * In 16k pages mode, SPS is always 1 */ -#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) - rlwimi r10, r11, 1, MD_SPS16K -#endif /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. * We also need to know if the insn is a load/store, so: * Clear _PAGE_PRESENT and load that which will @@ -508,17 +489,12 @@ _ENTRY(DTLBMiss_jmp) rlwimi r10, r11, 0, _PAGE_PRESENT #endif /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 22 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. */ li r11, RPN_PATTERN -#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */ -#else - rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ -#endif mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ /* Restore registers */ @@ -552,7 +528,6 @@ _ENTRY(dtlb_miss_perf) rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK #endif lwz r10, 0(r10) /* Get the pte */ - rlwinm r11, r11, 0, 0xf b 4b 20: /* 512k pages */ @@ -561,7 +536,6 @@ _ENTRY(dtlb_miss_perf) /* Add level 2 base */ rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 lwz r10, 0(r10) /* Get the pte */ - rlwinm r11, r11, 0, 0xf b 4b #endif @@ -712,7 +686,7 @@ _ENTRY(dtlb_miss_exit_3) ITLBMissLinear: mtcr r12 /* Set 8M byte page and mark it valid */ - li r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC + li r11, MI_PS8MEG | MI_SVALID mtspr SPRN_MI_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ @@ -994,7 +968,7 @@ initial_mmu: lis r8, KERNELBASE@h /* Create vaddr for TLB */ ori r8, r8, MI_EVALID /* Mark it valid */ mtspr SPRN_MI_EPN, r8 - li r8, MI_PS8MEG | (2 << 5) /* Set 8M byte page, APG 2 */ + li r8, MI_PS8MEG /* Set 8M byte page */ ori r8, r8, MI_SVALID /* Make it valid */ mtspr SPRN_MI_TWC, r8 li r8, MI_BOOTINIT /* Create RPN for address 0 */ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index c7e5afe5e118..f5d1008f8574 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -96,7 +96,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, *hpdp = __hugepd(__pa(new) | (shift_to_mmu_psize(pshift) << 2)); #elif defined(CONFIG_PPC_8xx) - *hpdp = __hugepd(__pa(new) | + *hpdp = __hugepd(__pa(new) | _PMD_USER | (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : _PMD_PAGE_512K) | _PMD_PRESENT); #else -- cgit v1.2.3 From 4f94b2c7462d9720b2afa7e8e8d4c19446bb31ce Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Jan 2018 13:45:33 +0100 Subject: powerpc/8xx: Use L1 entry APG to handle _PAGE_ACCESSED for CONFIG_SWAP When CONFIG_SWAP is set, the TLB miss handlers have to also take into account _PAGE_ACCESSED flag. At the moment it is done by anding _PAGE_ACCESSED into _PAGE_PRESENT using 3 instructions. This patch uses APG for handling _PAGE_ACCESSED, allowing to just copy _PAGE_ACCESSED bit into APG field, hence reducing the action to a single instruction. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu-8xx.h | 34 +++++++++++++++++++++++----- arch/powerpc/kernel/head_8xx.S | 45 +++++++++++++++----------------------- arch/powerpc/mm/8xx_mmu.c | 2 +- 3 files changed, 47 insertions(+), 34 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index a568ff5c5fb8..2f806e329648 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -34,12 +34,20 @@ * respectively NA for All or X for Supervisor and no access for User. * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) - * Therefore, we define 2 APG groups. lsb is _PMD_USER - * 0 => No user => 01 (all accesses performed according to page definition) - * 1 => User => 00 (all accesses performed as supervisor iaw page definition) + * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP: + * When that bit is not set access is done iaw "all user" + * which means no access iaw page rules. + * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED + * 0x => No access => 11 (all accesses performed as user iaw page definition) + * 10 => No user => 01 (all accesses performed according to page definition) + * 11 => User => 00 (all accesses performed as supervisor iaw page definition) * We define all 16 groups so that all other bits of APG can take any value */ +#ifdef CONFIG_SWAP +#define MI_APG_INIT 0xf4f4f4f4 +#else #define MI_APG_INIT 0x44444444 +#endif /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in @@ -107,12 +115,20 @@ * Supervisor and no access for user and NA for ALL. * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) - * Therefore, we define 2 APG groups. lsb is _PMD_USER - * 0 => No user => 01 (all accesses performed according to page definition) - * 1 => User => 00 (all accesses performed as supervisor iaw page definition) + * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP: + * When that bit is not set access is done iaw "all user" + * which means no access iaw page rules. + * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED + * 0x => No access => 11 (all accesses performed as user iaw page definition) + * 10 => No user => 01 (all accesses performed according to page definition) + * 11 => User => 00 (all accesses performed as supervisor iaw page definition) * We define all 16 groups so that all other bits of APG can take any value */ +#ifdef CONFIG_SWAP +#define MD_APG_INIT 0xf4f4f4f4 +#else #define MD_APG_INIT 0x44444444 +#endif /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in @@ -164,6 +180,12 @@ */ #define SPRN_M_TW 799 +/* APGs */ +#define M_APG0 0x00000000 +#define M_APG1 0x00000020 +#define M_APG2 0x00000040 +#define M_APG3 0x00000060 + #ifndef __ASSEMBLY__ typedef struct { unsigned int id; diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c3b831bb8bad..d8670a37d70c 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -354,14 +354,13 @@ _ENTRY(ITLBMiss_cmp) #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mtcr r12 #endif - /* Load the MI_TWC with the attributes for this "segment." */ - mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ #ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT - and r11, r11, r10 - rlwimi r10, r11, 0, _PAGE_PRESENT + rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1 #endif + /* Load the MI_TWC with the attributes for this "segment." */ + mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ + li r11, RPN_PATTERN | 0x200 /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 20 and 23 must be clear. @@ -472,22 +471,14 @@ _ENTRY(DTLBMiss_jmp) * above. */ rlwimi r11, r10, 0, _PAGE_GUARDED - mtspr SPRN_MD_TWC, r11 - - /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. - * We also need to know if the insn is a load/store, so: - * Clear _PAGE_PRESENT and load that which will - * trap into DTLB Error with store bit set accordinly. - */ - /* PRESENT=0x1, ACCESSED=0x20 - * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); - * r10 = (r10 & ~PRESENT) | r11; - */ #ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT - and r11, r11, r10 - rlwimi r10, r11, 0, _PAGE_PRESENT + /* _PAGE_ACCESSED has to be set. We use second APG bit for that, 0 + * on that bit will represent a Non Access group + */ + rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1 #endif + mtspr SPRN_MD_TWC, r11 + /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior @@ -647,8 +638,8 @@ InstructionBreakpoint: */ DTLBMissIMMR: mtcr r12 - /* Set 512k byte guarded page and mark it valid */ - li r10, MD_PS512K | MD_GUARDED | MD_SVALID + /* Set 512k byte guarded page and mark it valid and accessed */ + li r10, MD_PS512K | MD_GUARDED | MD_SVALID | M_APG2 mtspr SPRN_MD_TWC, r10 mfspr r10, SPRN_IMMR /* Get current IMMR */ rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ @@ -666,8 +657,8 @@ _ENTRY(dtlb_miss_exit_2) DTLBMissLinear: mtcr r12 - /* Set 8M byte page and mark it valid */ - li r11, MD_PS8MEG | MD_SVALID + /* Set 8M byte page and mark it valid and accessed */ + li r11, MD_PS8MEG | MD_SVALID | M_APG2 mtspr SPRN_MD_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ @@ -685,8 +676,8 @@ _ENTRY(dtlb_miss_exit_3) #ifndef CONFIG_PIN_TLB_TEXT ITLBMissLinear: mtcr r12 - /* Set 8M byte page and mark it valid */ - li r11, MI_PS8MEG | MI_SVALID + /* Set 8M byte page and mark it valid,accessed */ + li r11, MI_PS8MEG | MI_SVALID | M_APG2 mtspr SPRN_MI_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ @@ -969,7 +960,7 @@ initial_mmu: ori r8, r8, MI_EVALID /* Mark it valid */ mtspr SPRN_MI_EPN, r8 li r8, MI_PS8MEG /* Set 8M byte page */ - ori r8, r8, MI_SVALID /* Make it valid */ + ori r8, r8, MI_SVALID | M_APG2 /* Make it valid, APG 2 */ mtspr SPRN_MI_TWC, r8 li r8, MI_BOOTINIT /* Create RPN for address 0 */ mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ @@ -996,7 +987,7 @@ initial_mmu: ori r8, r8, MD_EVALID /* Mark it valid */ mtspr SPRN_MD_EPN, r8 li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */ - ori r8, r8, MD_SVALID /* Make it valid */ + ori r8, r8, MD_SVALID | M_APG2 /* Make it valid and accessed */ mtspr SPRN_MD_TWC, r8 mr r8, r9 /* Create paddr for TLB */ ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */ diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index c2d9a6d709c3..849f50cd62f2 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -79,7 +79,7 @@ void __init MMU_init_hw(void) for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { mtspr(SPRN_MD_CTR, ctr | (i << 8)); mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); - mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); + mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID | M_APG2); mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); addr += LARGE_PAGE_SIZE_8M; mem -= LARGE_PAGE_SIZE_8M; -- cgit v1.2.3 From bba9bc89b9e72bcf26632877b4823a7e92009b02 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 20 Dec 2017 12:51:00 +1100 Subject: powerpc: Add aacraid and nvme to powernv_defconfig These adapters can be found in a number of our systems, so let's enable the corresponding drivers by default. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/configs/powernv_defconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 4891bbed6258..9c450cb3e4f2 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -97,6 +97,7 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_VIRTIO_BLK=m +CONFIG_BLK_DEV_NVME=y CONFIG_IDE=y CONFIG_BLK_DEV_IDECD=y CONFIG_BLK_DEV_GENERIC=y @@ -113,6 +114,7 @@ CONFIG_SCSI_CXGB3_ISCSI=m CONFIG_SCSI_CXGB4_ISCSI=m CONFIG_SCSI_BNX2_ISCSI=m CONFIG_BE2ISCSI=m +CONFIG_SCSI_AACRAID=y CONFIG_SCSI_MPT2SAS=m CONFIG_SCSI_SYM53C8XX_2=m CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 -- cgit v1.2.3 From f5f563012a7002e64853c61d293d65b3de8d9dca Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 10 Jan 2018 17:10:13 +1100 Subject: powerpc: Make newline in cpuinfo unconditional We used to not put the newline between the CPU part and the summary part on UP kernels. This is a rather pointless ifdef so take it out. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup-common.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index a20e390cb0dd..44d53b33c533 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -354,10 +354,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) loops_per_jiffy / (500000/HZ), (loops_per_jiffy / (5000/HZ)) % 100); #endif - -#ifdef CONFIG_SMP seq_printf(m, "\n"); -#endif preempt_enable(); -- cgit v1.2.3 From fbadeb6bb1685f7a53869e240284ff083f5edaa6 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 10 Jan 2018 17:10:14 +1100 Subject: powerpc: Cosmetic cleanup of cpuinfo_op Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup-common.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 44d53b33c533..24da91768133 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -387,10 +387,10 @@ static void c_stop(struct seq_file *m, void *v) } const struct seq_operations cpuinfo_op = { - .start =c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, }; void __init check_for_initrd(void) -- cgit v1.2.3 From f5abe14f88e8259494d69614768789a67475b1ad Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 12 Jan 2018 13:28:45 +1100 Subject: powerpc/xive: Remove incorrect debug code WORD2 if the TIMA isn't byte accessible and isn't that useful to know about, take out the pr_devel statement. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/xive/common.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index a3b8d7d1316e..838ebdbfe4c5 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1269,11 +1269,6 @@ static void xive_setup_cpu(void) { struct xive_cpu *xc = __this_cpu_read(xive_cpu); - /* Debug: Dump the TM state */ - pr_devel("CPU %d [HW 0x%02x] VT=%02x\n", - smp_processor_id(), hard_smp_processor_id(), - in_8(xive_tima + xive_tima_offset + TM_WORD2)); - /* The backend might have additional things to do */ if (xive_ops->setup_cpu) xive_ops->setup_cpu(smp_processor_id(), xc); -- cgit v1.2.3 From 872e2ae4bdd4c244f7617ffc08c39462192460ee Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 12 Jan 2018 13:28:48 +1100 Subject: powerpc: Remove useless EXC_COMMON_HV The only difference between EXC_COMMON_HV and EXC_COMMON is that the former adds "2" to the trap number which is supposed to represent the fact that this is an "HV" interrupt which uses HSRR0/1. However KVM is the only one who cares and it has its own separate macros. In fact, we only have one user of EXC_COMMON_HV and it's for an unknown interrupt case. All the other ones already using EXC_COMMON. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/head-64.h | 7 +------ arch/powerpc/kernel/exceptions-64s.S | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index fdcff76e9a25..0a663dfc28b5 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -178,7 +178,7 @@ name: * TRAMP_REAL_* - real, unrelocated helpers (virt can call these) * TRAMP_VIRT_* - virt, unreloc helpers (in practice, real can use) * TRAMP_KVM - KVM handlers that get put into real, unrelocated - * EXC_COMMON_* - virt, relocated common handlers + * EXC_COMMON - virt, relocated common handlers * * The EXC handlers are given a name, and branch to name_common, or the * appropriate KVM or masking function. Vector handler verieties are as @@ -211,7 +211,6 @@ name: * EXC_COMMON_BEGIN/END - used to open-code the handler * EXC_COMMON * EXC_COMMON_ASYNC - * EXC_COMMON_HV * * TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers. @@ -413,10 +412,6 @@ name: EXC_COMMON_BEGIN(name); \ STD_EXCEPTION_COMMON_ASYNC(realvec, name, hdlr); \ -#define EXC_COMMON_HV(name, realvec, hdlr) \ - EXC_COMMON_BEGIN(name); \ - STD_EXCEPTION_COMMON(realvec + 0x2, name, hdlr); \ - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_HEAD_64_H */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e441b469dc8f..175891c6909c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1318,7 +1318,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) b . #endif -EXC_COMMON_HV(denorm_common, 0x1500, unknown_exception) +EXC_COMMON(denorm_common, 0x1500, unknown_exception) #ifdef CONFIG_CBE_RAS -- cgit v1.2.3 From 2271db20e4b362405bacc0e4095df4177d38129e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 12 Jan 2018 13:28:49 +1100 Subject: powerpc: Use the TRAP macro whenever comparing a trap number Trap numbers can have extra bits at the bottom that need to be filtered out. There are a few cases where we don't do that. It's possible that we got lucky but better safe than sorry. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/traps.c | 2 +- arch/powerpc/mm/fault.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5acb5a176dbe..bcd4441304a5 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1409,7 +1409,7 @@ void show_regs(struct pt_regs * regs) print_msr_bits(regs->msr); pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); - if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) + if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) pr_cont("CFAR: "REG" ", regs->orig_gpr3); if (trap == 0x200 || trap == 0x300 || trap == 0x600) #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index f3eb61be0d30..d61989be28e1 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1564,7 +1564,7 @@ void facility_unavailable_exception(struct pt_regs *regs) u8 status; bool hv; - hv = (regs->trap == 0xf80); + hv = (TRAP(regs) == 0xf80); if (hv) value = mfspr(SPRN_HFSCR); else diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 4797d08581ce..c07896c19517 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -571,7 +571,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) /* kernel has accessed a bad area */ - switch (regs->trap) { + switch (TRAP(regs)) { case 0x300: case 0x380: printk(KERN_ALERT "Unable to handle kernel paging request for " -- cgit v1.2.3 From 4552d128c26e0f0f27a5bd2fadc24092b8f6c1d7 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 24 Dec 2017 02:49:22 +1000 Subject: powerpc: System reset avoid interleaving oops using die synchronisation The die() oops path contains a serializing lock to prevent oops messages from being interleaved. In the case of a system reset initiated oops (e.g., qemu nmi command), __die was being called which lacks that synchronisation and oops reports could be interleaved across CPUs. A recent patch 4388c9b3a6ee7 ("powerpc: Do not send system reset request through the oops path") changed this to __die to avoid the debugger() call, but there is no real harm to calling it twice if the first time fell through. So go back to using die() here. This was observed to fix the problem. Fixes: 4388c9b3a6ee7 ("powerpc: Do not send system reset request through the oops path") Signed-off-by: Nicholas Piggin Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index d61989be28e1..d139117d3339 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -337,7 +337,7 @@ void system_reset_exception(struct pt_regs *regs) * No debugger or crash dump registered, print logs then * panic. */ - __die("System Reset", regs, SIGABRT); + die("System Reset", regs, SIGABRT); mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); -- cgit v1.2.3 From d4748276ae14ce951a3254852dddc3675797c277 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 24 Dec 2017 01:15:50 +1000 Subject: powerpc/64s: Improve local TLB flush for boot and MCE on POWER9 There are several cases outside the normal address space management where a CPU's entire local TLB is to be flushed: 1. Booting the kernel, in case something has left stale entries in the TLB (e.g., kexec). 2. Machine check, to clean corrupted TLB entries. One other place where the TLB is flushed, is waking from deep idle states. The flush is a side-effect of calling ->cpu_restore with the intention of re-setting various SPRs. The flush itself is unnecessary because in the first case, the TLB should not acquire new corrupted TLB entries as part of sleep/wake (though they may be lost). This type of TLB flush is coded inflexibly, several times for each CPU type, and they have a number of problems with ISA v3.0B: - The current radix mode of the MMU is not taken into account, it is always done as a hash flushn For IS=2 (LPID-matching flush from host) and IS=3 with HV=0 (guest kernel flush), tlbie(l) is undefined if the R field does not match the current radix mode. - ISA v3.0B hash must flush the partition and process table caches as well. - ISA v3.0B radix must flush partition and process scoped translations, partition and process table caches, and also the page walk cache. So consolidate the flushing code and implement it in C and inline asm under the mm/ directory with the rest of the flush code. Add ISA v3.0B cases for radix and hash, and use the radix flush in radix environment. Provide a way for IS=2 (LPID flush) to specify the radix mode of the partition. Have KVM pass in the radix mode of the guest. Take out the flushes from early cputable/dt_cpu_ftrs detection hooks, and move it later in the boot process after, the MMU registers are set up and before relocation is first turned on. The TLB flush is no longer called when restoring from deep idle states. This was not be done as a separate step because booting secondaries uses the same cpu_restore as idle restore, which needs the TLB flush. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/tlbflush-hash.h | 1 + .../powerpc/include/asm/book3s/64/tlbflush-radix.h | 3 + arch/powerpc/include/asm/book3s/64/tlbflush.h | 34 ++++++ arch/powerpc/include/asm/cputable.h | 12 --- arch/powerpc/kernel/cpu_setup_power.S | 50 --------- arch/powerpc/kernel/cputable.c | 15 --- arch/powerpc/kernel/dt_cpu_ftrs.c | 30 ------ arch/powerpc/kernel/mce_power.c | 115 +-------------------- arch/powerpc/kvm/book3s_hv_ras.c | 6 +- arch/powerpc/mm/hash_native_64.c | 97 +++++++++++++++++ arch/powerpc/mm/hash_utils_64.c | 8 ++ arch/powerpc/mm/pgtable-radix.c | 6 ++ arch/powerpc/mm/tlb-radix.c | 66 ++++++++++++ 13 files changed, 219 insertions(+), 224 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h index 849ecaae9e79..64d02a704bcb 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -51,6 +51,7 @@ static inline void arch_leave_lazy_mmu_mode(void) #define arch_flush_lazy_mmu_mode() do {} while (0) +extern void hash__tlbiel_all(unsigned int action); extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, unsigned long flags); diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 6a9e68003387..b8f9ad587087 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -11,6 +11,8 @@ static inline int mmu_get_ap(int psize) return mmu_psize_defs[psize].ap; } +extern void radix__tlbiel_all(unsigned int action); + extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, @@ -47,4 +49,5 @@ extern void radix__flush_tlb_lpid(unsigned long lpid); extern void radix__flush_tlb_all(void); extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, unsigned long address); + #endif diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 58b576f654b3..9befb4df235c 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -8,6 +8,40 @@ #include #include +/* TLB flush actions. Used as argument to tlbiel_all() */ +enum { + TLB_INVAL_SCOPE_GLOBAL = 0, /* invalidate all TLBs */ + TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */ +}; + +static inline void tlbiel_all(void) +{ + /* + * This is used for host machine check and bootup. + * + * This uses early_radix_enabled and implementations use + * early_cpu_has_feature etc because that works early in boot + * and this is the machine check path which is not performance + * critical. + */ + if (early_radix_enabled()) + radix__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL); + else + hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL); +} + +static inline void tlbiel_all_lpid(bool radix) +{ + /* + * This is used for guest machine check. + */ + if (radix) + radix__tlbiel_all(TLB_INVAL_SCOPE_LPID); + else + hash__tlbiel_all(TLB_INVAL_SCOPE_LPID); +} + + #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE static inline void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 0546663a98db..78ca2a721d04 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -107,12 +107,6 @@ struct cpu_spec { * called in real mode to handle SLB and TLB errors. */ long (*machine_check_early)(struct pt_regs *regs); - - /* - * Processor specific routine to flush tlbs. - */ - void (*flush_tlb)(unsigned int action); - }; extern struct cpu_spec *cur_cpu_spec; @@ -133,12 +127,6 @@ extern void cpu_feature_keys_init(void); static inline void cpu_feature_keys_init(void) { } #endif -/* TLB flush actions. Used as argument to cpu_spec.flush_tlb() hook */ -enum { - TLB_INVAL_SCOPE_GLOBAL = 0, /* invalidate all TLBs */ - TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */ -}; - #endif /* __ASSEMBLY__ */ /* CPU kernel features */ diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 610955fe8b81..730ade48329b 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -31,7 +31,6 @@ _GLOBAL(__setup_cpu_power7) mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) bl __init_LPCR_ISA206 - bl __init_tlb_power7 mtlr r11 blr @@ -45,7 +44,6 @@ _GLOBAL(__restore_cpu_power7) mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) bl __init_LPCR_ISA206 - bl __init_tlb_power7 mtlr r11 blr @@ -64,7 +62,6 @@ _GLOBAL(__setup_cpu_power8) li r4,0 /* LPES = 0 */ bl __init_LPCR_ISA206 bl __init_HFSCR - bl __init_tlb_power8 bl __init_PMU_HV bl __init_PMU_HV_ISA207 mtlr r11 @@ -86,7 +83,6 @@ _GLOBAL(__restore_cpu_power8) li r4,0 /* LPES = 0 */ bl __init_LPCR_ISA206 bl __init_HFSCR - bl __init_tlb_power8 bl __init_PMU_HV bl __init_PMU_HV_ISA207 mtlr r11 @@ -110,7 +106,6 @@ _GLOBAL(__setup_cpu_power9) li r4,0 /* LPES = 0 */ bl __init_LPCR_ISA300 bl __init_HFSCR - bl __init_tlb_power9 bl __init_PMU_HV mtlr r11 blr @@ -134,7 +129,6 @@ _GLOBAL(__restore_cpu_power9) li r4,0 /* LPES = 0 */ bl __init_LPCR_ISA300 bl __init_HFSCR - bl __init_tlb_power9 bl __init_PMU_HV mtlr r11 blr @@ -192,50 +186,6 @@ __init_HFSCR: mtspr SPRN_HFSCR,r3 blr -/* - * Clear the TLB using the specified IS form of tlbiel instruction - * (invalidate by congruence class). P7 has 128 CCs., P8 has 512. - */ -__init_tlb_power7: - li r6,POWER7_TLB_SETS - mtctr r6 - li r7,0xc00 /* IS field = 0b11 */ - ptesync -2: tlbiel r7 - addi r7,r7,0x1000 - bdnz 2b - ptesync -1: blr - -__init_tlb_power8: - li r6,POWER8_TLB_SETS - mtctr r6 - li r7,0xc00 /* IS field = 0b11 */ - ptesync -2: tlbiel r7 - addi r7,r7,0x1000 - bdnz 2b - ptesync -1: blr - -/* - * Flush the TLB in hash mode. Hash must flush with RIC=2 once for process - * and one for partition scope to clear process and partition table entries. - */ -__init_tlb_power9: - li r6,POWER9_TLB_SETS_HASH - 1 - mtctr r6 - li r7,0xc00 /* IS field = 0b11 */ - li r8,0 - ptesync - PPC_TLBIEL(7, 8, 2, 1, 0) - PPC_TLBIEL(7, 8, 2, 0, 0) -2: addi r7,r7,0x1000 - PPC_TLBIEL(7, 8, 0, 0, 0) - bdnz 2b - ptesync -1: blr - __init_PMU_HV: li r5,0 mtspr SPRN_MMCRC,r5 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 1350f49d81a8..c40a9fc1e5d1 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -74,9 +74,6 @@ extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power8(void); extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power9(void); -extern void __flush_tlb_power7(unsigned int action); -extern void __flush_tlb_power8(unsigned int action); -extern void __flush_tlb_power9(unsigned int action); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); extern long __machine_check_early_realmode_p9(struct pt_regs *regs); @@ -368,7 +365,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, - .flush_tlb = __flush_tlb_power7, .machine_check_early = __machine_check_early_realmode_p7, .platform = "power7", }, @@ -386,7 +382,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, - .flush_tlb = __flush_tlb_power8, .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, @@ -404,7 +399,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, - .flush_tlb = __flush_tlb_power9, .platform = "power9", }, { /* Power7 */ @@ -423,7 +417,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, - .flush_tlb = __flush_tlb_power7, .machine_check_early = __machine_check_early_realmode_p7, .platform = "power7", }, @@ -443,7 +436,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, - .flush_tlb = __flush_tlb_power7, .machine_check_early = __machine_check_early_realmode_p7, .platform = "power7+", }, @@ -463,7 +455,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, - .flush_tlb = __flush_tlb_power8, .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, @@ -483,7 +474,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, - .flush_tlb = __flush_tlb_power8, .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, @@ -503,7 +493,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, - .flush_tlb = __flush_tlb_power8, .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, @@ -523,7 +512,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, - .flush_tlb = __flush_tlb_power8, .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, @@ -543,7 +531,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, - .flush_tlb = __flush_tlb_power9, .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, @@ -563,7 +550,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, - .flush_tlb = __flush_tlb_power9, .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, @@ -583,7 +569,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, - .flush_tlb = __flush_tlb_power9, .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 8bdc2f96c5d6..945e2c29ad2d 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -77,8 +77,6 @@ struct dt_cpu_feature { * Set up the base CPU */ -extern void __flush_tlb_power8(unsigned int action); -extern void __flush_tlb_power9(unsigned int action); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); extern long __machine_check_early_realmode_p9(struct pt_regs *regs); @@ -92,27 +90,6 @@ static struct { static void (*init_pmu_registers)(void); -static void cpufeatures_flush_tlb(void) -{ - /* - * This is a temporary measure to keep equivalent TLB flush as the - * cputable based setup code. - */ - switch (PVR_VER(mfspr(SPRN_PVR))) { - case PVR_POWER8: - case PVR_POWER8E: - case PVR_POWER8NVL: - __flush_tlb_power8(TLB_INVAL_SCOPE_GLOBAL); - break; - case PVR_POWER9: - __flush_tlb_power9(TLB_INVAL_SCOPE_GLOBAL); - break; - default: - pr_err("unknown CPU version for boot TLB flush\n"); - break; - } -} - static void __restore_cpu_cpufeatures(void) { /* @@ -137,8 +114,6 @@ static void __restore_cpu_cpufeatures(void) if (init_pmu_registers) init_pmu_registers(); - - cpufeatures_flush_tlb(); } static char dt_cpu_name[64]; @@ -157,7 +132,6 @@ static struct cpu_spec __initdata base_cpu_spec = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = NULL, .cpu_restore = __restore_cpu_cpufeatures, - .flush_tlb = NULL, .machine_check_early = NULL, .platform = NULL, }; @@ -412,7 +386,6 @@ static void init_pmu_power8(void) static int __init feat_enable_mce_power8(struct dt_cpu_feature *f) { cur_cpu_spec->platform = "power8"; - cur_cpu_spec->flush_tlb = __flush_tlb_power8; cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p8; return 1; @@ -451,7 +424,6 @@ static void init_pmu_power9(void) static int __init feat_enable_mce_power9(struct dt_cpu_feature *f) { cur_cpu_spec->platform = "power9"; - cur_cpu_spec->flush_tlb = __flush_tlb_power9; cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p9; return 1; @@ -752,8 +724,6 @@ static void __init cpufeatures_setup_finished(void) system_registers.hfscr = mfspr(SPRN_HFSCR); system_registers.fscr = mfspr(SPRN_FSCR); - cpufeatures_flush_tlb(); - pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n", cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features); } diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 644f7040b91c..fe6fc63251fe 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -58,115 +58,6 @@ static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) return pte_pfn(*ptep); } -static void flush_tlb_206(unsigned int num_sets, unsigned int action) -{ - unsigned long rb; - unsigned int i; - - switch (action) { - case TLB_INVAL_SCOPE_GLOBAL: - rb = TLBIEL_INVAL_SET; - break; - case TLB_INVAL_SCOPE_LPID: - rb = TLBIEL_INVAL_SET_LPID; - break; - default: - BUG(); - break; - } - - asm volatile("ptesync" : : : "memory"); - for (i = 0; i < num_sets; i++) { - asm volatile("tlbiel %0" : : "r" (rb)); - rb += 1 << TLBIEL_INVAL_SET_SHIFT; - } - asm volatile("ptesync" : : : "memory"); -} - -static void flush_tlb_300(unsigned int num_sets, unsigned int action) -{ - unsigned long rb; - unsigned int i; - unsigned int r; - - switch (action) { - case TLB_INVAL_SCOPE_GLOBAL: - rb = TLBIEL_INVAL_SET; - break; - case TLB_INVAL_SCOPE_LPID: - rb = TLBIEL_INVAL_SET_LPID; - break; - default: - BUG(); - break; - } - - asm volatile("ptesync" : : : "memory"); - - if (early_radix_enabled()) - r = 1; - else - r = 0; - - /* - * First flush table/PWC caches with set 0, then flush the - * rest of the sets, partition scope. Radix must then do it - * all again with process scope. Hash just has to flush - * process table. - */ - asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : : - "r"(rb), "r"(0), "i"(2), "i"(0), "r"(r)); - for (i = 1; i < num_sets; i++) { - unsigned long set = i * (1< TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs. - * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID. - */ -void __flush_tlb_power7(unsigned int action) -{ - flush_tlb_206(POWER7_TLB_SETS, action); -} - -void __flush_tlb_power8(unsigned int action) -{ - flush_tlb_206(POWER8_TLB_SETS, action); -} - -void __flush_tlb_power9(unsigned int action) -{ - unsigned int num_sets; - - if (early_radix_enabled()) - num_sets = POWER9_TLB_SETS_RADIX; - else - num_sets = POWER9_TLB_SETS_HASH; - - flush_tlb_300(num_sets, action); -} - - /* flush SLBs and reload */ #ifdef CONFIG_PPC_BOOK3S_64 static void flush_and_reload_slb(void) @@ -226,10 +117,8 @@ static int mce_flush(int what) return 1; } if (what == MCE_FLUSH_TLB) { - if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { - cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); - return 1; - } + tlbiel_all(); + return 1; } return 0; diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index c356f9a40b24..e61066bb6725 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -87,8 +87,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI); } if (dsisr & DSISR_MC_TLB_MULTI) { - if (cur_cpu_spec && cur_cpu_spec->flush_tlb) - cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_LPID); + tlbiel_all_lpid(vcpu->kvm->arch.radix); dsisr &= ~DSISR_MC_TLB_MULTI; } /* Any other errors we don't understand? */ @@ -105,8 +104,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) reload_slb(vcpu); break; case SRR1_MC_IFETCH_TLBMULTI: - if (cur_cpu_spec && cur_cpu_spec->flush_tlb) - cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_LPID); + tlbiel_all_lpid(vcpu->kvm->arch.radix); break; default: handled = 0; diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 640cf566e986..a0675e91ad7d 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -47,6 +47,103 @@ DEFINE_RAW_SPINLOCK(native_tlbie_lock); +static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is) +{ + unsigned long rb; + + rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); + + asm volatile("tlbiel %0" : : "r" (rb)); +} + +/* + * tlbiel instruction for hash, set invalidation + * i.e., r=1 and is=01 or is=10 or is=11 + */ +static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is, + unsigned int pid, + unsigned int ric, unsigned int prs) +{ + unsigned long rb; + unsigned long rs; + unsigned int r = 0; /* hash format */ + + rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); + rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); + + asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) + : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r) + : "memory"); +} + + +static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is) +{ + unsigned int set; + + asm volatile("ptesync": : :"memory"); + + for (set = 0; set < num_sets; set++) + tlbiel_hash_set_isa206(set, is); + + asm volatile("ptesync": : :"memory"); +} + +static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) +{ + unsigned int set; + + asm volatile("ptesync": : :"memory"); + + /* + * Flush the first set of the TLB, and any caching of partition table + * entries. Then flush the remaining sets of the TLB. Hash mode uses + * partition scoped TLB translations. + */ + tlbiel_hash_set_isa300(0, is, 0, 2, 0); + for (set = 1; set < num_sets; set++) + tlbiel_hash_set_isa300(set, is, 0, 0, 0); + + /* + * Now invalidate the process table cache. + * + * From ISA v3.0B p. 1078: + * The following forms are invalid. + * * PRS=1, R=0, and RIC!=2 (The only process-scoped + * HPT caching is of the Process Table.) + */ + tlbiel_hash_set_isa300(0, is, 0, 2, 1); + + asm volatile("ptesync": : :"memory"); +} + +void hash__tlbiel_all(unsigned int action) +{ + unsigned int is; + + switch (action) { + case TLB_INVAL_SCOPE_GLOBAL: + is = 3; + break; + case TLB_INVAL_SCOPE_LPID: + is = 2; + break; + default: + BUG(); + } + + if (early_cpu_has_feature(CPU_FTR_ARCH_300)) + tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is); + else if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) + tlbiel_all_isa206(POWER8_TLB_SETS, is); + else if (early_cpu_has_feature(CPU_FTR_ARCH_206)) + tlbiel_all_isa206(POWER7_TLB_SETS, is); + else + WARN(1, "%s called on pre-POWER7 CPU\n", __func__); + + asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); +} + static inline unsigned long ___tlbie(unsigned long vpn, int psize, int apsize, int ssize) { diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 0c802ded6f21..69fa01e02b63 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1051,6 +1051,10 @@ void __init hash__early_init_mmu(void) pr_info("Initializing hash mmu with SLB\n"); /* Initialize SLB management */ slb_initialize(); + + if (cpu_has_feature(CPU_FTR_ARCH_206) + && cpu_has_feature(CPU_FTR_HVMODE)) + tlbiel_all(); } #ifdef CONFIG_SMP @@ -1070,6 +1074,10 @@ void hash__early_init_mmu_secondary(void) } /* Initialize SLB */ slb_initialize(); + + if (cpu_has_feature(CPU_FTR_ARCH_206) + && cpu_has_feature(CPU_FTR_HVMODE)) + tlbiel_all(); } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index cfbbee941a76..27ccab28665b 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -579,6 +579,9 @@ void __init radix__early_init_mmu(void) radix_init_iamr(); radix_init_pgtable(); + + if (cpu_has_feature(CPU_FTR_HVMODE)) + tlbiel_all(); } void radix__early_init_mmu_secondary(void) @@ -600,6 +603,9 @@ void radix__early_init_mmu_secondary(void) radix_init_amor(); } radix_init_iamr(); + + if (cpu_has_feature(CPU_FTR_HVMODE)) + tlbiel_all(); } void radix__mmu_cleanup_all(void) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 884f4b705b57..fbaa429471a1 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -23,6 +23,72 @@ #define RIC_FLUSH_PWC 1 #define RIC_FLUSH_ALL 2 +/* + * tlbiel instruction for radix, set invalidation + * i.e., r=1 and is=01 or is=10 or is=11 + */ +static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is, + unsigned int pid, + unsigned int ric, unsigned int prs) +{ + unsigned long rb; + unsigned long rs; + unsigned int r = 1; /* radix format */ + + rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); + rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); + + asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) + : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r) + : "memory"); +} + +static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) +{ + unsigned int set; + + asm volatile("ptesync": : :"memory"); + + /* + * Flush the first set of the TLB, and the entire Page Walk Cache + * and partition table entries. Then flush the remaining sets of the + * TLB. + */ + tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0); + for (set = 1; set < num_sets; set++) + tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 0); + + /* Do the same for process scoped entries. */ + tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1); + for (set = 1; set < num_sets; set++) + tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); + + asm volatile("ptesync": : :"memory"); +} + +void radix__tlbiel_all(unsigned int action) +{ + unsigned int is; + + switch (action) { + case TLB_INVAL_SCOPE_GLOBAL: + is = 3; + break; + case TLB_INVAL_SCOPE_LPID: + is = 2; + break; + default: + BUG(); + } + + if (early_cpu_has_feature(CPU_FTR_ARCH_300)) + tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is); + else + WARN(1, "%s called on pre-POWER9 CPU\n", __func__); + + asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); +} + static inline void __tlbiel_pid(unsigned long pid, int set, unsigned long ric) { -- cgit v1.2.3 From 1513c33d7174e87a079cfa2666cb9a3eba56a0ea Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 22 Dec 2017 21:17:08 +1000 Subject: powerpc/powernv: Remove real mode access limit for early allocations This removes the RMA limit on powernv platform, which constrains early allocations such as PACAs and stacks. There are still other restrictions that must be followed, such as bolted SLB limits, but real mode addressing has no constraints. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 20 +++++++++++++------- arch/powerpc/mm/pgtable-radix.c | 37 +++++++++++++++++++++---------------- 2 files changed, 34 insertions(+), 23 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 69fa01e02b63..a386bd854b1c 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1848,16 +1848,22 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, */ BUG_ON(first_memblock_base != 0); - /* On LPAR systems, the first entry is our RMA region, - * non-LPAR 64-bit hash MMU systems don't have a limitation - * on real mode access, but using the first entry works well - * enough. We also clamp it to 1G to avoid some funky things + /* + * On virtualized systems the first entry is our RMA region aka VRMA, + * non-virtualized 64-bit hash MMU systems don't have a limitation + * on real mode access. + * + * We also clamp it to 1G to avoid some funky things * such as RTAS bugs etc... */ - ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + if (!early_cpu_has_feature(CPU_FTR_HVMODE)) { + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); - /* Finally limit subsequent allocations */ - memblock_set_current_limit(ppc64_rma_size); + /* Finally limit subsequent allocations */ + memblock_set_current_limit(ppc64_rma_size); + } else { + ppc64_rma_size = ULONG_MAX; + } } #ifdef CONFIG_DEBUG_FS diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 27ccab28665b..ddf584333bcf 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -628,22 +628,27 @@ void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, * physical on those processors */ BUG_ON(first_memblock_base != 0); - /* - * We limit the allocation that depend on ppc64_rma_size - * to first_memblock_size. We also clamp it to 1GB to - * avoid some funky things such as RTAS bugs. - * - * On radix config we really don't have a limitation - * on real mode access. But keeping it as above works - * well enough. - */ - ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); - /* - * Finally limit subsequent allocations. We really don't want - * to limit the memblock allocations to rma_size. FIXME!! should - * we even limit at all ? - */ - memblock_set_current_limit(first_memblock_base + first_memblock_size); + + if (!early_cpu_has_feature(CPU_FTR_HVMODE)) { + /* + * We limit the allocation that depend on ppc64_rma_size + * to first_memblock_size. We also clamp it to 1GB to + * avoid some funky things such as RTAS bugs. + * + * On radix config we really don't have a limitation + * on real mode access. But keeping it as above works + * well enough. + */ + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + /* + * Finally limit subsequent allocations. We really don't want + * to limit the memblock allocations to rma_size. FIXME!! should + * we even limit at all ? + */ + memblock_set_current_limit(first_memblock_base + first_memblock_size); + } else { + ppc64_rma_size = ULONG_MAX; + } } #ifdef CONFIG_MEMORY_HOTPLUG -- cgit v1.2.3 From 98ae0069cb78ca4f5d14f203e3bb43874591123f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 22 Dec 2017 21:17:09 +1000 Subject: powerpc/pseries: radix is not subject to RMA limit, remove it The radix guest is not subject to the paravirtualized HPT VRMA limit, so remove that from ppc64_rma_size calculation for that platform. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pgtable-radix.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index ddf584333bcf..60927d019bbf 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -631,15 +631,12 @@ void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, if (!early_cpu_has_feature(CPU_FTR_HVMODE)) { /* - * We limit the allocation that depend on ppc64_rma_size - * to first_memblock_size. We also clamp it to 1GB to - * avoid some funky things such as RTAS bugs. + * Radix mode guests are not limited by RMA / VRMA addressing. * - * On radix config we really don't have a limitation - * on real mode access. But keeping it as above works - * well enough. + * We do clamp addresses to 1GB to avoid some funky things + * such as RTAS bugs. */ - ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + ppc64_rma_size = 0x40000000; /* * Finally limit subsequent allocations. We really don't want * to limit the memblock allocations to rma_size. FIXME!! should -- cgit v1.2.3 From 47fee31dbdbd642d444c67a4df814339f6d8dd61 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 22 Dec 2017 21:17:10 +1000 Subject: powerpc/64: rtas avoid accessing paca in 32-bit mode Commit 177ba7c647f3 ("powerpc/mm/radix: Limit paca allocation in radix") limited the paca allocation address to 1G on pSeries because RTAS return accesses the paca in 32-bit mode: On return from RTAS we access the paca variables and we have 64 bit disabled. This requires us to limit paca in 32 bit range. Fix this by setting ppc64_rma_size to first_memblock_size/1G range. Avoid this limit by switching to 64-bit mode before accessing any memory. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 36878b6ee8b8..371c05fe250a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1083,6 +1083,17 @@ __enter_rtas: rtas_return_loc: FIXUP_ENDIAN + /* + * Clear RI and set SF before anything. + */ + mfmsr r6 + li r0,MSR_RI + andc r6,r6,r0 + sldi r0,r0,(MSR_SF_LG - MSR_RI_LG) + or r6,r6,r0 + sync + mtmsrd r6 + /* relocation is off at this point */ GET_PACA(r4) clrldi r4,r4,2 /* convert to realmode address */ @@ -1091,12 +1102,6 @@ rtas_return_loc: 0: mflr r3 ld r3,(1f-0b)(r3) /* get &rtas_restore_regs */ - mfmsr r6 - li r0,MSR_RI - andc r6,r6,r0 - sync - mtmsrd r6 - ld r1,PACAR1(r4) /* Restore our SP */ ld r4,PACASAVEDMSR(r4) /* Restore our MSR */ -- cgit v1.2.3 From 5eae82cab5d7e13292d53205c0cc145c110c92be Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 22 Dec 2017 21:17:11 +1000 Subject: powerpc/pseries: lift RTAS limit for radix With the previous patch to switch to 64-bit mode after returning from RTAS and before doing any memory accesses, the RMA limit need not be clamped to 1GB to avoid RTAS bugs. Keep the 1GB limit for older firmware (although this is more of a kernel concern than RTAS), and remove it starting with POWER9. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pgtable-radix.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 60927d019bbf..573a9a2ee455 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -629,23 +629,10 @@ void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, */ BUG_ON(first_memblock_base != 0); - if (!early_cpu_has_feature(CPU_FTR_HVMODE)) { - /* - * Radix mode guests are not limited by RMA / VRMA addressing. - * - * We do clamp addresses to 1GB to avoid some funky things - * such as RTAS bugs. - */ - ppc64_rma_size = 0x40000000; - /* - * Finally limit subsequent allocations. We really don't want - * to limit the memblock allocations to rma_size. FIXME!! should - * we even limit at all ? - */ - memblock_set_current_limit(first_memblock_base + first_memblock_size); - } else { - ppc64_rma_size = ULONG_MAX; - } + /* + * Radix mode is not limited by RMA / VRMA addressing. + */ + ppc64_rma_size = ULONG_MAX; } #ifdef CONFIG_MEMORY_HOTPLUG -- cgit v1.2.3 From c610d65c0ad00f1b7bfe2f1eb8f867ca74741685 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 22 Dec 2017 21:17:12 +1000 Subject: powerpc/pseries: lift RTAS limit for hash With the previous patch to switch to 64-bit mode after returning from RTAS and before doing any memory accesses, the RMA limit need not be clamped to 1GB to avoid RTAS bugs. Keep the 1GB limit for older firmware (although this is more of a kernel concern than RTAS), and remove it starting with POWER9. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index a386bd854b1c..64e704eefad1 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1853,11 +1853,13 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, * non-virtualized 64-bit hash MMU systems don't have a limitation * on real mode access. * - * We also clamp it to 1G to avoid some funky things - * such as RTAS bugs etc... + * For guests on platforms before POWER9, we clamp the it limit to 1G + * to avoid some funky things such as RTAS bugs etc... */ if (!early_cpu_has_feature(CPU_FTR_HVMODE)) { - ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + ppc64_rma_size = first_memblock_size; + if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) + ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000); /* Finally limit subsequent allocations */ memblock_set_current_limit(ppc64_rma_size); -- cgit v1.2.3 From 59f47eff03a08cd2d91310f1c15a5343fa0071e5 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 4 Jan 2018 15:12:14 -0600 Subject: powerpc/pci: Use of_irq_parse_and_map_pci() helper Instead of calling both of_irq_parse_pci() and irq_create_of_mapping(), call of_irq_parse_and_map_pci(), which does the same thing. This will allow making of_irq_parse_pci() a private, static function. This changes the logic slightly in that the fallback path will also be taken if irq_create_of_mapping() fails internally. Signed-off-by: Rob Herring [bhelgaas: fold in virq init from Stephen Rothwell ] Signed-off-by: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 0ac7aa346c69..344af823c3c4 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -339,8 +339,7 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node) */ static int pci_read_irq_line(struct pci_dev *pci_dev) { - struct of_phandle_args oirq; - unsigned int virq; + unsigned int virq = 0; pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); @@ -348,7 +347,7 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) memset(&oirq, 0xff, sizeof(oirq)); #endif /* Try to get a mapping from the device-tree */ - if (of_irq_parse_pci(pci_dev, &oirq)) { + if (!of_irq_parse_and_map_pci(pci_dev, 0, 0)) { u8 line, pin; /* If that fails, lets fallback to what is in the config @@ -372,11 +371,6 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) virq = irq_create_mapping(NULL, line); if (virq) irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW); - } else { - pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %pOF\n", - oirq.args_count, oirq.args[0], oirq.args[1], oirq.np); - - virq = irq_create_of_mapping(&oirq); } if (!virq) { -- cgit v1.2.3 From d075745d893c78730e4a3b7a60fca23c2f764081 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 17 Jan 2018 20:51:13 +1100 Subject: KVM: PPC: Book3S HV: Improve handling of debug-trigger HMIs on POWER9 Hypervisor maintenance interrupts (HMIs) are generated by various causes, signalled by bits in the hypervisor maintenance exception register (HMER). In most cases calling OPAL to handle the interrupt is the correct thing to do, but the "debug trigger" HMIs signalled by PPC bit 17 (bit 46) of HMER are used to invoke software workarounds for hardware bugs, and OPAL does not have any code to handle this cause. The debug trigger HMI is used in POWER9 DD2.0 and DD2.1 chips to work around a hardware bug in executing vector load instructions to cache inhibited memory. In POWER9 DD2.2 chips, it is generated when conditions are detected relating to threads being in TM (transactional memory) suspended mode when the core SMT configuration needs to be reconfigured. The kernel currently has code to detect the vector CI load condition, but only when the HMI occurs in the host, not when it occurs in a guest. If a HMI occurs in the guest, it is always passed to OPAL, and then we always re-sync the timebase, because the HMI cause might have been a timebase error, for which OPAL would re-sync the timebase, thus removing the timebase offset which KVM applied for the guest. Since we don't know what OPAL did, we don't know whether to subtract the timebase offset from the timebase, so instead we re-sync the timebase. This adds code to determine explicitly what the cause of a debug trigger HMI will be. This is based on a new device-tree property under the CPU nodes called ibm,hmi-special-triggers, if it is present, or otherwise based on the PVR (processor version register). The handling of debug trigger HMIs is pulled out into a separate function which can be called from the KVM guest exit code. If this function handles and clears the HMI, and no other HMI causes remain, then we skip calling OPAL and we proceed to subtract the guest timebase offset from the timebase. The overall handling for HMIs that occur in the host (i.e. not in a KVM guest) is largely unchanged, except that we now don't set the flag for the vector CI load workaround on DD2.2 processors. This also removes a BUG_ON in the KVM code. BUG_ON is generally not useful in KVM guest entry/exit code since it is difficult to handle the resulting trap gracefully. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hmi.h | 4 + arch/powerpc/include/asm/reg.h | 5 +- arch/powerpc/kernel/mce.c | 142 +++++++++++++++++++++++++------- arch/powerpc/kvm/book3s_hv_ras.c | 8 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 9 +- 5 files changed, 131 insertions(+), 37 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h index 85b7a1a21e22..9c14f7b5c46c 100644 --- a/arch/powerpc/include/asm/hmi.h +++ b/arch/powerpc/include/asm/hmi.h @@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void); static inline void wait_for_subcore_guest_exit(void) { } static inline void wait_for_tb_resync(void) { } #endif + +struct pt_regs; +extern long hmi_handle_debugtrig(struct pt_regs *regs); + #endif /* __ASM_PPC64_HMI_H__ */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b779f3ccd412..14e41b843952 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -432,8 +432,9 @@ #define SPRN_LPID 0x13F /* Logical Partition Identifier */ #endif #define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ -#define SPRN_HMER 0x150 /* Hardware m? error recovery */ -#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ +#define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */ +#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */ +#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */ #define SPRN_PCR 0x152 /* Processor compatibility register */ #define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */ #define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 742e4658c5dc..d2fecaec4fec 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs) return handled; } -long hmi_exception_realmode(struct pt_regs *regs) +/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */ +static enum { + DTRIG_UNKNOWN, + DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */ + DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */ +} hmer_debug_trig_function; + +static int init_debug_trig_function(void) { - __this_cpu_inc(irq_stat.hmi_exceptions); - -#ifdef CONFIG_PPC_BOOK3S_64 - /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */ - if (pvr_version_is(PVR_POWER9)) { - unsigned long hmer = mfspr(SPRN_HMER); - - /* Do we have the debug bit set */ - if (hmer & PPC_BIT(17)) { - hmer &= ~PPC_BIT(17); - mtspr(SPRN_HMER, hmer); - - /* - * Now to avoid problems with soft-disable we - * only do the emulation if we are coming from - * user space - */ - if (user_mode(regs)) - local_paca->hmi_p9_special_emu = 1; - - /* - * Don't bother going to OPAL if that's the - * only relevant bit. - */ - if (!(hmer & mfspr(SPRN_HMEER))) - return local_paca->hmi_p9_special_emu; + int pvr; + struct device_node *cpun; + struct property *prop = NULL; + const char *str; + + /* First look in the device tree */ + preempt_disable(); + cpun = of_get_cpu_node(smp_processor_id(), NULL); + if (cpun) { + of_property_for_each_string(cpun, "ibm,hmi-special-triggers", + prop, str) { + if (strcmp(str, "bit17-vector-ci-load") == 0) + hmer_debug_trig_function = DTRIG_VECTOR_CI; + else if (strcmp(str, "bit17-tm-suspend-escape") == 0) + hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; } + of_node_put(cpun); + } + preempt_enable(); + + /* If we found the property, don't look at PVR */ + if (prop) + goto out; + + pvr = mfspr(SPRN_PVR); + /* Check for POWER9 Nimbus (scale-out) */ + if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) { + /* DD2.2 and later */ + if ((pvr & 0xfff) >= 0x202) + hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; + /* DD2.0 and DD2.1 - used for vector CI load emulation */ + else if ((pvr & 0xfff) >= 0x200) + hmer_debug_trig_function = DTRIG_VECTOR_CI; + } + + out: + switch (hmer_debug_trig_function) { + case DTRIG_VECTOR_CI: + pr_debug("HMI debug trigger used for vector CI load\n"); + break; + case DTRIG_SUSPEND_ESCAPE: + pr_debug("HMI debug trigger used for TM suspend escape\n"); + break; + default: + break; } -#endif /* CONFIG_PPC_BOOK3S_64 */ + return 0; +} +__initcall(init_debug_trig_function); + +/* + * Handle HMIs that occur as a result of a debug trigger. + * Return values: + * -1 means this is not a HMI cause that we know about + * 0 means no further handling is required + * 1 means further handling is required + */ +long hmi_handle_debugtrig(struct pt_regs *regs) +{ + unsigned long hmer = mfspr(SPRN_HMER); + long ret = 0; + + /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */ + if (!((hmer & HMER_DEBUG_TRIG) + && hmer_debug_trig_function != DTRIG_UNKNOWN)) + return -1; + + hmer &= ~HMER_DEBUG_TRIG; + /* HMER is a write-AND register */ + mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG); + + switch (hmer_debug_trig_function) { + case DTRIG_VECTOR_CI: + /* + * Now to avoid problems with soft-disable we + * only do the emulation if we are coming from + * host user space + */ + if (regs && user_mode(regs)) + ret = local_paca->hmi_p9_special_emu = 1; + + break; + + default: + break; + } + + /* + * See if any other HMI causes remain to be handled + */ + if (hmer & mfspr(SPRN_HMEER)) + return -1; + + return ret; +} + +/* + * Return values: + */ +long hmi_exception_realmode(struct pt_regs *regs) +{ + int ret; + + __this_cpu_inc(irq_stat.hmi_exceptions); + + ret = hmi_handle_debugtrig(regs); + if (ret >= 0) + return ret; wait_for_subcore_guest_exit(); diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index c356f9a40b24..c296343d0dcc 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -268,17 +268,19 @@ static void kvmppc_tb_resync_done(void) * secondary threads to proceed. * - All secondary threads will eventually call opal hmi handler on * their exit path. + * + * Returns 1 if the timebase offset should be applied, 0 if not. */ long kvmppc_realmode_hmi_handler(void) { - int ptid = local_paca->kvm_hstate.ptid; bool resync_req; - /* This is only called on primary thread. */ - BUG_ON(ptid != 0); __this_cpu_inc(irq_stat.hmi_exceptions); + if (hmi_handle_debugtrig(NULL) >= 0) + return 1; + /* * By now primary thread has already completed guest->host * partition switch but haven't signaled secondaries yet. diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 2659844784b8..bd0b623335af 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1909,16 +1909,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) bne 27f bl kvmppc_realmode_hmi_handler nop + cmpdi r3, 0 li r12, BOOK3S_INTERRUPT_HMI /* - * At this point kvmppc_realmode_hmi_handler would have resync-ed - * the TB. Hence it is not required to subtract guest timebase - * offset from timebase. So, skip it. + * At this point kvmppc_realmode_hmi_handler may have resync-ed + * the TB, and if it has, we must not subtract the guest timebase + * offset from the timebase. So, skip it. * * Also, do not call kvmppc_subcore_exit_guest() because it has * been invoked as part of kvmppc_realmode_hmi_handler(). */ - b 30f + beq 30f 27: /* Subtract timebase offset from timebase */ -- cgit v1.2.3 From 1af19331a3a18296a918802dbe032a13328e264d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 22 Dec 2017 21:17:13 +1000 Subject: powerpc/64s: Relax PACA address limitations Book3S PACA memory allocation is restricted by the RMA limit and also must not take SLB faults when accessed in virtual mode. Currently a fixed 256MB limit is used for this, which is imprecise and sub-optimal. Update the paca allocation limits to use use the ppc64_rma_size for RMA limit, and share the safe_stack_limit() that is currently used for stack allocations that must not take virtual mode faults. The safe_stack_limit() name is changed to ppc64_bolted_size() to match ppc64_rma_size and some comments are updated. We also need to use early_mmu_has_feature() because we are now calling this function prior to the jump label patching that enables mmu_has_feature(). Signed-off-by: Nicholas Piggin [mpe: Change mmu_has_feature() to early_mmu_has_feature()] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/paca.c | 13 +++++++------ arch/powerpc/kernel/setup.h | 4 ++++ arch/powerpc/kernel/setup_64.c | 26 ++++++++++++++++---------- 3 files changed, 27 insertions(+), 16 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index d6597038931d..95ffedf14885 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -18,6 +18,8 @@ #include #include +#include "setup.h" + #ifdef CONFIG_PPC_BOOK3S /* @@ -208,15 +210,14 @@ void __init allocate_pacas(void) u64 limit; int cpu; - limit = ppc64_rma_size; - #ifdef CONFIG_PPC_BOOK3S_64 /* - * We can't take SLB misses on the paca, and we want to access them - * in real mode, so allocate them within the RMA and also within - * the first segment. + * We access pacas in real mode, and cannot take SLB faults + * on them when in virtual mode, so allocate them accordingly. */ - limit = min(0x10000000ULL, limit); + limit = min(ppc64_bolted_size(), ppc64_rma_size); +#else + limit = ppc64_rma_size; #endif paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 21c18071d9d5..3fc11e30308f 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -51,6 +51,10 @@ void record_spr_defaults(void); static inline void record_spr_defaults(void) { }; #endif +#ifdef CONFIG_PPC64 +u64 ppc64_bolted_size(void); +#endif + /* * Having this in kvm_ppc.h makes include dependencies too * tricky to solve for setup-common.c so have it here. diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index d3124c302146..8f285d6a3db1 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -565,25 +565,31 @@ void __init initialize_cache_info(void) DBG(" <- initialize_cache_info()\n"); } -/* This returns the limit below which memory accesses to the linear - * mapping are guarnateed not to cause a TLB or SLB miss. This is - * used to allocate interrupt or emergency stacks for which our - * exception entry path doesn't deal with being interrupted. +/* + * This returns the limit below which memory accesses to the linear + * mapping are guarnateed not to cause an architectural exception (e.g., + * TLB or SLB miss fault). + * + * This is used to allocate PACAs and various interrupt stacks that + * that are accessed early in interrupt handlers that must not cause + * re-entrant interrupts. */ -static __init u64 safe_stack_limit(void) +__init u64 ppc64_bolted_size(void) { #ifdef CONFIG_PPC_BOOK3E /* Freescale BookE bolts the entire linear mapping */ - if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) + /* XXX: BookE ppc64_rma_limit setup seems to disagree? */ + if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) return linear_map_top; /* Other BookE, we assume the first GB is bolted */ return 1ul << 30; #else + /* BookS radix, does not take faults on linear mapping */ if (early_radix_enabled()) return ULONG_MAX; - /* BookS, the first segment is bolted */ - if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) + /* BookS hash, the first segment is bolted */ + if (early_mmu_has_feature(MMU_FTR_1T_SEGMENT)) return 1UL << SID_SHIFT_1T; return 1UL << SID_SHIFT; #endif @@ -591,7 +597,7 @@ static __init u64 safe_stack_limit(void) void __init irqstack_early_init(void) { - u64 limit = safe_stack_limit(); + u64 limit = ppc64_bolted_size(); unsigned int i; /* @@ -676,7 +682,7 @@ void __init emergency_stack_init(void) * initialized in kernel/irq.c. These are initialized here in order * to have emergency stacks available as early as possible. */ - limit = min(safe_stack_limit(), ppc64_rma_size); + limit = min(ppc64_bolted_size(), ppc64_rma_size); for_each_possible_cpu(i) { struct thread_info *ti; -- cgit v1.2.3 From 47712a921bb781caf69fca9eae43be19968816cb Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 17 Jan 2018 22:47:22 +1000 Subject: powerpc/watchdog: remove arch_trigger_cpumask_backtrace The powerpc NMI IPIs may not be recoverable if they are taken in some sections of code, and also there have been and still are issues with taking NMIs (in KVM guest code, in firmware, etc) which makes them a bit dangerous to use. Generic code like softlockup detector and rcu stall detectors really hammer on trigger_*_backtrace, which has lead to further problems because we've implemented it with the NMI. So stop providing NMI backtraces for now. Importantly, the powerpc code uses NMI IPIs in crash/debug, and the SMP hardlockup watchdog. So if the softlockup and rcu hang detection traces are not being printed because the CPU is stuck with interrupts off, then the hard lockup watchdog should get it with the NMI IPI. Fixes: 2104180a5369 ("powerpc/64s: implement arch-specific hardlockup watchdog") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nmi.h | 4 ---- arch/powerpc/kernel/watchdog.c | 22 ---------------------- 2 files changed, 26 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index e97f58689ca7..9c80939b4d14 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -4,10 +4,6 @@ #ifdef CONFIG_PPC_WATCHDOG extern void arch_touch_nmi_watchdog(void); -extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, - bool exclude_self); -#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace - #else static inline void arch_touch_nmi_watchdog(void) {} #endif diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 87da80ccced1..3963baaba92d 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -393,25 +393,3 @@ int __init watchdog_nmi_probe(void) } return 0; } - -static void handle_backtrace_ipi(struct pt_regs *regs) -{ - nmi_cpu_backtrace(regs); -} - -static void raise_backtrace_ipi(cpumask_t *mask) -{ - unsigned int cpu; - - for_each_cpu(cpu, mask) { - if (cpu == smp_processor_id()) - handle_backtrace_ipi(NULL); - else - smp_send_nmi_ipi(cpu, handle_backtrace_ipi, 1000000); - } -} - -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) -{ - nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi); -} -- cgit v1.2.3 From c16bee4bded5449ec3b3ec73579ba29881f2e978 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 17 Nov 2017 02:00:49 +1000 Subject: powerpc: define __ARCH_IRQ_EXIT_IRQS_DISABLED powerpc calls irq_exit() with local irqs disabled, therefore it can define __ARCH_IRQ_EXIT_IRQS_DISABLED. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hardirq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 456f9e7b8d83..5986d473722b 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -29,6 +29,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); #define local_softirq_pending() __this_cpu_read(irq_stat.__softirq_pending) #define __ARCH_SET_SOFTIRQ_PENDING +#define __ARCH_IRQ_EXIT_IRQS_DISABLED #define set_softirq_pending(x) __this_cpu_write(irq_stat.__softirq_pending, (x)) #define or_softirq_pending(x) __this_cpu_or(irq_stat.__softirq_pending, (x)) -- cgit v1.2.3 From 7a074fc08389f43b448ebef74bf56ba0f6f087d9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 19 Jan 2018 15:20:12 +1100 Subject: powerpc/64s: Fix ps3 build error due to tlbiel_all() The recent changes to TLB handling broke the PS3 build: arch/powerpc/include/asm/book3s/64/tlbflush.h:30: undefined reference to `.hash__tlbiel_all' Fix it by adding an fallback version of tlbiel_all() for non-native builds. It should never be called, due to checks in callers so it calls BUG(). We should probably clean it up further but this will suffice for now. Fixes: d4748276ae14 ("powerpc/64s: Improve local TLB flush for boot and MCE on POWER9") Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/tlbflush.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 9befb4df235c..0cac17253513 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -14,6 +14,7 @@ enum { TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */ }; +#ifdef CONFIG_PPC_NATIVE static inline void tlbiel_all(void) { /* @@ -29,6 +30,9 @@ static inline void tlbiel_all(void) else hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL); } +#else +static inline void tlbiel_all(void) { BUG(); }; +#endif static inline void tlbiel_all_lpid(bool radix) { -- cgit v1.2.3 From a8a4b03ab95f3e99196b0a4dd40804620ea77e74 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 20 Aug 2017 23:28:24 +0530 Subject: powerpc: Hard wire PT_SOFTE value to 1 in ptrace & signals We have always had softe in pt_regs, and accessible via PT_SOFTE, even though it is not userspace state. The value userspace sees should always be 1, because we should never be in userspace with interrupts soft disabled. In a subsequent patch we will be changing the semantics of the kernel softe value, so hard wire the value to 1 to retain the existing semantics. As far as we know nothing ever looks at it, but better safe than sorry. Signed-off-by: Madhavan Srinivasan [mpe: Split out of larger patch, write change log] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ptrace.c | 12 ++++++++++++ arch/powerpc/kernel/signal_32.c | 8 ++++++++ arch/powerpc/kernel/signal_64.c | 3 +++ 3 files changed, 23 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index f52ad5bb7109..bd2c49475473 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -283,6 +283,18 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data) if (regno == PT_DSCR) return get_user_dscr(task, data); +#ifdef CONFIG_PPC64 + /* + * softe copies paca->soft_enabled variable state. Since soft_enabled is + * no more used as a flag, lets force usr to alway see the softe value as 1 + * which means interrupts are not soft disabled. + */ + if (regno == PT_SOFTE) { + *data = 1; + return 0; + } +#endif + if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) { *data = ((unsigned long *)task->thread.regs)[regno]; return 0; diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 9ffd73296f64..a30c6562ed66 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -111,12 +111,20 @@ static inline int save_general_regs(struct pt_regs *regs, { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int i; + /* Force usr to alway see softe as 1 (interrupts enabled) */ + elf_greg_t64 softe = 0x1; WARN_ON(!FULL_REGS(regs)); for (i = 0; i <= PT_RESULT; i ++) { if (i == 14 && !FULL_REGS(regs)) i = 32; + if ( i == PT_SOFTE) { + if(__put_user((unsigned int)softe, &frame->mc_gregs[i])) + return -EFAULT; + else + continue; + } if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i])) return -EFAULT; } diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 4b9ca3570344..2705fba544ad 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -110,6 +110,8 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs = tsk->thread.regs; unsigned long msr = regs->msr; long err = 0; + /* Force usr to alway see softe as 1 (interrupts enabled) */ + unsigned long softe = 0x1; BUG_ON(tsk != current); @@ -169,6 +171,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, WARN_ON(!FULL_REGS(regs)); err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE); err |= __put_user(msr, &sc->gp_regs[PT_MSR]); + err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]); err |= __put_user(signr, &sc->signal); err |= __put_user(handler, &sc->handler); if (set != NULL) -- cgit v1.2.3 From c2e480ba822718190e58849b79a76db13c3dac18 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 20 Dec 2017 09:25:42 +0530 Subject: powerpc/64: Add #defines for paca->soft_enabled flags Two #defines IRQS_ENABLED and IRQS_DISABLED are added to be used when updating paca->soft_enabled. Replace the hardcoded values used when updating paca->soft_enabled with IRQ_(EN|DIS)ABLED #define. No logic change. Reviewed-by: Nicholas Piggin Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 2 +- arch/powerpc/include/asm/hw_irq.h | 21 ++++++++++++++------- arch/powerpc/include/asm/irqflags.h | 6 +++--- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kernel/entry_64.S | 16 ++++++++-------- arch/powerpc/kernel/exceptions-64e.S | 6 +++--- arch/powerpc/kernel/head_64.S | 5 +++-- arch/powerpc/kernel/idle_book3e.S | 3 ++- arch/powerpc/kernel/idle_power4.S | 3 ++- arch/powerpc/kernel/irq.c | 9 +++++---- arch/powerpc/kernel/process.c | 3 ++- arch/powerpc/kernel/setup_64.c | 3 +++ arch/powerpc/kernel/time.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/perf/core-book3s.c | 2 +- 15 files changed, 50 insertions(+), 35 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index b27205297e1d..69c4e3d35e02 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -499,7 +499,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define __SOFTEN_TEST(h, vec) \ lbz r10,PACASOFTIRQEN(r13); \ - cmpwi r10,0; \ + cmpwi r10,IRQS_DISABLED; \ li r10,SOFTEN_VALUE_##vec; \ beq masked_##h##interrupt diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 3818fa0164f0..7c2717dfd89a 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -28,6 +28,12 @@ #define PACA_IRQ_EE_EDGE 0x10 /* BookE only */ #define PACA_IRQ_HMI 0x20 +/* + * flags for paca->soft_enabled + */ +#define IRQS_ENABLED 1 +#define IRQS_DISABLED 0 + #endif /* CONFIG_PPC64 */ #ifndef __ASSEMBLY__ @@ -60,9 +66,10 @@ static inline unsigned long arch_local_irq_disable(void) unsigned long flags, zero; asm volatile( - "li %1,0; lbz %0,%2(13); stb %1,%2(13)" + "li %1,%3; lbz %0,%2(13); stb %1,%2(13)" : "=r" (flags), "=&r" (zero) - : "i" (offsetof(struct paca_struct, soft_enabled)) + : "i" (offsetof(struct paca_struct, soft_enabled)),\ + "i" (IRQS_DISABLED) : "memory"); return flags; @@ -72,7 +79,7 @@ extern void arch_local_irq_restore(unsigned long); static inline void arch_local_irq_enable(void) { - arch_local_irq_restore(1); + arch_local_irq_restore(IRQS_ENABLED); } static inline unsigned long arch_local_irq_save(void) @@ -82,7 +89,7 @@ static inline unsigned long arch_local_irq_save(void) static inline bool arch_irqs_disabled_flags(unsigned long flags) { - return flags == 0; + return flags == IRQS_DISABLED; } static inline bool arch_irqs_disabled(void) @@ -102,9 +109,9 @@ static inline bool arch_irqs_disabled(void) u8 _was_enabled; \ __hard_irq_disable(); \ _was_enabled = local_paca->soft_enabled; \ - local_paca->soft_enabled = 0; \ + local_paca->soft_enabled = IRQS_DISABLED;\ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ - if (_was_enabled) \ + if (_was_enabled == IRQS_ENABLED) \ trace_hardirqs_off(); \ } while(0) @@ -127,7 +134,7 @@ static inline void may_hard_irq_enable(void) static inline bool arch_irq_disabled_regs(struct pt_regs *regs) { - return !regs->softe; + return (regs->softe == IRQS_DISABLED); } extern bool prep_irq_for_idle(void); diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index 1aeb5f13b8c4..8d9fdc84828d 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -49,8 +49,8 @@ #define RECONCILE_IRQ_STATE(__rA, __rB) \ lbz __rA,PACASOFTIRQEN(r13); \ lbz __rB,PACAIRQHAPPENED(r13); \ - cmpwi cr0,__rA,0; \ - li __rA,0; \ + cmpwi cr0,__rA,IRQS_DISABLED;\ + li __rA,IRQS_DISABLED; \ ori __rB,__rB,PACA_IRQ_HARD_DIS; \ stb __rB,PACAIRQHAPPENED(r13); \ beq 44f; \ @@ -64,7 +64,7 @@ #define RECONCILE_IRQ_STATE(__rA, __rB) \ lbz __rA,PACAIRQHAPPENED(r13); \ - li __rB,0; \ + li __rB,IRQS_DISABLED; \ ori __rA,__rA,PACA_IRQ_HARD_DIS; \ stb __rB,PACASOFTIRQEN(r13); \ stb __rA,PACAIRQHAPPENED(r13) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 941c2a3f231b..68484d77a3cb 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -873,7 +873,7 @@ static inline void kvmppc_fix_ee_before_entry(void) /* Only need to enable IRQs by hard enabling them after this */ local_paca->irq_happened = 0; - local_paca->soft_enabled = 1; + local_paca->soft_enabled = IRQS_ENABLED; #endif } diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 371c05fe250a..f8b32224dc11 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -130,7 +130,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) */ #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG) lbz r10,PACASOFTIRQEN(r13) - xori r10,r10,1 + xori r10,r10,IRQS_ENABLED 1: tdnei r10,0 EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif @@ -147,7 +147,7 @@ system_call: /* label this so stack traces look sane */ /* We do need to set SOFTE in the stack frame or the return * from interrupt will be painful */ - li r10,1 + li r10,IRQS_ENABLED std r10,SOFTE(r1) CURRENT_THREAD_INFO(r11, r1) @@ -743,7 +743,7 @@ resume_kernel: lwz r8,TI_PREEMPT(r9) cmpwi cr1,r8,0 ld r0,SOFTE(r1) - cmpdi r0,0 + cmpdi r0,IRQS_DISABLED crandc eq,cr1*4+eq,eq bne restore @@ -783,11 +783,11 @@ restore: */ ld r5,SOFTE(r1) lbz r6,PACASOFTIRQEN(r13) - cmpwi cr0,r5,0 + cmpwi cr0,r5,IRQS_DISABLED beq .Lrestore_irq_off /* We are enabling, were we already enabled ? Yes, just return */ - cmpwi cr0,r6,1 + cmpwi cr0,r6,IRQS_ENABLED beq cr0,.Ldo_restore /* @@ -806,7 +806,7 @@ restore: */ .Lrestore_no_replay: TRACE_ENABLE_INTS - li r0,1 + li r0,IRQS_ENABLED stb r0,PACASOFTIRQEN(r13); /* @@ -915,7 +915,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG) /* The interrupt should not have soft enabled. */ lbz r7,PACASOFTIRQEN(r13) -1: tdnei r7,0 +1: tdnei r7,IRQS_DISABLED EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif b .Ldo_restore @@ -1036,7 +1036,7 @@ _GLOBAL(enter_rtas) * check it with the asm equivalent of WARN_ON */ lbz r0,PACASOFTIRQEN(r13) -1: tdnei r0,0 +1: tdnei r0,IRQS_DISABLED EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index acd8ca76233e..80cc91bbcf1a 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -210,9 +210,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) ld r5,SOFTE(r1) /* Interrupts had better not already be enabled... */ - twnei r6,0 + twnei r6,IRQS_DISABLED - cmpwi cr0,r5,0 + cmpwi cr0,r5,IRQS_DISABLED beq 1f TRACE_ENABLE_INTS @@ -352,7 +352,7 @@ ret_from_mc_except: #define PROLOG_ADDITION_MASKABLE_GEN(n) \ lbz r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \ - cmpwi cr0,r10,0; /* yes -> go out of line */ \ + cmpwi cr0,r10,IRQS_DISABLED; /* yes -> go out of line */ \ beq masked_interrupt_book3e_##n #define PROLOG_ADDITION_2REGS_GEN(n) \ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index aa71a90f5222..0deef350004f 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -765,7 +765,7 @@ _GLOBAL(pmac_secondary_start) /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) */ - li r0,0 + li r0,IRQS_DISABLED stb r0,PACASOFTIRQEN(r13) li r0,PACA_IRQ_HARD_DIS stb r0,PACAIRQHAPPENED(r13) @@ -822,6 +822,7 @@ __secondary_start: /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) */ + li r7,IRQS_DISABLED stb r7,PACASOFTIRQEN(r13) li r0,PACA_IRQ_HARD_DIS stb r0,PACAIRQHAPPENED(r13) @@ -988,7 +989,7 @@ start_here_common: /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) */ - li r0,0 + li r0,IRQS_DISABLED stb r0,PACASOFTIRQEN(r13) li r0,PACA_IRQ_HARD_DIS stb r0,PACAIRQHAPPENED(r13) diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S index 48c21acef915..1bb1d152aa46 100644 --- a/arch/powerpc/kernel/idle_book3e.S +++ b/arch/powerpc/kernel/idle_book3e.S @@ -17,6 +17,7 @@ #include #include #include +#include /* 64-bit version only for now */ #ifdef CONFIG_PPC64 @@ -46,7 +47,7 @@ _GLOBAL(\name) bl trace_hardirqs_on addi r1,r1,128 #endif - li r0,1 + li r0,IRQS_ENABLED stb r0,PACASOFTIRQEN(r13) /* Interrupts will make use return to LR, so get something we want diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index f57a19348bdd..8b4702d93701 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -15,6 +15,7 @@ #include #include #include +#include #undef DEBUG @@ -53,7 +54,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) mfmsr r7 #endif /* CONFIG_TRACE_IRQFLAGS */ - li r0,1 + li r0,IRQS_ENABLED stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ BEGIN_FTR_SECTION DSSALL diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index b7a84522e652..483a9206554f 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -67,6 +67,7 @@ #include #include #include +#include #ifdef CONFIG_PPC64 #include @@ -231,7 +232,7 @@ notrace void arch_local_irq_restore(unsigned long en) /* Write the new soft-enabled value */ set_soft_enabled(en); - if (!en) + if (en == IRQS_DISABLED) return; /* * From this point onward, we can take interrupts, preempt, @@ -276,7 +277,7 @@ notrace void arch_local_irq_restore(unsigned long en) } #endif /* CONFIG_TRACE_IRQFLAGS */ - set_soft_enabled(0); + set_soft_enabled(IRQS_DISABLED); trace_hardirqs_off(); /* @@ -288,7 +289,7 @@ notrace void arch_local_irq_restore(unsigned long en) /* We can soft-enable now */ trace_hardirqs_on(); - set_soft_enabled(1); + set_soft_enabled(IRQS_ENABLED); /* * And replay if we have to. This will return with interrupts @@ -363,7 +364,7 @@ bool prep_irq_for_idle(void) * of entering the low power state. */ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - local_paca->soft_enabled = 1; + local_paca->soft_enabled = IRQS_ENABLED; /* Tell the caller to enter the low power state */ return true; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index bcd4441304a5..1563d1190da3 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -57,6 +57,7 @@ #include #ifdef CONFIG_PPC64 #include +#include #endif #include #include @@ -1674,7 +1675,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childregs->gpr[14] = ppc_function_entry((void *)usp); #ifdef CONFIG_PPC64 clear_tsk_thread_flag(p, TIF_32BIT); - childregs->softe = 1; + childregs->softe = IRQS_ENABLED; #endif childregs->gpr[15] = kthread_arg; p->thread.regs = NULL; /* no user register state */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 8f285d6a3db1..2fd4e167ef09 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -66,6 +66,7 @@ #include #include #include +#include #include "setup.h" @@ -187,6 +188,8 @@ static void __init fixup_boot_paca(void) get_paca()->cpu_start = 1; /* Allow percpu accesses to work until we setup percpu data */ get_paca()->data_offset = 0; + /* Mark interrupts disabled in PACA */ + get_paca()->soft_enabled = IRQS_DISABLED; } static void __init configure_exceptions(void) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index fe6f3a285455..070092b1ba8a 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -253,7 +253,7 @@ void accumulate_stolen_time(void) * needs to reflect that so various debug stuff doesn't * complain */ - local_paca->soft_enabled = 0; + local_paca->soft_enabled = IRQS_DISABLED; sst = scan_dispatch_log(acct->starttime_user); ust = scan_dispatch_log(acct->starttime); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index f5d1008f8574..760bb5a2b8bd 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -752,7 +752,7 @@ void flush_dcache_icache_hugepage(struct page *page) * So long as we atomically load page table pointers we are safe against teardown, * we can follow the address down to the the page and take a ref on it. * This function need to be called with interrupts disabled. We use this variant - * when we have MSR[EE] = 0 but the paca->soft_enabled = 1 + * when we have MSR[EE] = 0 but the paca->soft_enabled = IRQS_ENABLED */ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, bool *is_thp, unsigned *hpage_shift) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 9e3da168d54c..594261e308b3 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -322,7 +322,7 @@ static inline void perf_read_regs(struct pt_regs *regs) */ static inline int perf_intr_is_nmi(struct pt_regs *regs) { - return !regs->softe; + return (regs->softe == IRQS_DISABLED); } /* -- cgit v1.2.3 From 9f83e00f4cc1c560b6847acecba2b3746c8d8c06 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 20 Dec 2017 09:25:43 +0530 Subject: powerpc/64: Improve inline asm in arch_local_irq_disable arch_local_irq_disable is implemented strangely, with a temporary output register being set to the desired soft_enabled value via an immediate input, which is then used to store to memory. This is not required, the immediate can be specified directly as a register input. For simple cases at least, assembly is unchanged except register mapping. Reviewed-by: Madhavan Srinivasan Signed-off-by: Nicholas Piggin Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hw_irq.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 7c2717dfd89a..4210ddbf38b0 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -63,13 +63,13 @@ static inline unsigned long arch_local_save_flags(void) static inline unsigned long arch_local_irq_disable(void) { - unsigned long flags, zero; + unsigned long flags; asm volatile( - "li %1,%3; lbz %0,%2(13); stb %1,%2(13)" - : "=r" (flags), "=&r" (zero) - : "i" (offsetof(struct paca_struct, soft_enabled)),\ - "i" (IRQS_DISABLED) + "lbz %0,%1(13); stb %2,%1(13)" + : "=&r" (flags) + : "i" (offsetof(struct paca_struct, soft_enabled)), + "r" (IRQS_DISABLED) : "memory"); return flags; -- cgit v1.2.3 From b5c1bd62c054f3cff1a672f9bf1dddefafadffec Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 20 Dec 2017 09:25:44 +0530 Subject: powerpc/64: Fix arch_local_irq_disable() prototype In powerpc/64, the arch_local_irq_disable() function returns unsigned long, which is not consistent with other architectures. Move that set-return asm implementation into arch_local_irq_save(), and make arch_local_irq_disable() return void, simplifying the assembly. Suggested-by: Nicholas Piggin Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hw_irq.h | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 4210ddbf38b0..4c54db29104f 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -61,18 +61,14 @@ static inline unsigned long arch_local_save_flags(void) return flags; } -static inline unsigned long arch_local_irq_disable(void) +static inline void arch_local_irq_disable(void) { - unsigned long flags; - asm volatile( - "lbz %0,%1(13); stb %2,%1(13)" - : "=&r" (flags) - : "i" (offsetof(struct paca_struct, soft_enabled)), - "r" (IRQS_DISABLED) + "stb %0,%1(13)" + : + : "r" (IRQS_DISABLED), + "i" (offsetof(struct paca_struct, soft_enabled)) : "memory"); - - return flags; } extern void arch_local_irq_restore(unsigned long); @@ -84,7 +80,16 @@ static inline void arch_local_irq_enable(void) static inline unsigned long arch_local_irq_save(void) { - return arch_local_irq_disable(); + unsigned long flags; + + asm volatile( + "lbz %0,%1(13); stb %2,%1(13)" + : "=&r" (flags) + : "i" (offsetof(struct paca_struct, soft_enabled)), + "r" (IRQS_DISABLED) + : "memory"); + + return flags; } static inline bool arch_irqs_disabled_flags(unsigned long flags) -- cgit v1.2.3 From 0b63acf4a0eb8843f83954ea1bd29ccdfcbaa778 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 20 Dec 2017 09:25:45 +0530 Subject: powerpc/64: Move set_soft_enabled() and rename Move set_soft_enabled() from powerpc/kernel/irq.c to asm/hw_irq.c, to encourage updates to paca->soft_enabled done via these access function. Add "memory" clobber to hint compiler since paca->soft_enabled memory is the target here. Renaming it as soft_enabled_set() will make namespaces works better as prefix than a postfix when new soft_enabled manipulation functions are introduced. Reviewed-by: Nicholas Piggin Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hw_irq.h | 22 ++++++++++++++++------ arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kernel/irq.c | 14 ++++---------- arch/powerpc/kernel/setup_64.c | 4 ++-- arch/powerpc/kernel/time.c | 4 ++-- 5 files changed, 25 insertions(+), 21 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 4c54db29104f..d046d9f3b777 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -49,6 +49,21 @@ extern void unknown_exception(struct pt_regs *regs); #ifdef CONFIG_PPC64 #include +/* + * The "memory" clobber acts as both a compiler barrier + * for the critical section and as a clobber because + * we changed paca->soft_enabled + */ +static inline notrace void soft_enabled_set(unsigned long enable) +{ + asm volatile( + "stb %0,%1(13)" + : + : "r" (enable), + "i" (offsetof(struct paca_struct, soft_enabled)) + : "memory"); +} + static inline unsigned long arch_local_save_flags(void) { unsigned long flags; @@ -63,12 +78,7 @@ static inline unsigned long arch_local_save_flags(void) static inline void arch_local_irq_disable(void) { - asm volatile( - "stb %0,%1(13)" - : - : "r" (IRQS_DISABLED), - "i" (offsetof(struct paca_struct, soft_enabled)) - : "memory"); + soft_enabled_set(IRQS_DISABLED); } extern void arch_local_irq_restore(unsigned long); diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 68484d77a3cb..369f0640826c 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -873,7 +873,7 @@ static inline void kvmppc_fix_ee_before_entry(void) /* Only need to enable IRQs by hard enabling them after this */ local_paca->irq_happened = 0; - local_paca->soft_enabled = IRQS_ENABLED; + soft_enabled_set(IRQS_ENABLED); #endif } diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 483a9206554f..6c04e465caf5 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -107,12 +107,6 @@ static inline notrace unsigned long get_irq_happened(void) return happened; } -static inline notrace void set_soft_enabled(unsigned long enable) -{ - __asm__ __volatile__("stb %0,%1(13)" - : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); -} - static inline notrace int decrementer_check_overflow(void) { u64 now = get_tb_or_rtc(); @@ -231,7 +225,7 @@ notrace void arch_local_irq_restore(unsigned long en) unsigned int replay; /* Write the new soft-enabled value */ - set_soft_enabled(en); + soft_enabled_set(en); if (en == IRQS_DISABLED) return; /* @@ -277,7 +271,7 @@ notrace void arch_local_irq_restore(unsigned long en) } #endif /* CONFIG_TRACE_IRQFLAGS */ - set_soft_enabled(IRQS_DISABLED); + soft_enabled_set(IRQS_DISABLED); trace_hardirqs_off(); /* @@ -289,7 +283,7 @@ notrace void arch_local_irq_restore(unsigned long en) /* We can soft-enable now */ trace_hardirqs_on(); - set_soft_enabled(IRQS_ENABLED); + soft_enabled_set(IRQS_ENABLED); /* * And replay if we have to. This will return with interrupts @@ -364,7 +358,7 @@ bool prep_irq_for_idle(void) * of entering the low power state. */ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - local_paca->soft_enabled = IRQS_ENABLED; + soft_enabled_set(IRQS_ENABLED); /* Tell the caller to enter the low power state */ return true; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2fd4e167ef09..f6bedebda90a 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -189,7 +189,7 @@ static void __init fixup_boot_paca(void) /* Allow percpu accesses to work until we setup percpu data */ get_paca()->data_offset = 0; /* Mark interrupts disabled in PACA */ - get_paca()->soft_enabled = IRQS_DISABLED; + soft_enabled_set(IRQS_DISABLED); } static void __init configure_exceptions(void) @@ -352,7 +352,7 @@ void __init early_setup(unsigned long dt_ptr) void early_setup_secondary(void) { /* Mark interrupts disabled in PACA */ - get_paca()->soft_enabled = 0; + soft_enabled_set(IRQS_DISABLED); /* Initialize the hash table or TLB handling */ early_init_mmu_secondary(); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 070092b1ba8a..320b8459c74e 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -253,7 +253,7 @@ void accumulate_stolen_time(void) * needs to reflect that so various debug stuff doesn't * complain */ - local_paca->soft_enabled = IRQS_DISABLED; + soft_enabled_set(IRQS_DISABLED); sst = scan_dispatch_log(acct->starttime_user); ust = scan_dispatch_log(acct->starttime); @@ -261,7 +261,7 @@ void accumulate_stolen_time(void) acct->utime -= ust; acct->steal_time += ust + sst; - local_paca->soft_enabled = save_soft_enabled; + soft_enabled_set(save_soft_enabled); } static inline u64 calculate_stolen_time(u64 stop_tb) -- cgit v1.2.3 From e0b5687bed13ddbf99985d77910b9adfd429bf12 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 20 Dec 2017 09:25:46 +0530 Subject: powerpc/64: Implement and use soft_enabled_return API Add a new wrapper function, soft_enabled_return(), added to return paca->soft_enabled value. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hw_irq.h | 21 +++++++++++++-------- arch/powerpc/kernel/time.c | 2 +- 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index d046d9f3b777..f9791884af08 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -49,6 +49,18 @@ extern void unknown_exception(struct pt_regs *regs); #ifdef CONFIG_PPC64 #include +static inline notrace unsigned long soft_enabled_return(void) +{ + unsigned long flags; + + asm volatile( + "lbz %0,%1(13)" + : "=r" (flags) + : "i" (offsetof(struct paca_struct, soft_enabled))); + + return flags; +} + /* * The "memory" clobber acts as both a compiler barrier * for the critical section and as a clobber because @@ -66,14 +78,7 @@ static inline notrace void soft_enabled_set(unsigned long enable) static inline unsigned long arch_local_save_flags(void) { - unsigned long flags; - - asm volatile( - "lbz %0,%1(13)" - : "=r" (flags) - : "i" (offsetof(struct paca_struct, soft_enabled))); - - return flags; + return soft_enabled_return(); } static inline void arch_local_irq_disable(void) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 320b8459c74e..daa6e9e35ab9 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -244,7 +244,7 @@ static u64 scan_dispatch_log(u64 stop_tb) void accumulate_stolen_time(void) { u64 sst, ust; - u8 save_soft_enabled = local_paca->soft_enabled; + unsigned long save_soft_enabled = soft_enabled_return(); struct cpu_accounting_data *acct = &local_paca->accounting; /* We are called early in the exception entry, before -- cgit v1.2.3 From a67c543aacb247a74e9d48fd9fde6d8369cea10b Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 20 Dec 2017 09:25:47 +0530 Subject: powerpc/64: Implement and use soft_enabled_set_return API Add a new wrapper function, soft_enabled_set_return(), added to do the paca->soft_enabled updates requiring a set-return. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hw_irq.h | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index f9791884af08..c1764fabf181 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -76,6 +76,20 @@ static inline notrace void soft_enabled_set(unsigned long enable) : "memory"); } +static inline notrace unsigned long soft_enabled_set_return(unsigned long enable) +{ + unsigned long flags; + + asm volatile( + "lbz %0,%1(13); stb %2,%1(13)" + : "=&r" (flags) + : "i" (offsetof(struct paca_struct, soft_enabled)), + "r" (enable) + : "memory"); + + return flags; +} + static inline unsigned long arch_local_save_flags(void) { return soft_enabled_return(); @@ -95,16 +109,7 @@ static inline void arch_local_irq_enable(void) static inline unsigned long arch_local_irq_save(void) { - unsigned long flags; - - asm volatile( - "lbz %0,%1(13); stb %2,%1(13)" - : "=&r" (flags) - : "i" (offsetof(struct paca_struct, soft_enabled)), - "r" (IRQS_DISABLED) - : "memory"); - - return flags; + return soft_enabled_set_return(IRQS_DISABLED); } static inline bool arch_irqs_disabled_flags(unsigned long flags) -- cgit v1.2.3 From acb396d7c2b8b5f0f567c980185bbddd10534b56 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 20 Dec 2017 09:25:48 +0530 Subject: powerpc/64: Cleanup hard_irq_disable() macro Minor cleanup to use helper function for manipulating paca->soft_enabled variable. Suggested-by: Nicholas Piggin Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hw_irq.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index c1764fabf181..52afb1595cb0 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -131,12 +131,11 @@ static inline bool arch_irqs_disabled(void) #endif #define hard_irq_disable() do { \ - u8 _was_enabled; \ + unsigned long flags; \ __hard_irq_disable(); \ - _was_enabled = local_paca->soft_enabled; \ - local_paca->soft_enabled = IRQS_DISABLED;\ + flags = soft_enabled_set_return(IRQS_DISABLED); \ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ - if (_was_enabled == IRQS_ENABLED) \ + if (!arch_irqs_disabled_flags(flags)) \ trace_hardirqs_off(); \ } while(0) -- cgit v1.2.3 From 01417c6cc7dc9195f721f7f9e9ea066090ccc99d Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 20 Dec 2017 09:25:49 +0530 Subject: powerpc/64: Change soft_enabled from flag to bitmask "paca->soft_enabled" is used as a flag to mask some of interrupts. Currently supported flags values and their details: soft_enabled MSR[EE] 0 0 Disabled (PMI and HMI not masked) 1 1 Enabled "paca->soft_enabled" is initialized to 1 to make the interripts as enabled. arch_local_irq_disable() will toggle the value when interrupts needs to disbled. At this point, the interrupts are not actually disabled, instead, interrupt vector has code to check for the flag and mask it when it occurs. By "mask it", it update interrupt paca->irq_happened and return. arch_local_irq_restore() is called to re-enable interrupts, which checks and replays interrupts if any occured. Now, as mentioned, current logic doesnot mask "performance monitoring interrupts" and PMIs are implemented as NMI. But this patchset depends on local_irq_* for a successful local_* update. Meaning, mask all possible interrupts during local_* update and replay them after the update. So the idea here is to reserve the "paca->soft_enabled" logic. New values and details: soft_enabled MSR[EE] 1 0 Disabled (PMI and HMI not masked) 0 1 Enabled Reason for the this change is to create foundation for a third mask value "0x2" for "soft_enabled" to add support to mask PMIs. When ->soft_enabled is set to a value "3", PMI interrupts are mask and when set to a value of "1", PMI are not mask. With this patch also extends soft_enabled as interrupt disable mask. Current flags are renamed from IRQ_[EN?DIS}ABLED to IRQS_ENABLED and IRQS_DISABLED. Patch also fixes the ptrace call to force the user to see the softe value to be alway 1. Reason being, even though userspace has no business knowing about softe, it is part of pt_regs. Like-wise in signal context. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 4 ++-- arch/powerpc/include/asm/hw_irq.h | 30 +++++++++++++++++++++++------- arch/powerpc/include/asm/irqflags.h | 4 ++-- arch/powerpc/kernel/entry_64.S | 21 ++++++++++----------- arch/powerpc/kernel/exceptions-64e.S | 10 +++++----- arch/powerpc/kernel/irq.c | 20 +++++++++++++++++--- arch/powerpc/perf/core-book3s.c | 2 +- 7 files changed, 60 insertions(+), 31 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 69c4e3d35e02..a21adcb75fc6 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -499,9 +499,9 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define __SOFTEN_TEST(h, vec) \ lbz r10,PACASOFTIRQEN(r13); \ - cmpwi r10,IRQS_DISABLED; \ + andi. r10,r10,IRQS_DISABLED; \ li r10,SOFTEN_VALUE_##vec; \ - beq masked_##h##interrupt + bne masked_##h##interrupt #define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 52afb1595cb0..f96280ee9540 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -31,8 +31,8 @@ /* * flags for paca->soft_enabled */ -#define IRQS_ENABLED 1 -#define IRQS_DISABLED 0 +#define IRQS_ENABLED 0 +#define IRQS_DISABLED 1 #endif /* CONFIG_PPC64 */ @@ -68,6 +68,18 @@ static inline notrace unsigned long soft_enabled_return(void) */ static inline notrace void soft_enabled_set(unsigned long enable) { +#ifdef CONFIG_TRACE_IRQFLAGS + /* + * mask must always include LINUX bit if any are set, and + * interrupts don't get replayed until the Linux interrupt is + * unmasked. This could be changed to replay partial unmasks + * in future, which would allow Linux masks to nest inside + * other masks, among other things. For now, be very dumb and + * simple. + */ + WARN_ON(mask && !(mask & IRQS_DISABLED)); +#endif + asm volatile( "stb %0,%1(13)" : @@ -76,15 +88,19 @@ static inline notrace void soft_enabled_set(unsigned long enable) : "memory"); } -static inline notrace unsigned long soft_enabled_set_return(unsigned long enable) +static inline notrace unsigned long soft_enabled_set_return(unsigned long mask) { unsigned long flags; +#ifdef CONFIG_TRACE_IRQFLAGS + WARN_ON(mask && !(mask & IRQS_DISABLED)); +#endif + asm volatile( "lbz %0,%1(13); stb %2,%1(13)" : "=&r" (flags) : "i" (offsetof(struct paca_struct, soft_enabled)), - "r" (enable) + "r" (mask) : "memory"); return flags; @@ -114,7 +130,7 @@ static inline unsigned long arch_local_irq_save(void) static inline bool arch_irqs_disabled_flags(unsigned long flags) { - return flags == IRQS_DISABLED; + return flags & IRQS_DISABLED; } static inline bool arch_irqs_disabled(void) @@ -133,7 +149,7 @@ static inline bool arch_irqs_disabled(void) #define hard_irq_disable() do { \ unsigned long flags; \ __hard_irq_disable(); \ - flags = soft_enabled_set_return(IRQS_DISABLED); \ + flags = soft_enabled_set_return(IRQS_DISABLED);\ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ if (!arch_irqs_disabled_flags(flags)) \ trace_hardirqs_off(); \ @@ -158,7 +174,7 @@ static inline void may_hard_irq_enable(void) static inline bool arch_irq_disabled_regs(struct pt_regs *regs) { - return (regs->softe == IRQS_DISABLED); + return (regs->softe & IRQS_DISABLED); } extern bool prep_irq_for_idle(void); diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index 8d9fdc84828d..f1ec012232d9 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -49,11 +49,11 @@ #define RECONCILE_IRQ_STATE(__rA, __rB) \ lbz __rA,PACASOFTIRQEN(r13); \ lbz __rB,PACAIRQHAPPENED(r13); \ - cmpwi cr0,__rA,IRQS_DISABLED;\ + andi. __rA,__rA,IRQS_DISABLED;\ li __rA,IRQS_DISABLED; \ ori __rB,__rB,PACA_IRQ_HARD_DIS; \ stb __rB,PACAIRQHAPPENED(r13); \ - beq 44f; \ + bne 44f; \ stb __rA,PACASOFTIRQEN(r13); \ TRACE_DISABLE_INTS; \ 44: diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index f8b32224dc11..51e4cc4dba4a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -130,8 +130,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) */ #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG) lbz r10,PACASOFTIRQEN(r13) - xori r10,r10,IRQS_ENABLED -1: tdnei r10,0 +1: tdnei r10,IRQS_ENABLED EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif @@ -741,10 +740,10 @@ resume_kernel: beq+ restore /* Check that preempt_count() == 0 and interrupts are enabled */ lwz r8,TI_PREEMPT(r9) - cmpwi cr1,r8,0 + cmpwi cr0,r8,0 + bne restore ld r0,SOFTE(r1) - cmpdi r0,IRQS_DISABLED - crandc eq,cr1*4+eq,eq + andi. r0,r0,IRQS_DISABLED bne restore /* @@ -783,11 +782,11 @@ restore: */ ld r5,SOFTE(r1) lbz r6,PACASOFTIRQEN(r13) - cmpwi cr0,r5,IRQS_DISABLED - beq .Lrestore_irq_off + andi. r5,r5,IRQS_DISABLED + bne .Lrestore_irq_off /* We are enabling, were we already enabled ? Yes, just return */ - cmpwi cr0,r6,IRQS_ENABLED + andi. r6,r6,IRQS_DISABLED beq cr0,.Ldo_restore /* @@ -1031,15 +1030,15 @@ _GLOBAL(enter_rtas) li r0,0 mtcr r0 -#ifdef CONFIG_BUG +#ifdef CONFIG_BUG /* There is no way it is acceptable to get here with interrupts enabled, * check it with the asm equivalent of WARN_ON */ lbz r0,PACASOFTIRQEN(r13) -1: tdnei r0,IRQS_DISABLED +1: tdeqi r0,IRQS_ENABLED EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif - + /* Hard-disable interrupts */ mfmsr r6 rldicl r7,r6,48,1 diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 80cc91bbcf1a..9216ece7672c 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -210,10 +210,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) ld r5,SOFTE(r1) /* Interrupts had better not already be enabled... */ - twnei r6,IRQS_DISABLED + tweqi r6,IRQS_ENABLED - cmpwi cr0,r5,IRQS_DISABLED - beq 1f + andi. r6,r5,IRQS_DISABLED + bne 1f TRACE_ENABLE_INTS stb r5,PACASOFTIRQEN(r13) @@ -352,8 +352,8 @@ ret_from_mc_except: #define PROLOG_ADDITION_MASKABLE_GEN(n) \ lbz r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \ - cmpwi cr0,r10,IRQS_DISABLED; /* yes -> go out of line */ \ - beq masked_interrupt_book3e_##n + andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \ + bne masked_interrupt_book3e_##n #define PROLOG_ADDITION_2REGS_GEN(n) \ std r14,PACA_EXGEN+EX_R14(r13); \ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 6c04e465caf5..406c4329b535 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -219,15 +219,29 @@ notrace unsigned int __check_irq_replay(void) return 0; } -notrace void arch_local_irq_restore(unsigned long en) +notrace void arch_local_irq_restore(unsigned long mask) { unsigned char irq_happened; unsigned int replay; /* Write the new soft-enabled value */ - soft_enabled_set(en); - if (en == IRQS_DISABLED) + soft_enabled_set(mask); + if (mask) { +#ifdef CONFIG_TRACE_IRQFLAGS + /* + * mask must always include LINUX bit if any + * are set, and interrupts don't get replayed until + * the Linux interrupt is unmasked. This could be + * changed to replay partial unmasks in future, + * which would allow Linux masks to nest inside + * other masks, among other things. For now, be very + * dumb and simple. + */ + WARN_ON(!(mask & IRQS_DISABLED)); +#endif return; + } + /* * From this point onward, we can take interrupts, preempt, * etc... unless we got hard-disabled. We check if an event diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 594261e308b3..ea4c709f481b 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -322,7 +322,7 @@ static inline void perf_read_regs(struct pt_regs *regs) */ static inline int perf_intr_is_nmi(struct pt_regs *regs) { - return (regs->softe == IRQS_DISABLED); + return (regs->softe & IRQS_DISABLED); } /* -- cgit v1.2.3 From 4e26bc4a4ed683c42ba45f09050575a671c6f1f4 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 20 Dec 2017 09:25:50 +0530 Subject: powerpc/64: Rename soft_enabled to irq_soft_mask Rename the paca->soft_enabled to paca->irq_soft_mask as it is no longer used as a flag for interrupt state, but a mask. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 4 +-- arch/powerpc/include/asm/hw_irq.h | 56 ++++++++++++++++++-------------- arch/powerpc/include/asm/irqflags.h | 12 +++---- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/include/asm/paca.h | 2 +- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/entry_64.S | 12 +++---- arch/powerpc/kernel/exceptions-64e.S | 10 +++--- arch/powerpc/kernel/head_64.S | 6 ++-- arch/powerpc/kernel/idle_book3e.S | 2 +- arch/powerpc/kernel/idle_power4.S | 2 +- arch/powerpc/kernel/irq.c | 23 +++---------- arch/powerpc/kernel/optprobes_head.S | 2 +- arch/powerpc/kernel/ptrace.c | 2 +- arch/powerpc/kernel/setup_64.c | 4 +-- arch/powerpc/kernel/time.c | 6 ++-- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +- arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/xmon/xmon.c | 4 +-- 19 files changed, 74 insertions(+), 81 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index a21adcb75fc6..b090db4ca37e 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -432,7 +432,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mflr r9; /* Get LR, later save to stack */ \ ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ std r9,_LINK(r1); \ - lbz r10,PACASOFTIRQEN(r13); \ + lbz r10,PACAIRQSOFTMASK(r13); \ mfspr r11,SPRN_XER; /* save XER in stackframe */ \ std r10,SOFTE(r1); \ std r11,_XER(r1); \ @@ -498,7 +498,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define SOFTEN_VALUE_0xea0 PACA_IRQ_EE #define __SOFTEN_TEST(h, vec) \ - lbz r10,PACASOFTIRQEN(r13); \ + lbz r10,PACAIRQSOFTMASK(r13); \ andi. r10,r10,IRQS_DISABLED; \ li r10,SOFTEN_VALUE_##vec; \ bne masked_##h##interrupt diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index f96280ee9540..5c82bb116832 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -29,7 +29,7 @@ #define PACA_IRQ_HMI 0x20 /* - * flags for paca->soft_enabled + * flags for paca->irq_soft_mask */ #define IRQS_ENABLED 0 #define IRQS_DISABLED 1 @@ -49,14 +49,14 @@ extern void unknown_exception(struct pt_regs *regs); #ifdef CONFIG_PPC64 #include -static inline notrace unsigned long soft_enabled_return(void) +static inline notrace unsigned long irq_soft_mask_return(void) { unsigned long flags; asm volatile( "lbz %0,%1(13)" : "=r" (flags) - : "i" (offsetof(struct paca_struct, soft_enabled))); + : "i" (offsetof(struct paca_struct, irq_soft_mask))); return flags; } @@ -64,18 +64,24 @@ static inline notrace unsigned long soft_enabled_return(void) /* * The "memory" clobber acts as both a compiler barrier * for the critical section and as a clobber because - * we changed paca->soft_enabled + * we changed paca->irq_soft_mask */ -static inline notrace void soft_enabled_set(unsigned long enable) +static inline notrace void irq_soft_mask_set(unsigned long mask) { #ifdef CONFIG_TRACE_IRQFLAGS /* - * mask must always include LINUX bit if any are set, and - * interrupts don't get replayed until the Linux interrupt is - * unmasked. This could be changed to replay partial unmasks - * in future, which would allow Linux masks to nest inside - * other masks, among other things. For now, be very dumb and - * simple. + * The irq mask must always include the STD bit if any are set. + * + * and interrupts don't get replayed until the standard + * interrupt (local_irq_disable()) is unmasked. + * + * Other masks must only provide additional masking beyond + * the standard, and they are also not replayed until the + * standard interrupt becomes unmasked. + * + * This could be changed, but it will require partial + * unmasks to be replayed, among other things. For now, take + * the simple approach. */ WARN_ON(mask && !(mask & IRQS_DISABLED)); #endif @@ -83,12 +89,12 @@ static inline notrace void soft_enabled_set(unsigned long enable) asm volatile( "stb %0,%1(13)" : - : "r" (enable), - "i" (offsetof(struct paca_struct, soft_enabled)) + : "r" (mask), + "i" (offsetof(struct paca_struct, irq_soft_mask)) : "memory"); } -static inline notrace unsigned long soft_enabled_set_return(unsigned long mask) +static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask) { unsigned long flags; @@ -99,7 +105,7 @@ static inline notrace unsigned long soft_enabled_set_return(unsigned long mask) asm volatile( "lbz %0,%1(13); stb %2,%1(13)" : "=&r" (flags) - : "i" (offsetof(struct paca_struct, soft_enabled)), + : "i" (offsetof(struct paca_struct, irq_soft_mask)), "r" (mask) : "memory"); @@ -108,12 +114,12 @@ static inline notrace unsigned long soft_enabled_set_return(unsigned long mask) static inline unsigned long arch_local_save_flags(void) { - return soft_enabled_return(); + return irq_soft_mask_return(); } static inline void arch_local_irq_disable(void) { - soft_enabled_set(IRQS_DISABLED); + irq_soft_mask_set(IRQS_DISABLED); } extern void arch_local_irq_restore(unsigned long); @@ -125,7 +131,7 @@ static inline void arch_local_irq_enable(void) static inline unsigned long arch_local_irq_save(void) { - return soft_enabled_set_return(IRQS_DISABLED); + return irq_soft_mask_set_return(IRQS_DISABLED); } static inline bool arch_irqs_disabled_flags(unsigned long flags) @@ -146,13 +152,13 @@ static inline bool arch_irqs_disabled(void) #define __hard_irq_disable() __mtmsrd(local_paca->kernel_msr, 1) #endif -#define hard_irq_disable() do { \ - unsigned long flags; \ - __hard_irq_disable(); \ - flags = soft_enabled_set_return(IRQS_DISABLED);\ - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ - if (!arch_irqs_disabled_flags(flags)) \ - trace_hardirqs_off(); \ +#define hard_irq_disable() do { \ + unsigned long flags; \ + __hard_irq_disable(); \ + flags = irq_soft_mask_set_return(IRQS_DISABLED); \ + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ + if (!arch_irqs_disabled_flags(flags)) \ + trace_hardirqs_off(); \ } while(0) static inline bool lazy_irq_pending(void) diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index f1ec012232d9..1a6c1ce17735 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -47,14 +47,14 @@ * be clobbered. */ #define RECONCILE_IRQ_STATE(__rA, __rB) \ - lbz __rA,PACASOFTIRQEN(r13); \ + lbz __rA,PACAIRQSOFTMASK(r13); \ lbz __rB,PACAIRQHAPPENED(r13); \ - andi. __rA,__rA,IRQS_DISABLED;\ - li __rA,IRQS_DISABLED; \ + andi. __rA,__rA,IRQS_DISABLED; \ + li __rA,IRQS_DISABLED; \ ori __rB,__rB,PACA_IRQ_HARD_DIS; \ stb __rB,PACAIRQHAPPENED(r13); \ bne 44f; \ - stb __rA,PACASOFTIRQEN(r13); \ + stb __rA,PACAIRQSOFTMASK(r13); \ TRACE_DISABLE_INTS; \ 44: @@ -64,9 +64,9 @@ #define RECONCILE_IRQ_STATE(__rA, __rB) \ lbz __rA,PACAIRQHAPPENED(r13); \ - li __rB,IRQS_DISABLED; \ + li __rB,IRQS_DISABLED; \ ori __rA,__rA,PACA_IRQ_HARD_DIS; \ - stb __rB,PACASOFTIRQEN(r13); \ + stb __rB,PACAIRQSOFTMASK(r13); \ stb __rA,PACAIRQHAPPENED(r13) #endif #endif diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 369f0640826c..9db18287b5f4 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -873,7 +873,7 @@ static inline void kvmppc_fix_ee_before_entry(void) /* Only need to enable IRQs by hard enabling them after this */ local_paca->irq_happened = 0; - soft_enabled_set(IRQS_ENABLED); + irq_soft_mask_set(IRQS_ENABLED); #endif } diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 3892db93b837..e2ee193eb24d 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -159,7 +159,7 @@ struct paca_struct { u64 saved_r1; /* r1 save for RTAS calls or PM */ u64 saved_msr; /* MSR saved here by enter_rtas */ u16 trap_save; /* Used when bad stack is encountered */ - u8 soft_enabled; /* irq soft-enable flag */ + u8 irq_soft_mask; /* mask for irq soft masking */ u8 irq_happened; /* irq happened while soft-disabled */ u8 io_sync; /* writel() needs spin_unlock sync */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 3f6316bcde4e..43a6967d48ad 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -178,7 +178,7 @@ int main(void) OFFSET(PACATOC, paca_struct, kernel_toc); OFFSET(PACAKBASE, paca_struct, kernelbase); OFFSET(PACAKMSR, paca_struct, kernel_msr); - OFFSET(PACASOFTIRQEN, paca_struct, soft_enabled); + OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask); OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened); #ifdef CONFIG_PPC_BOOK3S OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 51e4cc4dba4a..7c51f022d0de 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -129,7 +129,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) * is correct */ #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG) - lbz r10,PACASOFTIRQEN(r13) + lbz r10,PACAIRQSOFTMASK(r13) 1: tdnei r10,IRQS_ENABLED EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif @@ -781,7 +781,7 @@ restore: * are about to re-enable interrupts */ ld r5,SOFTE(r1) - lbz r6,PACASOFTIRQEN(r13) + lbz r6,PACAIRQSOFTMASK(r13) andi. r5,r5,IRQS_DISABLED bne .Lrestore_irq_off @@ -806,7 +806,7 @@ restore: .Lrestore_no_replay: TRACE_ENABLE_INTS li r0,IRQS_ENABLED - stb r0,PACASOFTIRQEN(r13); + stb r0,PACAIRQSOFTMASK(r13); /* * Final return path. BookE is handled in a different file @@ -913,8 +913,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 1: #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG) /* The interrupt should not have soft enabled. */ - lbz r7,PACASOFTIRQEN(r13) -1: tdnei r7,IRQS_DISABLED + lbz r7,PACAIRQSOFTMASK(r13) +1: tdeqi r7,IRQS_ENABLED EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif b .Ldo_restore @@ -1034,7 +1034,7 @@ _GLOBAL(enter_rtas) /* There is no way it is acceptable to get here with interrupts enabled, * check it with the asm equivalent of WARN_ON */ - lbz r0,PACASOFTIRQEN(r13) + lbz r0,PACAIRQSOFTMASK(r13) 1: tdeqi r0,IRQS_ENABLED EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 9216ece7672c..ee832d344a5a 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -139,7 +139,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mfspr r10,SPRN_ESR SPECIAL_EXC_STORE(r10,ESR) - lbz r10,PACASOFTIRQEN(r13) + lbz r10,PACAIRQSOFTMASK(r13) SPECIAL_EXC_STORE(r10,SOFTE) ld r10,_NIP(r1) SPECIAL_EXC_STORE(r10,CSRR0) @@ -206,7 +206,7 @@ BEGIN_FTR_SECTION mtspr SPRN_MAS8,r10 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) - lbz r6,PACASOFTIRQEN(r13) + lbz r6,PACAIRQSOFTMASK(r13) ld r5,SOFTE(r1) /* Interrupts had better not already be enabled... */ @@ -216,7 +216,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) bne 1f TRACE_ENABLE_INTS - stb r5,PACASOFTIRQEN(r13) + stb r5,PACAIRQSOFTMASK(r13) 1: /* * Restore PACAIRQHAPPENED rather than setting it based on @@ -351,7 +351,7 @@ ret_from_mc_except: #define PROLOG_ADDITION_NONE_MC(n) #define PROLOG_ADDITION_MASKABLE_GEN(n) \ - lbz r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \ + lbz r10,PACAIRQSOFTMASK(r13); /* are irqs soft-masked? */ \ andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \ bne masked_interrupt_book3e_##n @@ -397,7 +397,7 @@ exc_##n##_common: \ mfspr r8,SPRN_XER; /* save XER in stackframe */ \ ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \ lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \ - lbz r11,PACASOFTIRQEN(r13); /* get current IRQ softe */ \ + lbz r11,PACAIRQSOFTMASK(r13); /* get current IRQ softe */ \ ld r12,exception_marker@toc(r2); \ li r0,0; \ std r3,GPR10(r1); /* save r10 to stackframe */ \ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 0deef350004f..a61151a6ea5e 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -766,7 +766,7 @@ _GLOBAL(pmac_secondary_start) * in the PACA when doing hotplug) */ li r0,IRQS_DISABLED - stb r0,PACASOFTIRQEN(r13) + stb r0,PACAIRQSOFTMASK(r13) li r0,PACA_IRQ_HARD_DIS stb r0,PACAIRQHAPPENED(r13) @@ -823,7 +823,7 @@ __secondary_start: * in the PACA when doing hotplug) */ li r7,IRQS_DISABLED - stb r7,PACASOFTIRQEN(r13) + stb r7,PACAIRQSOFTMASK(r13) li r0,PACA_IRQ_HARD_DIS stb r0,PACAIRQHAPPENED(r13) @@ -990,7 +990,7 @@ start_here_common: * in the PACA when doing hotplug) */ li r0,IRQS_DISABLED - stb r0,PACASOFTIRQEN(r13) + stb r0,PACAIRQSOFTMASK(r13) li r0,PACA_IRQ_HARD_DIS stb r0,PACAIRQHAPPENED(r13) diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S index 1bb1d152aa46..2b269315d377 100644 --- a/arch/powerpc/kernel/idle_book3e.S +++ b/arch/powerpc/kernel/idle_book3e.S @@ -48,7 +48,7 @@ _GLOBAL(\name) addi r1,r1,128 #endif li r0,IRQS_ENABLED - stb r0,PACASOFTIRQEN(r13) + stb r0,PACAIRQSOFTMASK(r13) /* Interrupts will make use return to LR, so get something we want * in there diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index 8b4702d93701..08faa93755f9 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -55,7 +55,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) #endif /* CONFIG_TRACE_IRQFLAGS */ li r0,IRQS_ENABLED - stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ + stb r0,PACAIRQSOFTMASK(r13) /* we'll hard-enable shortly */ BEGIN_FTR_SECTION DSSALL sync diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 406c4329b535..e137b98dc5f8 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -225,22 +225,9 @@ notrace void arch_local_irq_restore(unsigned long mask) unsigned int replay; /* Write the new soft-enabled value */ - soft_enabled_set(mask); - if (mask) { -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * mask must always include LINUX bit if any - * are set, and interrupts don't get replayed until - * the Linux interrupt is unmasked. This could be - * changed to replay partial unmasks in future, - * which would allow Linux masks to nest inside - * other masks, among other things. For now, be very - * dumb and simple. - */ - WARN_ON(!(mask & IRQS_DISABLED)); -#endif + irq_soft_mask_set(mask); + if (mask) return; - } /* * From this point onward, we can take interrupts, preempt, @@ -285,7 +272,7 @@ notrace void arch_local_irq_restore(unsigned long mask) } #endif /* CONFIG_TRACE_IRQFLAGS */ - soft_enabled_set(IRQS_DISABLED); + irq_soft_mask_set(IRQS_DISABLED); trace_hardirqs_off(); /* @@ -297,7 +284,7 @@ notrace void arch_local_irq_restore(unsigned long mask) /* We can soft-enable now */ trace_hardirqs_on(); - soft_enabled_set(IRQS_ENABLED); + irq_soft_mask_set(IRQS_ENABLED); /* * And replay if we have to. This will return with interrupts @@ -372,7 +359,7 @@ bool prep_irq_for_idle(void) * of entering the low power state. */ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - soft_enabled_set(IRQS_ENABLED); + irq_soft_mask_set(IRQS_ENABLED); /* Tell the caller to enter the low power state */ return true; diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index 52fc864cdec4..98a3aeeb3c8c 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -58,7 +58,7 @@ optprobe_template_entry: std r5,_XER(r1) mfcr r5 std r5,_CCR(r1) - lbz r5,PACASOFTIRQEN(r13) + lbz r5,PACAIRQSOFTMASK(r13) std r5,SOFTE(r1) /* diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index bd2c49475473..aef08e579946 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -285,7 +285,7 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data) #ifdef CONFIG_PPC64 /* - * softe copies paca->soft_enabled variable state. Since soft_enabled is + * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is * no more used as a flag, lets force usr to alway see the softe value as 1 * which means interrupts are not soft disabled. */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index f6bedebda90a..896dacef2f2d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -189,7 +189,7 @@ static void __init fixup_boot_paca(void) /* Allow percpu accesses to work until we setup percpu data */ get_paca()->data_offset = 0; /* Mark interrupts disabled in PACA */ - soft_enabled_set(IRQS_DISABLED); + irq_soft_mask_set(IRQS_DISABLED); } static void __init configure_exceptions(void) @@ -352,7 +352,7 @@ void __init early_setup(unsigned long dt_ptr) void early_setup_secondary(void) { /* Mark interrupts disabled in PACA */ - soft_enabled_set(IRQS_DISABLED); + irq_soft_mask_set(IRQS_DISABLED); /* Initialize the hash table or TLB handling */ early_init_mmu_secondary(); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index daa6e9e35ab9..a32823dcd9a4 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -244,7 +244,7 @@ static u64 scan_dispatch_log(u64 stop_tb) void accumulate_stolen_time(void) { u64 sst, ust; - unsigned long save_soft_enabled = soft_enabled_return(); + unsigned long save_irq_soft_mask = irq_soft_mask_return(); struct cpu_accounting_data *acct = &local_paca->accounting; /* We are called early in the exception entry, before @@ -253,7 +253,7 @@ void accumulate_stolen_time(void) * needs to reflect that so various debug stuff doesn't * complain */ - soft_enabled_set(IRQS_DISABLED); + irq_soft_mask_set(IRQS_DISABLED); sst = scan_dispatch_log(acct->starttime_user); ust = scan_dispatch_log(acct->starttime); @@ -261,7 +261,7 @@ void accumulate_stolen_time(void) acct->utime -= ust; acct->steal_time += ust + sst; - soft_enabled_set(save_soft_enabled); + irq_soft_mask_set(save_irq_soft_mask); } static inline u64 calculate_stolen_time(u64 stop_tb) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 2659844784b8..a92ad8500917 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -3249,7 +3249,7 @@ kvmppc_bad_host_intr: mfctr r4 #endif mfxer r5 - lbz r6, PACASOFTIRQEN(r13) + lbz r6, PACAIRQSOFTMASK(r13) std r3, _LINK(r1) std r4, _CTR(r1) std r5, _XER(r1) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 760bb5a2b8bd..876da2bc1796 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -752,7 +752,7 @@ void flush_dcache_icache_hugepage(struct page *page) * So long as we atomically load page table pointers we are safe against teardown, * we can follow the address down to the the page and take a ref on it. * This function need to be called with interrupts disabled. We use this variant - * when we have MSR[EE] = 0 but the paca->soft_enabled = IRQS_ENABLED + * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED */ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, bool *is_thp, unsigned *hpage_shift) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 1b2d8cb49abb..2695f8dd2359 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1623,7 +1623,7 @@ static void excprint(struct pt_regs *fp) printf(" current = 0x%lx\n", current); #ifdef CONFIG_PPC64 printf(" paca = 0x%lx\t softe: %d\t irq_happened: 0x%02x\n", - local_paca, local_paca->soft_enabled, local_paca->irq_happened); + local_paca, local_paca->irq_soft_mask, local_paca->irq_happened); #endif if (current) { printf(" pid = %ld, comm = %s\n", @@ -2391,7 +2391,7 @@ static void dump_one_paca(int cpu) DUMP(p, stab_rr, "lx"); DUMP(p, saved_r1, "lx"); DUMP(p, trap_save, "x"); - DUMP(p, soft_enabled, "x"); + DUMP(p, irq_soft_mask, "x"); DUMP(p, irq_happened, "x"); DUMP(p, io_sync, "x"); DUMP(p, irq_work_pending, "x"); -- cgit v1.2.3 From d32eb1b550d4ecb791c6311fcafa08ce91b6c06b Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 20 Dec 2017 09:25:51 +0530 Subject: powerpc/64s: Avoid using EXCEPTION_PROLOG_1 macro in MASKABLE_* Currently we use both EXCEPTION_PROLOG_1 and __EXCEPTION_PROLOG_1 in the MASKABLE_* macros. As a cleanup, this patch makes MASKABLE_* to use only __EXCEPTION_PROLOG_1. There is not logic change. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index b090db4ca37e..593d7f9da5da 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -536,7 +536,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXC_STD, SOFTEN_TEST_PR) #define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label) \ - EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec); \ + __EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec); \ EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD) #define MASKABLE_EXCEPTION_HV(loc, vec, label) \ @@ -544,7 +544,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXC_HV, SOFTEN_TEST_HV) #define MASKABLE_EXCEPTION_HV_OOL(vec, label) \ - EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \ + __EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \ EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV) #define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \ @@ -565,7 +565,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXC_HV, SOFTEN_TEST_HV) #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \ - EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \ + __EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \ EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV) /* -- cgit v1.2.3 From f14e953b191fcb0da82ef066597df731b683a957 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 20 Dec 2017 09:25:52 +0530 Subject: powerpc/64s: Add support to take additional parameter in MASKABLE_* macro To support addition of "bitmask" to MASKABLE_* macros, factor out the EXCPETION_PROLOG_1 macro. Make it explicit the interrupt masking supported by a gievn interrupt handler. Patch correspondingly extends the MASKABLE_* macros with an addition's parameter. "bitmask" parameter is passed to SOFTEN_TEST macro to decide on masking the interrupt. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 92 ++++++++++++++++++++------------ arch/powerpc/include/asm/head-64.h | 40 +++++++------- arch/powerpc/kernel/exceptions-64s.S | 32 ++++++----- 3 files changed, 96 insertions(+), 68 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 593d7f9da5da..d005a1c19b68 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -198,18 +198,40 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) std r10,area+EX_R10(r13); /* save r10 - r12 */ \ OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) -#define __EXCEPTION_PROLOG_1(area, extra, vec) \ +#define __EXCEPTION_PROLOG_1_PRE(area) \ OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \ OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ SAVE_CTR(r10, area); \ - mfcr r9; \ - extra(vec); \ + mfcr r9; + +#define __EXCEPTION_PROLOG_1_POST(area) \ std r11,area+EX_R11(r13); \ std r12,area+EX_R12(r13); \ GET_SCRATCH0(r10); \ std r10,area+EX_R13(r13) + +/* + * This version of the EXCEPTION_PROLOG_1 will carry + * addition parameter called "bitmask" to support + * checking of the interrupt maskable level in the SOFTEN_TEST. + * Intended to be used in MASKABLE_EXCPETION_* macros. + */ +#define MASKABLE_EXCEPTION_PROLOG_1(area, extra, vec, bitmask) \ + __EXCEPTION_PROLOG_1_PRE(area); \ + extra(vec, bitmask); \ + __EXCEPTION_PROLOG_1_POST(area); + +/* + * This version of the EXCEPTION_PROLOG_1 is intended + * to be used in STD_EXCEPTION* macros + */ +#define _EXCEPTION_PROLOG_1(area, extra, vec) \ + __EXCEPTION_PROLOG_1_PRE(area); \ + extra(vec); \ + __EXCEPTION_PROLOG_1_POST(area); + #define EXCEPTION_PROLOG_1(area, extra, vec) \ - __EXCEPTION_PROLOG_1(area, extra, vec) + _EXCEPTION_PROLOG_1(area, extra, vec) #define __EXCEPTION_PROLOG_PSERIES_1(label, h) \ ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ @@ -497,21 +519,21 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define SOFTEN_VALUE_0xe60 PACA_IRQ_HMI #define SOFTEN_VALUE_0xea0 PACA_IRQ_EE -#define __SOFTEN_TEST(h, vec) \ +#define __SOFTEN_TEST(h, vec, bitmask) \ lbz r10,PACAIRQSOFTMASK(r13); \ - andi. r10,r10,IRQS_DISABLED; \ + andi. r10,r10,bitmask; \ li r10,SOFTEN_VALUE_##vec; \ bne masked_##h##interrupt -#define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec) +#define _SOFTEN_TEST(h, vec, bitmask) __SOFTEN_TEST(h, vec, bitmask) -#define SOFTEN_TEST_PR(vec) \ +#define SOFTEN_TEST_PR(vec, bitmask) \ KVMTEST(EXC_STD, vec); \ - _SOFTEN_TEST(EXC_STD, vec) + _SOFTEN_TEST(EXC_STD, vec, bitmask) -#define SOFTEN_TEST_HV(vec) \ +#define SOFTEN_TEST_HV(vec, bitmask) \ KVMTEST(EXC_HV, vec); \ - _SOFTEN_TEST(EXC_HV, vec) + _SOFTEN_TEST(EXC_HV, vec, bitmask) #define KVMTEST_PR(vec) \ KVMTEST(EXC_STD, vec) @@ -519,53 +541,53 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define KVMTEST_HV(vec) \ KVMTEST(EXC_HV, vec) -#define SOFTEN_NOTEST_PR(vec) _SOFTEN_TEST(EXC_STD, vec) -#define SOFTEN_NOTEST_HV(vec) _SOFTEN_TEST(EXC_HV, vec) +#define SOFTEN_NOTEST_PR(vec, bitmask) _SOFTEN_TEST(EXC_STD, vec, bitmask) +#define SOFTEN_NOTEST_HV(vec, bitmask) _SOFTEN_TEST(EXC_HV, vec, bitmask) -#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ +#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(PACA_EXGEN); \ - __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ EXCEPTION_PROLOG_PSERIES_1(label, h); -#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ - __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) +#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \ + __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) -#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label) \ +#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label, bitmask) \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ - EXC_STD, SOFTEN_TEST_PR) + EXC_STD, SOFTEN_TEST_PR, bitmask) -#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label) \ - __EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec); \ +#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec, bitmask);\ EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD) -#define MASKABLE_EXCEPTION_HV(loc, vec, label) \ +#define MASKABLE_EXCEPTION_HV(loc, vec, label, bitmask) \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ - EXC_HV, SOFTEN_TEST_HV) + EXC_HV, SOFTEN_TEST_HV, bitmask) -#define MASKABLE_EXCEPTION_HV_OOL(vec, label) \ - __EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \ +#define MASKABLE_EXCEPTION_HV_OOL(vec, label, bitmask) \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV) -#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \ +#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(PACA_EXGEN); \ - __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) -#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \ - __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) +#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)\ + __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) -#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label) \ +#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label, bitmask) \ _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ - EXC_STD, SOFTEN_NOTEST_PR) + EXC_STD, SOFTEN_NOTEST_PR, bitmask) -#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \ +#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label, bitmask) \ _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ - EXC_HV, SOFTEN_TEST_HV) + EXC_HV, SOFTEN_TEST_HV, bitmask) -#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \ - __EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \ +#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\ EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV) /* diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 0a663dfc28b5..7e0e93f24cb7 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -268,14 +268,14 @@ name: STD_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \ EXC_VIRT_END(name, start, size); -#define EXC_REAL_MASKABLE(name, start, size) \ +#define EXC_REAL_MASKABLE(name, start, size, bitmask) \ EXC_REAL_BEGIN(name, start, size); \ - MASKABLE_EXCEPTION_PSERIES(start, start, name##_common); \ + MASKABLE_EXCEPTION_PSERIES(start, start, name##_common, bitmask);\ EXC_REAL_END(name, start, size); -#define EXC_VIRT_MASKABLE(name, start, size, realvec) \ +#define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \ EXC_VIRT_BEGIN(name, start, size); \ - MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \ + MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common, bitmask);\ EXC_VIRT_END(name, start, size); #define EXC_REAL_HV(name, start, size) \ @@ -304,13 +304,13 @@ name: #define __EXC_REAL_OOL_MASKABLE(name, start, size) \ __EXC_REAL_OOL(name, start, size); -#define __TRAMP_REAL_OOL_MASKABLE(name, vec) \ +#define __TRAMP_REAL_OOL_MASKABLE(name, vec, bitmask) \ TRAMP_REAL_BEGIN(tramp_real_##name); \ - MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common); \ + MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common, bitmask); \ -#define EXC_REAL_OOL_MASKABLE(name, start, size) \ +#define EXC_REAL_OOL_MASKABLE(name, start, size, bitmask) \ __EXC_REAL_OOL_MASKABLE(name, start, size); \ - __TRAMP_REAL_OOL_MASKABLE(name, start); + __TRAMP_REAL_OOL_MASKABLE(name, start, bitmask); #define __EXC_REAL_OOL_HV_DIRECT(name, start, size, handler) \ EXC_REAL_BEGIN(name, start, size); \ @@ -331,13 +331,13 @@ name: #define __EXC_REAL_OOL_MASKABLE_HV(name, start, size) \ __EXC_REAL_OOL(name, start, size); -#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec) \ +#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec, bitmask) \ TRAMP_REAL_BEGIN(tramp_real_##name); \ - MASKABLE_EXCEPTION_HV_OOL(vec, name##_common); \ + MASKABLE_EXCEPTION_HV_OOL(vec, name##_common, bitmask); \ -#define EXC_REAL_OOL_MASKABLE_HV(name, start, size) \ +#define EXC_REAL_OOL_MASKABLE_HV(name, start, size, bitmask) \ __EXC_REAL_OOL_MASKABLE_HV(name, start, size); \ - __TRAMP_REAL_OOL_MASKABLE_HV(name, start); + __TRAMP_REAL_OOL_MASKABLE_HV(name, start, bitmask); #define __EXC_VIRT_OOL(name, start, size) \ EXC_VIRT_BEGIN(name, start, size); \ @@ -355,13 +355,13 @@ name: #define __EXC_VIRT_OOL_MASKABLE(name, start, size) \ __EXC_VIRT_OOL(name, start, size); -#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec) \ +#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask) \ TRAMP_VIRT_BEGIN(tramp_virt_##name); \ - MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \ + MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common, bitmask);\ -#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec) \ +#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec, bitmask) \ __EXC_VIRT_OOL_MASKABLE(name, start, size); \ - __TRAMP_VIRT_OOL_MASKABLE(name, realvec); + __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask); #define __EXC_VIRT_OOL_HV(name, start, size) \ __EXC_VIRT_OOL(name, start, size); @@ -377,13 +377,13 @@ name: #define __EXC_VIRT_OOL_MASKABLE_HV(name, start, size) \ __EXC_VIRT_OOL(name, start, size); -#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec) \ +#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask) \ TRAMP_VIRT_BEGIN(tramp_virt_##name); \ - MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \ + MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common, bitmask);\ -#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec) \ +#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec, bitmask) \ __EXC_VIRT_OOL_MASKABLE_HV(name, start, size); \ - __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec); + __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask); #define TRAMP_KVM(area, n) \ TRAMP_KVM_BEGIN(do_kvm_##n); \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 175891c6909c..f99021f3b47b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -691,10 +691,12 @@ EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) hardware_interrupt_hv: BEGIN_FTR_SECTION _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, - EXC_HV, SOFTEN_TEST_HV) + EXC_HV, SOFTEN_TEST_HV, + IRQS_DISABLED) FTR_SECTION_ELSE _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, - EXC_STD, SOFTEN_TEST_PR) + EXC_STD, SOFTEN_TEST_PR, + IRQS_DISABLED) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) EXC_REAL_END(hardware_interrupt, 0x500, 0x100) @@ -702,9 +704,13 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) .globl hardware_interrupt_relon_hv; hardware_interrupt_relon_hv: BEGIN_FTR_SECTION - _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV) + _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + EXC_HV, SOFTEN_TEST_HV, + IRQS_DISABLED) FTR_SECTION_ELSE - _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR) + _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + EXC_STD, SOFTEN_TEST_PR, + IRQS_DISABLED) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) @@ -800,8 +806,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif -EXC_REAL_MASKABLE(decrementer, 0x900, 0x80) -EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900) +EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED) +EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED) TRAMP_KVM(PACA_EXGEN, 0x900) EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) @@ -812,8 +818,8 @@ TRAMP_KVM_HV(PACA_EXGEN, 0x980) EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt) -EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100) -EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00) +EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100, IRQS_DISABLED) +EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00, IRQS_DISABLED) TRAMP_KVM(PACA_EXGEN, 0xa00) #ifdef CONFIG_PPC_DOORBELL EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception) @@ -1025,7 +1031,7 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt) * mode. */ __EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0x20, hmi_exception_early) -__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60) +__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60, IRQS_DISABLED) EXC_VIRT_NONE(0x4e60, 0x20) TRAMP_KVM_HV(PACA_EXGEN, 0xe60) TRAMP_REAL_BEGIN(hmi_exception_early) @@ -1083,8 +1089,8 @@ EXC_COMMON_BEGIN(hmi_exception_common) EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception, ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON) -EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20) -EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80) +EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED) +EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED) TRAMP_KVM_HV(PACA_EXGEN, 0xe80) #ifdef CONFIG_PPC_DOORBELL EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception) @@ -1093,8 +1099,8 @@ EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception) #endif -EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20) -EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0) +EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20, IRQS_DISABLED) +EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0, IRQS_DISABLED) TRAMP_KVM_HV(PACA_EXGEN, 0xea0) EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ) -- cgit v1.2.3 From f442d004806e31fe5aab614ec48e53f7b38f7c2d Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 20 Dec 2017 09:25:53 +0530 Subject: powerpc/64s: Add support to mask perf interrupts and replay them Two new bit mask field "IRQ_DISABLE_MASK_PMU" is introduced to support the masking of PMI and "IRQ_DISABLE_MASK_ALL" to aid interrupt masking checking. Couple of new irq #defs "PACA_IRQ_PMI" and "SOFTEN_VALUE_0xf0*" added to use in the exception code to check for PMI interrupts. In the masked_interrupt handler, for PMIs we reset the MSR[EE] and return. In the __check_irq_replay(), replay the PMI interrupt by calling performance_monitor_common handler. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 5 +++++ arch/powerpc/include/asm/hw_irq.h | 19 +++++++++++-------- arch/powerpc/kernel/entry_64.S | 5 +++++ arch/powerpc/kernel/exceptions-64s.S | 6 ++++-- arch/powerpc/kernel/irq.c | 7 ++++++- 5 files changed, 31 insertions(+), 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index d005a1c19b68..54afd1f140a4 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -518,6 +518,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define SOFTEN_VALUE_0xe80 PACA_IRQ_DBELL #define SOFTEN_VALUE_0xe60 PACA_IRQ_HMI #define SOFTEN_VALUE_0xea0 PACA_IRQ_EE +#define SOFTEN_VALUE_0xf00 PACA_IRQ_PMI #define __SOFTEN_TEST(h, vec, bitmask) \ lbz r10,PACAIRQSOFTMASK(r13); \ @@ -582,6 +583,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ EXC_STD, SOFTEN_NOTEST_PR, bitmask) +#define MASKABLE_RELON_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_PR, vec, bitmask);\ + EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD); + #define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label, bitmask) \ _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ EXC_HV, SOFTEN_TEST_HV, bitmask) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 5c82bb116832..552a2c6e428f 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -27,12 +27,15 @@ #define PACA_IRQ_DEC 0x08 /* Or FIT */ #define PACA_IRQ_EE_EDGE 0x10 /* BookE only */ #define PACA_IRQ_HMI 0x20 +#define PACA_IRQ_PMI 0x40 /* * flags for paca->irq_soft_mask */ #define IRQS_ENABLED 0 -#define IRQS_DISABLED 1 +#define IRQS_DISABLED 1 /* local_irq_disable() interrupts */ +#define IRQS_PMI_DISABLED 2 +#define IRQS_ALL_DISABLED (IRQS_DISABLED | IRQS_PMI_DISABLED) #endif /* CONFIG_PPC64 */ @@ -152,13 +155,13 @@ static inline bool arch_irqs_disabled(void) #define __hard_irq_disable() __mtmsrd(local_paca->kernel_msr, 1) #endif -#define hard_irq_disable() do { \ - unsigned long flags; \ - __hard_irq_disable(); \ - flags = irq_soft_mask_set_return(IRQS_DISABLED); \ - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ - if (!arch_irqs_disabled_flags(flags)) \ - trace_hardirqs_off(); \ +#define hard_irq_disable() do { \ + unsigned long flags; \ + __hard_irq_disable(); \ + flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED); \ + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ + if (!arch_irqs_disabled_flags(flags)) \ + trace_hardirqs_off(); \ } while(0) static inline bool lazy_irq_pending(void) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 7c51f022d0de..3b90a2f1018e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -954,6 +954,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) addi r3,r1,STACK_FRAME_OVERHEAD; bl do_IRQ b ret_from_except +1: cmpwi cr0,r3,0xf00 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl performance_monitor_exception + b ret_from_except 1: cmpwi cr0,r3,0xe60 bne 1f addi r3,r1,STACK_FRAME_OVERHEAD; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f99021f3b47b..1a4f51f35870 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1111,8 +1111,8 @@ EXC_REAL_NONE(0xee0, 0x20) EXC_VIRT_NONE(0x4ee0, 0x20) -EXC_REAL_OOL(performance_monitor, 0xf00, 0x20) -EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x20, 0xf00) +EXC_REAL_OOL_MASKABLE(performance_monitor, 0xf00, 0x20, IRQS_PMI_DISABLED) +EXC_VIRT_OOL_MASKABLE(performance_monitor, 0x4f00, 0x20, 0xf00, IRQS_PMI_DISABLED) TRAMP_KVM(PACA_EXGEN, 0xf00) EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception) @@ -1723,6 +1723,8 @@ BEGIN_FTR_SECTION FTR_SECTION_ELSE beq hardware_interrupt_common ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300) + cmpwi r3,0xf00 + beq performance_monitor_common BEGIN_FTR_SECTION cmpwi r3,0xa00 beq h_doorbell_common_msgclr diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index e137b98dc5f8..1a41cceddfe2 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -186,6 +186,11 @@ notrace unsigned int __check_irq_replay(void) return 0x900; } + if (happened & PACA_IRQ_PMI) { + local_paca->irq_happened &= ~PACA_IRQ_PMI; + return 0xf00; + } + if (happened & PACA_IRQ_EE) { local_paca->irq_happened &= ~PACA_IRQ_EE; return 0x500; @@ -272,7 +277,7 @@ notrace void arch_local_irq_restore(unsigned long mask) } #endif /* CONFIG_TRACE_IRQFLAGS */ - irq_soft_mask_set(IRQS_DISABLED); + irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); /* -- cgit v1.2.3 From 9aa88188ee87f4cc5c4e1bce15754bfcde8e900f Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 20 Dec 2017 09:25:54 +0530 Subject: powerpc: Add new kconfig CONFIG_PPC_IRQ_SOFT_MASK_DEBUG New Kconfig is added "CONFIG_PPC_IRQ_SOFT_MASK_DEBUG" to add WARN_ON to alert the invalid transitions. Also moved the code under the CONFIG_TRACE_IRQFLAGS in arch_local_irq_restore() to new Kconfig. Reviewed-by: Nicholas Piggin Signed-off-by: Madhavan Srinivasan [mpe: Fix name of CONFIG option in change log] Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig.debug | 4 ++++ arch/powerpc/include/asm/hw_irq.h | 4 ++-- arch/powerpc/kernel/entry_64.S | 4 ++-- arch/powerpc/kernel/irq.c | 4 ++-- 4 files changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 5d18169fff58..c45424c64e19 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -90,6 +90,10 @@ config MSI_BITMAP_SELFTEST depends on DEBUG_KERNEL default n +config PPC_IRQ_SOFT_MASK_DEBUG + bool "Include extra checks for powerpc irq soft masking" + default n + config XMON bool "Include xmon kernel debugger" depends on DEBUG_KERNEL diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 552a2c6e428f..f40da3bd079b 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -71,7 +71,7 @@ static inline notrace unsigned long irq_soft_mask_return(void) */ static inline notrace void irq_soft_mask_set(unsigned long mask) { -#ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG /* * The irq mask must always include the STD bit if any are set. * @@ -101,7 +101,7 @@ static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask) { unsigned long flags; -#ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG WARN_ON(mask && !(mask & IRQS_DISABLED)); #endif diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3b90a2f1018e..b2995509b01d 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -128,7 +128,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) * of irq tracing is used, we additionally check that condition * is correct */ -#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG) +#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) lbz r10,PACAIRQSOFTMASK(r13) 1: tdnei r10,IRQS_ENABLED EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING @@ -911,7 +911,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS stb r7,PACAIRQHAPPENED(r13) 1: -#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG) +#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) /* The interrupt should not have soft enabled. */ lbz r7,PACAIRQSOFTMASK(r13) 1: tdeqi r7,IRQS_ENABLED diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 1a41cceddfe2..f88038847790 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -264,7 +264,7 @@ notrace void arch_local_irq_restore(unsigned long mask) */ if (unlikely(irq_happened != PACA_IRQ_HARD_DIS)) __hard_irq_disable(); -#ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG else { /* * We should already be hard disabled here. We had bugs @@ -275,7 +275,7 @@ notrace void arch_local_irq_restore(unsigned long mask) if (WARN_ON(mfmsr() & MSR_EE)) __hard_irq_disable(); } -#endif /* CONFIG_TRACE_IRQFLAGS */ +#endif irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); -- cgit v1.2.3 From c6424382de996c216e81cfccf5cf2371157df651 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 20 Dec 2017 09:25:55 +0530 Subject: powerpc/64s: Add new set of irq_soft_mask_ functions for PMI masking To support soft-masking of the performance monitor interrupt, a set of new powerpc_local_irq_pmu_save() and powerpc_local_irq_restore() functions are added. And powerpc_local_irq_save() implemented, by adding a new irq_soft_mask manipulation function irq_soft_mask_or_return(). Local_irq_pmu_* macros are provided to access these powerpc_local_irq_pmu* functions which includes trace_hardirqs_on|off() to match what we have in include/linux/irqflags.h. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hw_irq.h | 67 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index f40da3bd079b..88e5e8f17e98 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -115,6 +115,24 @@ static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask) return flags; } +static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask) +{ + unsigned long flags, tmp; + + asm volatile( + "lbz %0,%2(13); or %1,%0,%3; stb %1,%2(13)" + : "=&r" (flags), "=r" (tmp) + : "i" (offsetof(struct paca_struct, irq_soft_mask)), + "r" (mask) + : "memory"); + +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON((mask | flags) && !((mask | flags) & IRQS_DISABLED)); +#endif + + return flags; +} + static inline unsigned long arch_local_save_flags(void) { return irq_soft_mask_return(); @@ -147,6 +165,55 @@ static inline bool arch_irqs_disabled(void) return arch_irqs_disabled_flags(arch_local_save_flags()); } +#ifdef CONFIG_PPC_BOOK3S +/* + * To support disabling and enabling of irq with PMI, set of + * new powerpc_local_irq_pmu_save() and powerpc_local_irq_restore() + * functions are added. These macros are implemented using generic + * linux local_irq_* code from include/linux/irqflags.h. + */ +#define raw_local_irq_pmu_save(flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = irq_soft_mask_or_return(IRQS_DISABLED | \ + IRQS_PMI_DISABLED); \ + } while(0) + +#define raw_local_irq_pmu_restore(flags) \ + do { \ + typecheck(unsigned long, flags); \ + arch_local_irq_restore(flags); \ + } while(0) + +#ifdef CONFIG_TRACE_IRQFLAGS +#define powerpc_local_irq_pmu_save(flags) \ + do { \ + raw_local_irq_pmu_save(flags); \ + trace_hardirqs_off(); \ + } while(0) +#define powerpc_local_irq_pmu_restore(flags) \ + do { \ + if (raw_irqs_disabled_flags(flags)) { \ + raw_local_irq_pmu_restore(flags); \ + trace_hardirqs_off(); \ + } else { \ + trace_hardirqs_on(); \ + raw_local_irq_pmu_restore(flags); \ + } \ + } while(0) +#else +#define powerpc_local_irq_pmu_save(flags) \ + do { \ + raw_local_irq_pmu_save(flags); \ + } while(0) +#define powerpc_local_irq_pmu_restore(flags) \ + do { \ + raw_local_irq_pmu_restore(flags); \ + } while (0) +#endif /* CONFIG_TRACE_IRQFLAGS */ + +#endif /* CONFIG_PPC_BOOK3S */ + #ifdef CONFIG_PPC_BOOK3E #define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory") #define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory") -- cgit v1.2.3 From 3b7e30208699e23a70565601cac54b1f0741b6d7 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 20 Dec 2017 09:25:56 +0530 Subject: powerpc: use generic atomic implementation for local_t powerpc implements local_t with atomic operations. There is already an asm-generic implementation which does this using atomic_t. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/local.h | 171 +-------------------------------------- 1 file changed, 1 insertion(+), 170 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h index 600a68bd77f5..cfbcc31d43ad 100644 --- a/arch/powerpc/include/asm/local.h +++ b/arch/powerpc/include/asm/local.h @@ -2,175 +2,6 @@ #ifndef _ARCH_POWERPC_LOCAL_H #define _ARCH_POWERPC_LOCAL_H -#include -#include - -typedef struct -{ - atomic_long_t a; -} local_t; - -#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } - -#define local_read(l) atomic_long_read(&(l)->a) -#define local_set(l,i) atomic_long_set(&(l)->a, (i)) - -#define local_add(i,l) atomic_long_add((i),(&(l)->a)) -#define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) -#define local_inc(l) atomic_long_inc(&(l)->a) -#define local_dec(l) atomic_long_dec(&(l)->a) - -static __inline__ long local_add_return(long a, local_t *l) -{ - long t; - - __asm__ __volatile__( -"1:" PPC_LLARX(%0,0,%2,0) " # local_add_return\n\ - add %0,%1,%0\n" - PPC405_ERR77(0,%2) - PPC_STLCX "%0,0,%2 \n\ - bne- 1b" - : "=&r" (t) - : "r" (a), "r" (&(l->a.counter)) - : "cc", "memory"); - - return t; -} - -#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) - -static __inline__ long local_sub_return(long a, local_t *l) -{ - long t; - - __asm__ __volatile__( -"1:" PPC_LLARX(%0,0,%2,0) " # local_sub_return\n\ - subf %0,%1,%0\n" - PPC405_ERR77(0,%2) - PPC_STLCX "%0,0,%2 \n\ - bne- 1b" - : "=&r" (t) - : "r" (a), "r" (&(l->a.counter)) - : "cc", "memory"); - - return t; -} - -static __inline__ long local_inc_return(local_t *l) -{ - long t; - - __asm__ __volatile__( -"1:" PPC_LLARX(%0,0,%1,0) " # local_inc_return\n\ - addic %0,%0,1\n" - PPC405_ERR77(0,%1) - PPC_STLCX "%0,0,%1 \n\ - bne- 1b" - : "=&r" (t) - : "r" (&(l->a.counter)) - : "cc", "xer", "memory"); - - return t; -} - -/* - * local_inc_and_test - increment and test - * @l: pointer of type local_t - * - * Atomically increments @l by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -#define local_inc_and_test(l) (local_inc_return(l) == 0) - -static __inline__ long local_dec_return(local_t *l) -{ - long t; - - __asm__ __volatile__( -"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_return\n\ - addic %0,%0,-1\n" - PPC405_ERR77(0,%1) - PPC_STLCX "%0,0,%1\n\ - bne- 1b" - : "=&r" (t) - : "r" (&(l->a.counter)) - : "cc", "xer", "memory"); - - return t; -} - -#define local_cmpxchg(l, o, n) \ - (cmpxchg_local(&((l)->a.counter), (o), (n))) -#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n))) - -/** - * local_add_unless - add unless the number is a given value - * @l: pointer of type local_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @l, so long as it was not @u. - * Returns non-zero if @l was not @u, and zero otherwise. - */ -static __inline__ int local_add_unless(local_t *l, long a, long u) -{ - long t; - - __asm__ __volatile__ ( -"1:" PPC_LLARX(%0,0,%1,0) " # local_add_unless\n\ - cmpw 0,%0,%3 \n\ - beq- 2f \n\ - add %0,%2,%0 \n" - PPC405_ERR77(0,%2) - PPC_STLCX "%0,0,%1 \n\ - bne- 1b \n" -" subf %0,%2,%0 \n\ -2:" - : "=&r" (t) - : "r" (&(l->a.counter)), "r" (a), "r" (u) - : "cc", "memory"); - - return t != u; -} - -#define local_inc_not_zero(l) local_add_unless((l), 1, 0) - -#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0) -#define local_dec_and_test(l) (local_dec_return((l)) == 0) - -/* - * Atomically test *l and decrement if it is greater than 0. - * The function returns the old value of *l minus 1. - */ -static __inline__ long local_dec_if_positive(local_t *l) -{ - long t; - - __asm__ __volatile__( -"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_if_positive\n\ - cmpwi %0,1\n\ - addi %0,%0,-1\n\ - blt- 2f\n" - PPC405_ERR77(0,%1) - PPC_STLCX "%0,0,%1\n\ - bne- 1b" - "\n\ -2:" : "=&b" (t) - : "r" (&(l->a.counter)) - : "cc", "memory"); - - return t; -} - -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. - */ - -#define __local_inc(l) ((l)->a.counter++) -#define __local_dec(l) ((l)->a.counter++) -#define __local_add(i,l) ((l)->a.counter+=(i)) -#define __local_sub(i,l) ((l)->a.counter-=(i)) +#include #endif /* _ARCH_POWERPC_LOCAL_H */ -- cgit v1.2.3 From 6cd74d2b6174957103c8f6f2cfcefe086a51e6eb Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 20 Dec 2017 09:25:57 +0530 Subject: powerpc/64s: Implement local_t using irq soft masking local_t is used for atomic modifications for per-CPU data, versus re-entrant modifications via interrupts. local_t read-modify-write atomic operations are currently implemented with hardware atomics (larx/stcx), which are quite slow. This patch implements them by masking all types of interrupts that may do local_t operations ("standard" and perf interrupts). Rusty's benchmark (https://lkml.org/lkml/2008/12/16/450) gives the following timings for the local_t test, in nanoseconds per iteration: larx/stcx irq+pmu disable _inc 38 10 _add 38 10 _read 4 4 _add_return 38 10 There are still some interrupt types (system reset, machine check, and watchdog), which can not safely use local_t operations, because they are not masked. An alternative approach was proposed, using a CR bit to mark a critical section, which is tested in the interrupt return path, and would then branch to a fixup handler (similar to exception fixups), which re-starts the operation. The problem with this was the complexity of the fixup handler and the latency of the slow path. https://lists.ozlabs.org/pipermail/linuxppc-dev/2014-November/123024.html Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/local.h | 141 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h index cfbcc31d43ad..fdd00939270b 100644 --- a/arch/powerpc/include/asm/local.h +++ b/arch/powerpc/include/asm/local.h @@ -2,6 +2,147 @@ #ifndef _ARCH_POWERPC_LOCAL_H #define _ARCH_POWERPC_LOCAL_H +#ifdef CONFIG_PPC_BOOK3S_64 + +#include +#include +#include + +#include + +typedef struct +{ + long v; +} local_t; + +#define LOCAL_INIT(i) { (i) } + +static __inline__ long local_read(local_t *l) +{ + return READ_ONCE(l->v); +} + +static __inline__ void local_set(local_t *l, long i) +{ + WRITE_ONCE(l->v, i); +} + +#define LOCAL_OP(op, c_op) \ +static __inline__ void local_##op(long i, local_t *l) \ +{ \ + unsigned long flags; \ + \ + powerpc_local_irq_pmu_save(flags); \ + l->v c_op i; \ + powerpc_local_irq_pmu_restore(flags); \ +} + +#define LOCAL_OP_RETURN(op, c_op) \ +static __inline__ long local_##op##_return(long a, local_t *l) \ +{ \ + long t; \ + unsigned long flags; \ + \ + powerpc_local_irq_pmu_save(flags); \ + t = (l->v c_op a); \ + powerpc_local_irq_pmu_restore(flags); \ + \ + return t; \ +} + +#define LOCAL_OPS(op, c_op) \ + LOCAL_OP(op, c_op) \ + LOCAL_OP_RETURN(op, c_op) + +LOCAL_OPS(add, +=) +LOCAL_OPS(sub, -=) + +#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) +#define local_inc_return(l) local_add_return(1LL, l) +#define local_inc(l) local_inc_return(l) + +/* + * local_inc_and_test - increment and test + * @l: pointer of type local_t + * + * Atomically increments @l by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#define local_inc_and_test(l) (local_inc_return(l) == 0) + +#define local_dec_return(l) local_sub_return(1LL, l) +#define local_dec(l) local_dec_return(l) +#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0) +#define local_dec_and_test(l) (local_dec_return((l)) == 0) + +static __inline__ long local_cmpxchg(local_t *l, long o, long n) +{ + long t; + unsigned long flags; + + powerpc_local_irq_pmu_save(flags); + t = l->v; + if (t == o) + l->v = n; + powerpc_local_irq_pmu_restore(flags); + + return t; +} + +static __inline__ long local_xchg(local_t *l, long n) +{ + long t; + unsigned long flags; + + powerpc_local_irq_pmu_save(flags); + t = l->v; + l->v = n; + powerpc_local_irq_pmu_restore(flags); + + return t; +} + +/** + * local_add_unless - add unless the number is a given value + * @l: pointer of type local_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @l, so long as it was not @u. + * Returns non-zero if @l was not @u, and zero otherwise. + */ +static __inline__ int local_add_unless(local_t *l, long a, long u) +{ + unsigned long flags; + int ret = 0; + + powerpc_local_irq_pmu_save(flags); + if (l->v != u) { + l->v += a; + ret = 1; + } + powerpc_local_irq_pmu_restore(flags); + + return ret; +} + +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + +/* Use these for per-cpu local_t variables: on some archs they are + * much more efficient than these naive implementations. Note they take + * a variable, not an address. + */ + +#define __local_inc(l) ((l)->v++) +#define __local_dec(l) ((l)->v++) +#define __local_add(i,l) ((l)->v+=(i)) +#define __local_sub(i,l) ((l)->v-=(i)) + +#else /* CONFIG_PPC64 */ + #include +#endif /* CONFIG_PPC64 */ + #endif /* _ARCH_POWERPC_LOCAL_H */ -- cgit v1.2.3 From e7673818d9510043f11502e55afee5a1ad4f8ff7 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Mon, 11 Dec 2017 11:28:35 +0530 Subject: powerpc/perf: Remove thread_imc_pmu global variable from Remove the global variable 'thread_imc_pmu', since it is not used in the code. Signed-off-by: Anju T Sudhakar Reviewed-by: madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 0ead3cd73caa..2a7b9323183a 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -40,7 +40,6 @@ static struct imc_pmu *core_imc_pmu; /* Thread IMC data structures and variables */ static DEFINE_PER_CPU(u64 *, thread_imc_mem); -static struct imc_pmu *thread_imc_pmu; static int thread_imc_mem_size; struct imc_pmu *imc_event_to_pmu(struct perf_event *event) @@ -1263,7 +1262,6 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent, return res; } - thread_imc_pmu = pmu_ptr; break; default: return -EINVAL; -- cgit v1.2.3 From ed8e443feee2bde8539f865092289eeaf0681c3f Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Mon, 11 Dec 2017 11:28:36 +0530 Subject: powerpc/perf: IMC code cleanup with some code refactoring Factor out memory freeing part for attribute elements from imc_common_cpuhp_mem_free(). Signed-off-by: Anju T Sudhakar Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 2a7b9323183a..9db2529df3ed 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1157,6 +1157,16 @@ static void cleanup_all_thread_imc_memory(void) } } +/* Function to free the attr_groups which are dynamically allocated */ +static void imc_common_mem_free(struct imc_pmu *pmu_ptr) +{ + if (pmu_ptr->attr_groups[IMC_EVENT_ATTR]) + kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); + kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); + kfree(pmu_ptr); + kfree(per_nest_pmu_arr); +} + /* * Common function to unregister cpu hotplug callback and * free the memory. @@ -1188,14 +1198,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE); cleanup_all_thread_imc_memory(); } - - /* Only free the attr_groups which are dynamically allocated */ - if (pmu_ptr->attr_groups[IMC_EVENT_ATTR]) - kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); - kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); - kfree(pmu_ptr); - kfree(per_nest_pmu_arr); - return; } @@ -1244,8 +1246,10 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent, core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref), GFP_KERNEL); - if (!core_imc_refc) + if (!core_imc_refc) { + kfree(pmu_ptr->mem_info); return -ENOMEM; + } core_imc_pmu = pmu_ptr; break; @@ -1258,8 +1262,10 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent, thread_imc_mem_size = pmu_ptr->counter_mem_size; for_each_online_cpu(cpu) { res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size); - if (res) + if (res) { + cleanup_all_thread_imc_memory(); return res; + } } break; @@ -1285,8 +1291,10 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id int ret; ret = imc_mem_init(pmu_ptr, parent, pmu_idx); - if (ret) - goto err_free; + if (ret) { + imc_common_mem_free(pmu_ptr); + return ret; + } switch (pmu_ptr->domain) { case IMC_DOMAIN_NEST: @@ -1351,6 +1359,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id return 0; err_free: + imc_common_mem_free(pmu_ptr); imc_common_cpuhp_mem_free(pmu_ptr); return ret; } -- cgit v1.2.3 From 8b4e6deaff7822c76c94336c40f8d1f244c6f6ed Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Mon, 11 Dec 2017 11:28:37 +0530 Subject: powerpc/perf: Pass struct imc_events as a parameter to imc_parse_event() Remove the allocation of struct imc_events from imc_parse_event(). Instead pass imc_events as a parameter to imc_parse_event(), which is a pointer to a slot in the array allocated in update_events_in_group(). Reported-by: Dan Carpenter ("powerpc/perf: Fix a sizeof() typo so we allocate less memory") Suggested-by: Michael Ellerman Signed-off-by: Anju T Sudhakar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/imc-pmu.h | 2 +- arch/powerpc/perf/imc-pmu.c | 66 +++++++++++++++++++++++--------------- 2 files changed, 41 insertions(+), 27 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index fad0e6ff460f..080731dc883a 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -71,7 +71,7 @@ struct imc_events { struct imc_pmu { struct pmu pmu; struct imc_mem_info *mem_info; - struct imc_events **events; + struct imc_events *events; /* * Attribute groups for the PMU. Slot 0 used for * format attribute, slot 1 used for cpusmask attribute, diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 9db2529df3ed..9ec7e87056e5 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -116,17 +116,13 @@ static struct attribute *device_str_attr_create(const char *name, const char *st return &attr->attr.attr; } -struct imc_events *imc_parse_event(struct device_node *np, const char *scale, - const char *unit, const char *prefix, u32 base) +static int imc_parse_event(struct device_node *np, const char *scale, + const char *unit, const char *prefix, + u32 base, struct imc_events *event) { - struct imc_events *event; const char *s; u32 reg; - event = kzalloc(sizeof(struct imc_events), GFP_KERNEL); - if (!event) - return NULL; - if (of_property_read_u32(np, "reg", ®)) goto error; /* Add the base_reg value to the "reg" */ @@ -157,14 +153,32 @@ struct imc_events *imc_parse_event(struct device_node *np, const char *scale, goto error; } - return event; + return 0; error: kfree(event->unit); kfree(event->scale); kfree(event->name); - kfree(event); + return -EINVAL; +} + +/* + * imc_free_events: Function to cleanup the events list, having + * "nr_entries". + */ +static void imc_free_events(struct imc_events *events, int nr_entries) +{ + int i; + + /* Nothing to clean, return */ + if (!events) + return; + for (i = 0; i < nr_entries; i++) { + kfree(events[i].unit); + kfree(events[i].scale); + kfree(events[i].name); + } - return NULL; + kfree(events); } /* @@ -176,9 +190,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) struct attribute_group *attr_group; struct attribute **attrs, *dev_str; struct device_node *np, *pmu_events; - struct imc_events *ev; u32 handle, base_reg; - int i=0, j=0, ct; + int i = 0, j = 0, ct, ret; const char *prefix, *g_scale, *g_unit; const char *ev_val_str, *ev_scale_str, *ev_unit_str; @@ -216,15 +229,17 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) ct = 0; /* Parse the events and update the struct */ for_each_child_of_node(pmu_events, np) { - ev = imc_parse_event(np, g_scale, g_unit, prefix, base_reg); - if (ev) - pmu->events[ct++] = ev; + ret = imc_parse_event(np, g_scale, g_unit, prefix, base_reg, &pmu->events[ct]); + if (!ret) + ct++; } /* Allocate memory for attribute group */ attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL); - if (!attr_group) + if (!attr_group) { + imc_free_events(pmu->events, ct); return -ENOMEM; + } /* * Allocate memory for attributes. @@ -237,31 +252,31 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL); if (!attrs) { kfree(attr_group); - kfree(pmu->events); + imc_free_events(pmu->events, ct); return -ENOMEM; } attr_group->name = "events"; attr_group->attrs = attrs; do { - ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i]->value); - dev_str = device_str_attr_create(pmu->events[i]->name, ev_val_str); + ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value); + dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str); if (!dev_str) continue; attrs[j++] = dev_str; - if (pmu->events[i]->scale) { - ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale",pmu->events[i]->name); - dev_str = device_str_attr_create(ev_scale_str, pmu->events[i]->scale); + if (pmu->events[i].scale) { + ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name); + dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale); if (!dev_str) continue; attrs[j++] = dev_str; } - if (pmu->events[i]->unit) { - ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit",pmu->events[i]->name); - dev_str = device_str_attr_create(ev_unit_str, pmu->events[i]->unit); + if (pmu->events[i].unit) { + ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name); + dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit); if (!dev_str) continue; @@ -272,7 +287,6 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) /* Save the event attribute */ pmu->attr_groups[IMC_EVENT_ATTR] = attr_group; - kfree(pmu->events); return 0; } -- cgit v1.2.3 From 684d984038aa2b8ec3ab4b9d314257879a3fbfbd Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Wed, 13 Dec 2017 11:39:54 +0530 Subject: powerpc/powernv: Add debugfs interface for imc-mode and imc-command In memory Collection (IMC) counter pmu driver controls the ucode's execution state. At the system boot, IMC perf driver pause the ucode. Ucode state is changed to "running" only when any of the nest units are monitored or profiled using perf tool. Nest units support only limited set of hardware counters and ucode is always programmed in the "production mode" ("accumulation") mode. This mode is configured to provide key performance metric data for most of the nest units. But ucode also supports other modes which would be used for "debug" to drill down specific nest units. That is, ucode when switched to "powerbus" debug mode (for example), will dynamically reconfigure the nest counters to target only "powerbus" related events in the hardware counters. This allows the IMC nest unit to focus on powerbus related transactions in the system in more detail. At this point, production mode events may or may not be counted. IMC nest counters has both in-band (ucode access) and out of band access to it. Since not all nest counter configurations are supported by ucode, out of band tools are used to characterize other nest counter configurations. Patch provides an interface via "debugfs" to enable the switching of ucode modes in the system. To switch ucode mode, one has to first pause the microcode (imc_cmd), and then write the target mode value to the "imc_mode" file. Proposed Approach: In the proposed approach, the function (export_imc_mode_and_cmd) which creates the debugfs interface for imc mode and command is implemented in opal-imc.c. Thus we can use imc_get_mem_addr() to get the homer base address for each chip. The interface to expose imc mode and command is required only if we have nest pmu units registered. Employing the existing data structures to track whether we have any nest units registered will require to extend data from perf side to opal-imc.c. Instead an integer is introduced to hold that information by counting successful nest unit registration. Debugfs interface is removed based on the integer count. Example for the interface: $ ls /sys/kernel/debug/imc imc_cmd_0 imc_cmd_8 imc_mode_0 imc_mode_8 Signed-off-by: Anju T Sudhakar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/imc-pmu.h | 7 +++ arch/powerpc/platforms/powernv/opal-imc.c | 77 +++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index 080731dc883a..d76cb11be3e3 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -34,6 +34,13 @@ #define THREAD_IMC_LDBAR_MASK 0x0003ffffffffe000ULL #define THREAD_IMC_ENABLE 0x8000000000000000ULL +/* + * For debugfs interface for imc-mode and imc-command + */ +#define IMC_CNTL_BLK_OFFSET 0x3FC00 +#define IMC_CNTL_BLK_CMD_OFFSET 8 +#define IMC_CNTL_BLK_MODE_OFFSET 32 + /* * Structure to hold memory address information for imc units. */ diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 465ea105b771..dd4c9b8b8a81 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -21,6 +21,78 @@ #include #include #include +#include + +static struct dentry *imc_debugfs_parent; + +/* Helpers to export imc command and mode via debugfs */ +static int imc_mem_get(void *data, u64 *val) +{ + *val = cpu_to_be64(*(u64 *)data); + return 0; +} + +static int imc_mem_set(void *data, u64 val) +{ + *(u64 *)data = cpu_to_be64(val); + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_imc_x64, imc_mem_get, imc_mem_set, "0x%016llx\n"); + +static struct dentry *imc_debugfs_create_x64(const char *name, umode_t mode, + struct dentry *parent, u64 *value) +{ + return debugfs_create_file_unsafe(name, mode, parent, + value, &fops_imc_x64); +} + +/* + * export_imc_mode_and_cmd: Create a debugfs interface + * for imc_cmd and imc_mode + * for each node in the system. + * imc_mode and imc_cmd can be changed by echo into + * this interface. + */ +static void export_imc_mode_and_cmd(struct device_node *node, + struct imc_pmu *pmu_ptr) +{ + static u64 loc, *imc_mode_addr, *imc_cmd_addr; + int chip = 0, nid; + char mode[16], cmd[16]; + u32 cb_offset; + + imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root); + + /* + * Return here, either because 'imc' directory already exists, + * Or failed to create a new one. + */ + if (!imc_debugfs_parent) + return; + + if (of_property_read_u32(node, "cb_offset", &cb_offset)) + cb_offset = IMC_CNTL_BLK_OFFSET; + + for_each_node(nid) { + loc = (u64)(pmu_ptr->mem_info[chip].vbase) + cb_offset; + imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET); + sprintf(mode, "imc_mode_%d", nid); + if (!imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent, + imc_mode_addr)) + goto err; + + imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET); + sprintf(cmd, "imc_cmd_%d", nid); + if (!imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent, + imc_cmd_addr)) + goto err; + chip++; + } + return; + +err: + debugfs_remove_recursive(imc_debugfs_parent); +} /* * imc_get_mem_addr_nest: Function to get nest counter memory region @@ -65,6 +137,7 @@ static int imc_get_mem_addr_nest(struct device_node *node, } pmu_ptr->imc_counter_mmaped = true; + export_imc_mode_and_cmd(node, pmu_ptr); kfree(base_addr_arr); kfree(chipid_arr); return 0; @@ -213,6 +286,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev) } } + /* If none of the nest units are registered, remove debugfs interface */ + if (pmu_count == 0) + debugfs_remove_recursive(imc_debugfs_parent); + return 0; } -- cgit v1.2.3 From 074db39e00bb0d35cda7de9a4bae3a9b6da557ad Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Tue, 31 Oct 2017 15:22:00 +0530 Subject: powerpc/perf: Change the data type for the variable 'ncpu' in IMC code Change the data type for the variable 'ncpu' in ppc_core_imc_cpu_offline(), since cpumask_any_but() returns an 'int' value. Signed-off-by: Anju T Sudhakar Reported-by: David Binderman Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 9ec7e87056e5..671aa20d8dd8 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -611,7 +611,8 @@ static int ppc_core_imc_cpu_online(unsigned int cpu) static int ppc_core_imc_cpu_offline(unsigned int cpu) { - unsigned int ncpu, core_id; + unsigned int core_id; + int ncpu; struct imc_pmu_ref *ref; /* -- cgit v1.2.3 From b1db551324f72fa14ad82ca31237a7ed418104df Mon Sep 17 00:00:00 2001 From: Christophe Lombard Date: Thu, 11 Jan 2018 09:55:25 +0100 Subject: cxl: Add support for ASB_Notify on POWER9 The POWER9 core supports a new feature: ASB_Notify which requires the support of the Special Purpose Register: TIDR. The ASB_Notify command, generated by the AFU, will attempt to wake-up the host thread identified by the particular LPID:PID:TID. This patch assign a unique TIDR (thread id) for the current thread which will be used in the process element entry. Signed-off-by: Christophe Lombard Reviewed-by: Philippe Bergheaud Acked-by: Frederic Barrat Reviewed-by: Vaibhav Jain Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 1 + drivers/misc/cxl/context.c | 2 ++ drivers/misc/cxl/cxl.h | 3 +++ drivers/misc/cxl/cxllib.c | 3 ++- drivers/misc/cxl/file.c | 15 +++++++++++++-- drivers/misc/cxl/native.c | 13 ++++++++++++- include/uapi/misc/cxl.h | 10 ++++++---- 7 files changed, 39 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 1563d1190da3..9eb78ec0ce5b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1590,6 +1590,7 @@ int set_thread_tidr(struct task_struct *t) return 0; } +EXPORT_SYMBOL_GPL(set_thread_tidr); #endif /* CONFIG_PPC64 */ diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 12a41b2753f0..7ff315ad3692 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -45,6 +45,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master) ctx->pid = NULL; /* Set in start work ioctl */ mutex_init(&ctx->mapping_lock); ctx->mapping = NULL; + ctx->tidr = 0; + ctx->assign_tidr = false; if (cxl_is_power8()) { spin_lock_init(&ctx->sste_lock); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index e46a4062904a..53149fbd780e 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -630,6 +630,9 @@ struct cxl_context { struct list_head extra_irq_contexts; struct mm_struct *mm; + + u16 tidr; + bool assign_tidr; }; struct cxl_irq_info; diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c index dc9bc1807fdf..30ccba436b3b 100644 --- a/drivers/misc/cxl/cxllib.c +++ b/drivers/misc/cxl/cxllib.c @@ -199,10 +199,11 @@ int cxllib_get_PE_attributes(struct task_struct *task, */ attr->pid = mm->context.id; mmput(mm); + attr->tid = task->thread.tidr; } else { attr->pid = 0; + attr->tid = 0; } - attr->tid = 0; return 0; } EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes); diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index 76c0b0ca9388..93fd381f6484 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -173,7 +173,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, * flags are set it's invalid */ if (work.reserved1 || work.reserved2 || work.reserved3 || - work.reserved4 || work.reserved5 || work.reserved6 || + work.reserved4 || work.reserved5 || (work.flags & ~CXL_START_WORK_ALL)) { rc = -EINVAL; goto out; @@ -186,12 +186,16 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, rc = -EINVAL; goto out; } + if ((rc = afu_register_irqs(ctx, work.num_interrupts))) goto out; if (work.flags & CXL_START_WORK_AMR) amr = work.amr & mfspr(SPRN_UAMOR); + if (work.flags & CXL_START_WORK_TID) + ctx->assign_tidr = true; + ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF); /* @@ -263,8 +267,15 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, goto out; } - ctx->status = STARTED; rc = 0; + if (work.flags & CXL_START_WORK_TID) { + work.tid = ctx->tidr; + if (copy_to_user(uwork, &work, sizeof(work))) + rc = -EFAULT; + } + + ctx->status = STARTED; + out: mutex_unlock(&ctx->status_mutex); return rc; diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 02b6b45b4c20..1b3d7c65ea3f 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "cxl.h" @@ -655,6 +656,7 @@ static void update_ivtes_directed(struct cxl_context *ctx) static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr) { u32 pid; + int rc; cxl_assign_psn_space(ctx); @@ -673,7 +675,16 @@ static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr) pid = ctx->mm->context.id; } - ctx->elem->common.tid = 0; + /* Assign a unique TIDR (thread id) for the current thread */ + if (!(ctx->tidr) && (ctx->assign_tidr)) { + rc = set_thread_tidr(current); + if (rc) + return -ENODEV; + ctx->tidr = current->thread.tidr; + pr_devel("%s: current tidr: %d\n", __func__, ctx->tidr); + } + + ctx->elem->common.tid = cpu_to_be32(ctx->tidr); ctx->elem->common.pid = cpu_to_be32(pid); ctx->elem->sr = cpu_to_be64(calculate_sr(ctx)); diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h index 49e8fd08855a..56376d3907d8 100644 --- a/include/uapi/misc/cxl.h +++ b/include/uapi/misc/cxl.h @@ -20,20 +20,22 @@ struct cxl_ioctl_start_work { __u64 work_element_descriptor; __u64 amr; __s16 num_interrupts; - __s16 reserved1; - __s32 reserved2; + __u16 tid; + __s32 reserved1; + __u64 reserved2; __u64 reserved3; __u64 reserved4; __u64 reserved5; - __u64 reserved6; }; #define CXL_START_WORK_AMR 0x0000000000000001ULL #define CXL_START_WORK_NUM_IRQS 0x0000000000000002ULL #define CXL_START_WORK_ERR_FF 0x0000000000000004ULL +#define CXL_START_WORK_TID 0x0000000000000008ULL #define CXL_START_WORK_ALL (CXL_START_WORK_AMR |\ CXL_START_WORK_NUM_IRQS |\ - CXL_START_WORK_ERR_FF) + CXL_START_WORK_ERR_FF |\ + CXL_START_WORK_TID) /* Possible modes that an afu can be in */ -- cgit v1.2.3 From 785a3fab4adbf91b2189c928a59ae219c54ba95e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 23 Oct 2017 07:20:00 -0700 Subject: mm, dax: introduce pfn_t_special() In support of removing the VM_MIXEDMAP indication from DAX VMAs, introduce pfn_t_special() for drivers to indicate that _PAGE_SPECIAL should be used for DAX ptes. This also helps identify drivers like dccssblk that only want to use DAX in a read-only fashion without get_user_pages() support. Ideally we could delete axonram and dcssblk DAX support, but if we need to keep it better make it explicit that axonram and dcssblk only support a sub-set of DAX due to missing _PAGE_DEVMAP support. Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Dan Williams --- arch/powerpc/sysdev/axonram.c | 2 +- drivers/s390/block/dcssblk.c | 3 ++- include/linux/pfn_t.h | 13 +++++++++++++ mm/memory.c | 16 +++++++++++++++- 4 files changed, 31 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 1b307c80b401..cdbb0e59b3d3 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -151,7 +151,7 @@ __axon_ram_direct_access(struct axon_ram_bank *bank, pgoff_t pgoff, long nr_page resource_size_t offset = pgoff * PAGE_SIZE; *kaddr = (void *) bank->io_addr + offset; - *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV); + *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV|PFN_SPECIAL); return (bank->size - offset) / PAGE_SIZE; } diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 6aaefb780436..9cae08b36b80 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -916,7 +916,8 @@ __dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff, dev_sz = dev_info->end - dev_info->start + 1; *kaddr = (void *) dev_info->start + offset; - *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV); + *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), + PFN_DEV|PFN_SPECIAL); return (dev_sz - offset) / PAGE_SIZE; } diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 43b1d7648e82..a03c2642a87c 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -15,8 +15,10 @@ #define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2)) #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3)) #define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4)) +#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5)) #define PFN_FLAGS_TRACE \ + { PFN_SPECIAL, "SPECIAL" }, \ { PFN_SG_CHAIN, "SG_CHAIN" }, \ { PFN_SG_LAST, "SG_LAST" }, \ { PFN_DEV, "DEV" }, \ @@ -120,4 +122,15 @@ pud_t pud_mkdevmap(pud_t pud); #endif #endif /* __HAVE_ARCH_PTE_DEVMAP */ +#ifdef __HAVE_ARCH_PTE_SPECIAL +static inline bool pfn_t_special(pfn_t pfn) +{ + return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL; +} +#else +static inline bool pfn_t_special(pfn_t pfn) +{ + return false; +} +#endif /* __HAVE_ARCH_PTE_SPECIAL */ #endif /* _LINUX_PFN_T_H_ */ diff --git a/mm/memory.c b/mm/memory.c index ca5674cbaff2..46b6c33b7f04 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1897,12 +1897,26 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, } EXPORT_SYMBOL(vm_insert_pfn_prot); +static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn) +{ + /* these checks mirror the abort conditions in vm_normal_page */ + if (vma->vm_flags & VM_MIXEDMAP) + return true; + if (pfn_t_devmap(pfn)) + return true; + if (pfn_t_special(pfn)) + return true; + if (is_zero_pfn(pfn_t_to_pfn(pfn))) + return true; + return false; +} + static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn, bool mkwrite) { pgprot_t pgprot = vma->vm_page_prot; - BUG_ON(!(vma->vm_flags & VM_MIXEDMAP)); + BUG_ON(!vm_mixed_ok(vma, pfn)); if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; -- cgit v1.2.3 From 569d0365f571fa6421a5c80bc30d1b2cdab857fe Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 14 Oct 2017 11:33:32 -0700 Subject: dax: require 'struct page' by default for filesystem dax If a dax buffer from a device that does not map pages is passed to read(2) or write(2) as a target for direct-I/O it triggers SIGBUS. If gdb attempts to examine the contents of a dax buffer from a device that does not map pages it triggers SIGBUS. If fork(2) is called on a process with a dax mapping from a device that does not map pages it triggers SIGBUS. 'struct page' is required otherwise several kernel code paths break in surprising ways. Disable filesystem-dax on devices that do not map pages. In addition to needing pfn_to_page() to be valid we also require devmap pages. We need this to detect dax pages in the get_user_pages_fast() path and so that we can stop managing the VM_MIXEDMAP flag. For DAX drivers that have not supported get_user_pages() to date we allow them to opt-in to supporting DAX with the CONFIG_FS_DAX_LIMITED configuration option which requires ->direct_access() to return pfn_t_special() pfns. This leaves DAX support in brd disabled and scheduled for removal. Note that when the initial dax support was being merged a few years back there was concern that struct page was unsuitable for use with next generation persistent memory devices. The theoretical concern was that struct page access, being such a hotly used data structure in the kernel, would lead to media wear out. While that was a reasonable conservative starting position it has not held true in practice. We have long since committed to using devm_memremap_pages() to support higher order kernel functionality that needs get_user_pages() and pfn_to_page(). Cc: Jeff Moyer Cc: Ross Zwisler Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Reviewed-by: Gerald Schaefer Signed-off-by: Dan Williams --- arch/powerpc/platforms/Kconfig | 1 + drivers/dax/super.c | 10 ++++++++++ drivers/s390/block/Kconfig | 1 + fs/Kconfig | 7 +++++++ 4 files changed, 19 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 5a96a2763e4a..2ce89b42a9f4 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -297,6 +297,7 @@ config AXON_RAM tristate "Axon DDR2 memory device driver" depends on PPC_IBM_CELL_BLADE && BLOCK select DAX + select FS_DAX_LIMITED default m help It registers one block device per Axon's DDR2 memory bank found diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 3ec804672601..473af694ad1c 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -123,6 +124,15 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize) return len < 0 ? len : -EIO; } + if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) + || pfn_t_devmap(pfn)) + /* pass */; + else { + pr_debug("VFS (%s): error: dax support not enabled\n", + sb->s_id); + return -EOPNOTSUPP; + } + return 0; } EXPORT_SYMBOL_GPL(__bdev_dax_supported); diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index bc27d716aa6b..1444333210c7 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -16,6 +16,7 @@ config BLK_DEV_XPRAM config DCSSBLK def_tristate m select DAX + select FS_DAX_LIMITED prompt "DCSSBLK support" depends on S390 && BLOCK help diff --git a/fs/Kconfig b/fs/Kconfig index 7aee6d699fd6..b40128bf6d1a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -58,6 +58,13 @@ config FS_DAX_PMD depends on ZONE_DEVICE depends on TRANSPARENT_HUGEPAGE +# Selected by DAX drivers that do not expect filesystem DAX to support +# get_user_pages() of DAX mappings. I.e. "limited" indicates no support +# for fork() of processes with MAP_SHARED mappings or support for +# direct-I/O to a DAX mapping. +config FS_DAX_LIMITED + bool + endif # BLOCK # Posix ACL utility routines -- cgit v1.2.3 From fa9dd599b4dae841924b022768354cfde9affecb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 20 Jan 2018 01:24:33 +0100 Subject: bpf: get rid of pure_initcall dependency to enable jits Having a pure_initcall() callback just to permanently enable BPF JITs under CONFIG_BPF_JIT_ALWAYS_ON is unnecessary and could leave a small race window in future where JIT is still disabled on boot. Since we know about the setting at compilation time anyway, just initialize it properly there. Also consolidate all the individual bpf_jit_enable variables into a single one and move them under one location. Moreover, don't allow for setting unspecified garbage values on them. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- arch/arm/net/bpf_jit_32.c | 2 -- arch/arm64/net/bpf_jit_comp.c | 2 -- arch/mips/net/bpf_jit.c | 2 -- arch/mips/net/ebpf_jit.c | 2 -- arch/powerpc/net/bpf_jit_comp.c | 2 -- arch/powerpc/net/bpf_jit_comp64.c | 2 -- arch/s390/net/bpf_jit_comp.c | 2 -- arch/sparc/net/bpf_jit_comp_32.c | 2 -- arch/sparc/net/bpf_jit_comp_64.c | 2 -- arch/x86/net/bpf_jit_comp.c | 2 -- kernel/bpf/core.c | 19 ++++++++++++------- net/core/sysctl_net_core.c | 18 ++++++++++++------ net/socket.c | 9 --------- 13 files changed, 24 insertions(+), 42 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 4425189bb24c..a15e7cdf8754 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -25,8 +25,6 @@ #include "bpf_jit_32.h" -int bpf_jit_enable __read_mostly; - #define STACK_OFFSET(k) (k) #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index acaa935ed977..8d456ee6dddc 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -31,8 +31,6 @@ #include "bpf_jit.h" -int bpf_jit_enable __read_mostly; - #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) #define TCALL_CNT (MAX_BPF_JIT_REG + 2) diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 44b925005dd3..4d8cb9bb8365 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -1207,8 +1207,6 @@ jmp_cmp: return 0; } -int bpf_jit_enable __read_mostly; - void bpf_jit_compile(struct bpf_prog *fp) { struct jit_ctx ctx; diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 97069a1b6f43..4e347030ed2c 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -177,8 +177,6 @@ static u32 b_imm(unsigned int tgt, struct jit_ctx *ctx) (ctx->idx * 4) - 4; } -int bpf_jit_enable __read_mostly; - enum which_ebpf_reg { src_reg, src_reg_no_fp, diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index f9941b3b5770..872d1f6dd11e 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -18,8 +18,6 @@ #include "bpf_jit32.h" -int bpf_jit_enable __read_mostly; - static inline void bpf_flush_icache(void *start, void *end) { smp_wmb(); diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 6771c63b2bec..217a78e84865 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -21,8 +21,6 @@ #include "bpf_jit64.h" -int bpf_jit_enable __read_mostly; - static void bpf_jit_fill_ill_insns(void *area, unsigned int size) { memset32(area, BREAKPOINT_INSTRUCTION, size/4); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 1dfadbd126f3..e50188773ff3 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -28,8 +28,6 @@ #include #include "bpf_jit.h" -int bpf_jit_enable __read_mostly; - struct bpf_jit { u32 seen; /* Flags to remember seen eBPF instructions */ u32 seen_reg[16]; /* Array to remember which registers are used */ diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c index 09e318eb34ee..3bd8ca95e521 100644 --- a/arch/sparc/net/bpf_jit_comp_32.c +++ b/arch/sparc/net/bpf_jit_comp_32.c @@ -11,8 +11,6 @@ #include "bpf_jit_32.h" -int bpf_jit_enable __read_mostly; - static inline bool is_simm13(unsigned int value) { return value + 0x1000 < 0x2000; diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 635fdefd4ae2..50a24d7bd4c5 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -12,8 +12,6 @@ #include "bpf_jit_64.h" -int bpf_jit_enable __read_mostly; - static inline bool is_simm13(unsigned int value) { return value + 0x1000 < 0x2000; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 87f214fbe66e..b881a979efe1 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -15,8 +15,6 @@ #include #include -int bpf_jit_enable __read_mostly; - /* * assembly code in arch/x86/net/bpf_jit.S */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 25e723b0dfd4..bc9c5b11d6a9 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -300,6 +300,11 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, } #ifdef CONFIG_BPF_JIT +/* All BPF JIT sysctl knobs here. */ +int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); +int bpf_jit_harden __read_mostly; +int bpf_jit_kallsyms __read_mostly; + static __always_inline void bpf_get_prog_addr_region(const struct bpf_prog *prog, unsigned long *symbol_start, @@ -381,8 +386,6 @@ static DEFINE_SPINLOCK(bpf_lock); static LIST_HEAD(bpf_kallsyms); static struct latch_tree_root bpf_tree __cacheline_aligned; -int bpf_jit_kallsyms __read_mostly; - static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux) { WARN_ON_ONCE(!list_empty(&aux->ksym_lnode)); @@ -563,8 +566,6 @@ void __weak bpf_jit_free(struct bpf_prog *fp) bpf_prog_unlock_free(fp); } -int bpf_jit_harden __read_mostly; - static int bpf_jit_blind_insn(const struct bpf_insn *from, const struct bpf_insn *aux, struct bpf_insn *to_buff) @@ -1379,9 +1380,13 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth) } #else -static unsigned int __bpf_prog_ret0(const void *ctx, - const struct bpf_insn *insn) +static unsigned int __bpf_prog_ret0_warn(const void *ctx, + const struct bpf_insn *insn) { + /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON + * is not working properly, so warn about it! + */ + WARN_ON_ONCE(1); return 0; } #endif @@ -1441,7 +1446,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; #else - fp->bpf_func = __bpf_prog_ret0; + fp->bpf_func = __bpf_prog_ret0_warn; #endif /* eBPF JITs can rewrite the program in case constant diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a47ad6cd41c0..6d39b4c01fc6 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -25,6 +25,7 @@ static int zero = 0; static int one = 1; +static int two __maybe_unused = 2; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; @@ -325,13 +326,14 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_enable, .maxlen = sizeof(int), .mode = 0644, -#ifndef CONFIG_BPF_JIT_ALWAYS_ON - .proc_handler = proc_dointvec -#else .proc_handler = proc_dointvec_minmax, +# ifdef CONFIG_BPF_JIT_ALWAYS_ON .extra1 = &one, .extra2 = &one, -#endif +# else + .extra1 = &zero, + .extra2 = &two, +# endif }, # ifdef CONFIG_HAVE_EBPF_JIT { @@ -339,14 +341,18 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_harden, .maxlen = sizeof(int), .mode = 0600, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &two, }, { .procname = "bpf_jit_kallsyms", .data = &bpf_jit_kallsyms, .maxlen = sizeof(int), .mode = 0600, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, # endif #endif diff --git a/net/socket.c b/net/socket.c index fbfae1ed3ff5..1536515b6437 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2613,15 +2613,6 @@ out_fs: core_initcall(sock_init); /* early initcall */ -static int __init jit_init(void) -{ -#ifdef CONFIG_BPF_JIT_ALWAYS_ON - bpf_jit_enable = 1; -#endif - return 0; -} -pure_initcall(jit_init); - #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { -- cgit v1.2.3 From 92e3da3cf193fd27996909956c12a23c0333da44 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:24 -0800 Subject: powerpc: initial pkey plumbing Basic plumbing to initialize the pkey system. Nothing is enabled yet. A later patch will enable it once all the infrastructure is in place. Signed-off-by: Ram Pai [mpe: Rework copyrights to use SPDX tags] Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 15 ++++++++++ arch/powerpc/include/asm/mmu_context.h | 1 + arch/powerpc/include/asm/pkeys.h | 51 ++++++++++++++++++++++++++++++++++ arch/powerpc/mm/Makefile | 1 + arch/powerpc/mm/hash_utils_64.c | 1 + arch/powerpc/mm/pkeys.c | 29 +++++++++++++++++++ 6 files changed, 98 insertions(+) create mode 100644 arch/powerpc/include/asm/pkeys.h create mode 100644 arch/powerpc/mm/pkeys.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c51e6ce42e7a..c9660a18c093 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -867,6 +867,21 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +config PPC_MEM_KEYS + prompt "PowerPC Memory Protection Keys" + def_bool y + depends on PPC_BOOK3S_64 + select ARCH_USES_HIGH_VMA_FLAGS + select ARCH_HAS_PKEYS + help + Memory Protection Keys provides a mechanism for enforcing + page-based protections, but without requiring modification of the + page tables when an application changes protection domains. + + For details, see Documentation/vm/protection-keys.txt + + If unsure, say y. + endmenu config ISA_DMA_API diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 6177d43f0ce8..fb5e6a3ce127 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -192,5 +192,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, /* by default, allow everything */ return true; } + #endif /* __KERNEL__ */ #endif /* __ASM_POWERPC_MMU_CONTEXT_H */ diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h new file mode 100644 index 000000000000..dadea7d4324d --- /dev/null +++ b/arch/powerpc/include/asm/pkeys.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * PowerPC Memory Protection Keys management + * + * Copyright 2017, Ram Pai, IBM Corporation. + */ + +#ifndef _ASM_POWERPC_KEYS_H +#define _ASM_POWERPC_KEYS_H + +#include + +DECLARE_STATIC_KEY_TRUE(pkey_disabled); +#define ARCH_VM_PKEY_FLAGS 0 + +static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) +{ + return false; +} + +static inline int mm_pkey_alloc(struct mm_struct *mm) +{ + return -1; +} + +static inline int mm_pkey_free(struct mm_struct *mm, int pkey) +{ + return -EINVAL; +} + +/* + * Try to dedicate one of the protection keys to be used as an + * execute-only protection key. + */ +static inline int execute_only_pkey(struct mm_struct *mm) +{ + return 0; +} + +static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, + int prot, int pkey) +{ + return 0; +} + +static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val) +{ + return 0; +} +#endif /*_ASM_POWERPC_KEYS_H */ diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 8d271bfe2d94..f06f3577d8d1 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -44,3 +44,4 @@ obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o +obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 64e704eefad1..8cd31ea9b243 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c new file mode 100644 index 000000000000..a11afba584b9 --- /dev/null +++ b/arch/powerpc/mm/pkeys.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * PowerPC Memory Protection Keys management + * + * Copyright 2017, Ram Pai, IBM Corporation. + */ + +#include + +DEFINE_STATIC_KEY_TRUE(pkey_disabled); +bool pkey_execute_disable_supported; + +int pkey_initialize(void) +{ + /* + * Disable the pkey system till everything is in place. A subsequent + * patch will enable it. + */ + static_branch_enable(&pkey_disabled); + + /* + * Disable execute_disable support for now. A subsequent patch will + * enable it. + */ + pkey_execute_disable_supported = false; + return 0; +} + +arch_initcall(pkey_initialize); -- cgit v1.2.3 From 4fb158f65ac5556b9b4a6f63f38272853ed99b22 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:25 -0800 Subject: powerpc: track allocation status of all pkeys Total 32 keys are available on power7 and above. However pkey 0,1 are reserved. So effectively we have 30 pkeys. On 4K kernels, we do not have 5 bits in the PTE to represent all the keys; we only have 3bits. Two of those keys are reserved; pkey 0 and pkey 1. So effectively we have 6 pkeys. This patch keeps track of reserved keys, allocated keys and keys that are currently free. Also it adds skeletal functions and macros, that the architecture-independent code expects to be available. Reviewed-by: Thiago Jung Bauermann Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu.h | 9 ++++ arch/powerpc/include/asm/mmu_context.h | 4 ++ arch/powerpc/include/asm/pkeys.h | 83 ++++++++++++++++++++++++++++++-- arch/powerpc/mm/mmu_context_book3s64.c | 2 + arch/powerpc/mm/pkeys.c | 40 +++++++++++++++ 5 files changed, 134 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index c9448e19847a..37ef23cc1136 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -108,6 +108,15 @@ typedef struct { #ifdef CONFIG_SPAPR_TCE_IOMMU struct list_head iommu_group_mem_list; #endif + +#ifdef CONFIG_PPC_MEM_KEYS + /* + * Each bit represents one protection key. + * bit set -> key allocated + * bit unset -> key available for allocation + */ + u32 pkey_allocation_map; +#endif } mm_context_t; /* diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index fb5e6a3ce127..7d0f2d05189b 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -193,5 +193,9 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, return true; } +#ifndef CONFIG_PPC_MEM_KEYS +#define pkey_mm_init(mm) +#endif /* CONFIG_PPC_MEM_KEYS */ + #endif /* __KERNEL__ */ #endif /* __ASM_POWERPC_MMU_CONTEXT_H */ diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index dadea7d4324d..79b105d1ff73 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -11,21 +11,94 @@ #include DECLARE_STATIC_KEY_TRUE(pkey_disabled); -#define ARCH_VM_PKEY_FLAGS 0 +extern int pkeys_total; /* total pkeys as per device tree */ +extern u32 initial_allocation_mask; /* bits set for reserved keys */ + +/* + * Define these here temporarily so we're not dependent on patching linux/mm.h. + * Once it's updated we can drop these. + */ +#ifndef VM_PKEY_BIT0 +# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0 +# define VM_PKEY_BIT0 VM_HIGH_ARCH_0 +# define VM_PKEY_BIT1 VM_HIGH_ARCH_1 +# define VM_PKEY_BIT2 VM_HIGH_ARCH_2 +# define VM_PKEY_BIT3 VM_HIGH_ARCH_3 +# define VM_PKEY_BIT4 VM_HIGH_ARCH_4 +#endif + +#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \ + VM_PKEY_BIT3 | VM_PKEY_BIT4) + +#define arch_max_pkey() pkeys_total + +#define pkey_alloc_mask(pkey) (0x1 << pkey) + +#define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map) + +#define __mm_pkey_allocated(mm, pkey) { \ + mm_pkey_allocation_map(mm) |= pkey_alloc_mask(pkey); \ +} + +#define __mm_pkey_free(mm, pkey) { \ + mm_pkey_allocation_map(mm) &= ~pkey_alloc_mask(pkey); \ +} + +#define __mm_pkey_is_allocated(mm, pkey) \ + (mm_pkey_allocation_map(mm) & pkey_alloc_mask(pkey)) + +#define __mm_pkey_is_reserved(pkey) (initial_allocation_mask & \ + pkey_alloc_mask(pkey)) static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { - return false; + /* A reserved key is never considered as 'explicitly allocated' */ + return ((pkey < arch_max_pkey()) && + !__mm_pkey_is_reserved(pkey) && + __mm_pkey_is_allocated(mm, pkey)); } +/* + * Returns a positive, 5-bit key on success, or -1 on failure. + * Relies on the mmap_sem to protect against concurrency in mm_pkey_alloc() and + * mm_pkey_free(). + */ static inline int mm_pkey_alloc(struct mm_struct *mm) { - return -1; + /* + * Note: this is the one and only place we make sure that the pkey is + * valid as far as the hardware is concerned. The rest of the kernel + * trusts that only good, valid pkeys come out of here. + */ + u32 all_pkeys_mask = (u32)(~(0x0)); + int ret; + + if (static_branch_likely(&pkey_disabled)) + return -1; + + /* + * Are we out of pkeys? We must handle this specially because ffz() + * behavior is undefined if there are no zeros. + */ + if (mm_pkey_allocation_map(mm) == all_pkeys_mask) + return -1; + + ret = ffz((u32)mm_pkey_allocation_map(mm)); + __mm_pkey_allocated(mm, ret); + return ret; } static inline int mm_pkey_free(struct mm_struct *mm, int pkey) { - return -EINVAL; + if (static_branch_likely(&pkey_disabled)) + return -1; + + if (!mm_pkey_is_allocated(mm, pkey)) + return -EINVAL; + + __mm_pkey_free(mm, pkey); + + return 0; } /* @@ -48,4 +121,6 @@ static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, { return 0; } + +extern void pkey_mm_init(struct mm_struct *mm); #endif /*_ASM_POWERPC_KEYS_H */ diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 59c0766ae4e0..929d9ef7083f 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,7 @@ static int hash__init_new_context(struct mm_struct *mm) subpage_prot_init_new_context(mm); + pkey_mm_init(mm); return index; } diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index a11afba584b9..e68e367dcdd7 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -9,21 +9,61 @@ DEFINE_STATIC_KEY_TRUE(pkey_disabled); bool pkey_execute_disable_supported; +int pkeys_total; /* Total pkeys as per device tree */ +u32 initial_allocation_mask; /* Bits set for reserved keys */ int pkey_initialize(void) { + int os_reserved, i; + /* * Disable the pkey system till everything is in place. A subsequent * patch will enable it. */ static_branch_enable(&pkey_disabled); + /* Lets assume 32 keys */ + pkeys_total = 32; + + /* + * Adjust the upper limit, based on the number of bits supported by + * arch-neutral code. + */ + pkeys_total = min_t(int, pkeys_total, + (ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)); + /* * Disable execute_disable support for now. A subsequent patch will * enable it. */ pkey_execute_disable_supported = false; + +#ifdef CONFIG_PPC_4K_PAGES + /* + * The OS can manage only 8 pkeys due to its inability to represent them + * in the Linux 4K PTE. + */ + os_reserved = pkeys_total - 8; +#else + os_reserved = 0; +#endif + /* + * Bits are in LE format. NOTE: 1, 0 are reserved. + * key 0 is the default key, which allows read/write/execute. + * key 1 is recommended not to be used. PowerISA(3.0) page 1015, + * programming note. + */ + initial_allocation_mask = ~0x0; + for (i = 2; i < (pkeys_total - os_reserved); i++) + initial_allocation_mask &= ~(0x1 << i); return 0; } arch_initcall(pkey_initialize); + +void pkey_mm_init(struct mm_struct *mm) +{ + if (static_branch_likely(&pkey_disabled)) + return; + mm_pkey_allocation_map(mm) = initial_allocation_mask; +} -- cgit v1.2.3 From 1b4037deadb64e3a26c02f0e9852c257ee92a6e3 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:26 -0800 Subject: powerpc: helper function to read, write AMR, IAMR, UAMOR registers Implements helper functions to read and write the key related registers; AMR, IAMR, UAMOR. AMR register tracks the read,write permission of a key IAMR register tracks the execute permission of a key UAMOR register enables and disables a key Acked-by: Balbir Singh Reviewed-by: Thiago Jung Bauermann Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pkeys.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index e68e367dcdd7..afa6d0d63f32 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -67,3 +67,39 @@ void pkey_mm_init(struct mm_struct *mm) return; mm_pkey_allocation_map(mm) = initial_allocation_mask; } + +static inline u64 read_amr(void) +{ + return mfspr(SPRN_AMR); +} + +static inline void write_amr(u64 value) +{ + mtspr(SPRN_AMR, value); +} + +static inline u64 read_iamr(void) +{ + if (!likely(pkey_execute_disable_supported)) + return 0x0UL; + + return mfspr(SPRN_IAMR); +} + +static inline void write_iamr(u64 value) +{ + if (!likely(pkey_execute_disable_supported)) + return; + + mtspr(SPRN_IAMR, value); +} + +static inline u64 read_uamor(void) +{ + return mfspr(SPRN_UAMOR); +} + +static inline void write_uamor(u64 value) +{ + mtspr(SPRN_UAMOR, value); +} -- cgit v1.2.3 From 4d70b698f9575cea7fa52e1d8f5020beca869907 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:27 -0800 Subject: powerpc: helper functions to initialize AMR, IAMR and UAMOR registers Introduce helper functions that can initialize the bits in the AMR, IAMR and UAMOR register; the bits that correspond to the given pkey. Reviewed-by: Thiago Jung Bauermann Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pkeys.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index afa6d0d63f32..0cfe901594c7 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -12,6 +12,10 @@ bool pkey_execute_disable_supported; int pkeys_total; /* Total pkeys as per device tree */ u32 initial_allocation_mask; /* Bits set for reserved keys */ +#define AMR_BITS_PER_PKEY 2 +#define PKEY_REG_BITS (sizeof(u64)*8) +#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY)) + int pkey_initialize(void) { int os_reserved, i; @@ -103,3 +107,46 @@ static inline void write_uamor(u64 value) { mtspr(SPRN_UAMOR, value); } + +static inline void init_amr(int pkey, u8 init_bits) +{ + u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); + u64 old_amr = read_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); + + write_amr(old_amr | new_amr_bits); +} + +static inline void init_iamr(int pkey, u8 init_bits) +{ + u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey)); + u64 old_iamr = read_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); + + write_iamr(old_iamr | new_iamr_bits); +} + +static void pkey_status_change(int pkey, bool enable) +{ + u64 old_uamor; + + /* Reset the AMR and IAMR bits for this key */ + init_amr(pkey, 0x0); + init_iamr(pkey, 0x0); + + /* Enable/disable key */ + old_uamor = read_uamor(); + if (enable) + old_uamor |= (0x3ul << pkeyshift(pkey)); + else + old_uamor &= ~(0x3ul << pkeyshift(pkey)); + write_uamor(old_uamor); +} + +void __arch_activate_pkey(int pkey) +{ + pkey_status_change(pkey, true); +} + +void __arch_deactivate_pkey(int pkey) +{ + pkey_status_change(pkey, false); +} -- cgit v1.2.3 From 0685f217afe8279a754490954b8dcc02cb763a5d Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:28 -0800 Subject: powerpc: cleanup AMR, IAMR when a key is allocated or freed Cleanup the bits corresponding to a key in the AMR, and IAMR register, when the key is newly allocated/activated or is freed. We dont want some residual bits cause the hardware enforce unintended behavior when the key is activated or freed. Reviewed-by: Thiago Jung Bauermann Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pkeys.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index 79b105d1ff73..5ed52ac39bab 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -58,6 +58,8 @@ static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) __mm_pkey_is_allocated(mm, pkey)); } +extern void __arch_activate_pkey(int pkey); +extern void __arch_deactivate_pkey(int pkey); /* * Returns a positive, 5-bit key on success, or -1 on failure. * Relies on the mmap_sem to protect against concurrency in mm_pkey_alloc() and @@ -85,6 +87,12 @@ static inline int mm_pkey_alloc(struct mm_struct *mm) ret = ffz((u32)mm_pkey_allocation_map(mm)); __mm_pkey_allocated(mm, ret); + + /* + * Enable the key in the hardware + */ + if (ret > 0) + __arch_activate_pkey(ret); return ret; } @@ -96,6 +104,10 @@ static inline int mm_pkey_free(struct mm_struct *mm, int pkey) if (!mm_pkey_is_allocated(mm, pkey)) return -EINVAL; + /* + * Disable the key in the hardware + */ + __arch_deactivate_pkey(pkey); __mm_pkey_free(mm, pkey); return 0; -- cgit v1.2.3 From 2ddc53f3a7516b3e2c4ab9c36854a227641a4fde Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:29 -0800 Subject: powerpc: implementation for arch_set_user_pkey_access() This patch provides the detailed implementation for a user to allocate a key and enable it in the hardware. It provides the plumbing, but it cannot be used till the system call is implemented. The next patch will do so. Reviewed-by: Thiago Jung Bauermann Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pkeys.h | 6 +++++- arch/powerpc/mm/pkeys.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index 5ed52ac39bab..bf8e706dcb0f 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -128,10 +128,14 @@ static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, return 0; } +extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val); static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val) { - return 0; + if (static_branch_likely(&pkey_disabled)) + return -EINVAL; + return __arch_set_user_pkey_access(tsk, pkey, init_val); } extern void pkey_mm_init(struct mm_struct *mm); diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 0cfe901594c7..b01238bd88c1 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -5,6 +5,7 @@ * Copyright 2017, Ram Pai, IBM Corporation. */ +#include #include DEFINE_STATIC_KEY_TRUE(pkey_disabled); @@ -13,6 +14,9 @@ int pkeys_total; /* Total pkeys as per device tree */ u32 initial_allocation_mask; /* Bits set for reserved keys */ #define AMR_BITS_PER_PKEY 2 +#define AMR_RD_BIT 0x1UL +#define AMR_WR_BIT 0x2UL +#define IAMR_EX_BIT 0x1UL #define PKEY_REG_BITS (sizeof(u64)*8) #define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY)) @@ -108,6 +112,20 @@ static inline void write_uamor(u64 value) mtspr(SPRN_UAMOR, value); } +static bool is_pkey_enabled(int pkey) +{ + u64 uamor = read_uamor(); + u64 pkey_bits = 0x3ul << pkeyshift(pkey); + u64 uamor_pkey_bits = (uamor & pkey_bits); + + /* + * Both the bits in UAMOR corresponding to the key should be set or + * reset. + */ + WARN_ON(uamor_pkey_bits && (uamor_pkey_bits != pkey_bits)); + return !!(uamor_pkey_bits); +} + static inline void init_amr(int pkey, u8 init_bits) { u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); @@ -150,3 +168,25 @@ void __arch_deactivate_pkey(int pkey) { pkey_status_change(pkey, false); } + +/* + * Set the access rights in AMR IAMR and UAMOR registers for @pkey to that + * specified in @init_val. + */ +int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val) +{ + u64 new_amr_bits = 0x0ul; + + if (!is_pkey_enabled(pkey)) + return -EINVAL; + + /* Set the bits we need in AMR: */ + if (init_val & PKEY_DISABLE_ACCESS) + new_amr_bits |= AMR_RD_BIT | AMR_WR_BIT; + else if (init_val & PKEY_DISABLE_WRITE) + new_amr_bits |= AMR_WR_BIT; + + init_amr(pkey, new_amr_bits); + return 0; +} -- cgit v1.2.3 From dcf872956d444bbbd46f601024ea989ced03f8a7 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:30 -0800 Subject: powerpc: ability to create execute-disabled pkeys powerpc has hardware support to disable execute on a pkey. This patch enables the ability to create execute-disabled keys. Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/uapi/asm/mman.h | 6 ++++++ arch/powerpc/mm/pkeys.c | 16 ++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h index e63bc37e33af..65065ce32814 100644 --- a/arch/powerpc/include/uapi/asm/mman.h +++ b/arch/powerpc/include/uapi/asm/mman.h @@ -30,4 +30,10 @@ #define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 0x40000 /* create a huge page mapping */ +/* Override any generic PKEY permission defines */ +#define PKEY_DISABLE_EXECUTE 0x4 +#undef PKEY_ACCESS_MASK +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE |\ + PKEY_DISABLE_EXECUTE) #endif /* _UAPI_ASM_POWERPC_MMAN_H */ diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index b01238bd88c1..e61bea4a3606 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -24,6 +24,14 @@ int pkey_initialize(void) { int os_reserved, i; + /* + * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral + * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE. + * Ensure that the bits a distinct. + */ + BUILD_BUG_ON(PKEY_DISABLE_EXECUTE & + (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); + /* * Disable the pkey system till everything is in place. A subsequent * patch will enable it. @@ -177,10 +185,18 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val) { u64 new_amr_bits = 0x0ul; + u64 new_iamr_bits = 0x0ul; if (!is_pkey_enabled(pkey)) return -EINVAL; + if (init_val & PKEY_DISABLE_EXECUTE) { + if (!pkey_execute_disable_supported) + return -EINVAL; + new_iamr_bits |= IAMR_EX_BIT; + } + init_iamr(pkey, new_iamr_bits); + /* Set the bits we need in AMR: */ if (init_val & PKEY_DISABLE_ACCESS) new_amr_bits |= AMR_RD_BIT | AMR_WR_BIT; -- cgit v1.2.3 From 06bb53b33804613627c7ca1eda246459a7be2803 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:31 -0800 Subject: powerpc: store and restore the pkey state across context switches Store and restore the AMR, IAMR and UAMOR register state of the task before scheduling out and after scheduling in, respectively. Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 3 ++ arch/powerpc/include/asm/pkeys.h | 4 +++ arch/powerpc/include/asm/processor.h | 5 ++++ arch/powerpc/kernel/process.c | 7 +++++ arch/powerpc/mm/pkeys.c | 52 +++++++++++++++++++++++++++++++++- 5 files changed, 70 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 7d0f2d05189b..4d69223d217a 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -195,6 +195,9 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, #ifndef CONFIG_PPC_MEM_KEYS #define pkey_mm_init(mm) +#define thread_pkey_regs_save(thread) +#define thread_pkey_regs_restore(new_thread, old_thread) +#define thread_pkey_regs_init(thread) #endif /* CONFIG_PPC_MEM_KEYS */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index bf8e706dcb0f..41c303a01e1a 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -139,4 +139,8 @@ static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, } extern void pkey_mm_init(struct mm_struct *mm); +extern void thread_pkey_regs_save(struct thread_struct *thread); +extern void thread_pkey_regs_restore(struct thread_struct *new_thread, + struct thread_struct *old_thread); +extern void thread_pkey_regs_init(struct thread_struct *thread); #endif /*_ASM_POWERPC_KEYS_H */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index bdab3b74eb98..01299cdc9806 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -309,6 +309,11 @@ struct thread_struct { struct thread_vr_state ckvr_state; /* Checkpointed VR state */ unsigned long ckvrsave; /* Checkpointed VRSAVE */ #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#ifdef CONFIG_PPC_MEM_KEYS + unsigned long amr; + unsigned long iamr; + unsigned long uamor; +#endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9eb78ec0ce5b..755fd7e23ed4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -1103,6 +1104,8 @@ static inline void save_sprs(struct thread_struct *t) t->tar = mfspr(SPRN_TAR); } #endif + + thread_pkey_regs_save(t); } static inline void restore_sprs(struct thread_struct *old_thread, @@ -1142,6 +1145,8 @@ static inline void restore_sprs(struct thread_struct *old_thread, old_thread->tidr != new_thread->tidr) mtspr(SPRN_TIDR, new_thread->tidr); #endif + + thread_pkey_regs_restore(new_thread, old_thread); } #ifdef CONFIG_PPC_BOOK3S_64 @@ -1867,6 +1872,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.tm_tfiar = 0; current->thread.load_tm = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + + thread_pkey_regs_init(¤t->thread); } EXPORT_SYMBOL(start_thread); diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index e61bea4a3606..91ec6c467074 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -12,6 +12,8 @@ DEFINE_STATIC_KEY_TRUE(pkey_disabled); bool pkey_execute_disable_supported; int pkeys_total; /* Total pkeys as per device tree */ u32 initial_allocation_mask; /* Bits set for reserved keys */ +u64 pkey_amr_uamor_mask; /* Bits in AMR/UMOR not to be touched */ +u64 pkey_iamr_mask; /* Bits in AMR not to be touched */ #define AMR_BITS_PER_PKEY 2 #define AMR_RD_BIT 0x1UL @@ -70,8 +72,16 @@ int pkey_initialize(void) * programming note. */ initial_allocation_mask = ~0x0; - for (i = 2; i < (pkeys_total - os_reserved); i++) + + /* register mask is in BE format */ + pkey_amr_uamor_mask = ~0x0ul; + pkey_iamr_mask = ~0x0ul; + + for (i = 2; i < (pkeys_total - os_reserved); i++) { initial_allocation_mask &= ~(0x1 << i); + pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i)); + pkey_iamr_mask &= ~(0x1ul << pkeyshift(i)); + } return 0; } @@ -206,3 +216,43 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, init_amr(pkey, new_amr_bits); return 0; } + +void thread_pkey_regs_save(struct thread_struct *thread) +{ + if (static_branch_likely(&pkey_disabled)) + return; + + /* + * TODO: Skip saving registers if @thread hasn't used any keys yet. + */ + thread->amr = read_amr(); + thread->iamr = read_iamr(); + thread->uamor = read_uamor(); +} + +void thread_pkey_regs_restore(struct thread_struct *new_thread, + struct thread_struct *old_thread) +{ + if (static_branch_likely(&pkey_disabled)) + return; + + /* + * TODO: Just set UAMOR to zero if @new_thread hasn't used any keys yet. + */ + if (old_thread->amr != new_thread->amr) + write_amr(new_thread->amr); + if (old_thread->iamr != new_thread->iamr) + write_iamr(new_thread->iamr); + if (old_thread->uamor != new_thread->uamor) + write_uamor(new_thread->uamor); +} + +void thread_pkey_regs_init(struct thread_struct *thread) +{ + if (static_branch_likely(&pkey_disabled)) + return; + + write_amr(read_amr() & pkey_amr_uamor_mask); + write_iamr(read_iamr() & pkey_iamr_mask); + write_uamor(read_uamor() & pkey_amr_uamor_mask); +} -- cgit v1.2.3 From 5586cf61e108019565bb936daeb296e53df1c1d6 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:32 -0800 Subject: powerpc: introduce execute-only pkey This patch provides the implementation of execute-only pkey. The architecture-independent layer expects the arch-dependent layer, to support the ability to create and enable a special key which has execute-only permission. Acked-by: Balbir Singh Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu.h | 1 + arch/powerpc/include/asm/pkeys.h | 6 +++- arch/powerpc/mm/pkeys.c | 58 ++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 37ef23cc1136..0abeb0e2d616 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -116,6 +116,7 @@ typedef struct { * bit unset -> key available for allocation */ u32 pkey_allocation_map; + s16 execute_only_pkey; /* key holding execute-only protection */ #endif } mm_context_t; diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index 41c303a01e1a..b8d2a8e3f2e1 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -117,9 +117,13 @@ static inline int mm_pkey_free(struct mm_struct *mm, int pkey) * Try to dedicate one of the protection keys to be used as an * execute-only protection key. */ +extern int __execute_only_pkey(struct mm_struct *mm); static inline int execute_only_pkey(struct mm_struct *mm) { - return 0; + if (static_branch_likely(&pkey_disabled)) + return -1; + + return __execute_only_pkey(mm); } static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 91ec6c467074..6fece6e6fc97 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -92,6 +92,8 @@ void pkey_mm_init(struct mm_struct *mm) if (static_branch_likely(&pkey_disabled)) return; mm_pkey_allocation_map(mm) = initial_allocation_mask; + /* -1 means unallocated or invalid */ + mm->context.execute_only_pkey = -1; } static inline u64 read_amr(void) @@ -256,3 +258,59 @@ void thread_pkey_regs_init(struct thread_struct *thread) write_iamr(read_iamr() & pkey_iamr_mask); write_uamor(read_uamor() & pkey_amr_uamor_mask); } + +static inline bool pkey_allows_readwrite(int pkey) +{ + int pkey_shift = pkeyshift(pkey); + + if (!is_pkey_enabled(pkey)) + return true; + + return !(read_amr() & ((AMR_RD_BIT|AMR_WR_BIT) << pkey_shift)); +} + +int __execute_only_pkey(struct mm_struct *mm) +{ + bool need_to_set_mm_pkey = false; + int execute_only_pkey = mm->context.execute_only_pkey; + int ret; + + /* Do we need to assign a pkey for mm's execute-only maps? */ + if (execute_only_pkey == -1) { + /* Go allocate one to use, which might fail */ + execute_only_pkey = mm_pkey_alloc(mm); + if (execute_only_pkey < 0) + return -1; + need_to_set_mm_pkey = true; + } + + /* + * We do not want to go through the relatively costly dance to set AMR + * if we do not need to. Check it first and assume that if the + * execute-only pkey is readwrite-disabled than we do not have to set it + * ourselves. + */ + if (!need_to_set_mm_pkey && !pkey_allows_readwrite(execute_only_pkey)) + return execute_only_pkey; + + /* + * Set up AMR so that it denies access for everything other than + * execution. + */ + ret = __arch_set_user_pkey_access(current, execute_only_pkey, + PKEY_DISABLE_ACCESS | + PKEY_DISABLE_WRITE); + /* + * If the AMR-set operation failed somehow, just return 0 and + * effectively disable execute-only support. + */ + if (ret) { + mm_pkey_free(mm, execute_only_pkey); + return -1; + } + + /* We got one, store it and use it from here on out */ + if (need_to_set_mm_pkey) + mm->context.execute_only_pkey = execute_only_pkey; + return execute_only_pkey; +} -- cgit v1.2.3 From 013a91b39c2d5158cdc5529803f245bbb0526c7c Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:33 -0800 Subject: powerpc: ability to associate pkey to a vma arch-independent code expects the arch to map a pkey into the vma's protection bit setting. The patch provides that ability. Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mman.h | 7 ++++++- arch/powerpc/include/asm/pkeys.h | 11 +++++++++++ arch/powerpc/mm/pkeys.c | 8 ++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 30922f699341..29994788b9f3 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -13,6 +13,7 @@ #include #include +#include #include /* @@ -22,7 +23,11 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { - return (prot & PROT_SAO) ? VM_SAO : 0; +#ifdef CONFIG_PPC_MEM_KEYS + return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey)); +#else + return ((prot & PROT_SAO) ? VM_SAO : 0); +#endif } #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index b8d2a8e3f2e1..df372557ec33 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -30,6 +30,17 @@ extern u32 initial_allocation_mask; /* bits set for reserved keys */ #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \ VM_PKEY_BIT3 | VM_PKEY_BIT4) +/* Override any generic PKEY permission defines */ +#define PKEY_DISABLE_EXECUTE 0x4 +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS | \ + PKEY_DISABLE_WRITE | \ + PKEY_DISABLE_EXECUTE) + +static inline u64 pkey_to_vmflag_bits(u16 pkey) +{ + return (((u64)pkey << VM_PKEY_SHIFT) & ARCH_VM_PKEY_FLAGS); +} + #define arch_max_pkey() pkeys_total #define pkey_alloc_mask(pkey) (0x1 << pkey) diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 6fece6e6fc97..a4aac75f1a93 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -34,6 +34,14 @@ int pkey_initialize(void) BUILD_BUG_ON(PKEY_DISABLE_EXECUTE & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); + /* + * pkey_to_vmflag_bits() assumes that the pkey bits are contiguous + * in the vmaflag. Make sure that is really the case. + */ + BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + + __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + != (sizeof(u64) * BITS_PER_BYTE)); + /* * Disable the pkey system till everything is in place. A subsequent * patch will enable it. -- cgit v1.2.3 From 87bbabbed8a77092135f6442b8d5619906a81255 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:34 -0800 Subject: powerpc: implementation for arch_override_mprotect_pkey() arch independent code calls arch_override_mprotect_pkey() to return a pkey that best matches the requested protection. This patch provides the implementation. Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 5 +++++ arch/powerpc/include/asm/pkeys.h | 21 +++++++++++++++++++- arch/powerpc/mm/pkeys.c | 36 ++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 4d69223d217a..3ba571dfdfd9 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -198,6 +198,11 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, #define thread_pkey_regs_save(thread) #define thread_pkey_regs_restore(new_thread, old_thread) #define thread_pkey_regs_init(thread) + +static inline int vma_pkey(struct vm_area_struct *vma) +{ + return 0; +} #endif /* CONFIG_PPC_MEM_KEYS */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index df372557ec33..24e82260052f 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -41,6 +41,13 @@ static inline u64 pkey_to_vmflag_bits(u16 pkey) return (((u64)pkey << VM_PKEY_SHIFT) & ARCH_VM_PKEY_FLAGS); } +static inline int vma_pkey(struct vm_area_struct *vma) +{ + if (static_branch_likely(&pkey_disabled)) + return 0; + return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT; +} + #define arch_max_pkey() pkeys_total #define pkey_alloc_mask(pkey) (0x1 << pkey) @@ -137,10 +144,22 @@ static inline int execute_only_pkey(struct mm_struct *mm) return __execute_only_pkey(mm); } +extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma, + int prot, int pkey); static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey) { - return 0; + if (static_branch_likely(&pkey_disabled)) + return 0; + + /* + * Is this an mprotect_pkey() call? If so, never override the value that + * came from the user. + */ + if (pkey != -1) + return pkey; + + return __arch_override_mprotect_pkey(vma, prot, pkey); } extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index a4aac75f1a93..010c90c1f5da 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -322,3 +322,39 @@ int __execute_only_pkey(struct mm_struct *mm) mm->context.execute_only_pkey = execute_only_pkey; return execute_only_pkey; } + +static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) +{ + /* Do this check first since the vm_flags should be hot */ + if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC) + return false; + + return (vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey); +} + +/* + * This should only be called for *plain* mprotect calls. + */ +int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, + int pkey) +{ + /* + * If the currently associated pkey is execute-only, but the requested + * protection requires read or write, move it back to the default pkey. + */ + if (vma_is_pkey_exec_only(vma) && (prot & (PROT_READ | PROT_WRITE))) + return 0; + + /* + * The requested protection is execute-only. Hence let's use an + * execute-only pkey. + */ + if (prot == PROT_EXEC) { + pkey = execute_only_pkey(vma->vm_mm); + if (pkey > 0) + return pkey; + } + + /* Nothing to override. */ + return vma_pkey(vma); +} -- cgit v1.2.3 From eb95d016ce3d3404e061c7c85d4362094dc34b3f Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:35 -0800 Subject: powerpc: map vma key-protection bits to pte key bits. Map the key protection bits of the vma to the pkey bits in the PTE. The PTE bits used for pkey are 3,4,5,6 and 57. The first four bits are the same four bits that were freed up initially in this patch series. remember? :-) Without those four bits this patch wouldn't be possible. BUT, on 4k kernel, bit 3, and 4 could not be freed up. remember? Hence we have to be satisfied with 5, 6 and 7. Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 25 ++++++++++++++++++++++++- arch/powerpc/include/asm/mman.h | 6 ++++++ arch/powerpc/include/asm/pkeys.h | 12 ++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 60a0d7fd82bc..26285fc17baf 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -40,6 +40,7 @@ #define _RPAGE_RSV2 0x0800000000000000UL #define _RPAGE_RSV3 0x0400000000000000UL #define _RPAGE_RSV4 0x0200000000000000UL +#define _RPAGE_RSV5 0x00040UL #define _PAGE_PTE 0x4000000000000000UL /* distinguishes PTEs from pointers */ #define _PAGE_PRESENT 0x8000000000000000UL /* pte contains a translation */ @@ -59,6 +60,25 @@ /* Max physical address bit as per radix table */ #define _RPAGE_PA_MAX 57 +#ifdef CONFIG_PPC_MEM_KEYS +#ifdef CONFIG_PPC_64K_PAGES +#define H_PTE_PKEY_BIT0 _RPAGE_RSV1 +#define H_PTE_PKEY_BIT1 _RPAGE_RSV2 +#else /* CONFIG_PPC_64K_PAGES */ +#define H_PTE_PKEY_BIT0 0 /* _RPAGE_RSV1 is not available */ +#define H_PTE_PKEY_BIT1 0 /* _RPAGE_RSV2 is not available */ +#endif /* CONFIG_PPC_64K_PAGES */ +#define H_PTE_PKEY_BIT2 _RPAGE_RSV3 +#define H_PTE_PKEY_BIT3 _RPAGE_RSV4 +#define H_PTE_PKEY_BIT4 _RPAGE_RSV5 +#else /* CONFIG_PPC_MEM_KEYS */ +#define H_PTE_PKEY_BIT0 0 +#define H_PTE_PKEY_BIT1 0 +#define H_PTE_PKEY_BIT2 0 +#define H_PTE_PKEY_BIT3 0 +#define H_PTE_PKEY_BIT4 0 +#endif /* CONFIG_PPC_MEM_KEYS */ + /* * Max physical address bit we will use for now. * @@ -122,13 +142,16 @@ #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ _PAGE_SOFT_DIRTY) + +#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \ + H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4) /* * Mask of bits returned by pte_pgprot() */ #define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \ H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \ _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | H_PTE_PKEY) /* * We define 2 sets of base prot bits, one for basic pages (ie, * cacheable kernel and user pages) and one for non cacheable diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 29994788b9f3..07e3f54de9e3 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -33,7 +33,13 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) { +#ifdef CONFIG_PPC_MEM_KEYS + return (vm_flags & VM_SAO) ? + __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) : + __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags)); +#else return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0); +#endif } #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index 24e82260052f..03d8d5053126 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -41,6 +41,18 @@ static inline u64 pkey_to_vmflag_bits(u16 pkey) return (((u64)pkey << VM_PKEY_SHIFT) & ARCH_VM_PKEY_FLAGS); } +static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags) +{ + if (static_branch_likely(&pkey_disabled)) + return 0x0UL; + + return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT4 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT3 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT1 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT0 : 0x0UL)); +} + static inline int vma_pkey(struct vm_area_struct *vma) { if (static_branch_likely(&pkey_disabled)) -- cgit v1.2.3 From a6590ca55f1f49912e17e4811ccae9a51bda8859 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:36 -0800 Subject: powerpc: Program HPTE key protection bits Map the PTE protection key bits to the HPTE key protection bits, while creating HPTE entries. Acked-by: Balbir Singh Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 5 +++++ arch/powerpc/include/asm/mmu_context.h | 6 ++++++ arch/powerpc/include/asm/pkeys.h | 9 +++++++++ arch/powerpc/mm/hash_utils_64.c | 1 + 4 files changed, 21 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index e91e115a816f..50ed64fba4ae 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -90,6 +90,8 @@ #define HPTE_R_PP0 ASM_CONST(0x8000000000000000) #define HPTE_R_TS ASM_CONST(0x4000000000000000) #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) +#define HPTE_R_KEY_BIT0 ASM_CONST(0x2000000000000000) +#define HPTE_R_KEY_BIT1 ASM_CONST(0x1000000000000000) #define HPTE_R_RPN_SHIFT 12 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) #define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000) @@ -104,6 +106,9 @@ #define HPTE_R_C ASM_CONST(0x0000000000000080) #define HPTE_R_R ASM_CONST(0x0000000000000100) #define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00) +#define HPTE_R_KEY_BIT2 ASM_CONST(0x0000000000000800) +#define HPTE_R_KEY_BIT3 ASM_CONST(0x0000000000000400) +#define HPTE_R_KEY_BIT4 ASM_CONST(0x0000000000000200) #define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI) #define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 3ba571dfdfd9..209f12705fbb 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -203,6 +203,12 @@ static inline int vma_pkey(struct vm_area_struct *vma) { return 0; } + +static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +{ + return 0x0UL; +} + #endif /* CONFIG_PPC_MEM_KEYS */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index 03d8d5053126..a8ef7fa5360e 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -62,6 +62,15 @@ static inline int vma_pkey(struct vm_area_struct *vma) #define arch_max_pkey() pkeys_total +static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +{ + return (((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL)); +} + #define pkey_alloc_mask(pkey) (0x1 << pkey) #define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 8cd31ea9b243..a78e24cf93ff 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -233,6 +233,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) */ rflags |= HPTE_R_M; + rflags |= pte_to_hpte_pkey_bits(pteflags); return rflags; } -- cgit v1.2.3 From f2407ef3ba225665ee24965f69bc84435fb590cf Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:37 -0800 Subject: powerpc: helper to validate key-access permissions of a pte helper function that checks if the read/write/execute is allowed on the pte. Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 2 ++ arch/powerpc/include/asm/pkeys.h | 9 +++++++++ arch/powerpc/mm/pkeys.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 26285fc17baf..449b797f8b7b 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -571,6 +571,8 @@ static inline int pte_present(pte_t pte) return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT)); } +extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute); + #define pte_access_permitted pte_access_permitted static inline bool pte_access_permitted(pte_t pte, bool write) { diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index a8ef7fa5360e..2298771b066b 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -71,6 +71,15 @@ static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) ((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL)); } +static inline u16 pte_to_pkey_bits(u64 pteflags) +{ + return (((pteflags & H_PTE_PKEY_BIT0) ? 0x10 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? 0x8 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? 0x2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT4) ? 0x1 : 0x0UL)); +} + #define pkey_alloc_mask(pkey) (0x1 << pkey) #define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map) diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 010c90c1f5da..5071778fdd20 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -358,3 +358,31 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, /* Nothing to override. */ return vma_pkey(vma); } + +static bool pkey_access_permitted(int pkey, bool write, bool execute) +{ + int pkey_shift; + u64 amr; + + if (!pkey) + return true; + + if (!is_pkey_enabled(pkey)) + return true; + + pkey_shift = pkeyshift(pkey); + if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift))) + return true; + + amr = read_amr(); /* Delay reading amr until absolutely needed */ + return ((!write && !(amr & (AMR_RD_BIT << pkey_shift))) || + (write && !(amr & (AMR_WR_BIT << pkey_shift)))); +} + +bool arch_pte_access_permitted(u64 pte, bool write, bool execute) +{ + if (static_branch_likely(&pkey_disabled)) + return true; + + return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute); +} -- cgit v1.2.3 From bca7aacfe8be121ecefb3be609420220b0820c2c Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:38 -0800 Subject: powerpc: check key protection for user page access Make sure that the kernel does not access user pages without checking their key-protection. Signed-off-by: Ram Pai [mpe: Integrate with upstream version of pte_access_permitted()] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 449b797f8b7b..1c495068bcfa 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -571,7 +571,14 @@ static inline int pte_present(pte_t pte) return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT)); } +#ifdef CONFIG_PPC_MEM_KEYS extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute); +#else +static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute) +{ + return true; +} +#endif /* CONFIG_PPC_MEM_KEYS */ #define pte_access_permitted pte_access_permitted static inline bool pte_access_permitted(pte_t pte, bool write) @@ -593,7 +600,8 @@ static inline bool pte_access_permitted(pte_t pte, bool write) if ((pteval & clear_pte_bits) == clear_pte_bits) return false; - return true; + + return arch_pte_access_permitted(pte_val(pte), write, 0); } /* -- cgit v1.2.3 From 1137573acfe4c67cdd265bbfbd2d66ebe87d6325 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:39 -0800 Subject: powerpc: implementation for arch_vma_access_permitted() This patch provides the implementation for arch_vma_access_permitted(). Returns true if the requested access is allowed by pkey associated with the vma. Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 5 ++++- arch/powerpc/mm/pkeys.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 209f12705fbb..cd2bd739c0df 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -186,6 +186,10 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } +#ifdef CONFIG_PPC_MEM_KEYS +bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, + bool execute, bool foreign); +#else /* CONFIG_PPC_MEM_KEYS */ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { @@ -193,7 +197,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, return true; } -#ifndef CONFIG_PPC_MEM_KEYS #define pkey_mm_init(mm) #define thread_pkey_regs_save(thread) #define thread_pkey_regs_restore(new_thread, old_thread) diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 5071778fdd20..4a885cc165e5 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -386,3 +386,37 @@ bool arch_pte_access_permitted(u64 pte, bool write, bool execute) return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute); } + +/* + * We only want to enforce protection keys on the current thread because we + * effectively have no access to AMR/IAMR for other threads or any way to tell + * which AMR/IAMR in a threaded process we could use. + * + * So do not enforce things if the VMA is not from the current mm, or if we are + * in a kernel thread. + */ +static inline bool vma_is_foreign(struct vm_area_struct *vma) +{ + if (!current->mm) + return true; + + /* if it is not our ->mm, it has to be foreign */ + if (current->mm != vma->vm_mm) + return true; + + return false; +} + +bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, + bool execute, bool foreign) +{ + if (static_branch_likely(&pkey_disabled)) + return true; + /* + * Do not enforce our key-permissions on a foreign vma. + */ + if (foreign || vma_is_foreign(vma)) + return true; + + return pkey_access_permitted(vma_pkey(vma), write, execute); +} -- cgit v1.2.3 From e6c2a4797e101a25eced94aa9e1fc42c30247aec Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:40 -0800 Subject: powerpc: Handle exceptions caused by pkey violation Handle Data and Instruction exceptions caused by memory protection-key. The CPU will detect the key fault if the HPTE is already programmed with the key. However if the HPTE is not hashed, a key fault will not be detected by the hardware. The software will detect pkey violation in such a case. Signed-off-by: Ram Pai Signed-off-by: Thiago Jung Bauermann Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 1 - arch/powerpc/kernel/exceptions-64s.S | 2 +- arch/powerpc/mm/fault.c | 22 ++++++++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b779f3ccd412..ffc9990e8040 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -312,7 +312,6 @@ DSISR_BAD_EXT_CTRL) #define DSISR_BAD_FAULT_64S (DSISR_BAD_FAULT_32S | \ DSISR_ATTR_CONFLICT | \ - DSISR_KEYFAULT | \ DSISR_UNSUPP_MMU | \ DSISR_PRTABLE_FAULT | \ DSISR_ICSWX_NO_CT | \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1a4f51f35870..42a47d83282c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1527,7 +1527,7 @@ USE_TEXT_SECTION() .balign IFETCH_ALIGN_BYTES do_hash_page: #ifdef CONFIG_PPC_BOOK3S_64 - lis r0,(DSISR_BAD_FAULT_64S|DSISR_DABRMATCH)@h + lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h ori r0,r0,DSISR_BAD_FAULT_64S@l and. r0,r4,r0 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index c07896c19517..6417a659b02d 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -427,6 +427,11 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + if (error_code & DSISR_KEYFAULT) { + _exception(SIGSEGV, regs, SEGV_PKUERR, address); + return 0; + } + /* * We want to do this outside mmap_sem, because reading code around nip * can result in fault, which will cause a deadlock when called with @@ -498,6 +503,23 @@ good_area: * the fault. */ fault = handle_mm_fault(vma, address, flags); + +#ifdef CONFIG_PPC_MEM_KEYS + /* + * if the HPTE is not hashed, hardware will not detect + * a key fault. Lets check if we failed because of a + * software detected key fault. + */ + if (unlikely(fault & VM_FAULT_SIGSEGV) && + !arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, + is_exec, 0)) { + int pkey = vma_pkey(vma); + + if (likely(pkey)) + return __bad_area(regs, address, SEGV_PKUERR); + } +#endif /* CONFIG_PPC_MEM_KEYS */ + major |= fault & VM_FAULT_MAJOR; /* -- cgit v1.2.3 From 087003e9ef7c1c5bec932387e47511429eff2a54 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:41 -0800 Subject: powerpc: introduce get_mm_addr_key() helper get_mm_addr_key() helper returns the pkey associated with an address corresponding to a given mm_struct. Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu.h | 9 +++++++++ arch/powerpc/mm/hash_utils_64.c | 24 ++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 6364f5c2cc3e..bb38312cff28 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -260,6 +260,15 @@ static inline bool early_radix_enabled(void) } #endif +#ifdef CONFIG_PPC_MEM_KEYS +extern u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address); +#else +static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address) +{ + return 0; +} +#endif /* CONFIG_PPC_MEM_KEYS */ + #endif /* !__ASSEMBLY__ */ /* The kernel use the constants below to index in the page sizes array. diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index a78e24cf93ff..462c34e7b01d 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1581,6 +1581,30 @@ out_exit: local_irq_restore(flags); } +#ifdef CONFIG_PPC_MEM_KEYS +/* + * Return the protection key associated with the given address and the + * mm_struct. + */ +u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address) +{ + pte_t *ptep; + u16 pkey = 0; + unsigned long flags; + + if (!mm || !mm->pgd) + return 0; + + local_irq_save(flags); + ptep = find_linux_pte(mm->pgd, address, NULL, NULL); + if (ptep) + pkey = pte_to_pkey_bits(pte_val(READ_ONCE(*ptep))); + local_irq_restore(flags); + + return pkey; +} +#endif /* CONFIG_PPC_MEM_KEYS */ + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM static inline void tm_flush_hash_page(int local) { -- cgit v1.2.3 From 99cd1302327a2ccaedf905e9f6a8d8fd234bd485 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:42 -0800 Subject: powerpc: Deliver SEGV signal on pkey violation The value of the pkey, whose protection got violated, is made available in si_pkey field of the siginfo structure. Signed-off-by: Ram Pai Signed-off-by: Thiago Jung Bauermann Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/bug.h | 1 + arch/powerpc/kernel/traps.c | 12 +++++++++++- arch/powerpc/mm/fault.c | 39 +++++++++++++++++++++++++++------------ 3 files changed, 39 insertions(+), 13 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 3c04249bcf39..97c38472b924 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -133,6 +133,7 @@ struct pt_regs; extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); extern void _exception(int, struct pt_regs *, int, unsigned long); +extern void _exception_pkey(int, struct pt_regs *, int, unsigned long, int); extern void die(const char *, struct pt_regs *, long); extern bool die_will_crash(void); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index d139117d3339..4b1a8e2ec023 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -266,7 +267,9 @@ void user_single_step_siginfo(struct task_struct *tsk, info->si_addr = (void __user *)regs->nip; } -void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) + +void _exception_pkey(int signr, struct pt_regs *regs, int code, + unsigned long addr, int key) { siginfo_t info; const char fmt32[] = KERN_INFO "%s[%d]: unhandled signal %d " \ @@ -293,9 +296,16 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) info.si_signo = signr; info.si_code = code; info.si_addr = (void __user *) addr; + info.si_pkey = key; + force_sig_info(signr, &info, current); } +void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) +{ + _exception_pkey(signr, regs, code, addr, 0); +} + void system_reset_exception(struct pt_regs *regs) { /* diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 6417a659b02d..d3a6e0395eaa 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -107,7 +107,8 @@ static bool store_updates_sp(struct pt_regs *regs) */ static int -__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code) +__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code, + int pkey) { /* * If we are in kernel mode, bail out with a SEGV, this will @@ -117,17 +118,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code) if (!user_mode(regs)) return SIGSEGV; - _exception(SIGSEGV, regs, si_code, address); + _exception_pkey(SIGSEGV, regs, si_code, address, pkey); return 0; } static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address) { - return __bad_area_nosemaphore(regs, address, SEGV_MAPERR); + return __bad_area_nosemaphore(regs, address, SEGV_MAPERR, 0); } -static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code) +static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code, + int pkey) { struct mm_struct *mm = current->mm; @@ -137,12 +139,18 @@ static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code) */ up_read(&mm->mmap_sem); - return __bad_area_nosemaphore(regs, address, si_code); + return __bad_area_nosemaphore(regs, address, si_code, pkey); } static noinline int bad_area(struct pt_regs *regs, unsigned long address) { - return __bad_area(regs, address, SEGV_MAPERR); + return __bad_area(regs, address, SEGV_MAPERR, 0); +} + +static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address, + int pkey) +{ + return __bad_area_nosemaphore(regs, address, SEGV_PKUERR, pkey); } static int do_sigbus(struct pt_regs *regs, unsigned long address, @@ -427,10 +435,9 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); - if (error_code & DSISR_KEYFAULT) { - _exception(SIGSEGV, regs, SEGV_PKUERR, address); - return 0; - } + if (error_code & DSISR_KEYFAULT) + return bad_key_fault_exception(regs, address, + get_mm_addr_key(mm, address)); /* * We want to do this outside mmap_sem, because reading code around nip @@ -513,10 +520,18 @@ good_area: if (unlikely(fault & VM_FAULT_SIGSEGV) && !arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, is_exec, 0)) { + /* + * The PGD-PDT...PMD-PTE tree may not have been fully setup. + * Hence we cannot walk the tree to locate the PTE, to locate + * the key. Hence let's use vma_pkey() to get the key; instead + * of get_mm_addr_key(). + */ int pkey = vma_pkey(vma); - if (likely(pkey)) - return __bad_area(regs, address, SEGV_PKUERR); + if (likely(pkey)) { + up_read(&mm->mmap_sem); + return bad_key_fault_exception(regs, address, pkey); + } } #endif /* CONFIG_PPC_MEM_KEYS */ -- cgit v1.2.3 From c5cc1f4df6b16646f8fae7aab523c1820bf916e8 Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Thu, 18 Jan 2018 17:50:43 -0800 Subject: powerpc/ptrace: Add memory protection key regset The AMR/IAMR/UAMOR are part of the program context. Allow it to be accessed via ptrace and through core files. Signed-off-by: Ram Pai Signed-off-by: Thiago Jung Bauermann Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pkeys.h | 5 +++ arch/powerpc/include/uapi/asm/elf.h | 1 + arch/powerpc/kernel/ptrace.c | 66 +++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/traps.c | 7 ++++ include/uapi/linux/elf.h | 1 + 5 files changed, 80 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index 2298771b066b..c3cbad824e5a 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -202,6 +202,11 @@ static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, return __arch_set_user_pkey_access(tsk, pkey, init_val); } +static inline bool arch_pkeys_enabled(void) +{ + return !static_branch_likely(&pkey_disabled); +} + extern void pkey_mm_init(struct mm_struct *mm); extern void thread_pkey_regs_save(struct thread_struct *thread); extern void thread_pkey_regs_restore(struct thread_struct *new_thread, diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h index 5f201d40bcca..860c59291bfc 100644 --- a/arch/powerpc/include/uapi/asm/elf.h +++ b/arch/powerpc/include/uapi/asm/elf.h @@ -97,6 +97,7 @@ #define ELF_NTMSPRREG 3 /* include tfhar, tfiar, texasr */ #define ELF_NEBB 3 /* includes ebbrr, ebbhr, bescr */ #define ELF_NPMU 5 /* includes siar, sdar, sier, mmcr2, mmcr0 */ +#define ELF_NPKEY 3 /* includes amr, iamr, uamor */ typedef unsigned long elf_greg_t64; typedef elf_greg_t64 elf_gregset_t64[ELF_NGREG]; diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index aef08e579946..ca72d7391d40 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -1787,6 +1788,61 @@ static int pmu_set(struct task_struct *target, return ret; } #endif + +#ifdef CONFIG_PPC_MEM_KEYS +static int pkey_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!arch_pkeys_enabled()) + return -ENODEV; + + return regset->n; +} + +static int pkey_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); + BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor)); + + if (!arch_pkeys_enabled()) + return -ENODEV; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.amr, 0, + ELF_NPKEY * sizeof(unsigned long)); +} + +static int pkey_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 new_amr; + int ret; + + if (!arch_pkeys_enabled()) + return -ENODEV; + + /* Only the AMR can be set from userspace */ + if (pos != 0 || count != sizeof(new_amr)) + return -EINVAL; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &new_amr, 0, sizeof(new_amr)); + if (ret) + return ret; + + /* UAMOR determines which bits of the AMR can be set from userspace. */ + target->thread.amr = (new_amr & target->thread.uamor) | + (target->thread.amr & ~target->thread.uamor); + + return 0; +} +#endif /* CONFIG_PPC_MEM_KEYS */ + /* * These are our native regset flavors. */ @@ -1821,6 +1877,9 @@ enum powerpc_regset { REGSET_EBB, /* EBB registers */ REGSET_PMR, /* Performance Monitor Registers */ #endif +#ifdef CONFIG_PPC_MEM_KEYS + REGSET_PKEY, /* AMR register */ +#endif }; static const struct user_regset native_regsets[] = { @@ -1926,6 +1985,13 @@ static const struct user_regset native_regsets[] = { .active = pmu_active, .get = pmu_get, .set = pmu_set }, #endif +#ifdef CONFIG_PPC_MEM_KEYS + [REGSET_PKEY] = { + .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY, + .size = sizeof(u64), .align = sizeof(u64), + .active = pkey_active, .get = pkey_get, .set = pkey_set + }, +#endif }; static const struct user_regset_view user_ppc_native_view = { diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 4b1a8e2ec023..122a3c883f4e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -292,6 +292,13 @@ void _exception_pkey(int signr, struct pt_regs *regs, int code, local_irq_enable(); current->thread.trap_nr = code; + + /* + * Save all the pkey registers AMR/IAMR/UAMOR. Eg: Core dumps need + * to capture the content, if the task gets killed. + */ + thread_pkey_regs_save(¤t->thread); + memset(&info, 0, sizeof(info)); info.si_signo = signr; info.si_code = code; diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index bb6836986200..3bf73fb58045 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -396,6 +396,7 @@ typedef struct elf64_shdr { #define NT_PPC_TM_CTAR 0x10d /* TM checkpointed Target Address Register */ #define NT_PPC_TM_CPPR 0x10e /* TM checkpointed Program Priority Register */ #define NT_PPC_TM_CDSCR 0x10f /* TM checkpointed Data Stream Control Register */ +#define NT_PPC_PKEY 0x110 /* Memory Protection Keys registers */ #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ #define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ -- cgit v1.2.3 From cf43d3b26452a332d7e1d64a00079c607613e944 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:44 -0800 Subject: powerpc: Enable pkey subsystem PAPR defines 'ibm,processor-storage-keys' property. It exports two values. The first value holds the number of data-access keys and the second holds the number of instruction-access keys. Due to a bug in the firmware, instruction-access keys is always reported as zero. However any key can be configured to disable data-access and/or disable execution-access. The inavailablity of the second value is not a big handicap, though it could have been used to determine if the platform supported disable-execution-access. Non-PAPR platforms do not define this property in the device tree yet. Fortunately power8 is the only released Non-PAPR platform that is supported. Here, we hardcode the number of supported pkey to 32, by consulting the PowerISA3.0 This patch calculates the number of keys supported by the platform. Also it determines the platform support for read/write/execution access support for pkeys. Signed-off-by: Ram Pai [mpe: Use a PVR check instead of CPU_FTR for execute. Restrict to Power7/8/9 for now until older CPUs are tested.] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cputable.h | 9 +++--- arch/powerpc/include/asm/pkeys.h | 3 ++ arch/powerpc/mm/pkeys.c | 64 +++++++++++++++++++++++++++++++------ 3 files changed, 63 insertions(+), 13 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 78ca2a721d04..a2c5c95882cf 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -195,7 +195,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0004000000000000) #define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0008000000000000) #define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0010000000000000) -/* Free LONG_ASM_CONST(0x0020000000000000) */ +#define CPU_FTR_PKEY LONG_ASM_CONST(0x0020000000000000) #define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0040000000000000) #define CPU_FTR_TM LONG_ASM_CONST(0x0080000000000000) #define CPU_FTR_CFAR LONG_ASM_CONST(0x0100000000000000) @@ -442,7 +442,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | \ - CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX) + CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY) #define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -452,7 +452,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP) + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL) #define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ @@ -464,7 +464,8 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300) + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ + CPU_FTR_PKEY) #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ (~CPU_FTR_SAO)) #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9 diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index c3cbad824e5a..0409c80c32c0 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -9,6 +9,7 @@ #define _ASM_POWERPC_KEYS_H #include +#include DECLARE_STATIC_KEY_TRUE(pkey_disabled); extern int pkeys_total; /* total pkeys as per device tree */ @@ -208,6 +209,8 @@ static inline bool arch_pkeys_enabled(void) } extern void pkey_mm_init(struct mm_struct *mm); +extern bool arch_supports_pkeys(int cap); +extern unsigned int arch_usable_pkeys(void); extern void thread_pkey_regs_save(struct thread_struct *thread); extern void thread_pkey_regs_restore(struct thread_struct *new_thread, struct thread_struct *old_thread); diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 4a885cc165e5..ba71c5481f42 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -6,11 +6,14 @@ */ #include +#include #include +#include DEFINE_STATIC_KEY_TRUE(pkey_disabled); bool pkey_execute_disable_supported; int pkeys_total; /* Total pkeys as per device tree */ +bool pkeys_devtree_defined; /* pkey property exported by device tree */ u32 initial_allocation_mask; /* Bits set for reserved keys */ u64 pkey_amr_uamor_mask; /* Bits in AMR/UMOR not to be touched */ u64 pkey_iamr_mask; /* Bits in AMR not to be touched */ @@ -22,6 +25,35 @@ u64 pkey_iamr_mask; /* Bits in AMR not to be touched */ #define PKEY_REG_BITS (sizeof(u64)*8) #define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY)) +static void scan_pkey_feature(void) +{ + u32 vals[2]; + struct device_node *cpu; + + cpu = of_find_node_by_type(NULL, "cpu"); + if (!cpu) + return; + + if (of_property_read_u32_array(cpu, + "ibm,processor-storage-keys", vals, 2)) + return; + + /* + * Since any pkey can be used for data or execute, we will just treat + * all keys as equal and track them as one entity. + */ + pkeys_total = be32_to_cpu(vals[0]); + pkeys_devtree_defined = true; +} + +static inline bool pkey_mmu_enabled(void) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + return pkeys_total; + else + return cpu_has_feature(CPU_FTR_PKEY); +} + int pkey_initialize(void) { int os_reserved, i; @@ -42,14 +74,17 @@ int pkey_initialize(void) __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) != (sizeof(u64) * BITS_PER_BYTE)); + /* scan the device tree for pkey feature */ + scan_pkey_feature(); + /* - * Disable the pkey system till everything is in place. A subsequent - * patch will enable it. + * Let's assume 32 pkeys on P8 bare metal, if its not defined by device + * tree. We make this exception since skiboot forgot to expose this + * property on power8. */ - static_branch_enable(&pkey_disabled); - - /* Lets assume 32 keys */ - pkeys_total = 32; + if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) && + cpu_has_feature(CPU_FTRS_POWER8)) + pkeys_total = 32; /* * Adjust the upper limit, based on the number of bits supported by @@ -58,11 +93,22 @@ int pkey_initialize(void) pkeys_total = min_t(int, pkeys_total, (ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)); + if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total) + static_branch_enable(&pkey_disabled); + else + static_branch_disable(&pkey_disabled); + + if (static_branch_likely(&pkey_disabled)) + return 0; + /* - * Disable execute_disable support for now. A subsequent patch will - * enable it. + * The device tree cannot be relied to indicate support for + * execute_disable support. Instead we use a PVR check. */ - pkey_execute_disable_supported = false; + if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p)) + pkey_execute_disable_supported = false; + else + pkey_execute_disable_supported = true; #ifdef CONFIG_PPC_4K_PAGES /* -- cgit v1.2.3 From 9499ec1b5e82321829e1c1510bcc37edc20b6f38 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:45 -0800 Subject: powerpc: sys_pkey_alloc() and sys_pkey_free() system calls Finally this patch provides the ability for a process to allocate and free a protection key. Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/systbl.h | 2 ++ arch/powerpc/include/asm/unistd.h | 4 +--- arch/powerpc/include/uapi/asm/unistd.h | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 449912f057f6..dea4a952ec53 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -389,3 +389,5 @@ COMPAT_SYS_SPU(preadv2) COMPAT_SYS_SPU(pwritev2) SYSCALL(kexec_file_load) SYSCALL(statx) +SYSCALL(pkey_alloc) +SYSCALL(pkey_free) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 9ba11dbcaca9..e0273bc5b095 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,13 +12,11 @@ #include -#define NR_syscalls 384 +#define NR_syscalls 386 #define __NR__exit __NR_exit #define __IGNORE_pkey_mprotect -#define __IGNORE_pkey_alloc -#define __IGNORE_pkey_free #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index df8684f31919..5db438516e61 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -395,5 +395,7 @@ #define __NR_pwritev2 381 #define __NR_kexec_file_load 382 #define __NR_statx 383 +#define __NR_pkey_alloc 384 +#define __NR_pkey_free 385 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ -- cgit v1.2.3 From 3350eb2ea127978319ced883523d828046af4045 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:46 -0800 Subject: powerpc: sys_pkey_mprotect() system call Patch provides the ability for a process to associate a pkey with a address range. Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/systbl.h | 1 + arch/powerpc/include/asm/unistd.h | 4 +--- arch/powerpc/include/uapi/asm/unistd.h | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index dea4a952ec53..d61f9c96d916 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -391,3 +391,4 @@ SYSCALL(kexec_file_load) SYSCALL(statx) SYSCALL(pkey_alloc) SYSCALL(pkey_free) +SYSCALL(pkey_mprotect) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index e0273bc5b095..daf1ba97a00c 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,12 +12,10 @@ #include -#define NR_syscalls 386 +#define NR_syscalls 387 #define __NR__exit __NR_exit -#define __IGNORE_pkey_mprotect - #ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 5db438516e61..389c36fd8299 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -397,5 +397,6 @@ #define __NR_statx 383 #define __NR_pkey_alloc 384 #define __NR_pkey_free 385 +#define __NR_pkey_mprotect 386 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ -- cgit v1.2.3 From 5b2b80714796073ec9e196c7aeec59b7489fe676 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 4 Dec 2017 11:19:22 +0530 Subject: powerpc/mm: Invalidate subpage_prot() system call on radix platforms Radix enabled platforms don't support subpage_prot() system calls. But at present the system call goes through without an error and fails later on while validating expected subpage accesses. Lets not allow the system call on powerpc radix platforms to begin with to prevent this confusion in user space. Signed-off-by: Anshuman Khandual Signed-off-by: Michael Ellerman --- arch/powerpc/mm/subpage-prot.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 781532d7bc4d..f14a07c2fb90 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -195,6 +195,9 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) unsigned long next, limit; int err; + if (radix_enabled()) + return -ENOENT; + /* Check parameters */ if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) || addr >= mm->task_size || len >= mm->task_size || -- cgit v1.2.3 From ae677ff02f2ddb0980953efd4afed1c90a56c88f Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 18 Jan 2018 13:51:03 +1100 Subject: powerpc/powernv/ioda: Finish removing explicit max window size check 9003a2498 removed checn from the DMA window pages allocator, however the VFIO driver tests limits before doing so by calling the get_table_size hook which was left behind; this fixes it. Fixes: 9003a2498 "powerpc/powernv/ioda: Remove explicit max window size check" Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b9146d5aef81..1069f9cb273a 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2572,7 +2572,6 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, unsigned long direct_table_size; if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) || - (window_size > memory_hotplug_max()) || !is_power_of_2(window_size)) return 0; -- cgit v1.2.3 From 11ed8c5569b149a065184dc8ce22414aac2f20e9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 Jan 2018 18:01:50 +0100 Subject: powerpc/mpic_timer: avoid struct timeval In an effort to remove all instances of 'struct timeval' from the kernel, I'm changing the powerpc mpic_timer interface to use plain seconds instead. There is only one user of this interface, and that doesn't use the microseconds portion, so the code gets noticeably simpler in the process. Signed-off-by: Arnd Bergmann Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mpic_timer.h | 8 ++--- arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c | 16 ++++----- arch/powerpc/sysdev/mpic_timer.c | 55 ++++++----------------------- 3 files changed, 21 insertions(+), 58 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mpic_timer.h b/arch/powerpc/include/asm/mpic_timer.h index 0e23cd4ac8aa..13e6702ec458 100644 --- a/arch/powerpc/include/asm/mpic_timer.h +++ b/arch/powerpc/include/asm/mpic_timer.h @@ -29,17 +29,17 @@ struct mpic_timer { #ifdef CONFIG_MPIC_TIMER struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, - const struct timeval *time); + time64_t time); void mpic_start_timer(struct mpic_timer *handle); void mpic_stop_timer(struct mpic_timer *handle); -void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time); +void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time); void mpic_free_timer(struct mpic_timer *handle); #else struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, - const struct timeval *time) { return NULL; } + time64_t time) { return NULL; } void mpic_start_timer(struct mpic_timer *handle) { } void mpic_stop_timer(struct mpic_timer *handle) { } -void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) { } +void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time) { } void mpic_free_timer(struct mpic_timer *handle) { } #endif diff --git a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c index 1707bf04dec6..94278e8af192 100644 --- a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c +++ b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c @@ -56,17 +56,16 @@ static ssize_t fsl_timer_wakeup_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct timeval interval; - int val = 0; + time64_t interval = 0; mutex_lock(&sysfs_lock); if (fsl_wakeup->timer) { mpic_get_remain_time(fsl_wakeup->timer, &interval); - val = interval.tv_sec + 1; + interval++; } mutex_unlock(&sysfs_lock); - return sprintf(buf, "%d\n", val); + return sprintf(buf, "%lld\n", interval); } static ssize_t fsl_timer_wakeup_store(struct device *dev, @@ -74,11 +73,10 @@ static ssize_t fsl_timer_wakeup_store(struct device *dev, const char *buf, size_t count) { - struct timeval interval; + time64_t interval; int ret; - interval.tv_usec = 0; - if (kstrtol(buf, 0, &interval.tv_sec)) + if (kstrtoll(buf, 0, &interval)) return -EINVAL; mutex_lock(&sysfs_lock); @@ -89,13 +87,13 @@ static ssize_t fsl_timer_wakeup_store(struct device *dev, fsl_wakeup->timer = NULL; } - if (!interval.tv_sec) { + if (!interval) { mutex_unlock(&sysfs_lock); return count; } fsl_wakeup->timer = mpic_request_timer(fsl_mpic_timer_irq, - fsl_wakeup, &interval); + fsl_wakeup, interval); if (!fsl_wakeup->timer) { mutex_unlock(&sysfs_lock); return -EINVAL; diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c index a418579591be..87e7c42777a8 100644 --- a/arch/powerpc/sysdev/mpic_timer.c +++ b/arch/powerpc/sysdev/mpic_timer.c @@ -47,9 +47,6 @@ #define MAX_TICKS_CASCADE (~0U) #define TIMER_OFFSET(num) (1 << (TIMERS_PER_GROUP - 1 - num)) -/* tv_usec should be less than ONE_SECOND, otherwise use tv_sec */ -#define ONE_SECOND 1000000 - struct timer_regs { u32 gtccr; u32 res0[3]; @@ -90,51 +87,23 @@ static struct cascade_priv cascade_timer[] = { static LIST_HEAD(timer_group_list); static void convert_ticks_to_time(struct timer_group_priv *priv, - const u64 ticks, struct timeval *time) + const u64 ticks, time64_t *time) { - u64 tmp_sec; - - time->tv_sec = (__kernel_time_t)div_u64(ticks, priv->timerfreq); - tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq; - - time->tv_usec = 0; - - if (tmp_sec <= ticks) - time->tv_usec = (__kernel_suseconds_t) - div_u64((ticks - tmp_sec) * 1000000, priv->timerfreq); - - return; + *time = (u64)div_u64(ticks, priv->timerfreq); } /* the time set by the user is converted to "ticks" */ static int convert_time_to_ticks(struct timer_group_priv *priv, - const struct timeval *time, u64 *ticks) + time64_t time, u64 *ticks) { u64 max_value; /* prevent u64 overflow */ - u64 tmp = 0; - - u64 tmp_sec; - u64 tmp_ms; - u64 tmp_us; max_value = div_u64(ULLONG_MAX, priv->timerfreq); - if (time->tv_sec > max_value || - (time->tv_sec == max_value && time->tv_usec > 0)) + if (time > max_value) return -EINVAL; - tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq; - tmp += tmp_sec; - - tmp_ms = time->tv_usec / 1000; - tmp_ms = div_u64((u64)tmp_ms * (u64)priv->timerfreq, 1000); - tmp += tmp_ms; - - tmp_us = time->tv_usec % 1000; - tmp_us = div_u64((u64)tmp_us * (u64)priv->timerfreq, 1000000); - tmp += tmp_us; - - *ticks = tmp; + *ticks = (u64)time * (u64)priv->timerfreq; return 0; } @@ -223,7 +192,7 @@ static struct mpic_timer *get_cascade_timer(struct timer_group_priv *priv, return allocated_timer; } -static struct mpic_timer *get_timer(const struct timeval *time) +static struct mpic_timer *get_timer(time64_t time) { struct timer_group_priv *priv; struct mpic_timer *timer; @@ -277,7 +246,7 @@ static struct mpic_timer *get_timer(const struct timeval *time) * @handle: the timer to be started. * * It will do ->fn(->dev) callback from the hardware interrupt at - * the ->timeval point in the future. + * the 'time64_t' point in the future. */ void mpic_start_timer(struct mpic_timer *handle) { @@ -319,7 +288,7 @@ EXPORT_SYMBOL(mpic_stop_timer); * * Query timer remaining time. */ -void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) +void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time) { struct timer_group_priv *priv = container_of(handle, struct timer_group_priv, timer[handle->num]); @@ -391,7 +360,7 @@ EXPORT_SYMBOL(mpic_free_timer); * else "handle" on success. */ struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, - const struct timeval *time) + time64_t time) { struct mpic_timer *allocated_timer; int ret; @@ -399,11 +368,7 @@ struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, if (list_empty(&timer_group_list)) return NULL; - if (!(time->tv_sec + time->tv_usec) || - time->tv_sec < 0 || time->tv_usec < 0) - return NULL; - - if (time->tv_usec > ONE_SECOND) + if (time < 0) return NULL; allocated_timer = get_timer(time); -- cgit v1.2.3 From cef37ac119b1abffcb41a9a706792968676ea106 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 Jan 2018 18:00:35 +0100 Subject: powerpc/spufs: use timespec64 for timestamps The switch log prints the tv_sec portion of timespec as a 32-bit number, while overflows in 2106. It also uses the timespec type, which is safe on 64-bit architectures, but deprecated because it causes overflows in 2038 elsewhere. This changes it to timespec64 and printing a 64-bit number for consistency. Signed-off-by: Arnd Bergmann Acked-by: Jeremy Kerr Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/cell/spufs/file.c | 6 +++--- arch/powerpc/platforms/cell/spufs/spufs.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 5ffcdeb1eb17..94139135be9c 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -2375,8 +2375,8 @@ static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n) p = ctx->switch_log->log + ctx->switch_log->tail % SWITCH_LOG_BUFSIZE; - return snprintf(tbuf, n, "%u.%09u %d %u %u %llu\n", - (unsigned int) p->tstamp.tv_sec, + return snprintf(tbuf, n, "%llu.%09u %d %u %u %llu\n", + (unsigned long long) p->tstamp.tv_sec, (unsigned int) p->tstamp.tv_nsec, p->spu_id, (unsigned int) p->type, @@ -2499,7 +2499,7 @@ void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx, struct switch_log_entry *p; p = ctx->switch_log->log + ctx->switch_log->head; - ktime_get_ts(&p->tstamp); + ktime_get_ts64(&p->tstamp); p->timebase = get_tb(); p->spu_id = spu ? spu->number : -1; p->type = type; diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 5e59f80e95db..5d85c689c2e9 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -69,7 +69,7 @@ struct switch_log { unsigned long head; unsigned long tail; struct switch_log_entry { - struct timespec tstamp; + struct timespec64 tstamp; s32 spu_id; u32 type; u32 val; -- cgit v1.2.3 From 4ec591e51a4b0aedb6c7f1a8cd722aa58d7f61ba Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 4 Jan 2018 16:35:25 +0100 Subject: powerpc: restore alphabetic order in Kconfig This patch restores the alphabetic order which was broken by commit 1e0fc9d1eb2b0 ("powerpc/Kconfig: Enable STRICT_KERNEL_RWX for some configs") Fixes: 1e0fc9d1eb2b0 ("powerpc/Kconfig: Enable STRICT_KERNEL_RWX for some configs") Signed-off-by: Christophe Leroy Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c9660a18c093..7bf80e214191 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -142,6 +142,7 @@ config PPC select ARCH_HAS_PMEM_API if PPC64 select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE select ARCH_HAS_SG_CHAIN + select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64 select ARCH_HAS_UBSAN_SANITIZE_ALL @@ -149,6 +150,7 @@ config PPC select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT select ARCH_USE_BUILTIN_BSWAP @@ -179,8 +181,6 @@ config PPC select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK - select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) - select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select HAVE_CBPF_JIT if !PPC64 select HAVE_CONTEXT_TRACKING if PPC64 select HAVE_DEBUG_KMEMLEAK -- cgit v1.2.3 From 8cf4c05712f04a405f0dacebcca8f042b391694a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 24 Nov 2017 08:31:07 +0100 Subject: powerpc/lib/code-patching: refactor patch_instruction() patch_instruction() uses almost the same sequence as __patch_instruction() This patch refactor it so that patch_instruction() uses __patch_instruction() instead of duplicating code. Signed-off-by: Christophe Leroy Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/lib/code-patching.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 096d4e4d31e6..e1c58937281f 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -23,19 +23,26 @@ #include #include -static int __patch_instruction(unsigned int *addr, unsigned int instr) +static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, + unsigned int *patch_addr) { int err; - __put_user_size(instr, addr, 4, err); + __put_user_size(instr, patch_addr, 4, err); if (err) return err; - asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" :: "r" (addr)); + asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), + "r" (exec_addr)); return 0; } +static int raw_patch_instruction(unsigned int *addr, unsigned int instr) +{ + return __patch_instruction(addr, instr, addr); +} + #ifdef CONFIG_STRICT_KERNEL_RWX static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); @@ -138,7 +145,7 @@ static inline int unmap_patch_area(unsigned long addr) int patch_instruction(unsigned int *addr, unsigned int instr) { int err; - unsigned int *dest = NULL; + unsigned int *patch_addr = NULL; unsigned long flags; unsigned long text_poke_addr; unsigned long kaddr = (unsigned long)addr; @@ -149,7 +156,7 @@ int patch_instruction(unsigned int *addr, unsigned int instr) * to allow patching. We just do the plain old patching */ if (!this_cpu_read(*PTRRELOC(&text_poke_area))) - return __patch_instruction(addr, instr); + return raw_patch_instruction(addr, instr); local_irq_save(flags); @@ -159,17 +166,10 @@ int patch_instruction(unsigned int *addr, unsigned int instr) goto out; } - dest = (unsigned int *)(text_poke_addr) + + patch_addr = (unsigned int *)(text_poke_addr) + ((kaddr & ~PAGE_MASK) / sizeof(unsigned int)); - /* - * We use __put_user_size so that we can handle faults while - * writing to dest and return err to handle faults gracefully - */ - __put_user_size(instr, dest, 4, err); - if (!err) - asm ("dcbst 0, %0; sync; icbi 0,%0; icbi 0,%1; sync; isync" - ::"r" (dest), "r"(addr)); + __patch_instruction(addr, instr, patch_addr); err = unmap_patch_area(text_poke_addr); if (err) @@ -184,7 +184,7 @@ out: int patch_instruction(unsigned int *addr, unsigned int instr) { - return __patch_instruction(addr, instr); + return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ -- cgit v1.2.3 From 8183d99f4a22c2abbc543847a588df3666ef0c0c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 24 Nov 2017 08:31:09 +0100 Subject: powerpc/lib/feature-fixups: use raw_patch_instruction() feature fixups need to use patch_instruction() early in the boot, even before the code is relocated to its final address, requiring patch_instruction() to use PTRRELOC() in order to address data. But feature fixups applies on code before it is set to read only, even for modules. Therefore, feature fixups can use raw_patch_instruction() instead. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/code-patching.h | 1 + arch/powerpc/lib/code-patching.c | 4 ++-- arch/powerpc/lib/feature-fixups.c | 8 ++++---- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 2c895e8d07f7..812535f40124 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -31,6 +31,7 @@ unsigned int create_cond_branch(const unsigned int *addr, unsigned long target, int flags); int patch_branch(unsigned int *addr, unsigned long target, int flags); int patch_instruction(unsigned int *addr, unsigned int instr); +int raw_patch_instruction(unsigned int *addr, unsigned int instr); int instr_is_relative_branch(unsigned int instr); int instr_is_relative_link_branch(unsigned int instr); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index e1c58937281f..e0d881ab304e 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -38,7 +38,7 @@ static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, return 0; } -static int raw_patch_instruction(unsigned int *addr, unsigned int instr) +int raw_patch_instruction(unsigned int *addr, unsigned int instr) { return __patch_instruction(addr, instr, addr); } @@ -155,7 +155,7 @@ int patch_instruction(unsigned int *addr, unsigned int instr) * when text_poke_area is not ready, but we still need * to allow patching. We just do the plain old patching */ - if (!this_cpu_read(*PTRRELOC(&text_poke_area))) + if (!this_cpu_read(text_poke_area)) return raw_patch_instruction(addr, instr); local_irq_save(flags); diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 41cf5ae273cf..0872d60ede10 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -62,7 +62,7 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest, } } - patch_instruction(dest, instr); + raw_patch_instruction(dest, instr); return 0; } @@ -91,7 +91,7 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) } for (; dest < end; dest++) - patch_instruction(dest, PPC_INST_NOP); + raw_patch_instruction(dest, PPC_INST_NOP); return 0; } @@ -129,7 +129,7 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) for (; start < end; start++) { dest = (void *)start + *start; - patch_instruction(dest, PPC_INST_LWSYNC); + raw_patch_instruction(dest, PPC_INST_LWSYNC); } } @@ -147,7 +147,7 @@ static void do_final_fixups(void) length = (__end_interrupts - _stext) / sizeof(int); while (length--) { - patch_instruction(dest, *src); + raw_patch_instruction(dest, *src); src++; dest++; } -- cgit v1.2.3 From 723b113319baa8d8b5f619a94348fa5b3a78bc94 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 1 Nov 2017 11:27:33 +1100 Subject: powerpc/watchdog: improve watchdog comments The overview comments in the powerpc watchdog are out of date after several iterations and changes of the code. Bring them up to date. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 58 +++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 20 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 3963baaba92d..0673230c0261 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -26,15 +26,45 @@ #include /* - * The watchdog has a simple timer that runs on each CPU, once per timer - * period. This is the heartbeat. + * The powerpc watchdog ensures that each CPU is able to service timers. + * The watchdog sets up a simple timer on each CPU to run once per timer + * period, and updates a per-cpu timestamp and a "pending" cpumask. This is + * the heartbeat. * - * Then there are checks to see if the heartbeat has not triggered on a CPU - * for the panic timeout period. Currently the watchdog only supports an - * SMP check, so the heartbeat only turns on when we have 2 or more CPUs. + * Then there are two systems to check that the heartbeat is still running. + * The local soft-NMI, and the SMP checker. * - * This is not an NMI watchdog, but Linux uses that name for a generic - * watchdog in some cases, so NMI gets used in some places. + * The soft-NMI checker can detect lockups on the local CPU. When interrupts + * are disabled with local_irq_disable(), platforms that use soft-masking + * can leave hardware interrupts enabled and handle them with a masked + * interrupt handler. The masked handler can send the timer interrupt to the + * watchdog's soft_nmi_interrupt(), which appears to Linux as an NMI + * interrupt, and can be used to detect CPUs stuck with IRQs disabled. + * + * The soft-NMI checker will compare the heartbeat timestamp for this CPU + * with the current time, and take action if the difference exceeds the + * watchdog threshold. + * + * The limitation of the soft-NMI watchdog is that it does not work when + * interrupts are hard disabled or otherwise not being serviced. This is + * solved by also having a SMP watchdog where all CPUs check all other + * CPUs heartbeat. + * + * The SMP checker can detect lockups on other CPUs. A gobal "pending" + * cpumask is kept, containing all CPUs which enable the watchdog. Each + * CPU clears their pending bit in their heartbeat timer. When the bitmask + * becomes empty, the last CPU to clear its pending bit updates a global + * timestamp and refills the pending bitmask. + * + * In the heartbeat timer, if any CPU notices that the global timestamp has + * not been updated for a period exceeding the watchdog threshold, then it + * means the CPU(s) with their bit still set in the pending mask have had + * their heartbeat stop, and action is taken. + * + * Some platforms implement true NMI IPIs, which can by used by the SMP + * watchdog to detect an unresponsive CPU and pull it out of its stuck + * state with the NMI IPI, to get crash/debug data from it. This way the + * SMP watchdog can detect hardware interrupts off lockups. */ static cpumask_t wd_cpus_enabled __read_mostly; @@ -47,19 +77,7 @@ static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */ static DEFINE_PER_CPU(struct timer_list, wd_timer); static DEFINE_PER_CPU(u64, wd_timer_tb); -/* - * These are for the SMP checker. CPUs clear their pending bit in their - * heartbeat. If the bitmask becomes empty, the time is noted and the - * bitmask is refilled. - * - * All CPUs clear their bit in the pending mask every timer period. - * Once all have cleared, the time is noted and the bits are reset. - * If the time since all clear was greater than the panic timeout, - * we can panic with the list of stuck CPUs. - * - * This will work best with NMI IPIs for crash code so the stuck CPUs - * can be pulled out to get their backtraces. - */ +/* SMP checker bits */ static unsigned long __wd_smp_lock; static cpumask_t wd_smp_cpus_pending; static cpumask_t wd_smp_cpus_stuck; -- cgit v1.2.3 From bb8651e5ce2c23c36998b263272ab2f9203f5a2e Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 5 Sep 2017 13:59:43 +0200 Subject: powerpc/fsl_pci: Correct fsl_pci_mcheck_exception get_user() had it args reversed causing NIP to be NULL:ed instead of fixing up the PCI access. Note: This still hangs my P1020 Freescale CPU hard, but at least I get a NIP now. Signed-off-by: Joakim Tjernlund Acked-by: Li Yang Signed-off-by: Scott Wood --- arch/powerpc/sysdev/fsl_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 1e57edd4947a..0c524c20c462 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1070,7 +1070,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) if (is_in_pci_mem_space(addr)) { if (user_mode(regs)) { pagefault_disable(); - ret = get_user(regs->nip, &inst); + ret = get_user(inst, (__u32 __user *)regs->nip); pagefault_enable(); } else { ret = probe_kernel_address((void *)regs->nip, inst); -- cgit v1.2.3 From d038386a0cecaf5283745f951bc35c49bd78da22 Mon Sep 17 00:00:00 2001 From: Vasyl Gomonovych Date: Mon, 27 Nov 2017 22:37:33 +0100 Subject: powerpc/fsl_pci: Fix ptr_ret.cocci warnings arch/powerpc/sysdev/fsl_pci.c:1307:1-3: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci Signed-off-by: Vasyl Gomonovych Signed-off-by: Scott Wood --- arch/powerpc/sysdev/fsl_pci.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 0c524c20c462..dd89df7833b6 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1304,10 +1304,8 @@ static int add_err_dev(struct platform_device *pdev) pdev->resource, pdev->num_resources, &pd, sizeof(pd)); - if (IS_ERR(errdev)) - return PTR_ERR(errdev); - return 0; + return PTR_ERR_OR_ZERO(errdev); } static int fsl_pci_probe(struct platform_device *pdev) -- cgit v1.2.3 From 3f38000eda485f466153d37c4f9201fb50cb03a3 Mon Sep 17 00:00:00 2001 From: Michael Bringmann Date: Fri, 1 Dec 2017 17:19:40 -0600 Subject: powerpc/firmware: Add definitions for new drc-info firmware feature Firmware Features: Define new bit flag representing the presence of new device tree property "ibm,drc-info". The flag is used to tell the front end processor whether the Linux kernel supports the new property, and by the front end processor to tell the Linux kernel that the new property is present in the device tree. Signed-off-by: Michael Bringmann Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/firmware.h | 4 +++- arch/powerpc/include/asm/prom.h | 1 + arch/powerpc/platforms/pseries/firmware.c | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 832df61f30ef..511acfd7ab0d 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -52,6 +52,7 @@ #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000) #define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000) #define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000) +#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000400000000) #ifndef __ASSEMBLY__ @@ -68,7 +69,8 @@ enum { FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO | FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY | FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN | - FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2, + FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | + FW_FEATURE_DRC_INFO, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL, FW_FEATURE_POWERNV_ALWAYS = 0, diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 9f27866e3126..2ab5d732fefa 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -160,6 +160,7 @@ extern int of_get_ibm_chip_id(struct device_node *np); #define OV5_HASH_GTSE 0x1940 /* Guest Translation Shoot Down Avail */ /* Radix Table Extensions */ #define OV5_RADIX_GTSE 0x1A40 /* Guest Translation Shoot Down Avail */ +#define OV5_DRC_INFO 0x1640 /* Redef Prop Structures: drc-info */ /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index aac3ea2911b2..a3bbeb43689e 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -115,6 +115,7 @@ vec5_fw_features_table[] = { {FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY}, {FW_FEATURE_PRRN, OV5_PRRN}, {FW_FEATURE_DRMEM_V2, OV5_DRMEM_V2}, + {FW_FEATURE_DRC_INFO, OV5_DRC_INFO}, }; static void __init fw_vec5_feature_init(const char *vec5, unsigned long len) -- cgit v1.2.3 From e83636ac333441a17436a1fcd196308f59cd0b51 Mon Sep 17 00:00:00 2001 From: Michael Bringmann Date: Fri, 1 Dec 2017 17:19:43 -0600 Subject: pseries/drc-info: Search DRC properties for CPU indexes pseries/drc-info: Provide parallel routines to convert between drc_index and CPU numbers at runtime, using the older device-tree properties ("ibm,drc-indexes", "ibm,drc-names", "ibm,drc-types" and "ibm,drc-power-domains"), or the new property "ibm,drc-info". Signed-off-by: Michael Bringmann Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/prom.h | 15 +++ arch/powerpc/platforms/pseries/of_helpers.c | 60 +++++++++++ arch/powerpc/platforms/pseries/pseries_energy.c | 126 ++++++++++++++++++------ 3 files changed, 173 insertions(+), 28 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 2ab5d732fefa..b04c5ce8191b 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -80,6 +80,21 @@ extern void of_instantiate_rtc(void); extern int of_get_ibm_chip_id(struct device_node *np); +struct of_drc_info { + char *drc_type; + char *drc_name_prefix; + u32 drc_index_start; + u32 drc_name_suffix_start; + u32 num_sequential_elems; + u32 sequential_inc; + u32 drc_power_domain; + u32 last_drc_index; +}; + +extern int of_read_drc_info_cell(struct property **prop, + const __be32 **curval, struct of_drc_info *data); + + /* * There are two methods for telling firmware what our capabilities are. * Newer machines have an "ibm,client-architecture-support" method on the diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c index 7e75101fa522..6df192f38f80 100644 --- a/arch/powerpc/platforms/pseries/of_helpers.c +++ b/arch/powerpc/platforms/pseries/of_helpers.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "of_helpers.h" @@ -37,3 +38,62 @@ struct device_node *pseries_of_derive_parent(const char *path) kfree(parent_path); return parent ? parent : ERR_PTR(-EINVAL); } + + +/* Helper Routines to convert between drc_index to cpu numbers */ + +int of_read_drc_info_cell(struct property **prop, const __be32 **curval, + struct of_drc_info *data) +{ + const char *p; + const __be32 *p2; + + if (!data) + return -EINVAL; + + /* Get drc-type:encode-string */ + p = data->drc_type = (char*) (*curval); + p = of_prop_next_string(*prop, p); + if (!p) + return -EINVAL; + + /* Get drc-name-prefix:encode-string */ + data->drc_name_prefix = (char *)p; + p = of_prop_next_string(*prop, p); + if (!p) + return -EINVAL; + + /* Get drc-index-start:encode-int */ + p2 = (const __be32 *)p; + p2 = of_prop_next_u32(*prop, p2, &data->drc_index_start); + if (!p2) + return -EINVAL; + + /* Get drc-name-suffix-start:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start); + if (!p2) + return -EINVAL; + + /* Get number-sequential-elements:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->num_sequential_elems); + if (!p2) + return -EINVAL; + + /* Get sequential-increment:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->sequential_inc); + if (!p2) + return -EINVAL; + + /* Get drc-power-domain:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->drc_power_domain); + if (!p2) + return -EINVAL; + + /* Should now know end of current entry */ + (*curval) = (void *)p2; + data->last_drc_index = data->drc_index_start + + ((data->num_sequential_elems - 1) * data->sequential_inc); + + return 0; +} +EXPORT_SYMBOL(of_read_drc_info_cell); diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c index 35c891aabef0..6ed22127391b 100644 --- a/arch/powerpc/platforms/pseries/pseries_energy.c +++ b/arch/powerpc/platforms/pseries/pseries_energy.c @@ -22,6 +22,7 @@ #include #include #include +#include #define MODULE_VERS "1.0" @@ -38,26 +39,58 @@ static int sysfs_entries; static u32 cpu_to_drc_index(int cpu) { struct device_node *dn = NULL; - const int *indexes; - int i; + int thread_index; int rc = 1; u32 ret = 0; dn = of_find_node_by_path("/cpus"); if (dn == NULL) goto err; - indexes = of_get_property(dn, "ibm,drc-indexes", NULL); - if (indexes == NULL) - goto err_of_node_put; + /* Convert logical cpu number to core number */ - i = cpu_core_index_of_thread(cpu); - /* - * The first element indexes[0] is the number of drc_indexes - * returned in the list. Hence i+1 will get the drc_index - * corresponding to core number i. - */ - WARN_ON(i > indexes[0]); - ret = indexes[i + 1]; + thread_index = cpu_core_index_of_thread(cpu); + + if (firmware_has_feature(FW_FEATURE_DRC_INFO)) { + struct property *info = NULL; + struct of_drc_info drc; + int j; + u32 num_set_entries; + const __be32 *value; + + info = of_find_property(dn, "ibm,drc-info", NULL); + if (info == NULL) + goto err_of_node_put; + + value = of_prop_next_u32(info, NULL, &num_set_entries); + if (!value) + goto err_of_node_put; + + for (j = 0; j < num_set_entries; j++) { + + of_read_drc_info_cell(&info, &value, &drc); + if (strncmp(drc.drc_type, "CPU", 3)) + goto err; + + if (thread_index < drc.last_drc_index) + break; + } + + ret = drc.drc_index_start + (thread_index * drc.sequential_inc); + } else { + const __be32 *indexes; + + indexes = of_get_property(dn, "ibm,drc-indexes", NULL); + if (indexes == NULL) + goto err_of_node_put; + + /* + * The first element indexes[0] is the number of drc_indexes + * returned in the list. Hence thread_index+1 will get the + * drc_index corresponding to core number thread_index. + */ + ret = indexes[thread_index + 1]; + } + rc = 0; err_of_node_put: @@ -72,34 +105,71 @@ static int drc_index_to_cpu(u32 drc_index) { struct device_node *dn = NULL; const int *indexes; - int i, cpu = 0; + int thread_index = 0, cpu = 0; int rc = 1; dn = of_find_node_by_path("/cpus"); if (dn == NULL) goto err; - indexes = of_get_property(dn, "ibm,drc-indexes", NULL); - if (indexes == NULL) - goto err_of_node_put; - /* - * First element in the array is the number of drc_indexes - * returned. Search through the list to find the matching - * drc_index and get the core number - */ - for (i = 0; i < indexes[0]; i++) { - if (indexes[i + 1] == drc_index) + + if (firmware_has_feature(FW_FEATURE_DRC_INFO)) { + struct property *info = NULL; + struct of_drc_info drc; + int j; + u32 num_set_entries; + const __be32 *value; + + info = of_find_property(dn, "ibm,drc-info", NULL); + if (info == NULL) + goto err_of_node_put; + + value = of_prop_next_u32(info, NULL, &num_set_entries); + if (!value) + goto err_of_node_put; + + for (j = 0; j < num_set_entries; j++) { + + of_read_drc_info_cell(&info, &value, &drc); + if (strncmp(drc.drc_type, "CPU", 3)) + goto err; + + if (drc_index > drc.last_drc_index) { + cpu += drc.num_sequential_elems; + continue; + } + cpu += ((drc_index - drc.drc_index_start) / + drc.sequential_inc); + + thread_index = cpu_first_thread_of_core(cpu); + rc = 0; break; + } + } else { + unsigned long int i; + + indexes = of_get_property(dn, "ibm,drc-indexes", NULL); + if (indexes == NULL) + goto err_of_node_put; + /* + * First element in the array is the number of drc_indexes + * returned. Search through the list to find the matching + * drc_index and get the core number + */ + for (i = 0; i < indexes[0]; i++) { + if (indexes[i + 1] == drc_index) + break; + } + /* Convert core number to logical cpu number */ + thread_index = cpu_first_thread_of_core(i); + rc = 0; } - /* Convert core number to logical cpu number */ - cpu = cpu_first_thread_of_core(i); - rc = 0; err_of_node_put: of_node_put(dn); err: if (rc) printk(KERN_WARNING "drc_index_to_cpu(%d) failed", drc_index); - return cpu; + return thread_index; } /* -- cgit v1.2.3 From 02ef6dd8109b581343ebeb1c4c973513682535d6 Mon Sep 17 00:00:00 2001 From: Michael Bringmann Date: Fri, 1 Dec 2017 17:19:55 -0600 Subject: powerpc: Enable support for ibm,drc-info devtree property To: linuxppc-dev@lists.ozlabs.org From: Michael Bringmann Cc: Michael Bringmann Cc: nfont@linux.vnet.ibm.com Subject: [PATCH V6 4/4] powerpc: Enable support for ibm,drc-info devtree property prom_init.c: Enable support for new DRC device tree property "ibm,drc-info" in initial handshake between the Linux kernel and the front end processor. Signed-off-by: Michael Bringmann Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index acf4b2e0530c..adf044daafd7 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -874,6 +874,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .mmu = 0, .hash_ext = 0, .radix_ext = 0, + .byte22 = OV5_FEAT(OV5_DRC_INFO), }, /* option vector 6: IBM PAPR hints */ -- cgit v1.2.3 From c095ff93f901c1620b28dce4d813dd548bc5236b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 13 Dec 2017 12:26:23 +0100 Subject: powerpc/sysdev: change CPM GPIO to platform_device Since commit 9427ecbed46cc ("gpio: Rework of_gpiochip_set_names() to use device property accessors"), gpio chips have to have a parent, otherwise devprop_gpiochip_set_names() prematurely exists with message "GPIO chip parent is NULL" and doesn't proceed 'gpio-line-names' DT property. This patch wraps the CPM GPIO into a platform driver to allow assignment of the parent device. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/cpm.h | 2 +- arch/powerpc/include/asm/cpm1.h | 2 + arch/powerpc/sysdev/Makefile | 3 +- arch/powerpc/sysdev/cpm1.c | 33 ++++------------- arch/powerpc/sysdev/cpm2.c | 11 ------ arch/powerpc/sysdev/cpm_common.c | 5 ++- arch/powerpc/sysdev/cpm_gpio.c | 80 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 97 insertions(+), 39 deletions(-) create mode 100644 arch/powerpc/sysdev/cpm_gpio.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/cpm.h b/arch/powerpc/include/asm/cpm.h index b925df1b87d0..4c24ea8209bb 100644 --- a/arch/powerpc/include/asm/cpm.h +++ b/arch/powerpc/include/asm/cpm.h @@ -166,6 +166,6 @@ static inline int cpm_command(u32 command, u8 opcode) } #endif /* CONFIG_CPM */ -int cpm2_gpiochip_add32(struct device_node *np); +int cpm2_gpiochip_add32(struct device *dev); #endif diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h index 3db821876d48..a116fe931789 100644 --- a/arch/powerpc/include/asm/cpm1.h +++ b/arch/powerpc/include/asm/cpm1.h @@ -605,5 +605,7 @@ enum cpm_clk { }; int cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode); +int cpm1_gpiochip_add16(struct device *dev); +int cpm1_gpiochip_add32(struct device *dev); #endif /* __CPM1__ */ diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 0baba21404dc..fc31a271830b 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -43,7 +43,8 @@ obj-$(CONFIG_OF_RTC) += of_rtc.o obj-$(CONFIG_CPM) += cpm_common.o obj-$(CONFIG_CPM1) += cpm1.o -obj-$(CONFIG_CPM2) += cpm2.o cpm2_pic.o +obj-$(CONFIG_CPM2) += cpm2.o cpm2_pic.o cpm_gpio.o +obj-$(CONFIG_8xx_GPIO) += cpm_gpio.o obj-$(CONFIG_QUICC_ENGINE) += cpm_common.o obj-$(CONFIG_PPC_DCR) += dcr.o obj-$(CONFIG_UCODE_PATCH) += micropatch.o diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c index c6f154b602fb..5240d3a74a10 100644 --- a/arch/powerpc/sysdev/cpm1.c +++ b/arch/powerpc/sysdev/cpm1.c @@ -629,8 +629,9 @@ static int cpm1_gpio16_dir_in(struct gpio_chip *gc, unsigned int gpio) return 0; } -int cpm1_gpiochip_add16(struct device_node *np) +int cpm1_gpiochip_add16(struct device *dev) { + struct device_node *np = dev->of_node; struct cpm1_gpio16_chip *cpm1_gc; struct of_mm_gpio_chip *mm_gc; struct gpio_chip *gc; @@ -660,6 +661,8 @@ int cpm1_gpiochip_add16(struct device_node *np) gc->get = cpm1_gpio16_get; gc->set = cpm1_gpio16_set; gc->to_irq = cpm1_gpio16_to_irq; + gc->parent = dev; + gc->owner = THIS_MODULE; return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc); } @@ -755,8 +758,9 @@ static int cpm1_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio) return 0; } -int cpm1_gpiochip_add32(struct device_node *np) +int cpm1_gpiochip_add32(struct device *dev) { + struct device_node *np = dev->of_node; struct cpm1_gpio32_chip *cpm1_gc; struct of_mm_gpio_chip *mm_gc; struct gpio_chip *gc; @@ -776,31 +780,10 @@ int cpm1_gpiochip_add32(struct device_node *np) gc->direction_output = cpm1_gpio32_dir_out; gc->get = cpm1_gpio32_get; gc->set = cpm1_gpio32_set; + gc->parent = dev; + gc->owner = THIS_MODULE; return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc); } -static int cpm_init_par_io(void) -{ - struct device_node *np; - - for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-a") - cpm1_gpiochip_add16(np); - - for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-b") - cpm1_gpiochip_add32(np); - - for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-c") - cpm1_gpiochip_add16(np); - - for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-d") - cpm1_gpiochip_add16(np); - - /* Port E uses CPM2 layout */ - for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-e") - cpm2_gpiochip_add32(np); - return 0; -} -arch_initcall(cpm_init_par_io); - #endif /* CONFIG_8xx_GPIO */ diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c index f78ff841652c..07718b9a2c99 100644 --- a/arch/powerpc/sysdev/cpm2.c +++ b/arch/powerpc/sysdev/cpm2.c @@ -354,14 +354,3 @@ void cpm2_set_pin(int port, int pin, int flags) else clrbits32(&iop[port].odr, pin); } - -static int cpm_init_par_io(void) -{ - struct device_node *np; - - for_each_compatible_node(np, NULL, "fsl,cpm2-pario-bank") - cpm2_gpiochip_add32(np); - return 0; -} -arch_initcall(cpm_init_par_io); - diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index 51bf749a4f3a..b74508175b67 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -190,8 +190,9 @@ static int cpm2_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio) return 0; } -int cpm2_gpiochip_add32(struct device_node *np) +int cpm2_gpiochip_add32(struct device *dev) { + struct device_node *np = dev->of_node; struct cpm2_gpio32_chip *cpm2_gc; struct of_mm_gpio_chip *mm_gc; struct gpio_chip *gc; @@ -211,6 +212,8 @@ int cpm2_gpiochip_add32(struct device_node *np) gc->direction_output = cpm2_gpio32_dir_out; gc->get = cpm2_gpio32_get; gc->set = cpm2_gpio32_set; + gc->parent = dev; + gc->owner = THIS_MODULE; return of_mm_gpiochip_add_data(np, mm_gc, cpm2_gc); } diff --git a/arch/powerpc/sysdev/cpm_gpio.c b/arch/powerpc/sysdev/cpm_gpio.c new file mode 100644 index 000000000000..0badc90be666 --- /dev/null +++ b/arch/powerpc/sysdev/cpm_gpio.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common CPM GPIO wrapper for the CPM GPIO ports + * + * Author: Christophe Leroy + * + * Copyright 2017 CS Systemes d'Information. + * + */ + +#include +#include + +#include +#ifdef CONFIG_8xx_GPIO +#include +#endif + +static int cpm_gpio_probe(struct platform_device *ofdev) +{ + struct device *dev = &ofdev->dev; + int (*gp_add)(struct device *dev) = of_device_get_match_data(dev); + + if (!gp_add) + return -ENODEV; + + return gp_add(dev); +} + +static const struct of_device_id cpm_gpio_match[] = { +#ifdef CONFIG_8xx_GPIO + { + .compatible = "fsl,cpm1-pario-bank-a", + .data = cpm1_gpiochip_add16, + }, + { + .compatible = "fsl,cpm1-pario-bank-b", + .data = cpm1_gpiochip_add32, + }, + { + .compatible = "fsl,cpm1-pario-bank-c", + .data = cpm1_gpiochip_add16, + }, + { + .compatible = "fsl,cpm1-pario-bank-d", + .data = cpm1_gpiochip_add16, + }, + /* Port E uses CPM2 layout */ + { + .compatible = "fsl,cpm1-pario-bank-e", + .data = cpm2_gpiochip_add32, + }, +#endif + { + .compatible = "fsl,cpm2-pario-bank", + .data = cpm2_gpiochip_add32, + }, + {}, +}; +MODULE_DEVICE_TABLE(of, cpm_gpio_match); + +static struct platform_driver cpm_gpio_driver = { + .probe = cpm_gpio_probe, + .driver = { + .name = "cpm-gpio", + .owner = THIS_MODULE, + .of_match_table = cpm_gpio_match, + }, +}; + +static int __init cpm_gpio_init(void) +{ + return platform_driver_register(&cpm_gpio_driver); +} +arch_initcall(cpm_gpio_init); + +MODULE_AUTHOR("Christophe Leroy "); +MODULE_DESCRIPTION("Driver for CPM GPIO"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:cpm-gpio"); -- cgit v1.2.3 From 76b03dc07eebe9c8829bc4c40ae357ad04b4b8c1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 6 Nov 2017 17:57:44 +0530 Subject: powerpc/mm: Remove unused flag arg in global_invalidates Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 26c11f678fbf..8888e625a999 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -42,7 +42,7 @@ static void *real_vmalloc_addr(void *x) } /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */ -static int global_invalidates(struct kvm *kvm, unsigned long flags) +static int global_invalidates(struct kvm *kvm) { int global; int cpu; @@ -522,7 +522,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, if (v & HPTE_V_VALID) { hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); rb = compute_tlbie_rb(v, pte_r, pte_index); - do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); + do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); /* * The reference (R) and change (C) bits in a HPT * entry can be set by hardware at any time up until @@ -572,7 +572,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) if (kvm_is_radix(kvm)) return H_FUNCTION; - global = global_invalidates(kvm, 0); + global = global_invalidates(kvm); for (i = 0; i < 4 && ret == H_SUCCESS; ) { n = 0; for (; i < 4; ++i) { @@ -732,8 +732,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, rb = compute_tlbie_rb(v, r, pte_index); hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) | HPTE_V_ABSENT); - do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), - true); + do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); /* Don't lose R/C bit updates done by hardware */ r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C); hpte[1] = cpu_to_be64(r); -- cgit v1.2.3 From 10527e808123f4b12db604993638b368220e7814 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 28 Nov 2017 14:04:40 +0530 Subject: powerpc/hash: Skip non initialized page size in init_hpte_page_sizes One of the easiest way to test config with 4K HPTE is to disable 64K hardware page size like below. int __init htab_dt_scan_page_sizes(unsigned long node, size -= 3; prop += 3; base_idx = get_idx_from_shift(base_shift); - if (base_idx < 0) { + if (base_idx < 0 || base_idx == MMU_PAGE_64K) { /* skip the pte encoding also */ prop += lpnum * 2; size -= lpnum * 2; But then this results in error in other part of the code such as MPSS parsing where we look at 4K base page size and 64K actual page size support. This patch fix MPSS parsing by ignoring the actual page sizes marked unsupported. In reality this can happen only with a corrupt device tree. But it is good to tighten the error check. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 462c34e7b01d..d97be8d20715 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -608,7 +608,7 @@ static void init_hpte_page_sizes(void) continue; /* not a supported page size */ for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) { penc = mmu_psize_defs[bp].penc[ap]; - if (penc == -1) + if (penc == -1 || !mmu_psize_defs[ap].shift) continue; shift = mmu_psize_defs[ap].shift - LP_SHIFT; if (shift <= 0) -- cgit v1.2.3 From 38833faa112c10dd6e3d888f8b53de2b80cb881a Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Tue, 26 Dec 2017 14:00:17 +0100 Subject: powerpc/xive: Properly use static keyword for inline function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix fatal warning during compilation: In file included from arch/powerpc/xmon/xmon.c:54:0: ./arch/powerpc/include/asm/xive.h:157:20: error: no previous prototype for ‘xive_smp_prepare_cpu’ [-Werror=missing-prototypes] extern inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; } ^ Signed-off-by: Mathieu Malaterre Reviewed-by: Cédric Le Goater Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/xive.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index b619a5585cd6..7624e22f5045 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -122,7 +122,7 @@ static inline bool xive_enabled(void) { return false; } static inline bool xive_spapr_init(void) { return false; } static inline bool xive_native_init(void) { return false; } static inline void xive_smp_probe(void) { } -extern inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; } +static inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; } static inline void xive_smp_setup_cpu(void) { } static inline void xive_smp_disable_cpu(void) { } static inline void xive_kexec_teardown_cpu(int secondary) { } -- cgit v1.2.3 From 104d55ae4df928c205268c1eb83b806b53027e13 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Tue, 26 Dec 2017 14:25:47 +0100 Subject: powerpc/xmon: Do not compute/store the major opcode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 5b102782c7f4 ("powerpc/xmon: Enable disassembly files (compilation changes)") usage of variable `op` has been removed. Completely remove opcode computation since not used anymore. Fix fatal warning: arch/powerpc/xmon/ppc-dis.c: In function ‘lookup_powerpc’: arch/powerpc/xmon/ppc-dis.c:96:17: error: variable ‘op’ set but not used [-Werror=unused-but-set-variable] unsigned long op; ^~ Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/xmon/ppc-dis.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c index 31db8c072acd..9deea5ee13f6 100644 --- a/arch/powerpc/xmon/ppc-dis.c +++ b/arch/powerpc/xmon/ppc-dis.c @@ -93,10 +93,6 @@ lookup_powerpc (unsigned long insn, ppc_cpu_t dialect) { const struct powerpc_opcode *opcode; const struct powerpc_opcode *opcode_end; - unsigned long op; - - /* Get the major opcode of the instruction. */ - op = PPC_OP (insn); opcode_end = powerpc_opcodes + powerpc_num_opcodes; /* Find the first match in the opcode table for this major opcode. */ -- cgit v1.2.3 From 4a7b8a499724c98493023e1e3009aab005968feb Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Tue, 26 Dec 2017 15:12:33 +0100 Subject: powerpc: Fix old-style function definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix warnings such as: arch/powerpc/platforms/powermac/backlight.c: In function ‘pmac_backlight_get_legacy_brightness’: arch/powerpc/platforms/powermac/backlight.c:189:5: error: old-style function definition [-Werror=old-style-definition] int pmac_backlight_get_legacy_brightness() ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powermac/backlight.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c index a00096b1c713..6b5dcccae1d3 100644 --- a/arch/powerpc/platforms/powermac/backlight.c +++ b/arch/powerpc/platforms/powermac/backlight.c @@ -186,7 +186,7 @@ int pmac_backlight_set_legacy_brightness(int brightness) return __pmac_backlight_set_legacy_brightness(brightness); } -int pmac_backlight_get_legacy_brightness() +int pmac_backlight_get_legacy_brightness(void) { int result = -ENXIO; @@ -205,12 +205,12 @@ int pmac_backlight_get_legacy_brightness() return result; } -void pmac_backlight_disable() +void pmac_backlight_disable(void) { atomic_inc(&kernel_backlight_disabled); } -void pmac_backlight_enable() +void pmac_backlight_enable(void) { atomic_dec(&kernel_backlight_disabled); } -- cgit v1.2.3 From 600ecc1936be075f611f299755e2de90b205eb82 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 14 Dec 2017 17:54:00 +0100 Subject: powerpc/boot/dts: Remove leading 0x and 0s from bindings notation Improve the DTS files by removing all the leading "0x" and zeros to fix the following dtc warnings: Warning (unit_address_format): Node /XXX unit name should not have leading "0x" and: Warning (unit_address_format): Node /XXX unit name should not have leading 0s Converted using the following command: find . -type f \( -iname *.dts -o -iname *.dtsi \) -exec sed -E -i -e "s/@0x([0-9a-fA-F\.]+)\s?\{/@\L\1 \{/g" -e "s/@0+([0-9a-fA-F\.]+)\s?\{/@\L\1 \{/g" {} + For simplicity, two sed expressions were used to solve each warnings separately. To make the regex expression more robust a few other issues were resolved, namely setting unit-address to lower case, and adding a whitespace before the the opening curly brace: https://elinux.org/Device_Tree_Linux#Linux_conventions This is a follow up to commit 4c9847b7375a ("dt-bindings: Remove leading 0x from bindings notation") Reported-by: David Daney Suggested-by: Rob Herring Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/boot/dts/a3m071.dts | 10 +++++----- arch/powerpc/boot/dts/akebono.dts | 4 ++-- arch/powerpc/boot/dts/c2k.dts | 6 +++--- arch/powerpc/boot/dts/currituck.dts | 2 +- arch/powerpc/boot/dts/fsl/mpc8568mds.dts | 12 +++++------ arch/powerpc/boot/dts/fsl/mpc8569mds.dts | 20 +++++++++---------- arch/powerpc/boot/dts/fsl/p1021mds.dts | 6 +++--- arch/powerpc/boot/dts/fsl/p1025rdb.dtsi | 8 ++++---- arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts | 2 +- arch/powerpc/boot/dts/fsl/p1025twr.dtsi | 8 ++++---- arch/powerpc/boot/dts/fsl/t1040rdb.dts | 2 +- arch/powerpc/boot/dts/fsl/t1042d4rdb.dts | 10 +++++----- arch/powerpc/boot/dts/fsl/t1042rdb.dts | 2 +- arch/powerpc/boot/dts/fsl/t104xrdb.dtsi | 6 +++--- arch/powerpc/boot/dts/fsp2.dts | 6 +++--- arch/powerpc/boot/dts/gamecube.dts | 14 ++++++------- arch/powerpc/boot/dts/haleakala.dts | 2 +- arch/powerpc/boot/dts/kilauea.dts | 4 ++-- arch/powerpc/boot/dts/kmeter1.dts | 10 +++++----- arch/powerpc/boot/dts/makalu.dts | 4 ++-- arch/powerpc/boot/dts/mpc832x_mds.dts | 10 +++++----- arch/powerpc/boot/dts/mpc832x_rdb.dts | 8 ++++---- arch/powerpc/boot/dts/mpc836x_mds.dts | 8 ++++---- arch/powerpc/boot/dts/sbc8548-altflash.dts | 8 ++++---- arch/powerpc/boot/dts/sbc8548.dts | 8 ++++---- arch/powerpc/boot/dts/wii.dts | 32 +++++++++++++++--------------- 26 files changed, 106 insertions(+), 106 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/a3m071.dts b/arch/powerpc/boot/dts/a3m071.dts index bf81b8f9704c..187ce458d03a 100644 --- a/arch/powerpc/boot/dts/a3m071.dts +++ b/arch/powerpc/boot/dts/a3m071.dts @@ -105,24 +105,24 @@ reg = <0 0x0 0x02000000>; compatible = "cfi-flash"; bank-width = <2>; - partition@0x0 { + partition@0 { label = "u-boot"; reg = <0x00000000 0x00040000>; read-only; }; - partition@0x00040000 { + partition@40000 { label = "env"; reg = <0x00040000 0x00020000>; }; - partition@0x00060000 { + partition@60000 { label = "dtb"; reg = <0x00060000 0x00020000>; }; - partition@0x00080000 { + partition@80000 { label = "kernel"; reg = <0x00080000 0x00500000>; }; - partition@0x00580000 { + partition@580000 { label = "root"; reg = <0x00580000 0x00A80000>; }; diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts index e61d5dc598c1..746779202a12 100644 --- a/arch/powerpc/boot/dts/akebono.dts +++ b/arch/powerpc/boot/dts/akebono.dts @@ -216,7 +216,7 @@ interrupts = <39 2>; }; - IIC0: i2c@00000000 { + IIC0: i2c@0 { compatible = "ibm,iic-476gtr", "ibm,iic"; reg = <0x0 0x00000020>; interrupt-parent = <&MPIC>; @@ -229,7 +229,7 @@ }; }; - IIC1: i2c@00000100 { + IIC1: i2c@100 { compatible = "ibm,iic-476gtr", "ibm,iic"; reg = <0x100 0x00000020>; interrupt-parent = <&MPIC>; diff --git a/arch/powerpc/boot/dts/c2k.dts b/arch/powerpc/boot/dts/c2k.dts index 1e32903cb0a8..27f169e3ade9 100644 --- a/arch/powerpc/boot/dts/c2k.dts +++ b/arch/powerpc/boot/dts/c2k.dts @@ -276,14 +276,14 @@ >; }; - cpu-error@0070 { + cpu-error@70 { compatible = "marvell,mv64360-cpu-error"; reg = <0x0070 0x10 0x0128 0x28>; interrupts = <3>; interrupt-parent = <&PIC>; }; - sram-ctrl@0380 { + sram-ctrl@380 { compatible = "marvell,mv64360-sram-ctrl"; reg = <0x0380 0x80>; interrupts = <13>; @@ -311,7 +311,7 @@ interrupt-parent = <&PIC>; }; /* Devices attached to the device controller */ - devicebus@045c { + devicebus@45c { #address-cells = <2>; #size-cells = <1>; compatible = "marvell,mv64306-devctrl"; diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts index 4191e1850ea1..f2ad5815f08d 100644 --- a/arch/powerpc/boot/dts/currituck.dts +++ b/arch/powerpc/boot/dts/currituck.dts @@ -108,7 +108,7 @@ reg = <0x50000000 0x4>; }; - IIC0: i2c@00000000 { + IIC0: i2c@0 { compatible = "ibm,iic-currituck", "ibm,iic"; reg = <0x0 0x00000014>; interrupt-parent = <&MPIC>; diff --git a/arch/powerpc/boot/dts/fsl/mpc8568mds.dts b/arch/powerpc/boot/dts/fsl/mpc8568mds.dts index 01706a339603..bc3e8039bdc7 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8568mds.dts +++ b/arch/powerpc/boot/dts/fsl/mpc8568mds.dts @@ -126,7 +126,7 @@ par_io@e0100 { num-ports = <7>; - pio1: ucc_pin@01 { + pio1: ucc_pin@1 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x4 0xa 0x1 0x0 0x2 0x0 /* TxD0 */ @@ -154,7 +154,7 @@ 0x1 0x1f 0x2 0x0 0x3 0x0>; /* GTX125 */ }; - pio2: ucc_pin@02 { + pio2: ucc_pin@2 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x5 0xa 0x1 0x0 0x2 0x0 /* TxD0 */ @@ -228,22 +228,22 @@ /* These are the same PHYs as on * gianfar's MDIO bus */ - qe_phy0: ethernet-phy@07 { + qe_phy0: ethernet-phy@7 { interrupt-parent = <&mpic>; interrupts = <1 1 0 0>; reg = <0x7>; }; - qe_phy1: ethernet-phy@01 { + qe_phy1: ethernet-phy@1 { interrupt-parent = <&mpic>; interrupts = <2 1 0 0>; reg = <0x1>; }; - qe_phy2: ethernet-phy@02 { + qe_phy2: ethernet-phy@2 { interrupt-parent = <&mpic>; interrupts = <1 1 0 0>; reg = <0x2>; }; - qe_phy3: ethernet-phy@03 { + qe_phy3: ethernet-phy@3 { interrupt-parent = <&mpic>; interrupts = <2 1 0 0>; reg = <0x3>; diff --git a/arch/powerpc/boot/dts/fsl/mpc8569mds.dts b/arch/powerpc/boot/dts/fsl/mpc8569mds.dts index 76b2bd6f7742..d8367ceddea6 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8569mds.dts +++ b/arch/powerpc/boot/dts/fsl/mpc8569mds.dts @@ -141,7 +141,7 @@ gpio-controller; }; - pio1: ucc_pin@01 { + pio1: ucc_pin@1 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -161,7 +161,7 @@ 0x2 0x14 0x1 0x0 0x2 0x0>; /* ENET1_GTXCLK */ }; - pio2: ucc_pin@02 { + pio2: ucc_pin@2 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -181,7 +181,7 @@ 0x2 0x2 0x1 0x0 0x2 0x0>; /* ENET2_GTXCLK */ }; - pio3: ucc_pin@03 { + pio3: ucc_pin@3 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -201,7 +201,7 @@ 0x2 0x19 0x1 0x0 0x2 0x0>; /* ENET3_GTXCLK */ }; - pio4: ucc_pin@04 { + pio4: ucc_pin@4 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -272,30 +272,30 @@ reg = <0x2120 0x18>; compatible = "fsl,ucc-mdio"; - qe_phy0: ethernet-phy@07 { + qe_phy0: ethernet-phy@7 { interrupt-parent = <&mpic>; interrupts = <1 1 0 0>; reg = <0x7>; }; - qe_phy1: ethernet-phy@01 { + qe_phy1: ethernet-phy@1 { interrupt-parent = <&mpic>; interrupts = <2 1 0 0>; reg = <0x1>; }; - qe_phy2: ethernet-phy@02 { + qe_phy2: ethernet-phy@2 { interrupt-parent = <&mpic>; interrupts = <3 1 0 0>; reg = <0x2>; }; - qe_phy3: ethernet-phy@03 { + qe_phy3: ethernet-phy@3 { interrupt-parent = <&mpic>; interrupts = <4 1 0 0>; reg = <0x3>; }; - qe_phy5: ethernet-phy@04 { + qe_phy5: ethernet-phy@4 { reg = <0x04>; }; - qe_phy7: ethernet-phy@06 { + qe_phy7: ethernet-phy@6 { reg = <0x6>; }; tbi1: tbi-phy@11 { diff --git a/arch/powerpc/boot/dts/fsl/p1021mds.dts b/arch/powerpc/boot/dts/fsl/p1021mds.dts index 291454c75dda..1047802f4d2a 100644 --- a/arch/powerpc/boot/dts/fsl/p1021mds.dts +++ b/arch/powerpc/boot/dts/fsl/p1021mds.dts @@ -202,7 +202,7 @@ ranges = <0x0 0xe0100 0x60>; device_type = "par_io"; num-ports = <3>; - pio1: ucc_pin@01 { + pio1: ucc_pin@1 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -225,7 +225,7 @@ 0x0 0x10 0x2 0x0 0x2 0x0>; /* ENET1_COL */ }; - pio2: ucc_pin@02 { + pio2: ucc_pin@2 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -296,7 +296,7 @@ interrupts = <4 1 0 0>; reg = <0x0>; }; - qe_phy1: ethernet-phy@03 { + qe_phy1: ethernet-phy@3 { interrupt-parent = <&mpic>; interrupts = <5 1 0 0>; reg = <0x3>; diff --git a/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi index d44bb12debb0..0a5434a631c3 100644 --- a/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi @@ -245,7 +245,7 @@ ranges = <0x0 0xe0100 0x60>; device_type = "par_io"; num-ports = <3>; - pio1: ucc_pin@01 { + pio1: ucc_pin@1 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -268,7 +268,7 @@ 0x0 0x10 0x2 0x0 0x2 0x0>; /* ENET1_COL */ }; - pio2: ucc_pin@02 { + pio2: ucc_pin@2 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -283,7 +283,7 @@ 0x1 0x8 0x2 0x0 0x2 0x0>; /* ENET5_RX_ER_SER5_CD_B */ }; - pio3: ucc_pin@03 { + pio3: ucc_pin@3 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x0 0x16 0x2 0x0 0x2 0x0 /* SER7_CD_B*/ @@ -293,7 +293,7 @@ 0x0 0x15 0x1 0x0 0x2 0x0>; /* SER7_TXD0*/ }; - pio4: ucc_pin@04 { + pio4: ucc_pin@4 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x0 0x2 0x0 0x2 0x0 /* SER3_CD_B*/ diff --git a/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts b/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts index b15acbaea34b..ea33b57f8774 100644 --- a/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts +++ b/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts @@ -106,7 +106,7 @@ interrupts = <4 1 0 0>; reg = <0x6>; }; - qe_phy1: ethernet-phy@03 { + qe_phy1: ethernet-phy@3 { interrupt-parent = <&mpic>; interrupts = <5 1 0 0>; reg = <0x3>; diff --git a/arch/powerpc/boot/dts/fsl/p1025twr.dtsi b/arch/powerpc/boot/dts/fsl/p1025twr.dtsi index 08816fb474f5..ab75b8f29ae2 100644 --- a/arch/powerpc/boot/dts/fsl/p1025twr.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1025twr.dtsi @@ -172,7 +172,7 @@ ranges = <0x0 0xe0100 0x60>; device_type = "par_io"; num-ports = <3>; - pio1: ucc_pin@01 { + pio1: ucc_pin@1 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -195,7 +195,7 @@ 0x0 0x10 0x2 0x0 0x2 0x0>; /* ENET1_COL */ }; - pio2: ucc_pin@02 { + pio2: ucc_pin@2 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */ @@ -210,7 +210,7 @@ 0x1 0x8 0x2 0x0 0x2 0x0>; /* ENET5_RX_ER_SER5_CD_B */ }; - pio3: ucc_pin@03 { + pio3: ucc_pin@3 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x0 0x16 0x2 0x0 0x2 0x0 /* SER7_CD_B*/ @@ -220,7 +220,7 @@ 0x0 0x15 0x1 0x0 0x2 0x0>; /* SER7_TXD0*/ }; - pio4: ucc_pin@04 { + pio4: ucc_pin@4 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0x1 0x0 0x2 0x0 0x2 0x0 /* SER3_CD_B*/ diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts index 621f2c6ee6ad..65ff34c49025 100644 --- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts @@ -61,7 +61,7 @@ }; mdio@fc000 { - phy_sgmii_2: ethernet-phy@03 { + phy_sgmii_2: ethernet-phy@3 { reg = <0x03>; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts index fcd2aeb5b8ac..4fa15f48a4c3 100644 --- a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts @@ -77,23 +77,23 @@ }; mdio0: mdio@fc000 { - phy_sgmii_0: ethernet-phy@02 { + phy_sgmii_0: ethernet-phy@2 { reg = <0x02>; }; - phy_sgmii_1: ethernet-phy@03 { + phy_sgmii_1: ethernet-phy@3 { reg = <0x03>; }; - phy_sgmii_2: ethernet-phy@01 { + phy_sgmii_2: ethernet-phy@1 { reg = <0x01>; }; - phy_rgmii_0: ethernet-phy@04 { + phy_rgmii_0: ethernet-phy@4 { reg = <0x04>; }; - phy_rgmii_1: ethernet-phy@05 { + phy_rgmii_1: ethernet-phy@5 { reg = <0x05>; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb.dts b/arch/powerpc/boot/dts/fsl/t1042rdb.dts index 2c138627b1b4..3ebb712224cb 100644 --- a/arch/powerpc/boot/dts/fsl/t1042rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1042rdb.dts @@ -59,7 +59,7 @@ }; mdio@fc000 { - phy_sgmii_2: ethernet-phy@03 { + phy_sgmii_2: ethernet-phy@3 { reg = <0x03>; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi index 5fdddbd2a62b..099a598c74c0 100644 --- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi @@ -148,15 +148,15 @@ }; mdio0: mdio@fc000 { - phy_sgmii_2: ethernet-phy@03 { + phy_sgmii_2: ethernet-phy@3 { reg = <0x03>; }; - phy_rgmii_0: ethernet-phy@01 { + phy_rgmii_0: ethernet-phy@1 { reg = <0x01>; }; - phy_rgmii_1: ethernet-phy@02 { + phy_rgmii_1: ethernet-phy@2 { reg = <0x02>; }; }; diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts index f10a64aeb83b..6560283c5aec 100644 --- a/arch/powerpc/boot/dts/fsp2.dts +++ b/arch/powerpc/boot/dts/fsp2.dts @@ -583,21 +583,21 @@ }; }; - OHCI1: ohci@02040000 { + OHCI1: ohci@2040000 { compatible = "ohci-le"; reg = <0x02040000 0xa0>; interrupt-parent = <&UIC1_3>; interrupts = <28 0x8 29 0x8>; }; - OHCI2: ohci@02080000 { + OHCI2: ohci@2080000 { compatible = "ohci-le"; reg = <0x02080000 0xa0>; interrupt-parent = <&UIC1_3>; interrupts = <30 0x8 31 0x8>; }; - EHCI: ehci@02000000 { + EHCI: ehci@2000000 { compatible = "usb-ehci"; reg = <0x02000000 0xa4>; interrupt-parent = <&UIC1_3>; diff --git a/arch/powerpc/boot/dts/gamecube.dts b/arch/powerpc/boot/dts/gamecube.dts index ef3be0e58b02..58d06c9ee08b 100644 --- a/arch/powerpc/boot/dts/gamecube.dts +++ b/arch/powerpc/boot/dts/gamecube.dts @@ -54,13 +54,13 @@ ranges = <0x0c000000 0x0c000000 0x00010000>; interrupt-parent = <&PIC>; - video@0c002000 { + video@c002000 { compatible = "nintendo,flipper-vi"; reg = <0x0c002000 0x100>; interrupts = <8>; }; - processor-interface@0c003000 { + processor-interface@c003000 { compatible = "nintendo,flipper-pi"; reg = <0x0c003000 0x100>; @@ -71,7 +71,7 @@ }; }; - dsp@0c005000 { + dsp@c005000 { #address-cells = <1>; #size-cells = <1>; compatible = "nintendo,flipper-dsp"; @@ -84,26 +84,26 @@ }; }; - disk@0c006000 { + disk@c006000 { compatible = "nintendo,flipper-di"; reg = <0x0c006000 0x40>; interrupts = <2>; }; - audio@0c006c00 { + audio@c006c00 { compatible = "nintendo,flipper-ai"; reg = <0x0c006c00 0x20>; interrupts = <6>; }; - gamepad-controller@0c006400 { + gamepad-controller@c006400 { compatible = "nintendo,flipper-si"; reg = <0x0c006400 0x100>; interrupts = <3>; }; /* External Interface bus */ - exi@0c006800 { + exi@c006800 { compatible = "nintendo,flipper-exi"; reg = <0x0c006800 0x40>; virtual-reg = <0x0c006800>; diff --git a/arch/powerpc/boot/dts/haleakala.dts b/arch/powerpc/boot/dts/haleakala.dts index 2b256694eca6..cb16dad43c92 100644 --- a/arch/powerpc/boot/dts/haleakala.dts +++ b/arch/powerpc/boot/dts/haleakala.dts @@ -237,7 +237,7 @@ }; }; - PCIE0: pciex@0a0000000 { + PCIE0: pciex@a0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts index 5ba7f01e2a29..2a3413221cc1 100644 --- a/arch/powerpc/boot/dts/kilauea.dts +++ b/arch/powerpc/boot/dts/kilauea.dts @@ -322,7 +322,7 @@ }; }; - PCIE0: pciex@0a0000000 { + PCIE0: pciex@a0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -363,7 +363,7 @@ 0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@0c0000000 { + PCIE1: pciex@c0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/kmeter1.dts b/arch/powerpc/boot/dts/kmeter1.dts index 983aee185793..9fa33d9ba966 100644 --- a/arch/powerpc/boot/dts/kmeter1.dts +++ b/arch/powerpc/boot/dts/kmeter1.dts @@ -434,27 +434,27 @@ compatible = "fsl,ucc-mdio"; /* Piggy2 (UCC4, MDIO 0x00, RMII) */ - phy_piggy2: ethernet-phy@00 { + phy_piggy2: ethernet-phy@0 { reg = <0x0>; }; /* Eth-1 (UCC5, MDIO 0x08, RMII) */ - phy_eth1: ethernet-phy@08 { + phy_eth1: ethernet-phy@8 { reg = <0x08>; }; /* Eth-2 (UCC6, MDIO 0x09, RMII) */ - phy_eth2: ethernet-phy@09 { + phy_eth2: ethernet-phy@9 { reg = <0x09>; }; /* Eth-3 (UCC7, MDIO 0x0a, RMII) */ - phy_eth3: ethernet-phy@0a { + phy_eth3: ethernet-phy@a { reg = <0x0a>; }; /* Eth-4 (UCC8, MDIO 0x0b, RMII) */ - phy_eth4: ethernet-phy@0b { + phy_eth4: ethernet-phy@b { reg = <0x0b>; }; diff --git a/arch/powerpc/boot/dts/makalu.dts b/arch/powerpc/boot/dts/makalu.dts index 63d48b632c84..bf8fe1629392 100644 --- a/arch/powerpc/boot/dts/makalu.dts +++ b/arch/powerpc/boot/dts/makalu.dts @@ -268,7 +268,7 @@ }; }; - PCIE0: pciex@0a0000000 { + PCIE0: pciex@a0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -309,7 +309,7 @@ 0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@0c0000000 { + PCIE1: pciex@c0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/mpc832x_mds.dts b/arch/powerpc/boot/dts/mpc832x_mds.dts index 0793cdf0d46e..49c7d657118a 100644 --- a/arch/powerpc/boot/dts/mpc832x_mds.dts +++ b/arch/powerpc/boot/dts/mpc832x_mds.dts @@ -186,7 +186,7 @@ device_type = "par_io"; num-ports = <7>; - pio3: ucc_pin@03 { + pio3: ucc_pin@3 { pio-map = < /* port pin dir open_drain assignment has_irq */ 3 4 3 0 2 0 /* MDIO */ @@ -208,7 +208,7 @@ 1 12 1 0 1 0 /* TX_EN */ 1 13 2 0 1 0>; /* CRS */ }; - pio4: ucc_pin@04 { + pio4: ucc_pin@4 { pio-map = < /* port pin dir open_drain assignment has_irq */ 3 31 2 0 1 0 /* RX_CLK (CLK7) */ @@ -228,7 +228,7 @@ 1 30 1 0 1 0 /* TX_EN */ 1 31 2 0 1 0>; /* CRS */ }; - pio5: ucc_pin@05 { + pio5: ucc_pin@5 { pio-map = < /* * open has @@ -352,12 +352,12 @@ reg = <0x2320 0x18>; compatible = "fsl,ucc-mdio"; - phy3: ethernet-phy@03 { + phy3: ethernet-phy@3 { interrupt-parent = <&ipic>; interrupts = <17 0x8>; reg = <0x3>; }; - phy4: ethernet-phy@04 { + phy4: ethernet-phy@4 { interrupt-parent = <&ipic>; interrupts = <18 0x8>; reg = <0x4>; diff --git a/arch/powerpc/boot/dts/mpc832x_rdb.dts b/arch/powerpc/boot/dts/mpc832x_rdb.dts index 91df1eb16667..647cae14c16d 100644 --- a/arch/powerpc/boot/dts/mpc832x_rdb.dts +++ b/arch/powerpc/boot/dts/mpc832x_rdb.dts @@ -175,7 +175,7 @@ gpio-controller; }; - ucc2pio:ucc_pin@02 { + ucc2pio:ucc_pin@2 { pio-map = < /* port pin dir open_drain assignment has_irq */ 3 4 3 0 2 0 /* MDIO */ @@ -197,7 +197,7 @@ 0 30 1 0 1 0 /* TX_EN */ 0 31 2 0 1 0>; /* CRS */ }; - ucc3pio:ucc_pin@03 { + ucc3pio:ucc_pin@3 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0 13 2 0 1 0 /* RX_CLK (CLK9) */ @@ -310,12 +310,12 @@ reg = <0x3120 0x18>; compatible = "fsl,ucc-mdio"; - phy00:ethernet-phy@00 { + phy00:ethernet-phy@0 { interrupt-parent = <&ipic>; interrupts = <0>; reg = <0x0>; }; - phy04:ethernet-phy@04 { + phy04:ethernet-phy@4 { interrupt-parent = <&ipic>; interrupts = <0>; reg = <0x4>; diff --git a/arch/powerpc/boot/dts/mpc836x_mds.dts b/arch/powerpc/boot/dts/mpc836x_mds.dts index ecb6ccd3a6aa..539fd9f72eda 100644 --- a/arch/powerpc/boot/dts/mpc836x_mds.dts +++ b/arch/powerpc/boot/dts/mpc836x_mds.dts @@ -228,7 +228,7 @@ gpio-controller; }; - pio1: ucc_pin@01 { + pio1: ucc_pin@1 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0 3 1 0 1 0 /* TxD0 */ @@ -255,7 +255,7 @@ 2 9 1 0 3 0 /* GTX_CLK - CLK10 */ 2 8 2 0 1 0>; /* GTX125 - CLK9 */ }; - pio2: ucc_pin@02 { + pio2: ucc_pin@2 { pio-map = < /* port pin dir open_drain assignment has_irq */ 0 17 1 0 1 0 /* TxD0 */ @@ -393,12 +393,12 @@ reg = <0x2120 0x18>; compatible = "fsl,ucc-mdio"; - phy0: ethernet-phy@00 { + phy0: ethernet-phy@0 { interrupt-parent = <&ipic>; interrupts = <17 0x8>; reg = <0x0>; }; - phy1: ethernet-phy@01 { + phy1: ethernet-phy@1 { interrupt-parent = <&ipic>; interrupts = <18 0x8>; reg = <0x1>; diff --git a/arch/powerpc/boot/dts/sbc8548-altflash.dts b/arch/powerpc/boot/dts/sbc8548-altflash.dts index 0b38a0defd2c..8967a56adad4 100644 --- a/arch/powerpc/boot/dts/sbc8548-altflash.dts +++ b/arch/powerpc/boot/dts/sbc8548-altflash.dts @@ -40,12 +40,12 @@ compatible = "intel,JS28F128", "cfi-flash"; bank-width = <4>; device-width = <1>; - partition@0x0 { + partition@0 { label = "space"; /* FC000000 -> FFEFFFFF */ reg = <0x00000000 0x03f00000>; }; - partition@0x03f00000 { + partition@3f00000 { label = "bootloader"; /* FFF00000 -> FFFFFFFF */ reg = <0x03f00000 0x00100000>; @@ -95,12 +95,12 @@ reg = <0x6 0x0 0x800000>; bank-width = <1>; device-width = <1>; - partition@0x0 { + partition@0 { label = "space"; /* EF800000 -> EFF9FFFF */ reg = <0x00000000 0x007a0000>; }; - partition@0x7a0000 { + partition@7a0000 { label = "bootloader"; /* EFFA0000 -> EFFFFFFF */ reg = <0x007a0000 0x00060000>; diff --git a/arch/powerpc/boot/dts/sbc8548.dts b/arch/powerpc/boot/dts/sbc8548.dts index 1df2a0955668..9bdb828a504e 100644 --- a/arch/powerpc/boot/dts/sbc8548.dts +++ b/arch/powerpc/boot/dts/sbc8548.dts @@ -38,12 +38,12 @@ reg = <0x0 0x0 0x800000>; bank-width = <1>; device-width = <1>; - partition@0x0 { + partition@0 { label = "space"; /* FF800000 -> FFF9FFFF */ reg = <0x00000000 0x007a0000>; }; - partition@0x7a0000 { + partition@7a0000 { label = "bootloader"; /* FFFA0000 -> FFFFFFFF */ reg = <0x007a0000 0x00060000>; @@ -92,12 +92,12 @@ compatible = "intel,JS28F128", "cfi-flash"; bank-width = <4>; device-width = <1>; - partition@0x0 { + partition@0 { label = "space"; /* EC000000 -> EFEFFFFF */ reg = <0x00000000 0x03f00000>; }; - partition@0x03f00000 { + partition@3f00000 { label = "bootloader"; /* EFF00000 -> EFFFFFFF */ reg = <0x03f00000 0x00100000>; diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts index 77528c9a8dbd..17a5babb098d 100644 --- a/arch/powerpc/boot/dts/wii.dts +++ b/arch/powerpc/boot/dts/wii.dts @@ -65,14 +65,14 @@ 0x0d800000 0x0d800000 0x00800000>; interrupt-parent = <&PIC0>; - video@0c002000 { + video@c002000 { compatible = "nintendo,hollywood-vi", "nintendo,flipper-vi"; reg = <0x0c002000 0x100>; interrupts = <8>; }; - processor-interface@0c003000 { + processor-interface@c003000 { compatible = "nintendo,hollywood-pi", "nintendo,flipper-pi"; reg = <0x0c003000 0x100>; @@ -84,7 +84,7 @@ }; }; - dsp@0c005000 { + dsp@c005000 { #address-cells = <1>; #size-cells = <1>; compatible = "nintendo,hollywood-dsp", @@ -93,14 +93,14 @@ interrupts = <6>; }; - gamepad-controller@0d006400 { + gamepad-controller@d006400 { compatible = "nintendo,hollywood-si", "nintendo,flipper-si"; reg = <0x0d006400 0x100>; interrupts = <3>; }; - audio@0c006c00 { + audio@c006c00 { compatible = "nintendo,hollywood-ai", "nintendo,flipper-ai"; reg = <0x0d006c00 0x20>; @@ -108,7 +108,7 @@ }; /* External Interface bus */ - exi@0d006800 { + exi@d006800 { compatible = "nintendo,hollywood-exi", "nintendo,flipper-exi"; reg = <0x0d006800 0x40>; @@ -116,7 +116,7 @@ interrupts = <4>; }; - usb@0d040000 { + usb@d040000 { compatible = "nintendo,hollywood-usb-ehci", "usb-ehci"; reg = <0x0d040000 0x100>; @@ -124,7 +124,7 @@ interrupt-parent = <&PIC1>; }; - usb@0d050000 { + usb@d050000 { compatible = "nintendo,hollywood-usb-ohci", "usb-ohci"; reg = <0x0d050000 0x100>; @@ -132,7 +132,7 @@ interrupt-parent = <&PIC1>; }; - usb@0d060000 { + usb@d060000 { compatible = "nintendo,hollywood-usb-ohci", "usb-ohci"; reg = <0x0d060000 0x100>; @@ -140,7 +140,7 @@ interrupt-parent = <&PIC1>; }; - sd@0d070000 { + sd@d070000 { compatible = "nintendo,hollywood-sdhci", "sdhci"; reg = <0x0d070000 0x200>; @@ -148,7 +148,7 @@ interrupt-parent = <&PIC1>; }; - sdio@0d080000 { + sdio@d080000 { compatible = "nintendo,hollywood-sdhci", "sdhci"; reg = <0x0d080000 0x200>; @@ -156,14 +156,14 @@ interrupt-parent = <&PIC1>; }; - ipc@0d000000 { + ipc@d000000 { compatible = "nintendo,hollywood-ipc"; reg = <0x0d000000 0x10>; interrupts = <30>; interrupt-parent = <&PIC1>; }; - PIC1: pic1@0d800030 { + PIC1: pic1@d800030 { #interrupt-cells = <1>; compatible = "nintendo,hollywood-pic"; reg = <0x0d800030 0x10>; @@ -171,7 +171,7 @@ interrupts = <14>; }; - GPIO: gpio@0d8000c0 { + GPIO: gpio@d8000c0 { #gpio-cells = <2>; compatible = "nintendo,hollywood-gpio"; reg = <0x0d8000c0 0x40>; @@ -203,12 +203,12 @@ */ }; - control@0d800100 { + control@d800100 { compatible = "nintendo,hollywood-control"; reg = <0x0d800100 0x300>; }; - disk@0d806000 { + disk@d806000 { compatible = "nintendo,hollywood-di"; reg = <0x0d806000 0x40>; interrupts = <2>; -- cgit v1.2.3 From 57ad583f2086d55ada284c54bfc440123cf73964 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Thu, 12 Jan 2017 14:54:13 +1100 Subject: powerpc: Use octal numbers for file permissions Symbolic macros are unintuitive and hard to read, whereas octal constants are much easier to interpret. Replace macros for the basic permission flags (user/group/other read/write/execute) with numeric constants instead, across the whole powerpc tree. Introducing a significant number of changes across the tree for no runtime benefit isn't exactly desirable, but so long as these macros are still used in the tree people will keep sending patches that add them. Not only are they hard to parse at a glance, there are multiple ways of coming to the same value (as you can see with 0444 and 0644 in this patch) which hurts readability. Signed-off-by: Russell Currey Reviewed-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_sysfs.c | 2 +- arch/powerpc/kernel/proc_powerpc.c | 2 +- arch/powerpc/kernel/rtas-proc.c | 14 +++++++------- arch/powerpc/kernel/rtas_flash.c | 2 +- arch/powerpc/kernel/rtasd.c | 2 +- arch/powerpc/kernel/traps.c | 4 ++-- arch/powerpc/kvm/book3s_hv.c | 10 ++++------ arch/powerpc/kvm/book3s_xics.c | 2 +- arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | 2 +- arch/powerpc/platforms/cell/spufs/inode.c | 4 ++-- arch/powerpc/platforms/powernv/opal-dump.c | 4 ++-- arch/powerpc/platforms/powernv/opal-elog.c | 4 ++-- arch/powerpc/platforms/powernv/opal-sysparam.c | 6 +++--- arch/powerpc/platforms/pseries/cmm.c | 16 ++++++++-------- arch/powerpc/platforms/pseries/hvCall_inst.c | 2 +- arch/powerpc/platforms/pseries/ibmebus.c | 4 ++-- arch/powerpc/platforms/pseries/lparcfg.c | 4 ++-- arch/powerpc/platforms/pseries/mobility.c | 2 +- arch/powerpc/platforms/pseries/reconfig.c | 2 +- arch/powerpc/platforms/pseries/scanlog.c | 2 +- arch/powerpc/platforms/pseries/suspend.c | 3 +-- arch/powerpc/sysdev/axonram.c | 2 +- arch/powerpc/sysdev/mv64x60_pci.c | 2 +- 23 files changed, 47 insertions(+), 50 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 797549289798..2fb6a2d09461 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -48,7 +48,7 @@ static ssize_t eeh_show_##_name(struct device *dev, \ \ return sprintf(buf, _format "\n", edev->_memb); \ } \ -static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL); +static DEVICE_ATTR(_name, 0444, eeh_show_##_name, NULL); EEH_SHOW_ATTR(eeh_mode, mode, "0x%x"); EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index 56548bf6231f..9bfbd800d32f 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -63,7 +63,7 @@ static int __init proc_ppc64_init(void) { struct proc_dir_entry *pde; - pde = proc_create_data("powerpc/systemcfg", S_IFREG|S_IRUGO, NULL, + pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL, &page_map_fops, vdso_data); if (!pde) return 1; diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index c8c5f3a550c2..fb070d8cad07 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -261,19 +261,19 @@ static int __init proc_rtas_init(void) if (rtas_node == NULL) return -ENODEV; - proc_create("powerpc/rtas/progress", S_IRUGO|S_IWUSR, NULL, + proc_create("powerpc/rtas/progress", 0644, NULL, &ppc_rtas_progress_operations); - proc_create("powerpc/rtas/clock", S_IRUGO|S_IWUSR, NULL, + proc_create("powerpc/rtas/clock", 0644, NULL, &ppc_rtas_clock_operations); - proc_create("powerpc/rtas/poweron", S_IWUSR|S_IRUGO, NULL, + proc_create("powerpc/rtas/poweron", 0644, NULL, &ppc_rtas_poweron_operations); - proc_create("powerpc/rtas/sensors", S_IRUGO, NULL, + proc_create("powerpc/rtas/sensors", 0444, NULL, &ppc_rtas_sensors_operations); - proc_create("powerpc/rtas/frequency", S_IWUSR|S_IRUGO, NULL, + proc_create("powerpc/rtas/frequency", 0644, NULL, &ppc_rtas_tone_freq_operations); - proc_create("powerpc/rtas/volume", S_IWUSR|S_IRUGO, NULL, + proc_create("powerpc/rtas/volume", 0644, NULL, &ppc_rtas_tone_volume_operations); - proc_create("powerpc/rtas/rmo_buffer", S_IRUSR, NULL, + proc_create("powerpc/rtas/rmo_buffer", 0400, NULL, &ppc_rtas_rmo_buf_ops); return 0; } diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index f6f6a8a5103a..10fabae2574d 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -727,7 +727,7 @@ static int __init rtas_flash_init(void) const struct rtas_flash_file *f = &rtas_flash_files[i]; int token; - if (!proc_create(f->filename, S_IRUSR | S_IWUSR, NULL, &f->fops)) + if (!proc_create(f->filename, 0600, NULL, &f->fops)) goto enomem; /* diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 0f0b1b2f3b60..7ae4b8ba7fd3 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -581,7 +581,7 @@ static int __init rtas_init(void) if (!rtas_log_buf) return -ENODEV; - entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL, + entry = proc_create("powerpc/rtas/error_log", 0400, NULL, &proc_rtas_log_operations); if (!entry) printk(KERN_ERR "Failed to create error_log proc entry\n"); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 122a3c883f4e..cb07d6519783 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2130,13 +2130,13 @@ static int __init ppc_warn_emulated_init(void) if (!dir) return -ENOMEM; - d = debugfs_create_u32("do_warn", S_IRUGO | S_IWUSR, dir, + d = debugfs_create_u32("do_warn", 0644, dir, &ppc_warn_emulated); if (!d) goto fail; for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) { - d = debugfs_create_u32(entries[i].name, S_IRUGO | S_IWUSR, dir, + d = debugfs_create_u32(entries[i].name, 0644, dir, (u32 *)&entries[i].val.counter); if (!d) goto fail; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2d46037ce936..e4f70c33fbc7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -93,10 +93,10 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); static int dynamic_mt_modes = 6; -module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR); +module_param(dynamic_mt_modes, int, 0644); MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)"); static int target_smt_mode; -module_param(target_smt_mode, int, S_IRUGO | S_IWUSR); +module_param(target_smt_mode, int, 0644); MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)"); static bool indep_threads_mode = true; @@ -109,12 +109,10 @@ static struct kernel_param_ops module_param_ops = { .get = param_get_int, }; -module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, - S_IRUGO | S_IWUSR); +module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, 0644); MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization"); -module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, - S_IRUGO | S_IWUSR); +module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644); MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); #endif diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index d329b2add7e2..b8356cdc0c04 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1039,7 +1039,7 @@ static void xics_debugfs_init(struct kvmppc_xics *xics) return; } - xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root, + xics->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root, xics, &xics_debug_fops); pr_debug("%s: created %s\n", __func__, name); diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index 96bb55ca61d3..d2ef39f0edc8 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -84,7 +84,7 @@ static ssize_t show_status(struct device *d, return sprintf(buf, "%02x\n", ret); } -static DEVICE_ATTR(status, S_IRUGO, show_status, NULL); +static DEVICE_ATTR(status, 0444, show_status, NULL); static void mcu_power_off(void) { diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 9558d725a99b..db329d4bf1c3 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -455,7 +455,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, } } - ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO); + ret = spufs_mkdir(inode, dentry, flags, mode & 0777); if (ret) goto out_aff_unlock; @@ -546,7 +546,7 @@ static int spufs_create_gang(struct inode *inode, struct path path = {.mnt = mnt, .dentry = dentry}; int ret; - ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO); + ret = spufs_mkgang(inode, dentry, mode & 0777); if (!ret) { ret = spufs_gang_open(&path); if (ret < 0) { diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 4c827826c05e..0dc8fa4e0af2 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -103,9 +103,9 @@ static ssize_t dump_ack_store(struct dump_obj *dump_obj, * due to the dynamic size of the dump */ static struct dump_attribute id_attribute = - __ATTR(id, S_IRUGO, dump_id_show, NULL); + __ATTR(id, 0444, dump_id_show, NULL); static struct dump_attribute type_attribute = - __ATTR(type, S_IRUGO, dump_type_show, NULL); + __ATTR(type, 0444, dump_type_show, NULL); static struct dump_attribute ack_attribute = __ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store); diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index ecd6d9177d13..ba6e437abb4b 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -83,9 +83,9 @@ static ssize_t elog_ack_store(struct elog_obj *elog_obj, } static struct elog_attribute id_attribute = - __ATTR(id, S_IRUGO, elog_id_show, NULL); + __ATTR(id, 0444, elog_id_show, NULL); static struct elog_attribute type_attribute = - __ATTR(type, S_IRUGO, elog_type_show, NULL); + __ATTR(type, 0444, elog_type_show, NULL); static struct elog_attribute ack_attribute = __ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store); diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c index 23fb6647dced..6fd4092798d5 100644 --- a/arch/powerpc/platforms/powernv/opal-sysparam.c +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -260,13 +260,13 @@ void __init opal_sys_param_init(void) /* If the parameter is read-only or read-write */ switch (perm[i] & 3) { case OPAL_SYSPARAM_READ: - attr[i].kobj_attr.attr.mode = S_IRUGO; + attr[i].kobj_attr.attr.mode = 0444; break; case OPAL_SYSPARAM_WRITE: - attr[i].kobj_attr.attr.mode = S_IWUSR; + attr[i].kobj_attr.attr.mode = 0200; break; case OPAL_SYSPARAM_RW: - attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUSR; + attr[i].kobj_attr.attr.mode = 0644; break; default: break; diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 560aefde06c0..25427a48feae 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -72,20 +72,20 @@ MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager"); MODULE_LICENSE("GPL"); MODULE_VERSION(CMM_DRIVER_VERSION); -module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR); +module_param_named(delay, delay, uint, 0644); MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. " "[Default=" __stringify(CMM_DEFAULT_DELAY) "]"); -module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR); +module_param_named(hotplug_delay, hotplug_delay, uint, 0644); MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove " "before loaning resumes. " "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]"); -module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR); +module_param_named(oom_kb, oom_kb, uint, 0644); MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. " "[Default=" __stringify(CMM_OOM_KB) "]"); -module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR); +module_param_named(min_mem_mb, min_mem_mb, ulong, 0644); MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. " "[Default=" __stringify(CMM_MIN_MEM_MB) "]"); -module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR); +module_param_named(debug, cmm_debug, uint, 0644); MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. " "[Default=" __stringify(CMM_DEBUG) "]"); @@ -385,7 +385,7 @@ static int cmm_thread(void *dummy) { \ return sprintf(buf, format, ##args); \ } \ - static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) + static DEVICE_ATTR(name, 0444, show_##name, NULL) CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages)); CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target)); @@ -411,7 +411,7 @@ static ssize_t store_oom_pages(struct device *dev, return count; } -static DEVICE_ATTR(oom_freed_kb, S_IWUSR | S_IRUGO, +static DEVICE_ATTR(oom_freed_kb, 0644, show_oom_pages, store_oom_pages); static struct device_attribute *cmm_attrs[] = { @@ -765,7 +765,7 @@ static int cmm_set_disable(const char *val, const struct kernel_param *kp) } module_param_call(disable, cmm_set_disable, param_get_uint, - &cmm_disabled, S_IRUGO | S_IWUSR); + &cmm_disabled, 0644); MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. " "[Default=" __stringify(CMM_DISABLE) "]"); diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 957ae347b0b3..89b7ce807e70 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -163,7 +163,7 @@ static int __init hcall_inst_init(void) for_each_possible_cpu(cpu) { snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu); - hcall_file = debugfs_create_file(cpu_name_buf, S_IRUGO, + hcall_file = debugfs_create_file(cpu_name_buf, 0444, hcall_root, per_cpu(hcall_stats, cpu), &hcall_inst_seq_fops); diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index 408a86044133..c7c1140c13b6 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -298,7 +298,7 @@ out: return rc; return count; } -static BUS_ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe); +static BUS_ATTR(probe, 0200, NULL, ibmebus_store_probe); static ssize_t ibmebus_store_remove(struct bus_type *bus, const char *buf, size_t count) @@ -325,7 +325,7 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus, return -ENODEV; } } -static BUS_ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove); +static BUS_ATTR(remove, 0200, NULL, ibmebus_store_remove); static struct attribute *ibmbus_bus_attrs[] = { &bus_attr_probe.attr, diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index f43eafe30c7f..c508c938dc71 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -697,11 +697,11 @@ static const struct file_operations lparcfg_fops = { static int __init lparcfg_init(void) { - umode_t mode = S_IRUSR | S_IRGRP | S_IROTH; + umode_t mode = 0444; /* Allow writing if we have FW_FEATURE_SPLPAR */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) - mode |= S_IWUSR; + mode |= 0200; if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops)) { printk(KERN_ERR "Failed to create powerpc/lparcfg\n"); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index f7042ad492ba..0f7fb7170b03 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -384,7 +384,7 @@ static ssize_t migration_store(struct class *class, #define MIGRATION_API_VERSION 1 static CLASS_ATTR_WO(migration); -static CLASS_ATTR_STRING(api_version, S_IRUGO, __stringify(MIGRATION_API_VERSION)); +static CLASS_ATTR_STRING(api_version, 0444, __stringify(MIGRATION_API_VERSION)); static int __init mobility_sysfs_init(void) { diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index f24d8159c9e1..0e0208117e77 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -405,7 +405,7 @@ static int proc_ppc64_create_ofdt(void) { struct proc_dir_entry *ent; - ent = proc_create("powerpc/ofdt", S_IWUSR, NULL, &ofdt_fops); + ent = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_fops); if (ent) proc_set_size(ent, 0); diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index c47585a78b69..054ce7a16fc3 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -179,7 +179,7 @@ static int __init scanlog_init(void) if (!scanlog_buffer) goto err; - ent = proc_create("powerpc/rtas/scan-log-dump", S_IRUSR, NULL, + ent = proc_create("powerpc/rtas/scan-log-dump", 0400, NULL, &scanlog_fops); if (!ent) goto err; diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 89726f07d249..52a021e1f86b 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -214,8 +214,7 @@ static ssize_t show_hibernate(struct device *dev, return sprintf(buf, "%d\n", KERN_DT_UPDATE); } -static DEVICE_ATTR(hibernate, S_IWUSR | S_IRUGO, - show_hibernate, store_hibernate); +static DEVICE_ATTR(hibernate, 0644, show_hibernate, store_hibernate); static struct bus_type suspend_subsys = { .name = "power", diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 1b307c80b401..53b7f8ef570f 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -83,7 +83,7 @@ axon_ram_sysfs_ecc(struct device *dev, struct device_attribute *attr, char *buf) return sprintf(buf, "%ld\n", bank->ecc_counter); } -static DEVICE_ATTR(ecc, S_IRUGO, axon_ram_sysfs_ecc, NULL); +static DEVICE_ATTR(ecc, 0444, axon_ram_sysfs_ecc, NULL); /** * axon_ram_irq_handler - interrupt handler for Axon RAM ECC diff --git a/arch/powerpc/sysdev/mv64x60_pci.c b/arch/powerpc/sysdev/mv64x60_pci.c index d52b3b81e05f..50c411b1761e 100644 --- a/arch/powerpc/sysdev/mv64x60_pci.c +++ b/arch/powerpc/sysdev/mv64x60_pci.c @@ -73,7 +73,7 @@ static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj, static const struct bin_attribute mv64x60_hs_reg_attr = { /* Hotswap register */ .attr = { .name = "hs_reg", - .mode = S_IRUGO | S_IWUSR, + .mode = 0644, }, .size = MV64X60_VAL_LEN_MAX, .read = mv64x60_hs_reg_read, -- cgit v1.2.3 From 7339390d772ddee0447886d72c3aebc3b58d9583 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 17 Mar 2017 12:11:19 +1100 Subject: powerpc/pseries: Don't give a warning when HPT resizing isn't available As of 438cc81a41 "powerpc/pseries: Automatically resize HPT for memory hot add/remove" when running on the pseries platform, we always attempt to use the PAPR extension to resize the hashed page table (HPT) when we add or remove memory. This is fine, but when the extension is available we'll give a harmless, but scary warning. This patch suppresses the warning in this case. It will still warn if the feature is supposed to be available, but didn't work. Signed-off-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index d97be8d20715..7d07c7e17db6 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -774,7 +774,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size) int rc; rc = mmu_hash_ops.resize_hpt(target_hpt_shift); - if (rc) + if (rc && (rc != -ENODEV)) printk(KERN_WARNING "Unable to resize hash page table to target order %d: %d\n", target_hpt_shift, rc); -- cgit v1.2.3 From 45baee1416a0086dccaa8a94c70e6721a6906fa2 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Fri, 17 Nov 2017 16:58:59 -0200 Subject: powerpc/powernv: Add ppc_pci_reset_phbs parameter to issue a PHB reset During a kdump kernel boot in PowerPC, we request a reset of the PHBs to the FW. It makes sense, since if we are booting a kdump kernel it means we had some trouble before and we cannot rely in the adapters' health; they could be in a bad state, hence the reset is needed. But this reset is useful not only in kdump - there are situations, specially when debugging drivers, that we could break an adapter in a way it requires such reset. One can tell to just go ahead and reboot the machine, but happens that many times doing kexec is much faster, and so preferable than a full power cycle. This patch adds the ppc_pci_reset_phbs parameter to perform such reset. Signed-off-by: Guilherme G. Piccoli Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 1069f9cb273a..973cfb698180 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -89,6 +89,7 @@ void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, } static bool pnv_iommu_bypass_disabled __read_mostly; +static bool pci_reset_phbs __read_mostly; static int __init iommu_setup(char *str) { @@ -110,6 +111,14 @@ static int __init iommu_setup(char *str) } early_param("iommu", iommu_setup); +static int __init pci_reset_phbs_setup(char *str) +{ + pci_reset_phbs = true; + return 0; +} + +early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup); + static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) { /* @@ -4033,9 +4042,10 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, * If we're running in kdump kernel, the previous kernel never * shutdown PCI devices correctly. We already got IODA table * cleaned out. So we have to issue PHB reset to stop all PCI - * transactions from previous kernel. + * transactions from previous kernel. The ppc_pci_reset_phbs + * kernel parameter will force this reset too. */ - if (is_kdump_kernel()) { + if (is_kdump_kernel() || pci_reset_phbs) { pr_info(" Issue PHB reset ...\n"); pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE); -- cgit v1.2.3 From 8d81296cfcce89013a714feb8d25004a156f8181 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Thu, 30 Nov 2017 14:35:54 +0530 Subject: powerpc/radix: Remove trace_tlbie call from radix__flush_tlb_all radix__flush_tlb_all() is called only in kexec path in real mode and any tracepoints at this stage will make kexec to fail if enabled. To verify enable tlbie trace before kexec. $ echo 1 > /sys/kernel/debug/tracing/events/powerpc/tlbie/enable == kexec into new kernel and kexec fails. Fix this by not calling trace_tlbie from radix__flush_tlb_all(). Fixes: 0428491cba92 ("powerpc/mm: Trace tlbie(l) instructions") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Mahesh Salgaonkar Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/mm/tlb-radix.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index fbaa429471a1..71d1b19ad1c0 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -666,14 +666,12 @@ void radix__flush_tlb_all(void) */ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 0, rb, rs, ric, prs, r); /* * now flush host entires by passing PRS = 0 and LPID == 0 */ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); - trace_tlbie(0, 0, rb, 0, ric, prs, r); } void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, -- cgit v1.2.3 From b6d34eb4d280c893d0f442f4b9e039d73e3db420 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 8 Sep 2017 03:11:12 +1000 Subject: powerpc: Expose TSCR via sysfs The thread switch control register (TSCR) is a per core register that configures how the CPU shares resources between SMT threads. Exposing it via sysfs allows us to tune it at run time. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/sysfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index b8d4a1dac39f..5a8bfee6e187 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -485,6 +485,7 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); SYSFS_SPRSETUP(purr, SPRN_PURR); SYSFS_SPRSETUP(spurr, SPRN_SPURR); SYSFS_SPRSETUP(pir, SPRN_PIR); +SYSFS_SPRSETUP(tscr, SPRN_TSCR); /* Lets only enable read for phyp resources and @@ -495,6 +496,7 @@ static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static DEVICE_ATTR(spurr, 0400, show_spurr, NULL); static DEVICE_ATTR(purr, 0400, show_purr, store_purr); static DEVICE_ATTR(pir, 0400, show_pir, NULL); +static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr); /* * This is the system wide DSCR register default value. Any @@ -785,6 +787,9 @@ static int register_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) device_create_file(s, &dev_attr_pir); + + if (cpu_has_feature(CPU_FTR_ARCH_206)) + device_create_file(s, &dev_attr_tscr); #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC_FSL_BOOK3E @@ -867,6 +872,9 @@ static int unregister_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) device_remove_file(s, &dev_attr_pir); + + if (cpu_has_feature(CPU_FTR_ARCH_206)) + device_remove_file(s, &dev_attr_tscr); #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC_FSL_BOOK3E -- cgit v1.2.3 From 1c200e63d055ec0125e44a5e386b9b78aada7eb3 Mon Sep 17 00:00:00 2001 From: Gustavo Romero Date: Sun, 31 Dec 2017 18:20:45 -0500 Subject: powerpc/tm: Fix endianness flip on trap Currently it's possible that a thread on PPC64 LE has its endianness flipped inadvertently to Big-Endian resulting in a crash once the process is back from the signal handler. If giveup_all() is called when regs->msr has the bits MSR.FP and MSR.VEC disabled (and hence MSR.VSX disabled too) it returns without calling check_if_tm_restore_required() which copies regs->msr to ckpt_regs->msr if the process caught a signal whilst in transactional mode. Then once in setup_tm_sigcontexts() MSR from ckpt_regs.msr is used, but since check_if_tm_restore_required() was not called previuosly, gp_regs[PT_MSR] gets a copy of invalid MSR bits as MSR in ckpt_regs was not updated from regs->msr and so is zeroed. Later when leaving the signal handler once in sys_rt_sigreturn() the TS bits of gp_regs[PT_MSR] are checked to determine if restore_tm_sigcontexts() must be called to pull in the correct MSR state into the user context. Because TS bits are zeroed restore_tm_sigcontexts() is never called and MSR restored from the user context on returning from the signal handler has the MSR.LE (the endianness bit) forced to zero (Big-Endian). That leads, for instance, to 'nop' being treated as an illegal instruction in the following sequence: tbegin. beq 1f trap tend. 1: nop on PPC64 LE machines and the process dies just after returning from the signal handler. PPC64 BE is also affected but in a subtle way since forcing Big-Endian on a BE machine does not change the endianness. This commit fixes the issue described above by ensuring that once in setup_tm_sigcontexts() the MSR used is from regs->msr instead of from ckpt_regs->msr and by ensuring that we pull in only the MSR.FP, MSR.VEC, and MSR.VSX bits from ckpt_regs->msr. The fix was tested both on LE and BE machines and no regression regarding the powerpc/tm selftests was observed. Signed-off-by: Gustavo Romero Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/signal_64.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 2705fba544ad..720117690822 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -210,7 +210,7 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc); #endif struct pt_regs *regs = tsk->thread.regs; - unsigned long msr = tsk->thread.ckpt_regs.msr; + unsigned long msr = tsk->thread.regs->msr; long err = 0; BUG_ON(tsk != current); @@ -219,6 +219,12 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, WARN_ON(tm_suspend_disabled); + /* Restore checkpointed FP, VEC, and VSX bits from ckpt_regs as + * it contains the correct FP, VEC, VSX state after we treclaimed + * the transaction and giveup_all() was called on reclaiming. + */ + msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX); + /* Remove TM bits from thread's MSR. The MSR in the sigcontext * just indicates to userland that we were doing a transaction, but we * don't want to return in transactional state. This also ensures -- cgit v1.2.3 From 35adacd6fc48d658419522f192a3c8b2785612da Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 24 Dec 2017 02:49:23 +1000 Subject: powerpc/pseries, ps3: panic flush kernel messages before halting system Platforms with a panic handler that halts the system can have problems getting kernel messages out, because the panic notifiers are called before kernel/panic.c does its flushing of printk buffers an console etc. This was attempted to be solved with commit a3b2cb30f252 ("powerpc: Do not call ppc_md.panic in fadump panic notifier"), but that wasn't the right approach and caused other problems, and was reverted by commit ab9dbf771ff9. Instead, the powernv shutdown paths have already had a similar problem, fixed by taking the message flushing sequence from kernel/panic.c. That's a little bit ugly, but while we have the code duplicated, it will work for this case as well. So have ppc panic handlers do the same flushing before they terminate. Without this patch, a qemu pseries_le_defconfig guest stops silently when issued the nmi command when xmon is off and no crash dumpers enabled. Afterwards, an oops is printed by each CPU as expected. Fixes: ab9dbf771ff9 ("Revert "powerpc: Do not call ppc_md.panic in fadump panic notifier"") Signed-off-by: Nicholas Piggin Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/bug.h | 3 ++- arch/powerpc/kernel/traps.c | 24 ++++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal.c | 18 ++++-------------- arch/powerpc/platforms/ps3/setup.c | 1 + arch/powerpc/platforms/pseries/setup.c | 8 +++++++- 5 files changed, 38 insertions(+), 16 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 97c38472b924..fd06dbe7d7d3 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -136,7 +136,8 @@ extern void _exception(int, struct pt_regs *, int, unsigned long); extern void _exception_pkey(int, struct pt_regs *, int, unsigned long, int); extern void die(const char *, struct pt_regs *, long); extern bool die_will_crash(void); - +extern void panic_flush_kmsg_start(void); +extern void panic_flush_kmsg_end(void); #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index cb07d6519783..4260c73461a3 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -39,6 +39,8 @@ #include #include #include +#include +#include #include #include @@ -143,6 +145,28 @@ static int die_owner = -1; static unsigned int die_nest_count; static int die_counter; +extern void panic_flush_kmsg_start(void) +{ + /* + * These are mostly taken from kernel/panic.c, but tries to do + * relatively minimal work. Don't use delay functions (TB may + * be broken), don't crash dump (need to set a firmware log), + * don't run notifiers. We do want to get some information to + * Linux console. + */ + console_verbose(); + bust_spinlocks(1); +} + +extern void panic_flush_kmsg_end(void) +{ + printk_safe_flush_on_panic(); + kmsg_dump(KMSG_DUMP_PANIC); + bust_spinlocks(0); + debug_locks_off(); + console_flush_on_panic(); +} + static unsigned long oops_begin(struct pt_regs *regs) { int cpu; diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 69b5263fc9e3..c15182765ff5 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -461,24 +461,14 @@ static int opal_recover_mce(struct pt_regs *regs, void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) { - /* - * This is mostly taken from kernel/panic.c, but tries to do - * relatively minimal work. Don't use delay functions (TB may - * be broken), don't crash dump (need to set a firmware log), - * don't run notifiers. We do want to get some information to - * Linux console. - */ - console_verbose(); - bust_spinlocks(1); + panic_flush_kmsg_start(); + pr_emerg("Hardware platform error: %s\n", msg); if (regs) show_regs(regs); smp_send_stop(); - printk_safe_flush_on_panic(); - kmsg_dump(KMSG_DUMP_PANIC); - bust_spinlocks(0); - debug_locks_off(); - console_flush_on_panic(); + + panic_flush_kmsg_end(); /* * Don't bother to shut things down because this will diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 6244bc849469..77a37520068d 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -113,6 +113,7 @@ static void ps3_panic(char *str) printk(" System does not reboot automatically.\n"); printk(" Please press POWER button.\n"); printk("\n"); + panic_flush_kmsg_end(); while(1) lv1_pause(1); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index db76963e693d..42e9cf0b3180 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -533,6 +533,12 @@ static void __init pSeries_setup_arch(void) ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare; } +static void pseries_panic(char *str) +{ + panic_flush_kmsg_end(); + rtas_os_term(str); +} + static int __init pSeries_init_panel(void) { /* Manually leave the kernel version on the panel. */ @@ -761,7 +767,7 @@ define_machine(pseries) { .pcibios_fixup = pSeries_final_fixup, .restart = rtas_restart, .halt = rtas_halt, - .panic = rtas_os_term, + .panic = pseries_panic, .get_boot_time = rtas_get_boot_time, .get_rtc_time = rtas_get_rtc_time, .set_rtc_time = rtas_set_rtc_time, -- cgit v1.2.3 From 47355040d2760566901057287b35d5f10e217e12 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 16 Jan 2018 16:12:38 -0600 Subject: signal/powerpc: Remove unnecessary signal_code parameter of do_send_trap signal_code is always TRAP_HWBKPT Signed-off-by: "Eric W. Biederman" --- arch/powerpc/include/asm/debug.h | 2 +- arch/powerpc/kernel/process.c | 6 +++--- arch/powerpc/kernel/traps.c | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index 14e71ff6579e..fc97404de0a3 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -49,7 +49,7 @@ void set_breakpoint(struct arch_hw_breakpoint *brk); void __set_breakpoint(struct arch_hw_breakpoint *brk); #ifdef CONFIG_PPC_ADV_DEBUG_REGS extern void do_send_trap(struct pt_regs *regs, unsigned long address, - unsigned long error_code, int signal_code, int brkpt); + unsigned long error_code, int brkpt); #else extern void do_break(struct pt_regs *regs, unsigned long address, diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 72be0c32e902..bfb48cf56bc3 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -601,11 +601,11 @@ EXPORT_SYMBOL(flush_all_to_thread); #ifdef CONFIG_PPC_ADV_DEBUG_REGS void do_send_trap(struct pt_regs *regs, unsigned long address, - unsigned long error_code, int signal_code, int breakpt) + unsigned long error_code, int breakpt) { siginfo_t info; - current->thread.trap_nr = signal_code; + current->thread.trap_nr = TRAP_HWBKPT; if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, 11, SIGSEGV) == NOTIFY_STOP) return; @@ -613,7 +613,7 @@ void do_send_trap(struct pt_regs *regs, unsigned long address, /* Deliver the signal to userspace */ info.si_signo = SIGTRAP; info.si_errno = breakpt; /* breakpoint or watchpoint id */ - info.si_code = signal_code; + info.si_code = TRAP_HWBKPT; info.si_addr = (void __user *)address; force_sig_info(SIGTRAP, &info, current); } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index f2e6e1838952..c93f1e6a9fff 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1750,34 +1750,34 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE current->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE; #endif - do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, TRAP_HWBKPT, + do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, 5); changed |= 0x01; } else if (debug_status & (DBSR_DAC2R | DBSR_DAC2W)) { dbcr_dac(current) &= ~(DBCR_DAC2R | DBCR_DAC2W); - do_send_trap(regs, mfspr(SPRN_DAC2), debug_status, TRAP_HWBKPT, + do_send_trap(regs, mfspr(SPRN_DAC2), debug_status, 6); changed |= 0x01; } else if (debug_status & DBSR_IAC1) { current->thread.debug.dbcr0 &= ~DBCR0_IAC1; dbcr_iac_range(current) &= ~DBCR_IAC12MODE; - do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, TRAP_HWBKPT, + do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, 1); changed |= 0x01; } else if (debug_status & DBSR_IAC2) { current->thread.debug.dbcr0 &= ~DBCR0_IAC2; - do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, TRAP_HWBKPT, + do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, 2); changed |= 0x01; } else if (debug_status & DBSR_IAC3) { current->thread.debug.dbcr0 &= ~DBCR0_IAC3; dbcr_iac_range(current) &= ~DBCR_IAC34MODE; - do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, TRAP_HWBKPT, + do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, 3); changed |= 0x01; } else if (debug_status & DBSR_IAC4) { current->thread.debug.dbcr0 &= ~DBCR0_IAC4; - do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, TRAP_HWBKPT, + do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, 4); changed |= 0x01; } -- cgit v1.2.3 From f71dd7dc2dc989dc712b246a74d243e4b2c5f8a7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 Jan 2018 14:37:25 -0600 Subject: signal/ptrace: Add force_sig_ptrace_errno_trap and use it where needed There are so many places that build struct siginfo by hand that at least one of them is bound to get it wrong. A handful of cases in the kernel arguably did just that when using the errno field of siginfo to pass no errno values to userspace. The usage is limited to a single si_code so at least does not mess up anything else. Encapsulate this questionable pattern in a helper function so that the userspace ABI is preserved. Update all of the places that use this pattern to use the new helper function. Signed-off-by: "Eric W. Biederman" --- arch/arm/kernel/ptrace.c | 8 +------- arch/arm64/kernel/ptrace.c | 6 ++++-- arch/powerpc/kernel/process.c | 9 ++------- arch/xtensa/kernel/ptrace.c | 8 +------- include/linux/sched/signal.h | 2 ++ kernel/signal.c | 15 +++++++++++++++ 6 files changed, 25 insertions(+), 23 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 58e3771e4c5b..7724b0f661b3 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -390,7 +390,6 @@ static void ptrace_hbptriggered(struct perf_event *bp, struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp); long num; int i; - siginfo_t info; for (i = 0; i < ARM_MAX_HBP_SLOTS; ++i) if (current->thread.debug.hbp[i] == bp) @@ -398,12 +397,7 @@ static void ptrace_hbptriggered(struct perf_event *bp, num = (i == ARM_MAX_HBP_SLOTS) ? 0 : ptrace_hbp_idx_to_num(i); - info.si_signo = SIGTRAP; - info.si_errno = (int)num; - info.si_code = TRAP_HWBKPT; - info.si_addr = (void __user *)(bkpt->trigger); - - force_sig_info(SIGTRAP, &info, current); + force_sig_ptrace_errno_trap((int)num, (void __user *)(bkpt->trigger)); } /* diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 95daa1478a7c..6618036ae6d4 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -190,21 +190,23 @@ static void ptrace_hbptriggered(struct perf_event *bp, #ifdef CONFIG_COMPAT if (is_compat_task()) { + int si_errno = 0; int i; for (i = 0; i < ARM_MAX_BRP; ++i) { if (current->thread.debug.hbp_break[i] == bp) { - info.si_errno = (i << 1) + 1; + si_errno = (i << 1) + 1; break; } } for (i = 0; i < ARM_MAX_WRP; ++i) { if (current->thread.debug.hbp_watch[i] == bp) { - info.si_errno = -((i << 1) + 1); + si_errno = -((i << 1) + 1); break; } } + force_sig_ptrace_errno_trap(si_errno, (void __user *)bkpt->trigger); } #endif force_sig_info(SIGTRAP, &info, current); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index bfb48cf56bc3..4208cbe2fb7f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -603,19 +603,14 @@ EXPORT_SYMBOL(flush_all_to_thread); void do_send_trap(struct pt_regs *regs, unsigned long address, unsigned long error_code, int breakpt) { - siginfo_t info; - current->thread.trap_nr = TRAP_HWBKPT; if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, 11, SIGSEGV) == NOTIFY_STOP) return; /* Deliver the signal to userspace */ - info.si_signo = SIGTRAP; - info.si_errno = breakpt; /* breakpoint or watchpoint id */ - info.si_code = TRAP_HWBKPT; - info.si_addr = (void __user *)address; - force_sig_info(SIGTRAP, &info, current); + force_sig_ptrace_errno_trap(breakpt, /* breakpoint or watchpoint id */ + (void __user *)address); } #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ void do_break (struct pt_regs *regs, unsigned long address, diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index e2461968efb2..c0845cb1cbb9 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -278,7 +278,6 @@ static void ptrace_hbptriggered(struct perf_event *bp, struct pt_regs *regs) { int i; - siginfo_t info; struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp); if (bp->attr.bp_type & HW_BREAKPOINT_X) { @@ -293,12 +292,7 @@ static void ptrace_hbptriggered(struct perf_event *bp, i = (i << 1) | 1; } - info.si_signo = SIGTRAP; - info.si_errno = i; - info.si_code = TRAP_HWBKPT; - info.si_addr = (void __user *)bkpt->address; - - force_sig_info(SIGTRAP, &info, current); + force_sig_ptrace_errno_trap(i, (void __user *)bkpt->address); } static struct perf_event *ptrace_hbp_create(struct task_struct *tsk, int type) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 944fe6356f4a..23b4f9cb82db 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -311,6 +311,8 @@ int send_sig_mceerr(int code, void __user *, short, struct task_struct *); int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper); int force_sig_pkuerr(void __user *addr, u32 pkey); +int force_sig_ptrace_errno_trap(int errno, void __user *addr); + extern int send_sig_info(int, struct siginfo *, struct task_struct *); extern int force_sigsegv(int, struct task_struct *); extern int force_sig_info(int, struct siginfo *, struct task_struct *); diff --git a/kernel/signal.c b/kernel/signal.c index 4f6300ef8062..e549174c0831 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1599,6 +1599,21 @@ int force_sig_pkuerr(void __user *addr, u32 pkey) } #endif +/* For the crazy architectures that include trap information in + * the errno field, instead of an actual errno value. + */ +int force_sig_ptrace_errno_trap(int errno, void __user *addr) +{ + struct siginfo info; + + clear_siginfo(&info); + info.si_signo = SIGTRAP; + info.si_errno = errno; + info.si_code = TRAP_HWBKPT; + info.si_addr = addr; + return force_sig_info(info.si_signo, &info, current); +} + int kill_pgrp(struct pid *pid, int sig, int priv) { int ret; -- cgit v1.2.3 From bdcb1aefc5b3f7d0f1dc8b02673602bca2ff7a4b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 17 Jan 2018 23:58:18 +1000 Subject: powerpc/64s: Improve RFI L1-D cache flush fallback The fallback RFI flush is used when firmware does not provide a way to flush the cache. It's a "displacement flush" that evicts useful data by displacing it with an uninteresting buffer. The flush has to take care to work with implementation specific cache replacment policies, so the recipe has been in flux. The initial slow but conservative approach is to touch all lines of a congruence class, with dependencies between each load. It has since been determined that a linear pattern of loads without dependencies is sufficient, and is significantly faster. Measuring the speed of a null syscall with RFI fallback flush enabled gives the relative improvement: P8 - 1.83x P9 - 1.75x The flush also becomes simpler and more adaptable to different cache geometries. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/paca.h | 3 +- arch/powerpc/kernel/asm-offsets.c | 3 +- arch/powerpc/kernel/exceptions-64s.S | 76 +++++++++++++++++------------------- arch/powerpc/kernel/setup_64.c | 13 +----- arch/powerpc/xmon/xmon.c | 2 - 5 files changed, 39 insertions(+), 58 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 1b7fd535393b..b62c31037cad 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -239,8 +239,7 @@ struct paca_struct { */ u64 exrfi[EX_SIZE] __aligned(0x80); void *rfi_flush_fallback_area; - u64 l1d_flush_congruence; - u64 l1d_flush_sets; + u64 l1d_flush_size; #endif }; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index fa5c4125f42a..88b84ac76b53 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -239,8 +239,7 @@ int main(void) OFFSET(PACA_IN_NMI, paca_struct, in_nmi); OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area); OFFSET(PACA_EXRFI, paca_struct, exrfi); - OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence); - OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets); + OFFSET(PACA_L1D_FLUSH_SIZE, paca_struct, l1d_flush_size); #endif OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9e6882bdc526..243d072a225a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1461,39 +1461,37 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) std r9,PACA_EXRFI+EX_R9(r13) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) - std r12,PACA_EXRFI+EX_R12(r13) - std r8,PACA_EXRFI+EX_R13(r13) mfctr r9 ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SETS(r13) - ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ - addi r12,r12,8 + ld r11,PACA_L1D_FLUSH_SIZE(r13) + srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ mtctr r11 DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ /* order ld/st prior to dcbt stop all streams with flushing */ sync -1: li r8,0 - .rept 8 /* 8-way set associative */ - ldx r11,r10,r8 - add r8,r8,r12 - xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not - add r8,r8,r11 // Add 0, this creates a dependency on the ldx - .endr - addi r10,r10,128 /* 128 byte cache line */ + + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 bdnz 1b mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) ld r11,PACA_EXRFI+EX_R11(r13) - ld r12,PACA_EXRFI+EX_R12(r13) - ld r8,PACA_EXRFI+EX_R13(r13) GET_SCRATCH0(r13); rfid @@ -1503,39 +1501,37 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) std r9,PACA_EXRFI+EX_R9(r13) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) - std r12,PACA_EXRFI+EX_R12(r13) - std r8,PACA_EXRFI+EX_R13(r13) mfctr r9 ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SETS(r13) - ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ - addi r12,r12,8 + ld r11,PACA_L1D_FLUSH_SIZE(r13) + srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ mtctr r11 DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ /* order ld/st prior to dcbt stop all streams with flushing */ sync -1: li r8,0 - .rept 8 /* 8-way set associative */ - ldx r11,r10,r8 - add r8,r8,r12 - xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not - add r8,r8,r11 // Add 0, this creates a dependency on the ldx - .endr - addi r10,r10,128 /* 128 byte cache line */ + + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 bdnz 1b mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) ld r11,PACA_EXRFI+EX_R11(r13) - ld r12,PACA_EXRFI+EX_R12(r13) - ld r8,PACA_EXRFI+EX_R13(r13) GET_SCRATCH0(r13); hrfid diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index d1fa0e91f526..c388cc3357fa 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -875,19 +875,8 @@ static void init_fallback_flush(void) memset(l1d_flush_fallback_area, 0, l1d_size * 2); for_each_possible_cpu(cpu) { - /* - * The fallback flush is currently coded for 8-way - * associativity. Different associativity is possible, but it - * will be treated as 8-way and may not evict the lines as - * effectively. - * - * 128 byte lines are mandatory. - */ - u64 c = l1d_size / 8; - paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; - paca[cpu].l1d_flush_congruence = c; - paca[cpu].l1d_flush_sets = c / 128; + paca[cpu].l1d_flush_size = l1d_size; } } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 01d9a2dcff20..82e1a3ee6e0f 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2377,8 +2377,6 @@ static void dump_one_paca(int cpu) printf(" slb_cache[%d]: = 0x%016lx\n", i, p->slb_cache[i]); DUMP(p, rfi_flush_fallback_area, "px"); - DUMP(p, l1d_flush_congruence, "llx"); - DUMP(p, l1d_flush_sets, "llx"); #endif DUMP(p, dscr_default, "llx"); #ifdef CONFIG_PPC_BOOK3E -- cgit v1.2.3 From 83b57531c58f4173d1c0d0b2c0bc88c853c32ea5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 9 Jul 2017 18:14:01 -0500 Subject: mm/memory_failure: Remove unused trapno from memory_failure Today 4 architectures set ARCH_SUPPORTS_MEMORY_FAILURE (arm64, parisc, powerpc, and x86), while 4 other architectures set __ARCH_SI_TRAPNO (alpha, metag, sparc, and tile). These two sets of architectures do not interesect so remove the trapno paramater to remove confusion. Signed-off-by: "Eric W. Biederman" --- arch/parisc/kernel/pdt.c | 2 +- .../powerpc/platforms/powernv/opal-memory-errors.c | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 6 ++-- drivers/acpi/apei/ghes.c | 2 +- drivers/base/memory.c | 2 +- drivers/ras/cec.c | 2 +- include/linux/mm.h | 4 +-- mm/hwpoison-inject.c | 2 +- mm/madvise.c | 2 +- mm/memory-failure.c | 33 +++++++++------------- 10 files changed, 25 insertions(+), 32 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c index e07eb34c8750..36434d4da381 100644 --- a/arch/parisc/kernel/pdt.c +++ b/arch/parisc/kernel/pdt.c @@ -325,7 +325,7 @@ static int pdt_mainloop(void *unused) #ifdef CONFIG_MEMORY_FAILURE if ((pde & PDT_ADDR_PERM_ERR) || ((pde & PDT_ADDR_SINGLE_ERR) == 0)) - memory_failure(pde >> PAGE_SHIFT, 0, 0); + memory_failure(pde >> PAGE_SHIFT, 0); else soft_offline_page( pfn_to_page(pde >> PAGE_SHIFT), 0); diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c index d9916ea62305..8ddc1accf199 100644 --- a/arch/powerpc/platforms/powernv/opal-memory-errors.c +++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c @@ -60,7 +60,7 @@ static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt) } for (; paddr_start < paddr_end; paddr_start += PAGE_SIZE) { - memory_failure(paddr_start >> PAGE_SHIFT, 0, 0); + memory_failure(paddr_start >> PAGE_SHIFT, 0); } } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b1d616d08eee..3b7319e25168 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -582,7 +582,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { pfn = mce->addr >> PAGE_SHIFT; - memory_failure(pfn, MCE_VECTOR, 0); + memory_failure(pfn, 0); } return NOTIFY_OK; @@ -1046,7 +1046,7 @@ static int do_memory_failure(struct mce *m) pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr); if (!(m->mcgstatus & MCG_STATUS_RIPV)) flags |= MF_MUST_KILL; - ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags); + ret = memory_failure(m->addr >> PAGE_SHIFT, flags); if (ret) pr_err("Memory error not recovered"); return ret; @@ -1325,7 +1325,7 @@ out_ist: EXPORT_SYMBOL_GPL(do_machine_check); #ifndef CONFIG_MEMORY_FAILURE -int memory_failure(unsigned long pfn, int vector, int flags) +int memory_failure(unsigned long pfn, int flags) { /* mce_severity() should not hand us an ACTION_REQUIRED error */ BUG_ON(flags & MF_ACTION_REQUIRED); diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 6402f7fad3bb..bb5f9c643e0e 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -410,7 +410,7 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int flags = 0; if (flags != -1) - memory_failure_queue(pfn, 0, flags); + memory_failure_queue(pfn, flags); #endif } diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 1d60b58a8c19..fe4b24f05f6a 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -569,7 +569,7 @@ store_hard_offline_page(struct device *dev, if (kstrtoull(buf, 0, &pfn) < 0) return -EINVAL; pfn >>= PAGE_SHIFT; - ret = memory_failure(pfn, 0, 0); + ret = memory_failure(pfn, 0); return ret ? ret : count; } diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c index ca44e6977cf2..2d9ec378a8bc 100644 --- a/drivers/ras/cec.c +++ b/drivers/ras/cec.c @@ -327,7 +327,7 @@ int cec_add_elem(u64 pfn) } else { /* We have reached max count for this page, soft-offline it. */ pr_err("Soft-offlining pfn: 0x%llx\n", pfn); - memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE); + memory_failure_queue(pfn, MF_SOFT_OFFLINE); ca->pfns_poisoned++; } diff --git a/include/linux/mm.h b/include/linux/mm.h index ea818ff739cd..7fc92384977e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2570,8 +2570,8 @@ enum mf_flags { MF_MUST_KILL = 1 << 2, MF_SOFT_OFFLINE = 1 << 3, }; -extern int memory_failure(unsigned long pfn, int trapno, int flags); -extern void memory_failure_queue(unsigned long pfn, int trapno, int flags); +extern int memory_failure(unsigned long pfn, int flags); +extern void memory_failure_queue(unsigned long pfn, int flags); extern int unpoison_memory(unsigned long pfn); extern int get_hwpoison_page(struct page *page); #define put_hwpoison_page(page) put_page(page) diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index 356df057a2a8..b6ac70616c32 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c @@ -52,7 +52,7 @@ static int hwpoison_inject(void *data, u64 val) inject: pr_info("Injecting memory failure at pfn %#lx\n", pfn); - return memory_failure(pfn, 18, MF_COUNT_INCREASED); + return memory_failure(pfn, MF_COUNT_INCREASED); put_out: put_hwpoison_page(p); return 0; diff --git a/mm/madvise.c b/mm/madvise.c index 751e97aa2210..4d3c922ea1a1 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -661,7 +661,7 @@ static int madvise_inject_error(int behavior, pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n", page_to_pfn(page), start); - ret = memory_failure(page_to_pfn(page), 0, MF_COUNT_INCREASED); + ret = memory_failure(page_to_pfn(page), MF_COUNT_INCREASED); if (ret) return ret; } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 4acdf393a801..c5f5f31bd979 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -178,7 +178,7 @@ EXPORT_SYMBOL_GPL(hwpoison_filter); * ``action optional'' if they are not immediately affected by the error * ``action required'' if error happened in current execution context */ -static int kill_proc(struct task_struct *t, unsigned long addr, int trapno, +static int kill_proc(struct task_struct *t, unsigned long addr, unsigned long pfn, struct page *page, int flags) { struct siginfo si; @@ -189,9 +189,6 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno, si.si_signo = SIGBUS; si.si_errno = 0; si.si_addr = (void *)addr; -#ifdef __ARCH_SI_TRAPNO - si.si_trapno = trapno; -#endif si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT; if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) { @@ -323,7 +320,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, * Also when FAIL is set do a force kill because something went * wrong earlier. */ -static void kill_procs(struct list_head *to_kill, int forcekill, int trapno, +static void kill_procs(struct list_head *to_kill, int forcekill, bool fail, struct page *page, unsigned long pfn, int flags) { @@ -348,7 +345,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill, int trapno, * check for that, but we need to tell the * process anyways. */ - else if (kill_proc(tk->tsk, tk->addr, trapno, + else if (kill_proc(tk->tsk, tk->addr, pfn, page, flags) < 0) pr_err("Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n", pfn, tk->tsk->comm, tk->tsk->pid); @@ -927,7 +924,7 @@ EXPORT_SYMBOL_GPL(get_hwpoison_page); * the pages and send SIGBUS to the processes if the data was dirty. */ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, - int trapno, int flags, struct page **hpagep) + int flags, struct page **hpagep) { enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; struct address_space *mapping; @@ -1017,7 +1014,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, * any accesses to the poisoned memory. */ forcekill = PageDirty(hpage) || (flags & MF_MUST_KILL); - kill_procs(&tokill, forcekill, trapno, !unmap_success, p, pfn, flags); + kill_procs(&tokill, forcekill, !unmap_success, p, pfn, flags); return unmap_success; } @@ -1045,7 +1042,7 @@ static int identify_page_state(unsigned long pfn, struct page *p, return page_action(ps, p, pfn); } -static int memory_failure_hugetlb(unsigned long pfn, int trapno, int flags) +static int memory_failure_hugetlb(unsigned long pfn, int flags) { struct page *p = pfn_to_page(pfn); struct page *head = compound_head(p); @@ -1090,7 +1087,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int trapno, int flags) return 0; } - if (!hwpoison_user_mappings(p, pfn, trapno, flags, &head)) { + if (!hwpoison_user_mappings(p, pfn, flags, &head)) { action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); res = -EBUSY; goto out; @@ -1105,7 +1102,6 @@ out: /** * memory_failure - Handle memory failure of a page. * @pfn: Page Number of the corrupted page - * @trapno: Trap number reported in the signal to user space. * @flags: fine tune action taken * * This function is called by the low level machine check code @@ -1120,7 +1116,7 @@ out: * Must run in process context (e.g. a work queue) with interrupts * enabled and no spinlocks hold. */ -int memory_failure(unsigned long pfn, int trapno, int flags) +int memory_failure(unsigned long pfn, int flags) { struct page *p; struct page *hpage; @@ -1129,7 +1125,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) unsigned long page_flags; if (!sysctl_memory_failure_recovery) - panic("Memory failure from trap %d on page %lx", trapno, pfn); + panic("Memory failure on page %lx", pfn); if (!pfn_valid(pfn)) { pr_err("Memory failure: %#lx: memory outside kernel control\n", @@ -1139,7 +1135,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) p = pfn_to_page(pfn); if (PageHuge(p)) - return memory_failure_hugetlb(pfn, trapno, flags); + return memory_failure_hugetlb(pfn, flags); if (TestSetPageHWPoison(p)) { pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); @@ -1268,7 +1264,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) * When the raw error page is thp tail page, hpage points to the raw * page after thp split. */ - if (!hwpoison_user_mappings(p, pfn, trapno, flags, &hpage)) { + if (!hwpoison_user_mappings(p, pfn, flags, &hpage)) { action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); res = -EBUSY; goto out; @@ -1296,7 +1292,6 @@ EXPORT_SYMBOL_GPL(memory_failure); struct memory_failure_entry { unsigned long pfn; - int trapno; int flags; }; @@ -1312,7 +1307,6 @@ static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu); /** * memory_failure_queue - Schedule handling memory failure of a page. * @pfn: Page Number of the corrupted page - * @trapno: Trap number reported in the signal to user space. * @flags: Flags for memory failure handling * * This function is called by the low level hardware error handler @@ -1326,13 +1320,12 @@ static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu); * * Can run in IRQ context. */ -void memory_failure_queue(unsigned long pfn, int trapno, int flags) +void memory_failure_queue(unsigned long pfn, int flags) { struct memory_failure_cpu *mf_cpu; unsigned long proc_flags; struct memory_failure_entry entry = { .pfn = pfn, - .trapno = trapno, .flags = flags, }; @@ -1365,7 +1358,7 @@ static void memory_failure_work_func(struct work_struct *work) if (entry.flags & MF_SOFT_OFFLINE) soft_offline_page(pfn_to_page(entry.pfn), entry.flags); else - memory_failure(entry.pfn, entry.trapno, entry.flags); + memory_failure(entry.pfn, entry.flags); } } -- cgit v1.2.3 From 7f2c39e91f61fcd2abed3b39c14e7037c060c6f1 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Tue, 23 Jan 2018 12:31:36 +0100 Subject: powerpc/powernv: Introduce new PHB type for opencapi links The NPU was already abstracted by opal as a virtual PHB for nvlink, but it helps to be able to differentiate between a nvlink or opencapi PHB, as it's not completely transparent to linux. In particular, PE assignment differs and we'll also need the information in later patches. So rename existing PNV_PHB_NPU type to PNV_PHB_NPU_NVLINK and add a new type PNV_PHB_NPU_OCAPI. Signed-off-by: Frederic Barrat Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/npu-dma.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 41 +++++++++++++++++++++++++------ arch/powerpc/platforms/powernv/pci.c | 4 +++ arch/powerpc/platforms/powernv/pci.h | 8 +++--- 4 files changed, 43 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index f6cbc1a71472..c5899c107d59 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -277,7 +277,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) int64_t rc = 0; phys_addr_t top = memblock_end_of_DRAM(); - if (phb->type != PNV_PHB_NPU || !npe->pdev) + if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev) return -EINVAL; rc = pnv_npu_unset_window(npe, 0); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 973cfb698180..c1e235677c73 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -54,7 +54,8 @@ #define POWERNV_IOMMU_DEFAULT_LEVELS 1 #define POWERNV_IOMMU_MAX_LEVELS 5 -static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU" }; +static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK", + "NPU_OCAPI" }; static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, @@ -933,7 +934,7 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) * Configure PELTV. NPUs don't have a PELTV table so skip * configuration on them. */ - if (phb->type != PNV_PHB_NPU) + if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI) pnv_ioda_set_peltv(phb, pe, true); /* Setup reverse map */ @@ -1281,16 +1282,23 @@ static void pnv_pci_ioda_setup_PEs(void) { struct pci_controller *hose, *tmp; struct pnv_phb *phb; + struct pci_bus *bus; + struct pci_dev *pdev; list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; - if (phb->type == PNV_PHB_NPU) { + if (phb->type == PNV_PHB_NPU_NVLINK) { /* PE#0 is needed for error reporting */ pnv_ioda_reserve_pe(phb, 0); pnv_ioda_setup_npu_PEs(hose->bus); if (phb->model == PNV_PHB_MODEL_NPU2) pnv_npu2_init(phb); } + if (phb->type == PNV_PHB_NPU_OCAPI) { + bus = hose->bus; + list_for_each_entry(pdev, &bus->devices, bus_list) + pnv_ioda_setup_dev_PE(pdev); + } } } @@ -2648,7 +2656,7 @@ static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque) hose = pci_bus_to_host(pdev->bus); phb = hose->private_data; - if (phb->type != PNV_PHB_NPU) + if (phb->type != PNV_PHB_NPU_NVLINK) return 0; *ptmppe = &phb->ioda.pe_array[pdn->pe_number]; @@ -2732,7 +2740,7 @@ static void pnv_pci_ioda_setup_iommu_api(void) list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; - if (phb->type != PNV_PHB_NPU) + if (phb->type != PNV_PHB_NPU_NVLINK) continue; list_for_each_entry(pe, &phb->ioda.pe_list, list) { @@ -3782,6 +3790,13 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { .shutdown = pnv_pci_ioda_shutdown, }; +static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { + .enable_device_hook = pnv_pci_enable_device_hook, + .window_alignment = pnv_pci_window_alignment, + .reset_secondary_bus = pnv_pci_reset_secondary_bus, + .shutdown = pnv_pci_ioda_shutdown, +}; + #ifdef CONFIG_CXL_BASE const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = { .dma_dev_setup = pnv_pci_dma_dev_setup, @@ -4015,9 +4030,14 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, */ ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; - if (phb->type == PNV_PHB_NPU) { + switch (phb->type) { + case PNV_PHB_NPU_NVLINK: hose->controller_ops = pnv_npu_ioda_controller_ops; - } else { + break; + case PNV_PHB_NPU_OCAPI: + hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops; + break; + default: phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; hose->controller_ops = pnv_pci_ioda_controller_ops; } @@ -4063,7 +4083,12 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np) void __init pnv_pci_init_npu_phb(struct device_node *np) { - pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU); + pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_NVLINK); +} + +void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np) +{ + pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI); } void __init pnv_pci_init_ioda_hub(struct device_node *np) diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 5422f4a6317c..69d102cbf48f 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -1142,6 +1142,10 @@ void __init pnv_pci_init(void) for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb") pnv_pci_init_npu_phb(np); + /* Look for NPU2 OpenCAPI PHBs */ + for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb") + pnv_pci_init_npu2_opencapi_phb(np); + /* Configure IOMMU DMA hooks */ set_pci_dma_ops(&dma_iommu_ops); } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index b772d7473896..eada4b6068cb 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -12,9 +12,10 @@ struct pci_dn; #define NV_NMMU_ATSD_REGS 8 enum pnv_phb_type { - PNV_PHB_IODA1 = 0, - PNV_PHB_IODA2 = 1, - PNV_PHB_NPU = 2, + PNV_PHB_IODA1 = 0, + PNV_PHB_IODA2 = 1, + PNV_PHB_NPU_NVLINK = 2, + PNV_PHB_NPU_OCAPI = 3, }; /* Precise PHB model for error management */ @@ -227,6 +228,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_init_npu_phb(struct device_node *np); +extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np); extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); -- cgit v1.2.3 From 228c2f4103788ba2a8df636f383ec2df33b47b73 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Tue, 23 Jan 2018 12:31:37 +0100 Subject: powerpc/powernv: Set correct configuration space size for opencapi devices The configuration space for opencapi devices doesn't have a PCI Express capability, therefore confusing linux in thinking it's of an old PCI type with a 256-byte configuration space size, instead of the desired 4k. So add a PCI fixup to declare the correct size. Signed-off-by: Andrew Donnellan Signed-off-by: Frederic Barrat Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c1e235677c73..886014ed85fe 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -4091,6 +4091,19 @@ void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np) pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI); } +static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + if (!machine_is(powernv)) + return; + + if (phb->type == PNV_PHB_NPU_OCAPI) + dev->cfg_size = PCI_CFG_SPACE_EXP_SIZE; +} +DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pnv_npu2_opencapi_cfg_size_fixup); + void __init pnv_pci_init_ioda_hub(struct device_node *np) { struct device_node *phbn; -- cgit v1.2.3 From 74d656d219b98ef3b96f92439337aa6392a7577d Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Tue, 23 Jan 2018 12:31:38 +0100 Subject: powerpc/powernv: Add opal calls for opencapi Add opal calls to interact with the NPU: OPAL_NPU_SPA_SETUP: set the Shared Process Area (SPA) The SPA is a table containing one entry (Process Element) per memory context which can be accessed by the opencapi device. OPAL_NPU_SPA_CLEAR_CACHE: clear the context cache The NPU keeps a cache of recently accessed memory contexts. When a Process Element is removed from the SPA, the cache for the link must be cleared. OPAL_NPU_TL_SET: configure the Transaction Layer The Transaction Layer specification defines several templates for messages to be exchanged on the link. During link setup, the host and device must negotiate what templates are supported on both sides and at what rates those messages can be sent. Signed-off-by: Frederic Barrat Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 5 ++++- arch/powerpc/include/asm/opal.h | 6 ++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 3 +++ 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 233c7504b1f2..24c73f5575ee 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -201,7 +201,10 @@ #define OPAL_SET_POWER_SHIFT_RATIO 155 #define OPAL_SENSOR_GROUP_CLEAR 156 #define OPAL_PCI_SET_P2P 157 -#define OPAL_LAST 157 +#define OPAL_NPU_SPA_SETUP 159 +#define OPAL_NPU_SPA_CLEAR_CACHE 160 +#define OPAL_NPU_TL_SET 161 +#define OPAL_LAST 161 /* Device tree flags */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 0c545f7fc77b..12e70fb58700 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -34,6 +34,12 @@ int64_t opal_npu_init_context(uint64_t phb_id, int pasid, uint64_t msr, uint64_t bdf); int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid, uint64_t lpcr); +int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn, + uint64_t addr, uint64_t PE_mask); +int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn, + uint64_t PE_handle); +int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap, + uint64_t rate_phys, uint32_t size); int64_t opal_console_write(int64_t term_number, __be64 *length, const uint8_t *buffer); int64_t opal_console_read(int64_t term_number, __be64 *length, diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 6f4b00a2ac46..1b2936ba6040 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -320,3 +320,6 @@ OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP); OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO); OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO); OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR); +OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP); +OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE); +OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET); -- cgit v1.2.3 From 6914c757118e2a60ba826d9959ccf5532779781b Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Tue, 23 Jan 2018 12:31:39 +0100 Subject: powerpc/powernv: Add platform-specific services for opencapi Implement a few platform-specific calls which can be used by drivers: - provide the Transaction Layer capabilities of the host, so that the driver can find some common ground and configure the device and host appropriately. - provide the hw interrupt to be used for translation faults raised by the NPU - map/unmap some NPU mmio registers to get the fault context when the NPU raises an address translation fault The rest are wrappers around the previously-introduced opal calls. Signed-off-by: Frederic Barrat Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pnv-ocxl.h | 29 +++++ arch/powerpc/platforms/powernv/Makefile | 1 + arch/powerpc/platforms/powernv/ocxl.c | 180 ++++++++++++++++++++++++++++++++ 3 files changed, 210 insertions(+) create mode 100644 arch/powerpc/include/asm/pnv-ocxl.h create mode 100644 arch/powerpc/platforms/powernv/ocxl.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h new file mode 100644 index 000000000000..36868d49aeed --- /dev/null +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#ifndef _ASM_PNV_OCXL_H +#define _ASM_PNV_OCXL_H + +#include + +#define PNV_OCXL_TL_MAX_TEMPLATE 63 +#define PNV_OCXL_TL_BITS_PER_RATE 4 +#define PNV_OCXL_TL_RATE_BUF_SIZE ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8) + +extern int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, + char *rate_buf, int rate_buf_size); +extern int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, + uint64_t rate_buf_phys, int rate_buf_size); + +extern int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq); +extern void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar, + void __iomem *tfc, void __iomem *pe_handle); +extern int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr, + void __iomem **dar, void __iomem **tfc, + void __iomem **pe_handle); + +extern int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, + void **platform_data); +extern void pnv_ocxl_spa_release(void *platform_data); +extern int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle); + +#endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 3732118a0482..6c9d5199a7e2 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_PERF_EVENTS) += opal-imc.o obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o obj-$(CONFIG_PPC_FTW) += nx-ftw.o +obj-$(CONFIG_OCXL_BASE) += ocxl.o diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c new file mode 100644 index 000000000000..d61186805a07 --- /dev/null +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include "pci.h" + +#define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full +/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */ +#define PNV_OCXL_PASID_BITS 15 +#define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1) + + +static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf) +{ + int shift, idx; + + WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE); + idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2; + shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2)); + buf[idx] |= rate << shift; +} + +int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, + char *rate_buf, int rate_buf_size) +{ + if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE) + return -EINVAL; + /* + * The TL capabilities are a characteristic of the NPU, so + * we go with hard-coded values. + * + * The receiving rate of each template is encoded on 4 bits. + * + * On P9: + * - templates 0 -> 3 are supported + * - templates 0, 1 and 3 have a 0 receiving rate + * - template 2 has receiving rate of 1 (extra cycle) + */ + memset(rate_buf, 0, rate_buf_size); + set_templ_rate(2, 1, rate_buf); + *cap = PNV_OCXL_TL_P9_RECV_CAP; + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap); + +int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, + uint64_t rate_buf_phys, int rate_buf_size) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int rc; + + if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE) + return -EINVAL; + + rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap, + rate_buf_phys, rate_buf_size); + if (rc) { + dev_err(&dev->dev, "Can't configure host TL: %d\n", rc); + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf); + +int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq) +{ + int rc; + + rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq); + if (rc) { + dev_err(&dev->dev, + "Can't get translation interrupt for device\n"); + return rc; + } + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq); + +void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar, + void __iomem *tfc, void __iomem *pe_handle) +{ + iounmap(dsisr); + iounmap(dar); + iounmap(tfc); + iounmap(pe_handle); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs); + +int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr, + void __iomem **dar, void __iomem **tfc, + void __iomem **pe_handle) +{ + u64 reg; + int i, j, rc = 0; + void __iomem *regs[4]; + + /* + * opal stores the mmio addresses of the DSISR, DAR, TFC and + * PE_HANDLE registers in a device tree property, in that + * order + */ + for (i = 0; i < 4; i++) { + rc = of_property_read_u64_index(dev->dev.of_node, + "ibm,opal-xsl-mmio", i, ®); + if (rc) + break; + regs[i] = ioremap(reg, 8); + if (!regs[i]) { + rc = -EINVAL; + break; + } + } + if (rc) { + dev_err(&dev->dev, "Can't map translation mmio registers\n"); + for (j = i - 1; j >= 0; j--) + iounmap(regs[j]); + } else { + *dsisr = regs[0]; + *dar = regs[1]; + *tfc = regs[2]; + *pe_handle = regs[3]; + } + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs); + +struct spa_data { + u64 phb_opal_id; + u32 bdfn; +}; + +int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, + void **platform_data) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct spa_data *data; + u32 bdfn; + int rc; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + bdfn = (dev->bus->number << 8) | dev->devfn; + rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem), + PE_mask); + if (rc) { + dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc); + kfree(data); + return rc; + } + data->phb_opal_id = phb->opal_id; + data->bdfn = bdfn; + *platform_data = (void *) data; + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup); + +void pnv_ocxl_spa_release(void *platform_data) +{ + struct spa_data *data = (struct spa_data *) platform_data; + int rc; + + rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0); + WARN_ON(rc); + kfree(data); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release); + +int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle) +{ + struct spa_data *data = (struct spa_data *) platform_data; + int rc; + + rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle); + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe); -- cgit v1.2.3 From 2cb3d64b26984703a6bb80e66adcc3727ad37f9f Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Tue, 23 Jan 2018 12:31:40 +0100 Subject: powerpc/powernv: Capture actag information for the device In the opencapi protocol, host memory contexts are referenced by a 'actag'. During setup, a driver must tell the device how many actags it can used, and what values are acceptable. On POWER9, the NPU can handle 64 actags per link, so they must be shared between all the PCI functions of the link. To get a global picture of how many actags are used by each AFU of every function, we capture some data at the end of PCI enumeration, so that actags can be shared fairly if needed. This is not powernv specific per say, but rather a consequence of the opencapi configuration specification being quite general. The number of available actags on POWER9 makes it more likely to be hit. This is somewhat mitigated by the fact that existing AFUs are coded by requesting a reasonable count of actags and existing devices carry only one AFU. Signed-off-by: Frederic Barrat Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pnv-ocxl.h | 4 + arch/powerpc/platforms/powernv/ocxl.c | 305 ++++++++++++++++++++++++++++++++++ include/misc/ocxl-config.h | 45 +++++ 3 files changed, 354 insertions(+) create mode 100644 include/misc/ocxl-config.h (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index 36868d49aeed..398d05b30600 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -9,6 +9,10 @@ #define PNV_OCXL_TL_BITS_PER_RATE 4 #define PNV_OCXL_TL_RATE_BUF_SIZE ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8) +extern int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, + u16 *supported); +extern int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count); + extern int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, char *rate_buf, int rate_buf_size); extern int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index d61186805a07..1faaa4ef6903 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -2,13 +2,318 @@ // Copyright 2017 IBM Corp. #include #include +#include #include "pci.h" #define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full +#define PNV_OCXL_ACTAG_MAX 64 /* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */ #define PNV_OCXL_PASID_BITS 15 #define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1) +#define AFU_PRESENT (1 << 31) +#define AFU_INDEX_MASK 0x3F000000 +#define AFU_INDEX_SHIFT 24 +#define ACTAG_MASK 0xFFF + + +struct actag_range { + u16 start; + u16 count; +}; + +struct npu_link { + struct list_head list; + int domain; + int bus; + int dev; + u16 fn_desired_actags[8]; + struct actag_range fn_actags[8]; + bool assignment_done; +}; +static struct list_head links_list = LIST_HEAD_INIT(links_list); +static DEFINE_MUTEX(links_list_lock); + + +/* + * opencapi actags handling: + * + * When sending commands, the opencapi device references the memory + * context it's targeting with an 'actag', which is really an alias + * for a (BDF, pasid) combination. When it receives a command, the NPU + * must do a lookup of the actag to identify the memory context. The + * hardware supports a finite number of actags per link (64 for + * POWER9). + * + * The device can carry multiple functions, and each function can have + * multiple AFUs. Each AFU advertises in its config space the number + * of desired actags. The host must configure in the config space of + * the AFU how many actags the AFU is really allowed to use (which can + * be less than what the AFU desires). + * + * When a PCI function is probed by the driver, it has no visibility + * about the other PCI functions and how many actags they'd like, + * which makes it impossible to distribute actags fairly among AFUs. + * + * Unfortunately, the only way to know how many actags a function + * desires is by looking at the data for each AFU in the config space + * and add them up. Similarly, the only way to know how many actags + * all the functions of the physical device desire is by adding the + * previously computed function counts. Then we can match that against + * what the hardware supports. + * + * To get a comprehensive view, we use a 'pci fixup': at the end of + * PCI enumeration, each function counts how many actags its AFUs + * desire and we save it in a 'npu_link' structure, shared between all + * the PCI functions of a same device. Therefore, when the first + * function is probed by the driver, we can get an idea of the total + * count of desired actags for the device, and assign the actags to + * the AFUs, by pro-rating if needed. + */ + +static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos) +{ + int vsec = pos; + u16 vendor, id; + + while ((vsec = pci_find_next_ext_capability(dev, vsec, + OCXL_EXT_CAP_ID_DVSEC))) { + pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, + &vendor); + pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); + if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id) + return vsec; + } + return 0; +} + +static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) +{ + int vsec = 0; + u8 idx; + + while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID, + vsec))) { + pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, + &idx); + if (idx == afu_idx) + return vsec; + } + return 0; +} + +static int get_max_afu_index(struct pci_dev *dev, int *afu_idx) +{ + int pos; + u32 val; + + pos = find_dvsec_from_pos(dev, OCXL_DVSEC_FUNC_ID, 0); + if (!pos) + return -ESRCH; + + pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val); + if (val & AFU_PRESENT) + *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT; + else + *afu_idx = -1; + return 0; +} + +static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag) +{ + int pos; + u16 actag_sup; + + pos = find_dvsec_afu_ctrl(dev, afu_idx); + if (!pos) + return -ESRCH; + + pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, + &actag_sup); + *actag = actag_sup & ACTAG_MASK; + return 0; +} + +static struct npu_link *find_link(struct pci_dev *dev) +{ + struct npu_link *link; + + list_for_each_entry(link, &links_list, list) { + /* The functions of a device all share the same link */ + if (link->domain == pci_domain_nr(dev->bus) && + link->bus == dev->bus->number && + link->dev == PCI_SLOT(dev->devfn)) { + return link; + } + } + + /* link doesn't exist yet. Allocate one */ + link = kzalloc(sizeof(struct npu_link), GFP_KERNEL); + if (!link) + return NULL; + link->domain = pci_domain_nr(dev->bus); + link->bus = dev->bus->number; + link->dev = PCI_SLOT(dev->devfn); + list_add(&link->list, &links_list); + return link; +} + +static void pnv_ocxl_fixup_actag(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct npu_link *link; + int rc, afu_idx = -1, i, actag; + + if (!machine_is(powernv)) + return; + + if (phb->type != PNV_PHB_NPU_OCAPI) + return; + + mutex_lock(&links_list_lock); + + link = find_link(dev); + if (!link) { + dev_warn(&dev->dev, "couldn't update actag information\n"); + mutex_unlock(&links_list_lock); + return; + } + + /* + * Check how many actags are desired for the AFUs under that + * function and add it to the count for the link + */ + rc = get_max_afu_index(dev, &afu_idx); + if (rc) { + /* Most likely an invalid config space */ + dev_dbg(&dev->dev, "couldn't find AFU information\n"); + afu_idx = -1; + } + + link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0; + for (i = 0; i <= afu_idx; i++) { + /* + * AFU index 'holes' are allowed. So don't fail if we + * can't read the actag info for an index + */ + rc = get_actag_count(dev, i, &actag); + if (rc) + continue; + link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag; + } + dev_dbg(&dev->dev, "total actags for function: %d\n", + link->fn_desired_actags[PCI_FUNC(dev->devfn)]); + + mutex_unlock(&links_list_lock); +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag); + +static u16 assign_fn_actags(u16 desired, u16 total) +{ + u16 count; + + if (total <= PNV_OCXL_ACTAG_MAX) + count = desired; + else + count = PNV_OCXL_ACTAG_MAX * desired / total; + + return count; +} + +static void assign_actags(struct npu_link *link) +{ + u16 actag_count, range_start = 0, total_desired = 0; + int i; + + for (i = 0; i < 8; i++) + total_desired += link->fn_desired_actags[i]; + + for (i = 0; i < 8; i++) { + if (link->fn_desired_actags[i]) { + actag_count = assign_fn_actags( + link->fn_desired_actags[i], + total_desired); + link->fn_actags[i].start = range_start; + link->fn_actags[i].count = actag_count; + range_start += actag_count; + WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX); + } + pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n", + link->domain, link->bus, link->dev, i, + link->fn_actags[i].start, link->fn_actags[i].count, + link->fn_desired_actags[i]); + } + link->assignment_done = true; +} + +int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, + u16 *supported) +{ + struct npu_link *link; + + mutex_lock(&links_list_lock); + + link = find_link(dev); + if (!link) { + dev_err(&dev->dev, "actag information not found\n"); + mutex_unlock(&links_list_lock); + return -ENODEV; + } + /* + * On p9, we only have 64 actags per link, so they must be + * shared by all the functions of the same adapter. We counted + * the desired actag counts during PCI enumeration, so that we + * can allocate a pro-rated number of actags to each function. + */ + if (!link->assignment_done) + assign_actags(link); + + *base = link->fn_actags[PCI_FUNC(dev->devfn)].start; + *enabled = link->fn_actags[PCI_FUNC(dev->devfn)].count; + *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)]; + + mutex_unlock(&links_list_lock); + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag); + +int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count) +{ + struct npu_link *link; + int i, rc = -EINVAL; + + /* + * The number of PASIDs (process address space ID) which can + * be used by a function depends on how many functions exist + * on the device. The NPU needs to be configured to know how + * many bits are available to PASIDs and how many are to be + * used by the function BDF indentifier. + * + * We only support one AFU-carrying function for now. + */ + mutex_lock(&links_list_lock); + + link = find_link(dev); + if (!link) { + dev_err(&dev->dev, "actag information not found\n"); + mutex_unlock(&links_list_lock); + return -ENODEV; + } + + for (i = 0; i < 8; i++) + if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) { + *count = PNV_OCXL_PASID_MAX; + rc = 0; + break; + } + + mutex_unlock(&links_list_lock); + dev_dbg(&dev->dev, "%d PASIDs available for function\n", + rc ? 0 : *count); + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count); static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf) { diff --git a/include/misc/ocxl-config.h b/include/misc/ocxl-config.h new file mode 100644 index 000000000000..3526fa996a22 --- /dev/null +++ b/include/misc/ocxl-config.h @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#ifndef _OCXL_CONFIG_H_ +#define _OCXL_CONFIG_H_ + +/* + * This file lists the various constants used to read the + * configuration space of an opencapi adapter. + * + * It follows the specification for opencapi 3.0 + */ + +#define OCXL_EXT_CAP_ID_DVSEC 0x23 + +#define OCXL_DVSEC_VENDOR_OFFSET 0x4 +#define OCXL_DVSEC_ID_OFFSET 0x8 +#define OCXL_DVSEC_TL_ID 0xF000 +#define OCXL_DVSEC_TL_BACKOFF_TIMERS 0x10 +#define OCXL_DVSEC_TL_RECV_CAP 0x18 +#define OCXL_DVSEC_TL_SEND_CAP 0x20 +#define OCXL_DVSEC_TL_RECV_RATE 0x30 +#define OCXL_DVSEC_TL_SEND_RATE 0x50 +#define OCXL_DVSEC_FUNC_ID 0xF001 +#define OCXL_DVSEC_FUNC_OFF_INDEX 0x08 +#define OCXL_DVSEC_FUNC_OFF_ACTAG 0x0C +#define OCXL_DVSEC_AFU_INFO_ID 0xF003 +#define OCXL_DVSEC_AFU_INFO_AFU_IDX 0x0A +#define OCXL_DVSEC_AFU_INFO_OFF 0x0C +#define OCXL_DVSEC_AFU_INFO_DATA 0x10 +#define OCXL_DVSEC_AFU_CTRL_ID 0xF004 +#define OCXL_DVSEC_AFU_CTRL_AFU_IDX 0x0A +#define OCXL_DVSEC_AFU_CTRL_TERM_PASID 0x0C +#define OCXL_DVSEC_AFU_CTRL_ENABLE 0x0F +#define OCXL_DVSEC_AFU_CTRL_PASID_SUP 0x10 +#define OCXL_DVSEC_AFU_CTRL_PASID_EN 0x11 +#define OCXL_DVSEC_AFU_CTRL_PASID_BASE 0x14 +#define OCXL_DVSEC_AFU_CTRL_ACTAG_SUP 0x18 +#define OCXL_DVSEC_AFU_CTRL_ACTAG_EN 0x1A +#define OCXL_DVSEC_AFU_CTRL_ACTAG_BASE 0x1C +#define OCXL_DVSEC_VENDOR_ID 0xF0F0 +#define OCXL_DVSEC_VENDOR_CFG_VERS 0x0C +#define OCXL_DVSEC_VENDOR_TLX_VERS 0x10 +#define OCXL_DVSEC_VENDOR_DLX_VERS 0x20 + +#endif /* _OCXL_CONFIG_H_ */ -- cgit v1.2.3 From aeddad1760aeb206d912b27b230269407efd5b06 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Tue, 23 Jan 2018 12:31:42 +0100 Subject: ocxl: Add AFU interrupt support Add user APIs through ioctl to allocate, free, and be notified of an AFU interrupt. For opencapi, an AFU can trigger an interrupt on the host by sending a specific command targeting a 64-bit object handle. On POWER9, this is implemented by mapping a special page in the address space of a process and a write to that page will trigger an interrupt. Signed-off-by: Frederic Barrat Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pnv-ocxl.h | 3 + arch/powerpc/platforms/powernv/ocxl.c | 30 ++++++ drivers/misc/ocxl/afu_irq.c | 197 ++++++++++++++++++++++++++++++++++ drivers/misc/ocxl/context.c | 51 ++++++++- drivers/misc/ocxl/file.c | 34 ++++++ drivers/misc/ocxl/link.c | 28 +++++ drivers/misc/ocxl/ocxl_internal.h | 7 ++ include/uapi/misc/ocxl.h | 9 ++ 8 files changed, 357 insertions(+), 2 deletions(-) create mode 100644 drivers/misc/ocxl/afu_irq.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index 398d05b30600..f6945d3bc971 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -30,4 +30,7 @@ extern int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, extern void pnv_ocxl_spa_release(void *platform_data); extern int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle); +extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr); +extern void pnv_ocxl_free_xive_irq(u32 irq); + #endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index 1faaa4ef6903..fa9b53af3c7b 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -2,6 +2,7 @@ // Copyright 2017 IBM Corp. #include #include +#include #include #include "pci.h" @@ -483,3 +484,32 @@ int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle) return rc; } EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe); + +int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr) +{ + __be64 flags, trigger_page; + s64 rc; + u32 hwirq; + + hwirq = xive_native_alloc_irq(); + if (!hwirq) + return -ENOENT; + + rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL, + NULL); + if (rc || !trigger_page) { + xive_native_free_irq(hwirq); + return -ENOENT; + } + *irq = hwirq; + *trigger_addr = be64_to_cpu(trigger_page); + return 0; + +} +EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq); + +void pnv_ocxl_free_xive_irq(u32 irq) +{ + xive_native_free_irq(irq); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq); diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c new file mode 100644 index 000000000000..f40d853de401 --- /dev/null +++ b/drivers/misc/ocxl/afu_irq.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include "ocxl_internal.h" + +struct afu_irq { + int id; + int hw_irq; + unsigned int virq; + char *name; + u64 trigger_page; + struct eventfd_ctx *ev_ctx; +}; + +static int irq_offset_to_id(struct ocxl_context *ctx, u64 offset) +{ + return (offset - ctx->afu->irq_base_offset) >> PAGE_SHIFT; +} + +static u64 irq_id_to_offset(struct ocxl_context *ctx, int id) +{ + return ctx->afu->irq_base_offset + (id << PAGE_SHIFT); +} + +static irqreturn_t afu_irq_handler(int virq, void *data) +{ + struct afu_irq *irq = (struct afu_irq *) data; + + if (irq->ev_ctx) + eventfd_signal(irq->ev_ctx, 1); + return IRQ_HANDLED; +} + +static int setup_afu_irq(struct ocxl_context *ctx, struct afu_irq *irq) +{ + int rc; + + irq->virq = irq_create_mapping(NULL, irq->hw_irq); + if (!irq->virq) { + pr_err("irq_create_mapping failed\n"); + return -ENOMEM; + } + pr_debug("hw_irq %d mapped to virq %u\n", irq->hw_irq, irq->virq); + + irq->name = kasprintf(GFP_KERNEL, "ocxl-afu-%u", irq->virq); + if (!irq->name) { + irq_dispose_mapping(irq->virq); + return -ENOMEM; + } + + rc = request_irq(irq->virq, afu_irq_handler, 0, irq->name, irq); + if (rc) { + kfree(irq->name); + irq->name = NULL; + irq_dispose_mapping(irq->virq); + pr_err("request_irq failed: %d\n", rc); + return rc; + } + return 0; +} + +static void release_afu_irq(struct afu_irq *irq) +{ + free_irq(irq->virq, irq); + irq_dispose_mapping(irq->virq); + kfree(irq->name); +} + +int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset) +{ + struct afu_irq *irq; + int rc; + + irq = kzalloc(sizeof(struct afu_irq), GFP_KERNEL); + if (!irq) + return -ENOMEM; + + /* + * We limit the number of afu irqs per context and per link to + * avoid a single process or user depleting the pool of IPIs + */ + + mutex_lock(&ctx->irq_lock); + + irq->id = idr_alloc(&ctx->irq_idr, irq, 0, MAX_IRQ_PER_CONTEXT, + GFP_KERNEL); + if (irq->id < 0) { + rc = -ENOSPC; + goto err_unlock; + } + + rc = ocxl_link_irq_alloc(ctx->afu->fn->link, &irq->hw_irq, + &irq->trigger_page); + if (rc) + goto err_idr; + + rc = setup_afu_irq(ctx, irq); + if (rc) + goto err_alloc; + + *irq_offset = irq_id_to_offset(ctx, irq->id); + + mutex_unlock(&ctx->irq_lock); + return 0; + +err_alloc: + ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq); +err_idr: + idr_remove(&ctx->irq_idr, irq->id); +err_unlock: + mutex_unlock(&ctx->irq_lock); + kfree(irq); + return rc; +} + +static void afu_irq_free(struct afu_irq *irq, struct ocxl_context *ctx) +{ + if (ctx->mapping) + unmap_mapping_range(ctx->mapping, + irq_id_to_offset(ctx, irq->id), + 1 << PAGE_SHIFT, 1); + release_afu_irq(irq); + if (irq->ev_ctx) + eventfd_ctx_put(irq->ev_ctx); + ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq); + kfree(irq); +} + +int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset) +{ + struct afu_irq *irq; + int id = irq_offset_to_id(ctx, irq_offset); + + mutex_lock(&ctx->irq_lock); + + irq = idr_find(&ctx->irq_idr, id); + if (!irq) { + mutex_unlock(&ctx->irq_lock); + return -EINVAL; + } + idr_remove(&ctx->irq_idr, irq->id); + afu_irq_free(irq, ctx); + mutex_unlock(&ctx->irq_lock); + return 0; +} + +void ocxl_afu_irq_free_all(struct ocxl_context *ctx) +{ + struct afu_irq *irq; + int id; + + mutex_lock(&ctx->irq_lock); + idr_for_each_entry(&ctx->irq_idr, irq, id) + afu_irq_free(irq, ctx); + mutex_unlock(&ctx->irq_lock); +} + +int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, int eventfd) +{ + struct afu_irq *irq; + struct eventfd_ctx *ev_ctx; + int rc = 0, id = irq_offset_to_id(ctx, irq_offset); + + mutex_lock(&ctx->irq_lock); + irq = idr_find(&ctx->irq_idr, id); + if (!irq) { + rc = -EINVAL; + goto unlock; + } + + ev_ctx = eventfd_ctx_fdget(eventfd); + if (IS_ERR(ev_ctx)) { + rc = -EINVAL; + goto unlock; + } + + irq->ev_ctx = ev_ctx; +unlock: + mutex_unlock(&ctx->irq_lock); + return rc; +} + +u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset) +{ + struct afu_irq *irq; + int id = irq_offset_to_id(ctx, irq_offset); + u64 addr = 0; + + mutex_lock(&ctx->irq_lock); + irq = idr_find(&ctx->irq_idr, id); + if (irq) + addr = irq->trigger_page; + mutex_unlock(&ctx->irq_lock); + return addr; +} diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c index b34b836f924c..269149490063 100644 --- a/drivers/misc/ocxl/context.c +++ b/drivers/misc/ocxl/context.c @@ -31,6 +31,8 @@ int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, mutex_init(&ctx->mapping_lock); init_waitqueue_head(&ctx->events_wq); mutex_init(&ctx->xsl_error_lock); + mutex_init(&ctx->irq_lock); + idr_init(&ctx->irq_idr); /* * Keep a reference on the AFU to make sure it's valid for the * duration of the life of the context @@ -80,6 +82,19 @@ out: return rc; } +static int map_afu_irq(struct vm_area_struct *vma, unsigned long address, + u64 offset, struct ocxl_context *ctx) +{ + u64 trigger_addr; + + trigger_addr = ocxl_afu_irq_get_addr(ctx, offset); + if (!trigger_addr) + return VM_FAULT_SIGBUS; + + vm_insert_pfn(vma, address, trigger_addr >> PAGE_SHIFT); + return VM_FAULT_NOPAGE; +} + static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address, u64 offset, struct ocxl_context *ctx) { @@ -118,7 +133,10 @@ static int ocxl_mmap_fault(struct vm_fault *vmf) pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__, ctx->pasid, vmf->address, offset); - rc = map_pp_mmio(vma, vmf->address, offset, ctx); + if (offset < ctx->afu->irq_base_offset) + rc = map_pp_mmio(vma, vmf->address, offset, ctx); + else + rc = map_afu_irq(vma, vmf->address, offset, ctx); return rc; } @@ -126,6 +144,30 @@ static const struct vm_operations_struct ocxl_vmops = { .fault = ocxl_mmap_fault, }; +static int check_mmap_afu_irq(struct ocxl_context *ctx, + struct vm_area_struct *vma) +{ + /* only one page */ + if (vma_pages(vma) != 1) + return -EINVAL; + + /* check offset validty */ + if (!ocxl_afu_irq_get_addr(ctx, vma->vm_pgoff << PAGE_SHIFT)) + return -EINVAL; + + /* + * trigger page should only be accessible in write mode. + * + * It's a bit theoretical, as a page mmaped with only + * PROT_WRITE is currently readable, but it doesn't hurt. + */ + if ((vma->vm_flags & VM_READ) || (vma->vm_flags & VM_EXEC) || + !(vma->vm_flags & VM_WRITE)) + return -EINVAL; + vma->vm_flags &= ~(VM_MAYREAD | VM_MAYEXEC); + return 0; +} + static int check_mmap_mmio(struct ocxl_context *ctx, struct vm_area_struct *vma) { @@ -139,7 +181,10 @@ int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma) { int rc; - rc = check_mmap_mmio(ctx, vma); + if ((vma->vm_pgoff << PAGE_SHIFT) < ctx->afu->irq_base_offset) + rc = check_mmap_mmio(ctx, vma); + else + rc = check_mmap_afu_irq(ctx, vma); if (rc) return rc; @@ -224,6 +269,8 @@ void ocxl_context_free(struct ocxl_context *ctx) idr_remove(&ctx->afu->contexts_idr, ctx->pasid); mutex_unlock(&ctx->afu->contexts_lock); + ocxl_afu_irq_free_all(ctx); + idr_destroy(&ctx->irq_idr); /* reference to the AFU taken in ocxl_context_init */ ocxl_afu_put(ctx->afu); kfree(ctx); diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index 6f0befda6a8a..c90c1a578d2f 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -103,12 +103,17 @@ static long afu_ioctl_attach(struct ocxl_context *ctx, } #define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" : \ + x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" : \ + x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" : \ + x == OCXL_IOCTL_IRQ_SET_FD ? "IRQ_SET_FD" : \ "UNKNOWN") static long afu_ioctl(struct file *file, unsigned int cmd, unsigned long args) { struct ocxl_context *ctx = file->private_data; + struct ocxl_ioctl_irq_fd irq_fd; + u64 irq_offset; long rc; pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid, @@ -123,6 +128,35 @@ static long afu_ioctl(struct file *file, unsigned int cmd, (struct ocxl_ioctl_attach __user *) args); break; + case OCXL_IOCTL_IRQ_ALLOC: + rc = ocxl_afu_irq_alloc(ctx, &irq_offset); + if (!rc) { + rc = copy_to_user((u64 __user *) args, &irq_offset, + sizeof(irq_offset)); + if (rc) + ocxl_afu_irq_free(ctx, irq_offset); + } + break; + + case OCXL_IOCTL_IRQ_FREE: + rc = copy_from_user(&irq_offset, (u64 __user *) args, + sizeof(irq_offset)); + if (rc) + return -EFAULT; + rc = ocxl_afu_irq_free(ctx, irq_offset); + break; + + case OCXL_IOCTL_IRQ_SET_FD: + rc = copy_from_user(&irq_fd, (u64 __user *) args, + sizeof(irq_fd)); + if (rc) + return -EFAULT; + if (irq_fd.reserved) + return -EINVAL; + rc = ocxl_afu_irq_set_fd(ctx, irq_fd.irq_offset, + irq_fd.eventfd); + break; + default: rc = -EINVAL; } diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index 64d7a98c904a..8bdcef9c3cba 100644 --- a/drivers/misc/ocxl/link.c +++ b/drivers/misc/ocxl/link.c @@ -601,3 +601,31 @@ unlock: mutex_unlock(&spa->spa_lock); return rc; } + +int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr) +{ + struct link *link = (struct link *) link_handle; + int rc, irq; + u64 addr; + + if (atomic_dec_if_positive(&link->irq_available) < 0) + return -ENOSPC; + + rc = pnv_ocxl_alloc_xive_irq(&irq, &addr); + if (rc) { + atomic_inc(&link->irq_available); + return rc; + } + + *hw_irq = irq; + *trigger_addr = addr; + return 0; +} + +void ocxl_link_free_irq(void *link_handle, int hw_irq) +{ + struct link *link = (struct link *) link_handle; + + pnv_ocxl_free_xive_irq(hw_irq); + atomic_inc(&link->irq_available); +} diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h index 04fc160c7bd5..a89b88ac67eb 100644 --- a/drivers/misc/ocxl/ocxl_internal.h +++ b/drivers/misc/ocxl/ocxl_internal.h @@ -190,4 +190,11 @@ extern void ocxl_context_free(struct ocxl_context *ctx); extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu); extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu); +extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset); +extern int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset); +extern void ocxl_afu_irq_free_all(struct ocxl_context *ctx); +extern int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, + int eventfd); +extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset); + #endif /* _OCXL_INTERNAL_H_ */ diff --git a/include/uapi/misc/ocxl.h b/include/uapi/misc/ocxl.h index a37e34edf52f..4b0b0b756f3e 100644 --- a/include/uapi/misc/ocxl.h +++ b/include/uapi/misc/ocxl.h @@ -32,9 +32,18 @@ struct ocxl_ioctl_attach { __u64 reserved3; }; +struct ocxl_ioctl_irq_fd { + __u64 irq_offset; + __s32 eventfd; + __u32 reserved; +}; + /* ioctl numbers */ #define OCXL_MAGIC 0xCA /* AFU devices */ #define OCXL_IOCTL_ATTACH _IOW(OCXL_MAGIC, 0x10, struct ocxl_ioctl_attach) +#define OCXL_IOCTL_IRQ_ALLOC _IOR(OCXL_MAGIC, 0x11, __u64) +#define OCXL_IOCTL_IRQ_FREE _IOW(OCXL_MAGIC, 0x12, __u64) +#define OCXL_IOCTL_IRQ_SET_FD _IOW(OCXL_MAGIC, 0x13, struct ocxl_ioctl_irq_fd) #endif /* _UAPI_MISC_OCXL_H */ -- cgit v1.2.3 From 003948c54e5b5034a9bbb4923336f5aba125eae6 Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Tue, 23 Jan 2018 18:06:30 -0800 Subject: USB: serial: keyspan: Drop firmware Kconfig options The USB_SERIAL_KEYSPAN_* firmware options no longer do anything. Fixes: 5620a0d1aacd ("firmware: delete in-kernel firmware") Signed-off-by: Benjamin Gilbert Cc: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- arch/mips/configs/rm200_defconfig | 9 ---- arch/powerpc/configs/c2k_defconfig | 12 ------ arch/powerpc/configs/g5_defconfig | 12 ------ arch/powerpc/configs/maple_defconfig | 12 ------ arch/powerpc/configs/pmac32_defconfig | 12 ------ drivers/usb/serial/Kconfig | 78 ----------------------------------- 6 files changed, 135 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 99679e514042..5f71aa598b06 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -325,15 +325,6 @@ CONFIG_USB_SERIAL_EDGEPORT=m CONFIG_USB_SERIAL_EDGEPORT_TI=m CONFIG_USB_SERIAL_KEYSPAN_PDA=m CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y CONFIG_USB_SERIAL_KLSI=m CONFIG_USB_SERIAL_KOBIL_SCT=m CONFIG_USB_SERIAL_MCT_U232=m diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig index f1552af9eecc..4bb832a41d55 100644 --- a/arch/powerpc/configs/c2k_defconfig +++ b/arch/powerpc/configs/c2k_defconfig @@ -272,18 +272,6 @@ CONFIG_USB_SERIAL_EDGEPORT=m CONFIG_USB_SERIAL_EDGEPORT_TI=m CONFIG_USB_SERIAL_KEYSPAN_PDA=m CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA28=y -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -CONFIG_USB_SERIAL_KEYSPAN_USA19=y -CONFIG_USB_SERIAL_KEYSPAN_USA18X=y -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y CONFIG_USB_SERIAL_KLSI=m CONFIG_USB_SERIAL_KOBIL_SCT=m CONFIG_USB_SERIAL_MCT_U232=m diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 063817fee61c..67c39f4acede 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -189,18 +189,6 @@ CONFIG_USB_SERIAL_GARMIN=m CONFIG_USB_SERIAL_IPW=m CONFIG_USB_SERIAL_KEYSPAN_PDA=m CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA28=y -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -CONFIG_USB_SERIAL_KEYSPAN_USA19=y -CONFIG_USB_SERIAL_KEYSPAN_USA18X=y -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y CONFIG_USB_SERIAL_KLSI=m CONFIG_USB_SERIAL_KOBIL_SCT=m CONFIG_USB_SERIAL_MCT_U232=m diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 078cdb427fc9..59e47ec85336 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -82,18 +82,6 @@ CONFIG_USB_SERIAL_CYPRESS_M8=m CONFIG_USB_SERIAL_GARMIN=m CONFIG_USB_SERIAL_IPW=m CONFIG_USB_SERIAL_KEYSPAN=y -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA28=y -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -CONFIG_USB_SERIAL_KEYSPAN_USA19=y -CONFIG_USB_SERIAL_KEYSPAN_USA18X=y -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y CONFIG_USB_SERIAL_TI=m CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 1aab9a62a681..62948d198d7f 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -264,18 +264,6 @@ CONFIG_USB_SERIAL_VISOR=m CONFIG_USB_SERIAL_IPAQ=m CONFIG_USB_SERIAL_KEYSPAN_PDA=m CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA28=y -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -CONFIG_USB_SERIAL_KEYSPAN_USA19=y -CONFIG_USB_SERIAL_KEYSPAN_USA18X=y -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y CONFIG_USB_APPLEDISPLAY=m CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_EXT2_FS=y diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig index a8d5f2e4878d..716a3aa142ff 100644 --- a/drivers/usb/serial/Kconfig +++ b/drivers/usb/serial/Kconfig @@ -322,84 +322,6 @@ config USB_SERIAL_KEYSPAN To compile this driver as a module, choose M here: the module will be called keyspan. -config USB_SERIAL_KEYSPAN_MPR - bool "USB Keyspan MPR Firmware" - depends on USB_SERIAL_KEYSPAN && FIRMWARE_IN_KERNEL - help - Say Y here to include firmware for the Keyspan MPR converter. - -config USB_SERIAL_KEYSPAN_USA28 - bool "USB Keyspan USA-28 Firmware" - depends on USB_SERIAL_KEYSPAN && FIRMWARE_IN_KERNEL - help - Say Y here to include firmware for the USA-28 converter. - -config USB_SERIAL_KEYSPAN_USA28X - bool "USB Keyspan USA-28X Firmware" - depends on USB_SERIAL_KEYSPAN && FIRMWARE_IN_KERNEL - help - Say Y here to include firmware for the USA-28X converter. - Be sure you have a USA-28X, there are also 28XA and 28XB - models, the label underneath has the actual part number. - -config USB_SERIAL_KEYSPAN_USA28XA - bool "USB Keyspan USA-28XA Firmware" - depends on USB_SERIAL_KEYSPAN && FIRMWARE_IN_KERNEL - help - Say Y here to include firmware for the USA-28XA converter. - Be sure you have a USA-28XA, there are also 28X and 28XB - models, the label underneath has the actual part number. - -config USB_SERIAL_KEYSPAN_USA28XB - bool "USB Keyspan USA-28XB Firmware" - depends on USB_SERIAL_KEYSPAN && FIRMWARE_IN_KERNEL - help - Say Y here to include firmware for the USA-28XB converter. - Be sure you have a USA-28XB, there are also 28X and 28XA - models, the label underneath has the actual part number. - -config USB_SERIAL_KEYSPAN_USA19 - bool "USB Keyspan USA-19 Firmware" - depends on USB_SERIAL_KEYSPAN && FIRMWARE_IN_KERNEL - help - Say Y here to include firmware for the USA-19 converter. - -config USB_SERIAL_KEYSPAN_USA18X - bool "USB Keyspan USA-18X Firmware" - depends on USB_SERIAL_KEYSPAN && FIRMWARE_IN_KERNEL - help - Say Y here to include firmware for the USA-18X converter. - -config USB_SERIAL_KEYSPAN_USA19W - bool "USB Keyspan USA-19W Firmware" - depends on USB_SERIAL_KEYSPAN && FIRMWARE_IN_KERNEL - help - Say Y here to include firmware for the USA-19W converter. - -config USB_SERIAL_KEYSPAN_USA19QW - bool "USB Keyspan USA-19QW Firmware" - depends on USB_SERIAL_KEYSPAN && FIRMWARE_IN_KERNEL - help - Say Y here to include firmware for the USA-19QW converter. - -config USB_SERIAL_KEYSPAN_USA19QI - bool "USB Keyspan USA-19QI Firmware" - depends on USB_SERIAL_KEYSPAN && FIRMWARE_IN_KERNEL - help - Say Y here to include firmware for the USA-19QI converter. - -config USB_SERIAL_KEYSPAN_USA49W - bool "USB Keyspan USA-49W Firmware" - depends on USB_SERIAL_KEYSPAN && FIRMWARE_IN_KERNEL - help - Say Y here to include firmware for the USA-49W converter. - -config USB_SERIAL_KEYSPAN_USA49WLC - bool "USB Keyspan USA-49WLC Firmware" - depends on USB_SERIAL_KEYSPAN && FIRMWARE_IN_KERNEL - help - Say Y here to include firmware for the USA-49WLC converter. - config USB_SERIAL_KLSI tristate "USB KL5KUSB105 (Palmconnect) Driver" ---help--- -- cgit v1.2.3 From 7f55c733b6607d6bfc16057641f18280e587cca8 Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Tue, 23 Jan 2018 18:06:31 -0800 Subject: firmware: Drop FIRMWARE_IN_KERNEL Kconfig option It doesn't actually do anything. Merge its help text into EXTRA_FIRMWARE. Fixes: 5620a0d1aacd ("firmware: delete in-kernel firmware") Fixes: 0946b2fb38fd ("firmware: cleanup FIRMWARE_IN_KERNEL message") Signed-off-by: Benjamin Gilbert Signed-off-by: Robin H. Johnson Signed-off-by: Greg Kroah-Hartman --- arch/arc/configs/axs101_defconfig | 1 - arch/arc/configs/axs103_defconfig | 1 - arch/arc/configs/axs103_smp_defconfig | 1 - arch/arc/configs/haps_hs_defconfig | 1 - arch/arc/configs/haps_hs_smp_defconfig | 1 - arch/arc/configs/hsdk_defconfig | 1 - arch/arc/configs/nsim_700_defconfig | 1 - arch/arc/configs/nsim_hs_defconfig | 1 - arch/arc/configs/nsim_hs_smp_defconfig | 1 - arch/arc/configs/nsimosci_defconfig | 1 - arch/arc/configs/nsimosci_hs_defconfig | 1 - arch/arc/configs/nsimosci_hs_smp_defconfig | 1 - arch/arc/configs/tb10x_defconfig | 1 - arch/arc/configs/vdk_hs38_defconfig | 1 - arch/arc/configs/vdk_hs38_smp_defconfig | 1 - arch/arm/configs/cns3420vb_defconfig | 1 - arch/arm/configs/magician_defconfig | 1 - arch/arm/configs/mini2440_defconfig | 1 - arch/arm/configs/mv78xx0_defconfig | 1 - arch/arm/configs/mxs_defconfig | 1 - arch/arm/configs/orion5x_defconfig | 1 - arch/arm/configs/tegra_defconfig | 1 - arch/arm/configs/vf610m4_defconfig | 1 - arch/m68k/configs/amiga_defconfig | 1 - arch/m68k/configs/apollo_defconfig | 1 - arch/m68k/configs/atari_defconfig | 1 - arch/m68k/configs/bvme6000_defconfig | 1 - arch/m68k/configs/hp300_defconfig | 1 - arch/m68k/configs/mac_defconfig | 1 - arch/m68k/configs/multi_defconfig | 1 - arch/m68k/configs/mvme147_defconfig | 1 - arch/m68k/configs/mvme16x_defconfig | 1 - arch/m68k/configs/q40_defconfig | 1 - arch/m68k/configs/sun3_defconfig | 1 - arch/m68k/configs/sun3x_defconfig | 1 - arch/mips/configs/ar7_defconfig | 1 - arch/mips/configs/ath25_defconfig | 1 - arch/mips/configs/ath79_defconfig | 1 - arch/mips/configs/pic32mzda_defconfig | 1 - arch/mips/configs/qi_lb60_defconfig | 1 - arch/mips/configs/rt305x_defconfig | 1 - arch/mips/configs/xway_defconfig | 1 - arch/mn10300/configs/asb2364_defconfig | 1 - arch/powerpc/configs/44x/warp_defconfig | 1 - arch/powerpc/configs/mpc512x_defconfig | 1 - arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/powerpc/configs/ps3_defconfig | 1 - arch/powerpc/configs/wii_defconfig | 1 - arch/s390/configs/zfcpdump_defconfig | 1 - arch/sh/configs/polaris_defconfig | 1 - arch/tile/configs/tilegx_defconfig | 1 - arch/tile/configs/tilepro_defconfig | 1 - drivers/base/Kconfig | 28 +++++----------------------- 53 files changed, 5 insertions(+), 75 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index ec7c849a5c8e..09f85154c5a4 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -44,7 +44,6 @@ CONFIG_IP_PNP_RARP=y CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_NETDEVICES=y diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 63d3cf69e0b0..09fed3ef22b6 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -44,7 +44,6 @@ CONFIG_IP_PNP_RARP=y CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_BLK_DEV_LOOP=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index f613ecac14a7..ea2f6d817d1a 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -45,7 +45,6 @@ CONFIG_IP_PNP_RARP=y CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_BLK_DEV_LOOP=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig index db04ea4dd2d9..ab231c040efe 100644 --- a/arch/arc/configs/haps_hs_defconfig +++ b/arch/arc/configs/haps_hs_defconfig @@ -40,7 +40,6 @@ CONFIG_INET=y CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set # CONFIG_BLK_DEV is not set CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_ARC is not set diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig index 3507be2af6fe..cf449cbf440d 100644 --- a/arch/arc/configs/haps_hs_smp_defconfig +++ b/arch/arc/configs/haps_hs_smp_defconfig @@ -43,7 +43,6 @@ CONFIG_INET=y CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set # CONFIG_BLK_DEV is not set CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_ARC is not set diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 7b8f8faf8a24..128d615d1ca7 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -32,7 +32,6 @@ CONFIG_INET=y CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_NETDEVICES=y diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index 6dff83a238b8..31c2c70b34a1 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -36,7 +36,6 @@ CONFIG_INET=y CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set # CONFIG_BLK_DEV is not set CONFIG_NETDEVICES=y CONFIG_ARC_EMAC=y diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig index 31ee51b987e7..a578c721d50f 100644 --- a/arch/arc/configs/nsim_hs_defconfig +++ b/arch/arc/configs/nsim_hs_defconfig @@ -40,7 +40,6 @@ CONFIG_INET=y CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set # CONFIG_BLK_DEV is not set # CONFIG_INPUT_MOUSEDEV_PSAUX is not set # CONFIG_INPUT_KEYBOARD is not set diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig index 8d3b1f67cae4..37d7395f3272 100644 --- a/arch/arc/configs/nsim_hs_smp_defconfig +++ b/arch/arc/configs/nsim_hs_smp_defconfig @@ -39,7 +39,6 @@ CONFIG_INET=y CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set # CONFIG_BLK_DEV is not set # CONFIG_INPUT_MOUSEDEV_PSAUX is not set # CONFIG_INPUT_KEYBOARD is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 6168ce2ac2ef..1e1470e2a7f0 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -35,7 +35,6 @@ CONFIG_INET=y CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set # CONFIG_BLK_DEV is not set CONFIG_NETDEVICES=y CONFIG_EZCHIP_NPS_MANAGEMENT_ENET=y diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index a70bdeb2b3fd..084a6e42685b 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -36,7 +36,6 @@ CONFIG_INET=y CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set # CONFIG_BLK_DEV is not set CONFIG_NETDEVICES=y CONFIG_EZCHIP_NPS_MANAGEMENT_ENET=y diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index ef96406c446e..f36d47990415 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -39,7 +39,6 @@ CONFIG_INET=y CONFIG_DEVTMPFS=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set # CONFIG_BLK_DEV is not set CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_ARC is not set diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index f30182549395..1aca2e8fd1ba 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -42,7 +42,6 @@ CONFIG_IP_MULTICAST=y # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_NETDEVICES=y # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index 4fcf4f2503f6..f629493929ea 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -31,7 +31,6 @@ CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index 7b71464f6c2f..21f0ca26a05d 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -34,7 +34,6 @@ CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/arm/configs/cns3420vb_defconfig b/arch/arm/configs/cns3420vb_defconfig index 63a953d855a6..c6dcd6e4f4e6 100644 --- a/arch/arm/configs/cns3420vb_defconfig +++ b/arch/arm/configs/cns3420vb_defconfig @@ -28,7 +28,6 @@ CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_CMDLINE="console=ttyS0,38400 mem=128M root=/dev/mmcblk0p1 ro rootwait" CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/arm/configs/magician_defconfig b/arch/arm/configs/magician_defconfig index ec5674c229a3..de5be2fc7306 100644 --- a/arch/arm/configs/magician_defconfig +++ b/arch/arm/configs/magician_defconfig @@ -54,7 +54,6 @@ CONFIG_BT_BNEP_PROTO_FILTER=y CONFIG_BT_HIDP=m CONFIG_BT_HCIBTUSB=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/arm/configs/mini2440_defconfig b/arch/arm/configs/mini2440_defconfig index cf7dcb2c86e6..88ea02e7ba19 100644 --- a/arch/arm/configs/mini2440_defconfig +++ b/arch/arm/configs/mini2440_defconfig @@ -77,7 +77,6 @@ CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y CONFIG_MAC80211_LEDS=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_CONNECTOR=m CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/arm/configs/mv78xx0_defconfig b/arch/arm/configs/mv78xx0_defconfig index 752e2e74de5b..0448bd8075ac 100644 --- a/arch/arm/configs/mv78xx0_defconfig +++ b/arch/arm/configs/mv78xx0_defconfig @@ -37,7 +37,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_IPV6 is not set CONFIG_NET_PKTGEN=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig index e5822ab01b7d..bbfb6759447b 100644 --- a/arch/arm/configs/mxs_defconfig +++ b/arch/arm/configs/mxs_defconfig @@ -46,7 +46,6 @@ CONFIG_CAN_FLEXCAN=m # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig index b831baddae02..bf9046331f6e 100644 --- a/arch/arm/configs/orion5x_defconfig +++ b/arch/arm/configs/orion5x_defconfig @@ -60,7 +60,6 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_NET_DSA=y CONFIG_NET_PKTGEN=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 6678f2929356..9dccd305132b 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -76,7 +76,6 @@ CONFIG_RFKILL_INPUT=y CONFIG_RFKILL_GPIO=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=64 CONFIG_TEGRA_GMI=y diff --git a/arch/arm/configs/vf610m4_defconfig b/arch/arm/configs/vf610m4_defconfig index b7ecb83a95b6..a89f035c3b01 100644 --- a/arch/arm/configs/vf610m4_defconfig +++ b/arch/arm/configs/vf610m4_defconfig @@ -23,7 +23,6 @@ CONFIG_BINFMT_SHARED_FLAT=y # CONFIG_UEVENT_HELPER is not set # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set # CONFIG_ALLOW_DEV_COREDUMP is not set CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=4 diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 5b5fa9831b4d..c911adf52b53 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -313,7 +313,6 @@ CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_TEST_ASYNC_DRIVER_PROBE=m CONFIG_CONNECTOR=m CONFIG_PARPORT=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 72a7764b74ed..0e32b8566179 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -311,7 +311,6 @@ CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_TEST_ASYNC_DRIVER_PROBE=m CONFIG_CONNECTOR=m CONFIG_BLK_DEV_LOOP=y diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 884b43a2f0d9..e873c6f97f00 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -311,7 +311,6 @@ CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_TEST_ASYNC_DRIVER_PROBE=m CONFIG_CONNECTOR=m CONFIG_PARPORT=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index fcfa60d31499..7b42cc1c2e61 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -309,7 +309,6 @@ CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_TEST_ASYNC_DRIVER_PROBE=m CONFIG_CONNECTOR=m CONFIG_BLK_DEV_LOOP=y diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 9d597bbbbbfe..9f4b83541c31 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -311,7 +311,6 @@ CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_TEST_ASYNC_DRIVER_PROBE=m CONFIG_CONNECTOR=m CONFIG_BLK_DEV_LOOP=y diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 45da20d1286c..64094bf74822 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -313,7 +313,6 @@ CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_TEST_ASYNC_DRIVER_PROBE=m CONFIG_CONNECTOR=m CONFIG_BLK_DEV_SWIM=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index fda880c10861..9ae1f91ea690 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -323,7 +323,6 @@ CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_TEST_ASYNC_DRIVER_PROBE=m CONFIG_CONNECTOR=m CONFIG_PARPORT=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 7d5e4863efec..31586b2f852e 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -308,7 +308,6 @@ CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_TEST_ASYNC_DRIVER_PROBE=m CONFIG_CONNECTOR=m CONFIG_BLK_DEV_LOOP=y diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 7763b71a9c49..7fd5e2de8a50 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -309,7 +309,6 @@ CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_TEST_ASYNC_DRIVER_PROBE=m CONFIG_CONNECTOR=m CONFIG_BLK_DEV_LOOP=y diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 17eaebfa3e19..0339309b5175 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -309,7 +309,6 @@ CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_TEST_ASYNC_DRIVER_PROBE=m CONFIG_CONNECTOR=m CONFIG_PARPORT=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index d1cb7a04ae1d..5aef62938f32 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -306,7 +306,6 @@ CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_TEST_ASYNC_DRIVER_PROBE=m CONFIG_CONNECTOR=m CONFIG_BLK_DEV_LOOP=y diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index ea3a331c62d5..45df9a6559e5 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -306,7 +306,6 @@ CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_TEST_ASYNC_DRIVER_PROBE=m CONFIG_CONNECTOR=m CONFIG_BLK_DEV_LOOP=y diff --git a/arch/mips/configs/ar7_defconfig b/arch/mips/configs/ar7_defconfig index 92fca3c42eac..5651f4d8f45c 100644 --- a/arch/mips/configs/ar7_defconfig +++ b/arch/mips/configs/ar7_defconfig @@ -82,7 +82,6 @@ CONFIG_MAC80211=m CONFIG_MAC80211_RC_PID=y CONFIG_MAC80211_RC_DEFAULT_PID=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y diff --git a/arch/mips/configs/ath25_defconfig b/arch/mips/configs/ath25_defconfig index 2c829950be17..b8d48038e74f 100644 --- a/arch/mips/configs/ath25_defconfig +++ b/arch/mips/configs/ath25_defconfig @@ -38,7 +38,6 @@ CONFIG_CFG80211=m CONFIG_MAC80211=m CONFIG_MAC80211_DEBUGFS=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_REDBOOT_PARTS=y CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-2 diff --git a/arch/mips/configs/ath79_defconfig b/arch/mips/configs/ath79_defconfig index 25ed914933e5..951c4231bdb8 100644 --- a/arch/mips/configs/ath79_defconfig +++ b/arch/mips/configs/ath79_defconfig @@ -39,7 +39,6 @@ CONFIG_CFG80211=m CONFIG_MAC80211=m CONFIG_MAC80211_DEBUGFS=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_REDBOOT_PARTS=y CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-2 diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig index 52192c632ae8..41190c2036e6 100644 --- a/arch/mips/configs/pic32mzda_defconfig +++ b/arch/mips/configs/pic32mzda_defconfig @@ -26,7 +26,6 @@ CONFIG_BINFMT_MISC=m # CONFIG_SUSPEND is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set # CONFIG_ALLOW_DEV_COREDUMP is not set CONFIG_BLK_DEV_LOOP=m CONFIG_SCSI=y diff --git a/arch/mips/configs/qi_lb60_defconfig b/arch/mips/configs/qi_lb60_defconfig index 3f1333517405..3b02ff9a7c64 100644 --- a/arch/mips/configs/qi_lb60_defconfig +++ b/arch/mips/configs/qi_lb60_defconfig @@ -42,7 +42,6 @@ CONFIG_TCP_CONG_WESTWOOD=y # CONFIG_TCP_CONG_HTCP is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_BLOCK=y CONFIG_MTD_NAND=y diff --git a/arch/mips/configs/rt305x_defconfig b/arch/mips/configs/rt305x_defconfig index c695b7b1c4ae..dbe6a4639d05 100644 --- a/arch/mips/configs/rt305x_defconfig +++ b/arch/mips/configs/rt305x_defconfig @@ -76,7 +76,6 @@ CONFIG_VLAN_8021Q=y CONFIG_NET_SCHED=y CONFIG_HAMRADIO=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/mips/configs/xway_defconfig b/arch/mips/configs/xway_defconfig index 4365108bef77..fa750d501c11 100644 --- a/arch/mips/configs/xway_defconfig +++ b/arch/mips/configs/xway_defconfig @@ -75,7 +75,6 @@ CONFIG_VLAN_8021Q=y CONFIG_NET_SCHED=y CONFIG_HAMRADIO=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/mn10300/configs/asb2364_defconfig b/arch/mn10300/configs/asb2364_defconfig index b1d80cee97ee..a84c3153f22a 100644 --- a/arch/mn10300/configs/asb2364_defconfig +++ b/arch/mn10300/configs/asb2364_defconfig @@ -44,7 +44,6 @@ CONFIG_IPV6=y # CONFIG_INET6_XFRM_MODE_TRANSPORT is not set # CONFIG_INET6_XFRM_MODE_TUNNEL is not set # CONFIG_INET6_XFRM_MODE_BEET is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_DEBUG=y diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index b5c866073efd..6c02f53271cd 100644 --- a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -28,7 +28,6 @@ CONFIG_NETFILTER=y CONFIG_VLAN_8021Q=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig index 10be5773ad5d..c2b1c4404683 100644 --- a/arch/powerpc/configs/mpc512x_defconfig +++ b/arch/powerpc/configs/mpc512x_defconfig @@ -39,7 +39,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index da0e8d535eb8..7ee736f20774 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -347,7 +347,6 @@ CONFIG_MAC80211_DEBUGFS=y CONFIG_NET_9P=m CONFIG_NET_9P_VIRTIO=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_PARPORT=m diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 2efa025bf483..187e2f7c12c8 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -64,7 +64,6 @@ CONFIG_CFG80211_WEXT=y CONFIG_MAC80211=m # CONFIG_MAC80211_RC_MINSTREL is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65535 diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig index 9c7400a19e9d..0b0f78823a1b 100644 --- a/arch/powerpc/configs/wii_defconfig +++ b/arch/powerpc/configs/wii_defconfig @@ -43,7 +43,6 @@ CONFIG_CFG80211=y CONFIG_MAC80211=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 04e042edbab7..7dc7f58c4287 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -26,7 +26,6 @@ CONFIG_NET=y # CONFIG_IUCV is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_BLK_DEV_RAM=y # CONFIG_BLK_DEV_XPRAM is not set # CONFIG_DCSSBLK is not set diff --git a/arch/sh/configs/polaris_defconfig b/arch/sh/configs/polaris_defconfig index 0a432b5f50e7..87641b7d6c4e 100644 --- a/arch/sh/configs/polaris_defconfig +++ b/arch/sh/configs/polaris_defconfig @@ -38,7 +38,6 @@ CONFIG_IP_MULTICAST=y # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig index 9f94435cc44f..357a4c271ad4 100644 --- a/arch/tile/configs/tilegx_defconfig +++ b/arch/tile/configs/tilegx_defconfig @@ -159,7 +159,6 @@ CONFIG_DNS_RESOLVER=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=m diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig index 1c5bd4f8ffca..da2858755fa1 100644 --- a/arch/tile/configs/tilepro_defconfig +++ b/arch/tile/configs/tilepro_defconfig @@ -289,7 +289,6 @@ CONFIG_DNS_RESOLVER=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=m diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index bdc87907d6a1..698db8549686 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -86,10 +86,9 @@ config FW_LOADER require userspace firmware loading support, but a module built out-of-tree does. -config FIRMWARE_IN_KERNEL - bool "Include in-kernel firmware blobs in kernel binary" +config EXTRA_FIRMWARE + string "External firmware blobs to build into the kernel binary" depends on FW_LOADER - default y help Various drivers in the kernel source tree may require firmware, which is generally available in your distribution's linux-firmware @@ -99,23 +98,6 @@ config FIRMWARE_IN_KERNEL /lib/firmware/ on your system, so they can be loaded by userspace helpers on request. - Enabling this option will build each required firmware blob - specified by EXTRA_FIRMWARE into the kernel directly, where - request_firmware() will find them without having to call out to - userspace. This may be useful if your root file system requires a - device that uses such firmware and you do not wish to use an - initrd. - - This single option controls the inclusion of firmware for - every driver that uses request_firmware(), which avoids a - proliferation of 'Include firmware for xxx device' options. - - Say 'N' and let firmware be loaded from userspace. - -config EXTRA_FIRMWARE - string "External firmware blobs to build into the kernel binary" - depends on FW_LOADER - help This option allows firmware to be built into the kernel for the case where the user either cannot or doesn't want to provide it from userspace at runtime (for example, when the firmware in question is @@ -126,11 +108,11 @@ config EXTRA_FIRMWARE firmware files -- the same names that appear in MODULE_FIRMWARE() and request_firmware() in the source. These files should exist under the directory specified by the EXTRA_FIRMWARE_DIR option, which is - by default the firmware subdirectory of the kernel source tree. + /lib/firmware by default. For example, you might set CONFIG_EXTRA_FIRMWARE="usb8388.bin", copy - the usb8388.bin file into the firmware directory, and build the kernel. - Then any request_firmware("usb8388.bin") will be satisfied internally + the usb8388.bin file into /lib/firmware, and build the kernel. Then + any request_firmware("usb8388.bin") will be satisfied internally without needing to call out to userspace. WARNING: If you include additional firmware files into your binary -- cgit v1.2.3 From 53fbf5719aab34e747ec32a41ff55bed07d16406 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 26 Jan 2018 23:33:43 +0100 Subject: bpf, ppc64: remove obsolete exception handling from div/mod Since we've changed div/mod exception handling for src_reg in eBPF verifier itself, remove the leftovers from ppc64 JIT. Signed-off-by: Daniel Borkmann Cc: Naveen N. Rao Signed-off-by: Alexei Starovoitov --- arch/powerpc/net/bpf_jit_comp64.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 217a78e84865..0a34b0cec7b7 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -381,10 +381,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, goto bpf_alu32_trunc; case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ - PPC_CMPWI(src_reg, 0); - PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12); - PPC_LI(b2p[BPF_REG_0], 0); - PPC_JMP(exit_addr); if (BPF_OP(code) == BPF_MOD) { PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg); PPC_MULW(b2p[TMP_REG_1], src_reg, @@ -395,10 +391,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, goto bpf_alu32_trunc; case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ - PPC_CMPDI(src_reg, 0); - PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12); - PPC_LI(b2p[BPF_REG_0], 0); - PPC_JMP(exit_addr); if (BPF_OP(code) == BPF_MOD) { PPC_DIVD(b2p[TMP_REG_1], dst_reg, src_reg); PPC_MULD(b2p[TMP_REG_1], src_reg, -- cgit v1.2.3 From 64ba3dc7bf7cb0cbc89821db54edfe5180726fbf Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Fri, 5 Jan 2018 10:45:46 -0600 Subject: powerpc/eeh: Update VF config space after EEH Add EEH platform operations for pseries to update VF config space. With this change after EEH, the VF will have updated config space for pseries platform. Signed-off-by: Bryant G. Ly Signed-off-by: Juan J. Alvarez Acked-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh.c | 59 +++++++++++++++++++++++++ arch/powerpc/platforms/powernv/eeh-powernv.c | 65 ++-------------------------- arch/powerpc/platforms/pseries/eeh_pseries.c | 26 ++++++++++- 4 files changed, 88 insertions(+), 63 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 5161c37dd039..82829c65f31a 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -297,6 +297,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option); int eeh_pe_configure(struct eeh_pe *pe); int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, unsigned long addr, unsigned long mask); +int eeh_restore_vf_config(struct pci_dn *pdn); /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index cbca0a667682..cc649809885e 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -740,6 +740,65 @@ static void *eeh_restore_dev_state(void *data, void *userdata) return NULL; } +int eeh_restore_vf_config(struct pci_dn *pdn) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 devctl, cmd, cap2, aer_capctl; + int old_mps; + + if (edev->pcie_cap) { + /* Restore MPS */ + old_mps = (ffs(pdn->mps) - 8) << 5; + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, &devctl); + devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; + devctl |= old_mps; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, devctl); + + /* Disable Completion Timeout */ + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, + 4, &cap2); + if (cap2 & 0x10) { + eeh_ops->read_config(pdn, + edev->pcie_cap + PCI_EXP_DEVCTL2, + 4, &cap2); + cap2 |= 0x10; + eeh_ops->write_config(pdn, + edev->pcie_cap + PCI_EXP_DEVCTL2, + 4, cap2); + } + } + + /* Enable SERR and parity checking */ + eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); + cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); + eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); + + /* Enable report various errors */ + if (edev->pcie_cap) { + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, &devctl); + devctl &= ~PCI_EXP_DEVCTL_CERE; + devctl |= (PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE); + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, devctl); + } + + /* Enable ECRC generation and check */ + if (edev->pcie_cap && edev->aer_cap) { + eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, + 4, &aer_capctl); + aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); + eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, + 4, aer_capctl); + } + + return 0; +} + /** * pcibios_set_pcie_reset_state - Set PCI-E reset state * @dev: pci device struct diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 961e64115d92..0665b6d03cb3 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1655,70 +1655,11 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) return ret; } -static int pnv_eeh_restore_vf_config(struct pci_dn *pdn) -{ - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - u32 devctl, cmd, cap2, aer_capctl; - int old_mps; - - if (edev->pcie_cap) { - /* Restore MPS */ - old_mps = (ffs(pdn->mps) - 8) << 5; - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; - devctl |= old_mps; - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - - /* Disable Completion Timeout */ - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, - 4, &cap2); - if (cap2 & 0x10) { - eeh_ops->read_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, &cap2); - cap2 |= 0x10; - eeh_ops->write_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, cap2); - } - } - - /* Enable SERR and parity checking */ - eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); - cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); - eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); - - /* Enable report various errors */ - if (edev->pcie_cap) { - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_CERE; - devctl |= (PCI_EXP_DEVCTL_NFERE | - PCI_EXP_DEVCTL_FERE | - PCI_EXP_DEVCTL_URRE); - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - } - - /* Enable ECRC generation and check */ - if (edev->pcie_cap && edev->aer_cap) { - eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, &aer_capctl); - aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); - eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, aer_capctl); - } - - return 0; -} - static int pnv_eeh_restore_config(struct pci_dn *pdn) { struct eeh_dev *edev = pdn_to_eeh_dev(pdn); struct pnv_phb *phb; - s64 ret; + s64 ret = 0; int config_addr = (pdn->busno << 8) | (pdn->devfn); if (!edev) @@ -1732,7 +1673,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn) * to be exported by firmware in extendible way. */ if (edev->physfn) { - ret = pnv_eeh_restore_vf_config(pdn); + ret = eeh_restore_vf_config(pdn); } else { phb = pdn->phb->private_data; ret = opal_pci_reinit(phb->opal_id, @@ -1745,7 +1686,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn) return -EIO; } - return 0; + return ret; } static struct eeh_ops pnv_eeh_ops = { diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 2295f117e2d3..a671ef4f57f5 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -708,6 +708,30 @@ static int pseries_eeh_write_config(struct pci_dn *pdn, int where, int size, u32 return rtas_write_config(pdn, where, size, val); } +static int pseries_eeh_restore_config(struct pci_dn *pdn) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + s64 ret = 0; + + if (!edev) + return -EEXIST; + + /* + * FIXME: The MPS, error routing rules, timeout setting are worthy + * to be exported by firmware in extendible way. + */ + if (edev->physfn) + ret = eeh_restore_vf_config(pdn); + + if (ret) { + pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", + __func__, edev->pe_config_addr, ret); + return -EIO; + } + + return ret; +} + static struct eeh_ops pseries_eeh_ops = { .name = "pseries", .init = pseries_eeh_init, @@ -723,7 +747,7 @@ static struct eeh_ops pseries_eeh_ops = { .read_config = pseries_eeh_read_config, .write_config = pseries_eeh_write_config, .next_error = NULL, - .restore_config = NULL + .restore_config = pseries_eeh_restore_config }; /** -- cgit v1.2.3 From 856e1eb9bdd4bd703907925be112519ff65d991f Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Fri, 5 Jan 2018 10:45:47 -0600 Subject: PCI/AER: Add uevents in AER and EEH error/resume Devices can go offline when erors reported. This patch adds a change to the kernel object and lets udev know of error. When device resumes, a change is also set reporting device as online. Therefore, EEH and AER events are better propagated to user space for PCI devices in all arches. Signed-off-by: Bryant G. Ly Signed-off-by: Juan J. Alvarez Acked-by: Bjorn Helgaas Acked-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 6 ++++++ drivers/pci/pcie/aer/aerdrv_core.c | 3 +++ include/linux/pci.h | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 3c0fa99c5533..beea2182d754 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -228,6 +228,7 @@ static void *eeh_report_error(void *data, void *userdata) edev->in_error = true; eeh_pcid_put(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); return NULL; } @@ -381,6 +382,10 @@ static void *eeh_report_resume(void *data, void *userdata) driver->err_handler->resume(dev); eeh_pcid_put(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); +#ifdef CONFIG_PCI_IOV + eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); +#endif return NULL; } @@ -416,6 +421,7 @@ static void *eeh_report_failure(void *data, void *userdata) driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); eeh_pcid_put(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); return NULL; } diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 744805232155..8d7448063fd1 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -278,6 +278,7 @@ static int report_error_detected(struct pci_dev *dev, void *data) } else { err_handler = dev->driver->err_handler; vote = err_handler->error_detected(dev, result_data->state); + pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); } result_data->result = merge_result(result_data->result, vote); @@ -341,6 +342,7 @@ static int report_resume(struct pci_dev *dev, void *data) err_handler = dev->driver->err_handler; err_handler->resume(dev); + pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); out: device_unlock(&dev->dev); return 0; @@ -541,6 +543,7 @@ static void do_recovery(struct pci_dev *dev, int severity) return; failed: + pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); /* TODO: Should kernel panic here? */ dev_info(&dev->dev, "AER: Device recovery failed\n"); } diff --git a/include/linux/pci.h b/include/linux/pci.h index e3e94467687a..405630441b74 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2277,6 +2277,42 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) return false; } +/** + * pci_uevent_ers - emit a uevent during recovery path of pci device + * @pdev: pci device to check + * @err_type: type of error event + * + */ +static inline void pci_uevent_ers(struct pci_dev *pdev, + enum pci_ers_result err_type) +{ + int idx = 0; + char *envp[3]; + + switch (err_type) { + case PCI_ERS_RESULT_NONE: + case PCI_ERS_RESULT_CAN_RECOVER: + envp[idx++] = "ERROR_EVENT=BEGIN_RECOVERY"; + envp[idx++] = "DEVICE_ONLINE=0"; + break; + case PCI_ERS_RESULT_RECOVERED: + envp[idx++] = "ERROR_EVENT=SUCCESSFUL_RECOVERY"; + envp[idx++] = "DEVICE_ONLINE=1"; + break; + case PCI_ERS_RESULT_DISCONNECT: + envp[idx++] = "ERROR_EVENT=FAILED_RECOVERY"; + envp[idx++] = "DEVICE_ONLINE=0"; + break; + default: + break; + } + + if (idx > 0) { + envp[idx++] = NULL; + kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, envp); + } +} + /* provide the legacy pci_dma_* API */ #include -- cgit v1.2.3 From 565a744dd2bedde8a8abe8827748bdceb20444ee Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Fri, 5 Jan 2018 10:45:48 -0600 Subject: powerpc/pseries: Set eeh_pe of EEH_PE_VF type To correctly use EEH code one has to make sure that the EEH_PE_VF is set for dynamic created VFs. Therefore this patch allocates an eeh_pe of eeh type EEH_PE_VF and associates PE with parent. Signed-off-by: Bryant G. Ly Signed-off-by: Juan J. Alvarez Acked-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 3 ++- arch/powerpc/platforms/pseries/eeh_pseries.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 9f66ddebb799..b98a44667333 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -211,7 +211,8 @@ struct pci_dn { unsigned int *pe_num_map; /* PE# for the first VF PE or array */ bool m64_single_mode; /* Use M64 BAR in Single Mode */ #define IODA_INVALID_M64 (-1) - int (*m64_map)[PCI_SRIOV_NUM_BARS]; + int (*m64_map)[PCI_SRIOV_NUM_BARS]; /* Only used on powernv */ + int last_allow_rc; /* Only used on pseries */ #endif /* CONFIG_PCI_IOV */ int mps; /* Maximum Payload Size */ struct list_head child_list; diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index a671ef4f57f5..428bfd10bd5a 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -55,9 +55,12 @@ static int ibm_get_config_addr_info; static int ibm_get_config_addr_info2; static int ibm_configure_pe; +#ifdef CONFIG_PCI_IOV void pseries_pcibios_bus_add_device(struct pci_dev *pdev) { struct pci_dn *pdn = pci_get_pdn(pdev); + struct pci_dn *physfn_pdn; + struct eeh_dev *edev; if (!pdev->is_virtfn) return; @@ -65,6 +68,15 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) pdn->device_id = pdev->device; pdn->vendor_id = pdev->vendor; pdn->class_code = pdev->class; + /* + * Last allow unfreeze return code used for retrieval + * by user space in eeh-sysfs to show the last command + * completion from platform. + */ + pdn->last_allow_rc = 0; + physfn_pdn = pci_get_pdn(pdev->physfn); + pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index]; + edev = pdn_to_eeh_dev(pdn); /* * The following operations will fail if VF's sysfs files @@ -72,9 +84,13 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) */ eeh_add_device_early(pdn); eeh_add_device_late(pdev); + edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8); + eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */ + eeh_add_to_parent_pe(edev); /* Add as VF PE type */ eeh_sysfs_add_device(pdev); } +#endif /* * Buffer for reporting slot-error-detail rtas calls. Its here @@ -141,8 +157,10 @@ static int pseries_eeh_init(void) /* Set EEH probe mode */ eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); +#ifdef CONFIG_PCI_IOV /* Set EEH machine dependent code */ ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device; +#endif return 0; } -- cgit v1.2.3 From 67923cfcfa1427351bc5fa9d05f1c2a2e7cb9399 Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Fri, 5 Jan 2018 10:45:49 -0600 Subject: powerpc/eeh: Add EEH operations to notify resume When pseries SR-IOV is enabled and after a PF driver has resumed from EEH, platform has to be notified of the event so the child VFs can be allowed to resume their normal recovery path. This patch makes the EEH operation allow unfreeze platform dependent code and adds the call to pseries EEH code. Signed-off-by: Bryant G. Ly Signed-off-by: Juan J. Alvarez Acked-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/platforms/powernv/eeh-powernv.c | 3 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 96 +++++++++++++++++++++++++++- 3 files changed, 98 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 82829c65f31a..fd37cc101f4f 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -214,6 +214,7 @@ struct eeh_ops { int (*write_config)(struct pci_dn *pdn, int where, int size, u32 val); int (*next_error)(struct eeh_pe **pe); int (*restore_config)(struct pci_dn *pdn); + int (*notify_resume)(struct pci_dn *pdn); }; extern int eeh_subsystem_flags; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 0665b6d03cb3..33c86c1a1720 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1704,7 +1704,8 @@ static struct eeh_ops pnv_eeh_ops = { .read_config = pnv_eeh_read_config, .write_config = pnv_eeh_write_config, .next_error = pnv_eeh_next_error, - .restore_config = pnv_eeh_restore_config + .restore_config = pnv_eeh_restore_config, + .notify_resume = NULL }; #ifdef CONFIG_PCI_IOV diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 428bfd10bd5a..823cb27efa8b 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -750,6 +750,97 @@ static int pseries_eeh_restore_config(struct pci_dn *pdn) return ret; } +#ifdef CONFIG_PCI_IOV +int pseries_send_allow_unfreeze(struct pci_dn *pdn, + u16 *vf_pe_array, int cur_vfs) +{ + int rc; + int ibm_allow_unfreeze = rtas_token("ibm,open-sriov-allow-unfreeze"); + unsigned long buid, addr; + + addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + buid = pdn->phb->buid; + spin_lock(&rtas_data_buf_lock); + memcpy(rtas_data_buf, vf_pe_array, RTAS_DATA_BUF_SIZE); + rc = rtas_call(ibm_allow_unfreeze, 5, 1, NULL, + addr, + BUID_HI(buid), + BUID_LO(buid), + rtas_data_buf, cur_vfs * sizeof(u16)); + spin_unlock(&rtas_data_buf_lock); + if (rc) + pr_warn("%s: Failed to allow unfreeze for PHB#%x-PE#%lx, rc=%x\n", + __func__, + pdn->phb->global_number, addr, rc); + return rc; +} + +static int pseries_call_allow_unfreeze(struct eeh_dev *edev) +{ + struct pci_dn *pdn, *tmp, *parent, *physfn_pdn; + int cur_vfs = 0, rc = 0, vf_index, bus, devfn; + u16 *vf_pe_array; + + vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!vf_pe_array) + return -ENOMEM; + if (pci_num_vf(edev->physfn ? edev->physfn : edev->pdev)) { + if (edev->pdev->is_physfn) { + cur_vfs = pci_num_vf(edev->pdev); + pdn = eeh_dev_to_pdn(edev); + parent = pdn->parent; + for (vf_index = 0; vf_index < cur_vfs; vf_index++) + vf_pe_array[vf_index] = + cpu_to_be16(pdn->pe_num_map[vf_index]); + rc = pseries_send_allow_unfreeze(pdn, vf_pe_array, + cur_vfs); + pdn->last_allow_rc = rc; + for (vf_index = 0; vf_index < cur_vfs; vf_index++) { + list_for_each_entry_safe(pdn, tmp, + &parent->child_list, + list) { + bus = pci_iov_virtfn_bus(edev->pdev, + vf_index); + devfn = pci_iov_virtfn_devfn(edev->pdev, + vf_index); + if (pdn->busno != bus || + pdn->devfn != devfn) + continue; + pdn->last_allow_rc = rc; + } + } + } else { + pdn = pci_get_pdn(edev->pdev); + vf_pe_array[0] = cpu_to_be16(pdn->pe_number); + physfn_pdn = pci_get_pdn(edev->physfn); + rc = pseries_send_allow_unfreeze(physfn_pdn, + vf_pe_array, 1); + pdn->last_allow_rc = rc; + } + } + + kfree(vf_pe_array); + return rc; +} + +static int pseries_notify_resume(struct pci_dn *pdn) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + + if (!edev) + return -EEXIST; + + if (rtas_token("ibm,open-sriov-allow-unfreeze") + == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + if (edev->pdev->is_physfn || edev->pdev->is_virtfn) + return pseries_call_allow_unfreeze(edev); + + return 0; +} +#endif + static struct eeh_ops pseries_eeh_ops = { .name = "pseries", .init = pseries_eeh_init, @@ -765,7 +856,10 @@ static struct eeh_ops pseries_eeh_ops = { .read_config = pseries_eeh_read_config, .write_config = pseries_eeh_write_config, .next_error = NULL, - .restore_config = pseries_eeh_restore_config + .restore_config = pseries_eeh_restore_config, +#ifdef CONFIG_PCI_IOV + .notify_resume = pseries_notify_resume +#endif }; /** -- cgit v1.2.3 From 6ea3df6931128acf8989687d674731e22af0b731 Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Fri, 5 Jan 2018 10:45:50 -0600 Subject: powerpc/eeh: Add EEH notify resume sysfs Introduce a method for notify resume to be called from sysfs. In this patch one can now call notify resume from sysfs when is supported by platform. Signed-off-by: Bryant G. Ly Signed-off-by: Juan J. Alvarez Acked-by: Russell Currey [mpe: Add NULL check, add empty versions to avoid #ifdefs] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_sysfs.c | 62 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 2fb6a2d09461..deed906dd8f1 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -90,6 +90,65 @@ static ssize_t eeh_pe_state_store(struct device *dev, static DEVICE_ATTR_RW(eeh_pe_state); +#ifdef CONFIG_PCI_IOV +static ssize_t eeh_notify_resume_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + struct pci_dn *pdn = pci_get_pdn(pdev); + + if (!edev || !edev->pe) + return -ENODEV; + + pdn = pci_get_pdn(pdev); + return sprintf(buf, "%d\n", pdn->last_allow_rc); +} + +static ssize_t eeh_notify_resume_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); + + if (!edev || !edev->pe || !eeh_ops->notify_resume) + return -ENODEV; + + if (eeh_ops->notify_resume(pci_get_pdn(pdev))) + return -EIO; + + return count; +} +static DEVICE_ATTR_RW(eeh_notify_resume); + +static int eeh_notify_resume_add(struct pci_dev *pdev) +{ + struct device_node *np; + int rc = 0; + + np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn); + + if (of_property_read_bool(np, "ibm,is-open-sriov-pf")) + rc = device_create_file(&pdev->dev, &dev_attr_eeh_notify_resume); + + return rc; +} + +static void eeh_notify_resume_remove(struct pci_dev *pdev) +{ + struct device_node *np; + + np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn); + + if (of_property_read_bool(np, "ibm,is-open-sriov-pf")) + device_remove_file(&pdev->dev, &dev_attr_eeh_notify_resume); +} +#else +static inline int eeh_notify_resume_add(struct pci_dev *pdev) { return 0; } +static inline void eeh_notify_resume_remove(struct pci_dev *pdev) { } +#endif /* CONFIG_PCI_IOV */ + void eeh_sysfs_add_device(struct pci_dev *pdev) { struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); @@ -104,6 +163,7 @@ void eeh_sysfs_add_device(struct pci_dev *pdev) rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state); + rc += eeh_notify_resume_add(pdev); if (rc) pr_warn("EEH: Unable to create sysfs entries\n"); @@ -129,6 +189,8 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev) device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state); + eeh_notify_resume_remove(pdev); + if (edev) edev->mode &= ~EEH_DEV_SYSFS; } -- cgit v1.2.3 From 9a7f6b438664ed79dacdd345269aed429218d091 Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Fri, 5 Jan 2018 10:45:51 -0600 Subject: powerpc/pseries/pci: Associate PEs to VFs in configure SR-IOV After initial validation of SR-IOV resources, firmware will associate PEs to the dynamic VFs created within this call. This patch adds the association of PEs to the PF array of PE numbers indexed by VF. Signed-off-by: Bryant G. Ly Signed-off-by: Juan J. Alvarez Acked-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/pci.c | 150 ++++++++++++++++++++++++++++++++++- 1 file changed, 148 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 48d3af026f90..eab96637d6cf 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -59,16 +59,162 @@ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); #endif #ifdef CONFIG_PCI_IOV +#define MAX_VFS_FOR_MAP_PE 256 +struct pe_map_bar_entry { + __be64 bar; /* Input: Virtual Function BAR */ + __be16 rid; /* Input: Virtual Function Router ID */ + __be16 pe_num; /* Output: Virtual Function PE Number */ + __be32 reserved; /* Reserved Space */ +}; + +int pseries_send_map_pe(struct pci_dev *pdev, + u16 num_vfs, + struct pe_map_bar_entry *vf_pe_array) +{ + struct pci_dn *pdn; + int rc; + unsigned long buid, addr; + int ibm_map_pes = rtas_token("ibm,open-sriov-map-pe-number"); + + if (ibm_map_pes == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + pdn = pci_get_pdn(pdev); + addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + buid = pdn->phb->buid; + spin_lock(&rtas_data_buf_lock); + memcpy(rtas_data_buf, vf_pe_array, + RTAS_DATA_BUF_SIZE); + rc = rtas_call(ibm_map_pes, 5, 1, NULL, addr, + BUID_HI(buid), BUID_LO(buid), + rtas_data_buf, + num_vfs * sizeof(struct pe_map_bar_entry)); + memcpy(vf_pe_array, rtas_data_buf, RTAS_DATA_BUF_SIZE); + spin_unlock(&rtas_data_buf_lock); + + if (rc) + dev_err(&pdev->dev, + "%s: Failed to associate pes PE#%lx, rc=%x\n", + __func__, addr, rc); + + return rc; +} + +void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num) +{ + struct pci_dn *pdn; + + pdn = pci_get_pdn(pdev); + pdn->pe_num_map[vf_index] = be16_to_cpu(pe_num); + dev_dbg(&pdev->dev, "VF %04x:%02x:%02x.%x associated with PE#%x\n", + pci_domain_nr(pdev->bus), + pdev->bus->number, + PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), + PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), + pdn->pe_num_map[vf_index]); +} + +int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_dn *pdn; + int i, rc, vf_index; + struct pe_map_bar_entry *vf_pe_array; + struct resource *res; + u64 size; + + vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!vf_pe_array) + return -ENOMEM; + + pdn = pci_get_pdn(pdev); + /* create firmware structure to associate pes */ + for (vf_index = 0; vf_index < num_vfs; vf_index++) { + pdn->pe_num_map[vf_index] = IODA_INVALID_PE; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->parent) + continue; + size = pcibios_iov_resource_alignment(pdev, i + + PCI_IOV_RESOURCES); + vf_pe_array[vf_index].bar = + cpu_to_be64(res->start + size * vf_index); + vf_pe_array[vf_index].rid = + cpu_to_be16((pci_iov_virtfn_bus(pdev, vf_index) + << 8) | pci_iov_virtfn_devfn(pdev, + vf_index)); + vf_pe_array[vf_index].pe_num = + cpu_to_be16(IODA_INVALID_PE); + } + } + + rc = pseries_send_map_pe(pdev, num_vfs, vf_pe_array); + /* Only zero is success */ + if (!rc) + for (vf_index = 0; vf_index < num_vfs; vf_index++) + pseries_set_pe_num(pdev, vf_index, + vf_pe_array[vf_index].pe_num); + + kfree(vf_pe_array); + return rc; +} + +int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_dn *pdn; + int rc; + const int *max_vfs; + int max_config_vfs; + struct device_node *dn = pci_device_to_OF_node(pdev); + + max_vfs = of_get_property(dn, "ibm,number-of-configurable-vfs", NULL); + + if (!max_vfs) + return -EINVAL; + + /* First integer stores max config */ + max_config_vfs = of_read_number(&max_vfs[0], 1); + if (max_config_vfs < num_vfs && num_vfs > MAX_VFS_FOR_MAP_PE) { + dev_err(&pdev->dev, + "Num VFs %x > %x Configurable VFs\n", + num_vfs, (num_vfs > MAX_VFS_FOR_MAP_PE) ? + MAX_VFS_FOR_MAP_PE : max_config_vfs); + return -EINVAL; + } + + pdn = pci_get_pdn(pdev); + pdn->pe_num_map = kmalloc_array(num_vfs, + sizeof(*pdn->pe_num_map), + GFP_KERNEL); + if (!pdn->pe_num_map) + return -ENOMEM; + + rc = pseries_associate_pes(pdev, num_vfs); + + /* Anything other than zero is failure */ + if (rc) { + dev_err(&pdev->dev, "Failure to enable sriov: %x\n", rc); + kfree(pdn->pe_num_map); + } else { + pci_vf_drivers_autoprobe(pdev, false); + } + + return rc; +} + int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) { /* Allocate PCI data */ add_dev_pci_data(pdev); - pci_vf_drivers_autoprobe(pdev, false); - return 0; + return pseries_pci_sriov_enable(pdev, num_vfs); } int pseries_pcibios_sriov_disable(struct pci_dev *pdev) { + struct pci_dn *pdn; + + pdn = pci_get_pdn(pdev); + /* Releasing pe_num_map */ + kfree(pdn->pe_num_map); /* Release PCI data */ remove_dev_pci_data(pdev); pci_vf_drivers_autoprobe(pdev, true); -- cgit v1.2.3 From fc5f622163637308a3d520a315527481cff023f5 Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Fri, 5 Jan 2018 10:45:52 -0600 Subject: powerpc/pseries: Add Initialization of VF Bars When enabling SR-IOV in pseries platform, the VF bar properties for a PF are reported on the device node in the device tree. This patch adds the IOV Bar resources to Linux structures from the device tree for later use when configuring SR-IOV by PF driver. Signed-off-by: Bryant G. Ly Signed-off-by: Juan J. Alvarez Acked-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci.h | 2 + arch/powerpc/kernel/pci_of_scan.c | 2 +- arch/powerpc/platforms/pseries/setup.c | 164 +++++++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 8dc32eacc97c..d82802ff5088 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -121,6 +121,8 @@ extern int remove_phb_dynamic(struct pci_controller *phb); extern struct pci_dev *of_create_pci_dev(struct device_node *node, struct pci_bus *bus, int devfn); +extern unsigned int pci_parse_of_flags(u32 addr0, int bridge); + extern void of_scan_pci_bridge(struct pci_dev *dev); extern void of_scan_bus(struct device_node *node, struct pci_bus *bus); diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 0d790f8432d2..20ceec4a5f5e 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -38,7 +38,7 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def) * @addr0: value of 1st cell of a device tree PCI address. * @bridge: Set this flag if the address is from a bridge 'ranges' property */ -static unsigned int pci_parse_of_flags(u32 addr0, int bridge) +unsigned int pci_parse_of_flags(u32 addr0, int bridge) { unsigned int flags = 0; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 42e9cf0b3180..372d7ada1a0c 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -492,6 +492,162 @@ static void pseries_setup_rfi_flush(void) setup_rfi_flush(types, enable); } +#ifdef CONFIG_PCI_IOV +enum rtas_iov_fw_value_map { + NUM_RES_PROPERTY = 0, /* Number of Resources */ + LOW_INT = 1, /* Lowest 32 bits of Address */ + START_OF_ENTRIES = 2, /* Always start of entry */ + APERTURE_PROPERTY = 2, /* Start of entry+ to Aperture Size */ + WDW_SIZE_PROPERTY = 4, /* Start of entry+ to Window Size */ + NEXT_ENTRY = 7 /* Go to next entry on array */ +}; + +enum get_iov_fw_value_index { + BAR_ADDRS = 1, /* Get Bar Address */ + APERTURE_SIZE = 2, /* Get Aperture Size */ + WDW_SIZE = 3 /* Get Window Size */ +}; + +resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno, + enum get_iov_fw_value_index value) +{ + const int *indexes; + struct device_node *dn = pci_device_to_OF_node(dev); + int i, num_res, ret = 0; + + indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); + if (!indexes) + return 0; + + /* + * First element in the array is the number of Bars + * returned. Search through the list to find the matching + * bar + */ + num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1); + if (resno >= num_res) + return 0; /* or an errror */ + + i = START_OF_ENTRIES + NEXT_ENTRY * resno; + switch (value) { + case BAR_ADDRS: + ret = of_read_number(&indexes[i], 2); + break; + case APERTURE_SIZE: + ret = of_read_number(&indexes[i + APERTURE_PROPERTY], 2); + break; + case WDW_SIZE: + ret = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2); + break; + } + + return ret; +} + +void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes) +{ + struct resource *res; + resource_size_t base, size; + int i, r, num_res; + + num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1); + num_res = min_t(int, num_res, PCI_SRIOV_NUM_BARS); + for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS; + i += NEXT_ENTRY, r++) { + res = &dev->resource[r + PCI_IOV_RESOURCES]; + base = of_read_number(&indexes[i], 2); + size = of_read_number(&indexes[i + APERTURE_PROPERTY], 2); + res->flags = pci_parse_of_flags(of_read_number + (&indexes[i + LOW_INT], 1), 0); + res->flags |= (IORESOURCE_MEM_64 | IORESOURCE_PCI_FIXED); + res->name = pci_name(dev); + res->start = base; + res->end = base + size - 1; + } +} + +void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes) +{ + struct resource *res, *root, *conflict; + resource_size_t base, size; + int i, r, num_res; + + /* + * First element in the array is the number of Bars + * returned. Search through the list to find the matching + * bars assign them from firmware into resources structure. + */ + num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1); + for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS; + i += NEXT_ENTRY, r++) { + res = &dev->resource[r + PCI_IOV_RESOURCES]; + base = of_read_number(&indexes[i], 2); + size = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2); + res->name = pci_name(dev); + res->start = base; + res->end = base + size - 1; + root = &iomem_resource; + dev_dbg(&dev->dev, + "pSeries IOV BAR %d: trying firmware assignment %pR\n", + r + PCI_IOV_RESOURCES, res); + conflict = request_resource_conflict(root, res); + if (conflict) { + dev_info(&dev->dev, + "BAR %d: %pR conflicts with %s %pR\n", + r + PCI_IOV_RESOURCES, res, + conflict->name, conflict); + res->flags |= IORESOURCE_UNSET; + } + } +} + +static void pseries_pci_fixup_resources(struct pci_dev *pdev) +{ + const int *indexes; + struct device_node *dn = pci_device_to_OF_node(pdev); + + /*Firmware must support open sriov otherwise dont configure*/ + indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); + if (!indexes) + return; + /* Assign the addresses from device tree*/ + of_pci_set_vf_bar_size(pdev, indexes); +} + +static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev) +{ + const int *indexes; + struct device_node *dn = pci_device_to_OF_node(pdev); + + if (!pdev->is_physfn || pdev->is_added) + return; + /*Firmware must support open sriov otherwise dont configure*/ + indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); + if (!indexes) + return; + /* Assign the addresses from device tree*/ + of_pci_parse_iov_addrs(pdev, indexes); +} + +static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev, + int resno) +{ + const __be32 *reg; + struct device_node *dn = pci_device_to_OF_node(pdev); + + /*Firmware must support open sriov otherwise report regular alignment*/ + reg = of_get_property(dn, "ibm,is-open-sriov-pf", NULL); + if (!reg) + return pci_iov_resource_size(pdev, resno); + + if (!pdev->is_physfn) + return 0; + return pseries_get_iov_fw_value(pdev, + resno - PCI_IOV_RESOURCES, + APERTURE_SIZE); +} +#endif + static void __init pSeries_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); @@ -525,6 +681,14 @@ static void __init pSeries_setup_arch(void) vpa_init(boot_cpuid); ppc_md.power_save = pseries_lpar_idle; ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; +#ifdef CONFIG_PCI_IOV + ppc_md.pcibios_fixup_resources = + pseries_pci_fixup_resources; + ppc_md.pcibios_fixup_sriov = + pseries_pci_fixup_iov_resources; + ppc_md.pcibios_iov_resource_alignment = + pseries_pci_iov_resource_alignment; +#endif } else { /* No special idle routine */ ppc_md.enable_pmcs = power4_enable_pmcs; -- cgit v1.2.3 From 5c8136fa1af7c0e9b4aec89cf2832f6e5197ce32 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 23 Jan 2018 14:22:50 +0100 Subject: powerpc/mm/nohash: do not flush the entire mm when range is a single page Most of the time, flush_tlb_range() is called on single pages. At the time being, flush_tlb_range() inconditionnaly calls flush_tlb_mm() which flushes at least the entire PID pages and on older CPUs like 4xx or 8xx it flushes the entire TLB table. This patch calls flush_tlb_page() instead of flush_tlb_mm() when the range is a single page. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/mm/tlb_nohash.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index bfc4a0869609..15fe5f0c8665 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -388,7 +388,10 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - flush_tlb_mm(vma->vm_mm); + if (end - start == PAGE_SIZE && !(start & ~PAGE_MASK)) + flush_tlb_page(vma, start); + else + flush_tlb_mm(vma->vm_mm); } EXPORT_SYMBOL(flush_tlb_range); -- cgit v1.2.3 From 902bdc57451c2c64aa139bbe24067f70a186db0a Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 23 Oct 2017 19:07:02 +1100 Subject: powerpc/powernv/idoa: Remove unnecessary pcidev from pci_dn The pcidev value stored in pci_dn is only used for NPU/NPU2 initialization. We can easily drop the cached pointer and use an ancient helper - pci_get_domain_bus_and_slot() instead in order to reduce complexity. Signed-off-by: Alexey Kardashevskiy Acked-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 2 -- arch/powerpc/platforms/powernv/npu-dma.c | 5 ++++- arch/powerpc/platforms/powernv/pci-ioda.c | 3 --- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index b98a44667333..94d449031b18 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -197,8 +197,6 @@ struct pci_dn { struct iommu_table_group *table_group; /* for phb's or bridges */ int pci_ext_config_space; /* for pci devices */ - - struct pci_dev *pcidev; /* back-pointer to the pci device */ #ifdef CONFIG_EEH struct eeh_dev *edev; /* eeh device */ #endif diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index c5899c107d59..0a253b64ac5f 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -39,7 +39,10 @@ */ static struct pci_dev *get_pci_dev(struct device_node *dn) { - return PCI_DN(dn)->pcidev; + struct pci_dn *pdn = PCI_DN(dn); + + return pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus), + pdn->busno, pdn->devfn); } /* Given a NPU device get the associated PCI device. */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 886014ed85fe..398cc5e4b91b 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1082,7 +1082,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) * At some point we want to remove the PDN completely anyways */ pci_dev_get(dev); - pdn->pcidev = dev; pdn->pe_number = pe->pe_number; pe->flags = PNV_IODA_PE_DEV; pe->pdev = dev; @@ -1129,7 +1128,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) continue; pe->device_count++; - pdn->pcidev = dev; pdn->pe_number = pe->pe_number; if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) pnv_ioda_setup_same_PE(dev->subordinate, pe); @@ -1244,7 +1242,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) pci_dev_get(npu_pdev); npu_pdn = pci_get_pdn(npu_pdev); rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; - npu_pdn->pcidev = npu_pdev; npu_pdn->pe_number = pe_num; phb->ioda.pe_rmap[rid] = pe->pe_number; -- cgit v1.2.3 From 384dfd627f1ee67d028e6f14c6e9bf5a1e2a7a24 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 28 Nov 2017 13:39:43 -0600 Subject: powerpc/kernel: Block interrupts when updating TIDR clear_thread_tidr() is called in interrupt context as a part of delayed put of the task structure (i.e as a part of timer interrupt). To prevent a deadlock, block interrupts when holding vas_thread_id_lock to set/ clear TIDR for a task. Fixes: ec233ede4c86 ("powerpc: Add support for setting SPRN_TIDR") Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 2fd01c9045ba..69af583dce98 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1515,14 +1515,15 @@ static int assign_thread_tidr(void) { int index; int err; + unsigned long flags; again: if (!ida_pre_get(&vas_thread_ida, GFP_KERNEL)) return -ENOMEM; - spin_lock(&vas_thread_id_lock); + spin_lock_irqsave(&vas_thread_id_lock, flags); err = ida_get_new_above(&vas_thread_ida, 1, &index); - spin_unlock(&vas_thread_id_lock); + spin_unlock_irqrestore(&vas_thread_id_lock, flags); if (err == -EAGAIN) goto again; @@ -1530,9 +1531,9 @@ again: return err; if (index > MAX_THREAD_CONTEXT) { - spin_lock(&vas_thread_id_lock); + spin_lock_irqsave(&vas_thread_id_lock, flags); ida_remove(&vas_thread_ida, index); - spin_unlock(&vas_thread_id_lock); + spin_unlock_irqrestore(&vas_thread_id_lock, flags); return -ENOMEM; } @@ -1541,9 +1542,11 @@ again: static void free_thread_tidr(int id) { - spin_lock(&vas_thread_id_lock); + unsigned long flags; + + spin_lock_irqsave(&vas_thread_id_lock, flags); ida_remove(&vas_thread_ida, id); - spin_unlock(&vas_thread_id_lock); + spin_unlock_irqrestore(&vas_thread_id_lock, flags); } /* -- cgit v1.2.3 From a346137e9142b039fd13af2e59696e3d40c487ef Mon Sep 17 00:00:00 2001 From: Michael Bringmann Date: Tue, 28 Nov 2017 16:58:36 -0600 Subject: powerpc/numa: Use ibm,max-associativity-domains to discover possible nodes On powerpc systems which allow 'hot-add' of CPU or memory resources, it may occur that the new resources are to be inserted into nodes that were not used for these resources at bootup. In the kernel, any node that is used must be defined and initialized. These empty nodes may occur when, * Dedicated vs. shared resources. Shared resources require information such as the VPHN hcall for CPU assignment to nodes. Associativity decisions made based on dedicated resource rules, such as associativity properties in the device tree, may vary from decisions made using the values returned by the VPHN hcall. * memoryless nodes at boot. Nodes need to be defined as 'possible' at boot for operation with other code modules. Previously, the powerpc code would limit the set of possible nodes to those which have memory assigned at boot, and were thus online. Subsequent add/remove of CPUs or memory would only work with this subset of possible nodes. * memoryless nodes with CPUs at boot. Due to the previous restriction on nodes, nodes that had CPUs but no memory were being collapsed into other nodes that did have memory at boot. In practice this meant that the node assignment presented by the runtime kernel differed from the affinity and associativity attributes presented by the device tree or VPHN hcalls. Nodes that might be known to the pHyp were not 'possible' in the runtime kernel because they did not have memory at boot. This patch ensures that sufficient nodes are defined to support configuration requirements after boot, as well as at boot. This patch set fixes a couple of problems. * Nodes known to powerpc to be memoryless at boot, but to have CPUs in them are allowed to be 'possible' and 'online'. Memory allocations for those nodes are taken from another node that does have memory until and if memory is hot-added to the node. * Nodes which have no resources assigned at boot, but which may still be referenced subsequently by affinity or associativity attributes, are kept in the list of 'possible' nodes for powerpc. Hot-add of memory or CPUs to the system can reference these nodes and bring them online instead of redirecting to one of the set of nodes that were known to have memory at boot. This patch extracts the value of the lowest domain level (number of allocable resources) from the device tree property "ibm,max-associativity-domains" to use as the maximum number of nodes to setup as possibly available in the system. This new setting will override the instruction: nodes_and(node_possible_map, node_possible_map, node_online_map); presently seen in the function arch/powerpc/mm/numa.c:initmem_init(). If the "ibm,max-associativity-domains" property is not present at boot, no operation will be performed to define or enable additional nodes, or enable the above 'nodes_and()'. Signed-off-by: Michael Bringmann Reviewed-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 268c7a2d9a5b..f9cd40cd5485 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -810,6 +810,34 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) NODE_DATA(nid)->node_spanned_pages = spanned_pages; } +static void __init find_possible_nodes(void) +{ + struct device_node *rtas; + u32 numnodes, i; + + if (min_common_depth <= 0) + return; + + rtas = of_find_node_by_path("/rtas"); + if (!rtas) + return; + + if (of_property_read_u32_index(rtas, + "ibm,max-associativity-domains", + min_common_depth, &numnodes)) + goto out; + + for (i = 0; i < numnodes; i++) { + if (!node_possible(i)) { + setup_node_data(i, 0, 0); + node_set(i, node_possible_map); + } + } + +out: + of_node_put(rtas); +} + void __init initmem_init(void) { int nid, cpu; @@ -823,12 +851,15 @@ void __init initmem_init(void) memblock_dump_all(); /* - * Reduce the possible NUMA nodes to the online NUMA nodes, - * since we do not support node hotplug. This ensures that we - * lower the maximum NUMA node ID to what is actually present. + * Modify the set of possible NUMA nodes to reflect information + * available about the set of online nodes, and the set of nodes + * that we expect to make use of for this platform's affinity + * calculations. */ nodes_and(node_possible_map, node_possible_map, node_online_map); + find_possible_nodes(); + for_each_online_node(nid) { unsigned long start_pfn, end_pfn; -- cgit v1.2.3 From ea05ba7c559c8e5a5946c3a94a2a266e9a6680a6 Mon Sep 17 00:00:00 2001 From: Michael Bringmann Date: Tue, 28 Nov 2017 16:58:40 -0600 Subject: powerpc/numa: Ensure nodes initialized for hotplug This patch fixes some problems encountered at runtime with configurations that support memory-less nodes, or that hot-add CPUs into nodes that are memoryless during system execution after boot. The problems of interest include: * Nodes known to powerpc to be memoryless at boot, but to have CPUs in them are allowed to be 'possible' and 'online'. Memory allocations for those nodes are taken from another node that does have memory until and if memory is hot-added to the node. * Nodes which have no resources assigned at boot, but which may still be referenced subsequently by affinity or associativity attributes, are kept in the list of 'possible' nodes for powerpc. Hot-add of memory or CPUs to the system can reference these nodes and bring them online instead of redirecting the references to one of the set of nodes known to have memory at boot. Note that this software operates under the context of CPU hotplug. We are not doing memory hotplug in this code, but rather updating the kernel's CPU topology (i.e. arch_update_cpu_topology / numa_update_cpu_topology). We are initializing a node that may be used by CPUs or memory before it can be referenced as invalid by a CPU hotplug operation. CPU hotplug operations are protected by a range of APIs including cpu_maps_update_begin/cpu_maps_update_done, cpus_read/write_lock / cpus_read/write_unlock, device locks, and more. Memory hotplug operations, including try_online_node, are protected by mem_hotplug_begin/mem_hotplug_done, device locks, and more. In the case of CPUs being hot-added to a previously memoryless node, the try_online_node operation occurs wholly within the CPU locks with no overlap. Using HMC hot-add/hot-remove operations, we have been able to add and remove CPUs to any possible node without failures. HMC operations involve a degree self-serialization, though. Signed-off-by: Michael Bringmann Reviewed-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 47 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index f9cd40cd5485..1bead2c67272 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -486,7 +486,7 @@ static int numa_setup_cpu(unsigned long lcpu) nid = of_node_to_nid_single(cpu); out_present: - if (nid < 0 || !node_online(nid)) + if (nid < 0 || !node_possible(nid)) nid = first_online_node; map_cpu_to_node(lcpu, nid); @@ -828,10 +828,8 @@ static void __init find_possible_nodes(void) goto out; for (i = 0; i < numnodes; i++) { - if (!node_possible(i)) { - setup_node_data(i, 0, 0); + if (!node_possible(i)) node_set(i, node_possible_map); - } } out: @@ -1200,6 +1198,40 @@ static long vphn_get_associativity(unsigned long cpu, return rc; } +static inline int find_and_online_cpu_nid(int cpu) +{ + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; + int new_nid; + + /* Use associativity from first thread for all siblings */ + vphn_get_associativity(cpu, associativity); + new_nid = associativity_to_nid(associativity); + if (new_nid < 0 || !node_possible(new_nid)) + new_nid = first_online_node; + + if (NODE_DATA(new_nid) == NULL) { +#ifdef CONFIG_MEMORY_HOTPLUG + /* + * Need to ensure that NODE_DATA is initialized for a node from + * available memory (see memblock_alloc_try_nid). If unable to + * init the node, then default to nearest node that has memory + * installed. + */ + if (try_online_node(new_nid)) + new_nid = first_online_node; +#else + /* + * Default to using the nearest node that has memory installed. + * Otherwise, it would be necessary to patch the kernel MM code + * to deal with more memoryless-node error conditions. + */ + new_nid = first_online_node; +#endif + } + + return new_nid; +} + /* * Update the CPU maps and sysfs entries for a single CPU when its NUMA * characteristics change. This function doesn't perform any locking and is @@ -1267,7 +1299,6 @@ int numa_update_cpu_topology(bool cpus_locked) { unsigned int cpu, sibling, changed = 0; struct topology_update_data *updates, *ud; - __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; cpumask_t updated_cpus; struct device *dev; int weight, new_nid, i = 0; @@ -1305,11 +1336,7 @@ int numa_update_cpu_topology(bool cpus_locked) continue; } - /* Use associativity from first thread for all siblings */ - vphn_get_associativity(cpu, associativity); - new_nid = associativity_to_nid(associativity); - if (new_nid < 0 || !node_online(new_nid)) - new_nid = first_online_node; + new_nid = find_and_online_cpu_nid(cpu); if (new_nid == numa_cpu_lookup_table[cpu]) { cpumask_andnot(&cpu_associativity_changes_mask, -- cgit v1.2.3 From e67e02a544e9a0d24b99eb383e808bb3433b048d Mon Sep 17 00:00:00 2001 From: Michael Bringmann Date: Tue, 28 Nov 2017 16:58:43 -0600 Subject: powerpc/pseries: Fix cpu hotplug crash with memoryless nodes On powerpc systems with shared configurations of CPUs and memory and memoryless nodes at boot, an event ordering problem was observed on a SLES12 build platforms with the hot-add of CPUs to the memoryless nodes. * The most common error occurred when the memory SLAB driver attempted to reference the memoryless node to which a CPU was being added before the kernel had finished initializing all of the data structures for the CPU and exited 'device_online' under DLPAR/hot-add. Normally the memoryless node would be initialized through the call path device_online ... arch_update_cpu_topology ... find_cpu_nid ... try_online_node. This patch ensures that the powerpc node will be initialized as early as possible, even if it was memoryless and CPU-less at the point when we are trying to hot-add a new CPU to it. Signed-off-by: Michael Bringmann Reviewed-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 4 +++- arch/powerpc/platforms/pseries/hotplug-cpu.c | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 1bead2c67272..314d19ab9385 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1198,7 +1198,7 @@ static long vphn_get_associativity(unsigned long cpu, return rc; } -static inline int find_and_online_cpu_nid(int cpu) +int find_and_online_cpu_nid(int cpu) { __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; int new_nid; @@ -1229,6 +1229,8 @@ static inline int find_and_online_cpu_nid(int cpu) #endif } + pr_debug("%s:%d cpu %d nid %d\n", __FUNCTION__, __LINE__, + cpu, new_nid); return new_nid; } diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index a7d14aa7bb7c..dceb51454d8d 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -340,6 +340,8 @@ static void pseries_remove_processor(struct device_node *np) cpu_maps_update_done(); } +extern int find_and_online_cpu_nid(int cpu); + static int dlpar_online_cpu(struct device_node *dn) { int rc = 0; @@ -364,6 +366,7 @@ static int dlpar_online_cpu(struct device_node *dn) != CPU_STATE_OFFLINE); cpu_maps_update_done(); timed_topology_update(1); + find_and_online_cpu_nid(cpu); rc = device_online(get_cpu_device(cpu)); if (rc) goto out; -- cgit v1.2.3 From 77720c82915a8b7797e0041af95707d7485b4a40 Mon Sep 17 00:00:00 2001 From: Julia Cartwright Date: Tue, 21 Mar 2017 17:43:03 -0500 Subject: powerpc/mpc52xx_gpt: make use of raw_spinlock variants The mpc52xx_gpt code currently implements an irq_chip for handling interrupts; due to how irq_chip handling is done, it's necessary for the irq_chip methods to be invoked from hardirq context, even on a a real-time kernel. Because the spinlock_t type becomes a "sleeping" spinlock w/ RT kernels, it is not suitable to be used with irq_chips. A quick audit of the operations under the lock reveal that they do only minimal, bounded work, and are therefore safe to do under a raw spinlock. Signed-off-by: Julia Cartwright Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 52 +++++++++++++++---------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 9e974b1e1697..17cf249b18ee 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -90,7 +90,7 @@ struct mpc52xx_gpt_priv { struct list_head list; /* List of all GPT devices */ struct device *dev; struct mpc52xx_gpt __iomem *regs; - spinlock_t lock; + raw_spinlock_t lock; struct irq_domain *irqhost; u32 ipb_freq; u8 wdt_mode; @@ -141,9 +141,9 @@ static void mpc52xx_gpt_irq_unmask(struct irq_data *d) struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d); unsigned long flags; - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); setbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); } static void mpc52xx_gpt_irq_mask(struct irq_data *d) @@ -151,9 +151,9 @@ static void mpc52xx_gpt_irq_mask(struct irq_data *d) struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d); unsigned long flags; - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); } static void mpc52xx_gpt_irq_ack(struct irq_data *d) @@ -171,14 +171,14 @@ static int mpc52xx_gpt_irq_set_type(struct irq_data *d, unsigned int flow_type) dev_dbg(gpt->dev, "%s: virq=%i type=%x\n", __func__, d->irq, flow_type); - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); reg = in_be32(&gpt->regs->mode) & ~MPC52xx_GPT_MODE_ICT_MASK; if (flow_type & IRQF_TRIGGER_RISING) reg |= MPC52xx_GPT_MODE_ICT_RISING; if (flow_type & IRQF_TRIGGER_FALLING) reg |= MPC52xx_GPT_MODE_ICT_FALLING; out_be32(&gpt->regs->mode, reg); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); return 0; } @@ -264,11 +264,11 @@ mpc52xx_gpt_irq_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node) /* If the GPT is currently disabled, then change it to be in Input * Capture mode. If the mode is non-zero, then the pin could be * already in use for something. */ - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); mode = in_be32(&gpt->regs->mode); if ((mode & MPC52xx_GPT_MODE_MS_MASK) == 0) out_be32(&gpt->regs->mode, mode | MPC52xx_GPT_MODE_MS_IC); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); dev_dbg(gpt->dev, "%s() complete. virq=%i\n", __func__, cascade_virq); } @@ -295,9 +295,9 @@ mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v) dev_dbg(gpt->dev, "%s: gpio:%d v:%d\n", __func__, gpio, v); r = v ? MPC52xx_GPT_MODE_GPIO_OUT_HIGH : MPC52xx_GPT_MODE_GPIO_OUT_LOW; - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK, r); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); } static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio) @@ -307,9 +307,9 @@ static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio) dev_dbg(gpt->dev, "%s: gpio:%d\n", __func__, gpio); - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); return 0; } @@ -436,16 +436,16 @@ static int mpc52xx_gpt_do_start(struct mpc52xx_gpt_priv *gpt, u64 period, } /* Set and enable the timer, reject an attempt to use a wdt as gpt */ - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); if (as_wdt) gpt->wdt_mode |= MPC52xx_GPT_IS_WDT; else if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) { - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); return -EBUSY; } out_be32(&gpt->regs->count, prescale << 16 | clocks); clrsetbits_be32(&gpt->regs->mode, clear, set); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); return 0; } @@ -476,14 +476,14 @@ int mpc52xx_gpt_stop_timer(struct mpc52xx_gpt_priv *gpt) unsigned long flags; /* reject the operation if the timer is used as watchdog (gpt 0 only) */ - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) { - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); return -EBUSY; } clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_COUNTER_ENABLE); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); return 0; } EXPORT_SYMBOL(mpc52xx_gpt_stop_timer); @@ -500,9 +500,9 @@ u64 mpc52xx_gpt_timer_period(struct mpc52xx_gpt_priv *gpt) u64 prescale; unsigned long flags; - spin_lock_irqsave(&gpt->lock, flags); + raw_spin_lock_irqsave(&gpt->lock, flags); period = in_be32(&gpt->regs->count); - spin_unlock_irqrestore(&gpt->lock, flags); + raw_spin_unlock_irqrestore(&gpt->lock, flags); prescale = period >> 16; period &= 0xffff; @@ -532,9 +532,9 @@ static inline void mpc52xx_gpt_wdt_ping(struct mpc52xx_gpt_priv *gpt_wdt) { unsigned long flags; - spin_lock_irqsave(&gpt_wdt->lock, flags); + raw_spin_lock_irqsave(&gpt_wdt->lock, flags); out_8((u8 *) &gpt_wdt->regs->mode, MPC52xx_GPT_MODE_WDT_PING); - spin_unlock_irqrestore(&gpt_wdt->lock, flags); + raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags); } /* wdt misc device api */ @@ -638,11 +638,11 @@ static int mpc52xx_wdt_release(struct inode *inode, struct file *file) struct mpc52xx_gpt_priv *gpt_wdt = file->private_data; unsigned long flags; - spin_lock_irqsave(&gpt_wdt->lock, flags); + raw_spin_lock_irqsave(&gpt_wdt->lock, flags); clrbits32(&gpt_wdt->regs->mode, MPC52xx_GPT_MODE_COUNTER_ENABLE | MPC52xx_GPT_MODE_WDT_EN); gpt_wdt->wdt_mode &= ~MPC52xx_GPT_IS_WDT; - spin_unlock_irqrestore(&gpt_wdt->lock, flags); + raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags); #endif clear_bit(0, &wdt_is_active); return 0; @@ -723,7 +723,7 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev) if (!gpt) return -ENOMEM; - spin_lock_init(&gpt->lock); + raw_spin_lock_init(&gpt->lock); gpt->dev = &ofdev->dev; gpt->ipb_freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node); gpt->regs = of_iomap(ofdev->dev.of_node, 0); -- cgit v1.2.3 From 1d65b1c886be6111f2347bd1a548bec58da17ccf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 3 Nov 2017 10:56:32 +1100 Subject: powerpc/cell: Remove axonram driver The QS21/22 IBM Cell blades had a southbridge chip called Axon. This could have DDR DIMMs attached to it, though they were not directly usable as RAM, instead they could be used as some sort of buffer, if applications were written specifically to use the block device provided by the driver. Although the driver supposedly had direct access support, it was apparently never tested (see commit 91117a20245b ("axonram: Fix bug in direct_access")). These machines have not been available for over 5 years, and were never widely in use. It seems highly unlikely anyone is using this driver. In general we're happy to leave old drivers in the tree, but because DAX is involved this driver is caught up in the ongoing work in that area, but none of the DAX folks are able to test it. So remove the driver, if any one *is* using it, we'll be happy to put it back. Signed-off-by: Michael Ellerman --- Documentation/filesystems/dax.txt | 1 - arch/powerpc/platforms/Kconfig | 11 -- arch/powerpc/sysdev/Makefile | 1 - arch/powerpc/sysdev/axonram.c | 383 -------------------------------------- 4 files changed, 396 deletions(-) delete mode 100644 arch/powerpc/sysdev/axonram.c (limited to 'arch/powerpc') diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt index 3be3b266be41..70cb68bed2e8 100644 --- a/Documentation/filesystems/dax.txt +++ b/Documentation/filesystems/dax.txt @@ -46,7 +46,6 @@ stall the CPU for an extended period, you should also not attempt to implement direct_access. These block devices may be used for inspiration: -- axonram: Axon DDR2 device driver - brd: RAM backed block device driver - dcssblk: s390 dcss block device driver - pmem: NVDIMM persistent memory driver diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 5a96a2763e4a..14ef17e10ec9 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -293,17 +293,6 @@ config CPM2 you wish to build a kernel for a machine with a CPM2 coprocessor on it (826x, 827x, 8560). -config AXON_RAM - tristate "Axon DDR2 memory device driver" - depends on PPC_IBM_CELL_BLADE && BLOCK - select DAX - default m - help - It registers one block device per Axon's DDR2 memory bank found - on a system. Block devices are called axonram?, their major and - minor numbers are available in /proc/devices, /proc/partitions or - in /sys/block/axonram?/dev. - config FSL_ULI1575 bool default n diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index fc31a271830b..9861407d644a 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -32,7 +32,6 @@ mv64x60-$(CONFIG_PCI) += mv64x60_pci.o obj-$(CONFIG_MV64X60) += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o \ mv64x60_udbg.o obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o -obj-$(CONFIG_AXON_RAM) += axonram.o obj-$(CONFIG_PPC_INDIRECT_PCI) += indirect_pci.o obj-$(CONFIG_PPC_I8259) += i8259.o diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c deleted file mode 100644 index 53b7f8ef570f..000000000000 --- a/arch/powerpc/sysdev/axonram.c +++ /dev/null @@ -1,383 +0,0 @@ -/* - * (C) Copyright IBM Deutschland Entwicklung GmbH 2006 - * - * Author: Maxim Shchetynin - * - * Axon DDR2 device driver. - * It registers one block device per Axon's DDR2 memory bank found on a system. - * Block devices are called axonram?, their major and minor numbers are - * available in /proc/devices, /proc/partitions or in /sys/block/axonram?/dev. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#define AXON_RAM_MODULE_NAME "axonram" -#define AXON_RAM_DEVICE_NAME "axonram" -#define AXON_RAM_MINORS_PER_DISK 16 -#define AXON_RAM_BLOCK_SHIFT PAGE_SHIFT -#define AXON_RAM_BLOCK_SIZE 1 << AXON_RAM_BLOCK_SHIFT -#define AXON_RAM_SECTOR_SHIFT 9 -#define AXON_RAM_SECTOR_SIZE 1 << AXON_RAM_SECTOR_SHIFT -#define AXON_RAM_IRQ_FLAGS IRQF_SHARED | IRQF_TRIGGER_RISING - -static int azfs_major, azfs_minor; - -struct axon_ram_bank { - struct platform_device *device; - struct gendisk *disk; - struct dax_device *dax_dev; - unsigned int irq_id; - unsigned long ph_addr; - unsigned long io_addr; - unsigned long size; - unsigned long ecc_counter; -}; - -static ssize_t -axon_ram_sysfs_ecc(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct platform_device *device = to_platform_device(dev); - struct axon_ram_bank *bank = device->dev.platform_data; - - BUG_ON(!bank); - - return sprintf(buf, "%ld\n", bank->ecc_counter); -} - -static DEVICE_ATTR(ecc, 0444, axon_ram_sysfs_ecc, NULL); - -/** - * axon_ram_irq_handler - interrupt handler for Axon RAM ECC - * @irq: interrupt ID - * @dev: pointer to of_device - */ -static irqreturn_t -axon_ram_irq_handler(int irq, void *dev) -{ - struct platform_device *device = dev; - struct axon_ram_bank *bank = device->dev.platform_data; - - BUG_ON(!bank); - - dev_err(&device->dev, "Correctable memory error occurred\n"); - bank->ecc_counter++; - return IRQ_HANDLED; -} - -/** - * axon_ram_make_request - make_request() method for block device - * @queue, @bio: see blk_queue_make_request() - */ -static blk_qc_t -axon_ram_make_request(struct request_queue *queue, struct bio *bio) -{ - struct axon_ram_bank *bank = bio->bi_disk->private_data; - unsigned long phys_mem, phys_end; - void *user_mem; - struct bio_vec vec; - unsigned int transfered; - struct bvec_iter iter; - - phys_mem = bank->io_addr + (bio->bi_iter.bi_sector << - AXON_RAM_SECTOR_SHIFT); - phys_end = bank->io_addr + bank->size; - transfered = 0; - bio_for_each_segment(vec, bio, iter) { - if (unlikely(phys_mem + vec.bv_len > phys_end)) { - bio_io_error(bio); - return BLK_QC_T_NONE; - } - - user_mem = page_address(vec.bv_page) + vec.bv_offset; - if (bio_data_dir(bio) == READ) - memcpy(user_mem, (void *) phys_mem, vec.bv_len); - else - memcpy((void *) phys_mem, user_mem, vec.bv_len); - - phys_mem += vec.bv_len; - transfered += vec.bv_len; - } - bio_endio(bio); - return BLK_QC_T_NONE; -} - -static const struct block_device_operations axon_ram_devops = { - .owner = THIS_MODULE, -}; - -static long -__axon_ram_direct_access(struct axon_ram_bank *bank, pgoff_t pgoff, long nr_pages, - void **kaddr, pfn_t *pfn) -{ - resource_size_t offset = pgoff * PAGE_SIZE; - - *kaddr = (void *) bank->io_addr + offset; - *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV); - return (bank->size - offset) / PAGE_SIZE; -} - -static long -axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, - void **kaddr, pfn_t *pfn) -{ - struct axon_ram_bank *bank = dax_get_private(dax_dev); - - return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn); -} - -static size_t axon_ram_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) -{ - return copy_from_iter(addr, bytes, i); -} - -static const struct dax_operations axon_ram_dax_ops = { - .direct_access = axon_ram_dax_direct_access, - .copy_from_iter = axon_ram_copy_from_iter, -}; - -/** - * axon_ram_probe - probe() method for platform driver - * @device: see platform_driver method - */ -static int axon_ram_probe(struct platform_device *device) -{ - static int axon_ram_bank_id = -1; - struct axon_ram_bank *bank; - struct resource resource; - int rc; - - axon_ram_bank_id++; - - dev_info(&device->dev, "Found memory controller on %pOF\n", - device->dev.of_node); - - bank = kzalloc(sizeof(*bank), GFP_KERNEL); - if (!bank) - return -ENOMEM; - - device->dev.platform_data = bank; - - bank->device = device; - - if (of_address_to_resource(device->dev.of_node, 0, &resource) != 0) { - dev_err(&device->dev, "Cannot access device tree\n"); - rc = -EFAULT; - goto failed; - } - - bank->size = resource_size(&resource); - - if (bank->size == 0) { - dev_err(&device->dev, "No DDR2 memory found for %s%d\n", - AXON_RAM_DEVICE_NAME, axon_ram_bank_id); - rc = -ENODEV; - goto failed; - } - - dev_info(&device->dev, "Register DDR2 memory device %s%d with %luMB\n", - AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20); - - bank->ph_addr = resource.start; - bank->io_addr = (unsigned long) ioremap_prot( - bank->ph_addr, bank->size, _PAGE_NO_CACHE); - if (bank->io_addr == 0) { - dev_err(&device->dev, "ioremap() failed\n"); - rc = -EFAULT; - goto failed; - } - - bank->disk = alloc_disk(AXON_RAM_MINORS_PER_DISK); - if (bank->disk == NULL) { - dev_err(&device->dev, "Cannot register disk\n"); - rc = -EFAULT; - goto failed; - } - - - bank->disk->major = azfs_major; - bank->disk->first_minor = azfs_minor; - bank->disk->fops = &axon_ram_devops; - bank->disk->private_data = bank; - - sprintf(bank->disk->disk_name, "%s%d", - AXON_RAM_DEVICE_NAME, axon_ram_bank_id); - - bank->dax_dev = alloc_dax(bank, bank->disk->disk_name, - &axon_ram_dax_ops); - if (!bank->dax_dev) { - rc = -ENOMEM; - goto failed; - } - - bank->disk->queue = blk_alloc_queue(GFP_KERNEL); - if (bank->disk->queue == NULL) { - dev_err(&device->dev, "Cannot register disk queue\n"); - rc = -EFAULT; - goto failed; - } - - set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT); - blk_queue_make_request(bank->disk->queue, axon_ram_make_request); - blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE); - device_add_disk(&device->dev, bank->disk); - - bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0); - if (!bank->irq_id) { - dev_err(&device->dev, "Cannot access ECC interrupt ID\n"); - rc = -EFAULT; - goto failed; - } - - rc = request_irq(bank->irq_id, axon_ram_irq_handler, - AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device); - if (rc != 0) { - dev_err(&device->dev, "Cannot register ECC interrupt handler\n"); - bank->irq_id = 0; - rc = -EFAULT; - goto failed; - } - - rc = device_create_file(&device->dev, &dev_attr_ecc); - if (rc != 0) { - dev_err(&device->dev, "Cannot create sysfs file\n"); - rc = -EFAULT; - goto failed; - } - - azfs_minor += bank->disk->minors; - - return 0; - -failed: - if (bank->irq_id) - free_irq(bank->irq_id, device); - if (bank->disk != NULL) { - if (bank->disk->major > 0) - unregister_blkdev(bank->disk->major, - bank->disk->disk_name); - if (bank->disk->flags & GENHD_FL_UP) - del_gendisk(bank->disk); - put_disk(bank->disk); - } - kill_dax(bank->dax_dev); - put_dax(bank->dax_dev); - device->dev.platform_data = NULL; - if (bank->io_addr != 0) - iounmap((void __iomem *) bank->io_addr); - kfree(bank); - return rc; -} - -/** - * axon_ram_remove - remove() method for platform driver - * @device: see of_platform_driver method - */ -static int -axon_ram_remove(struct platform_device *device) -{ - struct axon_ram_bank *bank = device->dev.platform_data; - - BUG_ON(!bank || !bank->disk); - - device_remove_file(&device->dev, &dev_attr_ecc); - free_irq(bank->irq_id, device); - kill_dax(bank->dax_dev); - put_dax(bank->dax_dev); - del_gendisk(bank->disk); - put_disk(bank->disk); - iounmap((void __iomem *) bank->io_addr); - kfree(bank); - - return 0; -} - -static const struct of_device_id axon_ram_device_id[] = { - { - .type = "dma-memory" - }, - {} -}; -MODULE_DEVICE_TABLE(of, axon_ram_device_id); - -static struct platform_driver axon_ram_driver = { - .probe = axon_ram_probe, - .remove = axon_ram_remove, - .driver = { - .name = AXON_RAM_MODULE_NAME, - .of_match_table = axon_ram_device_id, - }, -}; - -/** - * axon_ram_init - */ -static int __init -axon_ram_init(void) -{ - azfs_major = register_blkdev(azfs_major, AXON_RAM_DEVICE_NAME); - if (azfs_major < 0) { - printk(KERN_ERR "%s cannot become block device major number\n", - AXON_RAM_MODULE_NAME); - return -EFAULT; - } - azfs_minor = 0; - - return platform_driver_register(&axon_ram_driver); -} - -/** - * axon_ram_exit - */ -static void __exit -axon_ram_exit(void) -{ - platform_driver_unregister(&axon_ram_driver); - unregister_blkdev(azfs_major, AXON_RAM_DEVICE_NAME); -} - -module_init(axon_ram_init); -module_exit(axon_ram_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Maxim Shchetynin "); -MODULE_DESCRIPTION("Axon DDR2 RAM device driver for IBM Cell BE"); -- cgit v1.2.3 From d8fa82e0e2b1ad9c66aa76810615ad389b414d44 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 12 Oct 2017 15:44:32 +1100 Subject: powerpc/watchdog: Tweak watchdog printks Use pr_fmt() in the watchdog code, so we don't have to say "Watchdog" so many times. Rather than "CPU:%d" just spell it "CPU %d", "Hard" doesn't need a capital in the middle of a sentence, and "LOCKUP other CPUS" should be "LOCKUP on other CPUS". Also make it clear when a CPU self detects a lockup by spelling it out. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 0673230c0261..c00baea616ba 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -6,6 +6,9 @@ * * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c */ + +#define pr_fmt(fmt) "watchdog: " fmt + #include #include #include @@ -108,7 +111,7 @@ static inline void wd_smp_unlock(unsigned long *flags) static void wd_lockup_ipi(struct pt_regs *regs) { - pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", raw_smp_processor_id()); + pr_emerg("CPU %d Hard LOCKUP\n", raw_smp_processor_id()); print_modules(); print_irqtrace_events(current); if (regs) @@ -149,8 +152,8 @@ static void watchdog_smp_panic(int cpu, u64 tb) if (cpumask_weight(&wd_smp_cpus_pending) == 0) goto out; - pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n", - cpu, cpumask_pr_args(&wd_smp_cpus_pending)); + pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n", + cpu, cpumask_pr_args(&wd_smp_cpus_pending)); if (!sysctl_hardlockup_all_cpu_backtrace) { /* @@ -193,7 +196,7 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb) if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) { unsigned long flags; - pr_emerg("Watchdog CPU:%d became unstuck\n", cpu); + pr_emerg("CPU %d became unstuck\n", cpu); wd_smp_lock(&flags); cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck); wd_smp_unlock(&flags); @@ -251,7 +254,7 @@ void soft_nmi_interrupt(struct pt_regs *regs) } set_cpu_stuck(cpu, tb); - pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", cpu); + pr_emerg("CPU %d self-detected hard LOCKUP\n", cpu); print_modules(); print_irqtrace_events(current); if (regs) @@ -406,7 +409,7 @@ int __init watchdog_nmi_probe(void) "powerpc/watchdog:online", start_wd_on_cpu, stop_wd_on_cpu); if (err < 0) { - pr_warn("Watchdog could not be initialized"); + pr_warn("could not be initialized"); return err; } return 0; -- cgit v1.2.3 From 3ba45b7e46e2ae7c059f3335120aed5e2c749bd9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 12 Oct 2017 15:44:33 +1100 Subject: powerpc/watchdog: regs can't be null in soft_nmi_interrupt() soft_nmi_interrupt() is called directly from the asm exception handling code, which passes regs as a pointer to the stack. So regs can't be NULL, it may be full of junk, but that's a separate problem. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index c00baea616ba..80a467eb532a 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -257,10 +257,7 @@ void soft_nmi_interrupt(struct pt_regs *regs) pr_emerg("CPU %d self-detected hard LOCKUP\n", cpu); print_modules(); print_irqtrace_events(current); - if (regs) - show_regs(regs); - else - dump_stack(); + show_regs(regs); wd_smp_unlock(&flags); -- cgit v1.2.3 From 0bc00914010e3826a3d168d1c879f79800a11bae Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 12 Oct 2017 15:44:34 +1100 Subject: powerpc/watchdog: Print the NIP in soft_nmi_interrupt() When a CPU detects its locked up via soft_nmi_interrupt() we have pt_regs, so print the regs->nip, which points to where we took the soft-NMI. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 80a467eb532a..6256dc3b0087 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -254,7 +254,7 @@ void soft_nmi_interrupt(struct pt_regs *regs) } set_cpu_stuck(cpu, tb); - pr_emerg("CPU %d self-detected hard LOCKUP\n", cpu); + pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n", cpu, (void *)regs->nip); print_modules(); print_irqtrace_events(current); show_regs(regs); -- cgit v1.2.3 From 015eb1b89e959c9349f0a01803fb8ed1ced36f09 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 30 Jan 2018 20:37:21 +1100 Subject: powerpc/mm/radix: Fix build error when RADIX_MMU=n The recent TLB flush rework broke the build when the Radix MMU is disabled at build time, eg: (.text+0x264): undefined reference to `.radix__tlbiel_all' We could add an empty version, but if we ever called it by accident that would indicate a bad bug, so add a stub that just WARNs if we do. Fixes: d4748276ae14 ("powerpc/64s: Improve local TLB flush for boot and MCE on POWER9") Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/tlbflush-radix.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index b8f9ad587087..8eea90f80e45 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -11,7 +11,11 @@ static inline int mmu_get_ap(int psize) return mmu_psize_defs[psize].ap; } +#ifdef CONFIG_PPC_RADIX_MMU extern void radix__tlbiel_all(unsigned int action); +#else +static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); }; +#endif extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); -- cgit v1.2.3 From 2e3ca40f03bb13709df40eff2f7fc157803fa5a3 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 31 Jan 2018 16:16:02 -0800 Subject: mm: relax deferred struct page requirements There is no need to have ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT, as all the page initialization code is in common code. Also, there is no need to depend on MEMORY_HOTPLUG, as initialization code does not really use hotplug memory functionality. So, we can remove this requirement as well. This patch allows to use deferred struct page initialization on all platforms with memblock allocator. Tested on x86, arm64, and sparc. Also, verified that code compiles on PPC with CONFIG_MEMORY_HOTPLUG disabled. Link: http://lkml.kernel.org/r/20171117014601.31606-1-pasha.tatashin@oracle.com Signed-off-by: Pavel Tatashin Acked-by: Heiko Carstens [s390] Reviewed-by: Khalid Aziz Acked-by: Michael Ellerman Acked-by: Michal Hocko Cc: Steven Sistare Cc: Daniel Jordan Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Kirill A. Shutemov Cc: Reza Arbab Cc: Martin Schwidefsky Cc: Thomas Gleixner Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/Kconfig | 1 - arch/s390/Kconfig | 1 - arch/x86/Kconfig | 1 - mm/Kconfig | 7 +------ 4 files changed, 1 insertion(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e92432ae9737..73fcf592ee91 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -151,7 +151,6 @@ config PPC select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_WANT_IPC_PARSE_VERSION diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9376637229c9..0105ce28e246 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -108,7 +108,6 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dbe5542a6666..7a1c51198af1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -69,7 +69,6 @@ config X86 select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS diff --git a/mm/Kconfig b/mm/Kconfig index 03ff7703d322..c782e8fb7235 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -639,15 +639,10 @@ config MAX_STACK_SIZE_MB A sane initial value is 80 MB. -# For architectures that support deferred memory initialisation -config ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT - bool - config DEFERRED_STRUCT_PAGE_INIT bool "Defer initialisation of struct pages to kthreads" default n - depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT - depends on NO_BOOTMEM && MEMORY_HOTPLUG + depends on NO_BOOTMEM depends on !FLATMEM help Ordinarily all struct pages are initialised during early boot in a -- cgit v1.2.3 From 8cc931e03339eebbdbaa2ac1998d25a8a90b77d4 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 31 Jan 2018 16:18:02 -0800 Subject: powerpc/mm: update pmdp_invalidate to return old pmd value It's required to avoid losing dirty and accessed bits. Link: http://lkml.kernel.org/r/20171213105756.69879-7-kirill.shutemov@linux.intel.com Signed-off-by: Aneesh Kumar K.V Signed-off-by: Kirill A. Shutemov Cc: Vlastimil Babka Cc: Andrea Arcangeli Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/pgtable.h | 4 ++-- arch/powerpc/mm/pgtable-book3s64.c | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 44697817ccc6..ee19d5bbee06 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1137,8 +1137,8 @@ static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, } #define __HAVE_ARCH_PMDP_INVALIDATE -extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp); +extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp); #define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma, diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 3b65917785a5..422e80253a33 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -90,16 +90,19 @@ void serialize_against_pte_lookup(struct mm_struct *mm) * We use this to invalidate a pmdp entry before switching from a * hugepte to regular pmd entry. */ -void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, +pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0); + unsigned long old_pmd; + + old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); /* * This ensures that generic code that rely on IRQ disabling * to prevent a parallel THP split work as expected. */ serialize_against_pte_lookup(vma->vm_mm); + return __pmd(old_pmd); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) -- cgit v1.2.3 From 423ac9af3ceff967a77b0714781033629593b077 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 31 Jan 2018 16:18:24 -0800 Subject: mm/thp: remove pmd_huge_split_prepare() Instead of marking the pmd ready for split, invalidate the pmd. This should take care of powerpc requirement. Only side effect is that we mark the pmd invalid early. This can result in us blocking access to the page a bit longer if we race against a thp split. [kirill.shutemov@linux.intel.com: rebased, dirty THP once] Link: http://lkml.kernel.org/r/20171213105756.69879-13-kirill.shutemov@linux.intel.com Signed-off-by: Aneesh Kumar K.V Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: Catalin Marinas Cc: David Daney Cc: David Miller Cc: H. Peter Anvin Cc: Hugh Dickins Cc: Ingo Molnar Cc: Martin Schwidefsky Cc: Michal Hocko Cc: Nitin Gupta Cc: Ralf Baechle Cc: Thomas Gleixner Cc: Vineet Gupta Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 2 - arch/powerpc/include/asm/book3s/64/hash-64k.h | 2 - arch/powerpc/include/asm/book3s/64/pgtable.h | 9 ---- arch/powerpc/include/asm/book3s/64/radix.h | 6 --- arch/powerpc/mm/pgtable-hash64.c | 22 -------- include/asm-generic/pgtable.h | 8 --- mm/huge_memory.c | 72 +++++++++++++-------------- 7 files changed, 35 insertions(+), 86 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 197ced1eaaa0..2d9df40446f6 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -101,8 +101,6 @@ extern pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, extern void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable); extern pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); -extern void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); extern int hash__has_transparent_hugepage(void); diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 8d40cf03cb67..cb46d1034f33 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -203,8 +203,6 @@ extern pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, extern void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable); extern pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); -extern void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); extern int hash__has_transparent_hugepage(void); diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index ee19d5bbee06..6ca1208cedcb 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1140,15 +1140,6 @@ static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); -#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE -static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - if (radix_enabled()) - return radix__pmdp_huge_split_prepare(vma, address, pmdp); - return hash__pmdp_huge_split_prepare(vma, address, pmdp); -} - #define pmd_move_must_withdraw pmd_move_must_withdraw struct spinlock; static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 19c44e1495ae..365010f66570 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -269,12 +269,6 @@ static inline pmd_t radix__pmd_mkhuge(pmd_t pmd) return __pmd(pmd_val(pmd) | _PAGE_PTE | R_PAGE_LARGE); return __pmd(pmd_val(pmd) | _PAGE_PTE); } -static inline void radix__pmdp_huge_split_prepare(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - /* Nothing to do for radix. */ - return; -} extern unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long clr, diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c index ec277913e01b..469808e77e58 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/pgtable-hash64.c @@ -296,28 +296,6 @@ pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) return pgtable; } -void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - VM_BUG_ON(REGION_ID(address) != USER_REGION_ID); - VM_BUG_ON(pmd_devmap(*pmdp)); - - /* - * We can't mark the pmd none here, because that will cause a race - * against exit_mmap. We need to continue mark pmd TRANS HUGE, while - * we spilt, but at the same time we wan't rest of the ppc64 code - * not to insert hash pte on this, because we will be modifying - * the deposited pgtable in the caller of this function. Hence - * clear the _PAGE_USER so that we move the fault handling to - * higher level function and that will serialize against ptl. - * We need to flush existing hash pte entries here even though, - * the translation is still valid, because we will withdraw - * pgtable_t after this. - */ - pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED); -} - /* * A linux hugepage PMD was changed and the corresponding hash table entries * neesd to be flushed. diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 51eebd7546b2..2cfa3075d148 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -329,14 +329,6 @@ extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #endif -#ifndef __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE -static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - -} -#endif - #ifndef __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t pte_a, pte_t pte_b) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2a79a6b7d19b..87ab9b8f56b5 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2063,7 +2063,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, struct mm_struct *mm = vma->vm_mm; struct page *page; pgtable_t pgtable; - pmd_t old, _pmd; + pmd_t old_pmd, _pmd; bool young, write, soft_dirty, pmd_migration = false; unsigned long addr; int i; @@ -2106,23 +2106,50 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, return __split_huge_zero_page_pmd(vma, haddr, pmd); } + /* + * Up to this point the pmd is present and huge and userland has the + * whole access to the hugepage during the split (which happens in + * place). If we overwrite the pmd with the not-huge version pointing + * to the pte here (which of course we could if all CPUs were bug + * free), userland could trigger a small page size TLB miss on the + * small sized TLB while the hugepage TLB entry is still established in + * the huge TLB. Some CPU doesn't like that. + * See http://support.amd.com/us/Processor_TechDocs/41322.pdf, Erratum + * 383 on page 93. Intel should be safe but is also warns that it's + * only safe if the permission and cache attributes of the two entries + * loaded in the two TLB is identical (which should be the case here). + * But it is generally safer to never allow small and huge TLB entries + * for the same virtual address to be loaded simultaneously. So instead + * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the + * current pmd notpresent (atomically because here the pmd_trans_huge + * must remain set at all times on the pmd until the split is complete + * for this pmd), then we flush the SMP TLB and finally we write the + * non-huge version of the pmd entry with pmd_populate. + */ + old_pmd = pmdp_invalidate(vma, haddr, pmd); + #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION - pmd_migration = is_pmd_migration_entry(*pmd); + pmd_migration = is_pmd_migration_entry(old_pmd); if (pmd_migration) { swp_entry_t entry; - entry = pmd_to_swp_entry(*pmd); + entry = pmd_to_swp_entry(old_pmd); page = pfn_to_page(swp_offset(entry)); } else #endif - page = pmd_page(*pmd); + page = pmd_page(old_pmd); VM_BUG_ON_PAGE(!page_count(page), page); page_ref_add(page, HPAGE_PMD_NR - 1); - write = pmd_write(*pmd); - young = pmd_young(*pmd); - soft_dirty = pmd_soft_dirty(*pmd); + if (pmd_dirty(old_pmd)) + SetPageDirty(page); + write = pmd_write(old_pmd); + young = pmd_young(old_pmd); + soft_dirty = pmd_soft_dirty(old_pmd); - pmdp_huge_split_prepare(vma, haddr, pmd); + /* + * Withdraw the table only after we mark the pmd entry invalid. + * This's critical for some architectures (Power). + */ pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); @@ -2176,35 +2203,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, } smp_wmb(); /* make pte visible before pmd */ - /* - * Up to this point the pmd is present and huge and userland has the - * whole access to the hugepage during the split (which happens in - * place). If we overwrite the pmd with the not-huge version pointing - * to the pte here (which of course we could if all CPUs were bug - * free), userland could trigger a small page size TLB miss on the - * small sized TLB while the hugepage TLB entry is still established in - * the huge TLB. Some CPU doesn't like that. - * See http://support.amd.com/us/Processor_TechDocs/41322.pdf, Erratum - * 383 on page 93. Intel should be safe but is also warns that it's - * only safe if the permission and cache attributes of the two entries - * loaded in the two TLB is identical (which should be the case here). - * But it is generally safer to never allow small and huge TLB entries - * for the same virtual address to be loaded simultaneously. So instead - * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the - * current pmd notpresent (atomically because here the pmd_trans_huge - * must remain set at all times on the pmd until the split is complete - * for this pmd), then we flush the SMP TLB and finally we write the - * non-huge version of the pmd entry with pmd_populate. - */ - old = pmdp_invalidate(vma, haddr, pmd); - - /* - * Transfer dirty bit using value returned by pmd_invalidate() to be - * sure we don't race with CPU that can set the bit under us. - */ - if (pmd_dirty(old)) - SetPageDirty(page); - pmd_populate(mm, pmd, pgtable); if (freeze) { -- cgit v1.2.3