diff options
Diffstat (limited to 'arch/powerpc/platforms')
25 files changed, 828 insertions, 210 deletions
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index a78f255111f2..ae07470fde3c 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -295,10 +295,6 @@ config PPC_STD_MMU_32 def_bool y depends on PPC_STD_MMU && PPC32 -config PPC_STD_MMU_64 - def_bool y - depends on PPC_STD_MMU && PPC64 - config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 @@ -309,6 +305,19 @@ config PPC_RADIX_MMU is only implemented by IBM Power9 CPUs, if you don't have one of them you can probably disable this. +config PPC_RADIX_MMU_DEFAULT + bool "Default to using the Radix MMU when possible" + depends on PPC_RADIX_MMU + default y + help + When the hardware supports the Radix MMU, default to using it unless + "disable_radix[=yes]" is specified on the kernel command line. + + If this option is disabled, the Hash MMU will be used by default, + unless "disable_radix=no" is specified on the kernel command line. + + If you're unsure, say Y. + config ARCH_ENABLE_HUGEPAGE_MIGRATION def_bool y depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION @@ -324,7 +333,7 @@ config PPC_BOOK3E_MMU config PPC_MM_SLICES bool - default y if PPC_STD_MMU_64 + default y if PPC_BOOK3S_64 default n config PPC_HAVE_PMU_SUPPORT diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index 70183eb3d5c8..39a1d4225e0f 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -513,9 +513,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np) mutex_init(&host->mutex); init_completion(&host->complete); spin_lock_init(&host->lock); - init_timer(&host->timeout_timer); - host->timeout_timer.function = kw_i2c_timeout; - host->timeout_timer.data = (unsigned long)host; + setup_timer(&host->timeout_timer, kw_i2c_timeout, (unsigned long)host); psteps = of_get_property(np, "AAPL,address-step", NULL); steps = psteps ? (*psteps) : 0x10; diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 7a31c26500e6..3732118a0482 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -15,4 +15,5 @@ obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o obj-$(CONFIG_OPAL_PRD) += opal-prd.o obj-$(CONFIG_PERF_EVENTS) += opal-imc.o obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o -obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o +obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o +obj-$(CONFIG_PPC_FTW) += nx-ftw.o diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 8864065eba22..4650fb294e7a 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -41,7 +41,6 @@ #include "powernv.h" #include "pci.h" -static bool pnv_eeh_nb_init = false; static int eeh_event_irq = -EINVAL; static int pnv_eeh_init(void) @@ -197,31 +196,31 @@ PNV_EEH_DBGFS_ENTRY(inbB, 0xE10); * been built. If the I/O cache staff has been built, EEH is * ready to supply service. */ -static int pnv_eeh_post_init(void) +int pnv_eeh_post_init(void) { struct pci_controller *hose; struct pnv_phb *phb; int ret = 0; - /* Register OPAL event notifier */ - if (!pnv_eeh_nb_init) { - eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR)); - if (eeh_event_irq < 0) { - pr_err("%s: Can't register OPAL event interrupt (%d)\n", - __func__, eeh_event_irq); - return eeh_event_irq; - } + /* Probe devices & build address cache */ + eeh_probe_devices(); + eeh_addr_cache_build(); - ret = request_irq(eeh_event_irq, pnv_eeh_event, - IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL); - if (ret < 0) { - irq_dispose_mapping(eeh_event_irq); - pr_err("%s: Can't request OPAL event interrupt (%d)\n", - __func__, eeh_event_irq); - return ret; - } + /* Register OPAL event notifier */ + eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR)); + if (eeh_event_irq < 0) { + pr_err("%s: Can't register OPAL event interrupt (%d)\n", + __func__, eeh_event_irq); + return eeh_event_irq; + } - pnv_eeh_nb_init = true; + ret = request_irq(eeh_event_irq, pnv_eeh_event, + IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL); + if (ret < 0) { + irq_dispose_mapping(eeh_event_irq); + pr_err("%s: Can't request OPAL event interrupt (%d)\n", + __func__, eeh_event_irq); + return ret; } if (!eeh_enabled()) @@ -367,6 +366,10 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) return NULL; + /* Skip if we haven't probed yet */ + if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE) + return NULL; + /* Initialize eeh device */ edev->class_code = pdn->class_code; edev->mode &= 0xFFFFFF00; @@ -1731,7 +1734,6 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn) static struct eeh_ops pnv_eeh_ops = { .name = "powernv", .init = pnv_eeh_init, - .post_init = pnv_eeh_post_init, .probe = pnv_eeh_probe, .set_option = pnv_eeh_set_option, .get_pe_addr = pnv_eeh_get_pe_addr, diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 2cb6cbea4b3b..f6cbc1a71472 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -395,6 +395,7 @@ struct npu_context { struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS]; struct mmu_notifier mn; struct kref kref; + bool nmmu_flush; /* Callback to stop translation requests on a given GPU */ struct npu_context *(*release_cb)(struct npu_context *, void *); @@ -545,11 +546,13 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; unsigned long pid = npu_context->mm->context.id; - /* - * Unfortunately the nest mmu does not support flushing specific - * addresses so we have to flush the whole mm. - */ - flush_tlb_mm(npu_context->mm); + if (npu_context->nmmu_flush) + /* + * Unfortunately the nest mmu does not support flushing specific + * addresses so we have to flush the whole mm once before + * shooting down the GPU translation. + */ + flush_all_mm(npu_context->mm); /* * Loop over all the NPUs this process is active on and launch @@ -722,6 +725,16 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, return ERR_PTR(-ENODEV); npu_context->npdev[npu->index][nvlink_index] = npdev; + if (!nphb->npu.nmmu_flush) { + /* + * If we're not explicitly flushing ourselves we need to mark + * the thread for global flushes + */ + npu_context->nmmu_flush = false; + mm_context_add_copro(mm); + } else + npu_context->nmmu_flush = true; + return npu_context; } EXPORT_SYMBOL(pnv_npu2_init_context); @@ -731,6 +744,9 @@ static void pnv_npu2_release_context(struct kref *kref) struct npu_context *npu_context = container_of(kref, struct npu_context, kref); + if (!npu_context->nmmu_flush) + mm_context_remove_copro(npu_context->mm); + npu_context->mm->context.npu_context = NULL; mmu_notifier_unregister(&npu_context->mn, npu_context->mm); @@ -819,6 +835,8 @@ int pnv_npu2_init(struct pnv_phb *phb) static int npu_index; uint64_t rc = 0; + phb->npu.nmmu_flush = + of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush"); for_each_child_of_node(phb->hose->dn, dn) { gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn)); if (gpdev) { diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index cf33769a7b72..18a355fa15e8 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -1,7 +1,7 @@ /* * PowerNV OPAL asynchronous completion interfaces * - * Copyright 2013 IBM Corp. + * Copyright 2013-2017 IBM Corp. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -23,40 +23,50 @@ #include <asm/machdep.h> #include <asm/opal.h> -#define N_ASYNC_COMPLETIONS 64 +enum opal_async_token_state { + ASYNC_TOKEN_UNALLOCATED = 0, + ASYNC_TOKEN_ALLOCATED, + ASYNC_TOKEN_DISPATCHED, + ASYNC_TOKEN_ABANDONED, + ASYNC_TOKEN_COMPLETED +}; + +struct opal_async_token { + enum opal_async_token_state state; + struct opal_msg response; +}; -static DECLARE_BITMAP(opal_async_complete_map, N_ASYNC_COMPLETIONS) = {~0UL}; -static DECLARE_BITMAP(opal_async_token_map, N_ASYNC_COMPLETIONS); static DECLARE_WAIT_QUEUE_HEAD(opal_async_wait); static DEFINE_SPINLOCK(opal_async_comp_lock); static struct semaphore opal_async_sem; -static struct opal_msg *opal_async_responses; static unsigned int opal_max_async_tokens; +static struct opal_async_token *opal_async_tokens; -int __opal_async_get_token(void) +static int __opal_async_get_token(void) { unsigned long flags; - int token; + int i, token = -EBUSY; spin_lock_irqsave(&opal_async_comp_lock, flags); - token = find_first_bit(opal_async_complete_map, opal_max_async_tokens); - if (token >= opal_max_async_tokens) { - token = -EBUSY; - goto out; - } - if (__test_and_set_bit(token, opal_async_token_map)) { - token = -EBUSY; - goto out; + for (i = 0; i < opal_max_async_tokens; i++) { + if (opal_async_tokens[i].state == ASYNC_TOKEN_UNALLOCATED) { + opal_async_tokens[i].state = ASYNC_TOKEN_ALLOCATED; + token = i; + break; + } } - __clear_bit(token, opal_async_complete_map); - -out: spin_unlock_irqrestore(&opal_async_comp_lock, flags); return token; } +/* + * Note: If the returned token is used in an opal call and opal returns + * OPAL_ASYNC_COMPLETION you MUST call one of opal_async_wait_response() or + * opal_async_wait_response_interruptible() at least once before calling another + * opal_async_* function + */ int opal_async_get_token_interruptible(void) { int token; @@ -73,9 +83,10 @@ int opal_async_get_token_interruptible(void) } EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible); -int __opal_async_release_token(int token) +static int __opal_async_release_token(int token) { unsigned long flags; + int rc; if (token < 0 || token >= opal_max_async_tokens) { pr_err("%s: Passed token is out of range, token %d\n", @@ -84,11 +95,26 @@ int __opal_async_release_token(int token) } spin_lock_irqsave(&opal_async_comp_lock, flags); - __set_bit(token, opal_async_complete_map); - __clear_bit(token, opal_async_token_map); + switch (opal_async_tokens[token].state) { + case ASYNC_TOKEN_COMPLETED: + case ASYNC_TOKEN_ALLOCATED: + opal_async_tokens[token].state = ASYNC_TOKEN_UNALLOCATED; + rc = 0; + break; + /* + * DISPATCHED and ABANDONED tokens must wait for OPAL to respond. + * Mark a DISPATCHED token as ABANDONED so that the response handling + * code knows no one cares and that it can free it then. + */ + case ASYNC_TOKEN_DISPATCHED: + opal_async_tokens[token].state = ASYNC_TOKEN_ABANDONED; + /* Fall through */ + default: + rc = 1; + } spin_unlock_irqrestore(&opal_async_comp_lock, flags); - return 0; + return rc; } int opal_async_release_token(int token) @@ -96,12 +122,10 @@ int opal_async_release_token(int token) int ret; ret = __opal_async_release_token(token); - if (ret) - return ret; - - up(&opal_async_sem); + if (!ret) + up(&opal_async_sem); - return 0; + return ret; } EXPORT_SYMBOL_GPL(opal_async_release_token); @@ -117,22 +141,83 @@ int opal_async_wait_response(uint64_t token, struct opal_msg *msg) return -EINVAL; } - /* Wakeup the poller before we wait for events to speed things + /* + * There is no need to mark the token as dispatched, wait_event() + * will block until the token completes. + * + * Wakeup the poller before we wait for events to speed things * up on platforms or simulators where the interrupts aren't * functional. */ opal_wake_poller(); - wait_event(opal_async_wait, test_bit(token, opal_async_complete_map)); - memcpy(msg, &opal_async_responses[token], sizeof(*msg)); + wait_event(opal_async_wait, opal_async_tokens[token].state + == ASYNC_TOKEN_COMPLETED); + memcpy(msg, &opal_async_tokens[token].response, sizeof(*msg)); return 0; } EXPORT_SYMBOL_GPL(opal_async_wait_response); +int opal_async_wait_response_interruptible(uint64_t token, struct opal_msg *msg) +{ + unsigned long flags; + int ret; + + if (token >= opal_max_async_tokens) { + pr_err("%s: Invalid token passed\n", __func__); + return -EINVAL; + } + + if (!msg) { + pr_err("%s: Invalid message pointer passed\n", __func__); + return -EINVAL; + } + + /* + * The first time this gets called we mark the token as DISPATCHED + * so that if wait_event_interruptible() returns not zero and the + * caller frees the token, we know not to actually free the token + * until the response comes. + * + * Only change if the token is ALLOCATED - it may have been + * completed even before the caller gets around to calling this + * the first time. + * + * There is also a dirty great comment at the token allocation + * function that if the opal call returns OPAL_ASYNC_COMPLETION to + * the caller then the caller *must* call this or the not + * interruptible version before doing anything else with the + * token. + */ + if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED) { + spin_lock_irqsave(&opal_async_comp_lock, flags); + if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED) + opal_async_tokens[token].state = ASYNC_TOKEN_DISPATCHED; + spin_unlock_irqrestore(&opal_async_comp_lock, flags); + } + + /* + * Wakeup the poller before we wait for events to speed things + * up on platforms or simulators where the interrupts aren't + * functional. + */ + opal_wake_poller(); + ret = wait_event_interruptible(opal_async_wait, + opal_async_tokens[token].state == + ASYNC_TOKEN_COMPLETED); + if (!ret) + memcpy(msg, &opal_async_tokens[token].response, sizeof(*msg)); + + return ret; +} +EXPORT_SYMBOL_GPL(opal_async_wait_response_interruptible); + +/* Called from interrupt context */ static int opal_async_comp_event(struct notifier_block *nb, unsigned long msg_type, void *msg) { struct opal_msg *comp_msg = msg; + enum opal_async_token_state state; unsigned long flags; uint64_t token; @@ -140,11 +225,17 @@ static int opal_async_comp_event(struct notifier_block *nb, return 0; token = be64_to_cpu(comp_msg->params[0]); - memcpy(&opal_async_responses[token], comp_msg, sizeof(*comp_msg)); spin_lock_irqsave(&opal_async_comp_lock, flags); - __set_bit(token, opal_async_complete_map); + state = opal_async_tokens[token].state; + opal_async_tokens[token].state = ASYNC_TOKEN_COMPLETED; spin_unlock_irqrestore(&opal_async_comp_lock, flags); + if (state == ASYNC_TOKEN_ABANDONED) { + /* Free the token, no one else will */ + opal_async_release_token(token); + return 0; + } + memcpy(&opal_async_tokens[token].response, comp_msg, sizeof(*comp_msg)); wake_up(&opal_async_wait); return 0; @@ -178,32 +269,23 @@ int __init opal_async_comp_init(void) } opal_max_async_tokens = be32_to_cpup(async); - if (opal_max_async_tokens > N_ASYNC_COMPLETIONS) - opal_max_async_tokens = N_ASYNC_COMPLETIONS; + opal_async_tokens = kcalloc(opal_max_async_tokens, + sizeof(*opal_async_tokens), GFP_KERNEL); + if (!opal_async_tokens) { + err = -ENOMEM; + goto out_opal_node; + } err = opal_message_notifier_register(OPAL_MSG_ASYNC_COMP, &opal_async_comp_nb); if (err) { pr_err("%s: Can't register OPAL event notifier (%d)\n", __func__, err); + kfree(opal_async_tokens); goto out_opal_node; } - opal_async_responses = kzalloc( - sizeof(*opal_async_responses) * opal_max_async_tokens, - GFP_KERNEL); - if (!opal_async_responses) { - pr_err("%s: Out of memory, failed to do asynchronous " - "completion init\n", __func__); - err = -ENOMEM; - goto out_opal_node; - } - - /* Initialize to 1 less than the maximum tokens available, as we may - * require to pop one during emergency through synchronous call to - * __opal_async_get_token() - */ - sema_init(&opal_async_sem, opal_max_async_tokens - 1); + sema_init(&opal_async_sem, opal_max_async_tokens); out_opal_node: of_node_put(opal_node); diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index d78fed728cdf..c9e1a4ff295c 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -1,5 +1,5 @@ /* - * OPAL hypervisor Maintenance interrupt handling support in PowreNV. + * OPAL hypervisor Maintenance interrupt handling support in PowerNV. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index ecdcba9d1220..9d1b8c0aaf93 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -174,8 +174,14 @@ void opal_event_shutdown(void) /* First free interrupts, which will also mask them */ for (i = 0; i < opal_irq_count; i++) { - if (opal_irqs[i]) + if (!opal_irqs[i]) + continue; + + if (in_interrupt()) + disable_irq_nosync(opal_irqs[i]); + else free_irq(opal_irqs[i], NULL); + opal_irqs[i] = 0; } } diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c index 4495f428b500..d9916ea62305 100644 --- a/arch/powerpc/platforms/powernv/opal-memory-errors.c +++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c @@ -1,5 +1,5 @@ /* - * OPAL asynchronus Memory error handling support in PowreNV. + * OPAL asynchronus Memory error handling support in PowerNV. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c index aa267f120033..0a7074bb91dc 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor.c +++ b/arch/powerpc/platforms/powernv/opal-sensor.c @@ -19,13 +19,10 @@ */ #include <linux/delay.h> -#include <linux/mutex.h> #include <linux/of_platform.h> #include <asm/opal.h> #include <asm/machdep.h> -static DEFINE_MUTEX(opal_sensor_mutex); - /* * This will return sensor information to driver based on the requested sensor * handle. A handle is an opaque id for the powernv, read by the driver from the @@ -38,13 +35,9 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data) __be32 data; token = opal_async_get_token_interruptible(); - if (token < 0) { - pr_err("%s: Couldn't get the token, returning\n", __func__); - ret = token; - goto out; - } + if (token < 0) + return token; - mutex_lock(&opal_sensor_mutex); ret = opal_sensor_read(sensor_hndl, token, &data); switch (ret) { case OPAL_ASYNC_COMPLETION: @@ -52,7 +45,7 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data) if (ret) { pr_err("%s: Failed to wait for the async response, %d\n", __func__, ret); - goto out_token; + goto out; } ret = opal_error_code(opal_get_async_rc(msg)); @@ -73,10 +66,8 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data) break; } -out_token: - mutex_unlock(&opal_sensor_mutex); - opal_async_release_token(token); out: + opal_async_release_token(token); return ret; } EXPORT_SYMBOL_GPL(opal_get_sensor_data); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 8c1ede2d3f7e..6f4b00a2ac46 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -94,7 +94,7 @@ opal_return: * bytes (always BE) since MSR:LE will end up fixed up as a side * effect of the rfid. */ - FIXUP_ENDIAN + FIXUP_ENDIAN_HV ld r2,PACATOC(r13); lwz r4,8(r1); ld r5,PPC_LR_STKOFF(r1); @@ -120,7 +120,7 @@ opal_real_call: hrfid opal_return_realmode: - FIXUP_ENDIAN + FIXUP_ENDIAN_HV ld r2,PACATOC(r13); lwz r11,8(r1); ld r12,PPC_LR_STKOFF(r1) @@ -307,6 +307,7 @@ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO); OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO); OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC); OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP); +OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET); OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT); OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT); OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 65c79ecf5a4d..041ddbd1fc57 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -998,6 +998,7 @@ int opal_error_code(int rc) case OPAL_PARAMETER: return -EINVAL; case OPAL_ASYNC_COMPLETION: return -EINPROGRESS; + case OPAL_BUSY: case OPAL_BUSY_EVENT: return -EBUSY; case OPAL_NO_MEM: return -ENOMEM; case OPAL_PERMISSION: return -EPERM; @@ -1037,3 +1038,4 @@ EXPORT_SYMBOL_GPL(opal_write_oppanel_async); /* Export this for KVM */ EXPORT_SYMBOL_GPL(opal_int_set_mfrr); EXPORT_SYMBOL_GPL(opal_int_eoi); +EXPORT_SYMBOL_GPL(opal_error_code); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 57f9e55f4352..749055553064 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1002,9 +1002,12 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) } /* - * After doing so, there would be a "hole" in the /proc/iomem when - * offset is a positive value. It looks like the device return some - * mmio back to the system, which actually no one could use it. + * Since M64 BAR shares segments among all possible 256 PEs, + * we have to shift the beginning of PF IOV BAR to make it start from + * the segment which belongs to the PE number assigned to the first VF. + * This creates a "hole" in the /proc/iomem which could be used for + * allocating other resources so we reserve this area below and + * release when IOV is released. */ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { res = &dev->resource[i + PCI_IOV_RESOURCES]; @@ -1018,7 +1021,22 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n", i, &res2, res, (offset > 0) ? "En" : "Dis", num_vfs, offset); + + if (offset < 0) { + devm_release_resource(&dev->dev, &pdn->holes[i]); + memset(&pdn->holes[i], 0, sizeof(pdn->holes[i])); + } + pci_update_resource(dev, i + PCI_IOV_RESOURCES); + + if (offset > 0) { + pdn->holes[i].start = res2.start; + pdn->holes[i].end = res2.start + size * offset - 1; + pdn->holes[i].flags = IORESOURCE_BUS; + pdn->holes[i].name = "pnv_iov_reserved"; + devm_request_resource(&dev->dev, res->parent, + &pdn->holes[i]); + } } return 0; } @@ -2779,7 +2797,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) return -EINVAL; - if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size)) + if (!is_power_of_2(window_size)) return -EINVAL; /* Adjust direct table size from window_size and levels */ @@ -3293,8 +3311,7 @@ static void pnv_pci_ioda_fixup(void) pnv_pci_ioda_create_dbgfs(); #ifdef CONFIG_EEH - eeh_init(); - eeh_addr_cache_build(); + pnv_eeh_post_init(); #endif } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index b47f9406d97e..b772d7473896 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -188,6 +188,9 @@ struct pnv_phb { /* Bitmask for MMIO register usage */ unsigned long mmio_atsd_usage; + + /* Do we need to explicitly flush the nest mmu? */ + bool nmmu_flush; } npu; #ifdef CONFIG_CXL_BASE @@ -235,6 +238,7 @@ extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); extern bool pnv_pci_enable_device_hook(struct pci_dev *dev); extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); +extern int pnv_eeh_post_init(void); extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index bbb73aa0eb8f..1edfbc1e40f4 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -36,6 +36,7 @@ #include <asm/opal.h> #include <asm/kexec.h> #include <asm/smp.h> +#include <asm/tm.h> #include "powernv.h" @@ -290,6 +291,7 @@ static void __init pnv_setup_machdep_opal(void) ppc_md.restart = pnv_restart; pm_power_off = pnv_power_off; ppc_md.halt = pnv_halt; + /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */ ppc_md.machine_check_exception = opal_machine_check; ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; ppc_md.hmi_exception_early = opal_hmi_exception_early; @@ -311,6 +313,28 @@ static int __init pnv_probe(void) return 1; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +void __init pnv_tm_init(void) +{ + if (!firmware_has_feature(FW_FEATURE_OPAL) || + !pvr_version_is(PVR_POWER9) || + early_cpu_has_feature(CPU_FTR_TM)) + return; + + if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS) + return; + + pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n"); + cur_cpu_spec->cpu_features |= CPU_FTR_TM; + /* Make sure "normal" HTM is off (it should be) */ + cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM; + /* Turn on no suspend mode, and HTM no SC */ + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND | \ + PPC_FEATURE2_HTM_NOSC; + tm_suspend_disabled = true; +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + /* * Returns the cpu frequency for 'cpu' in Hz. This is used by * /proc/cpuinfo @@ -319,7 +343,7 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu) { unsigned long ret_freq; - ret_freq = cpufreq_quick_get(cpu) * 1000ul; + ret_freq = cpufreq_get(cpu) * 1000ul; /* * If the backend cpufreq driver does not exist, diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index c17f81e433f7..ba030669eca1 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -49,6 +49,13 @@ static void pnv_smp_setup_cpu(int cpu) { + /* + * P9 workaround for CI vector load (see traps.c), + * enable the corresponding HMI interrupt + */ + if (pvr_version_is(PVR_POWER9)) + mtspr(SPRN_HMEER, mfspr(SPRN_HMEER) | PPC_BIT(17)); + if (xive_enabled()) xive_smp_setup_cpu(); else if (cpu != boot_cpuid) @@ -290,6 +297,54 @@ static void __init pnv_smp_probe(void) } } +static int pnv_system_reset_exception(struct pt_regs *regs) +{ + if (smp_handle_nmi_ipi(regs)) + return 1; + return 0; +} + +static int pnv_cause_nmi_ipi(int cpu) +{ + int64_t rc; + + if (cpu >= 0) { + rc = opal_signal_system_reset(get_hard_smp_processor_id(cpu)); + if (rc != OPAL_SUCCESS) + return 0; + return 1; + + } else if (cpu == NMI_IPI_ALL_OTHERS) { + bool success = true; + int c; + + + /* + * We do not use broadcasts (yet), because it's not clear + * exactly what semantics Linux wants or the firmware should + * provide. + */ + for_each_online_cpu(c) { + if (c == smp_processor_id()) + continue; + + rc = opal_signal_system_reset( + get_hard_smp_processor_id(c)); + if (rc != OPAL_SUCCESS) + success = false; + } + if (success) + return 1; + + /* + * Caller will fall back to doorbells, which may pick + * up the remainders. + */ + } + + return 0; +} + static struct smp_ops_t pnv_smp_ops = { .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */ .cause_ipi = NULL, /* Filled at runtime by pnv_smp_probe() */ @@ -308,6 +363,10 @@ static struct smp_ops_t pnv_smp_ops = { /* This is called very early during platform setup_arch */ void __init pnv_smp_init(void) { + if (opal_check_token(OPAL_SIGNAL_SYSTEM_RESET)) { + ppc_md.system_reset_exception = pnv_system_reset_exception; + pnv_smp_ops.cause_nmi_ipi = pnv_cause_nmi_ipi; + } smp_ops = &pnv_smp_ops; #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c new file mode 100644 index 000000000000..ca22f1eae050 --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas-debug.c @@ -0,0 +1,209 @@ +/* + * Copyright 2016-17 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include "vas.h" + +static struct dentry *vas_debugfs; + +static char *cop_to_str(int cop) +{ + switch (cop) { + case VAS_COP_TYPE_FAULT: return "Fault"; + case VAS_COP_TYPE_842: return "NX-842 Normal Priority"; + case VAS_COP_TYPE_842_HIPRI: return "NX-842 High Priority"; + case VAS_COP_TYPE_GZIP: return "NX-GZIP Normal Priority"; + case VAS_COP_TYPE_GZIP_HIPRI: return "NX-GZIP High Priority"; + case VAS_COP_TYPE_FTW: return "Fast Thread-wakeup"; + default: return "Unknown"; + } +} + +static int info_dbg_show(struct seq_file *s, void *private) +{ + struct vas_window *window = s->private; + + mutex_lock(&vas_mutex); + + /* ensure window is not unmapped */ + if (!window->hvwc_map) + goto unlock; + + seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop), + window->tx_win ? "Send" : "Receive"); + seq_printf(s, "Pid : %d\n", window->pid); + +unlock: + mutex_unlock(&vas_mutex); + return 0; +} + +static int info_dbg_open(struct inode *inode, struct file *file) +{ + return single_open(file, info_dbg_show, inode->i_private); +} + +static const struct file_operations info_fops = { + .open = info_dbg_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static inline void print_reg(struct seq_file *s, struct vas_window *win, + char *name, u32 reg) +{ + seq_printf(s, "0x%016llx %s\n", read_hvwc_reg(win, name, reg), name); +} + +static int hvwc_dbg_show(struct seq_file *s, void *private) +{ + struct vas_window *window = s->private; + + mutex_lock(&vas_mutex); + + /* ensure window is not unmapped */ + if (!window->hvwc_map) + goto unlock; + + print_reg(s, window, VREG(LPID)); + print_reg(s, window, VREG(PID)); + print_reg(s, window, VREG(XLATE_MSR)); + print_reg(s, window, VREG(XLATE_LPCR)); + print_reg(s, window, VREG(XLATE_CTL)); + print_reg(s, window, VREG(AMR)); + print_reg(s, window, VREG(SEIDR)); + print_reg(s, window, VREG(FAULT_TX_WIN)); + print_reg(s, window, VREG(OSU_INTR_SRC_RA)); + print_reg(s, window, VREG(HV_INTR_SRC_RA)); + print_reg(s, window, VREG(PSWID)); + print_reg(s, window, VREG(LFIFO_BAR)); + print_reg(s, window, VREG(LDATA_STAMP_CTL)); + print_reg(s, window, VREG(LDMA_CACHE_CTL)); + print_reg(s, window, VREG(LRFIFO_PUSH)); + print_reg(s, window, VREG(CURR_MSG_COUNT)); + print_reg(s, window, VREG(LNOTIFY_AFTER_COUNT)); + print_reg(s, window, VREG(LRX_WCRED)); + print_reg(s, window, VREG(LRX_WCRED_ADDER)); + print_reg(s, window, VREG(TX_WCRED)); + print_reg(s, window, VREG(TX_WCRED_ADDER)); + print_reg(s, window, VREG(LFIFO_SIZE)); + print_reg(s, window, VREG(WINCTL)); + print_reg(s, window, VREG(WIN_STATUS)); + print_reg(s, window, VREG(WIN_CTX_CACHING_CTL)); + print_reg(s, window, VREG(TX_RSVD_BUF_COUNT)); + print_reg(s, window, VREG(LRFIFO_WIN_PTR)); + print_reg(s, window, VREG(LNOTIFY_CTL)); + print_reg(s, window, VREG(LNOTIFY_PID)); + print_reg(s, window, VREG(LNOTIFY_LPID)); + print_reg(s, window, VREG(LNOTIFY_TID)); + print_reg(s, window, VREG(LNOTIFY_SCOPE)); + print_reg(s, window, VREG(NX_UTIL_ADDER)); +unlock: + mutex_unlock(&vas_mutex); + return 0; +} + +static int hvwc_dbg_open(struct inode *inode, struct file *file) +{ + return single_open(file, hvwc_dbg_show, inode->i_private); +} + +static const struct file_operations hvwc_fops = { + .open = hvwc_dbg_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void vas_window_free_dbgdir(struct vas_window *window) +{ + if (window->dbgdir) { + debugfs_remove_recursive(window->dbgdir); + kfree(window->dbgname); + window->dbgdir = NULL; + window->dbgname = NULL; + } +} + +void vas_window_init_dbgdir(struct vas_window *window) +{ + struct dentry *f, *d; + + if (!window->vinst->dbgdir) + return; + + window->dbgname = kzalloc(16, GFP_KERNEL); + if (!window->dbgname) + return; + + snprintf(window->dbgname, 16, "w%d", window->winid); + + d = debugfs_create_dir(window->dbgname, window->vinst->dbgdir); + if (IS_ERR(d)) + goto free_name; + + window->dbgdir = d; + + f = debugfs_create_file("info", 0444, d, window, &info_fops); + if (IS_ERR(f)) + goto remove_dir; + + f = debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops); + if (IS_ERR(f)) + goto remove_dir; + + return; + +free_name: + kfree(window->dbgname); + window->dbgname = NULL; + +remove_dir: + debugfs_remove_recursive(window->dbgdir); + window->dbgdir = NULL; +} + +void vas_instance_init_dbgdir(struct vas_instance *vinst) +{ + struct dentry *d; + + if (!vas_debugfs) + return; + + vinst->dbgname = kzalloc(16, GFP_KERNEL); + if (!vinst->dbgname) + return; + + snprintf(vinst->dbgname, 16, "v%d", vinst->vas_id); + + d = debugfs_create_dir(vinst->dbgname, vas_debugfs); + if (IS_ERR(d)) + goto free_name; + + vinst->dbgdir = d; + return; + +free_name: + kfree(vinst->dbgname); + vinst->dbgname = NULL; + vinst->dbgdir = NULL; +} + +void vas_init_dbgdir(void) +{ + vas_debugfs = debugfs_create_dir("vas", NULL); + if (IS_ERR(vas_debugfs)) + vas_debugfs = NULL; +} diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 5aae845b8cd9..2b3eb01ab110 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -16,7 +16,8 @@ #include <linux/log2.h> #include <linux/rcupdate.h> #include <linux/cred.h> - +#include <asm/switch_to.h> +#include <asm/ppc-opcode.h> #include "vas.h" #include "copy-paste.h" @@ -40,6 +41,16 @@ static void compute_paste_address(struct vas_window *window, u64 *addr, int *len pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr); } +u64 vas_win_paste_addr(struct vas_window *win) +{ + u64 addr; + + compute_paste_address(win, &addr, NULL); + + return addr; +} +EXPORT_SYMBOL(vas_win_paste_addr); + static inline void get_hvwc_mmio_bar(struct vas_window *window, u64 *start, int *len) { @@ -145,23 +156,37 @@ static void unmap_paste_region(struct vas_window *window) } /* - * Unmap the MMIO regions for a window. + * Unmap the MMIO regions for a window. Hold the vas_mutex so we don't + * unmap when the window's debugfs dir is in use. This serializes close + * of a window even on another VAS instance but since its not a critical + * path, just minimize the time we hold the mutex for now. We can add + * a per-instance mutex later if necessary. */ static void unmap_winctx_mmio_bars(struct vas_window *window) { int len; + void *uwc_map; + void *hvwc_map; u64 busaddr_start; - if (window->hvwc_map) { + mutex_lock(&vas_mutex); + + hvwc_map = window->hvwc_map; + window->hvwc_map = NULL; + + uwc_map = window->uwc_map; + window->uwc_map = NULL; + + mutex_unlock(&vas_mutex); + + if (hvwc_map) { get_hvwc_mmio_bar(window, &busaddr_start, &len); - unmap_region(window->hvwc_map, busaddr_start, len); - window->hvwc_map = NULL; + unmap_region(hvwc_map, busaddr_start, len); } - if (window->uwc_map) { + if (uwc_map) { get_uwc_mmio_bar(window, &busaddr_start, &len); - unmap_region(window->uwc_map, busaddr_start, len); - window->uwc_map = NULL; + unmap_region(uwc_map, busaddr_start, len); } } @@ -528,6 +553,9 @@ static void vas_window_free(struct vas_window *window) struct vas_instance *vinst = window->vinst; unmap_winctx_mmio_bars(window); + + vas_window_free_dbgdir(window); + kfree(window); vas_release_window_id(&vinst->ida, winid); @@ -552,6 +580,8 @@ static struct vas_window *vas_window_alloc(struct vas_instance *vinst) if (map_winctx_mmio_bars(window)) goto out_free; + vas_window_init_dbgdir(window); + return window; out_free: @@ -569,6 +599,32 @@ static void put_rx_win(struct vas_window *rxwin) } /* + * Find the user space receive window given the @pswid. + * - We must have a valid vasid and it must belong to this instance. + * (so both send and receive windows are on the same VAS instance) + * - The window must refer to an OPEN, FTW, RECEIVE window. + * + * NOTE: We access ->windows[] table and assume that vinst->mutex is held. + */ +static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid) +{ + int vasid, winid; + struct vas_window *rxwin; + + decode_pswid(pswid, &vasid, &winid); + + if (vinst->vas_id != vasid) + return ERR_PTR(-EINVAL); + + rxwin = vinst->windows[winid]; + + if (!rxwin || rxwin->tx_win || rxwin->cop != VAS_COP_TYPE_FTW) + return ERR_PTR(-EINVAL); + + return rxwin; +} + +/* * Get the VAS receive window associated with NX engine identified * by @cop and if applicable, @pswid. * @@ -581,10 +637,10 @@ static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst, mutex_lock(&vinst->mutex); - if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) - rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL); + if (cop == VAS_COP_TYPE_FTW) + rxwin = get_user_rxwin(vinst, pswid); else - rxwin = ERR_PTR(-EINVAL); + rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL); if (!IS_ERR(rxwin)) atomic_inc(&rxwin->num_txwins); @@ -674,15 +730,18 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin, winctx->rx_fifo = rxattr->rx_fifo; winctx->rx_fifo_size = rxattr->rx_fifo_size; - winctx->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT; + winctx->wcreds_max = rxwin->wcreds_max; winctx->pin_win = rxattr->pin_win; winctx->nx_win = rxattr->nx_win; winctx->fault_win = rxattr->fault_win; + winctx->user_win = rxattr->user_win; + winctx->rej_no_credit = rxattr->rej_no_credit; winctx->rx_word_mode = rxattr->rx_win_ord_mode; winctx->tx_word_mode = rxattr->tx_win_ord_mode; winctx->rx_wcred_mode = rxattr->rx_wcred_mode; winctx->tx_wcred_mode = rxattr->tx_wcred_mode; + winctx->notify_early = rxattr->notify_early; if (winctx->nx_win) { winctx->data_stamp = true; @@ -723,7 +782,10 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin, static bool rx_win_args_valid(enum vas_cop_type cop, struct vas_rx_win_attr *attr) { - dump_rx_win_attr(attr); + pr_debug("Rxattr: fault %d, notify %d, intr %d, early %d, fifo %d\n", + attr->fault_win, attr->notify_disable, + attr->intr_disable, attr->notify_early, + attr->rx_fifo_size); if (cop >= VAS_COP_TYPE_MAX) return false; @@ -735,6 +797,9 @@ static bool rx_win_args_valid(enum vas_cop_type cop, if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX) return false; + if (attr->wcreds_max > VAS_RX_WCREDS_MAX) + return false; + if (attr->nx_win) { /* cannot be fault or user window if it is nx */ if (attr->fault_win || attr->user_win) @@ -835,6 +900,7 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, rxwin->nx_win = rxattr->nx_win; rxwin->user_win = rxattr->user_win; rxwin->cop = cop; + rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT; if (rxattr->user_win) rxwin->pid = task_pid_vnr(current); @@ -884,21 +950,23 @@ static void init_winctx_for_txwin(struct vas_window *txwin, */ memset(winctx, 0, sizeof(struct vas_winctx)); - winctx->wcreds_max = txattr->wcreds_max ?: VAS_WCREDS_DEFAULT; + winctx->wcreds_max = txwin->wcreds_max; winctx->user_win = txattr->user_win; winctx->nx_win = txwin->rxwin->nx_win; winctx->pin_win = txattr->pin_win; + winctx->rej_no_credit = txattr->rej_no_credit; + winctx->rsvd_txbuf_enable = txattr->rsvd_txbuf_enable; winctx->rx_wcred_mode = txattr->rx_wcred_mode; winctx->tx_wcred_mode = txattr->tx_wcred_mode; winctx->rx_word_mode = txattr->rx_win_ord_mode; winctx->tx_word_mode = txattr->tx_win_ord_mode; + winctx->rsvd_txbuf_count = txattr->rsvd_txbuf_count; - if (winctx->nx_win) { + winctx->intr_disable = true; + if (winctx->nx_win) winctx->data_stamp = true; - winctx->intr_disable = true; - } winctx->lpid = txattr->lpid; winctx->pidr = txattr->pidr; @@ -921,6 +989,9 @@ static bool tx_win_args_valid(enum vas_cop_type cop, if (cop > VAS_COP_TYPE_MAX) return false; + if (attr->wcreds_max > VAS_TX_WCREDS_MAX) + return false; + if (attr->user_win && (cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count)) return false; @@ -940,6 +1011,14 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, if (!tx_win_args_valid(cop, attr)) return ERR_PTR(-EINVAL); + /* + * If caller did not specify a vasid but specified the PSWID of a + * receive window (applicable only to FTW windows), use the vasid + * from that receive window. + */ + if (vasid == -1 && attr->pswid) + decode_pswid(attr->pswid, &vasid, NULL); + vinst = find_vas_instance(vasid); if (!vinst) { pr_devel("vasid %d not found!\n", vasid); @@ -958,11 +1037,13 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, goto put_rxwin; } + txwin->cop = cop; txwin->tx_win = 1; txwin->rxwin = rxwin; txwin->nx_win = txwin->rxwin->nx_win; txwin->pid = attr->pid; txwin->user_win = attr->user_win; + txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT; init_winctx_for_txwin(txwin, attr, &winctx); @@ -984,6 +1065,14 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, } } + /* + * Now that we have a send window, ensure context switch issues + * CP_ABORT for this thread. + */ + rc = -EINVAL; + if (set_thread_uses_vas() < 0) + goto free_window; + set_vinst_win(vinst, txwin); return txwin; @@ -1038,50 +1127,110 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re) else rc = -EINVAL; - print_fifo_msg_count(txwin); + pr_debug("Txwin #%d: Msg count %llu\n", txwin->winid, + read_hvwc_reg(txwin, VREG(LRFIFO_PUSH))); return rc; } EXPORT_SYMBOL_GPL(vas_paste_crb); +/* + * If credit checking is enabled for this window, poll for the return + * of window credits (i.e for NX engines to process any outstanding CRBs). + * Since NX-842 waits for the CRBs to be processed before closing the + * window, we should not have to wait for too long. + * + * TODO: We retry in 10ms intervals now. We could/should probably peek at + * the VAS_LRFIFO_PUSH_OFFSET register to get an estimate of pending + * CRBs on the FIFO and compute the delay dynamically on each retry. + * But that is not really needed until we support NX-GZIP access from + * user space. (NX-842 driver waits for CSB and Fast thread-wakeup + * doesn't use credit checking). + */ +static void poll_window_credits(struct vas_window *window) +{ + u64 val; + int creds, mode; + + val = read_hvwc_reg(window, VREG(WINCTL)); + if (window->tx_win) + mode = GET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val); + else + mode = GET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val); + + if (!mode) + return; +retry: + if (window->tx_win) { + val = read_hvwc_reg(window, VREG(TX_WCRED)); + creds = GET_FIELD(VAS_TX_WCRED, val); + } else { + val = read_hvwc_reg(window, VREG(LRX_WCRED)); + creds = GET_FIELD(VAS_LRX_WCRED, val); + } + + if (creds < window->wcreds_max) { + val = 0; + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(msecs_to_jiffies(10)); + goto retry; + } +} + +/* + * Wait for the window to go to "not-busy" state. It should only take a + * short time to queue a CRB, so window should not be busy for too long. + * Trying 5ms intervals. + */ static void poll_window_busy_state(struct vas_window *window) { int busy; u64 val; retry: - /* - * Poll Window Busy flag - */ val = read_hvwc_reg(window, VREG(WIN_STATUS)); busy = GET_FIELD(VAS_WIN_BUSY, val); if (busy) { val = 0; set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); + schedule_timeout(msecs_to_jiffies(5)); goto retry; } } +/* + * Have the hardware cast a window out of cache and wait for it to + * be completed. + * + * NOTE: It can take a relatively long time to cast the window context + * out of the cache. It is not strictly necessary to cast out if: + * + * - we clear the "Pin Window" bit (so hardware is free to evict) + * + * - we re-initialize the window context when it is reassigned. + * + * We do the former in vas_win_close() and latter in vas_win_open(). + * So, ignoring the cast-out for now. We can add it as needed. If + * casting out becomes necessary we should consider offloading the + * job to a worker thread, so the window close can proceed quickly. + */ static void poll_window_castout(struct vas_window *window) { - int cached; - u64 val; + /* stub for now */ +} - /* Cast window context out of the cache */ -retry: - val = read_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL)); - cached = GET_FIELD(VAS_WIN_CACHE_STATUS, val); - if (cached) { - val = 0ULL; - val = SET_FIELD(VAS_CASTOUT_REQ, val, 1); - val = SET_FIELD(VAS_PUSH_TO_MEM, val, 0); - write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val); +/* + * Unpin and close a window so no new requests are accepted and the + * hardware can evict this window from cache if necessary. + */ +static void unpin_close_window(struct vas_window *window) +{ + u64 val; - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - goto retry; - } + val = read_hvwc_reg(window, VREG(WINCTL)); + val = SET_FIELD(VAS_WINCTL_PIN, val, 0); + val = SET_FIELD(VAS_WINCTL_OPEN, val, 0); + write_hvwc_reg(window, VREG(WINCTL), val); } /* @@ -1098,8 +1247,6 @@ retry: */ int vas_win_close(struct vas_window *window) { - u64 val; - if (!window) return 0; @@ -1115,11 +1262,9 @@ int vas_win_close(struct vas_window *window) poll_window_busy_state(window); - /* Unpin window from cache and close it */ - val = read_hvwc_reg(window, VREG(WINCTL)); - val = SET_FIELD(VAS_WINCTL_PIN, val, 0); - val = SET_FIELD(VAS_WINCTL_OPEN, val, 0); - write_hvwc_reg(window, VREG(WINCTL), val); + unpin_close_window(window); + + poll_window_credits(window); poll_window_castout(window); @@ -1132,3 +1277,12 @@ int vas_win_close(struct vas_window *window) return 0; } EXPORT_SYMBOL_GPL(vas_win_close); + +/* + * Return a system-wide unique window id for the window @win. + */ +u32 vas_win_id(struct vas_window *win) +{ + return encode_pswid(win->vinst->vas_id, win->winid); +} +EXPORT_SYMBOL_GPL(vas_win_id); diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c index 565a4878fefa..c488621dbec3 100644 --- a/arch/powerpc/platforms/powernv/vas.c +++ b/arch/powerpc/platforms/powernv/vas.c @@ -18,15 +18,18 @@ #include <linux/of_platform.h> #include <linux/of_address.h> #include <linux/of.h> +#include <asm/prom.h> #include "vas.h" -static DEFINE_MUTEX(vas_mutex); +DEFINE_MUTEX(vas_mutex); static LIST_HEAD(vas_instances); +static DEFINE_PER_CPU(int, cpu_vas_id); + static int init_vas_instance(struct platform_device *pdev) { - int rc, vasid; + int rc, cpu, vasid; struct resource *res; struct vas_instance *vinst; struct device_node *dn = pdev->dev.of_node; @@ -74,10 +77,17 @@ static int init_vas_instance(struct platform_device *pdev) "paste_win_id_shift 0x%llx\n", pdev->name, vasid, vinst->paste_base_addr, vinst->paste_win_id_shift); + for_each_possible_cpu(cpu) { + if (cpu_to_chip_id(cpu) == of_get_ibm_chip_id(dn)) + per_cpu(cpu_vas_id, cpu) = vasid; + } + mutex_lock(&vas_mutex); list_add(&vinst->node, &vas_instances); mutex_unlock(&vas_mutex); + vas_instance_init_dbgdir(vinst); + dev_set_drvdata(&pdev->dev, vinst); return 0; @@ -98,6 +108,10 @@ struct vas_instance *find_vas_instance(int vasid) struct vas_instance *vinst; mutex_lock(&vas_mutex); + + if (vasid == -1) + vasid = per_cpu(cpu_vas_id, smp_processor_id()); + list_for_each(ent, &vas_instances) { vinst = list_entry(ent, struct vas_instance, node); if (vinst->vas_id == vasid) { @@ -111,6 +125,17 @@ struct vas_instance *find_vas_instance(int vasid) return NULL; } +int chip_to_vas_id(int chipid) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (cpu_to_chip_id(cpu) == chipid) + return per_cpu(cpu_vas_id, cpu); + } + return -1; +} + static int vas_probe(struct platform_device *pdev) { return init_vas_instance(pdev); @@ -134,6 +159,8 @@ static int __init vas_init(void) int found = 0; struct device_node *dn; + vas_init_dbgdir(); + platform_driver_register(&vas_driver); for_each_compatible_node(dn, NULL, "ibm,vas") { diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index 38dee5d50f31..ae0100fd35bb 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -13,6 +13,8 @@ #include <linux/idr.h> #include <asm/vas.h> #include <linux/io.h> +#include <linux/dcache.h> +#include <linux/mutex.h> /* * Overview of Virtual Accelerator Switchboard (VAS). @@ -106,8 +108,8 @@ * * TODO: Needs tuning for per-process credits */ -#define VAS_WCREDS_MIN 16 -#define VAS_WCREDS_MAX ((64 << 10) - 1) +#define VAS_RX_WCREDS_MAX ((64 << 10) - 1) +#define VAS_TX_WCREDS_MAX ((4 << 10) - 1) #define VAS_WCREDS_DEFAULT (1 << 10) /* @@ -259,6 +261,16 @@ #define VAS_NX_UTIL_ADDER PPC_BITMASK(32, 63) /* + * VREG(x): + * Expand a register's short name (eg: LPID) into two parameters: + * - the register's short name in string form ("LPID"), and + * - the name of the macro (eg: VAS_LPID_OFFSET), defining the + * register's offset in the window context + */ +#define VREG_SFX(n, s) __stringify(n), VAS_##n##s +#define VREG(r) VREG_SFX(r, _OFFSET) + +/* * Local Notify Scope Control Register. (Receive windows only). */ enum vas_notify_scope { @@ -307,6 +319,9 @@ struct vas_instance { struct mutex mutex; struct vas_window *rxwin[VAS_COP_TYPE_MAX]; struct vas_window *windows[VAS_WINDOWS_PER_CHIP]; + + char *dbgname; + struct dentry *dbgdir; }; /* @@ -322,6 +337,10 @@ struct vas_window { void *hvwc_map; /* HV window context */ void *uwc_map; /* OS/User window context */ pid_t pid; /* Linux process id of owner */ + int wcreds_max; /* Window credits */ + + char *dbgname; + struct dentry *dbgdir; /* Fields applicable only to send windows */ void *paste_kaddr; @@ -383,45 +402,23 @@ struct vas_winctx { enum vas_notify_after_count notify_after_count; }; -extern struct vas_instance *find_vas_instance(int vasid); +extern struct mutex vas_mutex; -/* - * VREG(x): - * Expand a register's short name (eg: LPID) into two parameters: - * - the register's short name in string form ("LPID"), and - * - the name of the macro (eg: VAS_LPID_OFFSET), defining the - * register's offset in the window context - */ -#define VREG_SFX(n, s) __stringify(n), VAS_##n##s -#define VREG(r) VREG_SFX(r, _OFFSET) - -#ifdef vas_debug -static inline void dump_rx_win_attr(struct vas_rx_win_attr *attr) -{ - pr_err("fault %d, notify %d, intr %d early %d\n", - attr->fault_win, attr->notify_disable, - attr->intr_disable, attr->notify_early); - - pr_err("rx_fifo_size %d, max value %d\n", - attr->rx_fifo_size, VAS_RX_FIFO_SIZE_MAX); -} +extern struct vas_instance *find_vas_instance(int vasid); +extern void vas_init_dbgdir(void); +extern void vas_instance_init_dbgdir(struct vas_instance *vinst); +extern void vas_window_init_dbgdir(struct vas_window *win); +extern void vas_window_free_dbgdir(struct vas_window *win); static inline void vas_log_write(struct vas_window *win, char *name, void *regptr, u64 val) { if (val) - pr_err("%swin #%d: %s reg %p, val 0x%016llx\n", + pr_debug("%swin #%d: %s reg %p, val 0x%016llx\n", win->tx_win ? "Tx" : "Rx", win->winid, name, regptr, val); } -#else /* vas_debug */ - -#define vas_log_write(win, name, reg, val) -#define dump_rx_win_attr(attr) - -#endif /* vas_debug */ - static inline void write_uwc_reg(struct vas_window *win, char *name, s32 reg, u64 val) { @@ -450,18 +447,32 @@ static inline u64 read_hvwc_reg(struct vas_window *win, return in_be64(win->hvwc_map+reg); } -#ifdef vas_debug - -static void print_fifo_msg_count(struct vas_window *txwin) +/* + * Encode/decode the Partition Send Window ID (PSWID) for a window in + * a way that we can uniquely identify any window in the system. i.e. + * we should be able to locate the 'struct vas_window' given the PSWID. + * + * Bits Usage + * 0:7 VAS id (8 bits) + * 8:15 Unused, 0 (3 bits) + * 16:31 Window id (16 bits) + */ +static inline u32 encode_pswid(int vasid, int winid) { - uint64_t read_hvwc_reg(struct vas_window *w, char *n, uint64_t o); - pr_devel("Winid %d, Msg count %llu\n", txwin->winid, - (uint64_t)read_hvwc_reg(txwin, VREG(LRFIFO_PUSH))); -} -#else /* vas_debug */ + u32 pswid = 0; -#define print_fifo_msg_count(window) + pswid |= vasid << (31 - 7); + pswid |= winid; -#endif /* vas_debug */ + return pswid; +} + +static inline void decode_pswid(u32 pswid, int *vasid, int *winid) +{ + if (vasid) + *vasid = pswid >> (31 - 7) & 0xFF; + if (winid) + *winid = pswid & 0xFFFF; +} #endif /* _VAS_H */ diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index fadb95efbb9e..a7d14aa7bb7c 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -363,6 +363,7 @@ static int dlpar_online_cpu(struct device_node *dn) BUG_ON(get_cpu_current_state(cpu) != CPU_STATE_OFFLINE); cpu_maps_update_done(); + timed_topology_update(1); rc = device_online(get_cpu_device(cpu)); if (rc) goto out; @@ -533,6 +534,7 @@ static int dlpar_offline_cpu(struct device_node *dn) set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); cpu_maps_update_done(); + timed_topology_update(1); rc = device_offline(get_cpu_device(cpu)); if (rc) goto out; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 7c181467d0ad..69921f72e2da 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -55,23 +55,23 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node) { - struct iommu_table_group *table_group = NULL; - struct iommu_table *tbl = NULL; - struct iommu_table_group_link *tgl = NULL; + struct iommu_table_group *table_group; + struct iommu_table *tbl; + struct iommu_table_group_link *tgl; table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL, node); if (!table_group) - goto fail_exit; + return NULL; tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node); if (!tbl) - goto fail_exit; + goto free_group; tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, node); if (!tgl) - goto fail_exit; + goto free_table; INIT_LIST_HEAD_RCU(&tbl->it_group_list); kref_init(&tbl->it_kref); @@ -82,11 +82,10 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node) return table_group; -fail_exit: - kfree(tgl); - kfree(table_group); +free_table: kfree(tbl); - +free_group: + kfree(table_group); return NULL; } diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 495ba4e7336d..0ee4a469a4ae 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -93,7 +93,7 @@ void vpa_init(int cpu) return; } -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 /* * PAPR says this feature is SLB-Buffer but firmware never * reports that. All SPLPAR support SLB shadow buffer. @@ -106,7 +106,7 @@ void vpa_init(int cpu) "cpu %d (hw %d) of area %lx failed with %ld\n", cpu, hwcpu, addr, ret); } -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ /* * Register dispatch trace log, if one has been allocated. @@ -129,7 +129,7 @@ void vpa_init(int cpu) } } -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 static long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long vpn, unsigned long pa, @@ -824,7 +824,7 @@ void arch_free_page(struct page *page, int order) EXPORT_SYMBOL(arch_free_page); #endif /* CONFIG_PPC_SMLPAR */ -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_TRACEPOINTS #ifdef HAVE_JUMP_LABEL diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index 779fc2a1c8f7..b2706c483067 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -485,7 +485,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "shared_processor_mode=%d\n", lppaca_shared_proc(get_lppaca())); -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 seq_printf(m, "slb_size=%d\n", mmu_slb_size); #endif parse_em_data(m); diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 12277bc9fd9e..d86938260a86 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1592,6 +1592,8 @@ ATTRIBUTE_GROUPS(vio_dev); void vio_unregister_device(struct vio_dev *viodev) { device_unregister(&viodev->dev); + if (viodev->family == VDEVICE) + irq_dispose_mapping(viodev->irq); } EXPORT_SYMBOL(vio_unregister_device); |