From f9122ee4f5580194014d4ca72092787ae33f29fb Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Tue, 16 May 2017 14:19:48 +0530 Subject: cpuidle-powernv: Allow Deep stop states that don't stop time The current code in the cpuidle-powernv intialization only allows deep stop states (indicated by OPAL_PM_STOP_INST_DEEP) which lose timebase (indicated by OPAL_PM_TIMEBASE_STOP). This assumption goes back to POWER8 time where deep states used to lose the timebase. However, on POWER9, we do have stop states that are deep (they lose hypervisor state) but retain the timebase. Fix the initialization code in the cpuidle-powernv driver to allow such deep states. Further, there is a bug in cpuidle-powernv driver with CONFIG_TICK_ONESHOT=n where we end up incrementing the nr_idle_states even if a platform idle state which loses time base was not added to the cpuidle table. Fix this by ensuring that the nr_idle_states variable gets incremented only when the platform idle state was added to the cpuidle table. Signed-off-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman --- drivers/cpuidle/cpuidle-powernv.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 12409a519cc5..45eaf06462ae 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -354,6 +354,7 @@ static int powernv_add_idle_states(void) for (i = 0; i < dt_idle_states; i++) { unsigned int exit_latency, target_residency; + bool stops_timebase = false; /* * If an idle state has exit latency beyond * POWERNV_THRESHOLD_LATENCY_NS then don't use it @@ -381,6 +382,9 @@ static int powernv_add_idle_states(void) } } + if (flags[i] & OPAL_PM_TIMEBASE_STOP) + stops_timebase = true; + /* * For nap and fastsleep, use default target_residency * values if f/w does not expose it. @@ -392,8 +396,7 @@ static int powernv_add_idle_states(void) add_powernv_state(nr_idle_states, "Nap", CPUIDLE_FLAG_NONE, nap_loop, target_residency, exit_latency, 0, 0); - } else if ((flags[i] & OPAL_PM_STOP_INST_FAST) && - !(flags[i] & OPAL_PM_TIMEBASE_STOP)) { + } else if (has_stop_states && !stops_timebase) { add_powernv_state(nr_idle_states, names[i], CPUIDLE_FLAG_NONE, stop_loop, target_residency, exit_latency, @@ -405,8 +408,8 @@ static int powernv_add_idle_states(void) * within this config dependency check. */ #ifdef CONFIG_TICK_ONESHOT - if (flags[i] & OPAL_PM_SLEEP_ENABLED || - flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) { + else if (flags[i] & OPAL_PM_SLEEP_ENABLED || + flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) { if (!rc) target_residency = 300000; /* Add FASTSLEEP state */ @@ -414,14 +417,15 @@ static int powernv_add_idle_states(void) CPUIDLE_FLAG_TIMER_STOP, fastsleep_loop, target_residency, exit_latency, 0, 0); - } else if ((flags[i] & OPAL_PM_STOP_INST_DEEP) && - (flags[i] & OPAL_PM_TIMEBASE_STOP)) { + } else if (has_stop_states && stops_timebase) { add_powernv_state(nr_idle_states, names[i], CPUIDLE_FLAG_TIMER_STOP, stop_loop, target_residency, exit_latency, psscr_val[i], psscr_mask[i]); } #endif + else + continue; nr_idle_states++; } out: -- cgit v1.2.3 From b802ab46ba12c617fd55b072f1906627e636947b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 5 Jun 2017 16:49:12 +1000 Subject: powerpc: Fix some spelling mistakes Collation of some spelling fixes from Colin. Attemping -> Attempting intialized -> initialized missmanaged -> mismanaged Signed-off-by: Colin Ian King Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 2 +- arch/powerpc/sysdev/xive/common.c | 2 +- drivers/tty/hvc/hvcs.c | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 7bc0e91f8715..6afd1efd3633 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -554,7 +554,7 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index) { int rc; - pr_debug("Attemping to remove CPU %s, drc index: %x\n", + pr_debug("Attempting to remove CPU %s, drc index: %x\n", dn->name, drc_index); rc = dlpar_offline_cpu(dn); diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 913825086b8d..afc9484ae747 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1417,7 +1417,7 @@ bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset, /* Get ready for interrupts */ xive_setup_cpu(); - pr_info("Interrupt handling intialized with %s backend\n", + pr_info("Interrupt handling initialized with %s backend\n", xive_ops->name); pr_info("Using priority %d for all interrupts\n", max_prio); diff --git a/drivers/tty/hvc/hvcs.c b/drivers/tty/hvc/hvcs.c index 99bb875178d7..423e28ec27fb 100644 --- a/drivers/tty/hvc/hvcs.c +++ b/drivers/tty/hvc/hvcs.c @@ -1242,8 +1242,7 @@ static void hvcs_close(struct tty_struct *tty, struct file *filp) free_irq(irq, hvcsd); return; } else if (hvcsd->port.count < 0) { - printk(KERN_ERR "HVCS: vty-server@%X open_count: %d" - " is missmanaged.\n", + printk(KERN_ERR "HVCS: vty-server@%X open_count: %d is mismanaged.\n", hvcsd->vdev->unit_address, hvcsd->port.count); } -- cgit v1.2.3 From 58d876fa7181f2f393190c1d32c056b5a9d34aa2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 May 2017 08:34:58 +0300 Subject: cxl: Unlock on error in probe We should unlock if get_cxl_adapter() fails. Fixes: 594ff7d067ca ("cxl: Support to flash a new image on the adapter from a guest") Signed-off-by: Dan Carpenter Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/cxl/flash.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c index 7c61c70ba3f6..3aa216bf0939 100644 --- a/drivers/misc/cxl/flash.c +++ b/drivers/misc/cxl/flash.c @@ -401,8 +401,10 @@ static int device_open(struct inode *inode, struct file *file) if (down_interruptible(&sem) != 0) return -EPERM; - if (!(adapter = get_cxl_adapter(adapter_num))) - return -ENODEV; + if (!(adapter = get_cxl_adapter(adapter_num))) { + rc = -ENODEV; + goto err_unlock; + } file->private_data = adapter; continue_token = 0; @@ -446,6 +448,8 @@ err1: free_page((unsigned long) le); err: put_device(&adapter->dev); +err_unlock: + up(&sem); return rc; } -- cgit v1.2.3 From 42bed042556261b56aa92e05f4f388f0f7ac1210 Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Mon, 29 May 2017 10:26:28 -0300 Subject: drivers/watchdog/Kconfig: Update CONFIG_WATCHDOG_RTAS dependencies drivers/watchdog/wdrtas.c uses symbols defined in arch/powerpc/kernel/rtas.c, which are exported iff CONFIG_PPC_RTAS is selected. Building wdrtas.c without setting CONFIG_PPC_RTAS throws the following errors: ERROR: ".rtas_token" [drivers/watchdog/wdrtas.ko] undefined! ERROR: "rtas_data_buf" [drivers/watchdog/wdrtas.ko] undefined! ERROR: "rtas_data_buf_lock" [drivers/watchdog/wdrtas.ko] undefined! ERROR: ".rtas_get_sensor" [drivers/watchdog/wdrtas.ko] undefined! ERROR: ".rtas_call" [drivers/watchdog/wdrtas.ko] undefined! This was identified during a randconfig build where CONFIG_WATCHDOG_RTAS=m and CONFIG_PPC_RTAS was not set. Logs are here: http://kisskb.ellerman.id.au/kisskb/buildresult/12982152/ This patch fixes the issue by updating CONFIG_WATCHDOG_RTAS to depend on just CONFIG_PPC_RTAS, removing COMPILE_TEST entirely. Signed-off-by: Murilo Opsfelder Araujo Reviewed-by: Guenter Roeck Signed-off-by: Michael Ellerman --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 8b9049dac094..e6e31a16f68f 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1688,7 +1688,7 @@ config MEN_A21_WDT config WATCHDOG_RTAS tristate "RTAS watchdog" - depends on PPC_RTAS || (PPC64 && COMPILE_TEST) + depends on PPC_RTAS help This driver adds watchdog support for the RTAS watchdog. -- cgit v1.2.3 From 2201f994a5742c03e660623c385fd6897dd1fa2f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 13 Jun 2017 23:05:45 +1000 Subject: powerpc/64s/idle: Move soft interrupt mask logic into C code This simplifies the asm and fixes irq-off tracing over sleep instructions. Also move powersave_nap check for POWER8 into C code, and move PSSCR register value calculation for POWER9 into C. Reviewed-by: Gautham R. Shenoy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hw_irq.h | 3 ++ arch/powerpc/include/asm/machdep.h | 1 + arch/powerpc/include/asm/processor.h | 10 ++-- arch/powerpc/kernel/idle_book3s.S | 82 ++++++-------------------------- arch/powerpc/kernel/irq.c | 33 ++++++++++++- arch/powerpc/platforms/powernv/idle.c | 71 ++++++++++++++++++++++++--- arch/powerpc/platforms/powernv/smp.c | 2 - arch/powerpc/platforms/powernv/subcore.c | 3 +- drivers/cpuidle/cpuidle-powernv.c | 12 ++--- 9 files changed, 128 insertions(+), 89 deletions(-) (limited to 'drivers') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index eba60416536e..f06112cf8734 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -129,6 +129,9 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs) } extern bool prep_irq_for_idle(void); +extern bool prep_irq_for_idle_irqsoff(void); + +#define fini_irq_for_idle_irqsoff() trace_hardirqs_off(); extern void force_external_irq_replay(void); diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index f90b22c722e1..cd2fc1cc1cc7 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -226,6 +226,7 @@ struct machdep_calls { extern void e500_idle(void); extern void power4_idle(void); extern void power7_idle(void); +extern void power9_idle(void); extern void ppc6xx_idle(void); extern void book3e_idle(void); diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index a2123f291ab0..c49165a7439c 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -481,11 +481,11 @@ extern unsigned long cpuidle_disable; enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ -extern unsigned long power7_nap(int check_irq); -extern unsigned long power7_sleep(void); -extern unsigned long power7_winkle(void); -extern unsigned long power9_idle_stop(unsigned long stop_psscr_val, - unsigned long stop_psscr_mask); +extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/ +extern void power7_idle_type(unsigned long type); +extern unsigned long power9_idle_stop(unsigned long psscr_val); +extern void power9_idle_type(unsigned long stop_psscr_val, + unsigned long stop_psscr_mask); extern void flush_instruction_cache(void); extern void hard_reset_now(void); diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 98a6d07ecb5c..35cf5bb7daed 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -109,13 +109,9 @@ core_idle_lock_held: /* * Pass requested state in r3: * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8 - * - Requested STOP state in POWER9 + * - Requested PSSCR value in POWER9 * - * To check IRQ_HAPPENED in r4 - * 0 - don't check - * 1 - check - * - * Address to 'rfid' to in r5 + * Address of idle handler to 'rfid' to in r4 */ pnv_powersave_common: /* Use r3 to pass state nap/sleep/winkle */ @@ -131,30 +127,7 @@ pnv_powersave_common: std r0,_LINK(r1) std r0,_NIP(r1) - /* Hard disable interrupts */ - mfmsr r9 - rldicl r9,r9,48,1 - rotldi r9,r9,16 - mtmsrd r9,1 /* hard-disable interrupts */ - - /* Check if something happened while soft-disabled */ - lbz r0,PACAIRQHAPPENED(r13) - andi. r0,r0,~PACA_IRQ_HARD_DIS@l - beq 1f - cmpwi cr0,r4,0 - beq 1f - addi r1,r1,INT_FRAME_SIZE - ld r0,16(r1) - li r3,0 /* Return 0 (no nap) */ - mtlr r0 - blr - -1: /* We mark irqs hard disabled as this is the state we'll - * be in when returning and we need to tell arch_local_irq_restore() - * about it - */ - li r0,PACA_IRQ_HARD_DIS - stb r0,PACAIRQHAPPENED(r13) + mfmsr r9 /* We haven't lost state ... yet */ li r0,0 @@ -163,8 +136,8 @@ pnv_powersave_common: /* Continue saving state */ SAVE_GPR(2, r1) SAVE_NVGPRS(r1) - mfcr r4 - std r4,_CCR(r1) + mfcr r5 + std r5,_CCR(r1) std r9,_MSR(r1) std r1,PACAR1(r13) @@ -178,7 +151,7 @@ pnv_powersave_common: li r6, MSR_RI andc r6, r9, r6 mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ - mtspr SPRN_SRR0, r5 + mtspr SPRN_SRR0, r4 mtspr SPRN_SRR1, r7 rfid @@ -322,35 +295,14 @@ lwarx_loop_stop: IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) -_GLOBAL(power7_idle) +/* + * Entered with MSR[EE]=0 and no soft-masked interrupts pending. + * r3 contains desired idle state (PNV_THREAD_NAP/SLEEP/WINKLE). + */ +_GLOBAL(power7_idle_insn) /* Now check if user or arch enabled NAP mode */ - LOAD_REG_ADDRBASE(r3,powersave_nap) - lwz r4,ADDROFF(powersave_nap)(r3) - cmpwi 0,r4,0 - beqlr - li r3, 1 - /* fall through */ - -_GLOBAL(power7_nap) - mr r4,r3 - li r3,PNV_THREAD_NAP - LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) - b pnv_powersave_common - /* No return */ - -_GLOBAL(power7_sleep) - li r3,PNV_THREAD_SLEEP - li r4,1 - LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) + LOAD_REG_ADDR(r4, pnv_enter_arch207_idle_mode) b pnv_powersave_common - /* No return */ - -_GLOBAL(power7_winkle) - li r3,PNV_THREAD_WINKLE - li r4,1 - LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) - b pnv_powersave_common - /* No return */ #define CHECK_HMI_INTERRUPT \ mfspr r0,SPRN_SRR1; \ @@ -372,17 +324,13 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ 20: nop; /* - * r3 - The PSSCR value corresponding to the stop state. - * r4 - The PSSCR mask corrresonding to the stop state. + * Entered with MSR[EE]=0 and no soft-masked interrupts pending. + * r3 contains desired PSSCR register value. */ _GLOBAL(power9_idle_stop) - mfspr r5,SPRN_PSSCR - andc r5,r5,r4 - or r3,r3,r5 std r3, PACA_REQ_PSSCR(r13) mtspr SPRN_PSSCR,r3 - LOAD_REG_ADDR(r5,power_enter_stop) - li r4,1 + LOAD_REG_ADDR(r4,power_enter_stop) b pnv_powersave_common /* No return */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5c291df30fe3..58dcac88bc79 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -322,7 +322,8 @@ bool prep_irq_for_idle(void) * First we need to hard disable to ensure no interrupt * occurs before we effectively enter the low power state */ - hard_irq_disable(); + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; /* * If anything happened while we were soft-disabled, @@ -347,6 +348,36 @@ bool prep_irq_for_idle(void) return true; } +/* + * This is for idle sequences that return with IRQs off, but the + * idle state itself wakes on interrupt. Tell the irq tracer that + * IRQs are enabled for the duration of idle so it does not get long + * off times. Must be paired with fini_irq_for_idle_irqsoff. + */ +bool prep_irq_for_idle_irqsoff(void) +{ + WARN_ON(!irqs_disabled()); + + /* + * First we need to hard disable to ensure no interrupt + * occurs before we effectively enter the low power state + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + /* + * If anything happened while we were soft-disabled, + * we return now and do not enter the low power state. + */ + if (lazy_irq_pending()) + return false; + + /* Tell lockdep we are about to re-enable */ + trace_hardirqs_on(); + + return true; +} + /* * Force a replay of the external interrupt handler on this CPU. */ diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 46946a587004..f875879ff1eb 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "powernv.h" #include "subcore.h" @@ -283,12 +284,68 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, show_fastsleep_workaround_applyonce, store_fastsleep_workaround_applyonce); +static unsigned long __power7_idle_type(unsigned long type) +{ + unsigned long srr1; + + if (!prep_irq_for_idle_irqsoff()) + return 0; + + ppc64_runlatch_off(); + srr1 = power7_idle_insn(type); + ppc64_runlatch_on(); + + fini_irq_for_idle_irqsoff(); + + return srr1; +} + +void power7_idle_type(unsigned long type) +{ + __power7_idle_type(type); +} + +void power7_idle(void) +{ + if (!powersave_nap) + return; + + power7_idle_type(PNV_THREAD_NAP); +} + +static unsigned long __power9_idle_type(unsigned long stop_psscr_val, + unsigned long stop_psscr_mask) +{ + unsigned long psscr; + unsigned long srr1; + + if (!prep_irq_for_idle_irqsoff()) + return 0; + + psscr = mfspr(SPRN_PSSCR); + psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; + + ppc64_runlatch_off(); + srr1 = power9_idle_stop(psscr); + ppc64_runlatch_on(); + + fini_irq_for_idle_irqsoff(); + + return srr1; +} + +void power9_idle_type(unsigned long stop_psscr_val, + unsigned long stop_psscr_mask) +{ + __power9_idle_type(stop_psscr_val, stop_psscr_mask); +} + /* * Used for ppc_md.power_save which needs a function with no parameters */ -static void power9_idle(void) +void power9_idle(void) { - power9_idle_stop(pnv_default_stop_val, pnv_default_stop_mask); + power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask); } #ifdef CONFIG_HOTPLUG_CPU @@ -303,16 +360,17 @@ unsigned long pnv_cpu_offline(unsigned int cpu) u32 idle_states = pnv_get_supported_cpuidle_states(); if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) { - srr1 = power9_idle_stop(pnv_deepest_stop_psscr_val, + srr1 = __power9_idle_type(pnv_deepest_stop_psscr_val, pnv_deepest_stop_psscr_mask); } else if (idle_states & OPAL_PM_WINKLE_ENABLED) { - srr1 = power7_winkle(); + srr1 = __power7_idle_type(PNV_THREAD_WINKLE); } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { - srr1 = power7_sleep(); + srr1 = __power7_idle_type(PNV_THREAD_SLEEP); } else if (idle_states & OPAL_PM_NAP_ENABLED) { - srr1 = power7_nap(1); + srr1 = __power7_idle_type(PNV_THREAD_NAP); } else { + ppc64_runlatch_off(); /* This is the fallback method. We emulate snooze */ while (!generic_check_cpu_restart(cpu)) { HMT_low(); @@ -320,6 +378,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu) } srr1 = 0; HMT_medium(); + ppc64_runlatch_on(); } return srr1; diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 4aff754b6f2c..f8752795decf 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -182,9 +182,7 @@ static void pnv_smp_cpu_kill_self(void) */ kvmppc_set_host_ipi(cpu, 0); - ppc64_runlatch_off(); srr1 = pnv_cpu_offline(cpu); - ppc64_runlatch_on(); /* * If the SRR1 value indicates that we woke up due to diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 0babef11136f..d975d78188a9 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -182,7 +183,7 @@ static void unsplit_core(void) cpu = smp_processor_id(); if (cpu_thread_in_core(cpu) != 0) { while (mfspr(SPRN_HID0) & mask) - power7_nap(0); + power7_idle_insn(PNV_THREAD_NAP); per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT; return; diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 45eaf06462ae..79152676f62b 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -73,9 +73,8 @@ static int nap_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - ppc64_runlatch_off(); - power7_idle(); - ppc64_runlatch_on(); + power7_idle_type(PNV_THREAD_NAP); + return index; } @@ -98,7 +97,8 @@ static int fastsleep_loop(struct cpuidle_device *dev, new_lpcr &= ~LPCR_PECE1; mtspr(SPRN_LPCR, new_lpcr); - power7_sleep(); + + power7_idle_type(PNV_THREAD_SLEEP); mtspr(SPRN_LPCR, old_lpcr); @@ -110,10 +110,8 @@ static int stop_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - ppc64_runlatch_off(); - power9_idle_stop(stop_psscr_table[index].val, + power9_idle_type(stop_psscr_table[index].val, stop_psscr_table[index].mask); - ppc64_runlatch_on(); return index; } -- cgit v1.2.3 From 3fc5ee927ff4ffed6aa2fcd44d2fbf07ac893cdc Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 14 Jun 2017 23:02:39 +1000 Subject: cpuidle: powerpc: cpuidle set polling before enabling irqs local_irq_enable can cause interrupts to be taken which could take significant amount of processing time. The idle process should set its polling flag before this, so another process that wakes it during this time will not have to send an IPI. Expand the TIF_POLLING_NRFLAG coverage to as large as possible. Reviewed-by: Gautham R. Shenoy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- drivers/cpuidle/cpuidle-powernv.c | 4 +++- drivers/cpuidle/cpuidle-pseries.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 79152676f62b..50b3c2e0306f 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -51,9 +51,10 @@ static int snooze_loop(struct cpuidle_device *dev, { u64 snooze_exit_time; - local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); + local_irq_enable(); + snooze_exit_time = get_tb() + snooze_timeout; ppc64_runlatch_off(); HMT_very_low(); @@ -66,6 +67,7 @@ static int snooze_loop(struct cpuidle_device *dev, ppc64_runlatch_on(); clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb(); + return index; } diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index 166ccd711ec9..7b12bb2ea70f 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -62,9 +62,10 @@ static int snooze_loop(struct cpuidle_device *dev, unsigned long in_purr; u64 snooze_exit_time; + set_thread_flag(TIF_POLLING_NRFLAG); + idle_loop_prolog(&in_purr); local_irq_enable(); - set_thread_flag(TIF_POLLING_NRFLAG); snooze_exit_time = get_tb() + snooze_timeout; while (!need_resched()) { -- cgit v1.2.3 From 624e46d03576dd4d5667bad9d2ef814135d0075c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 14 Jun 2017 23:02:40 +1000 Subject: cpuidle: powerpc: read mostly for common globals Ensure these don't get put into bouncing cachelines. Reviewed-by: Vaidyanathan Srinivasan Reviewed-by: Gautham R. Shenoy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- drivers/cpuidle/cpuidle-powernv.c | 10 +++++----- drivers/cpuidle/cpuidle-pseries.c | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 50b3c2e0306f..9d03326ac05e 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -32,18 +32,18 @@ static struct cpuidle_driver powernv_idle_driver = { .owner = THIS_MODULE, }; -static int max_idle_state; -static struct cpuidle_state *cpuidle_state_table; +static int max_idle_state __read_mostly; +static struct cpuidle_state *cpuidle_state_table __read_mostly; struct stop_psscr_table { u64 val; u64 mask; }; -static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX]; +static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX] __read_mostly; -static u64 snooze_timeout; -static bool snooze_timeout_en; +static u64 snooze_timeout __read_mostly; +static bool snooze_timeout_en __read_mostly; static int snooze_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index 7b12bb2ea70f..a404f352d284 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -25,10 +25,10 @@ struct cpuidle_driver pseries_idle_driver = { .owner = THIS_MODULE, }; -static int max_idle_state; -static struct cpuidle_state *cpuidle_state_table; -static u64 snooze_timeout; -static bool snooze_timeout_en; +static int max_idle_state __read_mostly; +static struct cpuidle_state *cpuidle_state_table __read_mostly; +static u64 snooze_timeout __read_mostly; +static bool snooze_timeout_en __read_mostly; static inline void idle_loop_prolog(unsigned long *in_purr) { -- cgit v1.2.3 From 7ded429152e84831f6696585755f318fb351e67f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 14 Jun 2017 23:02:41 +1000 Subject: cpuidle: powerpc: no memory barrier after break from idle A memory barrier is not required after the task wakes up, only if we clear the polling flag before waking. The case where we have work to do is the important one, so optimise for it. Reviewed-by: Vaidyanathan Srinivasan Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- drivers/cpuidle/cpuidle-powernv.c | 11 +++++++++-- drivers/cpuidle/cpuidle-pseries.c | 11 +++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 9d03326ac05e..37b0698b7193 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -59,14 +59,21 @@ static int snooze_loop(struct cpuidle_device *dev, ppc64_runlatch_off(); HMT_very_low(); while (!need_resched()) { - if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) + if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) { + /* + * Task has not woken up but we are exiting the polling + * loop anyway. Require a barrier after polling is + * cleared to order subsequent test of need_resched(). + */ + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb(); break; + } } HMT_medium(); ppc64_runlatch_on(); clear_thread_flag(TIF_POLLING_NRFLAG); - smp_mb(); return index; } diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index a404f352d284..e9b3853d93ea 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -71,13 +71,20 @@ static int snooze_loop(struct cpuidle_device *dev, while (!need_resched()) { HMT_low(); HMT_very_low(); - if (snooze_timeout_en && get_tb() > snooze_exit_time) + if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) { + /* + * Task has not woken up but we are exiting the polling + * loop anyway. Require a barrier after polling is + * cleared to order subsequent test of need_resched(). + */ + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb(); break; + } } HMT_medium(); clear_thread_flag(TIF_POLLING_NRFLAG); - smp_mb(); idle_loop_epilog(in_purr); -- cgit v1.2.3 From 3ced8d73006321bd2a0412fa0ff4b065a02e7514 Mon Sep 17 00:00:00 2001 From: Christophe Lombard Date: Thu, 22 Jun 2017 15:07:27 +0200 Subject: cxl: Export library to support IBM XSL This patch exports a in-kernel 'library' API which can be called by other drivers to help interacting with an IBM XSL on a POWER9 system. The XSL (Translation Service Layer) is a stripped down version of the PSL (Power Service Layer) used in some cards such as the Mellanox CX5. Like the PSL, it implements the CAIA architecture, but has a number of differences, mostly in it's implementation dependent registers. The XSL also uses a special DMA cxl mode, which uses a slightly different init sequence for the CAPP and PHB. Signed-off-by: Andrew Donnellan Signed-off-by: Christophe Lombard Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 1 + drivers/misc/cxl/Kconfig | 5 + drivers/misc/cxl/Makefile | 2 +- drivers/misc/cxl/cxl.h | 6 + drivers/misc/cxl/cxllib.c | 246 ++++++++++++++++++++++++++++++++++++ drivers/misc/cxl/fault.c | 29 +++-- drivers/misc/cxl/native.c | 16 ++- drivers/misc/cxl/pci.c | 41 ++++-- include/misc/cxllib.h | 133 +++++++++++++++++++ 9 files changed, 449 insertions(+), 30 deletions(-) create mode 100644 drivers/misc/cxl/cxllib.c create mode 100644 include/misc/cxllib.h (limited to 'drivers') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 0b543f0f54f5..ef930ba500f9 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -948,6 +948,7 @@ enum { OPAL_PHB_CAPI_MODE_SNOOP_OFF = 2, OPAL_PHB_CAPI_MODE_SNOOP_ON = 3, OPAL_PHB_CAPI_MODE_DMA = 4, + OPAL_PHB_CAPI_MODE_DMA_TVT1 = 5, }; /* OPAL I2C request */ diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig index b75cf830d08a..93397cb05b15 100644 --- a/drivers/misc/cxl/Kconfig +++ b/drivers/misc/cxl/Kconfig @@ -11,11 +11,16 @@ config CXL_AFU_DRIVER_OPS bool default n +config CXL_LIB + bool + default n + config CXL tristate "Support for IBM Coherent Accelerators (CXL)" depends on PPC_POWERNV && PCI_MSI && EEH select CXL_BASE select CXL_AFU_DRIVER_OPS + select CXL_LIB default m help Select this option to enable driver support for IBM Coherent diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile index c14fd6b65b5a..0b5fd749d96d 100644 --- a/drivers/misc/cxl/Makefile +++ b/drivers/misc/cxl/Makefile @@ -3,7 +3,7 @@ ccflags-$(CONFIG_PPC_WERROR) += -Werror cxl-y += main.o file.o irq.o fault.o native.o cxl-y += context.o sysfs.o pci.o trace.o -cxl-y += vphb.o phb.o api.o +cxl-y += vphb.o phb.o api.o cxllib.o cxl-$(CONFIG_PPC_PSERIES) += flash.o guest.o of.o hcalls.o cxl-$(CONFIG_DEBUG_FS) += debugfs.o obj-$(CONFIG_CXL) += cxl.o diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index a03f8e7535e5..b1afeccbb97f 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -1010,6 +1010,7 @@ static inline void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct den void cxl_handle_fault(struct work_struct *work); void cxl_prefault(struct cxl_context *ctx, u64 wed); +int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar); struct cxl *get_cxl_adapter(int num); int cxl_alloc_sst(struct cxl_context *ctx); @@ -1061,6 +1062,11 @@ int cxl_afu_slbia(struct cxl_afu *afu); int cxl_data_cache_flush(struct cxl *adapter); int cxl_afu_disable(struct cxl_afu *afu); int cxl_psl_purge(struct cxl_afu *afu); +int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, + u32 *phb_index, u64 *capp_unit_id); +int cxl_slot_is_switched(struct pci_dev *dev); +int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg); +u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9); void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx); void cxl_native_irq_dump_regs_psl8(struct cxl_context *ctx); diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c new file mode 100644 index 000000000000..5dba23ca2e5f --- /dev/null +++ b/drivers/misc/cxl/cxllib.c @@ -0,0 +1,246 @@ +/* + * Copyright 2017 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +#include "cxl.h" + +#define CXL_INVALID_DRA ~0ull +#define CXL_DUMMY_READ_SIZE 128 +#define CXL_DUMMY_READ_ALIGN 8 +#define CXL_CAPI_WINDOW_START 0x2000000000000ull +#define CXL_CAPI_WINDOW_LOG_SIZE 48 +#define CXL_XSL_CONFIG_CURRENT_VERSION CXL_XSL_CONFIG_VERSION1 + + +bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags) +{ + int rc; + u32 phb_index; + u64 chip_id, capp_unit_id; + + /* No flags currently supported */ + if (flags) + return false; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return false; + + if (!cxl_is_power9()) + return false; + + if (cxl_slot_is_switched(dev)) + return false; + + /* on p9, some pci slots are not connected to a CAPP unit */ + rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id); + if (rc) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(cxllib_slot_is_supported); + +static DEFINE_MUTEX(dra_mutex); +static u64 dummy_read_addr = CXL_INVALID_DRA; + +static int allocate_dummy_read_buf(void) +{ + u64 buf, vaddr; + size_t buf_size; + + /* + * Dummy read buffer is 128-byte long, aligned on a + * 256-byte boundary and we need the physical address. + */ + buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN); + buf = (u64) kzalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) & + (~0ull << CXL_DUMMY_READ_ALIGN); + + WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size), + "Dummy read buffer alignment issue"); + dummy_read_addr = virt_to_phys((void *) vaddr); + return 0; +} + +int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg) +{ + int rc; + u32 phb_index; + u64 chip_id, capp_unit_id; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return -EINVAL; + + mutex_lock(&dra_mutex); + if (dummy_read_addr == CXL_INVALID_DRA) { + rc = allocate_dummy_read_buf(); + if (rc) { + mutex_unlock(&dra_mutex); + return rc; + } + } + mutex_unlock(&dra_mutex); + + rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id); + if (rc) + return rc; + + rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl); + if (rc) + return rc; + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { + /* workaround for DD1 - nbwind = capiind */ + cfg->dsnctl |= ((u64)0x02 << (63-47)); + } + + cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION; + cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE; + cfg->bar_addr = CXL_CAPI_WINDOW_START; + cfg->dra = dummy_read_addr; + return 0; +} +EXPORT_SYMBOL_GPL(cxllib_get_xsl_config); + +int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode, + unsigned long flags) +{ + int rc = 0; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return -EINVAL; + + switch (mode) { + case CXL_MODE_PCI: + /* + * We currently don't support going back to PCI mode + * However, we'll turn the invalidations off, so that + * the firmware doesn't have to ack them and can do + * things like reset, etc.. with no worries. + * So always return EPERM (can't go back to PCI) or + * EBUSY if we couldn't even turn off snooping + */ + rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF); + if (rc) + rc = -EBUSY; + else + rc = -EPERM; + break; + case CXL_MODE_CXL: + /* DMA only supported on TVT1 for the time being */ + if (flags != CXL_MODE_DMA_TVT1) + return -EINVAL; + rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1); + if (rc) + return rc; + rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON); + break; + default: + rc = -EINVAL; + } + return rc; +} +EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode); + +/* + * When switching the PHB to capi mode, the TVT#1 entry for + * the Partitionable Endpoint is set in bypass mode, like + * in PCI mode. + * Configure the device dma to use TVT#1, which is done + * by calling dma_set_mask() with a mask large enough. + */ +int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags) +{ + int rc; + + if (flags) + return -EINVAL; + + rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64)); + return rc; +} +EXPORT_SYMBOL_GPL(cxllib_set_device_dma); + +int cxllib_get_PE_attributes(struct task_struct *task, + unsigned long translation_mode, + struct cxllib_pe_attributes *attr) +{ + struct mm_struct *mm = NULL; + + if (translation_mode != CXL_TRANSLATED_MODE && + translation_mode != CXL_REAL_MODE) + return -EINVAL; + + attr->sr = cxl_calculate_sr(false, + task == NULL, + translation_mode == CXL_REAL_MODE, + true); + attr->lpid = mfspr(SPRN_LPID); + if (task) { + mm = get_task_mm(task); + if (mm == NULL) + return -EINVAL; + /* + * Caller is keeping a reference on mm_users for as long + * as XSL uses the memory context + */ + attr->pid = mm->context.id; + mmput(mm); + } else { + attr->pid = 0; + } + attr->tid = 0; + return 0; +} +EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes); + +int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags) +{ + int rc; + u64 dar; + struct vm_area_struct *vma = NULL; + unsigned long page_size; + + if (mm == NULL) + return -EFAULT; + + down_read(&mm->mmap_sem); + + for (dar = addr; dar < addr + size; dar += page_size) { + if (!vma || dar < vma->vm_start || dar > vma->vm_end) { + vma = find_vma(mm, addr); + if (!vma) { + pr_err("Can't find vma for addr %016llx\n", addr); + rc = -EFAULT; + goto out; + } + /* get the size of the pages allocated */ + page_size = vma_kernel_pagesize(vma); + } + + rc = cxl_handle_mm_fault(mm, flags, dar); + if (rc) { + pr_err("cxl_handle_mm_fault failed %d", rc); + rc = -EFAULT; + goto out; + } + } + rc = 0; +out: + up_read(&mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(cxllib_handle_fault); diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index c79e39bad7a4..6eed7d03e2b5 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -132,18 +132,15 @@ static int cxl_handle_segment_miss(struct cxl_context *ctx, return IRQ_HANDLED; } -static void cxl_handle_page_fault(struct cxl_context *ctx, - struct mm_struct *mm, u64 dsisr, u64 dar) +int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar) { unsigned flt = 0; int result; unsigned long access, flags, inv_flags = 0; - trace_cxl_pte_miss(ctx, dsisr, dar); - if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) { pr_devel("copro_handle_mm_fault failed: %#x\n", result); - return cxl_ack_ae(ctx); + return result; } if (!radix_enabled()) { @@ -155,9 +152,8 @@ static void cxl_handle_page_fault(struct cxl_context *ctx, if (dsisr & CXL_PSL_DSISR_An_S) access |= _PAGE_WRITE; - access |= _PAGE_PRIVILEGED; - if ((!ctx->kernel) || (REGION_ID(dar) == USER_REGION_ID)) - access &= ~_PAGE_PRIVILEGED; + if (!mm && (REGION_ID(dar) != USER_REGION_ID)) + access |= _PAGE_PRIVILEGED; if (dsisr & DSISR_NOHPTE) inv_flags |= HPTE_NOHPTE_UPDATE; @@ -166,8 +162,21 @@ static void cxl_handle_page_fault(struct cxl_context *ctx, hash_page_mm(mm, dar, access, 0x300, inv_flags); local_irq_restore(flags); } - pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); - cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); + return 0; +} + +static void cxl_handle_page_fault(struct cxl_context *ctx, + struct mm_struct *mm, + u64 dsisr, u64 dar) +{ + trace_cxl_pte_miss(ctx, dsisr, dar); + + if (cxl_handle_mm_fault(mm, dsisr, dar)) { + cxl_ack_ae(ctx); + } else { + pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); + } } /* diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 2b2f8894149d..4a82c313cf71 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -586,17 +586,17 @@ err: #define set_endian(sr) ((sr) &= ~(CXL_PSL_SR_An_LE)) #endif -static u64 calculate_sr(struct cxl_context *ctx) +u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9) { u64 sr = 0; set_endian(sr); - if (ctx->master) + if (master) sr |= CXL_PSL_SR_An_MP; if (mfspr(SPRN_LPCR) & LPCR_TC) sr |= CXL_PSL_SR_An_TC; - if (ctx->kernel) { - if (!ctx->real_mode) + if (kernel) { + if (!real_mode) sr |= CXL_PSL_SR_An_R; sr |= (mfmsr() & MSR_SF) | CXL_PSL_SR_An_HV; } else { @@ -608,7 +608,7 @@ static u64 calculate_sr(struct cxl_context *ctx) if (!test_tsk_thread_flag(current, TIF_32BIT)) sr |= CXL_PSL_SR_An_SF; } - if (cxl_is_power9()) { + if (p9) { if (radix_enabled()) sr |= CXL_PSL_SR_An_XLAT_ror; else @@ -617,6 +617,12 @@ static u64 calculate_sr(struct cxl_context *ctx) return sr; } +static u64 calculate_sr(struct cxl_context *ctx) +{ + return cxl_calculate_sr(ctx->master, ctx->kernel, ctx->real_mode, + cxl_is_power9()); +} + static void update_ivtes_directed(struct cxl_context *ctx) { bool need_update = (ctx->status == STARTED); diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 1eb9859809bf..d18b3d9292fd 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -375,7 +375,7 @@ static u64 get_capp_unit_id(struct device_node *np, u32 phb_index) return 0; } -static int calc_capp_routing(struct pci_dev *dev, u64 *chipid, +int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, u32 *phb_index, u64 *capp_unit_id) { int rc; @@ -408,17 +408,9 @@ static int calc_capp_routing(struct pci_dev *dev, u64 *chipid, return 0; } -static int init_implementation_adapter_regs_psl9(struct cxl *adapter, struct pci_dev *dev) +int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg) { - u64 xsl_dsnctl, psl_fircntl; - u64 chipid; - u32 phb_index; - u64 capp_unit_id; - int rc; - - rc = calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); - if (rc) - return rc; + u64 xsl_dsnctl; /* * CAPI Identifier bits [0:7] @@ -454,6 +446,27 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter, struct pci xsl_dsnctl |= ((u64)0x04 << (63-55)); } + *reg = xsl_dsnctl; + return 0; +} + +static int init_implementation_adapter_regs_psl9(struct cxl *adapter, + struct pci_dev *dev) +{ + u64 xsl_dsnctl, psl_fircntl; + u64 chipid; + u32 phb_index; + u64 capp_unit_id; + int rc; + + rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); + if (rc) + return rc; + + rc = cxl_get_xsl9_dsnctl(capp_unit_id, &xsl_dsnctl); + if (rc) + return rc; + cxl_p1_write(adapter, CXL_XSL9_DSNCTL, xsl_dsnctl); /* Set fir_cntl to recommended value for production env */ @@ -505,7 +518,7 @@ static int init_implementation_adapter_regs_psl8(struct cxl *adapter, struct pci u64 capp_unit_id; int rc; - rc = calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); + rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); if (rc) return rc; @@ -538,7 +551,7 @@ static int init_implementation_adapter_regs_xsl(struct cxl *adapter, struct pci_ u64 capp_unit_id; int rc; - rc = calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); + rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); if (rc) return rc; @@ -1897,7 +1910,7 @@ static void cxl_pci_remove_adapter(struct cxl *adapter) #define CXL_MAX_PCIEX_PARENT 2 -static int cxl_slot_is_switched(struct pci_dev *dev) +int cxl_slot_is_switched(struct pci_dev *dev) { struct device_node *np; int depth = 0; diff --git a/include/misc/cxllib.h b/include/misc/cxllib.h new file mode 100644 index 000000000000..e5aa29f019a6 --- /dev/null +++ b/include/misc/cxllib.h @@ -0,0 +1,133 @@ +/* + * Copyright 2017 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _MISC_CXLLIB_H +#define _MISC_CXLLIB_H + +#include +#include + +/* + * cxl driver exports a in-kernel 'library' API which can be called by + * other drivers to help interacting with an IBM XSL. + */ + +/* + * tells whether capi is supported on the PCIe slot where the + * device is seated + * + * Input: + * dev: device whose slot needs to be checked + * flags: 0 for the time being + */ +bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags); + + +/* + * Returns the configuration parameters to be used by the XSL or device + * + * Input: + * dev: device, used to find PHB + * Output: + * struct cxllib_xsl_config: + * version + * capi BAR address, i.e. 0x2000000000000-0x2FFFFFFFFFFFF + * capi BAR size + * data send control (XSL_DSNCTL) + * dummy read address (XSL_DRA) + */ +#define CXL_XSL_CONFIG_VERSION1 1 +struct cxllib_xsl_config { + u32 version; /* format version for register encoding */ + u32 log_bar_size;/* log size of the capi_window */ + u64 bar_addr; /* address of the start of capi window */ + u64 dsnctl; /* matches definition of XSL_DSNCTL */ + u64 dra; /* real address that can be used for dummy read */ +}; + +int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg); + + +/* + * Activate capi for the pci host bridge associated with the device. + * Can be extended to deactivate once we know how to do it. + * Device must be ready to accept messages from the CAPP unit and + * respond accordingly (TLB invalidates, ...) + * + * PHB is switched to capi mode through calls to skiboot. + * CAPP snooping is activated + * + * Input: + * dev: device whose PHB should switch mode + * mode: mode to switch to i.e. CAPI or PCI + * flags: options related to the mode + */ +enum cxllib_mode { + CXL_MODE_CXL, + CXL_MODE_PCI, +}; + +#define CXL_MODE_NO_DMA 0 +#define CXL_MODE_DMA_TVT0 1 +#define CXL_MODE_DMA_TVT1 2 + +int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode, + unsigned long flags); + + +/* + * Set the device for capi DMA. + * Define its dma_ops and dma offset so that allocations will be using TVT#1 + * + * Input: + * dev: device to set + * flags: options. CXL_MODE_DMA_TVT1 should be used + */ +int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags); + + +/* + * Get the Process Element structure for the given thread + * + * Input: + * task: task_struct for the context of the translation + * translation_mode: whether addresses should be translated + * Output: + * attr: attributes to fill up the Process Element structure from CAIA + */ +struct cxllib_pe_attributes { + u64 sr; + u32 lpid; + u32 tid; + u32 pid; +}; +#define CXL_TRANSLATED_MODE 0 +#define CXL_REAL_MODE 1 + +int cxllib_get_PE_attributes(struct task_struct *task, + unsigned long translation_mode, struct cxllib_pe_attributes *attr); + + +/* + * Handle memory fault. + * Fault in all the pages of the specified buffer for the permissions + * provided in ‘flags’ + * + * Shouldn't be called from interrupt context + * + * Input: + * mm: struct mm for the thread faulting the pages + * addr: base address of the buffer to page in + * size: size of the buffer to page in + * flags: permission requested (DSISR_ISSTORE...) + */ +int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags); + + +#endif /* _MISC_CXLLIB_H */ -- cgit v1.2.3